def LoadFromMongo(self, find_filter, db_name="AnalysisEvo",
                  coll_name="Simulation", client="local"):
    """Populate this object with simulation series stored in Mongo.

    Metadata records matching ``find_filter`` are fetched through
    ``MongoStorage.FilterAndGetMetadata``; the corresponding series are
    then loaded one id at a time, in ascending order of the
    ``extra.original_seed`` value found in each metadata record.

    Args:
        find_filter: Query forwarded to ``FilterAndGetMetadata``.
        db_name: Database name forwarded to the storage calls.
        coll_name: Collection name forwarded to the storage calls.
        client: Forwarded to the ``MongoStorage`` constructor.

    Raises:
        ValueError: If no metadata record matches ``find_filter``.

    Side effects:
        Sets ``self.result_rs_list``, ``self.startdate`` and
        ``self.enddate``.
    """
    storage = MongoStorage(client=client)
    records = list(
        storage.FilterAndGetMetadata(
            find_filter=find_filter, db_name=db_name, coll_name=coll_name
        )
    )
    if not records:
        raise ValueError(
            "Could not find any simulation series from the given find_filter!"
        )

    # Collect the sort key and the document id of every record.
    seeds = []
    ids = []
    for record in records:
        seeds.append(record.get("extra").get("original_seed"))
        ids.append(record.get("_id"))

    ordered = pd.DataFrame({"original_seed": seeds, "_id": ids})
    ordered = ordered.sort_values("original_seed")

    # Each id is requested on its own and the first returned element kept,
    # so the resulting list follows the seed ordering established above.
    loaded = []
    for doc_id in ordered["_id"].values:
        series = storage.LoadAllSeriesFromId(
            [doc_id], db_name=db_name, coll_name=coll_name
        )
        loaded.append(series[0])
    self.result_rs_list = loaded

    # The date range comes from the first record as returned by the query
    # (i.e. before the seed sort).
    first_extra = records[0].get("extra")
    self.startdate = first_extra.get("startdate")
    self.enddate = records[0].get("extra").get("enddate")
def SampleStrategy(self, random_n_sample=None, filter_dict={},  # noqa: B006
                   dont_load=False):
    """Pick one stored record per strategy name and optionally load it.

    Metadata is queried through ``MongoStorage.FilterAndGetMetadata``. For
    every distinct ``Name`` only the record(s) whose ``EndDate`` equals the
    maximum ``EndDate`` of that name are kept, and these are then reduced
    to a single record per name. Optionally a random subset of the
    remaining records is drawn.

    Args:
        random_n_sample: If not ``None``, number of records to draw at
            random (via ``DataFrame.sample``) from the de-duplicated set.
        filter_dict: Query forwarded to ``FilterAndGetMetadata``. Passing
            ``None`` explicitly selects a built-in filter (``User`` not in
            ``["Deleted"]`` and ``Name`` matching ``_[0-9]{4}_``). The
            default empty dict is forwarded as-is.
        dont_load: When true, nothing is loaded and the selected names
            are returned instead.

    Returns:
        The array of selected names when ``dont_load`` is true, otherwise
        ``None`` after every selected series was passed to
        ``self.AddSeries``.

    Raises:
        ValueError: If the query returns no metadata at all.
    """
    # NOTE(review): because the default is ``{}`` rather than ``None``, the
    # built-in filter below is only used when a caller passes ``None``
    # explicitly. The default is left untouched to keep no-argument calls
    # behaving as before -- confirm whether ``None`` was the intended
    # default.
    if filter_dict is None:
        query = {
            "User": {"$nin": ["Deleted"]},
            "Name": {"$regex": "_[0-9]{4}_"},
        }
    else:
        # Shallow copy so the shared default dict can never be mutated by
        # the storage layer.
        query = dict(filter_dict)

    MS = MongoStorage()
    meta_list = list(MS.FilterAndGetMetadata(query))
    if not meta_list:
        # Same error style as LoadFromMongo, instead of an opaque
        # KeyError('Name') from the empty DataFrame further down.
        raise ValueError(
            "Could not find any strategy from the given filter_dict!"
        )

    df = pd.DataFrame(meta_list)

    # ``transform`` yields a result aligned with ``df``'s own index on every
    # pandas version; ``groupby(...).apply`` with a like-indexed result is
    # prefixed with the group keys on pandas >= 2.0 and cannot be assigned
    # back as a column.
    latest_end = df.groupby("Name")["EndDate"].transform("max")
    fdf = df[df["EndDate"] == latest_end]

    # Stable sort so that, among records tied on Name and EndDate, "last"
    # consistently means the last one in query order.
    fdf = fdf.sort_values("Name", kind="mergesort")
    fdf = fdf.drop_duplicates(subset="Name", keep="last")

    if random_n_sample is not None:
        fdf = fdf.sample(random_n_sample)

    id_list = fdf["_id"].values
    name_list = fdf["Name"].values

    if dont_load:
        return name_list

    for series in MS.LoadAllSeriesFromId(id_list):
        self.AddSeries(series)
    return None