예제 #1
0
    def LoadFromMongo(self,
                      find_filter,
                      db_name="AnalysisEvo",
                      coll_name="Simulation",
                      client="local"):
        """Load the simulation series matching ``find_filter`` into this object.

        Metadata records are fetched through ``MongoStorage``; the matching
        series are then loaded one at a time, ordered by the
        ``extra.original_seed`` value of their metadata, and stored in
        ``self.result_rs_list``.  ``self.startdate`` and ``self.enddate`` are
        taken from the ``extra`` entry of the first metadata record returned
        by the query (before sorting).

        Args:
            find_filter: Filter forwarded to ``FilterAndGetMetadata``.
            db_name: Database name forwarded to the storage calls.
            coll_name: Collection name forwarded to the storage calls.
            client: Client identifier forwarded to ``MongoStorage``.

        Raises:
            ValueError: If the query returns no metadata records.
        """
        storage = MongoStorage(client=client)
        records = list(
            storage.FilterAndGetMetadata(find_filter=find_filter,
                                         db_name=db_name,
                                         coll_name=coll_name))
        if not records:
            raise ValueError(
                "Could not find any simulation series from the given find_filter!"
            )

        # Pair every document id with its seed so the ids can be ordered by seed.
        seed_frame = pd.DataFrame({
            "original_seed":
            [rec.get("extra").get("original_seed") for rec in records],
            "_id": [rec.get("_id") for rec in records],
        })
        ordered_ids = seed_frame.sort_values("original_seed")["_id"].values

        # Load one id per call; each call returns a list and only its first
        # element is kept, so the result order follows the seed order.
        loaded = []
        for doc_id in ordered_ids:
            series = storage.LoadAllSeriesFromId([doc_id],
                                                 db_name=db_name,
                                                 coll_name=coll_name)
            loaded.append(series[0])
        self.result_rs_list = loaded

        first_extra = records[0].get("extra")
        self.startdate = first_extra.get("startdate")
        self.enddate = records[0].get("extra").get("enddate")
예제 #2
0
    def SampleStrategy(self, random_n_sample=None, filter_dict={}, dont_load=False):
        """Select one stored record per strategy name and optionally load it.

        Metadata records are fetched through ``MongoStorage`` using
        ``filter_dict``.  For every ``Name`` only the rows whose ``EndDate``
        equals the largest ``EndDate`` of that name are kept, and if several
        rows remain for a name only the last one is retained.  ``SavedDate``
        is not used for the selection.

        Args:
            random_n_sample: If not ``None``, number of rows drawn at random
                (``DataFrame.sample``) from the selected records.
            filter_dict: Filter passed to ``FilterAndGetMetadata``.  Passing
                ``None`` explicitly selects a built-in filter (user not
                "Deleted", name matching ``_[0-9]{4}_``).  NOTE: the default
                is ``{}``, so the built-in filter is *not* applied unless
                ``None`` is passed; kept as-is for backward compatibility.
            dont_load: If true, return the selected names without loading.

        Returns:
            The array of selected names when ``dont_load`` is true; otherwise
            ``None`` after passing every loaded series to ``self.AddSeries``.

        Raises:
            ValueError: If the query returns no metadata records.
        """
        if filter_dict is None:
            filter_dict = {"User": {"$nin": ["Deleted"]}, "Name": {"$regex": "_[0-9]{4}_"}}
        else:
            # Work on a copy so the shared default dict (or the caller's dict)
            # can never be mutated by the storage layer.
            filter_dict = dict(filter_dict)

        MS = MongoStorage()
        meta_list = list(MS.FilterAndGetMetadata(filter_dict))
        if not meta_list:
            # An empty frame has no "Name" column; fail with a clear message
            # instead of an opaque KeyError from groupby.
            raise ValueError(
                "Could not find any strategy from the given filter_dict!")
        df = pd.DataFrame(meta_list)

        # transform() returns a result aligned with df's own index on every
        # pandas version, unlike groupby().apply() whose result index gained
        # the group key in pandas 2.0.
        latest_end = df.groupby("Name")["EndDate"].transform("max")
        fdf = df[df["EndDate"] == latest_end]

        # Stable sort so that, among rows sharing a name, "last" refers to the
        # last row in query order.
        fdf = fdf.sort_values("Name", kind="mergesort")
        fdf = fdf.drop_duplicates(subset="Name", keep="last")
        fdf = fdf.reset_index(drop=True)

        if random_n_sample is not None:
            fdf = fdf.sample(random_n_sample)
        id_list = fdf["_id"].values
        name_list = fdf["Name"].values

        if dont_load:
            return name_list

        for series in MS.LoadAllSeriesFromId(id_list):
            self.AddSeries(series)