Ejemplo n.º 1
0
    def test_gp_bagging(self):
        """Smoke-test a campaign driven by the bagged Gaussian-process agent.

        Reads ``test_df.csv``, keeps the rows whose ``N_species`` is at most
        3, builds a ``Campaign`` with ``create_seed=200``, checks that it
        reports itself as initialized and then calls ``auto_loop(6)``.
        """
        seed_size = 200  # Starting sample size

        frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        few_species_mask = frame['N_species'] <= 3
        candidates = frame[few_species_mask]

        bagged_agent = BaggedGaussianProcessStabilityAgent(
            n_query=10,
            hull_distance=0.05,
            alpha=0.5,  # Fraction of std to include in expected improvement
            n_estimators=2,
            max_samples=195,
            parallel=False,
        )
        stability = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        # The sampler and the campaign share the very same filtered frame.
        sampler = ATFSampler(candidates)

        campaign = Campaign(
            candidates, bagged_agent, sampler, stability,
            create_seed=seed_size,
        )
        campaign.initialize()
        self.assertTrue(campaign.initialized)

        campaign.auto_loop(6)
        # Unconditional assertion: the effective check is that auto_loop()
        # returned without raising.
        self.assertTrue(True)
Ejemplo n.º 2
0
    def test_random_agent_loop(self):
        """Run a RandomAgent campaign six times, then resume and run once more.

        A second ``Campaign`` built from the same arguments is asserted to be
        already initialized, at iteration 6, with ``loop_state`` equal to
        ``None``; one further ``run()`` must bring it to iteration 7.
        """
        seed_size = 200  # Starting sample size
        atf_frame = load_default_atf_data()
        random_agent = RandomAgent(n_query=10)
        stability = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        sampler = ATFSampler(dataframe=atf_frame)
        # Both campaigns below are constructed from exactly these objects.
        shared_args = (atf_frame, random_agent, sampler, stability)

        campaign = Campaign(*shared_args, create_seed=seed_size)
        campaign.initialize()
        self.assertFalse(campaign.create_seed)

        for _ in range(6):
            campaign.run()
            # Unconditional assertion: passes whenever run() did not raise.
            self.assertTrue(True)

        # Testing the continuation
        resumed = Campaign(*shared_args, create_seed=seed_size)
        self.assertTrue(resumed.initialized)
        self.assertEqual(resumed.iteration, 6)
        self.assertEqual(resumed.loop_state, None)

        resumed.run()
        self.assertTrue(True)
        self.assertEqual(resumed.iteration, 7)
    def test_mp_loop(self):
        """Run a seeded RandomAgent campaign on ``test_df_analysis.csv``.

        The first 38 rows are passed as ``seed_data`` and rows 38-208 as
        candidates. After every ``run()`` the test expects ``hull.png`` in
        the working directory, and from the second iteration onwards also
        ``history.pickle``. A second ``Campaign`` is then constructed and
        asserted to resume at iteration 6 and to reach iteration 7 after one
        more ``run()``.
        """
        frame = pd.read_csv(
            os.path.join(CAMD_TEST_FILES, 'test_df_analysis.csv'))
        # Strip the "mp-" / "mvc-" prefixes so the ids become plain integers.
        frame['id'] = [int(mp_id.replace("mp-", "").replace('mvc-', ''))
                       for mp_id in frame['id']]
        # NOTE(review): the original called ``df.set_index("id")`` here and
        # discarded the result. ``DataFrame.set_index`` is not in-place by
        # default, so the index was never changed; that dead statement is
        # dropped and the frame keeps the index produced by ``read_csv``.
        # If indexing by id is actually wanted, assign the result - TODO
        # confirm against what Campaign / ATFSampler expect.
        frame['Composition'] = frame['formula']

        # Just use the Ti-O-N chemsys
        seed = frame.iloc[:38]
        candidates = frame.iloc[38:209]
        agent = RandomAgent(n_query=20)
        analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        experiment = ATFSampler(dataframe=frame)
        campaign = Campaign(
            candidates, agent, experiment, analyzer, seed_data=seed
        )

        campaign.initialize()

        for iteration in range(6):
            campaign.run()
            self.assertTrue(os.path.isfile("hull.png"))
            # history.pickle is only checked from the second iteration on.
            if iteration >= 1:
                self.assertTrue(os.path.isfile("history.pickle"))

        # Testing the continuation
        resumed = Campaign(frame, agent, experiment, analyzer)
        self.assertTrue(resumed.initialized)
        self.assertEqual(resumed.iteration, 6)
        self.assertEqual(resumed.loop_state, None)

        resumed.run()
        self.assertTrue(True)
        self.assertEqual(resumed.iteration, 7)
    def test_svgp_loop(self):
        """Smoke-test a campaign driven by ``SVGProcessStabilityAgent``.

        Uses the rows of ``test_df.csv`` whose ``N_species`` is at most 3,
        builds a ``Campaign`` with ``create_seed=200``, checks that it is
        initialized and then calls ``auto_loop(3)``.
        """
        seed_size = 200  # Starting sample size

        frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        few_species_mask = frame['N_species'] <= 3
        candidates = frame[few_species_mask]

        svgp_agent = SVGProcessStabilityAgent(
            n_query=10, hull_distance=0.05, alpha=0.5, M=100)
        stability = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        # The sampler and the campaign share the very same filtered frame.
        sampler = ATFSampler(candidates)

        campaign = Campaign(
            candidates, svgp_agent, sampler, stability,
            create_seed=seed_size,
        )
        campaign.initialize()
        self.assertTrue(campaign.initialized)

        campaign.auto_loop(3)
        # Unconditional assertion: the effective check is that auto_loop()
        # returned without raising.
        self.assertTrue(True)
Ejemplo n.º 5
0
    def test_sync(self):
        """Initialize an ``s3_prefix="test"`` campaign and inspect S3.

        After the campaign is initialized inside a scratch directory, the
        object ``test/iteration.json`` is fetched from ``CAMD_S3_BUCKET`` and
        its JSON content is asserted to equal 0. Presumably that object is
        written by ``initialize()`` - the upload itself is not visible here.
        """
        with ScratchDir('.'):
            frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))

            # Construct and start campaign
            campaign = Campaign(
                frame,
                AgentStabilityML5(),
                ATFSampler(frame),
                StabilityAnalyzer(),
                create_seed=10,
                s3_prefix="test",
            )
            campaign.initialize()

        # The remote check runs after the scratch directory has been left.
        remote = boto3.resource('s3').Object(
            CAMD_S3_BUCKET, "test/iteration.json")
        payload = remote.get()['Body'].read()
        self.assertEqual(json.loads(payload), 0)
    def test_qbc_agent_loop(self):
        """Smoke-test a campaign driven by ``QBCStabilityAgent``.

        The agent wraps an ``MLPRegressor`` with hidden layers of 84 and 50
        units. Candidates are the rows of ``test_df.csv`` whose
        ``N_species`` is at most 3; the campaign is built with
        ``create_seed=200``, checked for initialization and then driven with
        ``auto_loop(3)``.
        """
        seed_size = 200  # Starting sample size

        frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        few_species_mask = frame['N_species'] <= 3
        candidates = frame[few_species_mask]

        regressor = MLPRegressor(hidden_layer_sizes=(84, 50))
        qbc_agent = QBCStabilityAgent(
            model=regressor, n_query=10, hull_distance=0.05, alpha=0.5)
        stability = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        # The sampler and the campaign share the very same filtered frame.
        sampler = ATFSampler(dataframe=candidates)

        campaign = Campaign(
            candidates, qbc_agent, sampler, stability,
            create_seed=seed_size,
        )
        campaign.initialize()
        self.assertTrue(campaign.initialized)

        campaign.auto_loop(3)
        # Unconditional assertion: the effective check is that auto_loop()
        # returned without raising.
        self.assertTrue(True)
    def test_random_agent_loop(self):
        """Run six iterations of a RandomAgent campaign on the OQMD frame.

        The frame is obtained via
        ``load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")``. The
        campaign is built with ``create_seed=5000`` and an agent requesting
        200 candidates per query; after ``initialize()`` the campaign's
        ``create_seed`` attribute is asserted to be falsy.
        """
        seed_size = 5000
        oqmd_frame = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")
        random_agent = RandomAgent(n_query=200)
        stability = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        sampler = ATFSampler(dataframe=oqmd_frame)

        campaign = Campaign(
            oqmd_frame, random_agent, sampler, stability,
            create_seed=seed_size,
        )

        campaign.initialize()
        self.assertFalse(campaign.create_seed)

        for _ in range(6):
            campaign.run()
            # Unconditional assertion: passes whenever run() did not raise.
            self.assertTrue(True)
    def test_simple_gp_loop(self):
        """Smoke-test a campaign driven by ``GaussianProcessStabilityAgent``.

        Candidates are the rows of ``test_df.csv`` whose ``N_species`` is at
        most 3. The campaign is built with ``create_seed=200``, checked for
        initialization and then driven with ``auto_loop(2)``.
        """
        seed_size = 200  # Starting sample size
        # Number of new candidates "calculated with DFT" per query, i.e.
        # requested from the oracle.
        query_size = 10

        frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        few_species_mask = frame['N_species'] <= 3
        candidates = frame[few_species_mask]

        gp_agent = GaussianProcessStabilityAgent(
            n_query=query_size, hull_distance=0.05, alpha=0.5, parallel=False)
        stability = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        # The sampler and the campaign share the very same filtered frame.
        sampler = ATFSampler(dataframe=candidates)

        campaign = Campaign(
            candidates, gp_agent, sampler, stability,
            create_seed=seed_size,
        )
        campaign.initialize()
        self.assertTrue(campaign.initialized)

        campaign.auto_loop(2)
        # Unconditional assertion: the effective check is that auto_loop()
        # returned without raising.
        self.assertTrue(True)
    def test_gp_ucb_generic(self):
        """Smoke-test ``GenericGPUCB`` on a synthetic one-dimensional target.

        The target is ``sin(x)**2 * x**2`` sampled at 500 evenly spaced
        points on [0, 10]. A campaign is built with ``create_seed=5``,
        initialized with ``random_state=20``, checked for initialization and
        run for a single iteration.
        """
        def f(x):
            return np.sin(x) * np.sin(x) * (x ** 2)

        x = np.linspace(0, 10, 500)
        y = f(x)
        df = pd.DataFrame({'x': x, 'target': y})

        n_query = 2  # This many experiments are requested in each iteration
        n_seed = 5  # This many samples are randomly acquired in the beginning to form a seed.
        # The query size was previously assigned to a local that was never
        # used while the agent hard-coded the same literal; it is now passed
        # through so the two cannot drift apart.
        agent = GenericGPUCB(
            n_query=n_query,
            kernel=ConstantKernel(100.0) + RBF(10.0) * ConstantKernel(1.0),
        )
        analyzer = GenericMaxAnalyzer(threshold=58)
        experiment = ATFSampler(dataframe=df)
        candidate_data = df
        new_loop = Campaign(candidate_data, agent, experiment, analyzer,
                            create_seed=n_seed)
        new_loop.initialize(random_state=20)
        self.assertTrue(new_loop.initialized)
        new_loop.run()
        # Unconditional assertion: the effective check is that run()
        # returned without raising.
        self.assertTrue(True)