예제 #1
0
 def test_construction(self):
     """Build an ATFSampler from a params dict and read back selected rows."""
     values = np.arange(5)
     frame = pd.DataFrame({"index": values, "squared": values**2})
     sampler = ATFSampler({"dataframe": frame})
     self.assertEqual(sampler.get_state(), "completed")

     # Rows 0, 2 and 3 must come back with the squares of their indices
     squared = sampler.get_results([0, 2, 3])['squared']
     matches = squared == np.array([0, 4, 9])
     self.assertTrue(matches.all())
예제 #2
0
    def test_gp_bagging(self):
        """Run a short campaign with the bagged Gaussian-process stability agent.

        The candidate pool is test_df.csv restricted to rows whose N_species
        is at most 3. The campaign is initialized and auto_loop(6) is run.
        """
        full_df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        subset = full_df[full_df['N_species'] <= 3]
        seed_size = 200  # Starting sample size

        bagged_agent = BaggedGaussianProcessStabilityAgent(
            n_query=10,
            hull_distance=0.05,
            alpha=0.5,  # Fraction of std to include in expected improvement
            n_estimators=2,
            max_samples=195,
            parallel=False)
        hull_analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        sampler = ATFSampler(subset)

        campaign = Campaign(subset,
                            bagged_agent,
                            sampler,
                            hull_analyzer,
                            create_seed=seed_size)
        campaign.initialize()
        self.assertTrue(campaign.initialized)

        # No result is inspected: the loop only has to finish without raising
        campaign.auto_loop(6)
        self.assertTrue(True)
예제 #3
0
    def test_sync(self):
        """Run three saved iterations and check the iteration markers on S3."""
        with ScratchDir('.'):
            frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))

            # Construct and start campaign
            campaign = Campaign(frame,
                                AgentStabilityML5(),
                                ATFSampler(frame),
                                StabilityAnalyzer(),
                                create_seed=10,
                                s3_prefix="test")
            campaign.auto_loop(n_iterations=3,
                               save_iterations=True,
                               initialize=True)

        s3_resource = boto3.resource('s3')
        # (object key, expected stored iteration): the top-level marker first,
        # then the per-iteration save directories
        expectations = [("test/iteration.json", 2)]
        expectations.extend(
            (f"test/{number}/iteration.json", number)
            for number in (-1, 0, 1, 2))
        for key, expected in expectations:
            s3_object = s3_resource.Object(CAMD_S3_BUCKET, key)
            stored = json.loads(s3_object.get()['Body'].read())
            self.assertEqual(stored, expected)
예제 #4
0
    def test_random_agent_loop(self):
        """Run a RandomAgent campaign, then check that a new Campaign resumes it.

        The first Campaign is initialized and run for 6 iterations. A second
        Campaign built from the same arguments must report itself as already
        initialized at iteration 6 with no loop state, and must reach
        iteration 7 after one more run.
        """
        df = load_default_atf_data()
        n_seed = 200  # Starting sample size
        agent = RandomAgent(n_query=10)
        analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        experiment = ATFSampler(dataframe=df)
        candidate_data = df
        new_loop = Campaign(candidate_data,
                            agent,
                            experiment,
                            analyzer,
                            create_seed=n_seed)

        new_loop.initialize()
        self.assertFalse(new_loop.create_seed)

        # Each iteration passes simply by completing without an exception
        for _ in range(6):
            new_loop.run()

        # Testing the continuation
        new_loop = Campaign(candidate_data,
                            agent,
                            experiment,
                            analyzer,
                            create_seed=n_seed)
        self.assertTrue(new_loop.initialized)
        self.assertEqual(new_loop.iteration, 6)
        self.assertIsNone(new_loop.loop_state)

        new_loop.run()
        self.assertEqual(new_loop.iteration, 7)
예제 #5
0
 def test_simulated(self):
     """Run a ProtoDFTCampaign with an AdaBoost agent and check the final plot.

     Results are served by an ATFSampler wrapping mn-ni-o-sb.pickle. The seed
     is the MnNiOSb subset of the featurized OQMD frame plus 30 randomly
     sampled rows from the remainder. The test passes when autorun() leaves
     hull_finalized.png in the scratch directory.
     """
     exp_dataframe = pd.read_pickle(
         os.path.join(CAMD_TEST_FILES, "mn-ni-o-sb.pickle"))
     experiment = ATFSampler(exp_dataframe)
     # Candidates are the frame without its last 11 columns
     # NOTE(review): presumably those are the result columns -- confirm
     candidate_data = exp_dataframe.iloc[:, :-11]
     # Set up agents and loop parameters
     agent = AgentStabilityAdaBoost(
         model=MLPRegressor(hidden_layer_sizes=(84, 50)),
         n_query=2,
         hull_distance=0.2,
         exploit_fraction=1.0,
         uncertainty=True,
         alpha=0.5,
         diversify=True,
         n_estimators=20)
     analyzer = StabilityAnalyzer(hull_distance=0.2)
     # Reduce seed_data
     icsd_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")
     seed_data = filter_dataframe_by_composition(icsd_data, "MnNiOSb")
     leftover = ~icsd_data.index.isin(seed_data.index)
     # Add some random other data to test compositional flexibility.
     # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
     seed_data = pd.concat([seed_data, icsd_data.loc[leftover].sample(30)])
     # Drop the full frame before the campaign starts
     del icsd_data
     with ScratchDir('.'):
         campaign = ProtoDFTCampaign(candidate_data=candidate_data,
                                     agent=agent,
                                     experiment=experiment,
                                     analyzer=analyzer,
                                     seed_data=seed_data,
                                     heuristic_stopper=5)
         campaign.autorun()
         self.assertTrue(os.path.isfile('hull_finalized.png'))
예제 #6
0
 def setUp(self):
     """Create the `simple_exp` ATFSampler fixture from a small squares table."""
     values = np.arange(5)
     frame = pd.DataFrame({"index": values, "squared": values**2})
     self.simple_exp = ATFSampler({
         "dataframe": frame,
         "index_values": [0, 2, 3],
     })
예제 #7
0
 def test_submit_get_results(self):
     """Submit three rows to an ATFSampler and check the values it returns."""
     values = np.arange(5)
     frame = pd.DataFrame({"index": values, "squared": values**2})
     sampler = ATFSampler(frame)

     sampler.submit(frame.loc[[0, 2, 3]])
     # The status is checked before monitor() is called
     self.assertEqual(sampler.job_status, "COMPLETED")
     sampler.monitor()

     results = sampler.get_results()
     matches = results['squared'] == [0, 4, 9]
     self.assertTrue(matches.all())
예제 #8
0
    def test_sync(self):
        """Initialize a campaign with an S3 prefix and check the stored iteration."""
        with ScratchDir('.'):
            frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))

            # Construct and start campaign
            campaign = Campaign(frame,
                                AgentStabilityML5(),
                                ATFSampler(frame),
                                StabilityAnalyzer(),
                                create_seed=10,
                                s3_prefix="test")
            campaign.initialize()

        # After initialization alone the stored iteration marker must be 0
        s3_resource = boto3.resource('s3')
        marker = s3_resource.Object(CAMD_S3_BUCKET, "test/iteration.json")
        stored = json.loads(marker.get()['Body'].read())
        self.assertEqual(stored, 0)
예제 #9
0
 def test_simulated(self):
     """Run a ProtoDFTCampaign with a RandomAgent inside a scratch directory.

     Nothing is asserted; the test passes when autorun() completes.
     """
     pickle_path = os.path.join(CAMD_TEST_FILES, "mn-ni-o-sb.pickle")
     exp_dataframe = pd.read_pickle(pickle_path)
     sampler = ATFSampler(exp_dataframe)
     # Candidates are the frame without its last 11 columns
     candidates = exp_dataframe.iloc[:, :-11]
     random_agent = RandomAgent(n_query=2)
     hull_analyzer = StabilityAnalyzer(hull_distance=0.2)

     # Reduce seed_data
     seed = filter_dataframe_by_composition(
         load_dataframe("oqmd1.2_exp_based_entries_featurized_v2"),
         "MnNiOSb")

     with ScratchDir('.'):
         ProtoDFTCampaign(candidate_data=candidates,
                          agent=random_agent,
                          experiment=sampler,
                          analyzer=hull_analyzer,
                          seed_data=seed,
                          heuristic_stopper=5).autorun()
예제 #10
0
    def test_random_agent_loop(self):
        """Initialize a RandomAgent campaign on the featurized frame and run it 6 times."""
        frame = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")
        seed_size = 5000
        random_agent = RandomAgent(n_query=200)
        hull_analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        sampler = ATFSampler(dataframe=frame)

        campaign = Campaign(frame,
                            random_agent,
                            sampler,
                            hull_analyzer,
                            create_seed=seed_size)

        campaign.initialize()
        self.assertFalse(campaign.create_seed)

        # Six consecutive iterations; each only has to finish without raising
        completed = 0
        while completed < 6:
            campaign.run()
            self.assertTrue(True)
            completed += 1
예제 #11
0
 def from_chemsys(cls, chemsys):
     """
     Build an instance preconfigured with a RandomAgent and test_df.csv.

     Args:
         chemsys: value interpolated into the S3 prefix
             "oqmd-atf/runs/<chemsys>"; it is not used to select data
             (presumably a chemical-system string -- confirm with callers).

     Returns:
         The object produced by calling cls with the test dataframe as
         candidate data, a RandomAgent, an ATFSampler over the same
         dataframe, a StabilityAnalyzer, create_seed=200 and the S3 prefix.
     """
     prefix = f"oqmd-atf/runs/{chemsys}"
     frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
     seed_size = 200  # Starting sample size
     batch_size = 10  # This many new candidates are "calculated with DFT" (i.e. requested from Oracle -- DFT)
     random_agent = RandomAgent(n_query=batch_size)
     hull_analyzer = StabilityAnalyzer(hull_distance=0.05)
     sampler = ATFSampler(dataframe=frame)
     return cls(frame,
                random_agent,
                sampler,
                hull_analyzer,
                create_seed=seed_size,
                s3_prefix=prefix)
예제 #12
0
    def test_gp_bucb_generic(self):
        """Initialize and run one GPBatchUCB iteration on a synthetic 1-D target."""
        def target_fn(values):
            return np.sin(values) * np.sin(values) * (values**2)

        grid = np.linspace(0, 10, 500)
        frame = pd.DataFrame({'x': grid, 'target': target_fn(grid)})
        seed_size = 5  # This many samples are randomly acquired in the beginning to form a seed.

        ucb_agent = GPBatchUCB(n_query=2)
        max_analyzer = GenericMaxAnalyzer(threshold=58)
        sampler = ATFSampler(dataframe=frame)
        campaign = Campaign(frame,
                            ucb_agent,
                            sampler,
                            max_analyzer,
                            create_seed=seed_size)

        # The seed draw is fixed through random_state
        campaign.initialize(random_state=20)
        self.assertTrue(campaign.initialized)

        campaign.run()
        self.assertTrue(True)
예제 #13
0
    def test_agent(self, agent):
        """
        Runs a simulation of a given agent according to the
        class attributes

        The campaign uses self.atf_dataframe both as the candidate data and
        as the dataframe behind the ATFSampler experiment, self.seed_data as
        the seed, and self.analyzer as the analyzer. It is initialized and
        looped for self.iterations iterations.

        Args:
            agent (HypothesisAgent): agent to run the campaign with

        Returns:
            (Campaign): the campaign object after auto_loop has returned

        """
        sampler = ATFSampler(dataframe=self.atf_dataframe)
        campaign = Campaign(
            candidate_data=self.atf_dataframe,
            seed_data=self.seed_data,
            agent=agent,
            analyzer=self.analyzer,
            experiment=sampler,
        )
        campaign.auto_loop(n_iterations=self.iterations, initialize=True)
        return campaign
예제 #14
0
    def test_svgp_loop(self):
        """Run a short campaign with the SVGP stability agent.

        The candidate pool is test_df.csv restricted to rows whose N_species
        is at most 3. The campaign is initialized and auto_loop(3) is run.
        """
        full_df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        subset = full_df[full_df['N_species'] <= 3]
        seed_size = 200  # Starting sample size

        svgp_agent = SVGProcessStabilityAgent(
            n_query=10,
            hull_distance=0.05,
            alpha=0.5,
            M=100)
        hull_analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        sampler = ATFSampler(subset)

        campaign = Campaign(subset,
                            svgp_agent,
                            sampler,
                            hull_analyzer,
                            create_seed=seed_size)
        campaign.initialize()
        self.assertTrue(campaign.initialized)

        # No result is inspected: the loop only has to finish without raising
        campaign.auto_loop(3)
        self.assertTrue(True)
예제 #15
0
    def test_mp_loop(self):
        """Run a seeded RandomAgent campaign on test_df_analysis.csv and resume it.

        Rows 0-37 form the seed and rows 38-208 the candidates. The campaign
        is run for 6 iterations, checking that hull.png exists after every
        run and history.pickle from the second run onward. A second Campaign
        is then constructed and must come up initialized at iteration 6 with
        no loop state, reaching iteration 7 after one more run.
        """
        df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df_analysis.csv'))
        # Strip the "mp-"/"mvc-" prefixes so the ids become plain integers
        df['id'] = [
            int(mp_id.replace("mp-", "").replace('mvc-', ''))
            for mp_id in df['id']
        ]
        # set_index returns a new frame rather than modifying df in place,
        # so the result has to be rebound for the index to change
        df = df.set_index("id")
        df['Composition'] = df['formula']

        # Just use the Ti-O-N chemsys
        seed = df.iloc[:38]
        candidates = df.iloc[38:209]
        agent = RandomAgent(n_query=20)
        analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        experiment = ATFSampler(dataframe=df)
        new_loop = Campaign(candidates,
                            agent,
                            experiment,
                            analyzer,
                            seed_data=seed)

        new_loop.initialize()

        for iteration in range(6):
            new_loop.run()
            self.assertTrue(os.path.isfile("hull.png"))
            # history.pickle is only required from the second run onward
            if iteration >= 1:
                self.assertTrue(os.path.isfile("history.pickle"))

        # Testing the continuation
        new_loop = Campaign(df, agent, experiment, analyzer)
        self.assertTrue(new_loop.initialized)
        self.assertEqual(new_loop.iteration, 6)
        self.assertIsNone(new_loop.loop_state)

        new_loop.run()
        self.assertEqual(new_loop.iteration, 7)
예제 #16
0
    def test_simple_gp_loop(self):
        """Run a short campaign with the Gaussian-process stability agent.

        The candidate pool is test_df.csv restricted to rows whose N_species
        is at most 3. The campaign is initialized and auto_loop(2) is run.
        """
        full_df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        subset = full_df[full_df['N_species'] <= 3]
        seed_size = 200  # Starting sample size
        batch_size = 10  # This many new candidates are "calculated with DFT" (i.e. requested from Oracle -- DFT)

        gp_agent = GaussianProcessStabilityAgent(
            n_query=batch_size,
            hull_distance=0.05,
            alpha=0.5,
            parallel=False)
        hull_analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        sampler = ATFSampler(dataframe=subset)

        campaign = Campaign(subset,
                            gp_agent,
                            sampler,
                            hull_analyzer,
                            create_seed=seed_size)
        campaign.initialize()
        self.assertTrue(campaign.initialized)

        # No result is inspected: the loop only has to finish without raising
        campaign.auto_loop(2)
        self.assertTrue(True)
예제 #17
0
    def test_qbc_agent_loop(self):
        """Run a short campaign with the QBC stability agent on an MLP model.

        The candidate pool is test_df.csv restricted to rows whose N_species
        is at most 3. The campaign is initialized and auto_loop(3) is run.
        """
        full_df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        subset = full_df[full_df['N_species'] <= 3]
        seed_size = 200  # Starting sample size

        regressor = MLPRegressor(hidden_layer_sizes=(84, 50))
        qbc_agent = QBCStabilityAgent(model=regressor,
                                      n_query=10,
                                      hull_distance=0.05,
                                      alpha=0.5)
        hull_analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
        sampler = ATFSampler(dataframe=subset)

        campaign = Campaign(subset,
                            qbc_agent,
                            sampler,
                            hull_analyzer,
                            create_seed=seed_size)
        campaign.initialize()
        self.assertTrue(campaign.initialized)

        # No result is inspected: the loop only has to finish without raising
        campaign.auto_loop(3)
        self.assertTrue(True)
예제 #18
0
from sklearn.neural_network import MLPRegressor

from camd.agent.stability import AgentStabilityML5
from camd.analysis import StabilityAnalyzer
from camd.campaigns.base import Campaign
from camd.experiment.base import ATFSampler
from camd.utils.data import load_default_atf_data

##########################################################
# Load the default ATF dataset (used in full; no filtering
# is applied in this script)
##########################################################
df = load_default_atf_data()

## Epsilon-Greedy
n_seed = 5000  # Starting sample size - a seed of this size will be randomly chosen.
n_query = 200  # This many new candidates are "calculated with DFT" (i.e. requested from Oracle -- DFT)
agent = AgentStabilityML5(
    model=MLPRegressor(hidden_layer_sizes=(84, 50)),
    n_query=n_query,
    hull_distance=0.05,
    exploit_fraction=0.5,
)
analyzer = StabilityAnalyzer(hull_distance=0.05)
# The same dataframe serves as the candidate pool and as the data behind
# the ATFSampler experiment
experiment = ATFSampler(dataframe=df)
candidate_data = df
##########################################################
# Build the campaign, then initialize it and run 4 iterations
##########################################################
new_loop = Campaign(
    candidate_data,
    agent,
    experiment,
    analyzer,
    create_seed=n_seed,
)

new_loop.auto_loop(n_iterations=4, initialize=True)