def test_gp_bagging(self):
    """Smoke test: a bagged-GP stability agent drives a Campaign through 6 iterations."""
    df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    # Restrict to chemistries with at most 3 species to keep the run fast
    df_sub = df[df['N_species'] <= 3]
    n_seed = 200  # Starting sample size
    agent = BaggedGaussianProcessStabilityAgent(
        n_query=10,
        hull_distance=0.05,
        alpha=0.5,  # Fraction of std to include in expected improvement
        n_estimators=2,
        max_samples=195,
        parallel=False)
    analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    # Keyword argument for consistency with the other tests in this file
    experiment = ATFSampler(dataframe=df_sub)
    candidate_data = df_sub
    new_loop = Campaign(
        candidate_data, agent, experiment, analyzer, create_seed=n_seed)
    new_loop.initialize()
    self.assertTrue(new_loop.initialized)
    # auto_loop raises on failure, so completing it is the real assertion;
    # the trailing no-op assertTrue(True) was removed.
    new_loop.auto_loop(6)
def test_sync(self):
    """Check that auto_loop with save_iterations persists per-iteration state to S3."""
    with ScratchDir('.'):
        campaign_df = pd.read_csv(
            os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        # Construct and start campaign
        campaign = Campaign(
            campaign_df, AgentStabilityML5(), ATFSampler(campaign_df),
            StabilityAnalyzer(), create_seed=10, s3_prefix="test")
        campaign.auto_loop(
            n_iterations=3, save_iterations=True, initialize=True)

        s3 = boto3.resource('s3')

        def _read_iteration(key):
            # Fetch and JSON-decode the stored iteration marker from S3
            body = s3.Object(CAMD_S3_BUCKET, key).get()['Body'].read()
            return json.loads(body)

        # Test iteration read: latest marker reflects the final loop index
        self.assertEqual(_read_iteration("test/iteration.json"), 2)
        # Test save directories: each saved iteration stores its own index
        for iteration in [-1, 0, 1, 2]:
            self.assertEqual(
                _read_iteration(f"test/{iteration}/iteration.json"),
                iteration)
def test_simulated(self):
    """End-to-end simulated ProtoDFTCampaign over the Mn-Ni-O-Sb system."""
    exp_dataframe = pd.read_pickle(
        os.path.join(CAMD_TEST_FILES, "mn-ni-o-sb.pickle"))
    experiment = ATFSampler(exp_dataframe)
    # All but the trailing 11 columns are candidate features
    candidate_data = exp_dataframe.iloc[:, :-11]

    # Set up agents and loop parameters
    agent = AgentStabilityAdaBoost(
        model=MLPRegressor(hidden_layer_sizes=(84, 50)),
        n_query=2,
        hull_distance=0.2,
        exploit_fraction=1.0,
        uncertainty=True,
        alpha=0.5,
        diversify=True,
        n_estimators=20)
    analyzer = StabilityAnalyzer(hull_distance=0.2)

    # Reduce seed_data
    icsd_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")
    seed_data = filter_dataframe_by_composition(icsd_data, "MnNiOSb")
    leftover = ~icsd_data.index.isin(seed_data.index)
    # Add some random other data to test compositional flexibility.
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported equivalent with identical default semantics here.
    seed_data = pd.concat(
        [seed_data, icsd_data.loc[leftover].sample(30)])
    del icsd_data

    with ScratchDir('.'):
        campaign = ProtoDFTCampaign(
            candidate_data=candidate_data, agent=agent,
            experiment=experiment, analyzer=analyzer,
            seed_data=seed_data, heuristic_stopper=5)
        campaign.autorun()
        self.assertTrue(os.path.isfile('hull_finalized.png'))
def setUp(self):
    """Build a tiny squared-values dataframe and wrap it in an ATFSampler."""
    values = np.arange(5)
    frame = pd.DataFrame({"index": values, "squared": values ** 2})
    self.simple_exp = ATFSampler(
        {"dataframe": frame, "index_values": [0, 2, 3]})
def test_random_agent_loop(self):
    """Run a random-agent campaign for six iterations, then test continuation."""
    df = load_default_atf_data()
    n_seed = 200  # Starting sample size
    agent = RandomAgent(n_query=10)
    analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    experiment = ATFSampler(dataframe=df)
    candidate_data = df
    new_loop = Campaign(
        candidate_data, agent, experiment, analyzer, create_seed=n_seed)
    new_loop.initialize()
    # The seed is consumed during initialization
    self.assertFalse(new_loop.create_seed)
    # run() raises on failure, so completing the loop is the real assertion;
    # the no-op assertTrue(True) calls were removed.
    for _ in range(6):
        new_loop.run()

    # Testing the continuation: a fresh Campaign in the same directory
    # should pick up the persisted state.
    new_loop = Campaign(
        candidate_data, agent, experiment, analyzer, create_seed=n_seed)
    self.assertTrue(new_loop.initialized)
    self.assertEqual(new_loop.iteration, 6)
    # assertIsNone is the idiomatic unittest check for None
    self.assertIsNone(new_loop.loop_state)
    new_loop.run()
    self.assertEqual(new_loop.iteration, 7)
def test_submit_get_results(self):
    """Submitted rows complete immediately and come back via get_results."""
    base = np.arange(5)
    data = pd.DataFrame({"index": base, "squared": base ** 2})
    sampler = ATFSampler(data)
    sampler.submit(data.loc[[0, 2, 3]])
    self.assertEqual(sampler.job_status, "COMPLETED")
    sampler.monitor()
    fetched = sampler.get_results()['squared']
    self.assertTrue((fetched == [0, 4, 9]).all())
def test_construction(self):
    """An ATFSampler built from a params dict reports 'completed' right away."""
    base = np.arange(5)
    dataframe = pd.DataFrame({"index": base, "squared": base ** 2})
    sampler = ATFSampler({"dataframe": dataframe})
    self.assertEqual(sampler.get_state(), "completed")
    fetched = sampler.get_results([0, 2, 3])['squared']
    self.assertTrue((fetched == np.array([0, 4, 9])).all())
def test_sync(self):
    """initialize() should upload an iteration marker of 0 to S3."""
    with ScratchDir('.'):
        dataset = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        # Construct and start campaign
        campaign = Campaign(
            dataset, AgentStabilityML5(), ATFSampler(dataset),
            StabilityAnalyzer(), create_seed=10, s3_prefix="test")
        campaign.initialize()
        body = boto3.resource('s3').Object(
            CAMD_S3_BUCKET, "test/iteration.json").get()['Body'].read()
        self.assertEqual(json.loads(body), 0)
def test_simulated(self):
    """Smoke-test a ProtoDFTCampaign autorun driven by a random agent."""
    atf_frame = pd.read_pickle(
        os.path.join(CAMD_TEST_FILES, "mn-ni-o-sb.pickle"))
    sampler = ATFSampler(atf_frame)
    # All but the trailing 11 columns are candidate features
    candidates = atf_frame.iloc[:, :-11]
    random_agent = RandomAgent(n_query=2)
    stability = StabilityAnalyzer(hull_distance=0.2)

    # Reduce seed_data to the relevant chemical system
    seed = filter_dataframe_by_composition(
        load_dataframe("oqmd1.2_exp_based_entries_featurized_v2"),
        "MnNiOSb")

    with ScratchDir('.'):
        campaign = ProtoDFTCampaign(
            candidate_data=candidates, agent=random_agent,
            experiment=sampler, analyzer=stability,
            seed_data=seed, heuristic_stopper=5)
        campaign.autorun()
def test_random_agent_loop(self):
    """Run six iterations of a random-agent campaign over the full OQMD ATF set."""
    df = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")
    n_seed = 5000  # Starting sample size
    agent = RandomAgent(n_query=200)
    analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    experiment = ATFSampler(dataframe=df)
    candidate_data = df
    new_loop = Campaign(
        candidate_data, agent, experiment, analyzer, create_seed=n_seed)
    new_loop.initialize()
    # The seed is consumed during initialization
    self.assertFalse(new_loop.create_seed)
    # run() raises on failure, so completing the loop is the real assertion;
    # the trailing no-op assertTrue(True) was removed.
    for _ in range(6):
        new_loop.run()
def from_chemsys(cls, chemsys):
    """
    Alternate constructor: build a test campaign for a given chemical
    system, backed by the shared test dataframe and a random agent.

    Args:
        chemsys (str): chemical-system identifier used only to build
            the S3 prefix under "oqmd-atf/runs/"
            (presumably e.g. "Fe-O" — TODO confirm against callers)

    Returns:
        An instance of this class wired with a RandomAgent, a
        StabilityAnalyzer (hull_distance=0.05), and an ATFSampler
        over the test dataframe, seeded with 200 samples
    """
    s3_prefix = "oqmd-atf/runs/{}".format(chemsys)
    df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    n_seed = 200  # Starting sample size
    n_query = 10  # This many new candidates are "calculated with DFT" (i.e. requested from Oracle -- DFT)
    agent = RandomAgent(n_query=n_query)
    analyzer = StabilityAnalyzer(hull_distance=0.05)
    experiment = ATFSampler(dataframe=df)
    candidate_data = df
    return cls(candidate_data, agent, experiment, analyzer,
               create_seed=n_seed, s3_prefix=s3_prefix)
def test_gp_bucb_generic(self):
    """GP batch-UCB agent on a 1-d synthetic objective completes one iteration."""
    # Synthetic objective: sin(x)^2 * x^2 sampled on [0, 10]
    xs = np.linspace(0, 10, 500)
    ys = np.sin(xs) * np.sin(xs) * (xs ** 2)
    frame = pd.DataFrame({'x': xs, 'target': ys})

    seed_size = 5  # This many samples are randomly acquired in the beginning to form a seed.
    loop = Campaign(
        frame,
        GPBatchUCB(n_query=2),
        ATFSampler(dataframe=frame),
        GenericMaxAnalyzer(threshold=58),
        create_seed=seed_size,
    )
    loop.initialize(random_state=20)
    self.assertTrue(loop.initialized)
    loop.run()
    self.assertTrue(True)
def test_agent(self, agent):
    """
    Runs a simulation of a given agent according to the class attributes

    Args:
        agent (HypothesisAgent): agent to drive the simulated campaign

    Returns:
        (Campaign): the campaign after auto_loop completes
            (note: the previous docstring incorrectly said None)
    """
    campaign = Campaign(
        candidate_data=self.atf_dataframe,
        seed_data=self.seed_data,
        agent=agent,
        analyzer=self.analyzer,
        experiment=ATFSampler(dataframe=self.atf_dataframe),
    )
    campaign.auto_loop(n_iterations=self.iterations, initialize=True)
    return campaign
def test_svgp_loop(self):
    """Sparse-variational-GP agent completes three auto-loop iterations."""
    full_df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    # Keep only entries with three or fewer species
    small_df = full_df[full_df['N_species'] <= 3]
    seed_size = 200  # Starting sample size

    svgp_agent = SVGProcessStabilityAgent(
        n_query=10, hull_distance=0.05, alpha=0.5, M=100)
    hull_analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    sampler = ATFSampler(small_df)

    loop = Campaign(
        small_df, svgp_agent, sampler, hull_analyzer,
        create_seed=seed_size)
    loop.initialize()
    self.assertTrue(loop.initialized)
    loop.auto_loop(3)
    self.assertTrue(True)
def test_mp_loop(self):
    """Campaign over MP data with an explicit seed/candidate split, plus continuation."""
    df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df_analysis.csv'))
    # Strip the "mp-"/"mvc-" prefixes so ids are plain integers
    df['id'] = [
        int(mp_id.replace("mp-", "").replace('mvc-', ''))
        for mp_id in df['id']
    ]
    # BUG FIX: set_index returns a new frame rather than mutating in
    # place, so the original bare `df.set_index("id")` had no effect.
    df = df.set_index("id")
    df['Composition'] = df['formula']

    # Just use the Ti-O-N chemsys
    seed = df.iloc[:38]
    candidates = df.iloc[38:209]
    agent = RandomAgent(n_query=20)
    analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    experiment = ATFSampler(dataframe=df)
    new_loop = Campaign(
        candidates, agent, experiment, analyzer, seed_data=seed)
    new_loop.initialize()

    for iteration in range(6):
        new_loop.run()
        self.assertTrue(os.path.isfile("hull.png"))
        if iteration >= 1:
            self.assertTrue(os.path.isfile("history.pickle"))

    # Testing the continuation: a fresh Campaign should resume state
    new_loop = Campaign(df, agent, experiment, analyzer)
    self.assertTrue(new_loop.initialized)
    self.assertEqual(new_loop.iteration, 6)
    self.assertIsNone(new_loop.loop_state)
    new_loop.run()
    self.assertEqual(new_loop.iteration, 7)
def test_simple_gp_loop(self):
    """Plain Gaussian-process stability agent completes two auto-loop iterations."""
    raw = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    # Keep only entries with three or fewer species
    subset = raw[raw['N_species'] <= 3]

    seed_size = 200  # Starting sample size
    batch_size = 10  # This many new candidates are "calculated with DFT" (i.e. requested from Oracle -- DFT)

    gp_agent = GaussianProcessStabilityAgent(
        n_query=batch_size,
        hull_distance=0.05,
        alpha=0.5,
        parallel=False)
    stability = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    sampler = ATFSampler(dataframe=subset)

    loop = Campaign(subset, gp_agent, sampler, stability,
                    create_seed=seed_size)
    loop.initialize()
    self.assertTrue(loop.initialized)
    loop.auto_loop(2)
    self.assertTrue(True)
def test_qbc_agent_loop(self):
    """Query-by-committee stability agent completes three auto-loop iterations."""
    raw = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    # Keep only entries with three or fewer species
    subset = raw[raw['N_species'] <= 3]
    seed_size = 200  # Starting sample size

    committee_agent = QBCStabilityAgent(
        model=MLPRegressor(hidden_layer_sizes=(84, 50)),
        n_query=10,
        hull_distance=0.05,
        alpha=0.5)
    stability = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    sampler = ATFSampler(dataframe=subset)

    loop = Campaign(subset, committee_agent, sampler, stability,
                    create_seed=seed_size)
    loop.initialize()
    self.assertTrue(loop.initialized)
    loop.auto_loop(3)
    self.assertTrue(True)
"""Example script: epsilon-greedy ML-driven campaign over the default ATF dataset."""
from sklearn.neural_network import MLPRegressor

from camd.agent.stability import AgentStabilityML5
from camd.analysis import StabilityAnalyzer
# BUG FIX: Campaign is used below but was never imported.
# NOTE(review): path assumed to be camd.campaigns.base — confirm against the package.
from camd.campaigns.base import Campaign
from camd.experiment.base import ATFSampler
from camd.utils.data import load_default_atf_data

##########################################################
# Load dataset and filter by N_species of 2 or less
##########################################################
df = load_default_atf_data()

## Epsilon-Greedy
n_seed = 5000  # Starting sample size - a seed of this size will be randomly chosen.
n_query = 200  # This many new candidates are "calculated with DFT" (i.e. requested from Oracle -- DFT)
agent = AgentStabilityML5(
    model=MLPRegressor(hidden_layer_sizes=(84, 50)),
    n_query=n_query,
    hull_distance=0.05,
    exploit_fraction=0.5)
analyzer = StabilityAnalyzer(hull_distance=0.05)
experiment = ATFSampler(dataframe=df)
candidate_data = df
##########################################################

new_loop = Campaign(
    candidate_data, agent, experiment, analyzer, create_seed=n_seed)
new_loop.auto_loop(n_iterations=4, initialize=True)