def test_gp_bagging(self):
    """Smoke-test a campaign driven by BaggedGaussianProcessStabilityAgent.

    Reads ``test_df.csv``, keeps the rows with ``N_species <= 3``, builds a
    campaign with ``create_seed=200`` and calls ``auto_loop(6)``. The only
    real assertion is that the campaign reports itself as initialized;
    beyond that the test passes as long as nothing raises.
    """
    csv_path = os.path.join(CAMD_TEST_FILES, 'test_df.csv')
    full_frame = pd.read_csv(csv_path)
    few_species = full_frame[full_frame['N_species'] <= 3]

    seed_size = 200  # Starting sample size
    bagged_agent = BaggedGaussianProcessStabilityAgent(
        n_query=10,
        hull_distance=0.05,
        alpha=0.5,  # Fraction of std to include in expected improvement
        n_estimators=2,
        max_samples=195,
        parallel=False,
    )
    stability_analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    sampler = ATFSampler(few_species)

    # The same filtered frame serves as both the candidate pool and the
    # sampler's backing data.
    campaign = Campaign(
        few_species,
        bagged_agent,
        sampler,
        stability_analyzer,
        create_seed=seed_size,
    )
    campaign.initialize()
    self.assertTrue(campaign.initialized)

    campaign.auto_loop(6)
    # Placeholder assertion: reaching this line means the loop did not raise.
    self.assertTrue(True)
def test_sync(self):
    """Check the iteration files a campaign with ``s3_prefix="test"`` leaves on S3.

    Runs three saved iterations inside a scratch directory, then reads
    ``test/iteration.json`` (expected to hold ``2``) and the per-iteration
    copies ``test/<i>/iteration.json`` for ``i`` in -1..2 (each expected to
    hold its own ``i``) from ``CAMD_S3_BUCKET``.
    """
    with ScratchDir('.'):
        frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))

        # Construct and start campaign
        campaign = Campaign(
            frame,
            AgentStabilityML5(),
            ATFSampler(frame),
            StabilityAnalyzer(),
            create_seed=10,
            s3_prefix="test",
        )
        campaign.auto_loop(
            n_iterations=3, save_iterations=True, initialize=True
        )

        s3 = boto3.resource('s3')

        def read_s3_json(key):
            # Download the object at `key` and decode its body as JSON.
            payload = s3.Object(CAMD_S3_BUCKET, key).get()['Body'].read()
            return json.loads(payload)

        # Test iteration read
        self.assertEqual(read_s3_json("test/iteration.json"), 2)

        # Test save directories
        for iteration in range(-1, 3):
            stored = read_s3_json(f"test/{iteration}/iteration.json")
            self.assertEqual(stored, iteration)
def test_svgp_loop(self):
    """Smoke-test a campaign driven by SVGProcessStabilityAgent.

    Uses the ``N_species <= 3`` subset of ``test_df.csv``, passes
    ``create_seed=200`` to the campaign and calls ``auto_loop(3)``. Apart
    from checking ``initialized``, the test only verifies nothing raises.
    """
    source = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    species_mask = source['N_species'] <= 3
    subset = source[species_mask]

    starting_samples = 200  # Starting sample size
    svgp_agent = SVGProcessStabilityAgent(
        n_query=10, hull_distance=0.05, alpha=0.5, M=100
    )
    hull_analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    atf_experiment = ATFSampler(subset)

    loop = Campaign(
        subset,
        svgp_agent,
        atf_experiment,
        hull_analyzer,
        create_seed=starting_samples,
    )
    loop.initialize()
    self.assertTrue(loop.initialized)

    loop.auto_loop(3)
    # Placeholder assertion: the loop completing without error is the test.
    self.assertTrue(True)
def test_qbc_agent_loop(self):
    """Smoke-test a campaign driven by QBCStabilityAgent with an MLP model.

    Uses the ``N_species <= 3`` subset of ``test_df.csv``, passes
    ``create_seed=200`` to the campaign and calls ``auto_loop(3)``. Apart
    from checking ``initialized``, the test only verifies nothing raises.
    """
    table = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    reduced = table[table['N_species'] <= 3]
    initial_size = 200  # Starting sample size

    regressor = MLPRegressor(hidden_layer_sizes=(84, 50))
    qbc_agent = QBCStabilityAgent(
        model=regressor,
        n_query=10,
        hull_distance=0.05,
        alpha=0.5,
    )
    analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    experiment = ATFSampler(dataframe=reduced)

    campaign = Campaign(
        reduced, qbc_agent, experiment, analyzer, create_seed=initial_size
    )
    campaign.initialize()
    self.assertTrue(campaign.initialized)

    campaign.auto_loop(3)
    # Placeholder assertion: the loop completing without error is the test.
    self.assertTrue(True)
def test_simple_gp_loop(self):
    """Smoke-test a campaign driven by GaussianProcessStabilityAgent.

    Uses the ``N_species <= 3`` subset of ``test_df.csv``, passes
    ``create_seed=200`` to the campaign and calls ``auto_loop(2)``. Apart
    from checking ``initialized``, the test only verifies nothing raises.
    """
    data_file = os.path.join(CAMD_TEST_FILES, 'test_df.csv')
    everything = pd.read_csv(data_file)
    candidates = everything[everything['N_species'] <= 3]

    seed_count = 200  # Starting sample size
    # This many new candidates are "calculated with DFT"
    # (i.e. requested from Oracle -- DFT)
    batch_size = 10

    gp_agent = GaussianProcessStabilityAgent(
        n_query=batch_size,
        hull_distance=0.05,
        alpha=0.5,
        parallel=False,
    )
    gp_analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    gp_experiment = ATFSampler(dataframe=candidates)

    gp_campaign = Campaign(
        candidates, gp_agent, gp_experiment, gp_analyzer,
        create_seed=seed_count,
    )
    gp_campaign.initialize()
    self.assertTrue(gp_campaign.initialized)

    gp_campaign.auto_loop(2)
    # Placeholder assertion: the loop completing without error is the test.
    self.assertTrue(True)
def test_agent(self, agent):
    """
    Runs a simulation of a given agent according to the class attributes

    The campaign is built from ``self.atf_dataframe`` (used both as the
    candidate data and as the ATFSampler's dataframe), ``self.seed_data``
    and ``self.analyzer``, then looped for ``self.iterations`` iterations
    with ``initialize=True``.

    Args:
        agent (HypothesisAgent): agent to drive the campaign

    Returns:
        The Campaign object after ``auto_loop`` has finished

    """
    sampler = ATFSampler(dataframe=self.atf_dataframe)
    simulation = Campaign(
        candidate_data=self.atf_dataframe,
        seed_data=self.seed_data,
        agent=agent,
        analyzer=self.analyzer,
        experiment=sampler,
    )
    simulation.auto_loop(n_iterations=self.iterations, initialize=True)
    return simulation
# Example: run a short stability campaign on the default ATF dataset using
# the AgentStabilityML5 agent backed by an MLP regressor.
from sklearn.neural_network import MLPRegressor

from camd.agent.stability import AgentStabilityML5
from camd.analysis import StabilityAnalyzer
# Campaign is used below but was missing from the script's imports, which
# raised NameError when the script was run on its own.
from camd.campaign.base import Campaign
from camd.experiment.base import ATFSampler
from camd.utils.data import load_default_atf_data

##########################################################
# Load dataset
# NOTE(review): the original header said "filter by N_species of 2 or
# less", but no filter is applied in this script -- confirm whether one
# was intended.
##########################################################
df = load_default_atf_data()

## Epsilon-Greedy
n_seed = 5000  # Starting sample size - a seed of this size will be randomly chosen.
# This many new candidates are "calculated with DFT"
# (i.e. requested from Oracle -- DFT)
n_query = 200
agent = AgentStabilityML5(
    model=MLPRegressor(hidden_layer_sizes=(84, 50)),
    n_query=n_query,
    hull_distance=0.05,
    exploit_fraction=0.5,
)
analyzer = StabilityAnalyzer(hull_distance=0.05)
experiment = ATFSampler(dataframe=df)
candidate_data = df
##########################################################
new_loop = Campaign(candidate_data, agent, experiment, analyzer, create_seed=n_seed)
new_loop.auto_loop(n_iterations=4, initialize=True)