def test_mp_loop(self):
        """
        Runs a seeded RandomAgent loop on a slice of the analysis test
        dataframe, checks the plot files present after each iteration, and
        then checks that a newly constructed Loop reports iteration 6 and
        advances to 7 after one more run.
        """
        csv_path = os.path.join(CAMD_TEST_FILES, 'test_df_analysis.csv')
        df = pd.read_csv(csv_path)
        # Reduce ids such as "mp-123" / "mvc-123" to the bare integer.
        df['id'] = [
            int(raw_id.replace("mp-", "").replace('mvc-', ''))
            for raw_id in df['id']
        ]
        # NOTE(review): set_index returns a new frame and the result is
        # discarded here, so df keeps its original index -- confirm whether
        # indexing by "id" was actually intended.
        df.set_index("id")
        df['Composition'] = df['formula']

        # Just use the Ti-O-N chemsys: the first 38 rows seed the loop and
        # rows 38..208 are the candidates.
        seed_data, candidate_data = df.iloc[:38], df.iloc[38:209]

        # Arguments shared by both Loop constructions below.
        loop_args = (candidate_data, RandomAgent, ATFSampler, AnalyzeStability)
        loop_kwargs = {
            # 20 new candidates are "calculated with DFT" per iteration
            # (i.e. requested from Oracle -- DFT)
            'agent_params': {'n_query': 20},
            'analyzer_params': {'hull_distance': 0.05},
            'experiment_params': {'dataframe': df},
        }

        new_loop = Loop(*loop_args, seed_data=seed_data, **loop_kwargs)
        new_loop.initialize()
        self.assertFalse(new_loop.create_seed)

        for iteration in range(6):
            new_loop.run()
            hull_plot = "hull_{}.png".format(iteration)
            self.assertTrue(os.path.isfile(hull_plot))
            # report.png is only checked from the second iteration onwards
            if iteration > 0:
                self.assertTrue(os.path.isfile("report.png"))

        # Testing the continuation: same arguments, but no seed_data
        new_loop = Loop(*loop_args, **loop_kwargs)
        self.assertTrue(new_loop.initialized)
        self.assertEqual(new_loop.iteration, 6)
        self.assertEqual(new_loop.loop_state, None)

        new_loop.run()
        self.assertTrue(True)
        self.assertEqual(new_loop.iteration, 7)
Example #2
0
    def test_generate_final_report(self):
        """
        Checks that generate_report_plot writes report.png when given the
        report.log fixture from CAMD_TEST_FILES.
        """
        with ScratchDir('.'):
            frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
            report_log = os.path.join(CAMD_TEST_FILES, "report.log")

            # The loop is only constructed here; it is neither initialized
            # nor run before the report plot is requested.
            loop = Loop(
                frame,
                AgentStabilityML5,
                ATFSampler,
                AnalyzeStability,
                agent_params={},
                analyzer_params={},
                experiment_params={"dataframe": frame},
                create_seed=True,
            )
            loop.generate_report_plot("report.png", report_log)
            self.assertTrue(os.path.isfile("report.png"))
Example #3
0
    def test_sync(self):
        """
        Initializes a loop configured with s3_prefix="test", then reads
        test/iteration.json from CAMD_S3_BUCKET and expects it to hold 0.
        """
        with ScratchDir('.'):
            frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))

            # Construct and initialize the loop
            loop = Loop(
                frame,
                AgentStabilityML5,
                ATFSampler,
                AnalyzeStability,
                agent_params={},
                analyzer_params={},
                experiment_params={"dataframe": frame},
                create_seed=10,
                s3_prefix="test",
            )
            loop.initialize()

        # The remote object is inspected after the scratch directory is left.
        remote = boto3.resource('s3').Object(CAMD_S3_BUCKET,
                                             "test/iteration.json")
        payload = remote.get()['Body'].read()
        self.assertEqual(json.loads(payload), 0)
    def test_random_agent_loop(self):
        """
        Runs six iterations of a RandomAgent loop over test_df.csv with a
        created seed of 200, then checks that a newly constructed Loop
        reports iteration 6 and advances to 7 after one more run.
        """
        frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))

        # Arguments shared by both Loop constructions below.
        loop_args = (frame, RandomAgent, ATFSampler, AnalyzeStability)
        loop_kwargs = {
            # 10 new candidates are "calculated with DFT" per iteration
            # (i.e. requested from Oracle -- DFT)
            'agent_params': {'n_query': 10},
            'analyzer_params': {'hull_distance': 0.05},
            'experiment_params': {'dataframe': frame},
            'create_seed': 200,  # Starting sample size
        }

        new_loop = Loop(*loop_args, **loop_kwargs)
        new_loop.initialize()
        self.assertFalse(new_loop.create_seed)

        completed = 0
        while completed < 6:
            new_loop.run()
            self.assertTrue(True)
            completed += 1

        # Testing the continuation
        new_loop = Loop(*loop_args, **loop_kwargs)
        self.assertTrue(new_loop.initialized)
        self.assertEqual(new_loop.iteration, 6)
        self.assertEqual(new_loop.loop_state, None)

        new_loop.run()
        self.assertTrue(True)
        self.assertEqual(new_loop.iteration, 7)
Example #5
0
def update_run(folder):
    """
    Regenerates the per-iteration hull plots and the report plot for an
    existing run directory.

    Args:
        folder (str): path of the run directory to process; the function
            works from inside this directory.

    Returns:
        None

    """
    required_files = ["seed_data.pickle", "report.log"]
    with cd(folder):
        # A recorded error is reported but does not stop the update.
        if os.path.isfile("error.json"):
            error = loadfn("error.json")
            print("{} ERROR: {}".format(folder, error))

        if not all(os.path.isfile(fn) for fn in required_files):
            print("{} ERROR: no seed data, no analysis to be done".format(
                folder))
            return

        analyzer = AnalyzeStability(hull_distance=0.2)

        # Generate report plots
        for iteration in range(0, 25):
            print("{}: {}".format(folder, iteration))
            # Each plot needs this iteration's seed data and the previous
            # iteration's bookkeeping files, so both directories must exist.
            if not os.path.isdir(str(iteration)) or not os.path.isdir(
                    str(iteration - 1)):
                continue
            # NOTE: pickle.load can execute arbitrary code; only point this
            # function at run folders from a trusted source.
            with open(os.path.join(str(iteration), "seed_data.pickle"),
                      "rb") as f:
                result_df = pickle.load(f)
            all_result_ids = loadfn(
                os.path.join(str(iteration - 1),
                             "consumed_candidates.json"))
            new_result_ids = loadfn(
                os.path.join(str(iteration - 1),
                             "submitted_experiment_requests.json"))
            analyzer.present(df=result_df,
                             new_result_ids=new_result_ids,
                             all_result_ids=all_result_ids,
                             filename="hull_{}.png".format(iteration),
                             finalize=False)

        Loop.generate_report_plot()
Example #6
0
def run_atf_campaign(chemsys):
    """
    A very simple test campaign: three iterations of a RandomAgent loop
    over test_df.csv.

    Args:
        chemsys (str): only used to build the s3 prefix
            "oqmd-atf/runs/<chemsys>"; the data always comes from
            test_df.csv.

    Returns:
        True

    """
    frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))

    # NOTE(review): the query-size key is spelled 'N_query' here; confirm
    # that this matches the keyword the agent expects.
    campaign = Loop(
        frame,
        RandomAgent,
        ATFSampler,
        AnalyzeStability,
        agent_params={
            'hull_distance': 0.05,
            # 10 new candidates are "calculated with DFT" per iteration
            # (i.e. requested from Oracle -- DFT)
            'N_query': 10,
        },
        analyzer_params={'hull_distance': 0.05},
        experiment_params={'dataframe': frame},
        create_seed=200,  # Starting sample size
        s3_prefix="oqmd-atf/runs/{}".format(chemsys),
    )
    campaign.initialize()

    runs_left = 3
    while runs_left:
        campaign.run()
        runs_left -= 1

    return True
    def test_svgp_loop(self):
        """
        Initializes a Loop driven by SVGProcessStabilityAgent on the
        test_df.csv entries with N_species <= 3 and runs auto_loop(3).

        Apart from the initialized check, the test passes as long as no
        exception is raised.
        """
        df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        df_sub = df[df['N_species'] <= 3]
        n_seed = 200  # Starting sample size
        n_query = 10  # This many new candidates are "calculated with DFT" (i.e. requested from Oracle -- DFT)
        agent = SVGProcessStabilityAgent
        agent_params = {
            'n_query': n_query,
            'hull_distance':
            0.05,  # Distance to hull to consider a finding as discovery (eV/atom)
            'alpha': 0.5,  # Fraction of std to include in expected improvement
            'M': 100  # number of inducing points for SVGP
        }
        analyzer = AnalyzeStability
        analyzer_params = {'hull_distance': 0.05}
        experiment = ATFSampler
        experiment_params = {'dataframe': df_sub}
        candidate_data = df_sub

        new_loop = Loop(candidate_data,
                        agent,
                        experiment,
                        analyzer,
                        agent_params=agent_params,
                        analyzer_params=analyzer_params,
                        experiment_params=experiment_params,
                        create_seed=n_seed)
        new_loop.initialize()
        self.assertTrue(new_loop.initialized)

        # Reaching the end of auto_loop without an exception is the check.
        new_loop.auto_loop(3)
    def test_random_agent_loop(self):
        """
        Runs six iterations of a RandomAgent loop over the
        "oqmd1.2_exp_based_entries_featurized_v2" dataframe with a created
        seed of 5000 and 200 queries per iteration.

        Apart from the create_seed check, the test passes as long as no
        iteration raises.
        """
        df = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")
        n_seed = 5000
        n_query = 200
        agent = RandomAgent
        agent_params = {
            'hull_distance': 0.05,
            'n_query': n_query,
        }
        analyzer = AnalyzeStability
        analyzer_params = {'hull_distance': 0.05}
        experiment = ATFSampler
        experiment_params = {'params': {'dataframe': df}}
        candidate_data = df

        new_loop = Loop(candidate_data,
                        agent,
                        experiment,
                        analyzer,
                        agent_params=agent_params,
                        analyzer_params=analyzer_params,
                        experiment_params=experiment_params,
                        create_seed=n_seed)

        new_loop.initialize()
        self.assertFalse(new_loop.create_seed)

        # Each run completing without an exception is the check.
        for _ in range(6):
            new_loop.run()
    def test_adaboost_loop(self):
        """
        Initializes a Loop driven by AgentStabilityAdaBoost (MLPRegressor
        base learner) on the test_df.csv entries with N_species <= 3 and
        runs auto_loop(6).

        Apart from the initialized check, the test passes as long as no
        exception is raised.
        """
        df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
        df_sub = df[df['N_species'] <= 3]
        n_seed = 200  # Starting sample size
        n_query = 10  # This many new candidates are "calculated with DFT" (i.e. requested from Oracle -- DFT)
        agent = AgentStabilityAdaBoost
        agent_params = {
            'ml_algorithm': MLPRegressor,
            'ml_algorithm_params': {
                'hidden_layer_sizes': (84, 50)
            },
            'n_query': n_query,
            'hull_distance':
            0.05,  # Distance to hull to consider a finding as discovery (eV/atom)
            'exploit_fraction':
            1.0,  # Fraction to exploit (rest will be explored -- randomly picked)
            'alpha': 0.5,  # Fraction of std to include in expected improvement
            'n_estimators': 10
        }
        analyzer = AnalyzeStability
        analyzer_params = {'hull_distance': 0.05}
        experiment = ATFSampler
        experiment_params = {'dataframe': df_sub}
        candidate_data = df_sub

        new_loop = Loop(candidate_data,
                        agent,
                        experiment,
                        analyzer,
                        agent_params=agent_params,
                        analyzer_params=analyzer_params,
                        experiment_params=experiment_params,
                        create_seed=n_seed)
        new_loop.initialize()
        self.assertTrue(new_loop.initialized)

        # Reaching the end of auto_loop without an exception is the check.
        new_loop.auto_loop(6)
Example #10
0
def run_proto_dft_campaign(chemsys, s3_prefix="proto-dft-2"):
    """
    Sets up and runs a DFT campaign for one chemical system, recording
    start and error markers as JSON files and syncing them to s3.

    Args:
        chemsys (str): chemical system for the campaign, with elements
            separated by '-'
        s3_prefix (str): s3 prefix to sync to; the run itself is stored
            under "<s3_prefix>/runs/<chemsys>"

    Returns:
        (bool): True if run exits; this is also returned when the campaign
            raised, in which case error.json and job_status.json are
            written and synced instead

    """
    s3_prefix = "{}/runs/{}".format(s3_prefix, chemsys)

    # Initialize s3 with a marker recording start time and code version
    start_marker = {"started": datetime.now().isoformat(),
                    "version": __version__}
    dumpfn(start_marker, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    try:
        # Get structure domain
        elements = chemsys.split('-')
        g_max, charge_balanced = heuristic_setup(elements)
        domain = StructureDomain.from_bounds(
            elements,
            charge_balanced=charge_balanced,
            n_max_atoms=20,
            grid=range(1, g_max),
        )
        # candidates() is called before hypo_structures_dict is read,
        # matching the original statement order.
        candidate_data = domain.candidates()
        structure_dict = domain.hypo_structures_dict

        # Dump structure/candidate data
        pickle_targets = (
            ('candidate_data.pickle', candidate_data),
            ('structure_dict.pickle', structure_dict),
        )
        for pickle_name, payload in pickle_targets:
            with open(pickle_name, 'wb') as handle:
                pickle.dump(payload, handle)

        # Set up agents and loop parameters
        agent_params = {
            'ml_algorithm': MLPRegressor,
            'ml_algorithm_params': {'hidden_layer_sizes': (84, 50)},
            'n_query': 10,
            # Distance to hull to consider a finding as discovery (eV/atom)
            'hull_distance': 0.2,
            # Fraction to exploit (rest will be explored -- randomly picked)
            'exploit_fraction': 1.0,
            'uncertainty': True,
            'alpha': 0.5,
            'diversify': True,
            'n_estimators': 20,
        }
        # analysis criterion (need not be exactly same as agent's goal)
        analyzer_params = {'hull_distance': 0.2}
        experiment_params = {
            'structure_dict': structure_dict,
            'candidate_data': candidate_data,
            'timeout': 30000,
        }
        finalizer_params = {'hull_distance': 0.2}
        n_max_iter = n_max_iter_heuristics(len(candidate_data), 10)

        # Construct and start loop
        campaign = Loop(
            candidate_data,
            AgentStabilityAdaBoost,
            OqmdDFTonMC1,
            AnalyzeStability,
            agent_params=agent_params,
            analyzer_params=analyzer_params,
            experiment_params=experiment_params,
            finalizer=FinalizeQqmdCampaign,
            finalizer_params=finalizer_params,
            heuristic_stopper=5,
            s3_prefix=s3_prefix,
        )
        campaign.auto_loop_in_directories(
            n_iterations=n_max_iter,
            timeout=10,
            monitor=True,
            initialize=True,
            with_icsd=True,
        )
    except Exception as e:
        # Any failure is recorded and synced rather than propagated.
        dumpfn({"error": "{}".format(e),
                "traceback": traceback.format_exc()},
               "error.json")
        dumpfn({"status": "error"}, "job_status.json")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    return True
# Campaign sizes
# n_seed: starting sample size - a seed of this size will be randomly chosen.
# n_query: this many new candidates are "calculated with DFT" per iteration
#          (i.e. requested from Oracle -- DFT)
n_seed = 5000
n_query = 200

# Agent: query-by-committee stability agent with an MLPRegressor learner
agent = QBCStabilityAgent
agent_params = {
    'ml_algorithm': MLPRegressor,
    'ml_algorithm_params': {'hidden_layer_sizes': (84, 50)},
    'n_query': n_query,
    # Committee size in QBC
    'n_members': 10,
    # Distance to hull to consider a finding as discovery (eV/atom)
    'hull_distance': 0.05,
    # Fraction of data to choose to form a committee member
    'frac': 0.5,
}

# Analyzer
analyzer = AnalyzeStability
analyzer_params = {'hull_distance': 0.05}

# Experiment and candidates; `df` is expected to be defined before this
# point, and the same frame serves as both
experiment = ATFSampler
experiment_params = {'dataframe': df}
candidate_data = df

##########################################################
new_loop = Loop(
    candidate_data,
    agent,
    experiment,
    analyzer,
    agent_params=agent_params,
    analyzer_params=analyzer_params,
    experiment_params=experiment_params,
    create_seed=n_seed,
)

new_loop.auto_loop(n_iterations=4, timeout=5, initialize=True)
Example #12
0
from sklearn.neural_network import MLPRegressor

# Search domain: the Ir-Fe-O ternary, limited to structures with at most
# 10 atoms. The grid passed below is range(1, 4), i.e. the integers 1, 2, 3.
domain = StructureDomain.from_bounds(
    ["Ir", "Fe", "O"],
    charge_balanced=True,
    n_max_atoms=10,
    grid=range(1, 4),
)
candidate_data = domain.candidates()
structure_dict = domain.hypo_structures_dict

# Set up the loop for this campaign.

# Agent: query-by-committee (QBC)
agent = QBCStabilityAgent
agent_params = {
    # A simple fully connected neural network is used as the regressor
    'ml_algorithm': MLPRegressor,
    'ml_algorithm_params': {'hidden_layer_sizes': (84, 50)},
    # Agent is allowed 3 experiments per iteration
    'n_query': 3,
    # Committee size for QBC
    'n_members': 10,
    # Distance to hull to consider a finding as discovery (eV/atom)
    'hull_distance': 0.1,
    'frac': 0.5,
}

# Analyzer for stability
analyzer = AnalyzeStability
analyzer_params = {'hull_distance': 0.1}

# Experiment method that runs OQMD compatible DFT on AWS-MC1; its
# parameters include the structures themselves.
experiment = OqmdDFTonMC1
experiment_params = {
    'structure_dict': structure_dict,
    'candidate_data': candidate_data,
    'timeout': 30000,
}

# The Loop class puts all the above pieces together.
new_loop = Loop(
    candidate_data,
    agent,
    experiment,
    analyzer,
    agent_params=agent_params,
    analyzer_params=analyzer_params,
    experiment_params=experiment_params,
)

# Let's start the campaign!
new_loop.auto_loop_in_directories(
    n_iterations=3,
    timeout=1,
    monitor=True,
    initialize=True,
    with_icsd=True,
)