예제 #1
0
    def from_chemsys(cls, chemsys):
        """
        Class factory method for constructing campaign from
        chemsys.

        Args:
            chemsys (str): chemical system for the campaign

        Returns:
            (ProtoDFTCampaign): Standard proto-dft campaign from
                the chemical system

        """
        s3_prefix = "proto-dft-2/runs/{}".format(chemsys)

        # Initialize s3
        dumpfn({
            "started": datetime.now().isoformat(),
            "version": __version__
        }, "start.json")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

        # Get structure domain
        element_list = chemsys.split('-')
        max_coeff, charge_balanced = heuristic_setup(element_list)
        domain = StructureDomain.from_bounds(element_list,
                                             charge_balanced=charge_balanced,
                                             n_max_atoms=20,
                                             **{'grid': range(1, max_coeff)})
        candidate_data = domain.candidates()

        # Dump structure/candidate data
        with open('candidate_data.pickle', 'wb') as f:
            pickle.dump(candidate_data, f)

        # Set up agents and loop parameters
        agent = AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
        analyzer = StabilityAnalyzer(hull_distance=0.2)
        experiment = OqmdDFTonMC1(timeout=30000)
        seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

        # Construct and start loop
        return cls(candidate_data=candidate_data,
                   agent=agent,
                   experiment=experiment,
                   analyzer=analyzer,
                   seed_data=seed_data,
                   heuristic_stopper=5,
                   s3_prefix="proto-dft/runs/{}".format(chemsys))
예제 #2
0
파일: base.py 프로젝트: handong-ling/CAMD
 def s3_sync(self):
     """
     Syncs current run to s3_prefix and bucket
     """
     s3_sync(self.s3_bucket, self.s3_prefix, self.path)
예제 #3
0
def run_proto_dft_campaign(chemsys, s3_prefix="proto-dft-2"):
    """

    Args:
        chemsys (str): chemical system for the campaign
        s3_prefix (str): s3 prefix to sync to

    Returns:
        (bool): True if run exits

    """
    s3_prefix = "{}/runs/{}".format(s3_prefix, chemsys)

    # Initialize s3
    dumpfn({"started": datetime.now().isoformat(),
            "version": __version__}, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    try:
        # Get structure domain
        element_list = chemsys.split('-')
        g_max, charge_balanced = heuristic_setup(element_list)
        domain = StructureDomain.from_bounds(
            element_list, charge_balanced=charge_balanced,
            n_max_atoms=20, **{'grid': range(1, g_max)})
        candidate_data = domain.candidates()
        structure_dict = domain.hypo_structures_dict

        # Dump structure/candidate data
        with open('candidate_data.pickle', 'wb') as f:
            pickle.dump(candidate_data, f)
        with open('structure_dict.pickle', 'wb') as f:
            pickle.dump(structure_dict, f)

        # Set up agents and loop parameters
        agent = AgentStabilityAdaBoost
        agent_params = {
            'ml_algorithm': MLPRegressor,
            'ml_algorithm_params': {'hidden_layer_sizes': (84, 50)},
            'n_query': 10,
            'hull_distance': 0.2,  # Distance to hull to consider a finding as discovery (eV/atom)
            'exploit_fraction': 1.0,  # Fraction to exploit (rest will be explored -- randomly picked)
            'uncertainty': True,
            'alpha': 0.5,
            'diversify': True,
            'n_estimators': 20
        }
        analyzer = AnalyzeStability
        analyzer_params = {'hull_distance': 0.2}  # analysis criterion (need not be exactly same as agent's goal)
        experiment = OqmdDFTonMC1
        experiment_params = {'structure_dict': structure_dict, 'candidate_data': candidate_data, 'timeout': 30000}
        experiment_params.update({'timeout': 30000})
        finalizer = FinalizeQqmdCampaign
        finalizer_params = {'hull_distance': 0.2}
        n_max_iter = n_max_iter_heuristics(len(candidate_data), 10)

        # Construct and start loop
        new_loop = Loop(
            candidate_data, agent, experiment, analyzer, agent_params=agent_params,
            analyzer_params=analyzer_params, experiment_params=experiment_params,
            finalizer=finalizer, finalizer_params=finalizer_params, heuristic_stopper=5,
            s3_prefix=s3_prefix)
        new_loop.auto_loop_in_directories(
            n_iterations=n_max_iter, timeout=10, monitor=True,
            initialize=True, with_icsd=True)
    except Exception as e:
        error_msg = {"error": "{}".format(e),
                     "traceback": traceback.format_exc()}
        dumpfn(error_msg, "error.json")
        dumpfn({"status": "error"}, "job_status.json")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    return True
예제 #4
0
    def from_chemsys(cls,
                     chemsys,
                     prefix="proto-dft-2/runs",
                     n_max_atoms=20,
                     agent=None,
                     analyzer=None,
                     experiment=None,
                     log_file="campaign.log",
                     cloudwatch_group="/camd/worker/dev/"):
        """
        Class factory method for constructing campaign from
        chemsys.

        Args:
            chemsys (str): chemical system for the campaign
            prefix (str): prefix for s3
            n_max_atoms (int): number of maximum atoms
            agent (Agent): agent for stability campaign
            analyzer (Analyzer): analyzer for stability campaign
            experiment (Agent): experiment for stability campaign
            log_file (str): log filename
            cloudwatch_group (str): cloudwatch group to log to

        Returns:
            (ProtoDFTCampaign): Standard proto-dft campaign from
                the chemical system

        """
        logger = logging.Logger("camd")
        logger.setLevel("INFO")
        file_handler = logging.FileHandler(log_file)
        cw_handler = CloudWatchLogHandler(log_group=cloudwatch_group,
                                          stream_name=chemsys)
        logger.addHandler(file_handler)
        logger.addHandler(cw_handler)
        logger.addHandler(logging.StreamHandler())

        logger.info(
            "Starting campaign factory from_chemsys {}".format(chemsys))
        s3_prefix = "{}/{}".format(prefix, chemsys)

        # Initialize s3
        dumpfn({
            "started": datetime.now().isoformat(),
            "version": __version__
        }, "start.json")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

        # Get structure domain
        element_list = chemsys.split('-')
        max_coeff, charge_balanced = heuristic_setup(element_list)
        domain = StructureDomain.from_bounds(element_list,
                                             charge_balanced=charge_balanced,
                                             n_max_atoms=n_max_atoms,
                                             **{'grid': range(1, max_coeff)})
        candidate_data = domain.candidates()

        # Dump structure/candidate data
        with open('candidate_data.pickle', 'wb') as f:
            pickle.dump(candidate_data, f)
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')
        logger.info("Candidates generated")

        # Set up agents and loop parameters
        agent = agent or AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
        analyzer = analyzer or StabilityAnalyzer(hull_distance=0.2)
        experiment = experiment or OqmdDFTonMC1(timeout=30000)
        seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

        # Construct and start loop
        return cls(candidate_data=candidate_data,
                   agent=agent,
                   experiment=experiment,
                   analyzer=analyzer,
                   seed_data=seed_data,
                   heuristic_stopper=5,
                   s3_prefix=s3_prefix,
                   logger=logger)
예제 #5
0
    def from_chemsys(cls,
                     chemsys,
                     prefix="proto-dft-2/runs",
                     n_max_atoms=20,
                     agent=None,
                     analyzer=None,
                     experiment=None,
                     log_file="campaign.log",
                     cloudwatch_group="/camd/worker/dev/"):
        """
        Class factory method for constructing campaign from
        chemsys.

        Args:
            chemsys (str): chemical system for the campaign
            prefix (str): prefix for s3
            n_max_atoms (int): number of maximum atoms
            agent (Agent): agent for stability campaign
            analyzer (Analyzer): analyzer for stability campaign
            experiment (Agent): experiment for stability campaign
            log_file (str): log filename
            cloudwatch_group (str): cloudwatch group to log to

        Returns:
            (ProtoDFTCampaign): Standard proto-dft campaign from
                the chemical system

        """
        logger = logging.Logger("camd")
        logger.setLevel("INFO")
        file_handler = logging.FileHandler(log_file)
        cw_handler = CloudWatchLogHandler(log_group=cloudwatch_group,
                                          stream_name=chemsys)
        logger.addHandler(file_handler)
        logger.addHandler(cw_handler)
        logger.addHandler(logging.StreamHandler())

        logger.info(
            "Starting campaign factory from_chemsys {}".format(chemsys))
        s3_prefix = "{}/{}".format(prefix, chemsys)

        # Initialize s3
        dumpfn({
            "started": datetime.now().isoformat(),
            "version": __version__
        }, "start.json")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

        # Get structure domain
        # Check cache
        cache_key = "protosearch_cache/v1/{}/{}/candidates.pickle".format(
            chemsys, n_max_atoms)
        # TODO: create test of isfile
        if s3_key_exists(bucket=CAMD_S3_BUCKET, key=cache_key):
            logger.info("Found cached protosearch domain.")
            candidate_data = pd.read_pickle("s3://{}/{}".format(
                CAMD_S3_BUCKET, cache_key))
            logger.info("Loaded cached {}.".format(cache_key))
        else:
            logger.info(
                "Generating domain with max {} atoms.".format(n_max_atoms))
            element_list = chemsys.split('-')
            max_coeff, charge_balanced = heuristic_setup(element_list)
            domain = StructureDomain.from_bounds(
                element_list,
                charge_balanced=charge_balanced,
                n_max_atoms=n_max_atoms,
                **{'grid': range(1, max_coeff)})
            candidate_data = domain.candidates()
            logger.info("Candidates generated")
            candidate_data.to_pickle("s3://{}/{}".format(
                CAMD_S3_BUCKET, cache_key))
            logger.info("Cached protosearch domain at {}.".format(cache_key))

        # Dump structure/candidate data
        candidate_data.to_pickle("candidate_data.pickle")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

        # Set up agents and loop parameters
        agent = agent or AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
        analyzer = analyzer or StabilityAnalyzer(hull_distance=0.2)
        experiment = experiment or OqmdDFTonMC1(timeout=30000,
                                                prefix_append="proto-dft")
        seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

        # Load cached experiments
        logger.info("Loading cached experiments")
        cached_experiments = experiment.fetch_cached(candidate_data)
        logger.info("Found {} experiments.".format(len(cached_experiments)))
        if len(cached_experiments) > 0:
            summary, seed_data = analyzer.analyze(cached_experiments,
                                                  seed_data)
            # Remove cached experiments from candidate_data
            candidate_space = candidate_data.index.difference(
                cached_experiments.index, sort=False).tolist()
            candidate_data = candidate_data.loc[candidate_space]
            logger.info("Cached experiments added to seed.")

        # Construct and start loop
        return cls(candidate_data=candidate_data,
                   agent=agent,
                   experiment=experiment,
                   analyzer=analyzer,
                   seed_data=seed_data,
                   heuristic_stopper=5,
                   s3_prefix=s3_prefix,
                   logger=logger)