def _get_compositions_by_mpid(self):
        '''
        We use the Materials Project's Python API to find the composition of
        various materials given their MPIDs. This can take a while though, so
        we also cache the results and update the cache as necessary.

        Resulting attribute:
            compositions_by_mpid_   A dictionary whose keys are MPIDs and whose
                                    values are lists of strings for each
                                    element that is present in the
                                    corresponding material. This object is
                                    cached and therefore may have extra
                                    key:value pairings that you may not need.
        '''
        # Load the current cache of compositions, or initialize an empty one
        # if the cache file does not exist yet
        try:
            with open(CACHE_LOCATION + 'mp_comp_data.pkl',
                      'rb') as file_handle:
                compositions_by_mpid = pickle.load(file_handle)
        except FileNotFoundError:
            compositions_by_mpid = {}

        # Figure out which MPIDs we need but have not yet cached
        catalog_docs = get_catalog_docs()
        mpids = {
            doc['mpid']
            for doc in self.adsorption_docs + catalog_docs
        }
        missing_mpids = mpids - set(compositions_by_mpid.keys())

        if missing_mpids:
            # Each MP document may contain several MPIDs. Here we get every
            # single document whose list of associated MPIDs matches anything
            # in our list of missing MPIDs.
            with MPRester(read_rc('matproj_api_key')) as rester:
                query = {'task_ids': {'$elemMatch': {'$in': list(missing_mpids)}}}
                properties = ['elements', 'task_ids']
                mp_docs = rester.query(criteria=query, properties=properties)

            # Match the MP documents to our missing MPIDs
            for mpid in missing_mpids:
                for doc in mp_docs:
                    if mpid in set(doc['task_ids']):
                        compositions_by_mpid[mpid] = doc['elements']
                        break

            # Save the updated cache
            with open(CACHE_LOCATION + 'mp_comp_data.pkl',
                      'wb') as file_handle:
                pickle.dump(compositions_by_mpid, file_handle)

        self.compositions_by_mpid_ = compositions_by_mpid
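
# Usage sketch (not from the original source): once _get_compositions_by_mpid()
# has populated the cache, the composition data can be read straight from the
# pickle it maintains. CACHE_LOCATION here is a placeholder for your own path.
import pickle

CACHE_LOCATION = '/path/to/your/cache/'   # assumption: your cache directory

with open(CACHE_LOCATION + 'mp_comp_data.pkl', 'rb') as file_handle:
    compositions_by_mpid = pickle.load(file_handle)

print(compositions_by_mpid['mp-2'])   # e.g., ['Pd'] for elemental palladium
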
Example #2
def cache_predictions(adsorbate, model_name='model0', processes=32):
    '''
    Wrapper to make and save our adsorption energy predictions in a pickle.

    Args:
        adsorbate   A string indicating which adsorbate you want to make/cache
                    predictions for.
        model_name  A string indicating which model you want to use to make the
                    predictions.
        processes   The number of threads/processes you want to use
    Returns:
        predictions     A dictionary whose keys are the ObjectIDs of the
                        documents in our catalog and whose values are floats of
                        the adsorption energy prediction of that document.
    '''
    # We'll make the predictions on our catalog, whose documents have no
    # adsorbates associated with them. The fingerprinter (the first step in the
    # pipeline) needs to know the adsorbate though, so we add that information
    # here.
    docs = get_catalog_docs()
    for doc in docs:
        doc['adsorbate'] = adsorbate

    # Load the model/pipeline
    file_name = GASDB_LOCATION + '/pipeline_%s_%s.pkl' % (adsorbate,
                                                          model_name)
    with open(file_name, 'rb') as file_handle:
        pipeline = pickle.load(file_handle)

    # Create the predictions
    print('[%s] Making adsorption energy predictions for %s using %s...' %
          (datetime.utcnow(), adsorbate, model_name))
    adsorption_energies = multimap_method(pipeline,
                                          'predict',
                                          docs,
                                          chunked=True,
                                          processes=processes,
                                          maxtasksperchild=100,
                                          chunksize=1000,
                                          n_calcs=len(docs))
    predictions = {
        doc['mongo_id']: prediction
        for doc, prediction in zip(docs, adsorption_energies)
    }

    # Save and return our answers
    cache_name = PREDICTION_CACHES[(model_name, adsorbate)]
    with atomic_write(cache_name, mode='wb', overwrite=True) as file_handle:
        bytes_ = pickle.dumps(predictions, protocol=pickle.HIGHEST_PROTOCOL)
        file_handle.write(bytes_)
    return predictions
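
# Usage sketch (illustrative): create and cache predictions for one adsorbate,
# then look up a single prediction by its catalog document's Mongo ID. 'CO' and
# 'model0' are hypothetical choices; use a pipeline you have actually trained.
from gaspy.gasdb import get_catalog_docs

predictions = cache_predictions('CO', model_name='model0', processes=32)

docs = get_catalog_docs()
print(predictions[docs[0]['mongo_id']])   # predicted adsorption energy (eV)

Example #3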
    def _get_compositions_by_mpid(self):
        '''
        We use the Materials Project's Python API to find the composition of
        various materials given their MPIDs. This can take a while though, so
        we also cache the results and modify the cache as necessary.

        Resulting attribute:
            compositions_by_mpid_   A dictionary whose keys are MPIDs and whose
                                    values are lists of strings for each
                                    element that is present in the
                                    corresponding material. This object is
                                    cached and therefore may have extra
                                    key:value pairings that you may not need.
        '''
        catalog_docs = get_catalog_docs()
        mpids = {doc['mpid'] for doc in self.adsorption_docs + catalog_docs}
        stoichs = {mpid: get_stoich_from_mpid(mpid) for mpid in mpids}
        self.compositions_by_mpid_ = {
            mpid: list(stoich.keys())
            for mpid, stoich in stoichs.items()
        }
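
# Minimal sketch (assumed data): judging from the comprehension above,
# get_stoich_from_mpid() returns a mapping from elements to their counts, so
# taking its keys yields the element list. Hard-coded stoichiometries stand in
# for the real Materials Project lookups here.
stoichs = {'mp-2': {'Pd': 1}, 'mp-30': {'Cu': 1}}
compositions_by_mpid = {mpid: list(stoich.keys())
                        for mpid, stoich in stoichs.items()}
print(compositions_by_mpid)   # {'mp-2': ['Pd'], 'mp-30': ['Cu']}

Example #4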
from gaspy.tasks import schedule_tasks
from gaspy.gasdb import get_catalog_docs
from gaspy.tasks.metadata_calculators import CalculateAdsorptionEnergy

# Get all of the sites that we have enumerated
all_site_documents = get_catalog_docs()

# Pick the sites that we want to run. In this case, it'll be sites on
# palladium (as per Materials Project ID 2, mp-2) on (111) facets.
site_documents_to_calc = [
    doc for doc in all_site_documents
    if (doc['mpid'] == 'mp-2' and doc['miller'] == [1, 1, 1])
]

# Turn the sites into GASpy/Luigi tasks
tasks = [
    CalculateAdsorptionEnergy(adsorbate_name='H',
                              adsorption_site=doc['adsorption_site'],
                              mpid=doc['mpid'],
                              miller_indices=doc['miller'],
                              shift=doc['shift'],
                              top=doc['top']) for doc in site_documents_to_calc
]

# Schedule/run all of the tasks
schedule_tasks(tasks)
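
# Illustrative follow-up (not from the original source): the tasks above are
# ordinary Luigi tasks, so their parameter values are exposed as attributes.
# Printing a quick summary like this before calling schedule_tasks() is a
# handy sanity check, since scheduling kicks off real DFT workflows.
print('Scheduled %d adsorption energy calculations' % len(tasks))
print(tasks[0].adsorbate_name)   # 'H'

Example #5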
import pickle
from tqdm import tqdm
from gaspy.gasdb import get_catalog_docs

docs = get_catalog_docs()

mpids = {doc['mpid'] for doc in tqdm(docs, desc='finding mpids')}
sites_by_mpid = {mpid: [] for mpid in mpids}

for doc in tqdm(docs, desc='sorting sites'):
    sites_by_mpid[doc['mpid']].append(doc)

with open('sites_by_mpid.pkl', 'wb') as file_handle:
    pickle.dump(sites_by_mpid, file_handle)
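
# Follow-up sketch: reload the cache later and count the sites per material as
# a quick check that the grouping worked as intended.
with open('sites_by_mpid.pkl', 'rb') as file_handle:
    sites_by_mpid = pickle.load(file_handle)

for mpid, sites in sites_by_mpid.items():
    print('%s: %d sites' % (mpid, len(sites)))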