def _get_compositions_by_mpid(self):
    '''
    We use the Materials Project's python API to find the composition of
    various materials given their MPIDs. This can take awhile though, so we
    also cache the results and modify the cache as necessary.

    Resulting attribute:
        compositions_by_mpid_   A dictionary whose keys are MPIDs and whose
                                values are lists of strings for each element
                                that is present in the corresponding material.
                                This object is cached and therefore may have
                                extra key:value pairings that you may not need.
    '''
    # Load the current cache of compositions; start fresh if it's missing.
    try:
        with open(CACHE_LOCATION + 'mp_comp_data.pkl', 'rb') as file_handle:
            compositions_by_mpid = pickle.load(file_handle)
    except FileNotFoundError:
        compositions_by_mpid = {}

    # Figure out which MPIDs we still need to look up. The previous
    # implementation queried Materials Project for every MPID, which
    # defeated the purpose of the cache.
    catalog_docs = get_catalog_docs()
    all_mpids = {doc['mpid'] for doc in self.adsorption_docs + catalog_docs}
    missing_mpids = all_mpids - set(compositions_by_mpid)

    if missing_mpids:
        # Each MP document may contain several MPIDs ('task_ids'). Here we
        # get every single document whose list of associated MPIDs matches
        # anything in our list of missing MPIDs.
        with MPRester(read_rc('matproj_api_key')) as rester:
            query = {'task_ids': {'$elemMatch': {'$in': list(missing_mpids)}}}
            properties = ['elements', 'task_ids']
            mp_docs = rester.query(criteria=query, properties=properties)

        # Build a task_id -> elements lookup in one pass instead of the old
        # O(n_mpids * n_docs) nested scan. `setdefault` keeps the elements of
        # the FIRST document listing a given task_id, matching the original
        # first-match-then-break behavior.
        elements_by_task_id = {}
        for doc in mp_docs:
            for task_id in doc['task_ids']:
                elements_by_task_id.setdefault(task_id, doc['elements'])

        # Match the MP documents to our missing MPIDs
        for mpid in missing_mpids:
            if mpid in elements_by_task_id:
                compositions_by_mpid[mpid] = elements_by_task_id[mpid]

    # Save the updated cache
    with open(CACHE_LOCATION + 'mp_comp_data.pkl', 'wb') as file_handle:
        pickle.dump(compositions_by_mpid, file_handle)

    self.compositions_by_mpid_ = compositions_by_mpid
def cache_predictions(adsorbate, model_name='model0', processes=32):
    '''
    Wrapper to make and save our adsorption energy predictions in a pickle.

    Args:
        adsorbate   A string indicating which adsorbate you want to
                    make/cache predictions for.
        model_name  A string indicating which model you want to use to make
                    the predictions.
        processes   The number of threads/processes you want to be using
    Returns:
        predictions A dictionary whose keys are the ObjectIDs of the documents
                    in our catalog and whose values are floats of the
                    adsorption energy prediction of that document.
    '''
    # The catalog documents carry no adsorbate information, but the
    # fingerprinter at the front of the pipeline needs it. Tag every
    # document with the adsorbate before predicting.
    docs = get_catalog_docs()
    for doc in docs:
        doc['adsorbate'] = adsorbate

    # Load the model/pipeline from its pickle on disk
    file_name = GASDB_LOCATION + '/pipeline_%s_%s.pkl' % (adsorbate, model_name)
    with open(file_name, 'rb') as file_handle:
        pipeline = pickle.load(file_handle)

    # Run the pipeline's `predict` method over the documents in parallel
    print('[%s] Making adsorption energy predictions for %s using %s...'
          % (datetime.utcnow(), adsorbate, model_name))
    adsorption_energies = multimap_method(pipeline, 'predict', docs, chunked=True,
                                          processes=processes, maxtasksperchild=100,
                                          chunksize=1000, n_calcs=len(docs))

    # Key each prediction by the Mongo ObjectID of its source document
    mongo_ids = (doc['mongo_id'] for doc in docs)
    predictions = dict(zip(mongo_ids, adsorption_energies))

    # Save and return our answers. The atomic write keeps readers from ever
    # seeing a half-written cache file.
    cache_name = PREDICTION_CACHES[(model_name, adsorbate)]
    with atomic_write(cache_name, mode='wb', overwrite=True) as file_handle:
        file_handle.write(pickle.dumps(predictions, protocol=pickle.HIGHEST_PROTOCOL))
    return predictions
def _get_compositions_by_mpid(self):
    '''
    We use the Materials Project's python API to find the composition of
    various materials given their MPIDs. This can take awhile though, so we
    also cache the results and modify the cache as necessary.

    Resulting attribute:
        compositions_by_mpid_   A dictionary whose keys are MPIDs and whose
                                values are lists of strings for each element
                                that is present in the corresponding material.
                                This object is cached and therefore may have
                                extra key:value pairings that you may not need.
    '''
    # Collect every MPID that shows up in either our adsorption documents
    # or the site catalog
    all_docs = self.adsorption_docs + get_catalog_docs()
    mpids = {doc['mpid'] for doc in all_docs}

    # The elements present in a material are exactly the keys of its
    # stoichiometry dictionary
    compositions = {}
    for mpid in mpids:
        stoichiometry = get_stoich_from_mpid(mpid)
        compositions[mpid] = list(stoichiometry.keys())
    self.compositions_by_mpid_ = compositions
from gaspy.tasks import schedule_tasks
from gaspy.gasdb import get_catalog_docs
from gaspy.tasks.metadata_calculators import CalculateAdsorptionEnergy


# Pull every adsorption site we have enumerated, then keep only the sites on
# (111) facets of the material with Materials Project ID mp-33
site_documents_to_calc = [
    doc for doc in get_catalog_docs()
    if doc['mpid'] == 'mp-33' and doc['miller'] == [1, 1, 1]
]

# Wrap each selected site in a GASpy/Luigi task that calculates the
# adsorption energy of hydrogen ('H') on that site
tasks = []
for doc in site_documents_to_calc:
    task = CalculateAdsorptionEnergy(adsorbate_name='H',
                                     adsorption_site=doc['adsorption_site'],
                                     mpid=doc['mpid'],
                                     miller_indices=doc['miller'],
                                     shift=doc['shift'],
                                     top=doc['top'])
    tasks.append(task)

# Schedule/run all of the tasks
schedule_tasks(tasks)
import pickle
from collections import defaultdict

from tqdm import tqdm

from gaspy.gasdb import get_catalog_docs


# Group every catalog site document by the MPID of its bulk material.
# A defaultdict lets us group in a single pass, instead of first scanning
# all the documents to build the set of MPIDs and then scanning them a
# second time to sort the sites.
docs = get_catalog_docs()
sites_by_mpid = defaultdict(list)
for doc in tqdm(docs, desc='sorting sites'):
    sites_by_mpid[doc['mpid']].append(doc)

# Cache the grouping to disk. We convert back to a plain dict so that
# consumers of the pickle don't inherit defaultdict's silent auto-creation
# of empty lists on missing-key lookups.
with open('sites_by_mpid.pkl', 'wb') as file_handle:
    pickle.dump(dict(sites_by_mpid), file_handle)