def test_get_structure(self):
    """Check structure construction from a full stored record, from
    geometry-only data, and via the web API fallback."""
    auid = 'aflow:0132ab6b9cddd429'
    fields = ['aurl', 'geometry', 'species', 'composition',
              'positions_fractional', 'CONTCAR_relax_vasp']
    data = self.afa_store.store.query_one(criteria={'auid': auid},
                                          properties=fields)
    # Full record: structure can be built from the stored CONTCAR.
    self.assertIsInstance(
        self.afa_store._get_structure(Entry(**data), use_web_api=False),
        Structure)
    # Without the CONTCAR, the geometry fields must suffice.
    del data['CONTCAR_relax_vasp']
    self.assertIsInstance(
        self.afa_store._get_structure(Entry(**data), use_web_api=False),
        Structure)
    # With only an aurl, retrieval goes through the web API.
    self.assertIsInstance(
        self.afa_store._get_structure(Entry(aurl=data['aurl']), use_web_api=True),
        Structure)
def test_lazy():
    """Tests lazy retrieval of entry attributes.

    Seeded keywords parse directly from the dict; the remaining ones
    (e.g. ``energy_atom``, ``volume_cell``) are fetched on demand.
    """
    from aflow.entries import Entry

    seed_a = {
        "compound": "Be2O2",
        "auid": "aflow:ed51b7b3938f117f",
        "aurl": "aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/HEX/Be1O1_ICSD_15620",
        "agl_thermal_conductivity_300K": "53.361",
        "Egap": "7.4494"
    }
    seed_b = {
        "compound": "B1H4Na1",
        "auid": "aflow:3a531e5b3aa9205e",
        "aurl": "aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/FCC/B1H4Na1_ICSD_165835",
        "agl_thermal_conductivity_300K": "7.45279",
        "Egap": "6.6252"
    }
    entry_a = Entry(**seed_a)
    entry_b = Entry(**seed_b)

    # Seeded keywords are parsed from the constructor arguments...
    assert entry_a.Egap == 7.4494
    assert entry_a.agl_thermal_conductivity_300K == 53.361
    # ...while this one must be retrieved lazily.
    assert entry_a.energy_atom == -7.10342

    assert entry_b.Egap == 6.6252
    assert entry_b.agl_thermal_conductivity_300K == 7.45279
    assert entry_b.volume_cell == 56.9766
def test_atoms_read(batch=50):
    """Construct ASE atoms objects for a random sample of entries."""
    shuffle(raw_entries)
    sample = raw_entries[:batch]
    for raw in sample:
        print(raw["aurl"])
        entry = Entry(**raw)
        # The relaxed CONTCAR should always be present, so atoms()
        # is expected to succeed for every sampled entry.
        atoms = entry.atoms()
        assert atoms is not None
def __next__(self): """Yields a generator over AFLUX API request results. """ #First, find out which entry we are on. n = (self._iter // self.k) + 1 i = self._iter % self.k #Reverse the sign now that we have figured out the ordinal page number. if self.reverse: n *= -1 if n not in self.responses and self._iter < self.max_N: self._n = abs(n) self._request(self.n, self.k) assert len(self.responses) > 0 from aflow.entries import Entry if self._iter < self.max_N: index = self.k * (abs(n) - 1) + i + 1 key = "{} of {}".format(index, self.N) raw = self.responses[n][key] result = Entry(**raw) #Increment the iterator right before we return the entry. self._iter += 1 return result else: raise StopIteration()
def _read_file(filename, format="json"):
    """Parse an AFLOW entry from `filename` and type-check its attributes.

    Args:
        filename (str): path to the entry data; either a JSON dump or a
            single-line, pipe-delimited ``key=value`` AFLOW record.
        format (str): "json" to parse with `json.load`; any other value
            selects the pipe-delimited parser.

    Every parsed attribute is compared against the type declared by the
    matching keyword class in `K`. Deprecated keywords and `None` values
    are skipped; unrecognized keywords are reported.
    """
    if format == "json":
        with open(filename, "r") as fd:
            raw_entries = json.load(fd)
    else:
        with open(filename, "r") as fd:
            strings = fd.readline().split("|")
        raw_entries = dict()
        for item in strings:
            # Split on the first '=' only: raw values may themselves
            # contain '=' characters, which would otherwise raise a
            # ValueError from tuple unpacking.
            key, value = item.strip().split("=", 1)
            raw_entries[key] = value
    entry = Entry(**raw_entries)
    for key, value in entry.attributes.items():
        print(key, value)
        if hasattr(K, key):
            cls = getattr(K, key)
            if cls.status == "deprecated":
                continue
            if value is None:
                # None value can also occur (e.g. null in JSON).
                continue
            if cls.ptype in (float, int, str):
                assert isinstance(value, cls.ptype)
            else:
                # Container keywords store the element type at index 1.
                # (Renamed from `format` to avoid shadowing the parameter.)
                element_type = cls.ptype[1]
                assert _all_types_in_list(value, element_type)
        else:
            print(key, value, "Not recognized")
def test_all():
    """Tests all retrievals for a given entry, including those it
    doesn't have.
    """
    from aflow.entries import Entry
    from aflow import list_keywords

    entries = [
        {
            "compound": "Ag2I2",
            "auid": "aflow:008f8da25d4acde9",
            "aurl": "aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/TET/Ag1I1_ICSD_28230",
            "agl_thermal_conductivity_300K": "0.562013",
            "Egap": "1.9774"
        },
        {
            "compound": "Mg1",
            "auid": "aflow:00528d06f69c7b55",
            "aurl": "aflowlib.duke.edu:AFLOWDATA/LIB2_RAW/CeMg_pv/304"
        }
    ]

    for entry_vals in entries:
        entry = Entry(**entry_vals)
        keywords = list_keywords()
        keywords.append('catalog')
        available = entry.keywords
        for kw in keywords:
            # Keywords the entry actually has resolve to a value;
            # everything else comes back as None.
            if kw in available:
                assert getattr(entry, kw) is not None
            else:
                assert getattr(entry, kw) is None
def _read_out(filename):
    """Parse a pipe-delimited AFLOW .out record from `filename` and
    sanity-check the types `Entry` produces for representative keywords.
    """
    with open(filename, "r") as fd:
        strings = fd.readline().split("|")
    raw_entries = dict()
    for item in strings:
        # Split on the first '=' only: raw values may themselves contain
        # '=' characters, which would otherwise break tuple unpacking.
        key, value = item.strip().split("=", 1)
        print(key, value)
        raw_entries[key] = value
    entry = Entry(**raw_entries)
    # test if Egap can be parsed from null --> None
    if entry.Egap is not None:
        assert isinstance(entry.Egap, float)
    assert isinstance(entry.species, list)
    # composition should be returned as an nd array, never a plain list
    assert isinstance(entry.composition, np.ndarray)
    assert isinstance(entry.composition, list) is False
    # int types
    assert isinstance(entry.spacegroup_orig, int)
    # string to plain list
    assert isinstance(entry.species_pp_version, list)
    assert isinstance(entry.species, list)
    return
def get_properties_from_store(self, criteria=None, properties=None, **kwargs):
    """
    Produces raw property data from a MongoDB AFLOW database
    using a Mongo-like query construction. To use the web API,
    use get_properties_from_web().

    Note: if `properties` is empty, only the AFLOW keywords listed in
    AflowAdapter.mapping will be retrieved.

    Args:
        criteria (`dict` or `None`): Mongo-like query criteria
        properties (`list` or `dict` or `None`): list of fields to retrieve
            or Mongo-like projection dictionary (e.g. `{'field': True}`)
        **kwargs: arguments to MongoStore object

    Returns:
        generator: generates dicts of data, keyed by AFLOW keyword.

    Raises:
        ValueError: if no store has been configured on this adapter.
    """
    if not self.store:
        raise ValueError("No store specified!")

    if not properties:
        properties = list(self.mapping.keys())

    properties_to_retrieve = set(properties)
    file_properties_to_map = dict()
    # Properties derived from stored files cannot be queried directly:
    # swap each one out for the underlying store fields it needs, and
    # remember it so it can be reconstructed after the query.
    for p in self.property_store_field_mapping.keys():
        if p in properties_to_retrieve:
            additional_fields = self.property_store_field_mapping[p]
            file_properties_to_map[p] = additional_fields
            properties_to_retrieve.remove(p)
            properties_to_retrieve = properties_to_retrieve.union(set(additional_fields))

    q = self.store.query(criteria=criteria, properties=properties_to_retrieve, **kwargs)

    for raw_data in q:
        # Drop Mongo's internal id before building the Entry.
        raw_data.pop('_id')
        # Reconstruct each file-derived property from the raw fields.
        for prop in file_properties_to_map.keys():
            transformed_data = self.file_transform_func[prop](Entry(**raw_data))
            if transformed_data is not None:
                raw_data[prop] = transformed_data
        # Only the originally requested properties are kept in the output.
        yield self._convert_entry_to_dict(Entry(**raw_data), props=properties)
def test_query_files(batch=10):
    """Test file retrieval on randomly sampled entries.

    Args:
        batch (int): number of randomly chosen entries to probe.
    """
    shuffle(raw_entries)
    for e in raw_entries[:batch]:
        entry = Entry(**e)
        print(entry.aurl)
        # Read the CONTCAR.relax, which should always be present
        afile = AflowFile(entry.aurl, "CONTCAR.relax")
        if "CONTCAR.relax" not in entry.files:
            # BUG FIX: `aurl` was undefined here (NameError when the
            # warning fired); use entry.aurl instead.
            warn(
                f"{entry.aurl} does not contain CONTCAR.relax file, probably a MD calculation"
            )
            continue
        else:
            assert "CONTCAR.relax" in afile.filename
        # read the content, watch for HTTP404 error
        # hope no http404 error
        content = afile()
        print(content)
def _read_json(filename):
    """Load an AFLOW entry from a JSON dump and sanity-check the types
    `Entry` produces for representative keywords."""
    with open(filename, "r") as fd:
        raw = json.load(fd)
    entry = Entry(**raw)
    # Egap parsed from JSON null becomes None; otherwise it is a float.
    if entry.Egap is not None:
        assert isinstance(entry.Egap, float)
    assert isinstance(entry.species, list)
    # composition should come back as an ndarray, never a plain list
    assert isinstance(entry.composition, np.ndarray)
    assert isinstance(entry.composition, list) is False
    # integer-typed keyword
    assert isinstance(entry.spacegroup_orig, int)
    # string fields expanded into plain lists
    assert isinstance(entry.species_pp_version, list)
    assert isinstance(entry.species, list)
    return
def test_aurl_with_colon():
    """Test if aurl with colon can be read."""
    # aurls may contain 0 ~ 3 colons after the edu domain name; probe one
    # suitable entry for each colon count.
    for ncolon in range(4):
        shuffle(raw_entries)
        for raw in raw_entries:
            entry = Entry(**raw)
            aurl = entry.aurl
            print(entry.aurl)
            # edu:xx --> the base aurl already has 2 colon-separated parts
            if len(aurl.split(":")) != ncolon + 2:
                continue
            afile = AflowFile(aurl, "CONTCAR.relax")
            if "CONTCAR.relax" not in entry.files:
                warn(
                    f"{aurl} does not contain CONTCAR.relax file, probably a MD calculation"
                )
                continue
            else:
                assert "CONTCAR.relax" in afile.filename
            content = afile()
            print(content)
            # One successful read per colon count is enough.
            break
def test_all():
    """Tests all retrievals for a given entry, including those it
    doesn't have.

    NOTE(review): another ``test_all`` is defined elsewhere in this
    source; if both live in the same module, the later definition
    shadows the earlier one under pytest — consider renaming.
    """
    from aflow.entries import Entry
    from aflow import list_keywords

    seed = {
        "compound": "Be2O2",
        "auid": "aflow:ed51b7b3938f117f",
        "aurl": "aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/HEX/Be1O1_ICSD_15620",
        "agl_thermal_conductivity_300K": "53.361",
        "Egap": "7.4494"
    }
    entry = Entry(**seed)
    available = entry.keywords
    for kw in list_keywords():
        # Present keywords resolve to a value; absent ones are None.
        if kw in available:
            assert getattr(entry, kw) is not None
        else:
            assert getattr(entry, kw) is None
def get_properties_from_web(self, criteria=None, properties=None, max_request_size=1000):
    """
    Produces raw property data from the AFLUX API using a Mongo-like
    query construction. To use a MongoDB store, use
    get_properties_from_store().

    Note: `criteria` cannot be complex. Simple equality, inequality,
    or '$in' schema are supported.

    Note: if `properties` is empty, only the AFLOW keywords listed in
    AflowAdapter.mapping will be retrieved. If a valid, but unmappable
    AFLOW keyword is specified, it will be ignored.

    Note: this method uses threads to download external files if a
    specified property depends on an external AFLOW file.

    Args:
        criteria (`dict` or `None`): Mongo-like query criteria. Must be simple.
        properties (`list` or `None`): list of fields to retrieve. Does not
            support MongoDB projection dictionary.
        max_request_size (int): maximum number of materials to retrieve
            per request. If total number of records is greater than
            `max_request_size`, multiple requests will be made. Note that
            external file downloads will be limited to 10 concurrent
            connections, and is not related to this keyword. Default: 1000

    Returns:
        generator: generates dicts of data, keyed by AFLOW keyword.
    """
    if not properties:
        properties = list(self.mapping.keys())

    # Properties derived from external AFLOW files cannot be queried via
    # AFLUX: collect the filename -> [properties] mapping and strip those
    # properties from the API request.
    files_to_download = defaultdict(list)
    properties_to_retrieve = set(properties)
    for p, fn in self.property_web_file_mapping.items():
        if p in properties:
            files_to_download[fn].append(p)
            properties_to_retrieve.remove(p)

    aflow_kw_reset()
    q = AflowAPIQuery.from_pymongo(criteria, list(properties_to_retrieve),
                                   max_request_size, batch_reduction=True,
                                   property_reduction=True)

    futures = []
    materials = dict()
    files = defaultdict(dict)
    for material in q:
        auid = material.auid
        materials[auid] = material.raw
        # Kick off a threaded download for each external file this
        # material needs (executor created lazily, capped at 10 workers).
        for filename in files_to_download:
            if not self._executor:
                self._executor = ThreadPoolExecutor(max_workers=10)
            future = self._executor.submit(
                self._get_aflow_file, material.aurl, filename,
                auid=material.auid, with_metadata=True
            )
            futures.append(future)

    if futures:
        for future in as_completed(futures):
            response, auid, filename = future.result()
            if isinstance(response, HTTPError):  # pragma: no cover
                # Failed downloads are logged and recorded as None so the
                # material is still counted as complete.
                logger.info("Encountered error downloading file "
                            "{} for {}:\n{}".format(filename, auid, str(response)))
                response = None
            files[auid].update({filename: response})
            # Yield a material only once every file it needs has arrived.
            if len(files[auid]) == len(files_to_download):
                for fn, props in files_to_download.items():
                    data_in = materials[auid].copy()
                    # File contents are stored under the filename with
                    # '.' replaced by '_' (Mongo-safe key).
                    fn_mongo = fn.replace('.', '_')
                    data_in[fn_mongo] = files[auid][fn]
                    for prop in props:
                        transformed_data = self.file_transform_func[prop](Entry(**data_in))
                        if transformed_data is not None:
                            materials[auid][prop] = transformed_data
                yield self._convert_entry_to_dict(Entry(**materials[auid]))
    else:
        # No file-backed properties requested: yield materials directly.
        for material in materials.values():
            yield self._convert_entry_to_dict(Entry(**material))