def _formula_amount(num):
    """Convert the digits collected after an element symbol into a number.

    :param num: string of digit and "." characters, possibly empty
    :returns: an int when ``num`` is an integer literal, a float when it is a
        decimal, and 1 when no amount was given
    """
    try:
        # Raises ValueError both for "" and for decimal strings such as "0.5"
        return int(num)
    except ValueError:
        return float(num) if num else 1


def _to_user_defined(pif_obj):
    """Read the systems in the PIF to populate the user-defined portion

    Every key of the flattened ReadView is passed through
    ``_extract_key_value``; pairs with a truthy name and a non-None value are
    copied into the result. Composition data is then added from the PIF's
    dictionary form:

    * ``"elemental_percent"`` when the PIF has a ``composition`` list; for
      each component the actual atomic percent is used if present, otherwise
      the actual weight percent.
    * ``"elemental_proportion"`` when there is no composition but there is a
      ``chemicalFormula``; the formula is split into element symbols and the
      amount that follows each one (1 when no amount is written).

    :param pif_obj: PIF object; must work with ``ReadView`` and provide
        ``as_dictionary()``
    :returns: dict of the extracted user-defined values
    """
    res = {}

    # make a read view to flatten the hierarchy
    rv = ReadView(pif_obj)

    # Iterate over the keys in the read view
    for k in rv.keys():
        name, value = _extract_key_value(rv[k].raw)
        # add any objects that can be extracted
        if name and value is not None:
            res[name] = value

    # Grab interesting values not in the ReadView
    pif = pif_obj.as_dictionary()

    elements = {}
    if pif.get("composition"):
        for comp in pif["composition"]:
            if comp.get("actualAtomicPercent"):
                elements[comp["element"]] = float(
                    comp["actualAtomicPercent"]["value"])
            elif comp.get("actualWeightPercent"):
                elements[comp["element"]] = float(
                    comp["actualWeightPercent"]["value"])
        if elements:
            res["elemental_percent"] = elements
    elif pif.get("chemicalFormula"):
        symbol = ""
        num = ""
        # Chemical formulae are comprised of letters, numbers, and
        # potentially characters we don't care about
        for char in pif["chemicalFormula"]:
            if char.isupper():
                # Uppercase char indicates beginning of a new symbol, so the
                # symbol in holding (if any) is complete: record it
                if symbol:
                    elements[symbol] = _formula_amount(num)
                    num = ""
                symbol = char
            elif char.islower():
                # Lowercase chars are continuations of the current symbol
                symbol += char
            elif char.isdigit() or char == ".":
                # Digits and decimal points build up the amount
                num += char
            # All other chars are not useful
        # The loop only records a symbol when the next one starts, so the
        # final symbol of the formula has to be recorded here; without this
        # the last element would be silently dropped.
        if symbol:
            elements[symbol] = _formula_amount(num)
        if elements:
            res["elemental_proportion"] = elements
    return res
def test_method_software():
    """Method and software names attached to a property show up as read view keys"""
    software = Software(name="magic")
    method = Method(name="spam", software=[software])
    prop = Property(
        name="foo",
        scalars=[Scalar(value="bar")],
        methods=[method],
    )
    view = ReadView(System(properties=[prop]))

    # The property itself must still be reachable by name
    assert view["foo"].scalars[0].value == "bar", "Didn't elevate property key"
    # The method name and the software name must both be listed as keys
    assert "spam" in view.keys(), "Expected spam in keys"
    assert "magic" in view.keys(), "Expected magic in keys"
def parsePifKey(pif, key):
    """Parse a single pif key for single scalar values; return nan if no scalar found.

    The ReadView of the PIF is built once and reused for the key test and
    the value lookup, instead of being rebuilt for each step.

    Arguments
        pif = PIF to read
        key = ReadView key to look up

    Returns
        the ``value`` of the first scalar stored under ``key``, or np.nan
        when the key is absent, the entry has no ``scalars`` attribute, or
        the scalars sequence is empty
    """
    view = ReadView(pif)

    if key not in view.keys():
        return np.nan

    entry = view[key]
    if 'scalars' not in dir(entry):
        return np.nan

    try:
        return entry.scalars[0].value
    except IndexError:
        # Entry has a scalars sequence but nothing in it
        return np.nan
def pifs2df(pifs):
    """Converts a collection of PIFs to tabular data

    Very simple, purpose-built utility script. Converts an iterable of PIFs
    to a dataframe. Returns the superset of all PIF keys as the set of
    columns. Non-scalar values are converted to nan.

    The input is materialized into a list first, so one-shot iterables
    (e.g. generators) are supported. An empty input yields an empty
    dataframe. Columns are ordered by the string form of their key so the
    layout is reproducible.

    Usage
        df = pifs2df(pifs)

    Arguments
        pifs = an iterable of PIFs

    Returns
        df = Pandas DataFrame with one row per PIF and one column per key

    examples
        import os
        from citrination_client import CitrinationClient
        from citrination_client import PifSystemReturningQuery, DatasetQuery
        from citrination_client import DataQuery, Filter

        ## Set-up citrination search client
        site = "https://citrination.com"
        client = CitrinationClient(api_key = os.environ["CITRINATION_API_KEY"], site = site)
        search_client = client.search

        ## Query the Agrawal (2014) dataset
        system_query = PifSystemReturningQuery(
            size = 500,
            query = DataQuery(
                dataset = DatasetQuery(id = Filter(equal = "150670"))
            )
        )
        query_result = search_client.pif_search(system_query)
        pifs = [x.system for x in query_result.hits]

        ## Rectangularize the pifs
        df = pifs2df(pifs)
    """
    ## The PIFs are traversed twice (keys, then values), so take a copy that
    ## can be re-iterated even when the caller passed a generator
    pif_list = list(pifs)

    ## Consolidate superset of keys; starting from an empty set keeps this
    ## well-defined when there are no PIFs at all
    key_superset = set()
    for pif in pif_list:
        key_superset.update(ReadView(pif).keys())

    ## A list (not a set) gives pandas an explicit, stable column order
    keys_ref = sorted(key_superset, key=str)

    ## Rectangularize
    ## TODO: Append dataframes, rather than using a comprehension
    rows = [
        [parsePifKey(pif, key) for key in keys_ref]
        for pif in pif_list
    ]
    df_data = pd.DataFrame(columns=keys_ref, data=rows)

    return df_data
def test_ambiguity():
    """A key whose value differs between levels is dropped from the top-level dict"""
    child = System()
    child.uid = "10245"
    child.properties = [
        Property(name="foo", scalars=[Scalar(value=1.0)]),
        Property(name="bar", scalars=[Scalar(value=2.0)]),
    ]

    # The parent defines "foo" with a different value than the child does
    parent_foo = Property(name="foo", scalars=[Scalar(value=10.0)])
    parent = System(sub_systems=[child], properties=[parent_foo])

    view = ReadView(parent)

    # Explicit paths still resolve to their own values
    assert view.properties["foo"].scalars[0].value == 10.0
    assert view.sub_systems["10245"].properties["foo"].scalars[0].value == 1.0

    # "foo" is ambiguous at the top level, so it is not offered as a key ...
    assert "foo" not in view.keys()
    # ... but it is still unambiguous inside the sub-system's own view
    assert view.sub_systems["10245"]["foo"].scalars[0].value == 1.0
    # "bar" only exists in the child, so it is elevated as usual
    assert view["bar"].scalars[0].value == 2.0
def test_condition_elevation():
    """Conditions attached to a property are reachable as read view keys"""
    spam_condition = Value(name="spam", scalars=[Scalar(value="eggs")])
    foo_property = Property(
        name="foo",
        scalars=[Scalar(value="bar")],
        conditions=[spam_condition],
    )
    view = ReadView(System(properties=[foo_property]))

    foo_value = view["foo"].scalars[0].value
    assert foo_value == "bar", "Didn't elevate property key"

    spam_value = view["spam"].scalars[0].value
    assert spam_value == "eggs", "Didn't elevate condition key"
def test_unambig():
    """Properties with unique names are mirrored in the top-level dict"""
    system = System()
    system.properties = [
        Property(name="foo", scalars=[Scalar(value=1.0)]),
        Property(name="bar", scalars=[Scalar(value=2.0)]),
    ]

    view = ReadView(system)

    foo_value = view["foo"].scalars[0].value
    assert foo_value == 1.0
    bar_value = view["bar"].scalars[0].value
    assert bar_value == 2.0
def parsePifKey(pif, key):
    """Parse a single pif key for single scalar values; return nan if no scalar found.

    The ReadView of the PIF is built once and reused for the key test and
    the value lookup, instead of being rebuilt for each step.

    :param pif: PIF to access
    :type pif: pif
    :param key: key to access data
    :type key: string
    :returns: value of the first scalar stored under ``key``; np.nan when the
        key is absent, the entry has no ``scalars`` attribute, or the scalars
        sequence is empty
    :rtype: type of the stored scalar value, or float (np.nan)
    """
    view = ReadView(pif)

    if key not in view.keys():
        return np.nan

    entry = view[key]
    if 'scalars' not in dir(entry):
        return np.nan

    try:
        return entry.scalars[0].value
    except IndexError:
        # Entry has a scalars sequence but nothing in it
        return np.nan
def test_read_view():
    """Plain attributes and named properties are passed through to the read view"""
    system = System()
    system.uid = "10245"
    system.names = ["example", "ex"]
    system.properties = [
        Property(name="foo", scalars=[Scalar(value=1.0)]),
        Property(name="bar", scalars=[Scalar(value=2.0)]),
    ]

    view = ReadView(system)

    # Simple attributes are exposed unchanged
    assert view.uid == system.uid
    assert view.names == system.names

    # Properties are addressable by their name
    props = view.properties
    assert props["foo"].scalars[0].value == 1.0
    assert props["bar"].scalars[0].value == 2.0
def test_nested_read_view():
    """Nested Pios (a sub-system here) are processed recursively"""
    child = System()
    child.uid = "10245"
    child.properties = [
        Property(name="foo", scalars=[Scalar(value=1.0)]),
        Property(name="bar", scalars=[Scalar(value=2.0)]),
    ]
    parent = System(sub_systems=[child])

    view = ReadView(parent)

    # The sub-system is reachable both by explicit path and by its uid key
    assert view.sub_systems["10245"].uid == child.uid
    assert view["10245"].uid == child.uid

    # Its properties are reachable through the explicit path ...
    assert view.sub_systems["10245"].properties["foo"].scalars[0].value == 1.0
    assert view.sub_systems["10245"].properties["bar"].scalars[0].value == 2.0

    # ... and are elevated to the top-level view as well
    assert view["foo"].scalars[0].value == 1.0
    assert view["bar"].scalars[0].value == 2.0
def test_multiple_instances():
    """A key found in several places with the same value is kept at the top level"""
    child = System()
    child.uid = "10245"
    child.properties = [
        Property(name="foo", scalars=[Scalar(value=1.0)]),
        Property(name="bar", scalars=[Scalar(value=2.0)]),
    ]

    # The parent repeats "bar" with the very same value as the child
    parent_bar = Property(name="bar", scalars=[Scalar(value=2.0)])
    parent = System(sub_systems=[child], properties=[parent_bar])

    view = ReadView(parent)

    assert view.properties["bar"].scalars[0].value == 2.0
    assert view.sub_systems["10245"].properties["bar"].scalars[0].value == 2.0
    # Same value on both levels, so the key is still available directly
    assert view["bar"].scalars[0].value == 2.0
def test_key_error_case():
    """Build a read view from the csv_pif.pif fixture and check it exposes keys

    NOTE(review): judging by the name, this is presumably a regression test
    for a KeyError raised while building the view -- confirm.
    """
    # Path is relative to the working directory the tests are run from
    with open("./pypif_sdk/readview/tests/csv_pif.pif") as handle:
        loaded = load(handle)

    view = ReadView(loaded)
    key_count = len(view.keys())
    assert key_count > 0