Example #1
def run(mpfile, **kwargs):
    import pymatgen
    import pandas as pd
    from mpcontribs.users.swf.rest.rester import SwfRester
    # helper imports; module paths assumed from the mpcontribs package layout
    from mpcontribs.config import mp_level01_titles
    from mpcontribs.io.core.utils import get_composition_from_string, clean_value

    # load data from google sheet
    google_sheet = mpfile.document[mp_level01_titles[0]].pop('google_sheet')
    google_sheet += '/export?format=xlsx'
    df_dct = pd.read_excel(google_sheet, sheet_name=None)

    # rename sheet columns
    elements = ['Fe', 'V', 'Co']
    df_dct['IP Energy Product'].columns = ['IP_Energy_product'] + elements
    df_dct['total'].columns = elements
    df_dct['MOKE'].columns = elements + ['thickness', 'MOKE_IP_Hc']
    df_dct['VSM'].columns = elements + ['thickness', 'VSM_IP_Hc']
    df_dct['formula'].columns = elements
    df_dct['Kondorsky'].columns = ['angle', 'Kondorsky_Model', 'Experiment']

    # round all compositions to 100%
    for sheet, df in df_dct.items():
        if sheet != 'Kondorsky':
            for idx, row in df.iterrows():
                df.loc[idx:idx, elements] = round_to_100_percent(row[elements])

    row5 = df_dct['formula'].iloc[0]
    formula5 = get_composition_from_string(
        pymatgen.Composition(10 * row5).formula.replace(' ', ''))
    dct = dict((k, clean_value(v, '%')) for k, v in row5.to_dict().items())
    mpfile.add_hierarchical_data({'data': dct}, identifier=formula5)
    mpfile.add_data_table(formula5,
                          df_dct['Kondorsky'],
                          name='Angular Dependence of Switching Field')

    for sheet, df in df_dct.items():
        if sheet == 'formula' or sheet == 'Kondorsky' or sheet == 'total':
            continue
        for idx, row in df.iterrows():
            composition = pymatgen.Composition(row[elements] * 10)
            formula = get_composition_from_string(
                composition.formula.replace(' ', ''))
            dct = dict((k, clean_value(v, '%'))
                       for k, v in row[elements].to_dict().items())
            mpfile.add_hierarchical_data({'data': dct}, identifier=formula)
            columns = [x for x in row.index if x not in elements]
            if columns:
                data = row[columns].round(decimals=1)
                dct = dict(
                    (k, clean_value(v)) for k, v in data.to_dict().items())
                mpfile.add_hierarchical_data({'data': dct}, identifier=formula)
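
Example #1 also calls a round_to_100_percent helper that is not shown above. Below is a
minimal sketch, assuming it implements largest-remainder rounding so the integer
percentages still sum to exactly 100; the project's real helper may differ.

def round_to_100_percent(values):
    """Round a sequence of shares so the resulting integers sum to 100."""
    values = list(values)
    total = sum(values)
    scaled = [v * 100.0 / total for v in values]
    floored = [int(v) for v in scaled]
    remainder = 100 - sum(floored)
    # hand the leftover points to the entries with the largest fractional parts
    order = sorted(range(len(values)), key=lambda i: scaled[i] - floored[i], reverse=True)
    for i in order[:remainder]:
        floored[i] += 1
    return floored

print(round_to_100_percent([33.3, 33.3, 33.4]))  # -> [33, 33, 34]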
Example #4
    def add_structure(self, source, name=None, identifier=None, fmt=None):
        """add a structure to the mpfile"""
        from pymatgen import Structure, MPRester

        if isinstance(source, Structure):
            structure = source
        elif isinstance(source, dict):
            structure = Structure.from_dict(source)
        elif os.path.exists(source):
            structure = Structure.from_file(source, sort=True)
        elif isinstance(source, six.string_types):
            if fmt is None:
                raise ValueError("Need fmt to get structure from string!")
            structure = Structure.from_str(source, fmt, sort=True)
        else:
            raise ValueError(source, "not supported!")

        if name is not None:
            if not isinstance(name, six.string_types):
                raise ValueError("structure name needs to be a string")
            elif "." in name:
                raise ValueError("structure name cannot contain dots (.)")

        mpr = MPRester()
        if not mpr.api_key:
            raise ValueError(
                "API key not set. Run `pmg config --add PMG_MAPI_KEY <USER_API_KEY>`."
            )
        matched_mpids = mpr.find_structure(structure)
        formula = get_composition_from_string(structure.composition.formula)
        if not matched_mpids:
            if identifier is None:
                identifier = formula
                print(
                    "Structure not found in MP! Please submit via MPComplete to "
                    "obtain mp-id or manually choose an anchor mp-id! Continuing "
                    "with {} as identifier!".format(identifier))
            else:
                print("Structure not found in MP! Forcing {} as identifier!".
                      format(identifier))
        elif identifier is None:
            identifier = matched_mpids[0]
            if len(matched_mpids) > 1:
                print("Multiple matching structures found in MP. Using",
                      identifier)
        elif identifier not in matched_mpids:
            msg = "Structure does not match {} but instead {}!".format(
                identifier, matched_mpids)
            raise ValueError(msg)

        idx = len(
            self.document.get(identifier, {}).get(mp_level01_titles[3], {}))
        sub_key = formula if name is None else name
        if sub_key in self.document.get(identifier,
                                        {}).get(mp_level01_titles[3], {}):
            sub_key += "_{}".format(idx)
        self.document.rec_update(
            nest_dict(structure.as_dict(),
                      [identifier, mp_level01_titles[3], sub_key]))
        return identifier
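
A hedged usage sketch for add_structure: the MPFile construction below is a
placeholder for however the surrounding project instantiates it, and a
PMG_MAPI_KEY must be configured, as the method's own error message states.

from pymatgen import Structure, Lattice

mpfile = MPFile()  # hypothetical; constructed however the project does it
rocksalt = Structure(Lattice.cubic(5.64), ['Na', 'Cl'],
                     [[0, 0, 0], [0.5, 0.5, 0.5]])

# pass a Structure directly; the return value is the matched (or forced) mp-id
identifier = mpfile.add_structure(rocksalt, name='rocksalt')

# or pass a string, in which case fmt is mandatory
identifier = mpfile.add_structure(rocksalt.to(fmt='cif'), fmt='cif',
                                  name='rocksalt_cif')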
def run(mpfile, **kwargs):
    import os
    import numpy as np
    from pandas import DataFrame
    # helper imports; module paths assumed from the mpcontribs package layout
    # (load_data, read_xrdml and load_RSM are project-level helpers)
    from mpcontribs.io.core.utils import get_composition_from_string

    input_dir = mpfile.hdata['_hdata']['input_dir']
    identifier = get_composition_from_string('PbZr20Ti80O3')
    print(identifier)

    # 'SP128_NSO_LPFM0000.ibw' too big to display in notebook
    files = ['BR_60016 (1).ibw', 'SP128_NSO_VPFM0000.ibw']
    for f in files:
        file_name = os.path.join(input_dir, f)
        df = load_data(file_name)
        name = f.split('.')[0]
        mpfile.add_data_table(identifier, df, name)
        print('imported', f)

    xrd_file = os.path.join(input_dir, 'Program6_JA_6_2th0m Near SRO (002)_2.xrdml.xml')
    data = read_xrdml(xrd_file)
    df = DataFrame(np.stack((data['2Theta'],data['data']),1), columns=['2Theta','Intensity'])
    opts = {'yaxis': {'type': 'log'}} # see plotly docs
    mpfile.add_data_table(identifier, df, 'NearSRO', plot_options=opts)
    print('imported', os.path.basename(xrd_file))

    rsm_file = os.path.join(input_dir, 'JA 42 RSM 103 STO 001.xrdml.xml')
    rvals, df = load_RSM(rsm_file)
    mpfile.add_hierarchical_data({'rsm_range': {
        'x': '{} {}'.format(rvals[0], rvals[1]),
        'y': '{} {}'.format(rvals[2], rvals[3]),
    }}, identifier=identifier)
    mpfile.add_data_table(identifier, df, 'RSM')
    print('imported', os.path.basename(rsm_file))
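
load_data, read_xrdml and load_RSM are project helpers that are not shown. As an
illustration only, here is a minimal sketch of what a read_xrdml could look like
for a single-scan PANalytical .xrdml file, assuming one counts element plus
start/end 2θ positions; the real helper may handle far more of the format.

import numpy as np
import xml.etree.ElementTree as ET

def read_xrdml(path):
    """Sketch: return {'2Theta': ..., 'data': ...} arrays from an .xrdml scan."""
    root = ET.parse(path).getroot()

    def first(tag):  # match local tag names to sidestep XML namespaces
        return next(el for el in root.iter() if el.tag.rsplit('}', 1)[-1] == tag)

    counts = np.array(first('counts').text.split(), dtype=float)
    start, end = (float(first(t).text) for t in ('startPosition', 'endPosition'))
    return {'2Theta': np.linspace(start, end, len(counts)), 'data': counts}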
def run(mpfile, **kwargs):
    import os
    from decimal import Decimal
    from zipfile import ZipFile
    # helper imports; module paths assumed from the mpcontribs package layout
    # (read_csv and get_concentration_functions are module-level helpers here)
    from mpcontribs.io.core.recdict import RecursiveDict
    from mpcontribs.io.core.utils import clean_value, get_composition_from_string, nest_dict

    input_file = mpfile.document['_hdata'].pop('input_file')
    zip_path = os.path.join(os.environ['HOME'], 'work', input_file)
    if not os.path.exists(zip_path):
        return 'Please upload', zip_path
    zip_file = ZipFile(zip_path, 'r')

    composition_table_dict = mpfile.document['_hdata']['composition_table']
    conc_funcs = get_concentration_functions(composition_table_dict)

    for info in zip_file.infolist():
        print(info.filename)
        d = RecursiveDict()

        # positions.x/y from filename, <scan-id>_<meas-element>_<X>_<Y>.csv;
        # keep only the last three underscore-separated fields so that
        # unpacking works even when the scan-id itself contains underscores
        element, x, y = os.path.splitext(info.filename)[0].rsplit('_', 3)[-3:]
        d['position'] = RecursiveDict(
            (k, clean_value(v, 'mm'))
            for k, v in zip(['x', 'y'], [x, y])
        )

        # composition
        d['composition'] = RecursiveDict(
            (el, clean_value(f(x, y), convert_to_percent=True))
            for el, f in conc_funcs.items()
        )

        # identifier
        identifier = get_composition_from_string(''.join([
            '{}{}'.format(el, int(round(Decimal(comp.split()[0]))))
            for el, comp in d['composition'].items()
        ]))

        # load csv file; skip this entry if it is missing from the archive
        try:
            csv = zip_file.read(info.filename)
        except KeyError:
            print('ERROR: Did not find %s in zip file' % info.filename)
            continue

        # read csv to pandas DataFrame and add to MPFile
        df = read_csv(csv)
        df = df[['Energy', 'XAS', 'XMCD']]

        # min and max
        d.rec_update(RecursiveDict(
            (y, RecursiveDict([
                ('min', df[y].min()), ('max', df[y].max())
            ])) for y in ['XAS', 'XMCD']
        ))

        # add data to MPFile
        mpfile.add_hierarchical_data(nest_dict(d, ['data']), identifier=identifier)
        mpfile.add_data_table(identifier, df, name=element)
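
clean_value is used throughout these examples with an optional unit, a
convert_to_percent flag, or a max_dgts cap. A minimal sketch inferred from those
call sites; the real mpcontribs implementation may differ in formatting details.

def clean_value(value, unit='', convert_to_percent=False, max_dgts=3):
    """Return '<value rounded to max_dgts significant digits> <unit>'."""
    value = float(value)
    if convert_to_percent:
        value *= 100.0
        unit = '%'
    formatted = '{:.{}g}'.format(value, max_dgts)
    return '{} {}'.format(formatted, unit).strip()

print(clean_value(0.12345, convert_to_percent=True))  # -> '12.3 %'
print(clean_value(42.0, 'mm'))                        # -> '42 mm'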
Example #9
def run(mpfile, dup_check_test_site=True):
    import csv
    import pandas as pd
    from pymatgen import MPRester
    # helper imports; module paths assumed from the mpcontribs package layout
    from mpcontribs.config import mp_level01_titles, mp_id_pattern
    from mpcontribs.io.core.utils import get_composition_from_string

    existing_identifiers = {}
    #for b in [False, True]:
    #    with DlrVietenRester(test_site=b) as mpr:
    #        for doc in mpr.query_contributions():
    #            existing_identifiers[doc['mp_cat_id']] = doc['_id']
    #    if not dup_check_test_site:
    #        break

    google_sheet = mpfile.document[mp_level01_titles[0]].pop('google_sheet')
    google_sheet += '/export?format=xlsx'
    df_dct = pd.read_excel(google_sheet, sheet_name=None)  # 'sheetname' is the pre-0.21 pandas spelling

    mpr = MPRester()
    update = 0
    for sheet in df_dct.keys():
        print(sheet)
        df = df_dct[sheet]

        sheet_split = sheet.split()
        composition = sheet_split[0]
        identifier = get_composition_from_string(composition)
        if len(sheet_split) > 1 and mp_id_pattern.match(sheet_split[1]):
            identifier = sheet_split[1]
        print('identifier = {}'.format(identifier))

        if 'CIF' in sheet_split:
            print('adding CIF ...')
            df.columns = [df.columns[0]] + [''] * (df.shape[1] - 1)
            cif = df.to_csv(na_rep='',
                            index=False,
                            sep='\t',
                            quoting=csv.QUOTE_NONE)
            mpfile.add_structure(cif, identifier=identifier, fmt='cif')

        else:
            print('adding data ...')
            mpfile.add_hierarchical_data({'composition': composition},
                                         identifier=identifier)
            mpfile.add_data_table(identifier, df, name='dH_dS')

        if identifier in existing_identifiers:
            cid = existing_identifiers[identifier]
            mpfile.insert_id(identifier, cid)
            update += 1

    print(len(mpfile.ids), 'contributions to submit.')
    if update > 0:
        print(update, 'contributions to update.')
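
mp_id_pattern is imported rather than defined in these scripts. A sketch assuming
the conventional Materials Project identifier format ('mp-' or 'mvc-' followed by
digits); the actual pattern lives in the mpcontribs package.

import re

mp_id_pattern = re.compile(r'^(mp|mvc)-\d+$', re.IGNORECASE)

print(bool(mp_id_pattern.match('mp-1234')))  # True
print(bool(mp_id_pattern.match('PbZrO3')))   # False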
Example #12
def run(mpfile, **kwargs):
    import os
    import gzip
    import json
    import tarfile
    import urllib.request
    from monty.json import MontyDecoder
    from pymatgen import Structure
    # helper imports; module paths assumed from the mpcontribs package layout
    # (`db` is a module-level pymongo database handle in the original script)
    from mpcontribs.io.core.recdict import RecursiveDict
    from mpcontribs.io.core.utils import clean_value, get_composition_from_string, nest_dict

    reference_project = None
    input_data, input_keys, extra = RecursiveDict(), RecursiveDict(), RecursiveDict()
    #input_urls = mpfile.document['_hdata'].pop('input_urls')
    input_urls = {
        'NUS': {
            "file": "http://www.2dmatpedia.org/static/db.json.gz",
            "detail": "http://www.2dmatpedia.org/2dmaterials/doc/{}"
        },
        'JARVIS': {
            "file": "https://www.ctcms.nist.gov/~knc6/jdft_{}.json.tgz",
            "detail": "https://www.ctcms.nist.gov/~knc6/jsmol/{}.html"
        }
    }

    for project in input_urls:
        input_url = input_urls[project]['file']
        if '{}' in input_url:
            input_url = input_url.format('2d')  # TODO 3d for Jarvis

        #dbfile = os.path.join(os.environ['HOME'], 'work', input_url.rsplit('/')[-1])
        dbfile = input_url.rsplit('/')[-1]
        if not os.path.exists(dbfile):
            print('downloading', dbfile, '...')
            urllib.request.urlretrieve(input_url, dbfile)

        ext = os.path.splitext(dbfile)[1]
        is_nus = bool(ext == '.gz')
        id_key = 'source_id' if is_nus else 'mpid'
        if not is_nus:
            with tarfile.open(dbfile, "r:gz") as tar:
                member = tar.getmembers()[0]
                raw_data = json.load(tar.extractfile(member), cls=MontyDecoder)
        else:
            reference_project = project
            raw_data = []
            with gzip.open(dbfile, 'rb') as f:
                for line in f:
                    raw_data.append(json.loads(line, cls=MontyDecoder))
        input_data[project] = RecursiveDict((d[id_key], d) for d in raw_data)

        input_keys[project] = [
            'material_id', 'exfoliation_energy_per_atom', 'structure'
        ] if is_nus else ['jid', 'exfoliation_en', 'final_str']
        extra[project] = [
            ('fin_en', ('E', 'meV/atom')),
            ('op_gap', ('ΔE|optB88vdW', 'meV/atom')),
            ('mbj_gap', ('ΔE|mbj', 'meV/atom')),
            #('kv', ('Kᵥ', 'GPa')),
            #('gv', ('Gᵥ', 'GPa'))
        ] if not is_nus else []

        print(len(input_data[project]), 'materials loaded for', project)

    projects = input_data.keys()
    identifiers = []
    for d in input_data.values():
        identifiers += list(d.keys())

    for identifier in set(identifiers):
        print(identifier)
        data, structures = RecursiveDict(), RecursiveDict()

        for project in projects:
            if project not in data:
                data[project] = RecursiveDict()
            if identifier in input_data[project]:
                d = input_data[project][identifier]
                structures[project] = d[input_keys[project][-1]]
                if data.get('formula') is None:
                    data['formula'] = get_composition_from_string(
                        structures[project].composition.reduced_formula)
                data[project]['id'] = input_urls[project]['detail'].format(
                    d[input_keys[project][0]])
                if input_keys[project][1] in d:
                    Ex = d[input_keys[project][1]]
                    if project == reference_project:
                        Ex *= 1000.
                    data[project]['Eₓ'] = clean_value(Ex, 'eV')
                for k, (sym, unit) in extra[project]:
                    if d[k] != 'na':
                        data[project][sym] = clean_value(d[k], unit)

        mpfile.add_hierarchical_data(nest_dict(data, ['data']),
                                     identifier=identifier)
        #r = db.contributions.update_one(
        #    {'identifier': identifier, 'project': 'jarvis_dft'},
        #    {'$set': {'content.data': mpfile.document[identifier]['data']}},
        #    upsert=True
        #)
        #print(r.matched_count, r.modified_count, r.upserted_id)

        doc = db.contributions.find_one(
            {
                'identifier': identifier,
                'project': 'jarvis_dft'
            }, {
                '_id': 1,
                'content.structures': 1
            })
        if 'structures' in doc['content']:
            print('structures already added for', identifier)
            continue
        print(doc['_id'])

        inserted_ids = []
        for project, structure in structures.items():
            try:
                mpfile.add_structure(structure,
                                     name=project,
                                     identifier=identifier)
                sdct = mpfile.document[identifier]['structures'][project]
                sdct.pop('@module')
                sdct.pop('@class')
                if sdct['charge'] is None:
                    sdct.pop('charge')
                sdct['identifier'] = identifier
                sdct['project'] = 'jarvis_dft'
                sdct['name'] = project
                sdct['cid'] = doc['_id']
                r = db.structures.insert_one(sdct)
                inserted_ids.append(r.inserted_id)
            except Exception as ex:
                print(str(ex))

        print(inserted_ids)
        r = db.contributions.update_one(
            {'_id': doc['_id']},
            {'$set': {
                'content.structures': inserted_ids
            }})
        print(r.matched_count, r.modified_count)
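
Both loaders above lean on monty's JSON round trip: pymatgen objects embed
'@module'/'@class' keys when serialized, so MontyDecoder can rebuild them from
plain JSON lines or tarball members. A self-contained example of that round trip:

import json
from monty.json import MontyEncoder, MontyDecoder
from pymatgen import Structure, Lattice

structure = Structure(Lattice.cubic(5.64), ['Na', 'Cl'],
                      [[0, 0, 0], [0.5, 0.5, 0.5]])
blob = json.dumps(structure, cls=MontyEncoder)  # serializes via as_dict()
restored = json.loads(blob, cls=MontyDecoder)   # rebuilds the Structure
assert restored.composition.reduced_formula == 'NaCl'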
def run(mpfile, **kwargs):
    import os
    import gzip
    import json
    import tarfile
    import urllib.request
    from monty.json import MontyDecoder
    from pymatgen import Structure
    # helper imports; module paths assumed from the mpcontribs package layout
    from mpcontribs.io.core.recdict import RecursiveDict
    from mpcontribs.io.core.utils import clean_value, get_composition_from_string, nest_dict

    reference_project = None
    input_data, input_keys, extra = RecursiveDict(), RecursiveDict(), RecursiveDict()
    input_urls = mpfile.document['_hdata'].pop('input_urls')

    for project in input_urls:
        input_url = input_urls[project]['file']
        if '{}' in input_url:
            input_url = input_url.format('2d') # TODO 3d for Jarvis

        dbfile = os.path.join(os.environ['HOME'], 'work', input_url.rsplit('/')[-1])
        if not os.path.exists(dbfile):
            print('downloading', dbfile, '...')
            urllib.request.urlretrieve(input_url, dbfile)

        ext = os.path.splitext(dbfile)[1]
        is_nus = bool(ext == '.gz')
        id_key = 'parent_id' if is_nus else 'mpid'
        if not is_nus:
            with tarfile.open(dbfile, "r:gz") as tar:
                member = tar.getmembers()[0]
                raw_data = json.load(tar.extractfile(member), cls=MontyDecoder)
        else:
            reference_project = project
            raw_data = []
            with gzip.open(dbfile, 'rb') as f:
                for line in f:
                    raw_data.append(json.loads(line, cls=MontyDecoder))
        input_data[project] = RecursiveDict((d[id_key], d) for d in raw_data)

        input_keys[project] = [
            'material_id', 'exfoliation_energy_per_atom', 'structure'
        ] if is_nus else ['jid', 'exfoliation_en', 'final_str']
        extra[project] = [
            ('fin_en', ('E', 'eV')),
            ('op_gap', ('ΔE|optB88vdW', 'eV')),
            ('mbj_gap', ('ΔE|mbj', 'eV')),
            #('kv', ('Kᵥ', 'GPa')),
            #('gv', ('Gᵥ', 'GPa'))
        ] if not is_nus else []

        print(len(input_data[project]), 'materials loaded for', project)

    projects = input_data.keys()
    identifiers = []
    for d in input_data.values():
        identifiers += list(d.keys())

    for identifier in identifiers:
        data, structures = RecursiveDict(), RecursiveDict()

        for project in projects:
            if project not in data:
                data[project] = RecursiveDict()
            if identifier in input_data[project]:
                d = input_data[project][identifier]
                structures[project] = d[input_keys[project][-1]]
                if data.get('formula') is None:
                    data['formula'] = get_composition_from_string(
                        structures[project].composition.reduced_formula
                    )
                data[project]['id'] = input_urls[project]['detail'].format(d[input_keys[project][0]])
                Ex = d[input_keys[project][1]]
                if project == reference_project:
                    Ex *= 1000.
                data[project]['Eₓ'] = clean_value(Ex, 'eV')
                for k, (sym, unit) in extra[project]:
                    if d[k] != 'na':
                        data[project][sym] = clean_value(d[k], unit)

        mpfile.add_hierarchical_data(nest_dict(data, ['data']), identifier=identifier)
        for project, structure in structures.items():
            name = '{}_{}'.format(data['formula'], project)
            try:
                mpfile.add_structure(structure, name=name, identifier=identifier)
            except Exception as ex:
                print(str(ex))
def run(mpfile, **kwargs):
    # TODO clone solar_perovskite if needed, abort if insufficient permissions
    import os
    import pandas as pd
    from glob import glob
    import solar_perovskite
    from solar_perovskite.core import GetExpThermo
    from solar_perovskite.init.find_structures import FindStructures
    from solar_perovskite.init.import_data import Importdata
    from solar_perovskite.modelling.from_theo import EnthTheo
    # read_csv, Table, get_table and get_fit_pars are module-level helpers here

    input_file = mpfile.hdata.general['input_file']
    input_file = os.path.join(os.path.dirname(solar_perovskite.__file__), input_file)
    table = read_csv(open(input_file, 'r').read().replace(';', ','))
    dct = super(Table, table).to_dict(orient='records', into=RecursiveDict)

    shomate = pd.read_csv(os.path.abspath(os.path.join(
        os.path.dirname(solar_perovskite.__file__), "datafiles", "shomate.csv"
    )), index_col=0)
    shomate_dct = RecursiveDict()
    for col in shomate.columns:
        key = col.split('.')[0]
        if key not in shomate_dct:
            shomate_dct[key] = RecursiveDict()
        d = shomate[col].to_dict(into=RecursiveDict)
        subkey = '{}-{}'.format(int(d.pop('low')), int(d.pop('high')))
        shomate_dct[key][subkey] = RecursiveDict(
            (k, clean_value(v, max_dgts=6)) for k, v in d.items()
        )
    mpfile.add_hierarchical_data(nest_dict(shomate_dct, ['shomate']))

    for row in dct:

        sample_number = int(row['sample_number'])
        identifier = row['closest phase MP (oxidized)'].replace('n.a.', '')
        if not identifier:
            # fall back to the composition when no MP phase is given; only
            # skip rows whose identifier is set but is not an mp-id
            identifier = get_composition_from_string(row['composition oxidized phase'])
        elif not identifier.startswith('mp-'):
            continue
        print(identifier)

        print('add hdata ...')
        d = RecursiveDict()
        d['tolerance_factor'] = row['tolerance_factor']
        d['solid_solution'] = row['type of solid solution']
        d['oxidized_phase'] = RecursiveDict()
        d['oxidized_phase']['composition'] = row['composition oxidized phase']
        d['oxidized_phase']['crystal-structure'] = row['crystal structure (fully oxidized)']
        d['reduced_phase'] = RecursiveDict()
        d['reduced_phase']['composition'] = row['composition reduced phase']
        d['reduced_phase']['closest-MP'] = row['closest phase MP (reduced)'].replace('n.a.', '')
        d = nest_dict(d, ['data'])
        d['pars'] = get_fit_pars(sample_number)
        d['pars']['theo_compstr'] = row['theo_compstr']
        try:
            fs = FindStructures(compstr=row['theo_compstr'])
            theo_redenth = fs.find_theo_redenth()
            imp = Importdata()
            splitcomp = imp.split_comp(row['theo_compstr'])
            conc_act = imp.find_active(mat_comp=splitcomp)[1]
            et = EnthTheo(comp=row['theo_compstr'])
            dh_max, dh_min = et.calc_dh_endm()
            red_enth_mean_endm = (conc_act * dh_min) + ((1 - conc_act) * dh_max)
            difference = theo_redenth - red_enth_mean_endm
            d['pars']['dh_min'] = clean_value(dh_min + difference, max_dgts=8)
            d['pars']['dh_max'] = clean_value(dh_max + difference, max_dgts=8)
        except Exception as ex:
            print('error in dh_min/max!')
            print(str(ex))
        mpfile.add_hierarchical_data(d, identifier=identifier)

        print('add ΔH ...')
        exp_thermo = GetExpThermo(sample_number, plotting=False)
        enthalpy = exp_thermo.exp_dh()
        table = get_table(enthalpy, 'H')
        mpfile.add_data_table(identifier, table, name='enthalpy')

        print('add ΔS ...')
        entropy = exp_thermo.exp_ds()
        table = get_table(entropy, 'S')
        mpfile.add_data_table(identifier, table, name='entropy')

        print('add raw data ...')
        tga_results = os.path.join(os.path.dirname(solar_perovskite.__file__), 'tga_results')
        for path in glob(os.path.join(tga_results, 'ExpDat_JV_P_{}_*.csv'.format(sample_number))):
            print(path.split('_{}_'.format(sample_number))[-1].split('.')[0], '...')
            body = open(path, 'r').read()
            cols = ['Time [min]', 'Temperature [C]', 'dm [%]', 'pO2']
            table = read_csv(body, lineterminator=os.linesep, usecols=cols, skiprows=5)
            table = table[cols].iloc[::100, :]
            # scale/shift for better graphs
            T, dm, p = [pd.to_numeric(table[col]) for col in cols[1:]]
            T_min, T_max, dm_min, dm_max, p_max = T.min(), T.max(), dm.min(), dm.max(), p.max()
            rT, rdm = abs(T_max - T_min), abs(dm_max - dm_min)
            table[cols[2]] = (dm - dm_min) * rT/rdm
            table[cols[3]] = p * rT/p_max
            table.rename(columns={
                'dm [%]': '(dm [%] + {:.4g}) * {:.4g}'.format(-dm_min, rT/rdm),
                'pO2': 'pO₂ * {:.4g}'.format(rT/p_max)
            }, inplace=True)
            mpfile.add_data_table(identifier, table, name='raw')
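
The scale/shift block above maps dm and pO₂ onto the temperature range so all
three traces share one y-axis in the resulting graph. The same normalization as a
standalone snippet with illustrative values:

import pandas as pd

df = pd.DataFrame({'Temperature [C]': [200.0, 600.0, 1000.0],
                   'dm [%]': [-3.0, -1.5, 0.0],
                   'pO2': [0.04, 0.12, 0.20]})
T, dm, p = df['Temperature [C]'], df['dm [%]'], df['pO2']
rT, rdm = abs(T.max() - T.min()), abs(dm.max() - dm.min())
df['dm [%]'] = (dm - dm.min()) * rT / rdm  # shift to zero, scale to span of T
df['pO2'] = p * rT / p.max()               # scale to span of T
print(df)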
Example #16
    def to_backgrid_dict(self):
        """Backgrid-conform dict from DataFrame"""
        # shorten global import times by deferring heavy imports to call time
        import numpy as np
        from mpcontribs.io.core.utils import get_composition_from_string
        from pandas import MultiIndex
        import pymatgen.util as pmg_util
        from pymatgen.core.composition import CompositionError

        table = dict()
        nrows_max = 260
        nrows = self.shape[0]
        df = Table(self.head(n=nrows_max)) if nrows > nrows_max else self
        numeric_columns = df.select_dtypes(
            include=[np.number]).columns.tolist()

        if isinstance(df.index, MultiIndex):
            df.reset_index(inplace=True)

        table['columns'] = []
        table['rows'] = super(Table, df).to_dict(orient='records')

        for col_index, col in enumerate(list(df.columns)):
            cell_type = 'number'

            # avoid looping rows to minimize use of `df.iat` (time-consuming in 3d)
            if not col.startswith('level_') and col not in numeric_columns:
                is_url_column, prev_unit, old_col = True, None, col

                for row_index in range(df.shape[0]):
                    cell = str(df.iat[row_index, col_index])
                    cell_split = cell.split(' ', 1)

                    if not cell or len(cell_split) == 1:  # empty cell or no space
                        is_url_column = bool(
                            is_url_column
                            and (not cell or mp_id_pattern.match(cell)))
                        if is_url_column:
                            if cell:
                                value = 'https://materialsproject.org/materials/{}'.format(
                                    cell)
                                table['rows'][row_index][col] = value
                        elif cell:
                            try:
                                composition = get_composition_from_string(cell)
                                composition = pmg_util.string.unicodeify(
                                    composition)
                                table['rows'][row_index][col] = composition
                            except (CompositionError, ValueError,
                                    OverflowError):
                                try:
                                    # https://stackoverflow.com/a/38020041
                                    result = urlparse(cell)
                                    if not all([
                                            result.scheme, result.netloc,
                                            result.path
                                    ]):
                                        break
                                    is_url_column = True
                                except Exception:
                                    break

                    else:
                        value, unit = cell_split  # TODO convert cell_split[0] to float?
                        is_url_column = False
                        try:
                            float(value)  # unit is only a unit if value is a number
                        except ValueError:
                            continue
                        table['rows'][row_index].pop(old_col)
                        if prev_unit is None:
                            prev_unit = unit
                            col = '{} [{}]'.format(col, unit)
                        table['rows'][row_index][col] = cell if prev_unit != unit else value

                cell_type = 'uri' if is_url_column else 'string'

            col_split = col.split('##')
            nesting = [col_split[0]] if len(col_split) > 1 else []
            table['columns'].append({
                'name': col,
                'cell': cell_type,
                'nesting': nesting,
                'editable': 0
            })
            if len(col_split) > 1:
                table['columns'][-1].update(
                    {'label': '##'.join(col_split[1:])})
            if len(table['columns']) > 12:
                table['columns'][-1]['renderable'] = 0

        header = RecursiveDict()
        for idx, col in enumerate(table['columns']):
            if 'label' in col:
                k, sk = col['name'].split('##')
                sk_split = sk.split()
                if len(sk_split) == 2:
                    d = {'name': sk_split[0], 'unit': sk_split[1], 'idx': idx}
                    if k not in header:
                        header[k] = [d]
                    else:
                        header[k].append(d)
                elif k in header:
                    header.pop(k)

        for k, skl in header.items():
            units = [sk['unit'] for sk in skl]
            if units.count(units[0]) == len(units):
                for sk in skl:
                    table['columns'][sk['idx']]['label'] = sk['name']
                    table['columns'][sk['idx']]['nesting'][0] = '{} {}'.format(
                        k, sk['unit'])

        return table
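
A hedged sketch of the returned payload, assuming Table subclasses
pandas.DataFrame as the super(Table, df) calls imply; the column names and values
below are made up for illustration.

import pandas as pd

df = pd.DataFrame({'identifier': ['mp-149', 'mp-390'],
                   'data##ΔH [kJ/mol]': ['12.3 kJ/mol', '45.6 kJ/mol']})
payload = Table(df).to_backgrid_dict()

# expected shape:
# {'columns': [{'name': ..., 'cell': 'number'|'string'|'uri',
#               'nesting': [...], 'editable': 0, ...}, ...],
#  'rows': [{...}, ...]}
print(len(payload['columns']), len(payload['rows']))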
Example #17
    def to_backgrid_dict(self):
        """Backgrid-conform dict from DataFrame"""
        # shorten global import times by deferring heavy imports to call time
        from mpcontribs.io.core.utils import get_composition_from_string
        from pandas import MultiIndex
        import pymatgen.util as pmg_util
        from pymatgen.core.composition import CompositionError

        table = dict()
        nrows_max = 260
        nrows = self.shape[0]
        df = Table(self.head(n=nrows_max)) if nrows > nrows_max else self

        if isinstance(df.index, MultiIndex):
            df.reset_index(inplace=True)

        table["columns"] = []
        table["rows"] = super(Table, df).to_dict(orient="records")

        for col_index, col in enumerate(list(df.columns)):
            cell_type = "number"

            # avoid looping rows to minimize use of `df.iat` (time-consuming in 3d)
            if not col.startswith("level_") and col[-1] != "]":
                is_url_column = True

                for row_index in range(df.shape[0]):
                    cell = str(df.iat[row_index, col_index])
                    is_url_column = bool(
                        is_url_column
                        and (not cell or mp_id_pattern.match(cell)))
                    if is_url_column:
                        if cell:
                            value = "https://materialsproject.org/materials/{}".format(
                                cell)
                            table["rows"][row_index][col] = value
                    elif cell:
                        try:
                            composition = get_composition_from_string(cell)
                            composition = pmg_util.string.unicodeify(
                                composition)
                            table["rows"][row_index][col] = composition
                        except (CompositionError, ValueError, OverflowError):
                            try:
                                # https://stackoverflow.com/a/38020041
                                result = urlparse(cell)
                                if not all([
                                        result.scheme, result.netloc,
                                        result.path
                                ]):
                                    break
                                is_url_column = True
                            except Exception:
                                break

                cell_type = "uri" if is_url_column else "string"

            col_split = col.split(".")
            nesting = [col_split[0]] if len(col_split) > 1 else []
            table["columns"].append({
                "name": col,
                "cell": cell_type,
                "nesting": nesting,
                "editable": 0
            })
            if len(col_split) > 1:
                table["columns"][-1].update({"label": ".".join(col_split[1:])})

        return table