Python read_csv 예제들, mpcontribs.io.core.utils.read_csv Python 예제들

예제 #1

0

파일 보기

def run(mpfile, **kwargs):
    """Add the members of a gzipped tar archive to ``mpfile`` as data tables.

    The archive path (relative to ``$HOME/work``) and the column names are
    taken from the ``input`` entry popped off ``mpfile.document['_hdata']``.
    Members whose name contains ``pump`` are only loaded partially, as
    tiles of at most ``delta`` x ``delta`` cells; every other member is
    parsed as a whole using ``table_columns`` as column names.

    Args:
        mpfile: object providing ``document``, ``ids`` and
            ``add_data_table`` (presumably an MPFile -- confirm with caller).
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        None. If the archive does not exist a hint is printed and nothing
        is added.
    """
    from pandas import Panel

    meta_data = mpfile.document['_hdata'].pop('input')
    file_path = os.path.join(os.environ['HOME'], 'work',
                             meta_data['file_path'])
    if not os.path.exists(file_path):
        print('Please upload {}'.format(file_path))
        return

    table_columns = meta_data['table_columns'].split(' -- ')
    identifier = mpfile.ids[0]

    # only load a small area of the 'pump' members: one band of rows and
    # six bands of columns, `delta` entries per band
    row_bands, col_bands = range(1), range(6)
    delta = 150

    with tarfile.open(file_path, "r:gz") as tar:
        for member in tar.getmembers():
            name = os.path.splitext(member.name)[0]
            print('load {} ...'.format(name))
            f = tar.extractfile(member)
            if f is None:
                # extractfile() returns None for members that carry no
                # file content (e.g. directories); nothing to load
                continue
            if 'pump' in name:
                for x in row_bands:
                    # read the row band once and reuse it for every column
                    # band instead of re-reading the member per tile
                    f.seek(0)
                    head = [f.readline() for _ in range((x + 1) * delta)]
                    # NOTE(review): the row at index x * delta is skipped,
                    # as before (`i > x * delta`); confirm whether that row
                    # is a header or whether `>=` was intended.
                    rows = [
                        line.strip().split(',')
                        for line in head[x * delta + 1:]
                    ]
                    for y in col_bands:
                        fstr = '\n'.join(
                            ','.join(row[y * delta:(y + 1) * delta])
                            for row in rows)
                        print('read_csv ...')
                        df = read_csv(fstr, header=None)
                        # wrap every cell in a list so the values form a
                        # 3D array with a single-entry minor axis
                        arr = [[[cell] for cell in row] for row in df.values]
                        sub_name = '{}_{}_{}'.format(name, x, y)
                        df = Panel(arr,
                                   minor_axis=[sub_name]).transpose(
                                       2, 0, 1).to_frame()
                        print(df.head())
                        print('add {} ...'.format(sub_name))
                        mpfile.add_data_table(identifier, df, sub_name)
            else:
                fstr = f.read()
                df = read_csv(fstr, names=table_columns)
                print('add {} ...'.format(name))
                mpfile.add_data_table(identifier, df, name)

    print('Added data from {}'.format(file_path))

예제 #2

0

파일 보기

파일: pre_submission.py 프로젝트: ATNDiaye/MPContribsUsers

def run(mpfile, **kwargs):
    """Add the members of a gzipped tar archive to ``mpfile`` as data tables.

    The archive path (relative to ``$HOME/work``) and the column names are
    taken from the ``input`` entry popped off ``mpfile.document['_hdata']``.
    Members whose name contains ``pump`` are only loaded partially, as
    tiles of at most ``delta`` x ``delta`` cells; every other member is
    parsed as a whole using ``table_columns`` as column names.

    Args:
        mpfile: object providing ``document``, ``ids`` and
            ``add_data_table`` (presumably an MPFile -- confirm with caller).
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        None. If the archive does not exist a hint is printed and nothing
        is added.
    """
    from pandas import Panel

    meta_data = mpfile.document['_hdata'].pop('input')
    file_path = os.path.join(os.environ['HOME'], 'work', meta_data['file_path'])
    if not os.path.exists(file_path):
        print('Please upload {}'.format(file_path))
        return

    table_columns = meta_data['table_columns'].split(' -- ')
    identifier = mpfile.ids[0]

    # only load a small area of the 'pump' members: one band of rows and
    # six bands of columns, `delta` entries per band
    row_bands, col_bands = range(1), range(6)
    delta = 150

    with tarfile.open(file_path, "r:gz") as tar:
        for member in tar.getmembers():
            name = os.path.splitext(member.name)[0]
            print('load {} ...'.format(name))
            f = tar.extractfile(member)
            if f is None:
                # extractfile() returns None for members that carry no
                # file content (e.g. directories); nothing to load
                continue
            if 'pump' in name:
                for x in row_bands:
                    # read the row band once and reuse it for every column
                    # band instead of re-reading the member per tile
                    f.seek(0)
                    head = [f.readline() for _ in range((x+1)*delta)]
                    # NOTE(review): the row at index x*delta is skipped, as
                    # before (`i > x*delta`); confirm whether that row is a
                    # header or whether `>=` was intended.
                    rows = [line.strip().split(',') for line in head[x*delta+1:]]
                    for y in col_bands:
                        fstr = '\n'.join(
                            ','.join(row[y*delta:(y+1)*delta]) for row in rows
                        )
                        print('read_csv ...')
                        df = read_csv(fstr, header=None)
                        # wrap every cell in a list so the values form a
                        # 3D array with a single-entry minor axis
                        arr = [[[cell] for cell in row] for row in df.values]
                        sub_name = '{}_{}_{}'.format(name, x, y)
                        df = Panel(arr, minor_axis=[sub_name]).transpose(2, 0, 1).to_frame()
                        print(df.head())
                        print('add {} ...'.format(sub_name))
                        mpfile.add_data_table(identifier, df, sub_name)
            else:
                fstr = f.read()
                df = read_csv(fstr, names=table_columns)
                print('add {} ...'.format(name))
                mpfile.add_data_table(identifier, df, name)

    print('Added data from {}'.format(file_path))

예제 #3

0

파일 보기

파일: pre_submission.py 프로젝트: rossbutler2000/MPContribs

def run(mpfile):
    """Collect the ``*_01_DIV.txt`` files of Figure 4 into one "JV|dark" table.

    Every file is parsed as a two-column, tab-separated table, coerced to
    numbers and sorted by the voltage column. The voltage column of the
    first file seeds the combined frame; each file then contributes one
    column computed as ``abs(value + offset) * 1000.0 / 0.045``, where the
    offset depends on a tag found in the file name.

    Args:
        mpfile: object providing ``ids`` and ``add_data_table``.
    """
    identifier = mpfile.ids[0]
    xcol, ycol = "V [V]", "J {}°C {} [mA/cm²]"
    header = "\t".join([xcol, ycol])

    # name tag -> additive offset; the first matching tag wins, which keeps
    # the precedence of an if/elif chain
    tag_offsets = (
        ("fwd_dB_p3", -6.70273000e-11),
        ("rev_dB_p3", 4.49694000e-10),
        ("fwd_dG_p6", -8.90037000e-11),
        ("rev_dG_p6", 8.42196000e-10),
    )

    pattern = os.path.join("Data", "Figure 4", "*_01_DIV.txt")
    full_df = None
    for fn in sorted(glob(pattern)):
        with open(fn, "r") as f:
            contents = f.read()
        name = os.path.splitext(os.path.basename(fn))[0]

        # prepend the column header so read_csv names the two columns
        parsed = read_csv("\n".join([header, contents]), sep="\t")
        numeric = parsed.apply(to_numeric, errors="coerce")
        df = numeric.sort_values(by=[xcol])

        if full_df is None:
            full_df = df[xcol].to_frame()

        offset = next(
            (shift for tag, shift in tag_offsets if tag in name), 0.0)

        temp = name[4:].split("CZnO", 1)[0]
        if "fwd" in name:
            direction = "fwd"
        else:
            direction = "rev"

        shifted = (df[ycol] + offset).abs()
        full_df[ycol.format(temp, direction)] = shifted * 1000.0 / 0.045

    mpfile.add_data_table(identifier, full_df, "JV|dark")

예제 #4

0

파일 보기

파일: pre_submission.py 프로젝트: josuav1/MPContribsUsers-Nov-2018-

def get_fit_pars(sample_number):
    """Collect the fit parameters of one sample into a ``RecursiveDict``.

    Values come from ``Experimental(sample_number).get_fit_parameters()``,
    from the ``entropy_fitparam_SrFeOx`` data file and from the sample's
    ``JV_P_<sample_number>_H_S_error_advanced.csv`` raw-data file, both
    located inside the installed ``solar_perovskite`` package. Every value
    is passed through ``clean_value`` with ``max_dgts=6``.

    Args:
        sample_number: sample identifier forwarded to ``Experimental`` and
            ``Importdata.find_active`` and used in the raw-data file name.

    Returns:
        RecursiveDict with the keys ``fit_par_ent``, ``fit_param_enth``,
        ``fit_type_entr``, ``delta_0``, ``delta_min``, ``delta_max``,
        ``fit_param_fe``, ``act_mat`` and ``t_avg``.
    """
    # `pd.np` was removed in pandas 2.0; use numpy directly
    import numpy as np
    import solar_perovskite
    from solar_perovskite.modelling.isographs import Experimental
    from solar_perovskite.init.import_data import Importdata
    max_dgts = 6
    pkg_dir = os.path.dirname(solar_perovskite.__file__)

    def _clean(value):
        # shared rounding for every stored value
        return clean_value(value, max_dgts=max_dgts)

    d = RecursiveDict()
    exp = Experimental(sample_number)
    fitparam = exp.get_fit_parameters()
    # fitparam = compstr, delta_0, tolfac, mol_mass, fit_param_enth,
    #            fit_type_entr, fit_param_entr, delta_min, delta_max
    fit_par_ent = [fitparam[6][0], fitparam[6][1], fitparam[1]]
    d['fit_par_ent'] = RecursiveDict(
        (k, _clean(v)) for k, v in zip('abc', fit_par_ent))
    d['fit_param_enth'] = RecursiveDict(
        (k, _clean(v)) for k, v in zip('abcd', fitparam[4]))
    d['fit_type_entr'] = _clean(fitparam[5])
    d['delta_0'] = _clean(fitparam[1])
    d['delta_min'] = _clean(fitparam[7])
    d['delta_max'] = _clean(fitparam[8])
    fit_param_fe = np.loadtxt(
        os.path.abspath(
            os.path.join(pkg_dir, "datafiles", "entropy_fitparam_SrFeOx")))
    d['fit_param_fe'] = RecursiveDict(
        (k, _clean(v)) for k, v in zip('abcd', fit_param_fe))
    imp = Importdata()
    act_mat = imp.find_active(sample_no=sample_number)
    d['act_mat'] = _clean(act_mat[1])
    fpath = os.path.join(
        pkg_dir, 'rawdata',
        'JV_P_{}_H_S_error_advanced.csv'.format(sample_number))
    # read through a context manager so the file handle is closed
    with open(fpath, 'r') as f:
        temps = read_csv(f.read(), usecols=['T'])
    d['t_avg'] = _clean(pd.to_numeric(temps['T']).mean())
    return d

예제 #5

0

파일 보기

파일: pre_submission.py 프로젝트: materialsproject/MPContribs

def run(mpfile, **kwargs):
    """Add every CSV scan of a zip archive to ``mpfile``.

    The archive name is popped from ``mpfile.document['_hdata']`` and
    looked up under ``$HOME/work``. For each archive entry the position is
    parsed from the file name, the composition is evaluated with the
    functions returned by ``get_concentration_functions``, and the
    ``Energy``/``XAS``/``XMCD`` columns are added as a data table together
    with the min/max of ``XAS`` and ``XMCD``.

    Args:
        mpfile: object providing ``document``, ``add_hierarchical_data``
            and ``add_data_table``.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        The tuple ``('Please upload', zip_path)`` if the archive does not
        exist, otherwise None.
    """
    input_file = mpfile.document['_hdata'].pop('input_file')
    zip_path = os.path.join(os.environ['HOME'], 'work', input_file)
    if not os.path.exists(zip_path):
        return 'Please upload', zip_path

    # the context manager closes the archive even if an entry fails
    with ZipFile(zip_path, 'r') as zip_file:
        composition_table_dict = mpfile.document['_hdata']['composition_table']
        conc_funcs = get_concentration_functions(composition_table_dict)

        for info in zip_file.infolist():
            print(info.filename)
            d = RecursiveDict()

            # positions.x/y from filename, <scan-id>_<meas-element>_<X>_<Y>.csv
            # keep only the last three fields so the scan-id prefix (which
            # may itself contain underscores) does not break the unpacking
            stem = os.path.splitext(info.filename)[0]
            element, x, y = stem.rsplit('_', 3)[-3:]
            d['position'] = RecursiveDict(
                (k, clean_value(v, 'mm'))
                for k, v in zip(['x', 'y'], [x, y])
            )

            # composition
            # NOTE(review): x and y are still strings here; verify that
            # the concentration functions accept them as such.
            d['composition'] = RecursiveDict(
                (el, clean_value(f(x, y), convert_to_percent=True))
                for el, f in conc_funcs.items()
            )

            # identifier
            identifier = get_composition_from_string(''.join([
                '{}{}'.format(el, int(round(Decimal(comp.split()[0]))))
                for el, comp in d['composition'].items()
            ]))

            # load csv file
            try:
                body = zip_file.read(info.filename)
            except KeyError:
                print('ERROR: Did not find %s in zip file' % info.filename)
                # nothing to parse for this entry; do not fall through
                # with an undefined (or stale) csv string
                continue

            # read csv to pandas DataFrame and add to MPFile
            df = read_csv(body)
            df = df[['Energy', 'XAS', 'XMCD']]

            # min and max
            d.rec_update(RecursiveDict(
                (col, RecursiveDict([
                    ('min', df[col].min()), ('max', df[col].max())
                ])) for col in ['XAS', 'XMCD']
            ))

            # add data to MPFile
            mpfile.add_hierarchical_data(nest_dict(d, ['data']), identifier=identifier)
            mpfile.add_data_table(identifier, df, name=element)

예제 #6

0

파일 보기

def run(mpfile, **kwargs):
    """Add every CSV scan of a zip archive to ``mpfile``.

    The archive name is popped from ``mpfile.document["_hdata"]`` and
    looked up under ``$HOME/work``. For each archive entry the position is
    parsed from the file name, the composition is evaluated with the
    functions returned by ``get_concentration_functions``, and the
    ``Energy``/``XAS``/``XMCD`` columns are added as a data table together
    with the min/max of ``XAS`` and ``XMCD``.

    Args:
        mpfile: object providing ``document``, ``add_hierarchical_data``
            and ``add_data_table``.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        The tuple ``("Please upload", zip_path)`` if the archive does not
        exist, otherwise None.
    """
    input_file = mpfile.document["_hdata"].pop("input_file")
    zip_path = os.path.join(os.environ["HOME"], "work", input_file)
    if not os.path.exists(zip_path):
        return "Please upload", zip_path

    # the context manager closes the archive even if an entry fails
    with ZipFile(zip_path, "r") as zip_file:
        composition_table_dict = mpfile.document["_hdata"]["composition_table"]
        conc_funcs = get_concentration_functions(composition_table_dict)

        for info in zip_file.infolist():
            print(info.filename)
            d = RecursiveDict()

            # positions.x/y from filename, <scan-id>_<meas-element>_<X>_<Y>.csv
            # keep only the last three fields so the scan-id prefix (which
            # may itself contain underscores) does not break the unpacking
            stem = os.path.splitext(info.filename)[0]
            element, x, y = stem.rsplit("_", 3)[-3:]
            d["position"] = RecursiveDict(
                (k, clean_value(v, "mm")) for k, v in zip(["x", "y"], [x, y]))

            # composition
            # NOTE(review): x and y are still strings here; verify that
            # the concentration functions accept them as such.
            d["composition"] = RecursiveDict(
                (el, clean_value(f(x, y), convert_to_percent=True))
                for el, f in conc_funcs.items())

            # identifier
            identifier = get_composition_from_string("".join([
                "{}{}".format(el, int(round(Decimal(comp.split()[0]))))
                for el, comp in d["composition"].items()
            ]))

            # load csv file
            try:
                body = zip_file.read(info.filename)
            except KeyError:
                print("ERROR: Did not find %s in zip file" % info.filename)
                # nothing to parse for this entry; do not fall through
                # with an undefined (or stale) csv string
                continue

            # read csv to pandas DataFrame and add to MPFile
            df = read_csv(body)
            df = df[["Energy", "XAS", "XMCD"]]

            # min and max
            d.rec_update(
                RecursiveDict(
                    (col, RecursiveDict([("min", df[col].min()),
                                         ("max", df[col].max())]))
                    for col in ["XAS", "XMCD"]))

            # add data to MPFile
            mpfile.add_hierarchical_data(nest_dict(d, ["data"]),
                                         identifier=identifier)
            mpfile.add_data_table(identifier, df, name=element)

예제 #7

0

파일 보기

def run(mpfile, **kwargs):
    """Add every CSV scan of a zip archive to ``mpfile``.

    The archive name is popped from ``mpfile.document['_hdata']`` and
    looked up under ``$HOME/work``. For each archive entry the position is
    parsed from the file name, the composition is evaluated with the
    functions returned by ``get_concentration_functions``, and the
    ``Energy``/``XAS``/``XMCD`` columns are added as a data table together
    with the min/max of ``XAS`` and ``XMCD``.

    Args:
        mpfile: object providing ``document``, ``add_hierarchical_data``
            and ``add_data_table``.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        The tuple ``('Please upload', zip_path)`` if the archive does not
        exist, otherwise None.
    """
    input_file = mpfile.document['_hdata'].pop('input_file')
    zip_path = os.path.join(os.environ['HOME'], 'work', input_file)
    if not os.path.exists(zip_path):
        return 'Please upload', zip_path

    # the context manager closes the archive even if an entry fails
    with ZipFile(zip_path, 'r') as zip_file:
        composition_table_dict = mpfile.document['_hdata']['composition_table']
        conc_funcs = get_concentration_functions(composition_table_dict)

        for info in zip_file.infolist():
            print(info.filename)
            d = RecursiveDict()

            # positions.x/y from filename, <scan-id>_<meas-element>_<X>_<Y>.csv
            # keep only the last three fields so the scan-id prefix (which
            # may itself contain underscores) does not break the unpacking
            stem = os.path.splitext(info.filename)[0]
            element, x, y = stem.rsplit('_', 3)[-3:]
            d['position'] = RecursiveDict(
                (k, clean_value(v, 'mm')) for k, v in zip(['x', 'y'], [x, y]))

            # composition
            # NOTE(review): x and y are still strings here; verify that
            # the concentration functions accept them as such.
            d['composition'] = RecursiveDict(
                (el, clean_value(f(x, y), convert_to_percent=True))
                for el, f in conc_funcs.items())

            # identifier
            identifier = get_composition_from_string(''.join([
                '{}{}'.format(el, int(round(Decimal(comp.split()[0]))))
                for el, comp in d['composition'].items()
            ]))

            # load csv file
            try:
                body = zip_file.read(info.filename)
            except KeyError:
                print('ERROR: Did not find %s in zip file' % info.filename)
                # nothing to parse for this entry; do not fall through
                # with an undefined (or stale) csv string
                continue

            # read csv to pandas DataFrame and add to MPFile
            df = read_csv(body)
            df = df[['Energy', 'XAS', 'XMCD']]

            # min and max
            d.rec_update(
                RecursiveDict(
                    (col, RecursiveDict([('min', df[col].min()),
                                         ('max', df[col].max())]))
                    for col in ['XAS', 'XMCD']))

            # add data to MPFile
            mpfile.add_hierarchical_data(nest_dict(d, ['data']),
                                         identifier=identifier)
            mpfile.add_data_table(identifier, df, name=element)

예제 #8

0

파일 보기

파일: mpfile.py 프로젝트: smithmackensie96/MPContribs

 def from_string(data):
     """Parse an ArchieML string and build an MPFile from it.

     Root-level keys are normalized with ``normalize_root_level``: general
     sections are nested under ``mp_level01_titles[0]``, all other keys are
     re-inserted under their normalized identifier. CSV strings found in an
     identifier section are parsed with ``read_csv`` and CIF strings stored
     under ``mp_level01_titles[3]`` are converted to pymatgen structure
     dicts.

     Args:
         data: string handed to ``archieml.loads``.

     Returns:
         The result of ``MPFile.from_dict`` on the post-processed dict.
     """
     # use archieml-python parse to import data
     rdct = RecursiveDict(archieml.loads(data))
     # NOTE(review): rec_update() without arguments -- presumably converts
     # nested dicts in place; confirm against RecursiveDict.
     rdct.rec_update()
     # post-process internal representation of file contents
     # NOTE(review): entries are popped/inserted inside this loop, which
     # relies on keys() returning a list copy (Python 2 behavior).
     for key in rdct.keys():
         is_general, root_key = normalize_root_level(key)
         if is_general:
             # make part of shared (meta-)data, i.e. nest under `general` at
             # the beginning of the MPFile
             if mp_level01_titles[0] not in rdct:
                 rdct.insert_before(rdct.keys()[0],
                                    (mp_level01_titles[0], RecursiveDict()))
             rdct.rec_update(
                 nest_dict(rdct.pop(key), [mp_level01_titles[0], root_key]))
         else:
             # normalize identifier key (pop & insert)
             # using rec_update since we're looping over all entries
             # also: support data in bare tables (marked-up only by
             #       root-level identifier) by nesting under 'data'
             value = rdct.pop(key)
             keys = [root_key]
             # a bare list value is nested one level deeper, under 'table'
             if isinstance(value, list): keys.append('table')
             rdct.rec_update(nest_dict(value, keys))
             # Note: CSV section is marked with 'data ' prefix during iterate()
             for k, v in rdct[root_key].iterate():
                 if isinstance(k, six.string_types) and \
                    k.startswith(mp_level01_titles[1]):
                     # k = table name (incl. data prefix)
                     # v = csv string from ArchieML free-form arrays
                     # strip the prefix plus one separator character
                     table_name = k[len(mp_level01_titles[1] + '_'):]
                     pd_obj = read_csv(v)
                     # replace the raw entry stored under the bare table
                     # name by the parsed table stored under the full key
                     # NOTE(review): this mutates the section while
                     # iterate() is still running -- confirm it is safe.
                     rdct[root_key].pop(table_name)
                     rdct[root_key].rec_update(
                         nest_dict(pd_obj.to_dict(), [k]))
                     rdct[root_key].insert_default_plot_options(pd_obj, k)
             # convert CIF strings into pymatgen structures
             if mp_level01_titles[3] in rdct[root_key]:
                 from pymatgen.io.cif import CifParser
                 for name in rdct[root_key][mp_level01_titles[3]].keys():
                     cif = rdct[root_key][mp_level01_titles[3]].pop(name)
                     parser = CifParser.from_string(cif)
                     # only the first structure of the CIF is kept
                     structure = parser.get_structures(primitive=False)[0]
                     rdct[root_key][mp_level01_titles[3]].rec_update(
                         nest_dict(structure.as_dict(), [name]))
     return MPFile.from_dict(rdct)

예제 #9

0

파일 보기

파일: pre_submission.py 프로젝트: materialsproject/MPContribs

def get_fit_pars(sample_number):
    """Collect the fit parameters of one sample into a ``RecursiveDict``.

    Values come from ``Experimental(sample_number).get_fit_parameters()``,
    from the ``entropy_fitparam_SrFeOx`` data file and from the sample's
    ``JV_P_<sample_number>_H_S_error_advanced.csv`` raw-data file, both
    located inside the installed ``solar_perovskite`` package. Every value
    is passed through ``clean_value`` with ``max_dgts=6``.

    Args:
        sample_number: sample identifier forwarded to ``Experimental`` and
            ``Importdata.find_active`` and used in the raw-data file name.

    Returns:
        RecursiveDict with the keys ``fit_par_ent``, ``fit_param_enth``,
        ``fit_type_entr``, ``delta_0``, ``delta_min``, ``delta_max``,
        ``fit_param_fe``, ``act_mat`` and ``t_avg``.
    """
    # `pd.np` was removed in pandas 2.0; use numpy directly
    import numpy as np
    import solar_perovskite
    from solar_perovskite.modelling.isographs import Experimental
    from solar_perovskite.init.import_data import Importdata
    max_dgts = 6
    pkg_dir = os.path.dirname(solar_perovskite.__file__)

    def _clean(value):
        # shared rounding for every stored value
        return clean_value(value, max_dgts=max_dgts)

    d = RecursiveDict()
    exp = Experimental(sample_number)
    fitparam = exp.get_fit_parameters()
    # fitparam = compstr, delta_0, tolfac, mol_mass, fit_param_enth,
    #            fit_type_entr, fit_param_entr, delta_min, delta_max
    fit_par_ent = [fitparam[6][0], fitparam[6][1], fitparam[1]]
    d['fit_par_ent'] = RecursiveDict(
        (k, _clean(v))
        for k, v in zip('abc', fit_par_ent)
    )
    d['fit_param_enth'] = RecursiveDict(
        (k, _clean(v))
        for k, v in zip('abcd', fitparam[4])
    )
    d['fit_type_entr'] = _clean(fitparam[5])
    d['delta_0'] = _clean(fitparam[1])
    d['delta_min'] = _clean(fitparam[7])
    d['delta_max'] = _clean(fitparam[8])
    fit_param_fe = np.loadtxt(os.path.abspath(os.path.join(
        pkg_dir, "datafiles", "entropy_fitparam_SrFeOx"
    )))
    d['fit_param_fe'] = RecursiveDict(
        (k, _clean(v))
        for k, v in zip('abcd', fit_param_fe)
    )
    imp = Importdata()
    act_mat = imp.find_active(sample_no=sample_number)
    d['act_mat'] = _clean(act_mat[1])
    fpath = os.path.join(
        pkg_dir, 'rawdata',
        'JV_P_{}_H_S_error_advanced.csv'.format(sample_number)
    )
    # read through a context manager so the file handle is closed
    with open(fpath, 'r') as f:
        temps = read_csv(f.read(), usecols=['T'])
    d['t_avg'] = _clean(pd.to_numeric(temps['T']).mean())
    return d

예제 #10

0

파일 보기

파일: pre_submission.py 프로젝트: materialsproject/MPContribs

def run(mpfile, **kwargs):
    """Attach fit parameters and energy-analysis tables to ``mpfile``.

    Loads the experimental table, the theoretical data and the energy
    data named in ``mpfile.hdata.general['input_files']``, then walks over
    the single-identifier parts of ``mpfile``. For the processed
    identifier it adds hierarchical data (fit parameters when an
    experimental row matches, plus availability and a timestamp) and one
    data table per (products, stability) group of the energy analysis.

    Args:
        mpfile: object providing ``hdata``, ``split`` and ``concat``.
        **kwargs: accepted for interface compatibility; not used here.

    Note:
        The process exits with status 1 if ``solar_perovskite`` cannot be
        imported.
    """
    # TODO clone solar_perovskite if needed, abort if insufficient permissions
    try:
        import solar_perovskite
        from solar_perovskite.core import GetExpThermo
        from solar_perovskite.init.find_structures import FindStructures
        from solar_perovskite.init.import_data import Importdata
        from solar_perovskite.modelling.from_theo import EnthTheo
    except ImportError:
        print('could not import solar_perovskite, clone github repo')
        sys.exit(1)

    input_files = mpfile.hdata.general['input_files']
    input_dir = os.path.dirname(solar_perovskite.__file__)
    input_file = os.path.join(input_dir, input_files['exp'])
    # the file uses ';' as separator; close the handle once it is read
    with open(input_file, 'r') as f:
        exp_table = read_csv(f.read().replace(';', ','))
    print('exp data loaded.')
    # NOTE(review): theo_data is loaded but not used further down.
    with open(os.path.join(input_dir, input_files['theo']), 'r') as f:
        theo_data = json.loads(f.read()).pop('collection')
    print('theo data loaded.')
    with open(input_files['energy'], 'r') as f:
        data = json.load(f).pop('collection')
    print('energy data loaded.')
    # flatten: one row per energy-analysis entry, tagged with its document id
    rows = [dict(sdoc, parameters=doc['_id']) for doc in data for sdoc in doc['energy_analysis']]
    frame = pd.DataFrame(rows)
    # move the 'parameters' column to the front
    parameters = frame['parameters']
    frame.drop(labels=['parameters'], axis=1, inplace=True)
    frame.insert(0, 'parameters', parameters)
    print('energy dataframe:', frame.shape)

    # materialize the parts first: mpfile is extended via concat() below
    mpfile_singles = list(mpfile.split())
    for mpfile_single in mpfile_singles:
        identifier = mpfile_single.ids[0]
        #if identifier in run.existing_identifiers:
        #    print (not updating', identifier)
        #    continue
        # NOTE(review): only this one identifier is processed -- looks like
        # a debugging filter; confirm before production use.
        if identifier != 'mp-1076585':
            continue
        hdata = mpfile_single.hdata[identifier]
        print(identifier)

        print('add hdata ...')
        d = RecursiveDict()
        d['data'] = RecursiveDict()
        compstr = hdata['pars']['theo_compstr']
        row = exp_table.loc[exp_table['theo_compstr'] == compstr]
        if not row.empty:
            sample_number = int(row.iloc[0]['sample_number'])
            d['pars'] = get_fit_pars(sample_number)
            d['data']['availability'] = 'Exp+Theo'
        else:
            d['pars'] = RecursiveDict()
            d['data']['availability'] = 'Theo'
        #print('dh_min, dh_max ...')
        #_, dh_min, dh_max, _ = redenth_act(compstr)
        #d['pars']['dh_min'] = clean_value(dh_min, max_dgts=4)
        #d['pars']['dh_max'] = clean_value(dh_max, max_dgts=4)
        #d['pars']['elastic'] = RecursiveDict()
        #print('debye temps ...')
        #d['pars']['elastic']['debye_temp'] = RecursiveDict()
        #try:
        #    t_d_perov = get_debye_temp(identifier)
        #    t_d_brownm = get_debye_temp(hdata['data']['reduced_phase']['closest-MP'])
        #    tensors_available = 'True'
        #except TypeError:
        #    t_d_perov = get_debye_temp("mp-510624")
        #    t_d_brownm = get_debye_temp("mp-561589")
        #    tensors_available = 'False'
        #d['pars']['elastic']['debye_temp']['perovskite'] = clean_value(t_d_perov, max_dgts=6)
        #d['pars']['elastic']['debye_temp']['brownmillerite'] = clean_value(t_d_brownm, max_dgts=6)
        #d['pars']['elastic']['tensors_available'] = tensors_available
        d['pars']['last_updated'] = str(datetime.now())
        mpfile_single.add_hierarchical_data(d, identifier=identifier)

        #for process in processes:
        #    if process != "AS":
        #        t_ox_l = t_ox_ws_cs
        #        t_red_l = t_red_ws_cs
        #        p_ox_l = p_ox_ws_cs
        #        p_red_l = p_red_ws_cs
        #        data_source = ["Theo"]
        #    else:
        #        t_ox_l = t_ox_airsep
        #        t_red_l = t_red_airsep
        #        p_ox_l = p_ox_airsep
        #        p_red_l = p_red_airsep
        #        data_source = ["Theo", "Exp"]

        #    for red_temp in t_red_l:
        #        for ox_temp in t_ox_l:
        #            for ox_pr in p_ox_l:
        #                for red_pr in p_red_l:
        #                    for data_sources in data_source:
        #                        db_id = process + "_" + str(float(ox_temp)) + "_" \
        #                                + str(float(red_temp)) + "_" + str(float(ox_pr)) \
        #                                + "_" + str(float(red_pr)) + "_" + data_sources + \
        #                                "_" + str(float(enth_steps))


        print('add energy analysis ...')
        group = frame.query('compstr.str.contains("{}")'.format(compstr[:-1]))
        # drop() returns new frames here instead of modifying the results
        # of query()/groupby() in place
        group = group.drop(labels='compstr', axis=1)
        for prodstr, subgroup in group.groupby(['prodstr', 'prodstr_alt'], sort=False):
            subgroup = subgroup.drop(labels=['prodstr', 'prodstr_alt'], axis=1)
            for unstable, subsubgroup in subgroup.groupby('unstable', sort=False):
                subsubgroup = subsubgroup.drop(labels='unstable', axis=1)
                name = 'energy-analysis_{}_{}'.format('unstable' if unstable else 'stable', '-'.join(prodstr))
                print(name)
                mpfile_single.add_data_table(identifier, subsubgroup, name)

        print(mpfile_single)
        mpfile.concat(mpfile_single)
        # NOTE(review): this unconditional break stops after the first
        # processed identifier and makes the rest of the loop body
        # unreachable; kept as found -- confirm whether it is intentional.
        break

        if not row.empty:
            print('add ΔH ...')
            exp_thermo = GetExpThermo(sample_number, plotting=False)
            enthalpy = exp_thermo.exp_dh()
            table = get_table(enthalpy, 'H')
            mpfile_single.add_data_table(identifier, table, name='enthalpy')

            print('add ΔS ...')
            entropy = exp_thermo.exp_ds()
            table = get_table(entropy, 'S')
            mpfile_single.add_data_table(identifier, table, name='entropy')

            print('add raw data ...')
            tga_results = os.path.join(os.path.dirname(solar_perovskite.__file__), 'tga_results')
            for path in glob(os.path.join(tga_results, 'ExpDat_JV_P_{}_*.csv'.format(sample_number))):
                print(path.split('_{}_'.format(sample_number))[-1].split('.')[0], '...')
                with open(path, 'r') as f:
                    body = f.read()
                cols = ['Time [min]', 'Temperature [C]', 'dm [%]', 'pO2']
                table = read_csv(body, lineterminator=os.linesep, usecols=cols, skiprows=5)
                # keep every 100th row only
                table = table[cols].iloc[::100, :]
                # scale/shift for better graphs
                T, dm, p = [pd.to_numeric(table[col]) for col in cols[1:]]
                T_min, T_max, dm_min, dm_max, p_max = T.min(), T.max(), dm.min(), dm.max(), p.max()
                rT, rdm = abs(T_max - T_min), abs(dm_max - dm_min)
                table[cols[2]] = (dm - dm_min) * rT/rdm
                table[cols[3]] = p * rT/p_max
                table.rename(columns={
                    'dm [%]': '(dm [%] + {:.4g}) * {:.4g}'.format(-dm_min, rT/rdm),
                    'pO2': 'pO₂ * {:.4g}'.format(rT/p_max)
                }, inplace=True)
                mpfile_single.add_data_table(identifier, table, name='raw')

예제 #11

0

파일 보기

파일: mpfile.py 프로젝트: rossbutler2000/MPContribs

    def from_string(data):
        """Parse an ArchieML string and build an MPFile from it.

        Root-level keys are normalized with ``normalize_root_level`` and
        re-inserted (general sections nested under
        ``mp_level01_titles[0]``). Within each dict section, lists of
        dicts are joined into a CSV string and parsed with ``read_csv``,
        string values containing a space are converted with
        ``Quantity.from_key_value`` when ``Quantity`` is available, and
        CIF strings stored under ``mp_level01_titles[3]`` are converted to
        pymatgen structure dicts.

        Args:
            data: string handed to ``loads``.

        Returns:
            The result of ``MPFile.from_dict`` on the post-processed dict.
        """
        # use archieml-python parse to import data
        rdct = RecursiveDict(loads(data))
        rdct.rec_update()

        # post-process internal representation of file contents
        # (iterate over a snapshot: entries are popped/re-inserted below)
        for key in list(rdct.keys()):
            is_general, root_key = normalize_root_level(key)

            if is_general:
                # make part of shared (meta-)data, i.e. nest under `general` at
                # the beginning of the MPFile
                if mp_level01_titles[0] not in rdct:
                    rdct[mp_level01_titles[0]] = RecursiveDict()
                    rdct.move_to_end(mp_level01_titles[0], last=False)

            # normalize identifier key (pop & insert)
            # using rec_update since we're looping over all entries
            # also: support data in bare tables (marked-up only by
            #       root-level identifier) by nesting under 'data'
            value = rdct.pop(key)
            keys = [mp_level01_titles[0]] if is_general else []
            keys.append(root_key)
            if isinstance(value, list):
                keys.append("table")
            rdct.rec_update(nest_dict(value, keys))

            # reference to section to iterate or parse as CIF
            section = (rdct[mp_level01_titles[0]][root_key]
                       if is_general else rdct[root_key])

            # only dict sections can hold tables, quantities or CIF
            # strings; scalar sections have nothing left to post-process
            if not isinstance(section, dict):
                continue

            # iterate to find CSV sections to parse
            # also parse propnet quantities
            scope = []
            for k, v in section.iterate():
                level, name = k
                name = "".join([replacements.get(c, c) for c in name])
                # moving back up the hierarchy: forget the deeper scope
                if level < len(scope):
                    del scope[level:]
                if v is None:
                    scope.append(name)
                elif isinstance(v, list) and v and isinstance(v[0], dict):
                    # every row is prefixed with a newline, so the CSV
                    # string starts with an empty line as before
                    table = "".join("\n" + row_dct["value"] for row_dct in v)
                    pd_obj = read_csv(table)
                    d = nest_dict(pd_obj.to_dict(), scope + [name])
                    section.rec_update(d, overwrite=True)
                    if not is_general and level == 0:
                        section.insert_default_plot_options(pd_obj, name)
                elif (Quantity is not None
                      and isinstance(v, six.string_types) and " " in v):
                    quantity = Quantity.from_key_value(name, v)
                    d = nest_dict(quantity.as_dict(), scope +
                                  [name])  # TODO quantity.symbol.name
                    section.rec_update(d, overwrite=True)

            # convert CIF strings into pymatgen structures
            if mp_level01_titles[3] in section:
                from pymatgen.io.cif import CifParser

                structures = section[mp_level01_titles[3]]
                # snapshot the names: entries are popped and re-inserted
                # while looping
                for name in list(structures.keys()):
                    cif = structures.pop(name)
                    parser = CifParser.from_string(cif)
                    # only the first structure of the CIF is kept
                    structure = parser.get_structures(primitive=False)[0]
                    structures.rec_update(
                        nest_dict(structure.as_dict(), [name]))

        return MPFile.from_dict(rdct)

예제 #12

0

파일 보기

def run(mpfile, **kwargs):
    """Add hierarchical data and energy-analysis tables to ``mpfile``.

    Loads the three inputs named in ``mpfile.hdata.general["input_files"]``
    ("exp" CSV, "theo" JSON, "energy" JSON), flattens the energy collection
    into a DataFrame and then walks the single-identifier files returned by
    ``mpfile.split()``.  Only the hard-coded identifier ``mp-1076585`` is
    processed: its ``data``/``pars`` hierarchical data and one data table per
    (prodstr, prodstr_alt, unstable) group are added, the result is
    concatenated back into ``mpfile`` and the loop stops.

    Args:
        mpfile: object providing ``hdata``, ``split()`` and ``concat()``.
        **kwargs: accepted for interface compatibility; not used here.

    Raises:
        SystemExit: with code 1 if ``solar_perovskite`` cannot be imported.
    """
    # TODO clone solar_perovskite if needed, abort if insufficient permissions
    try:
        # All submodules are imported up front so a broken installation is
        # reported before any file is read.
        import solar_perovskite
        from solar_perovskite.core import GetExpThermo
        from solar_perovskite.init.find_structures import FindStructures
        from solar_perovskite.init.import_data import Importdata
        from solar_perovskite.modelling.from_theo import EnthTheo
    except ImportError:
        print("could not import solar_perovskite, clone github repo")
        sys.exit(1)

    input_files = mpfile.hdata.general["input_files"]
    input_dir = os.path.dirname(solar_perovskite.__file__)

    # ';' is replaced by ',' before the text is handed to read_csv.
    with open(os.path.join(input_dir, input_files["exp"]), "r") as f:
        exp_table = read_csv(f.read().replace(";", ","))
    print("exp data loaded.")

    with open(os.path.join(input_dir, input_files["theo"]), "r") as f:
        # NOTE(review): theo_data is not used below; the file is still
        # parsed so a missing or malformed file fails here as before.
        theo_data = json.load(f).pop("collection")
    print("theo data loaded.")

    # NOTE(review): unlike "exp"/"theo" this path is not joined with
    # input_dir and is resolved against the working directory -- confirm.
    with open(input_files["energy"], "r") as f:
        data = json.load(f).pop("collection")
    print("energy data loaded.")

    # One record per energy_analysis entry, tagged with its parent "_id".
    records = [
        dict(sdoc, parameters=doc["_id"])
        for doc in data
        for sdoc in doc["energy_analysis"]
    ]
    frame = pd.DataFrame(records)
    # Move the "parameters" column to the front.
    frame.insert(0, "parameters", frame.pop("parameters"))
    print("energy dataframe:", frame.shape)

    # Materialise the split before looping: mpfile is modified by concat().
    for mpfile_single in list(mpfile.split()):
        identifier = mpfile_single.ids[0]
        # NOTE(review): processing is restricted to this single identifier.
        if identifier != "mp-1076585":
            continue
        hdata = mpfile_single.hdata[identifier]
        print(identifier)

        print("add hdata ...")
        d = RecursiveDict()
        d["data"] = RecursiveDict()
        compstr = hdata["pars"]["theo_compstr"]
        row = exp_table.loc[exp_table["theo_compstr"] == compstr]
        if not row.empty:
            sample_number = int(row.iloc[0]["sample_number"])
            d["pars"] = get_fit_pars(sample_number)
            d["data"]["availability"] = "Exp+Theo"
        else:
            d["pars"] = RecursiveDict()
            d["data"]["availability"] = "Theo"
        d["pars"]["last_updated"] = str(datetime.now())
        mpfile_single.add_hierarchical_data(d, identifier=identifier)

        print("add energy analysis ...")
        # Boolean mask instead of DataFrame.query(): same rows, but compstr
        # is never spliced into an expression string.  The last character of
        # compstr is dropped and the rest is used as a (regex) pattern.
        mask = frame["compstr"].str.contains(compstr[:-1])
        # drop() returns new frames; nothing is modified in place on a
        # selection of ``frame`` or on a groupby sub-frame.
        group = frame[mask].drop(labels="compstr", axis=1)
        for prodstr, subgroup in group.groupby(["prodstr", "prodstr_alt"],
                                               sort=False):
            subgroup = subgroup.drop(labels=["prodstr", "prodstr_alt"],
                                     axis=1)
            for unstable, subsubgroup in subgroup.groupby("unstable",
                                                          sort=False):
                subsubgroup = subsubgroup.drop(labels="unstable", axis=1)
                name = "energy-analysis_{}_{}".format(
                    "unstable" if unstable else "stable", "-".join(prodstr))
                print(name)
                mpfile_single.add_data_table(identifier, subsubgroup, name)

        print(mpfile_single)
        mpfile.concat(mpfile_single)
        break

        # NOTE(review): everything below is unreachable because of the
        # unconditional ``break`` above.  It also adds tables to
        # mpfile_single *after* it has been concatenated into mpfile.
        # Decide whether to delete this section or move it above concat().
        if not row.empty:
            print("add ΔH ...")
            exp_thermo = GetExpThermo(sample_number, plotting=False)
            enthalpy = exp_thermo.exp_dh()
            table = get_table(enthalpy, "H")
            mpfile_single.add_data_table(identifier, table, name="enthalpy")

            print("add ΔS ...")
            entropy = exp_thermo.exp_ds()
            table = get_table(entropy, "S")
            mpfile_single.add_data_table(identifier, table, name="entropy")

            print("add raw data ...")
            tga_results = os.path.join(input_dir, "tga_results")
            pattern = os.path.join(
                tga_results, "ExpDat_JV_P_{}_*.csv".format(sample_number))
            cols = ["Time [min]", "Temperature [C]", "dm [%]", "pO2"]
            for path in glob(pattern):
                print(
                    path.split("_{}_".format(sample_number))[-1].split(".")[0],
                    "...")
                with open(path, "r") as f:
                    body = f.read()
                table = read_csv(body,
                                 lineterminator=os.linesep,
                                 usecols=cols,
                                 skiprows=5)
                # Keep every 100th row; copy so the column assignments
                # below do not write into a slice of the parsed table.
                table = table[cols].iloc[::100, :].copy()
                # scale/shift for better graphs
                T, dm, p = [pd.to_numeric(table[col]) for col in cols[1:]]
                T_min, T_max = T.min(), T.max()
                dm_min, dm_max = dm.min(), dm.max()
                p_max = p.max()
                rT, rdm = abs(T_max - T_min), abs(dm_max - dm_min)
                table[cols[2]] = (dm - dm_min) * rT / rdm
                table[cols[3]] = p * rT / p_max
                # Column labels record the transformation that was applied.
                table.rename(
                    columns={
                        "dm [%]":
                        "(dm [%] + {:.4g}) * {:.4g}".format(-dm_min, rT / rdm),
                        "pO2":
                        "pO₂ * {:.4g}".format(rT / p_max),
                    },
                    inplace=True,
                )
                mpfile_single.add_data_table(identifier, table, name="raw")

예제 #13

0

파일 보기

파일: pre_submission.py 프로젝트: ATNDiaye/MPContribsUsers

def run(mpfile, **kwargs):
    """Add Shomate data and per-sample data/tables to ``mpfile``.

    Reads the CSV named by ``mpfile.hdata.general['input_file']`` from the
    ``solar_perovskite`` package directory, adds the contents of
    ``datafiles/shomate.csv`` as hierarchical data under ``shomate``, and
    then, for every CSV row whose 'closest phase MP (oxidized)' value starts
    with ``mp-``, adds hierarchical data plus the ``enthalpy``, ``entropy``
    and ``raw`` data tables for that identifier.

    Args:
        mpfile: object providing ``hdata``, ``add_hierarchical_data()`` and
            ``add_data_table()``.
        **kwargs: accepted for interface compatibility; not used here.

    Raises:
        ImportError: if ``solar_perovskite`` is not importable.
    """
    # TODO clone solar_perovskite if needed, abort if insufficient permissions
    import solar_perovskite
    from solar_perovskite.core import GetExpThermo
    from solar_perovskite.init.find_structures import FindStructures
    from solar_perovskite.init.import_data import Importdata
    from solar_perovskite.modelling.from_theo import EnthTheo

    package_dir = os.path.dirname(solar_perovskite.__file__)

    input_file = os.path.join(package_dir, mpfile.hdata.general['input_file'])
    # ';' is replaced by ',' before the text is handed to read_csv.
    with open(input_file, 'r') as f:
        table = read_csv(f.read().replace(';', ','))
    # Call to_dict() of Table's parent class, bypassing any Table override.
    dct = super(Table, table).to_dict(orient='records', into=RecursiveDict)

    shomate = pd.read_csv(os.path.abspath(os.path.join(
        package_dir, "datafiles", "shomate.csv"
    )), index_col=0)
    shomate_dct = RecursiveDict()
    for col in shomate.columns:
        # presumably strips the '.N' suffix pandas appends to duplicate
        # column names, so repeated columns share one key -- TODO confirm
        key = col.split('.')[0]
        if key not in shomate_dct:
            shomate_dct[key] = RecursiveDict()
        d = shomate[col].to_dict(into=RecursiveDict)
        # 'low'/'high' are removed from d and become the sub-key 'low-high'.
        subkey = '{}-{}'.format(int(d.pop('low')), int(d.pop('high')))
        shomate_dct[key][subkey] = RecursiveDict(
            (k, clean_value(v, max_dgts=6)) for k, v in d.items()
        )
    mpfile.add_hierarchical_data(nest_dict(shomate_dct, ['shomate']))

    tga_results = os.path.join(package_dir, 'tga_results')
    cols = ['Time [min]', 'Temperature [C]', 'dm [%]', 'pO2']

    for row in dct:

        sample_number = int(row['sample_number'])
        identifier = row['closest phase MP (oxidized)'].replace('n.a.', '')
        # Rows without an 'mp-' identifier are skipped.
        if not identifier.startswith('mp-'):
            continue
        # NOTE(review): this fallback can never run -- an empty identifier
        # is already skipped by the startswith('mp-') guard above.  Decide
        # whether composition-based identifiers should be supported.
        if not identifier:
            identifier = get_composition_from_string(row['composition oxidized phase'])
        print(identifier)

        print('add hdata ...')
        d = RecursiveDict()
        d['tolerance_factor'] = row['tolerance_factor']
        d['solid_solution'] = row['type of solid solution']
        d['oxidized_phase'] = RecursiveDict()
        d['oxidized_phase']['composition'] = row['composition oxidized phase']
        d['oxidized_phase']['crystal-structure'] = row['crystal structure (fully oxidized)']
        d['reduced_phase'] = RecursiveDict()
        d['reduced_phase']['composition'] = row['composition reduced phase']
        d['reduced_phase']['closest-MP'] = row['closest phase MP (reduced)'].replace('n.a.', '')
        d = nest_dict(d, ['data'])
        d['pars'] = get_fit_pars(sample_number)
        d['pars']['theo_compstr'] = row['theo_compstr']
        # Best effort: if dh_min/dh_max cannot be computed, the error is
        # reported and the remaining data is still added.
        try:
            fs = FindStructures(compstr=row['theo_compstr'])
            theo_redenth = fs.find_theo_redenth()
            imp = Importdata()
            splitcomp = imp.split_comp(row['theo_compstr'])
            conc_act = imp.find_active(mat_comp=splitcomp)[1]
            et = EnthTheo(comp=row['theo_compstr'])
            dh_max, dh_min = et.calc_dh_endm()
            red_enth_mean_endm = (conc_act * dh_min) + ((1 - conc_act) * dh_max)
            # Shift both end members by the same offset.
            difference = theo_redenth - red_enth_mean_endm
            d['pars']['dh_min'] = clean_value(dh_min + difference, max_dgts=8)
            d['pars']['dh_max'] = clean_value(dh_max + difference, max_dgts=8)
        except Exception as ex:
            print('error in dh_min/max!')
            print(str(ex))
        mpfile.add_hierarchical_data(d, identifier=identifier)

        print('add ΔH ...')
        exp_thermo = GetExpThermo(sample_number, plotting=False)
        enthalpy = exp_thermo.exp_dh()
        table = get_table(enthalpy, 'H')
        mpfile.add_data_table(identifier, table, name='enthalpy')

        print('add ΔS ...')
        entropy = exp_thermo.exp_ds()
        table = get_table(entropy, 'S')
        mpfile.add_data_table(identifier, table, name='entropy')

        print('add raw data ...')
        # NOTE(review): every matching file is added under the same table
        # name 'raw' -- confirm add_data_table does not overwrite.
        for path in glob(os.path.join(tga_results, 'ExpDat_JV_P_{}_*.csv'.format(sample_number))):
            # Single-argument print(): same output on Python 2 and 3.
            print('{} ...'.format(path.split('_{}_'.format(sample_number))[-1].split('.')[0]))
            with open(path, 'r') as f:
                body = f.read()
            table = read_csv(body, lineterminator=os.linesep, usecols=cols, skiprows=5)
            # Keep every 100th row; copy so the column assignments below do
            # not write into a slice of the parsed table.
            table = table[cols].iloc[::100, :].copy()
            # scale/shift for better graphs
            T, dm, p = [pd.to_numeric(table[col]) for col in cols[1:]]
            T_min, T_max, dm_min, dm_max, p_max = T.min(), T.max(), dm.min(), dm.max(), p.max()
            rT, rdm = abs(T_max - T_min), abs(dm_max - dm_min)
            table[cols[2]] = (dm - dm_min) * rT/rdm
            table[cols[3]] = p * rT/p_max
            # Column labels record the transformation that was applied.
            table.rename(columns={
                'dm [%]': '(dm [%] + {:.4g}) * {:.4g}'.format(-dm_min, rT/rdm),
                'pO2': 'pO₂ * {:.4g}'.format(rT/p_max)
            }, inplace=True)
            mpfile.add_data_table(identifier, table, name='raw')