def run(mpfile, **kwargs):
    """Add the tables stored in a gzipped tar archive to ``mpfile``.

    The archive location (relative to ``$HOME/work``) and the table column
    names (joined by ``' -- '``) are read from the ``input`` entry of the
    ``_hdata`` section of ``mpfile.document``; that entry is popped from the
    document.  Members whose name contains ``pump`` are cut into tiles of
    ``delta`` columns and added as one table per tile, every other member is
    added as a single table using the configured column names.

    Args:
        mpfile: object exposing ``document``, ``ids`` and ``add_data_table``.
        **kwargs: accepted for interface compatibility; unused.

    Returns:
        None.  If the archive does not exist a message is printed and nothing
        is added.
    """
    meta_data = mpfile.document['_hdata'].pop('input')
    file_path = os.path.join(os.environ['HOME'], 'work', meta_data['file_path'])
    if not os.path.exists(file_path):
        print('Please upload {}'.format(file_path))
        return

    # imported lazily: only needed once there is an archive to process
    from pandas import Panel

    table_columns = meta_data['table_columns'].split(' -- ')
    identifier = mpfile.ids[0]

    with tarfile.open(file_path, "r:gz") as tar:
        for member in tar.getmembers():
            name = os.path.splitext(member.name)[0]
            print('load {} ...'.format(name))
            f = tar.extractfile(member)
            if f is None:
                # directories and other non-regular members carry no data
                continue

            if 'pump' in name:
                # only load a small area
                list1, list2 = range(1), range(6)
                tuples = [(x, y) for x in list1 for y in list2]
                delta = 150
                for x, y in tuples:
                    # NOTE(review): `i > x * delta` also drops the first row
                    # of each window -- confirm this is an intended header skip
                    lines = []
                    for i in range((x + 1) * delta):
                        line = f.readline()
                        if not isinstance(line, str):
                            # bytes on Python 3; assumes UTF-8 -- TODO confirm
                            line = line.decode('utf-8')
                        if i > x * delta:
                            lines.append(line)
                    # keep only the columns belonging to tile `y`
                    fstr = '\n'.join(
                        ','.join(line.strip().split(',')[y * delta:(y + 1) * delta])
                        for line in lines
                    )
                    print('read_csv ...')
                    df = read_csv(fstr, header=None)
                    arr = [[[cell] for cell in row] for row in df.values]
                    sub_name = '{}_{}_{}'.format(name, x, y)
                    df = Panel(arr, minor_axis=[sub_name]).transpose(2, 0, 1).to_frame()
                    print(df.head())
                    print('add {} ...'.format(sub_name))
                    mpfile.add_data_table(identifier, df, sub_name)
                    # rewind so the next tile reads from the top again
                    f.seek(0)
            else:
                fstr = f.read()
                if not isinstance(fstr, str):
                    # bytes on Python 3; assumes UTF-8 -- TODO confirm
                    fstr = fstr.decode('utf-8')
                df = read_csv(fstr, names=table_columns)
                print('add {} ...'.format(name))
                mpfile.add_data_table(identifier, df, name)

    print('Added data from {}'.format(file_path))
def run(mpfile, **kwargs):
    """Add the tables stored in a gzipped tar archive to ``mpfile``.

    Reads the archive path (relative to ``$HOME/work``) and the column names
    (separated by ``' -- '``) from the ``input`` entry popped off the
    ``_hdata`` section of ``mpfile.document``.  Archive members with ``pump``
    in their name are split into tiles of ``delta`` columns, each added as its
    own table; all other members are added whole with the configured columns.

    Args:
        mpfile: object exposing ``document``, ``ids`` and ``add_data_table``.
        **kwargs: accepted for interface compatibility; unused.

    Returns:
        None.  A missing archive only results in a printed message.
    """
    meta_data = mpfile.document['_hdata'].pop('input')
    file_path = os.path.join(os.environ['HOME'], 'work', meta_data['file_path'])
    if not os.path.exists(file_path):
        print('Please upload {}'.format(file_path))
        return

    # imported lazily: only needed once there is an archive to process
    from pandas import Panel

    table_columns = meta_data['table_columns'].split(' -- ')
    identifier = mpfile.ids[0]

    with tarfile.open(file_path, "r:gz") as tar:
        for member in tar.getmembers():
            name = os.path.splitext(member.name)[0]
            print('load {} ...'.format(name))
            f = tar.extractfile(member)
            if f is None:
                # non-regular members (e.g. directories) have no content
                continue

            if 'pump' not in name:
                fstr = f.read()
                if not isinstance(fstr, str):
                    # bytes on Python 3; assumes UTF-8 -- TODO confirm
                    fstr = fstr.decode('utf-8')
                df = read_csv(fstr, names=table_columns)
                print('add {} ...'.format(name))
                mpfile.add_data_table(identifier, df, name)
                continue

            # only load a small area
            list1, list2 = range(1), range(6)
            tuples = [(x, y) for x in list1 for y in list2]
            delta = 150
            for x, y in tuples:
                # NOTE(review): `i > x*delta` also skips the first row of the
                # window -- confirm this is an intended header skip
                lines = []
                for i in range((x+1)*delta):
                    line = f.readline()
                    if not isinstance(line, str):
                        # bytes on Python 3; assumes UTF-8 -- TODO confirm
                        line = line.decode('utf-8')
                    if i > x*delta:
                        lines.append(line)
                # restrict every row to the columns of tile `y`
                sub_lines = [
                    ','.join(line.strip().split(',')[y*delta:(y+1)*delta])
                    for line in lines
                ]
                fstr = '\n'.join(sub_lines)
                print('read_csv ...')
                df = read_csv(fstr, header=None)
                arr = [[[cell] for cell in row] for row in df.values]
                sub_name = '{}_{}_{}'.format(name, x, y)
                df = Panel(arr, minor_axis=[sub_name]).transpose(2, 0, 1).to_frame()
                print(df.head())
                print('add {} ...'.format(sub_name))
                mpfile.add_data_table(identifier, df, sub_name)
                # rewind so the next tile starts from the first line again
                f.seek(0)

    print('Added data from {}'.format(file_path))
def run(mpfile):
    """Collect the dark JV curves of ``Data/Figure 4`` into one table.

    Every ``*_01_DIV.txt`` file (processed in sorted order) is parsed as a
    two-column, tab-separated table.  The voltage column of the first file
    seeds the combined frame; each file then contributes one current column
    named after the temperature and sweep direction encoded in its file name.
    The combined frame is added to ``mpfile`` as table ``JV|dark``.

    Args:
        mpfile: object exposing ``ids`` and ``add_data_table``.
    """
    # file-name marker -> offset added to the measured current; first match wins
    offsets = (
        ("fwd_dB_p3", -6.70273000e-11),
        ("rev_dB_p3", 4.49694000e-10),
        ("fwd_dG_p6", -8.90037000e-11),
        ("rev_dG_p6", 8.42196000e-10),
    )
    identifier = mpfile.ids[0]
    xcol, ycol = "V [V]", "J {}°C {} [mA/cm²]"
    header = "\t".join([xcol, ycol])
    pattern = os.path.join("Data", "Figure 4", "*_01_DIV.txt")
    full_df = None

    for fn in sorted(glob(pattern)):
        with open(fn, "r") as f:
            contents = f.read()
        name = os.path.splitext(os.path.basename(fn))[0]

        # the files carry no header line, so one is prepended before parsing
        parsed = read_csv("\n".join([header, contents]), sep="\t")
        numeric = parsed.apply(to_numeric, errors="coerce")
        df = numeric.sort_values(by=[xcol])
        if full_df is None:
            full_df = df[xcol].to_frame()

        offset = next((shift for marker, shift in offsets if marker in name), 0.0)
        temp = name[4:].split("CZnO", 1)[0]
        direction = "fwd" if "fwd" in name else "rev"
        col = ycol.format(temp, direction)
        # NOTE(review): 1000.0 / 0.045 presumably converts to mA/cm² -- confirm
        full_df[col] = (df[ycol] + offset).abs() * 1000.0 / 0.045

    mpfile.add_data_table(identifier, full_df, "JV|dark")
def get_fit_pars(sample_number):
    """Collect the cleaned fit parameters of one experimental sample.

    Args:
        sample_number: sample number understood by ``solar_perovskite``.

    Returns:
        RecursiveDict with the keys ``fit_par_ent``, ``fit_param_enth``,
        ``fit_type_entr``, ``delta_0``, ``delta_min``, ``delta_max``,
        ``fit_param_fe``, ``act_mat`` and ``t_avg`` (in that order); every
        value went through ``clean_value`` with ``max_dgts=6``.
    """
    # numpy is used directly: the `pandas.np` alias was removed from pandas
    import numpy as np
    import solar_perovskite
    from solar_perovskite.modelling.isographs import Experimental
    from solar_perovskite.init.import_data import Importdata

    max_dgts = 6
    package_dir = os.path.dirname(solar_perovskite.__file__)

    def cleaned(keys, values):
        # pair keys with values, cleaning each value on the way
        return RecursiveDict(
            (k, clean_value(v, max_dgts=max_dgts)) for k, v in zip(keys, values))

    d = RecursiveDict()
    exp = Experimental(sample_number)
    fitparam = exp.get_fit_parameters()
    # fitparam = compstr, delta_0, tolfac, mol_mass, fit_param_enth,
    # fit_type_entr, fit_param_entr, delta_min, delta_max
    fit_par_ent = [fitparam[6][0], fitparam[6][1], fitparam[1]]

    d['fit_par_ent'] = cleaned('abc', fit_par_ent)
    d['fit_param_enth'] = cleaned('abcd', fitparam[4])
    d['fit_type_entr'] = clean_value(fitparam[5], max_dgts=max_dgts)
    d['delta_0'] = clean_value(fitparam[1], max_dgts=max_dgts)
    d['delta_min'] = clean_value(fitparam[7], max_dgts=max_dgts)
    d['delta_max'] = clean_value(fitparam[8], max_dgts=max_dgts)

    fit_param_fe = np.loadtxt(
        os.path.abspath(
            os.path.join(package_dir, "datafiles", "entropy_fitparam_SrFeOx")))
    d['fit_param_fe'] = cleaned('abcd', fit_param_fe)

    imp = Importdata()
    act_mat = imp.find_active(sample_no=sample_number)
    d['act_mat'] = clean_value(act_mat[1], max_dgts=max_dgts)

    fpath = os.path.join(
        package_dir, 'rawdata',
        'JV_P_{}_H_S_error_advanced.csv'.format(sample_number))
    # close the raw-data file deterministically instead of leaking the handle
    with open(fpath, 'r') as f:
        temps = read_csv(f.read(), usecols=['T'])
    d['t_avg'] = clean_value(pd.to_numeric(temps['T']).mean(), max_dgts=max_dgts)
    return d
def run(mpfile, **kwargs):
    """Add position, composition and XAS/XMCD spectra from a zip archive.

    The archive name (relative to ``$HOME/work``) is popped from the
    ``input_file`` entry of the ``_hdata`` section of ``mpfile.document``.
    For every archive member, the position is parsed from the file name, the
    composition is evaluated with the concentration functions built from the
    ``composition_table`` entry, and the ``Energy``/``XAS``/``XMCD`` columns
    are added as a data table named after the measured element.

    Args:
        mpfile: object exposing ``document``, ``add_hierarchical_data`` and
            ``add_data_table``.
        **kwargs: accepted for interface compatibility; unused.

    Returns:
        The tuple ``('Please upload', zip_path)`` if the archive is missing,
        otherwise None.
    """
    input_file = mpfile.document['_hdata'].pop('input_file')
    zip_path = os.path.join(os.environ['HOME'], 'work', input_file)
    if not os.path.exists(zip_path):
        return 'Please upload', zip_path

    # the context manager closes the archive even if a member fails to parse
    with ZipFile(zip_path, 'r') as zip_file:
        composition_table_dict = mpfile.document['_hdata']['composition_table']
        conc_funcs = get_concentration_functions(composition_table_dict)

        for info in zip_file.infolist():
            print(info.filename)
            d = RecursiveDict()

            # positions.x/y from filename, <scan-id>_<meas-element>_<X>_<Y>.csv
            # only the last three fields are used; the scan id (if any) is
            # whatever precedes them
            stem = os.path.splitext(info.filename)[0]
            element, x, y = stem.rsplit('_', 3)[-3:]
            d['position'] = RecursiveDict(
                (k, clean_value(v, 'mm')) for k, v in zip(['x', 'y'], [x, y])
            )

            # composition
            d['composition'] = RecursiveDict(
                (el, clean_value(f(x, y), convert_to_percent=True))
                for el, f in conc_funcs.items()
            )

            # identifier
            identifier = get_composition_from_string(''.join([
                '{}{}'.format(el, int(round(Decimal(comp.split()[0]))))
                for el, comp in d['composition'].items()
            ]))

            # load csv file
            try:
                csv_body = zip_file.read(info.filename)
            except KeyError:
                print('ERROR: Did not find %s in zip file' % info.filename)
                # nothing to parse for this member
                continue
            if not isinstance(csv_body, str):
                # bytes on Python 3; assumes UTF-8 -- TODO confirm
                csv_body = csv_body.decode('utf-8')

            # read csv to pandas DataFrame and add to MPFile
            df = read_csv(csv_body)
            df = df[['Energy', 'XAS', 'XMCD']]

            # min and max
            d.rec_update(RecursiveDict(
                (col, RecursiveDict([
                    ('min', df[col].min()), ('max', df[col].max())
                ]))
                for col in ['XAS', 'XMCD']
            ))

            # add data to MPFile
            mpfile.add_hierarchical_data(nest_dict(d, ['data']), identifier=identifier)
            mpfile.add_data_table(identifier, df, name=element)
def run(mpfile, **kwargs):
    """Add position, composition and XAS/XMCD spectra from a zip archive.

    Pops the archive name (relative to ``$HOME/work``) from the
    ``input_file`` entry of the ``_hdata`` section of ``mpfile.document``.
    Each archive member yields a position (parsed from its file name), a
    composition (evaluated with the functions derived from the
    ``composition_table`` entry) and a table of its ``Energy``, ``XAS`` and
    ``XMCD`` columns named after the measured element.

    Args:
        mpfile: object exposing ``document``, ``add_hierarchical_data`` and
            ``add_data_table``.
        **kwargs: accepted for interface compatibility; unused.

    Returns:
        The tuple ``("Please upload", zip_path)`` if the archive is missing,
        otherwise None.
    """
    input_file = mpfile.document["_hdata"].pop("input_file")
    zip_path = os.path.join(os.environ["HOME"], "work", input_file)
    if not os.path.exists(zip_path):
        return "Please upload", zip_path

    # the context manager closes the archive even if a member fails to parse
    with ZipFile(zip_path, "r") as zip_file:
        composition_table_dict = mpfile.document["_hdata"]["composition_table"]
        conc_funcs = get_concentration_functions(composition_table_dict)

        for info in zip_file.infolist():
            print(info.filename)
            d = RecursiveDict()

            # positions.x/y from filename, <scan-id>_<meas-element>_<X>_<Y>.csv
            # split off at most the last three fields and ignore any scan id
            stem = os.path.splitext(info.filename)[0]
            element, x, y = stem.rsplit("_", 3)[-3:]
            d["position"] = RecursiveDict(
                (k, clean_value(v, "mm")) for k, v in zip(["x", "y"], [x, y]))

            # composition
            d["composition"] = RecursiveDict(
                (el, clean_value(f(x, y), convert_to_percent=True))
                for el, f in conc_funcs.items())

            # identifier
            identifier = get_composition_from_string("".join([
                "{}{}".format(el, int(round(Decimal(comp.split()[0]))))
                for el, comp in d["composition"].items()
            ]))

            # load csv file
            try:
                csv_body = zip_file.read(info.filename)
            except KeyError:
                print("ERROR: Did not find %s in zip file" % info.filename)
                # skip the member instead of parsing unbound or stale content
                continue
            if not isinstance(csv_body, str):
                # bytes on Python 3; assumes UTF-8 -- TODO confirm
                csv_body = csv_body.decode("utf-8")

            # read csv to pandas DataFrame and add to MPFile
            df = read_csv(csv_body)
            df = df[["Energy", "XAS", "XMCD"]]

            # min and max
            d.rec_update(
                RecursiveDict(
                    (col, RecursiveDict([("min", df[col].min()),
                                         ("max", df[col].max())]))
                    for col in ["XAS", "XMCD"]))

            # add data to MPFile
            mpfile.add_hierarchical_data(nest_dict(d, ["data"]),
                                         identifier=identifier)
            mpfile.add_data_table(identifier, df, name=element)
def run(mpfile, **kwargs):
    """Add position, composition and XAS/XMCD spectra from a zip archive.

    The archive name is popped from ``mpfile.document['_hdata']['input_file']``
    and resolved against ``$HOME/work``.  For every member of the archive the
    position comes from the file name, the composition from the concentration
    functions built out of the ``composition_table`` entry, and the
    ``Energy``/``XAS``/``XMCD`` columns become a table named after the
    measured element.

    Args:
        mpfile: object exposing ``document``, ``add_hierarchical_data`` and
            ``add_data_table``.
        **kwargs: accepted for interface compatibility; unused.

    Returns:
        The tuple ``('Please upload', zip_path)`` if the archive is missing,
        otherwise None.
    """
    input_file = mpfile.document['_hdata'].pop('input_file')
    zip_path = os.path.join(os.environ['HOME'], 'work', input_file)
    if not os.path.exists(zip_path):
        return 'Please upload', zip_path

    # the context manager closes the archive even if a member fails to parse
    with ZipFile(zip_path, 'r') as zip_file:
        composition_table_dict = mpfile.document['_hdata']['composition_table']
        conc_funcs = get_concentration_functions(composition_table_dict)

        for info in zip_file.infolist():
            print(info.filename)
            d = RecursiveDict()

            # positions.x/y from filename, <scan-id>_<meas-element>_<X>_<Y>.csv
            # the trailing three fields are element, x and y; a leading scan
            # id is discarded
            fields = os.path.splitext(info.filename)[0].rsplit('_', 3)
            element, x, y = fields[-3:]
            d['position'] = RecursiveDict(
                (k, clean_value(v, 'mm')) for k, v in zip(['x', 'y'], [x, y]))

            # composition
            d['composition'] = RecursiveDict(
                (el, clean_value(f(x, y), convert_to_percent=True))
                for el, f in conc_funcs.items())

            # identifier
            identifier = get_composition_from_string(''.join([
                '{}{}'.format(el, int(round(Decimal(comp.split()[0]))))
                for el, comp in d['composition'].items()
            ]))

            # load csv file
            try:
                csv_body = zip_file.read(info.filename)
            except KeyError:
                print('ERROR: Did not find %s in zip file' % info.filename)
                # skip the member instead of parsing unbound or stale content
                continue
            if not isinstance(csv_body, str):
                # bytes on Python 3; assumes UTF-8 -- TODO confirm
                csv_body = csv_body.decode('utf-8')

            # read csv to pandas DataFrame and add to MPFile
            df = read_csv(csv_body)
            df = df[['Energy', 'XAS', 'XMCD']]

            # min and max
            d.rec_update(
                RecursiveDict(
                    (col, RecursiveDict([('min', df[col].min()),
                                         ('max', df[col].max())]))
                    for col in ['XAS', 'XMCD']))

            # add data to MPFile
            mpfile.add_hierarchical_data(nest_dict(d, ['data']),
                                         identifier=identifier)
            mpfile.add_data_table(identifier, df, name=element)
def from_string(data):
    """Build an ``MPFile`` from ArchieML-formatted text.

    The parsed document is post-processed per root-level key: keys flagged as
    general by ``normalize_root_level`` are nested under the first level-01
    title, all other keys are re-inserted under their normalized name.  For
    the latter, CSV sections are converted via ``read_csv`` and CIF strings
    into pymatgen structure dictionaries.

    Args:
        data: ArchieML text.

    Returns:
        The result of ``MPFile.from_dict`` on the post-processed dictionary.
    """
    # use archieml-python parse to import data
    rdct = RecursiveDict(archieml.loads(data))
    rdct.rec_update()

    # post-process internal representation of file contents
    # (loop over a snapshot of the keys: entries are popped and re-inserted,
    # which a live key view does not tolerate on Python 3)
    for key in list(rdct.keys()):
        is_general, root_key = normalize_root_level(key)

        if is_general:
            # make part of shared (meta-)data, i.e. nest under `general` at
            # the beginning of the MPFile
            if mp_level01_titles[0] not in rdct:
                first_key = list(rdct.keys())[0]
                rdct.insert_before(first_key,
                                   (mp_level01_titles[0], RecursiveDict()))
            rdct.rec_update(
                nest_dict(rdct.pop(key), [mp_level01_titles[0], root_key]))
        else:
            # normalize identifier key (pop & insert)
            # using rec_update since we're looping over all entries
            # also: support data in bare tables (marked-up only by
            # root-level identifier) by nesting under 'data'
            value = rdct.pop(key)
            keys = [root_key]
            if isinstance(value, list):
                keys.append('table')
            rdct.rec_update(nest_dict(value, keys))

            # Note: CSV section is marked with 'data ' prefix during iterate()
            for k, v in rdct[root_key].iterate():
                if isinstance(k, six.string_types) and \
                        k.startswith(mp_level01_titles[1]):
                    # k = table name (incl. data prefix)
                    # v = csv string from ArchieML free-form arrays
                    table_name = k[len(mp_level01_titles[1] + '_'):]
                    pd_obj = read_csv(v)
                    rdct[root_key].pop(table_name)
                    rdct[root_key].rec_update(
                        nest_dict(pd_obj.to_dict(), [k]))
                    rdct[root_key].insert_default_plot_options(pd_obj, k)

            # convert CIF strings into pymatgen structures
            if mp_level01_titles[3] in rdct[root_key]:
                from pymatgen.io.cif import CifParser
                structures = rdct[root_key][mp_level01_titles[3]]
                # snapshot the names: each one is popped and re-inserted
                for name in list(structures.keys()):
                    cif = structures.pop(name)
                    parser = CifParser.from_string(cif)
                    structure = parser.get_structures(primitive=False)[0]
                    structures.rec_update(
                        nest_dict(structure.as_dict(), [name]))

    return MPFile.from_dict(rdct)
def get_fit_pars(sample_number):
    """Collect the cleaned fit parameters of one experimental sample.

    Args:
        sample_number: sample number understood by ``solar_perovskite``.

    Returns:
        RecursiveDict holding, in this order, ``fit_par_ent``,
        ``fit_param_enth``, ``fit_type_entr``, ``delta_0``, ``delta_min``,
        ``delta_max``, ``fit_param_fe``, ``act_mat`` and ``t_avg``; all values
        are passed through ``clean_value`` with ``max_dgts=6``.
    """
    # numpy is used directly: the `pandas.np` alias was removed from pandas
    import numpy as np
    import solar_perovskite
    from solar_perovskite.modelling.isographs import Experimental
    from solar_perovskite.init.import_data import Importdata

    max_dgts = 6
    # all data files live inside the installed solar_perovskite package
    package_dir = os.path.dirname(solar_perovskite.__file__)
    d = RecursiveDict()
    exp = Experimental(sample_number)
    fitparam = exp.get_fit_parameters()
    # fitparam = compstr, delta_0, tolfac, mol_mass, fit_param_enth,
    # fit_type_entr, fit_param_entr, delta_min, delta_max
    fit_par_ent = [fitparam[6][0], fitparam[6][1], fitparam[1]]

    d['fit_par_ent'] = RecursiveDict(
        (k, clean_value(v, max_dgts=max_dgts))
        for k, v in zip('abc', fit_par_ent)
    )
    d['fit_param_enth'] = RecursiveDict(
        (k, clean_value(v, max_dgts=max_dgts))
        for k, v in zip('abcd', fitparam[4])
    )
    d['fit_type_entr'] = clean_value(fitparam[5], max_dgts=max_dgts)
    d['delta_0'] = clean_value(fitparam[1], max_dgts=max_dgts)
    d['delta_min'] = clean_value(fitparam[7], max_dgts=max_dgts)
    d['delta_max'] = clean_value(fitparam[8], max_dgts=max_dgts)

    fit_param_fe = np.loadtxt(os.path.abspath(os.path.join(
        package_dir, "datafiles", "entropy_fitparam_SrFeOx"
    )))
    d['fit_param_fe'] = RecursiveDict(
        (k, clean_value(v, max_dgts=max_dgts))
        for k, v in zip('abcd', fit_param_fe)
    )

    imp = Importdata()
    act_mat = imp.find_active(sample_no=sample_number)
    d['act_mat'] = clean_value(act_mat[1], max_dgts=max_dgts)

    fpath = os.path.join(
        package_dir, 'rawdata',
        'JV_P_{}_H_S_error_advanced.csv'.format(sample_number)
    )
    # close the raw-data file deterministically instead of leaking the handle
    with open(fpath, 'r') as f:
        temps = read_csv(f.read(), usecols=['T'])
    d['t_avg'] = clean_value(pd.to_numeric(temps['T']).mean(), max_dgts=max_dgts)

    return d
def run(mpfile, **kwargs):
    """Add redox-thermodynamics data for the contributions in ``mpfile``.

    Loads the experimental table, the theoretical collection and the energy
    analysis collection named in ``mpfile.hdata.general['input_files']``,
    then walks over the single-identifier parts of ``mpfile`` and adds
    hierarchical data plus one energy-analysis table per product/stability
    group.

    Args:
        mpfile: object exposing ``hdata``, ``split`` and ``concat``.
        **kwargs: accepted for interface compatibility; unused.

    Raises:
        SystemExit: with code 1 if ``solar_perovskite`` cannot be imported.
    """
    # TODO clone solar_perovskite if needed, abort if insufficient permissions
    try:
        import solar_perovskite
        from solar_perovskite.core import GetExpThermo
        from solar_perovskite.init.find_structures import FindStructures
        from solar_perovskite.init.import_data import Importdata
        from solar_perovskite.modelling.from_theo import EnthTheo
    except ImportError:
        print('could not import solar_perovskite, clone github repo')
        sys.exit(1)

    input_files = mpfile.hdata.general['input_files']
    input_dir = os.path.dirname(solar_perovskite.__file__)
    input_file = os.path.join(input_dir, input_files['exp'])
    # the experimental table is semicolon-separated; read_csv expects commas
    with open(input_file, 'r') as f:
        exp_table = read_csv(f.read().replace(';', ','))
    print('exp data loaded.')
    # NOTE(review): theo_data is loaded but not used further down
    with open(os.path.join(input_dir, input_files['theo']), 'r') as f:
        theo_data = json.loads(f.read()).pop('collection')
    print('theo data loaded.')
    with open(input_files['energy'], 'r') as f:
        data = json.load(f).pop('collection')
    print('energy data loaded.')

    # flatten: one row per energy-analysis entry, tagged with its parent _id
    energy_rows = [
        dict(sdoc, parameters=doc['_id'])
        for doc in data for sdoc in doc['energy_analysis']
    ]
    frame = pd.DataFrame(energy_rows)
    # move the `parameters` column to the front
    parameters = frame['parameters']
    frame.drop(labels=['parameters'], axis=1, inplace=True)
    frame.insert(0, 'parameters', parameters)
    print('energy dataframe:', frame.shape)

    mpfile_singles = list(mpfile.split())
    for mpfile_single in mpfile_singles:
        identifier = mpfile_single.ids[0]
        #if identifier in run.existing_identifiers:
        #    print (not updating', identifier)
        #    continue
        # NOTE(review): hard-coded filter, only this identifier is processed
        if identifier != 'mp-1076585':
            continue
        hdata = mpfile_single.hdata[identifier]
        print(identifier)

        print('add hdata ...')
        d = RecursiveDict()
        d['data'] = RecursiveDict()
        compstr = hdata['pars']['theo_compstr']
        row = exp_table.loc[exp_table['theo_compstr'] == compstr]
        if not row.empty:
            sample_number = int(row.iloc[0]['sample_number'])
            d['pars'] = get_fit_pars(sample_number)
            d['data']['availability'] = 'Exp+Theo'
        else:
            d['pars'] = RecursiveDict()
            d['data']['availability'] = 'Theo'

        #print('dh_min, dh_max ...')
        #_, dh_min, dh_max, _ = redenth_act(compstr)
        #d['pars']['dh_min'] = clean_value(dh_min, max_dgts=4)
        #d['pars']['dh_max'] = clean_value(dh_max, max_dgts=4)
        #d['pars']['elastic'] = RecursiveDict()
        #print('debye temps ...')
        #d['pars']['elastic']['debye_temp'] = RecursiveDict()
        #try:
        #    t_d_perov = get_debye_temp(identifier)
        #    t_d_brownm = get_debye_temp(hdata['data']['reduced_phase']['closest-MP'])
        #    tensors_available = 'True'
        #except TypeError:
        #    t_d_perov = get_debye_temp("mp-510624")
        #    t_d_brownm = get_debye_temp("mp-561589")
        #    tensors_available = 'False'
        #d['pars']['elastic']['debye_temp']['perovskite'] = clean_value(t_d_perov, max_dgts=6)
        #d['pars']['elastic']['debye_temp']['brownmillerite'] = clean_value(t_d_brownm, max_dgts=6)
        #d['pars']['elastic']['tensors_available'] = tensors_available
        d['pars']['last_updated'] = str(datetime.now())
        mpfile_single.add_hierarchical_data(d, identifier=identifier)

        #for process in processes:
        #    if process != "AS":
        #        t_ox_l = t_ox_ws_cs
        #        t_red_l = t_red_ws_cs
        #        p_ox_l = p_ox_ws_cs
        #        p_red_l = p_red_ws_cs
        #        data_source = ["Theo"]
        #    else:
        #        t_ox_l = t_ox_airsep
        #        t_red_l = t_red_airsep
        #        p_ox_l = p_ox_airsep
        #        p_red_l = p_red_airsep
        #        data_source = ["Theo", "Exp"]
        #    for red_temp in t_red_l:
        #        for ox_temp in t_ox_l:
        #            for ox_pr in p_ox_l:
        #                for red_pr in p_red_l:
        #                    for data_sources in data_source:
        #                        db_id = process + "_" + str(float(ox_temp)) + "_" \
        #                                + str(float(red_temp)) + "_" + str(float(ox_pr)) \
        #                                + "_" + str(float(red_pr)) + "_" + data_sources + \
        #                                "_" + str(float(enth_steps))

        print('add energy analysis ...')
        # drop() returns new frames here: in-place drops on query/groupby
        # results act on slices and trigger pandas' SettingWithCopyWarning
        group = frame.query('compstr.str.contains("{}")'.format(compstr[:-1]))
        group = group.drop(labels='compstr', axis=1)
        for prodstr, subgroup in group.groupby(['prodstr', 'prodstr_alt'], sort=False):
            subgroup = subgroup.drop(labels=['prodstr', 'prodstr_alt'], axis=1)
            for unstable, subsubgroup in subgroup.groupby('unstable', sort=False):
                subsubgroup = subsubgroup.drop(labels='unstable', axis=1)
                name = 'energy-analysis_{}_{}'.format(
                    'unstable' if unstable else 'stable', '-'.join(prodstr))
                print(name)
                mpfile_single.add_data_table(identifier, subsubgroup, name)

        print(mpfile_single)
        mpfile.concat(mpfile_single)
        # NOTE(review): as reconstructed, this unconditional break makes the
        # rest of the loop body unreachable -- confirm whether the enthalpy,
        # entropy and raw-data tables below are meant to be added
        break

        if not row.empty:
            print('add ΔH ...')
            exp_thermo = GetExpThermo(sample_number, plotting=False)
            enthalpy = exp_thermo.exp_dh()
            table = get_table(enthalpy, 'H')
            mpfile_single.add_data_table(identifier, table, name='enthalpy')

            print('add ΔS ...')
            entropy = exp_thermo.exp_ds()
            table = get_table(entropy, 'S')
            mpfile_single.add_data_table(identifier, table, name='entropy')

            print('add raw data ...')
            tga_results = os.path.join(input_dir, 'tga_results')
            pattern = os.path.join(
                tga_results, 'ExpDat_JV_P_{}_*.csv'.format(sample_number))
            for path in glob(pattern):
                print(path.split('_{}_'.format(sample_number))[-1].split('.')[0], '...')
                with open(path, 'r') as f:
                    body = f.read()
                cols = ['Time [min]', 'Temperature [C]', 'dm [%]', 'pO2']
                table = read_csv(body, lineterminator=os.linesep, usecols=cols, skiprows=5)
                # keep every 100th row only
                table = table[cols].iloc[::100, :]
                # scale/shift for better graphs
                T, dm, p = [pd.to_numeric(table[col]) for col in cols[1:]]
                T_min, T_max, dm_min, dm_max, p_max = (
                    T.min(), T.max(), dm.min(), dm.max(), p.max())
                rT, rdm = abs(T_max - T_min), abs(dm_max - dm_min)
                table[cols[2]] = (dm - dm_min) * rT/rdm
                table[cols[3]] = p * rT/p_max
                table.rename(columns={
                    'dm [%]': '(dm [%] + {:.4g}) * {:.4g}'.format(-dm_min, rT/rdm),
                    'pO2': 'pO₂ * {:.4g}'.format(rT/p_max)
                }, inplace=True)
                mpfile_single.add_data_table(identifier, table, name='raw')
def from_string(data):
    """Build an ``MPFile`` from ArchieML-formatted text.

    Every root-level key is normalized via ``normalize_root_level`` and
    re-inserted (general keys nested under the first level-01 title).  Inside
    each resulting section, lists of row dictionaries are parsed as CSV
    tables, space-containing strings are converted to propnet quantities if
    ``Quantity`` is available, and CIF strings are turned into pymatgen
    structure dictionaries.

    Args:
        data: ArchieML text.

    Returns:
        The result of ``MPFile.from_dict`` on the post-processed dictionary.
    """
    # use archieml-python parse to import data
    rdct = RecursiveDict(loads(data))
    rdct.rec_update()

    # post-process internal representation of file contents
    for key in list(rdct.keys()):
        is_general, root_key = normalize_root_level(key)

        if is_general:
            # make part of shared (meta-)data, i.e. nest under `general` at
            # the beginning of the MPFile
            if mp_level01_titles[0] not in rdct:
                rdct[mp_level01_titles[0]] = RecursiveDict()
                rdct.move_to_end(mp_level01_titles[0], last=False)

        # normalize identifier key (pop & insert)
        # using rec_update since we're looping over all entries
        # also: support data in bare tables (marked-up only by
        # root-level identifier) by nesting under 'data'
        value = rdct.pop(key)
        keys = [mp_level01_titles[0]] if is_general else []
        keys.append(root_key)
        if isinstance(value, list):
            keys.append("table")
        rdct.rec_update(nest_dict(value, keys))

        # reference to section to iterate or parse as CIF
        section = (rdct[mp_level01_titles[0]][root_key]
                   if is_general else rdct[root_key])

        # iterate to find CSV sections to parse
        # also parse propnet quantities
        if isinstance(section, dict):
            scope = []
            for k, v in section.iterate():
                level, raw_key = k
                sub_key = "".join([replacements.get(c, c) for c in raw_key])
                # moving back up the hierarchy: forget the deeper scope
                if level < len(scope):
                    del scope[level:]
                if v is None:
                    scope.append(sub_key)
                elif isinstance(v, list) and v and isinstance(v[0], dict):
                    # every row is preceded by a newline, i.e. the table
                    # text starts with an empty line
                    table = "\n".join([""] + [row_dct["value"] for row_dct in v])
                    pd_obj = read_csv(table)
                    d = nest_dict(pd_obj.to_dict(), scope + [sub_key])
                    section.rec_update(d, overwrite=True)
                    if not is_general and level == 0:
                        section.insert_default_plot_options(pd_obj, sub_key)
                elif (Quantity is not None
                      and isinstance(v, six.string_types) and " " in v):
                    quantity = Quantity.from_key_value(sub_key, v)
                    d = nest_dict(quantity.as_dict(),
                                  scope + [sub_key])  # TODO quantity.symbol.name
                    section.rec_update(d, overwrite=True)

            # convert CIF strings into pymatgen structures
            if mp_level01_titles[3] in section:
                from pymatgen.io.cif import CifParser
                structures = section[mp_level01_titles[3]]
                # snapshot the names: each one is popped and re-inserted,
                # which a live key view does not tolerate
                for name in list(structures.keys()):
                    cif = structures.pop(name)
                    parser = CifParser.from_string(cif)
                    structure = parser.get_structures(primitive=False)[0]
                    structures.rec_update(
                        nest_dict(structure.as_dict(), [name]))

    return MPFile.from_dict(rdct)
def run(mpfile, **kwargs):
    """Add redox-thermodynamics data for the contributions in ``mpfile``.

    Reads the experimental table, the theoretical collection and the energy
    analysis collection listed in ``mpfile.hdata.general["input_files"]``.
    Each single-identifier part of ``mpfile`` then receives hierarchical data
    and one energy-analysis table per product/stability group.

    Args:
        mpfile: object exposing ``hdata``, ``split`` and ``concat``.
        **kwargs: accepted for interface compatibility; unused.

    Raises:
        SystemExit: with code 1 if ``solar_perovskite`` cannot be imported.
    """
    # TODO clone solar_perovskite if needed, abort if insufficient permissions
    try:
        import solar_perovskite
        from solar_perovskite.core import GetExpThermo
        from solar_perovskite.init.find_structures import FindStructures
        from solar_perovskite.init.import_data import Importdata
        from solar_perovskite.modelling.from_theo import EnthTheo
    except ImportError:
        print("could not import solar_perovskite, clone github repo")
        sys.exit(1)

    input_files = mpfile.hdata.general["input_files"]
    input_dir = os.path.dirname(solar_perovskite.__file__)
    input_file = os.path.join(input_dir, input_files["exp"])
    # the experimental table is semicolon-separated; read_csv expects commas
    with open(input_file, "r") as f:
        exp_table = read_csv(f.read().replace(";", ","))
    print("exp data loaded.")
    # NOTE(review): theo_data is loaded but not used further down
    with open(os.path.join(input_dir, input_files["theo"]), "r") as f:
        theo_data = json.loads(f.read()).pop("collection")
    print("theo data loaded.")
    with open(input_files["energy"], "r") as f:
        data = json.load(f).pop("collection")
    print("energy data loaded.")

    # flatten: one row per energy-analysis entry, tagged with its parent _id
    energy_rows = [
        dict(sdoc, parameters=doc["_id"])
        for doc in data
        for sdoc in doc["energy_analysis"]
    ]
    frame = pd.DataFrame(energy_rows)
    # move the `parameters` column to the front
    parameters = frame["parameters"]
    frame.drop(labels=["parameters"], axis=1, inplace=True)
    frame.insert(0, "parameters", parameters)
    print("energy dataframe:", frame.shape)

    mpfile_singles = list(mpfile.split())
    for mpfile_single in mpfile_singles:
        identifier = mpfile_single.ids[0]
        # if identifier in run.existing_identifiers:
        #    print (not updating', identifier)
        #    continue
        # NOTE(review): hard-coded filter, only this identifier is processed
        if identifier != "mp-1076585":
            continue
        hdata = mpfile_single.hdata[identifier]
        print(identifier)

        print("add hdata ...")
        d = RecursiveDict()
        d["data"] = RecursiveDict()
        compstr = hdata["pars"]["theo_compstr"]
        row = exp_table.loc[exp_table["theo_compstr"] == compstr]
        if not row.empty:
            sample_number = int(row.iloc[0]["sample_number"])
            d["pars"] = get_fit_pars(sample_number)
            d["data"]["availability"] = "Exp+Theo"
        else:
            d["pars"] = RecursiveDict()
            d["data"]["availability"] = "Theo"

        # print('dh_min, dh_max ...')
        # _, dh_min, dh_max, _ = redenth_act(compstr)
        # d['pars']['dh_min'] = clean_value(dh_min, max_dgts=4)
        # d['pars']['dh_max'] = clean_value(dh_max, max_dgts=4)
        # d['pars']['elastic'] = RecursiveDict()
        # print('debye temps ...')
        # d['pars']['elastic']['debye_temp'] = RecursiveDict()
        # try:
        #    t_d_perov = get_debye_temp(identifier)
        #    t_d_brownm = get_debye_temp(hdata['data']['reduced_phase']['closest-MP'])
        #    tensors_available = 'True'
        # except TypeError:
        #    t_d_perov = get_debye_temp("mp-510624")
        #    t_d_brownm = get_debye_temp("mp-561589")
        #    tensors_available = 'False'
        # d['pars']['elastic']['debye_temp']['perovskite'] = clean_value(t_d_perov, max_dgts=6)
        # d['pars']['elastic']['debye_temp']['brownmillerite'] = clean_value(t_d_brownm, max_dgts=6)
        # d['pars']['elastic']['tensors_available'] = tensors_available
        d["pars"]["last_updated"] = str(datetime.now())
        mpfile_single.add_hierarchical_data(d, identifier=identifier)

        # for process in processes:
        #    if process != "AS":
        #        t_ox_l = t_ox_ws_cs
        #        t_red_l = t_red_ws_cs
        #        p_ox_l = p_ox_ws_cs
        #        p_red_l = p_red_ws_cs
        #        data_source = ["Theo"]
        #    else:
        #        t_ox_l = t_ox_airsep
        #        t_red_l = t_red_airsep
        #        p_ox_l = p_ox_airsep
        #        p_red_l = p_red_airsep
        #        data_source = ["Theo", "Exp"]
        #    for red_temp in t_red_l:
        #        for ox_temp in t_ox_l:
        #            for ox_pr in p_ox_l:
        #                for red_pr in p_red_l:
        #                    for data_sources in data_source:
        #                        db_id = process + "_" + str(float(ox_temp)) + "_" \
        #                                + str(float(red_temp)) + "_" + str(float(ox_pr)) \
        #                                + "_" + str(float(red_pr)) + "_" + data_sources + \
        #                                "_" + str(float(enth_steps))

        print("add energy analysis ...")
        # drop() returns new frames here: in-place drops on query/groupby
        # results act on slices and trigger pandas' SettingWithCopyWarning
        group = frame.query('compstr.str.contains("{}")'.format(compstr[:-1]))
        group = group.drop(labels="compstr", axis=1)
        for prodstr, subgroup in group.groupby(["prodstr", "prodstr_alt"], sort=False):
            subgroup = subgroup.drop(labels=["prodstr", "prodstr_alt"], axis=1)
            for unstable, subsubgroup in subgroup.groupby("unstable", sort=False):
                subsubgroup = subsubgroup.drop(labels="unstable", axis=1)
                name = "energy-analysis_{}_{}".format(
                    "unstable" if unstable else "stable", "-".join(prodstr))
                print(name)
                mpfile_single.add_data_table(identifier, subsubgroup, name)

        print(mpfile_single)
        mpfile.concat(mpfile_single)
        # NOTE(review): as reconstructed, this unconditional break makes the
        # rest of the loop body unreachable -- confirm whether the enthalpy,
        # entropy and raw-data tables below are meant to be added
        break

        if not row.empty:
            print("add ΔH ...")
            exp_thermo = GetExpThermo(sample_number, plotting=False)
            enthalpy = exp_thermo.exp_dh()
            table = get_table(enthalpy, "H")
            mpfile_single.add_data_table(identifier, table, name="enthalpy")

            print("add ΔS ...")
            entropy = exp_thermo.exp_ds()
            table = get_table(entropy, "S")
            mpfile_single.add_data_table(identifier, table, name="entropy")

            print("add raw data ...")
            tga_results = os.path.join(input_dir, "tga_results")
            pattern = os.path.join(
                tga_results, "ExpDat_JV_P_{}_*.csv".format(sample_number))
            for path in glob(pattern):
                print(
                    path.split("_{}_".format(sample_number))[-1].split(".")[0],
                    "...")
                with open(path, "r") as f:
                    body = f.read()
                cols = ["Time [min]", "Temperature [C]", "dm [%]", "pO2"]
                table = read_csv(body, lineterminator=os.linesep, usecols=cols,
                                 skiprows=5)
                # keep every 100th row only
                table = table[cols].iloc[::100, :]
                # scale/shift for better graphs
                T, dm, p = [pd.to_numeric(table[col]) for col in cols[1:]]
                T_min, T_max, dm_min, dm_max, p_max = (
                    T.min(),
                    T.max(),
                    dm.min(),
                    dm.max(),
                    p.max(),
                )
                rT, rdm = abs(T_max - T_min), abs(dm_max - dm_min)
                table[cols[2]] = (dm - dm_min) * rT / rdm
                table[cols[3]] = p * rT / p_max
                table.rename(
                    columns={
                        "dm [%]": "(dm [%] + {:.4g}) * {:.4g}".format(-dm_min, rT / rdm),
                        "pO2": "pO₂ * {:.4g}".format(rT / p_max),
                    },
                    inplace=True,
                )
                mpfile_single.add_data_table(identifier, table, name="raw")
def run(mpfile, **kwargs):
    """Add Shomate parameters and per-sample redox data to ``mpfile``.

    The Shomate table shipped with ``solar_perovskite`` is added as shared
    hierarchical data under ``shomate``.  Then, for every row of the input
    table named in ``mpfile.hdata.general['input_file']`` whose closest
    oxidized MP phase starts with ``mp-``, hierarchical data, the enthalpy
    and entropy tables and the down-sampled raw TGA tables are added.

    Args:
        mpfile: object exposing ``hdata``, ``add_hierarchical_data`` and
            ``add_data_table``.
        **kwargs: accepted for interface compatibility; unused.
    """
    # TODO clone solar_perovskite if needed, abort if insufficient permissions
    import solar_perovskite
    from solar_perovskite.core import GetExpThermo
    from solar_perovskite.init.find_structures import FindStructures
    from solar_perovskite.init.import_data import Importdata
    from solar_perovskite.modelling.from_theo import EnthTheo

    package_dir = os.path.dirname(solar_perovskite.__file__)
    input_file = mpfile.hdata.general['input_file']
    input_file = os.path.join(package_dir, input_file)
    # the input table is semicolon-separated; read_csv expects commas
    with open(input_file, 'r') as f:
        table = read_csv(f.read().replace(';', ','))
    dct = super(Table, table).to_dict(orient='records', into=RecursiveDict)

    shomate = pd.read_csv(os.path.abspath(os.path.join(
        package_dir, "datafiles", "shomate.csv"
    )), index_col=0)
    shomate_dct = RecursiveDict()
    for col in shomate.columns:
        # columns sharing the text before the first '.' are grouped together
        key = col.split('.')[0]
        if key not in shomate_dct:
            shomate_dct[key] = RecursiveDict()
        d = shomate[col].to_dict(into=RecursiveDict)
        subkey = '{}-{}'.format(int(d.pop('low')), int(d.pop('high')))
        shomate_dct[key][subkey] = RecursiveDict(
            (k, clean_value(v, max_dgts=6)) for k, v in d.items()
        )
    mpfile.add_hierarchical_data(nest_dict(shomate_dct, ['shomate']))

    for row in dct:
        sample_number = int(row['sample_number'])
        identifier = row['closest phase MP (oxidized)'].replace('n.a.', '')
        if not identifier.startswith('mp-'):
            continue
        # NOTE(review): unreachable -- an empty identifier is already skipped
        # by the `mp-` check above; confirm which of the two is intended
        if not identifier:
            identifier = get_composition_from_string(row['composition oxidized phase'])
        print(identifier)

        print('add hdata ...')
        d = RecursiveDict()
        d['tolerance_factor'] = row['tolerance_factor']
        d['solid_solution'] = row['type of solid solution']
        d['oxidized_phase'] = RecursiveDict()
        d['oxidized_phase']['composition'] = row['composition oxidized phase']
        d['oxidized_phase']['crystal-structure'] = row['crystal structure (fully oxidized)']
        d['reduced_phase'] = RecursiveDict()
        d['reduced_phase']['composition'] = row['composition reduced phase']
        d['reduced_phase']['closest-MP'] = row['closest phase MP (reduced)'].replace('n.a.', '')
        d = nest_dict(d, ['data'])
        d['pars'] = get_fit_pars(sample_number)
        d['pars']['theo_compstr'] = row['theo_compstr']
        try:
            fs = FindStructures(compstr=row['theo_compstr'])
            theo_redenth = fs.find_theo_redenth()
            imp = Importdata()
            splitcomp = imp.split_comp(row['theo_compstr'])
            conc_act = imp.find_active(mat_comp=splitcomp)[1]
            et = EnthTheo(comp=row['theo_compstr'])
            dh_max, dh_min = et.calc_dh_endm()
            red_enth_mean_endm = (conc_act * dh_min) + ((1 - conc_act) * dh_max)
            difference = theo_redenth - red_enth_mean_endm
            d['pars']['dh_min'] = clean_value(dh_min + difference, max_dgts=8)
            d['pars']['dh_max'] = clean_value(dh_max + difference, max_dgts=8)
        except Exception as ex:
            # best effort: dh_min/dh_max are simply left out on failure
            print('error in dh_min/max!')
            print(str(ex))

        mpfile.add_hierarchical_data(d, identifier=identifier)

        print('add ΔH ...')
        exp_thermo = GetExpThermo(sample_number, plotting=False)
        enthalpy = exp_thermo.exp_dh()
        table = get_table(enthalpy, 'H')
        mpfile.add_data_table(identifier, table, name='enthalpy')

        print('add ΔS ...')
        entropy = exp_thermo.exp_ds()
        table = get_table(entropy, 'S')
        mpfile.add_data_table(identifier, table, name='entropy')

        print('add raw data ...')
        tga_results = os.path.join(package_dir, 'tga_results')
        pattern = os.path.join(
            tga_results, 'ExpDat_JV_P_{}_*.csv'.format(sample_number))
        for path in glob(pattern):
            print('{} ...'.format(
                path.split('_{}_'.format(sample_number))[-1].split('.')[0]))
            with open(path, 'r') as f:
                body = f.read()
            cols = ['Time [min]', 'Temperature [C]', 'dm [%]', 'pO2']
            table = read_csv(body, lineterminator=os.linesep, usecols=cols, skiprows=5)
            # keep every 100th row only
            table = table[cols].iloc[::100, :]
            # scale/shift for better graphs
            T, dm, p = [pd.to_numeric(table[col]) for col in cols[1:]]
            T_min, T_max, dm_min, dm_max, p_max = (
                T.min(), T.max(), dm.min(), dm.max(), p.max())
            rT, rdm = abs(T_max - T_min), abs(dm_max - dm_min)
            table[cols[2]] = (dm - dm_min) * rT/rdm
            table[cols[3]] = p * rT/p_max
            table.rename(columns={
                'dm [%]': '(dm [%] + {:.4g}) * {:.4g}'.format(-dm_min, rT/rdm),
                'pO2': 'pO₂ * {:.4g}'.format(rT/p_max)
            }, inplace=True)
            mpfile.add_data_table(identifier, table, name='raw')