def get_string(self, df_head_only=False): from pymatgen import Structure lines, scope = [], [] for key,value in self.document.iterate(): if isinstance(value, Table): lines[-1] = lines[-1].replace('{', '[+').replace('}', ']') header = any([isinstance(col, str) for col in value]) if isinstance(value.index, MultiIndex): value.reset_index(inplace=True) if df_head_only: value = value.head() csv_string = value.to_csv( index=False, header=header, float_format='%g', encoding='utf-8' )[:-1] lines += csv_string.split('\n') if df_head_only: lines.append('...') elif isinstance(value, Structure): from pymatgen.io.cif import CifWriter cif = CifWriter(value, symprec=symprec).__str__() lines.append(make_pair( ''.join([replacements.get(c, c) for c in key]), cif+':end' )) elif Quantity is not None and isinstance(value, Quantity): lines.append(make_pair( value.display_symbols[0], value.pretty_string() )) else: level, key = key # truncate scope level_reduction = bool(level < len(scope)) if level_reduction: del scope[level:] # append scope if value is None: scope.append(''.join([ replacements.get(c, c) for c in key ])) # correct scope to omit internal 'general' section scope_corr = scope if scope[0] == mp_level01_titles[0]: scope_corr = scope[1:] # insert scope line if (value is None and scope_corr) or \ (value is not None and level_reduction): lines.append('\n{' + '.'.join(scope_corr) + '}') # insert key-value line if value is not None: val = str(value) value_lines = [val] if val.startswith('http') \ else textwrap.wrap(val) if len(value_lines) > 1: value_lines = [''] + value_lines + [':end'] lines.append(make_pair( ''.join([replacements.get(c, c) for c in key]), '\n'.join(value_lines) )) return '\n'.join(lines) + '\n'
def insert_default_plot_options(self, pd_obj, k, update_plot_options=None): # make default plot (add entry in 'plots') for each # table, first column as x-column table_name = ''.join([replacements.get(c, c) for c in k]) key = 'default_{}'.format(table_name) plots_dict = _OrderedDict([ (mp_level01_titles[2], _OrderedDict([(key, _OrderedDict([('x', pd_obj.columns[0]), ('table', table_name)]))])) ]) if update_plot_options is not None: plots_dict[mp_level01_titles[2]][key].update(update_plot_options) if mp_level01_titles[2] in self: self.rec_update(plots_dict) else: self[mp_level01_titles[2]] = plots_dict[mp_level01_titles[2]]
def add_data_table(self, identifier, dataframe, name, plot_options=None): """add a datatable to the root-level section Args: identifier (str): MP category ID (`mp_cat_id`) dataframe (pandas.DataFrame): tabular data as Pandas DataFrame name (str): table name, optional if only one table in section plot_options (dict): options for according plotly graph """ # TODO: optional table name, required if multiple tables per root-level section name = ''.join([replacements.get(c, c) for c in name]) self.document.rec_update(nest_dict( Table(dataframe).to_dict(), [identifier, name] )) self.document[identifier].insert_default_plot_options( dataframe, name, update_plot_options=plot_options )
def rec_update(self, other=None, overwrite=False, replace_newlines=True): """https://gist.github.com/Xjs/114831""" # overwrite=False: don't overwrite existing unnested key if other is None: # mode to force RecursiveDicts to be used other = self overwrite = True for key, value in other.items(): if isinstance(key, six.string_types): key = ''.join([replacements.get(c, c) for c in key]) if key in self and \ isinstance(self[key], dict) and \ isinstance(value, dict): # ensure RecursiveDict and update key (w/o underscores) self[key] = RecursiveDict(self[key]) replace_newlines = bool(key != mp_level01_titles[3]) self[key].rec_update(other=value, overwrite=overwrite, replace_newlines=replace_newlines) elif (key in self and overwrite) or key not in self: if isinstance(value, six.string_types) and replace_newlines: self[key] = value.replace('\n', ' ') else: self[key] = value
def get_string(self): from pymatgen import Structure lines, scope = [], [] table_start = mp_level01_titles[1] + '_' for key, value in self.document.iterate(): if isinstance(value, Table): header = any([ bool(isinstance(col, unicode) or isinstance(col, str)) for col in value ]) if isinstance(value.index, MultiIndex): value.reset_index(inplace=True) csv_string = value.to_csv(index=False, header=header, float_format='%g', encoding='utf-8')[:-1] lines += csv_string.decode('utf-8').split('\n') elif isinstance(value, Structure): from pymatgen.io.cif import CifWriter cif = CifWriter(value, symprec=symprec).__str__() lines.append( make_pair(''.join([replacements.get(c, c) for c in key]), cif + ':end')) else: level, key = key key = key if isinstance(key, unicode) else key.decode('utf-8') # truncate scope level_reduction = bool(level < len(scope)) if level_reduction: del scope[level:] # append scope and set delimiters if value is None: is_table = key.startswith(table_start) if is_table: # account for 'data_' prefix key = key[len(table_start):] start, end = '\n[+', ']' else: start, end = '\n{', '}' scope.append(''.join([replacements.get(c, c) for c in key])) # correct scope to omit internal 'general' section scope_corr = scope if scope[0] == mp_level01_titles[0]: scope_corr = scope[1:] # insert scope line if (value is None and scope_corr)or \ (value is not None and level_reduction): lines.append(start + '.'.join(scope_corr) + end) # insert key-value line if value is not None: val = unicode(value) if not isinstance(value, str) else value value_lines = [val] if val.startswith('http') \ else textwrap.wrap(val) if len(value_lines) > 1: value_lines = [''] + value_lines + [':end'] lines.append( make_pair( ''.join([replacements.get(c, c) for c in key]), '\n'.join(value_lines))) return '\n'.join(lines) + '\n'
def from_string(data): # use archieml-python parse to import data rdct = RecursiveDict(archieml.loads(data)) rdct.rec_update() # post-process internal representation of file contents for key in list(rdct.keys()): is_general, root_key = normalize_root_level(key) if is_general: # make part of shared (meta-)data, i.e. nest under `general` at # the beginning of the MPFile if mp_level01_titles[0] not in rdct: rdct[mp_level01_titles[0]] = RecursiveDict() rdct.move_to_end(mp_level01_titles[0], last=False) # normalize identifier key (pop & insert) # using rec_update since we're looping over all entries # also: support data in bare tables (marked-up only by # root-level identifier) by nesting under 'data' value = rdct.pop(key) keys = [mp_level01_titles[0]] if is_general else [] keys.append(root_key) if isinstance(value, list): keys.append('table') rdct.rec_update(nest_dict(value, keys)) # reference to section to iterate or parse as CIF section = rdct[mp_level01_titles[0]][root_key] \ if is_general else rdct[root_key] # iterate to find CSV sections to parse # also parse propnet quantities if isinstance(section, dict): scope = [] for k, v in section.iterate(): level, key = k key = ''.join([replacements.get(c, c) for c in key]) level_reduction = bool(level < len(scope)) if level_reduction: del scope[level:] if v is None: scope.append(key) elif isinstance(v, list) and isinstance(v[0], dict): table = '' for row_dct in v: table = '\n'.join([table, row_dct['value']]) pd_obj = read_csv(table) d = nest_dict(pd_obj.to_dict(), scope + [key]) section.rec_update(d, overwrite=True) if not is_general and level == 0: section.insert_default_plot_options(pd_obj, key) elif Quantity is not None and isinstance(v, six.string_types) and ' ' in v: quantity = Quantity.from_key_value(key, v) d = nest_dict(quantity.as_dict(), scope + [key]) # TODO quantity.symbol.name section.rec_update(d, overwrite=True) # convert CIF strings into pymatgen structures if mp_level01_titles[3] in section: from pymatgen.io.cif import CifParser for name in section[mp_level01_titles[3]].keys(): cif = section[mp_level01_titles[3]].pop(name) parser = CifParser.from_string(cif) structure = parser.get_structures(primitive=False)[0] section[mp_level01_titles[3]].rec_update(nest_dict( structure.as_dict(), [name] )) return MPFile.from_dict(rdct)