def get_contributions(self):
    """Build the two overview tables for the JarvisDft explorer.

    Returns a list of two Tables: one combining NUS and JARVIS columns
    per material, and one with JARVIS-only columns.

    Raises:
        Exception: if no contributions are found.
    """
    docs = self.query_contributions(
        projection={'_id': 1, 'mp_cat_id': 1, 'content': 1}
    )
    if not docs:
        raise Exception('No contributions found for JarvisDft Explorer!')

    data, data_jarvis = [], []
    general_columns = ['mp-id', 'cid', 'formula']
    keys, subkeys = ['NUS', 'JARVIS'], ['id', 'Eₓ', 'CIF']
    columns = general_columns + ['##'.join([k, sk]) for k in keys for sk in subkeys]
    columns_jarvis = general_columns + ['id', 'E', 'ΔE|optB88vdW', 'ΔE|mbj', 'CIF']

    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        contrib = mpfile.hdata[mp_id]['data']
        cid_url = self.get_cid_url(doc)

        # per-source CIF download URLs (empty string when no structure)
        structures = mpfile.sdata.get(mp_id)
        cif_urls = {}
        for k in keys:
            cif_urls[k] = ''
            name = '{}_{}'.format(contrib['formula'], k)
            if structures.get(name) is not None:
                cif_urls[k] = '/'.join([
                    self.preamble.rsplit('/', 1)[0],
                    'explorer', 'materials', doc['_id'], 'cif', name
                ])

        row = [mp_id, cid_url, contrib['formula']]
        for k in keys:
            for sk in subkeys:
                if sk == subkeys[-1]:
                    row.append(cif_urls[k])
                else:
                    # chained .get avoids a KeyError when the k sub-dict
                    # exists but lacks this subkey (the previous
                    # contrib.get(k, {sk: ''})[sk] only covered a missing k)
                    row.append(contrib.get(k, {}).get(sk, ''))
        data.append((mp_id, row))

        row_jarvis = [mp_id, cid_url, contrib['formula']]
        for k in columns_jarvis[len(general_columns):]:
            if k == columns_jarvis[-1]:
                row_jarvis.append(cif_urls[keys[1]])
            else:
                row_jarvis.append(contrib.get(keys[1], {k: ''}).get(k, ''))
        # only keep JARVIS rows that actually carry an id (4th column)
        if row_jarvis[3]:
            data_jarvis.append((mp_id, row_jarvis))

    return [
        Table.from_items(data, orient='index', columns=columns),
        Table.from_items(data_jarvis, orient='index', columns=columns_jarvis)
    ]
def get_ionic_radii(self):
    """Assemble the Ionic Radii overview table for RedoxThermoCsp.

    Each contribution yields one output row per nested dict found in
    its data section (dicts are keyed by stringified row index).
    """
    columns = ['mp-id', 'cid', 'species', 'charge', u'rᵢₒₙ', 'HS/LS', 'CN']
    docs = self.query_contributions(
        criteria={'content.title': 'Ionic Radii'},
        projection={'_id': 1, 'identifier': 1, 'content.data': 1}
    )
    if not docs:
        raise Exception('No contributions found for RedoxThermoCsp Ionic Radii!')
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mpid = mpfile.ids[0]
        hdata = mpfile.hdata[mpid]['data']
        cid_url = '/'.join([
            self.preamble.rsplit('/', 1)[0],
            'explorer', 'materials', doc['_id']
        ])
        # one output row per nested dict in the contribution data
        nrows = sum(1 for v in hdata.values() if isinstance(v, dict))
        rows = [[mpid, cid_url] for _ in range(nrows)]
        for col in columns[2:]:
            for idx, row in enumerate(rows):
                shared = hdata.get(col)  # value shared by all rows, if any
                if shared is None:
                    shared = hdata[str(idx)].get(col, '-')
                row.append(shared)
        for row in rows:
            data.append((mpid, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_ionic_radii(self):
    """Assemble the Ionic Radii overview table for DlrVieten.

    Each contribution yields one output row per nested dict found in
    its data section (dicts are keyed by stringified row index).
    """
    columns = ['mp-id', 'cid', 'species', 'charge', u'rᵢₒₙ', 'HS/LS', 'CN']
    docs = self.query_contributions(
        criteria={'content.title': 'Ionic Radii'},
        projection={'_id': 1, 'mp_cat_id': 1, 'content.data': 1}
    )
    if not docs:
        raise Exception('No contributions found for DlrVieten Ionic Radii!')
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mpid = mpfile.ids[0]
        hdata = mpfile.hdata[mpid]['data']
        cid_url = '/'.join([
            self.preamble.rsplit('/', 1)[0],
            'explorer', 'materials', doc['_id']
        ])
        # one output row per nested dict in the contribution data
        nrows = sum(1 for v in hdata.values() if isinstance(v, dict))
        rows = [[mpid, cid_url] for _ in range(nrows)]
        for col in columns[2:]:
            for idx, row in enumerate(rows):
                shared = hdata.get(col)  # value shared by all rows, if any
                if shared is None:
                    shared = hdata[str(idx)].get(col, '-')
                row.append(shared)
        for row in rows:
            data.append((mpid, row))
    return Table.from_items(data, orient='index', columns=columns)
def read_csv(body, is_data_section=True):
    """run pandas.read_csv on (sub)section body"""
    if not body:
        return None
    from mpcontribs.io.core.components import Table
    if is_data_section:
        options = {'sep': ',', 'header': 0}
        if body.startswith('\nlevel_'):
            options.update({'index_col': [0, 1]})
        # scan for the first non-empty, non-comment line to count columns;
        # split with a growing maxsplit so only the prefix is re-split
        lineno = 1
        while 1:
            candidate = body.split('\n', lineno)[lineno - 1]
            lineno += 1
            if candidate and not candidate.strip().startswith(csv_comment_char):
                break
        ncols = len(candidate.split(options['sep']))
    else:
        # key:value style section
        options = {'sep': ':', 'header': None, 'index_col': 0}
        ncols = 2
    converters = {col: strip_converter for col in range(ncols)}
    return Table(pandas.read_csv(
        StringIO(body), comment=csv_comment_char, skipinitialspace=True,
        squeeze=True, converters=converters, encoding='utf8', **options
    ).dropna(how='all'))
def get_contributions(self, doping):
    """Build the Boltztrap overview table for a given doping type.

    Args:
        doping: either 'n' or 'p'.

    Raises:
        Exception: on invalid doping or when no contributions exist.
    """
    if doping not in ('n', 'p'):
        raise Exception('doping has to be n or p!')
    docs = self.query_contributions(projection={
        '_id': 1, 'mp_cat_id': 1, 'content': 1
    })
    if not docs:
        raise Exception('No contributions found for Boltztrap Explorer!')
    keys, subkeys = [u'mₑᶜᵒⁿᵈ', u"Seebeck"], [u"e₁", u"e₂", u"e₃"]
    columns = ['##'.join(['general', sk]) for sk in ['mp-id', 'cid', 'formula']]
    columns += ['##'.join([k, sk]) for k in keys for sk in subkeys]
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mpid = mpfile.ids[0]
        contrib = mpfile.hdata[mpid]['data']
        link = self.get_cid_url(doc)
        row = [mpid, link, contrib['pretty_formula']]
        for k in keys:
            for sk in subkeys:
                row.append(contrib[k].get(doping, {}).get(sk, ''))
        data.append((mpid, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self, limit=20):
    """Build the CarrierTransport overview table.

    Args:
        limit: maximum number of contributions to query (use URL for all data).
    """
    docs = self.query_contributions(
        projection={'_id': 1, 'identifier': 1, 'content': 1},
        limit=limit)  # use URL for all data
    if not docs:
        raise Exception('No contributions found for CarrierTransport Explorer!')
    keys, subkeys = ['<mₑᶜᵒⁿᵈ>', '<S>', '<σ>', '<S²σ>'], ['n', 'p']
    columns = ['##'.join(['general', sk]) for sk in ['mp-id', 'cid', 'formula']]
    columns += ['##'.join([k, sk]) for k in keys for sk in subkeys]
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mpid = mpfile.ids[0]
        contrib = mpfile.hdata[mpid]
        link = self.get_cid_url(doc)
        row = [mpid, link, contrib['extra_data']['pretty_formula']]
        for k in keys:
            for sk in subkeys:
                # k[1:-1] strips the surrounding <> to match the data keys
                row.append(
                    contrib['data'].get(k[1:-1], {}).get(sk, {}).get('<ε>', 'n.a. mₑ')
                )
        data.append((mpid, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the Dibbs explorer overview table (NMR parameters + CIF link).

    Raises:
        Exception: if no contributions are found.
    """
    data = []
    columns = ['mp-id', 'contribution', 'formula', 'CIF',
               'dISO', 'etaQ', 'QCC', 'B']
    docs = self.query_contributions(
        projection={'_id': 1, 'mp_cat_id': 1, 'content': 1}
    )
    if not docs:
        raise Exception('No contributions found for Dibbs Explorer!')
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        contrib = mpfile.hdata[mp_id]
        cid_url = self.get_cid_url(doc)
        row = [mp_id, cid_url, contrib['formula']]
        cif_url = ''
        structures = mpfile.sdata.get(mp_id)
        if structures:
            # list(...) needed: dict views are not subscriptable in py3
            # (was structures.keys()[0], python-2 only)
            cif_url = '/'.join([
                self.preamble.rsplit('/', 1)[0],
                'explorer', 'materials', doc['_id'],
                'cif', list(structures.keys())[0]
            ])
        row.append(cif_url)
        row += [contrib['data'][col] for col in columns[-4:]]
        data.append((mp_id, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the RedoxThermoCsp overview table.

    Columns are discovered on the fly by walking each contribution's
    nested data; intermediate keys form '##'-joined column headers.
    """
    docs = self.query_contributions(
        projection={'_id': 1, 'identifier': 1, 'content.data': 1})
    if not docs:
        raise Exception(
            'No contributions found for RedoxThermoCsp Explorer!')
    data, columns = [], ['identifier', 'contribution']
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        identifier = mpfile.ids[0]
        contrib = mpfile.hdata[identifier]['data']
        row = [identifier, self.get_cid_url(doc)]
        path = []  # stack of ancestor keys at the current depth
        for (depth, name), value in contrib.iterate():
            if depth < len(path):
                del path[depth:]  # stepped back out of a nested scope
            if value is None:
                path.append(name)  # a branch node: descend
            else:
                header = '##'.join(path + [name]).replace('_', ' ')
                if header not in columns:
                    columns.append(header)
                row.append(value)
        data.append((identifier, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the RedoxThermoCsp overview table.

    Columns are discovered on the fly by walking each contribution's
    nested data; intermediate keys form '##'-joined column headers.
    """
    docs = self.query_contributions(
        projection={'_id': 1, 'identifier': 1, 'content.data': 1})
    if not docs:
        raise Exception('No contributions found for RedoxThermoCsp Explorer!')
    data, columns = [], ['identifier', 'contribution']
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        identifier = mpfile.ids[0]
        contrib = mpfile.hdata[identifier]['data']
        row = [identifier, self.get_cid_url(doc)]
        path = []  # stack of ancestor keys at the current depth
        for (depth, name), value in contrib.iterate():
            if depth < len(path):
                del path[depth:]  # stepped back out of a nested scope
            if value is None:
                path.append(name)  # a branch node: descend
            else:
                header = '##'.join(path + [name]).replace('_', ' ')
                if header not in columns:
                    columns.append(header)
                row.append(value)
        data.append((identifier, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_concentration_functions(composition_table_dict):
    """Return per-element concentration interpolators.

    For categorical X, each element maps to a callable f(category_key, y)
    backed by 1-D interpolation; otherwise a 2-D interpolator over (X, Y).
    Category keys are zero-padded '{:06.2f}' strings.
    """
    meta = composition_table_dict["meta"]
    table = Table.from_dict(composition_table_dict["data"])
    elements = [c for c in table.columns if c not in meta]
    xs = table["X"].values
    ys = table["Y"].values
    categories = table["X"].unique()
    concentration = {}
    conc_sel, interp, y_sel = {}, {}, {}
    functions = RecursiveDict()
    for el in elements:
        concentration[el] = to_numeric(table[el].values) / 100.0
        conc_sel[el], interp[el], y_sel[el] = {}, {}, {}
        if meta["X"] == "category":
            for cat in categories:
                key = "{:06.2f}".format(float(cat))
                sel = where(xs == cat)
                y_sel[el][key] = to_numeric(ys[sel])
                conc_sel[el][key] = to_numeric(concentration[el][sel])
                interp[el][key] = interp1d(y_sel[el][key], conc_sel[el][key])
            # bind el as a default arg to avoid the late-binding closure trap
            functions[el] = lambda a, b, el=el: interp[el][a](b)
        else:
            functions[el] = interp2d(float(xs), float(ys), concentration[el])
    return functions
def get_concentration_functions(composition_table_dict):
    """Return per-element concentration interpolators.

    For categorical X, each element maps to a callable f(category_key, y)
    backed by 1-D interpolation; otherwise a 2-D interpolator over (X, Y).
    Category keys are zero-padded '{:06.2f}' strings.
    """
    meta = composition_table_dict['meta']
    table = Table.from_dict(composition_table_dict['data'])
    elements = [c for c in table.columns if c not in meta]
    xs = table["X"].values
    ys = table["Y"].values
    categories = table["X"].unique()
    concentration = {}
    conc_sel, interp, y_sel = {}, {}, {}
    functions = RecursiveDict()
    for el in elements:
        concentration[el] = to_numeric(table[el].values) / 100.
        conc_sel[el], interp[el], y_sel[el] = {}, {}, {}
        if meta['X'] == 'category':
            for cat in categories:
                key = '{:06.2f}'.format(float(cat))
                sel = where(xs == cat)
                y_sel[el][key] = to_numeric(ys[sel])
                conc_sel[el][key] = to_numeric(concentration[el][sel])
                interp[el][key] = interp1d(y_sel[el][key], conc_sel[el][key])
            # bind el as a default arg to avoid the late-binding closure trap
            functions[el] = lambda a, b, el=el: interp[el][a](b)
        else:
            functions[el] = interp2d(float(xs), float(ys), concentration[el])
    return functions
def get_contributions(self):
    """Build the SWF explorer overview table.

    Columns are discovered dynamically from each contribution's data keys.
    Rows created before later columns appeared are padded at the end so
    every row matches the final column count.
    """
    docs = self.query_contributions(
        projection={'_id': 1, 'mp_cat_id': 1, 'content.data': 1}
    )
    if not docs:
        raise Exception('No contributions found for SWF Explorer!')
    data = []
    columns = ['formula', 'contribution']
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        formula = mpfile.ids[0]
        contrib = mpfile.hdata[formula].get('data')
        if contrib is None:
            continue
        cid_url = self.get_cid_url(doc)
        for k in contrib.keys():
            if k not in columns:
                columns.append(k)
        row = [formula, cid_url]
        for col in columns[2:]:
            row.append(contrib.get(col, ''))
        data.append((formula, row))
    # pad to the FINAL column count (was a hard-coded ncols = 9, which
    # breaks Table.from_items whenever the discovered key set differs)
    ncols = len(columns)
    for _, row in data:
        n = len(row)
        if n < ncols:
            row += [''] * (ncols - n)
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the MpWorkshop2017 overview table.

    Columns are discovered dynamically from each contribution's data
    keys; rows are padded afterwards to the final column count.
    """
    docs = self.query_contributions(
        projection={'_id': 1, 'mp_cat_id': 1, 'content.data': 1}
    )
    if not docs:
        raise Exception('No contributions found for MpWorkshop2017 Explorer!')
    columns = ['mp-id', 'contribution']
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mpid = mpfile.ids[0]
        contrib = mpfile.hdata[mpid]['data']
        link = self.get_cid_url(doc)
        for key in contrib.keys():
            if key not in columns:
                columns.append(key)
        values = [mpid, link] + [contrib.get(c, '') for c in columns[2:]]
        data.append([mpid, values])
    # enforce equal row lengths
    width = len(columns)
    for entry in data:
        missing = width - len(entry[1])
        if missing:
            entry[1] += [''] * missing
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the ALS Beamline overview table (composition + XAS/XMCD extrema)."""
    docs = self.query_contributions(
        projection={'_id': 1, 'identifier': 1, 'content': 1})
    if not docs:
        raise Exception('No contributions found for ALS Beamline Explorer!')
    keys = RecursiveDict([
        ('composition', ['Co', 'Cu', 'Ce']),
        #('position', ['x', 'y']),
        ('XAS', ['min', 'max']),
        ('XMCD', ['min', 'max'])
    ])
    columns = ['formula', 'cid'] + [
        '##'.join([k, sk]) for k, subkeys in keys.items() for sk in subkeys
    ]
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        identifier = mpfile.ids[0]
        contrib = mpfile.hdata[identifier]['data']
        row = [identifier, self.get_cid_url(doc)]
        for k, subkeys in keys.items():
            for sk in subkeys:
                row.append(contrib[k][sk])
        data.append((identifier, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the ALS Beamline overview table (composition + XAS/XMCD extrema)."""
    docs = self.query_contributions(
        projection={'_id': 1, 'mp_cat_id': 1, 'content': 1})
    if not docs:
        raise Exception('No contributions found for ALS Beamline Explorer!')
    keys = RecursiveDict([
        ('composition', ['Co', 'Cu', 'Ce']),
        #('position', ['x', 'y']),
        ('XAS', ['min', 'max']),
        ('XMCD', ['min', 'max'])
    ])
    columns = ['formula', 'cid'] + [
        '##'.join([k, sk]) for k, subkeys in keys.items() for sk in subkeys
    ]
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        identifier = mpfile.ids[0]
        contrib = mpfile.hdata[identifier]['data']
        row = [identifier, self.get_cid_url(doc)]
        for k, subkeys in keys.items():
            for sk in subkeys:
                row.append(contrib[k][sk])
        data.append((identifier, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the Dibbs explorer overview table (NMR parameters + CIF link).

    Raises:
        Exception: if no contributions are found.
    """
    data = []
    columns = [
        'mp-id', 'contribution', 'formula', 'CIF',
        'dISO', 'etaQ', 'QCC', 'B'
    ]
    docs = self.query_contributions(projection={
        '_id': 1, 'mp_cat_id': 1, 'content': 1
    })
    if not docs:
        raise Exception('No contributions found for Dibbs Explorer!')
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        contrib = mpfile.hdata[mp_id]
        cid_url = self.get_cid_url(doc)
        row = [mp_id, cid_url, contrib['formula']]
        cif_url = ''
        structures = mpfile.sdata.get(mp_id)
        if structures:
            # list(...) needed: dict views are not subscriptable in py3
            # (was structures.keys()[0], python-2 only)
            cif_url = '/'.join([
                self.preamble.rsplit('/', 1)[0],
                'explorer', 'materials', doc['_id'],
                'cif', list(structures.keys())[0]
            ])
        row.append(cif_url)
        row += [contrib['data'][col] for col in columns[-4:]]
        data.append((mp_id, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the DlrVieten overview table (composition + CIF link).

    Skips the 'Ionic Radii' contributions (handled elsewhere).

    Raises:
        Exception: if no contributions are found.
    """
    data = []
    columns = ['identifier', 'contribution', 'composition', 'CIF']
    docs = self.query_contributions(
        criteria={'content.title': {'$ne': 'Ionic Radii'}},
        projection={'_id': 1, 'mp_cat_id': 1, 'content': 1}
    )
    if not docs:
        raise Exception('No contributions found for DlrVieten Explorer!')
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        identifier = mpfile.ids[0]
        contrib = mpfile.hdata[identifier]
        cid_url = self.get_cid_url(doc)
        row = [identifier, cid_url, contrib['composition']]
        cif_url = ''
        structures = mpfile.sdata.get(identifier)
        if structures:
            # list(...) needed: dict views are not subscriptable in py3
            # (was structures.keys()[0], python-2 only)
            cif_url = '/'.join([
                self.preamble.rsplit('/', 1)[0],
                'explorer', 'materials', doc['_id'],
                'cif', list(structures.keys())[0]
            ])
        row.append(cif_url)
        data.append((identifier, row))
    return Table.from_items(data, orient='index', columns=columns)
def iterate(self, nested_dict=None):
    """Depth-first traversal of the nested dict, yielding typed items.

    Yields, per entry:
      - (key, Structure) for mappings tagged '@class' == 'Structure'
      - (key, Table) for mappings tagged '@class' == 'Table'
      - ((level, key), None) for nested branch mappings
      - ('<data-title>_<key>', csv_text) for lists of {'value': ...} rows
      - ((level, key), value) for plain leaf values

    self.level tracks the current nesting depth across recursive calls.
    http://stackoverflow.com/questions/10756427/loop-through-all-nested-dictionary-values
    """
    from mpcontribs.io.core.components import Table
    from pymatgen import Structure
    d = self if nested_dict is None else nested_dict
    if nested_dict is None:
        self.level = 0
    for key in list(d.keys()):
        value = d[key]
        if isinstance(value, _Mapping):
            if value.get('@class') == 'Structure':
                yield key, Structure.from_dict(value)
                continue
            # NOTE(review): unlike Structure, a Table also gets its
            # (level, key) header yielded first — confirm intentional
            yield (self.level, key), None
            if value.get('@class') == 'Table':
                yield key, Table.from_dict(value)
                continue
            self.level += 1
            for inner_key, inner_value in self.iterate(nested_dict=value):
                yield inner_key, inner_value
            self.level -= 1
        elif isinstance(value, list) and value and isinstance(value[0], dict):
            # non-empty guard added: an empty list previously raised
            # IndexError on value[0]; it now falls through as a plain leaf
            table = ''  # rebuild csv body from archieml row dicts
            for row_dct in value:
                table = '\n'.join([table, row_dct['value']])
            yield '_'.join([mp_level01_titles[1], key]), table
        else:
            yield (self.level, key), value
def get_contributions(self, limit=20):
    """Build the CarrierTransport overview table.

    Args:
        limit: maximum number of contributions to query (use URL for all data).
    """
    docs = self.query_contributions(projection={
        '_id': 1, 'identifier': 1, 'content': 1
    }, limit=limit)  # use URL for all data
    if not docs:
        raise Exception(
            'No contributions found for CarrierTransport Explorer!')
    keys, subkeys = ['<mₑᶜᵒⁿᵈ>', '<S>', '<σ>', '<S²σ>'], ['n', 'p']
    columns = ['##'.join(['general', sk]) for sk in ['mp-id', 'cid', 'formula']]
    columns += ['##'.join([k, sk]) for k in keys for sk in subkeys]
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mpid = mpfile.ids[0]
        contrib = mpfile.hdata[mpid]
        link = self.get_cid_url(doc)
        row = [mpid, link, contrib['extra_data']['pretty_formula']]
        for k in keys:
            for sk in subkeys:
                # k[1:-1] strips the surrounding <> to match the data keys
                row.append(
                    contrib['data'].get(k[1:-1], {}).get(sk, {}).get('<ε>', 'n.a. mₑ')
                )
        data.append((mpid, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_table(results, letter):
    """Merge measured Δ<letter> data with its fit into one table.

    results holds (δ, y, y_err, fit_x, fit_y); the fit is clipped to the
    measured δ range padded by 15%, then both frames are merged on δ.
    """
    y_col = 'Δ{}'.format(letter)
    err_col, fit_col = y_col + 'ₑᵣᵣ', y_col + ' Fit'
    measured = Table(RecursiveDict([
        ('δ', results[0]), (y_col, results[1]), (err_col, results[2])
    ]))
    lo, hi = map(float, measured['δ'].iloc[[0, -1]])
    margin = 0.15 * (hi - lo)
    keep = (results[3] > lo - margin) & (results[3] < hi + margin)
    measured.set_index('δ', inplace=True)
    fitted = pd.DataFrame(RecursiveDict([
        ('δ', results[3][keep]), (fit_col, results[4][keep])
    ]))
    fitted.set_index('δ', inplace=True)
    merged = pd.concat([measured, fitted], sort=True).sort_index()
    return merged.reset_index().rename(
        columns={'index': 'δ'}).fillna('')[['δ', y_col, err_col, fit_col]]
def get_table(results, letter):
    """Merge measured Δ<letter> data with its fit into one table.

    results holds (δ, y, y_err, fit_x, fit_y); the fit is clipped to the
    measured δ range padded by 15%, then both frames are merged on δ.
    """
    y_col = 'Δ{}'.format(letter)
    err_col, fit_col = y_col + 'ₑᵣᵣ', y_col + ' Fit'
    measured = Table(RecursiveDict([
        ('δ', results[0]), (y_col, results[1]), (err_col, results[2])
    ]))
    lo, hi = map(float, measured['δ'].iloc[[0, -1]])
    margin = 0.15 * (hi - lo)
    keep = (results[3] > lo - margin) & (results[3] < hi + margin)
    measured.set_index('δ', inplace=True)
    fitted = pd.DataFrame(RecursiveDict([
        ('δ', results[3][keep]), (fit_col, results[4][keep])
    ]))
    fitted.set_index('δ', inplace=True)
    merged = pd.concat([measured, fitted], sort=True).sort_index()
    return merged.reset_index().rename(
        columns={'index': 'δ'}).fillna('')[['δ', y_col, err_col, fit_col]]
def get_table(results, letter):
    """Merge measured Δ<letter> data with its fit into one table.

    results holds (δ, y, y_err, fit_x, fit_y); the fit is clipped to the
    measured δ range padded by 15%, then both frames are merged on δ.
    """
    y_col = "Δ{}".format(letter)
    err_col, fit_col = y_col + "ₑᵣᵣ", y_col + " Fit"
    measured = Table(RecursiveDict([
        ("δ", results[0]), (y_col, results[1]), (err_col, results[2])
    ]))
    lo, hi = map(float, measured["δ"].iloc[[0, -1]])
    margin = 0.15 * (hi - lo)
    keep = (results[3] > lo - margin) & (results[3] < hi + margin)
    measured.set_index("δ", inplace=True)
    fitted = pd.DataFrame(RecursiveDict([
        ("δ", results[3][keep]), (fit_col, results[4][keep])
    ]))
    fitted.set_index("δ", inplace=True)
    merged = pd.concat([measured, fitted], sort=True).sort_index()
    return merged.reset_index().rename(
        columns={"index": "δ"}).fillna("")[["δ", y_col, err_col, fit_col]]
def index(request, cid=None, db_type=None, mdb=None):
    """AJAX endpoint for the carrier_transport explorer.

    GET: collect scatter data (<S>, <σ>, <S²σ>) across all contributions
    for both doping types; the power factor (idx 2) is log10-scaled.
    POST: return heatmap data for one table of the contribution `cid`.

    Raises:
        ValueError: wrapping any underlying error.
    """
    try:
        response = None
        if request.method == 'GET':
            axes, dopings = ['<S>', '<σ>', '<S²σ>'], ['n', 'p']
            # k[1:-1] strips the surrounding <> to get the data keys
            projection = dict(
                ('content.data.{}'.format(k[1:-1]), 1) for k in axes)
            projection.update({'identifier': 1})
            docs = mdb.contrib_ad.query_contributions(
                {'project': 'carrier_transport'}, projection=projection)
            response = {'text': []}
            response.update(dict((k, []) for k in axes))
            for doc in docs:
                d = doc['content']['data']
                for doping in dopings:
                    for idx, k in enumerate(axes):
                        kk = k[1:-1]
                        if kk in d and doping in d[kk]:
                            value = d[kk][doping]['<ε>']
                            value = float(value.split()[0])
                            if idx == 2:  # power factor on log scale
                                value = math.log10(value)
                            response['text'].append(doc['identifier'])
                            response[k].append(value)
        elif request.method == 'POST':
            name = json.loads(request.body)['name']
            names = name.split('##')
            key, subkey = names[0][1:-1], names[1][0]
            table_name = '{}({})'.format(key, subkey)
            doc = mdb.contrib_ad.query_contributions(
                {'_id': cid}, projection={
                    '_id': 0, 'content.{}'.format(table_name): 1,
                    'content.data.{}.{}'.format(key, subkey): 1
                })[0]
            table = doc['content'].get(table_name)
            if table:
                table = Table.from_dict(table)
                x = [col.split()[0] for col in table.columns[1:]]
                y = list(table[table.columns[0]])
                z = table[table.columns[1:]].values.tolist()
                if not table_name.startswith('S'):
                    z = [[math.log10(float(c)) for c in r] for r in z]
                title = ' '.join([table_name, names[1].split()[-1]])
                response = {
                    'x': x, 'y': y, 'z': z, 'type': 'heatmap',
                    'colorbar': {'title': title}
                }
    except Exception as ex:
        # fixed stray leading quote in the message
        # (was '"REST Error: "{}"')
        raise ValueError('REST Error: "{}"'.format(str(ex)))
    return {"valid_response": True, 'response': response}
def index(request, cid=None, db_type=None, mdb=None):
    """AJAX endpoint for the carrier_transport explorer.

    GET: collect scatter data (<S>, <σ>, <S²σ>) across all contributions
    for both doping types; the power factor (idx 2) is log10-scaled.
    POST: return heatmap data for one table of the contribution `cid`.

    Raises:
        ValueError: wrapping any underlying error.
    """
    try:
        response = None
        if request.method == 'GET':
            axes, dopings = ['<S>', '<σ>', '<S²σ>'], ['n', 'p']
            # k[1:-1] strips the surrounding <> to get the data keys
            projection = dict(('content.data.{}'.format(k[1:-1]), 1) for k in axes)
            projection.update({'identifier': 1})
            docs = mdb.contrib_ad.query_contributions(
                {'project': 'carrier_transport'}, projection=projection
            )
            response = {'text': []}
            response.update(dict((k, []) for k in axes))
            for doc in docs:
                d = doc['content']['data']
                for doping in dopings:
                    for idx, k in enumerate(axes):
                        kk = k[1:-1]
                        if kk in d and doping in d[kk]:
                            value = d[kk][doping]['<ε>']
                            value = float(value.split()[0])
                            if idx == 2:  # power factor on log scale
                                value = math.log10(value)
                            response['text'].append(doc['identifier'])
                            response[k].append(value)
        elif request.method == 'POST':
            name = json.loads(request.body)['name']
            names = name.split('##')
            key, subkey = names[0][1:-1], names[1][0]
            table_name = '{}({})'.format(key, subkey)
            doc = mdb.contrib_ad.query_contributions(
                {'_id': cid}, projection={
                    '_id': 0, 'content.{}'.format(table_name): 1,
                    'content.data.{}.{}'.format(key, subkey): 1
                }
            )[0]
            table = doc['content'].get(table_name)
            if table:
                table = Table.from_dict(table)
                x = [col.split()[0] for col in table.columns[1:]]
                y = list(table[table.columns[0]])
                z = table[table.columns[1:]].values.tolist()
                if not table_name.startswith('S'):
                    z = [[math.log10(float(c)) for c in r] for r in z]
                title = ' '.join([table_name, names[1].split()[-1]])
                response = {'x': x, 'y': y, 'z': z, 'type': 'heatmap',
                            'colorbar': {'title': title}}
    except Exception as ex:
        # fixed stray leading quote in the message
        # (was '"REST Error: "{}"')
        raise ValueError('REST Error: "{}"'.format(str(ex)))
    return {"valid_response": True, 'response': response}
def get_contributions(self, typ):
    """Build the JarvisDft overview table for 2d or 3d data.

    Args:
        typ: '2d' or '3d'.

    Raises:
        Exception: on invalid typ or when no contributions exist.
    """
    types = ['2d', '3d']
    if typ not in types:
        raise Exception('typ has to be 2d or 3d!')
    docs = self.query_contributions(
        criteria={'content.data.{}'.format(typ): {'$exists': 1}},
        projection={'_id': 1, 'mp_cat_id': 1, 'content': 1})
    if not docs:
        raise Exception('No contributions found for JarvisDft Explorer!')
    data = []
    columns = [
        'mp-id', 'cid', 'CIF', 'final_energy', 'optB88vDW_bandgap',
        'mbj_bandgap', 'bulk_modulus', 'shear_modulus', 'jid'
    ]
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        hdata = mpfile.hdata[mp_id]
        contrib = hdata['data'][typ]
        cid_url = self.get_cid_url(doc)
        cif_url = ''
        structures = mpfile.sdata.get(mp_id)
        if structures:
            # list(...) needed: dict views are not subscriptable in py3
            # (was structures.keys()[0], python-2 only)
            cif_url = '/'.join([
                self.preamble.rsplit('/', 1)[0],
                'explorer', 'materials', doc['_id'],
                'cif', list(structures.keys())[0]
            ])
        row = [mp_id, cid_url, cif_url] + [contrib[k] for k in columns[3:-1]]
        # jid is rendered as a link via the project's details_url template
        row.append(hdata['details_url'].format(contrib['jid']))
        data.append((mp_id, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self, bandgap_range=None):
    """Build the DTU overview table, optionally filtered by bandgap ranges.

    Args:
        bandgap_range: optional dict mapping a (possibly '_'-nested) data
            key to a [min, max] pair; rows outside any range are dropped.

    Raises:
        Exception: if no contributions are found.
    """
    projection = {
        '_id': 1, 'identifier': 1, 'content.formula': 1,
        'content.ICSD': 1, 'content.data': 1
    }
    docs = self.query_contributions(projection=projection)
    if not docs:
        raise Exception('No contributions found for DTU Explorer!')
    data = []
    columns = ['mp-id', 'cid', 'formula', 'ICSD', 'C']
    keys, subkeys = ['ΔE-KS', 'ΔE-QP'], ['indirect', 'direct']
    columns += ['##'.join([k, sk]) for k in keys for sk in subkeys]
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        contrib = mpfile.hdata[mp_id]
        cid_url = self.get_cid_url(doc)
        row = [
            mp_id, cid_url, contrib['formula'], contrib['ICSD'],
            contrib['data']['C']
        ]
        row += [contrib['data'][k][sk] for k in keys for sk in subkeys]
        if bandgap_range:
            in_filter = True
            # .items() replaces py2-only .iteritems()
            for k, v in bandgap_range.items():
                ks = k.split('_')
                val = contrib['data'][ks[0]][ks[1]] \
                    if len(ks) > 1 else contrib['data'][k]
                dec = float(val.split()[0])  # strip unit suffix
                if dec < v[0] or dec > v[1]:
                    in_filter = False
                    break
            if in_filter:
                data.append((mp_id, row))
        else:
            data.append((mp_id, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the PerovskitesDiffusion overview table.

    Columns are taken from the first contribution's data keys.
    NOTE(review): later docs with a different key set would produce
    mismatched rows — presumably all contributions share one schema;
    confirm against the dataset.

    Raises:
        Exception: if no contributions are found.
    """
    docs = self.query_contributions(
        projection={'_id': 1, 'mp_cat_id': 1, 'content.data': 1,
                    'content.abbreviations': 1}
    )
    if not docs:
        raise Exception('No contributions found for PerovskitesDiffusion Explorer!')
    data, columns = [], None
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        contrib = mpfile.hdata[mp_id]['data']
        cid_url = self.get_cid_url(doc)
        if columns is None:
            # list(...) needed: py3 dict views cannot be concatenated
            # to a list (was `+ contrib.keys()` / `+ contrib.values()`)
            columns = ['mp-id', 'contribution'] + list(contrib.keys())
        row = [mp_id, cid_url] + list(contrib.values())
        data.append((mp_id, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_all_spectra(self, typ):
    """Collect one spectrum per contribution into a single Plot.

    Args:
        typ: 'XAS' or 'XMCD' — the spectrum column to extract.

    Raises:
        Exception: on invalid typ or when no contributions exist.
    """
    types = ['XAS', 'XMCD']
    if typ not in types:
        raise Exception('{} not in {}'.format(typ, types))
    docs = self.query_contributions(
        projection={'_id': 1, 'identifier': 1, 'content.Co': 1})
    if not docs:
        raise Exception('No contributions found for ALS Beamline Explorer!')
    table = Table()
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        identifier = mpfile.ids[0]
        spectrum = mpfile.tdata[identifier]['Co']
        # the energy axis is shared; take it from the first contribution
        if 'Energy' not in table.columns:
            table['Energy'] = spectrum['Energy']
        table[identifier] = spectrum[typ]
    return Plot({'x': 'Energy', 'table': typ, 'showlegend': False}, table)
def get_contributions(self, phase=None):
    """Build the MnO2 Phase Selection overview table.

    Args:
        phase: optional phase name to filter on; when None, all phases
            are returned and a 'phase' column is added.

    Raises:
        Exception: if no contributions are found.
    """
    data = []
    phase_query_key = {'$exists': 1} if phase is None else phase
    columns = ['mp-id', 'contribution', 'formula']
    if phase is None:
        columns.append('phase')
    columns += ['dH (formation)', 'dH (hydration)', 'GS?', 'CIF']
    docs = self.query_contributions(
        criteria={
            'content.doi': '10.1021/jacs.6b11301',
            'content.data.Phase': phase_query_key
        },
        projection={
            '_id': 1, 'mp_cat_id': 1, 'content.data': 1,
            'content.{}'.format(mp_level01_titles[3]): 1
        }
    )
    if not docs:
        raise Exception('No contributions found for MnO2 Phase Selection Explorer!')
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        contrib = mpfile.hdata[mp_id]['data']
        cid_url = self.get_cid_url(doc)
        row = [mp_id, cid_url, contrib['Formula']]
        if phase is None:
            row.append(contrib['Phase'])
        row += [contrib['dHf'], contrib['dHh'], contrib['GS']]
        cif_url = ''
        structures = mpfile.sdata.get(mp_id)
        if structures:
            # list(...) needed: dict views are not subscriptable in py3
            # (was structures.keys()[0], python-2 only)
            cif_url = '/'.join([
                self.preamble.rsplit('/', 1)[0],
                'explorer', 'materials', doc['_id'],
                'cif', list(structures.keys())[0]
            ])
        row.append(cif_url)
        data.append((mp_id, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the PCFC overview table; columns come from the first doc's keys."""
    docs = self.query_contributions(
        projection={'_id': 1, 'mp_cat_id': 1, 'content.data': 1}
    )
    if not docs:
        raise Exception('No contributions found for PCFC Explorer!')
    columns = ['mp-id', 'cid']
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mpid = mpfile.ids[0]
        contrib = mpfile.hdata[mpid]['data']
        link = self.get_cid_url(doc)
        # extend columns once, from the first contribution seen
        if len(columns) == 2:
            columns += [k for k in contrib.keys()]
        values = [mpid, link] + [contrib.get(c, '') for c in columns[2:]]
        data.append((mpid, values))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self, phase=None):
    """Build the MnO2 Phase Selection overview table.

    Args:
        phase: optional phase name to filter on; when None, all phases
            are returned and a 'phase' column is added.

    Raises:
        Exception: if no contributions are found.
    """
    data = []
    phase_query_key = {'$exists': 1} if phase is None else phase
    columns = ['mp-id', 'contribution', 'formula']
    if phase is None:
        columns.append('phase')
    columns += ['ΔH', 'ΔHₕ', 'GS?', 'CIF']
    docs = self.query_contributions(
        criteria={'content.data.Phase': phase_query_key},
        projection={
            '_id': 1, 'mp_cat_id': 1, 'content.data': 1,
            'content.{}'.format(mp_level01_titles[3]): 1
        }
    )
    if not docs:
        raise Exception('No contributions found for MnO2 Phase Selection Explorer!')
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        contrib = mpfile.hdata[mp_id]['data']
        cid_url = self.get_cid_url(doc)
        row = [mp_id, cid_url, contrib['Formula'].replace(' ', '')]
        if phase is None:
            row.append(contrib['Phase'])
        row += [contrib['ΔH'], contrib['ΔHₕ'], contrib['GS']]
        cif_url = ''
        structures = mpfile.sdata.get(mp_id)
        if structures:
            # list(...) needed: dict views are not subscriptable in py3
            # (was structures.keys()[0], python-2 only)
            cif_url = '/'.join([
                self.preamble.rsplit('/', 1)[0],
                'explorer', 'materials', doc['_id'],
                'cif', list(structures.keys())[0]
            ])
        row.append(cif_url)
        data.append((mp_id, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self, bandgap_range=None):
    """Build the DTU overview table, optionally filtered by bandgap ranges.

    Args:
        bandgap_range: optional dict mapping a (possibly '_'-nested) data
            key to a [min, max] pair; rows outside any range are dropped.

    Raises:
        Exception: if no contributions are found.
    """
    projection = {
        '_id': 1, 'mp_cat_id': 1, 'content.formula': 1,
        'content.ICSD': 1, 'content.data': 1
    }
    docs = self.query_contributions(projection=projection)
    if not docs:
        raise Exception('No contributions found for DTU Explorer!')
    data = []
    columns = ['mp-id', 'cid', 'formula', 'ICSD', 'C']
    keys, subkeys = ['ΔE-KS', 'ΔE-QP'], ['indirect', 'direct']
    columns += ['##'.join([k, sk]) for k in keys for sk in subkeys]
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        contrib = mpfile.hdata[mp_id]
        cid_url = self.get_cid_url(doc)
        row = [mp_id, cid_url, contrib['formula'], contrib['ICSD'],
               contrib['data']['C']]
        row += [contrib['data'][k][sk] for k in keys for sk in subkeys]
        if bandgap_range:
            in_filter = True
            # .items() replaces py2-only .iteritems()
            for k, v in bandgap_range.items():
                ks = k.split('_')
                val = contrib['data'][ks[0]][ks[1]] \
                    if len(ks) > 1 else contrib['data'][k]
                dec = float(val.split()[0])  # strip unit suffix
                if dec < v[0] or dec > v[1]:
                    in_filter = False
                    break
            if in_filter:
                data.append((mp_id, row))
        else:
            data.append((mp_id, row))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the SWF explorer overview table.

    Columns are discovered dynamically from each contribution's data keys.
    Rows created before later columns appeared are padded at the end so
    every row matches the final column count.
    """
    docs = self.query_contributions(projection={
        '_id': 1, 'identifier': 1, 'content.data': 1
    })
    if not docs:
        raise Exception('No contributions found for SWF Explorer!')
    data = []
    columns = ['formula', 'contribution']
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        formula = mpfile.ids[0]
        contrib = mpfile.hdata[formula].get('data')
        if contrib is None:
            continue
        cid_url = self.get_cid_url(doc)
        for k in contrib.keys():
            if k not in columns:
                columns.append(k)
        row = [formula, cid_url]
        for col in columns[2:]:
            row.append(contrib.get(col, ''))
        data.append((formula, row))
    # pad to the FINAL column count (was a hard-coded ncols = 9, which
    # breaks Table.from_items whenever the discovered key set differs)
    ncols = len(columns)
    for _, row in data:
        n = len(row)
        if n < ncols:
            row += [''] * (ncols - n)
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the simple DTU overview table (fixed ΔE-KS / ΔE-QP columns)."""
    docs = self.query_contributions(
        projection={'_id': 1, 'mp_cat_id': 1, 'content': 1})
    if not docs:
        raise Exception('No contributions found for DTU Explorer!')
    columns = [
        'mp-id', 'cid', 'indirect ΔE-KS', 'direct ΔE-KS', 'C',
        'indirect ΔE-QP', 'direct ΔE-QP'
    ]
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mpid = mpfile.ids[0]
        contrib = mpfile.hdata[mpid]['data']
        link = self.get_cid_url(doc)
        data.append((mpid, [
            mpid,
            link,
            contrib['ΔE-KS']['indirect'],
            contrib['ΔE-KS']['direct'],
            contrib['C'],
            contrib['ΔE-QP']['indirect'],
            contrib['ΔE-QP']['direct'],
        ]))
    return Table.from_items(data, orient='index', columns=columns)
def get_contributions(self):
    """Build the PCFC overview table; columns come from the first doc's keys."""
    docs = self.query_contributions(projection={
        '_id': 1, 'identifier': 1, 'content.data': 1
    })
    if not docs:
        raise Exception('No contributions found for PCFC Explorer!')
    columns = ['mp-id', 'cid']
    data = []
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mpid = mpfile.ids[0]
        contrib = mpfile.hdata[mpid]['data']
        link = self.get_cid_url(doc)
        # extend columns once, from the first contribution seen
        if len(columns) == 2:
            columns += [k for k in contrib.keys()]
        values = [mpid, link] + [contrib.get(c, '') for c in columns[2:]]
        data.append((mpid, values))
    return Table.from_items(data, orient='index', columns=columns)
def run(mpfile, **kwargs):
    """Parse gzipped JSON files from ``input_dir`` and populate ``mpfile``
    with hierarchical transport data plus per-property data tables.

    NOTE(review): Python 2 code (``dict.iteritems``, list-returning ``map``
    whose result is consumed twice and passed to ``np.mean``).
    """
    # extract data from json files
    input_dir = mpfile.hdata.general['input_dir']
    for idx, obj in enumerate(scandir(input_dir)):
        # file names presumably look like "<prefix>_<mpid>.<ext>" — the
        # mp-id is recovered from the part after the last underscore
        mpid = obj.name.split('.', 1)[0].rsplit('_', 1)[1]
        print(mpid)
        input_file = gzip.open(obj.path, 'rb')
        try:
            data = json.loads(input_file.read())
            # filter out metals: require a GGA gap of at least 0.1 eV
            if 'GGA' not in data or 'GGA' not in data['gap'] or data['gap']['GGA'] < 0.1:
                print('GGA gap < 0.1 -> skip')
                continue
            # add hierarchical data (nested key-values) at a fixed
            # temperature (300 K) and doping level (1e18 cm⁻³)
            hdata = RecursiveDict()
            T, lvl, S2 = '300', '1e+18', None
            pf_key = 'S²σ'
            hdata['temperature'] = T + ' K'
            hdata['doping_level'] = lvl + ' cm⁻³'
            variables = [
                {'key': 'cond_eff_mass', 'name': 'mₑᶜᵒⁿᵈ', 'unit': 'mₑ'},
                {'key': 'seebeck_doping', 'name': 'S', 'unit': 'μV/K'},
                {'key': 'cond_doping', 'name': 'σ', 'unit': '(Ωms)⁻¹'},
            ]
            eigs_keys = ['ε₁', 'ε₂', 'ε₃', '<ε>']
            for v in variables:
                hdata[v['name']] = RecursiveDict()
                for doping_type in ['p', 'n']:
                    if doping_type in data['GGA'][v['key']]:
                        d = data['GGA'][v['key']][doping_type][T][lvl]
                        # eigenvalues of the property tensor (or a plain list)
                        eigs = map(float, d if isinstance(d, list) else d['eigs'])
                        hdata[v['name']][doping_type] = RecursiveDict(
                            (eigs_keys[neig], clean_value(eig, v['unit']))
                            for neig, eig in enumerate(eigs)
                        )
                        # '<ε>' entry: mean of the eigenvalues
                        hdata[v['name']][doping_type][eigs_keys[-1]] = clean_value(np.mean(eigs), v['unit'])
                        if v['key'] == 'seebeck_doping':
                            # S² saved for the power-factor computation below;
                            # relies on seebeck preceding cond in `variables`
                            S2 = np.dot(d['tensor'], d['tensor'])
                        elif v['key'] == 'cond_doping':
                            # power factor S²σ (value labeled μW/(cmK²s))
                            pf = np.mean(np.linalg.eigh(np.dot(S2, d['tensor']))[0]) * 1e-8
                            if pf_key not in hdata:
                                hdata[pf_key] = RecursiveDict()
                            hdata[pf_key][doping_type] = {eigs_keys[-1]: clean_value(pf, 'μW/(cmK²s)')}
            mpfile_data = nest_dict(hdata, ['data'])
            # build data and max values for seebeck, conductivity and kappa
            # max/min values computed using numpy. It may be better to code
            # it in pure python.
            keys = ['pretty_formula', 'volume']
            hdata = RecursiveDict((k, data[k]) for k in keys)
            hdata['volume'] = clean_value(hdata['volume'], 'ų')
            hdata['bandgap'] = clean_value(data['gap']['GGA'], 'eV')
            cols = ['value', 'temperature', 'doping']
            tables = RecursiveDict()
            props = RecursiveDict()
            props['seebeck_doping'] = ['S', 'μV/K']
            props['cond_doping'] = ['σ', '(Ωms)⁻¹']
            props['kappa_doping'] = ['κₑ', 'W/(mKs)']
            for prop_name, (lbl, unit) in props.iteritems():
                # TODO install Symbola font if you see squares here (https://fonts2u.com/symbola.font)
                # and select it as standard font in your browser (leave other fonts as is, esp. fixed width)
                tables[lbl] = RecursiveDict()
                # label for the extremum entry, e.g. 'Sₑₓₜᵣ' or 'κₑ₋ₑₓₜᵣ'
                hlbl = lbl+'₋' if len(lbl) > 1 else lbl
                hlbl += 'ₑₓₜᵣ'
                hdata[hlbl] = RecursiveDict()
                for doping_type in ['p', 'n']:
                    prop = data['GGA'][prop_name][doping_type]
                    prop_averages, dopings, columns = [], None, ['T [K]']
                    temps = sorted(map(int, prop.keys()))
                    for temp in temps:
                        row = [temp]
                        if dopings is None:
                            # assumes identical doping levels at every
                            # temperature — TODO confirm against the data
                            dopings = sorted(map(float, prop[str(temp)].keys()))
                        for doping in dopings:
                            doping_str = '%.0e' % doping
                            if len(columns) <= len(dopings):
                                # one column per doping level (built once)
                                columns.append('{} cm⁻³ [{}]'.format(doping_str, unit))
                            eigs = prop[str(temp)][doping_str]['eigs']
                            row.append(np.mean(eigs))
                        prop_averages.append((temp, row))
                    tables[lbl][doping_type] = Table.from_items(
                        prop_averages, orient='index', columns=columns
                    )
                    # drop the leading temperature column before the search
                    arr_prop_avg = np.array([item[1] for item in prop_averages])[:,1:]
                    max_v = np.max(arr_prop_avg)
                    # n-type Seebeck and kappa: the relevant extremum is the minimum
                    if prop_name[0] == 's' and doping_type == 'n':
                        max_v = np.min(arr_prop_avg)
                    if prop_name[0] == 'k':
                        max_v = np.min(arr_prop_avg)
                    # (row, col) of the extremum -> temperature and doping level
                    arg_max = np.argwhere(arr_prop_avg==max_v)[0]
                    vals = [
                        clean_value(max_v, unit),
                        clean_value(temps[arg_max[0]], 'K'),
                        clean_value(dopings[arg_max[1]], 'cm⁻³')
                    ]
                    hdata[hlbl][doping_type] = RecursiveDict(
                        (k, v) for k, v in zip(cols, vals)
                    )
            mpfile_data.rec_update(nest_dict(hdata, ['extra_data']))
            mpfile.add_hierarchical_data(mpfile_data, identifier=data['mp_id'])
            # register one data table per property and doping type
            for lbl, dct in tables.iteritems():
                for doping_type, table in dct.iteritems():
                    mpfile.add_data_table(
                        data['mp_id'], table, name='{}({})'.format(lbl, doping_type)
                    )
        finally:
            input_file.close()
def get_contributions(self):
    """Build the NUS/JARVIS comparison tables.

    Returns a list of two Tables: one with NUS and JARVIS subkey columns
    side by side, and one with the extended JARVIS-only columns (rows kept
    only when a JARVIS id is present).

    Raises an Exception when the query returns no documents.
    """
    docs = self.query_contributions(
        projection={'_id': 1, 'identifier': 1, 'content': 1}
    )
    if not docs:
        raise Exception('No contributions found for JarvisDft Explorer!')
    data, data_jarvis = [], []
    general_columns = ['mp-id', 'cid', 'formula']
    keys, subkeys = ['NUS', 'JARVIS'], ['id', 'Eₓ', 'CIF']
    columns = general_columns + [
        '##'.join([k, sk]) for k in keys for sk in subkeys
    ]
    columns_jarvis = general_columns + [
        'id', 'E', 'ΔE|optB88vdW', 'ΔE|mbj', 'CIF'
    ]
    for doc in docs:
        mpfile = MPFile.from_contribution(doc)
        mp_id = mpfile.ids[0]
        contrib = mpfile.hdata[mp_id]['data']
        cid_url = self.get_cid_url(doc)
        structures = mpfile.sdata.get(mp_id)
        cif_urls = {}
        for k in keys:
            cif_urls[k] = ''
            name = '{}_{}'.format(contrib['formula'], k)
            # guard against contributions without any structures: the
            # original called structures.get(...) unconditionally and
            # raised AttributeError when sdata lacks this mp_id
            if structures is not None and structures.get(name) is not None:
                cif_urls[k] = '/'.join([
                    self.preamble.rsplit('/', 1)[0], 'explorer',
                    'materials', doc['_id'], 'cif', name
                ])
        row = [mp_id, cid_url, contrib['formula']]
        for k in keys:
            for sk in subkeys:
                if sk == subkeys[-1]:
                    row.append(cif_urls[k])
                else:
                    # use .get on the subkey as well: a contribution may
                    # have the NUS/JARVIS section but lack an individual
                    # subkey (the original indexed [sk] and could raise
                    # KeyError); matches the jarvis-row handling below
                    row.append(contrib.get(k, {}).get(sk, ''))
        data.append((mp_id, row))
        row_jarvis = [mp_id, cid_url, contrib['formula']]
        for k in columns_jarvis[len(general_columns):]:
            if k == columns_jarvis[-1]:
                row_jarvis.append(cif_urls[keys[1]])
            else:
                row_jarvis.append(contrib.get(keys[1], {}).get(k, ''))
        # keep only rows that actually carry a JARVIS id
        if row_jarvis[3]:
            data_jarvis.append((mp_id, row_jarvis))
    return [
        Table.from_items(data, orient='index', columns=columns),
        Table.from_items(data_jarvis, orient='index', columns=columns_jarvis)
    ]
def run(mpfile, **kwargs):
    """Parse gzipped JSON files from ``input_dir`` and populate ``mpfile``
    with hierarchical transport data plus per-property data tables.

    NOTE(review): Python 2 code (``dict.iteritems``, list-returning ``map``
    whose result is consumed twice and passed to ``np.mean``).
    """
    # extract data from json files
    input_dir = mpfile.hdata.general['input_dir']
    for idx, obj in enumerate(scandir(input_dir)):
        # file names presumably look like "<prefix>_<mpid>.<ext>" — the
        # mp-id is recovered from the part after the last underscore
        mpid = obj.name.split('.', 1)[0].rsplit('_', 1)[1]
        print(mpid)
        input_file = gzip.open(obj.path, 'rb')
        try:
            data = json.loads(input_file.read())
            # filter out metals: require a GGA gap of at least 0.1 eV
            if 'GGA' not in data or 'GGA' not in data[
                    'gap'] or data['gap']['GGA'] < 0.1:
                print('GGA gap < 0.1 -> skip')
                continue
            # add hierarchical data (nested key-values) at a fixed
            # temperature (300 K) and doping level (1e18 cm⁻³)
            hdata = RecursiveDict()
            T, lvl, S2 = '300', '1e+18', None
            pf_key = 'S²σ'
            hdata['temperature'] = T + ' K'
            hdata['doping_level'] = lvl + ' cm⁻³'
            variables = [
                {'key': 'cond_eff_mass', 'name': 'mₑᶜᵒⁿᵈ', 'unit': 'mₑ'},
                {'key': 'seebeck_doping', 'name': 'S', 'unit': 'μV/K'},
                {'key': 'cond_doping', 'name': 'σ', 'unit': '(Ωms)⁻¹'},
            ]
            eigs_keys = ['ε₁', 'ε₂', 'ε₃', '<ε>']
            for v in variables:
                hdata[v['name']] = RecursiveDict()
                for doping_type in ['p', 'n']:
                    if doping_type in data['GGA'][v['key']]:
                        d = data['GGA'][v['key']][doping_type][T][lvl]
                        # eigenvalues of the property tensor (or a plain list)
                        eigs = map(float, d if isinstance(d, list) else d['eigs'])
                        hdata[v['name']][doping_type] = RecursiveDict(
                            (eigs_keys[neig], clean_value(eig, v['unit']))
                            for neig, eig in enumerate(eigs))
                        # '<ε>' entry: mean of the eigenvalues
                        hdata[v['name']][doping_type][
                            eigs_keys[-1]] = clean_value(
                                np.mean(eigs), v['unit'])
                        if v['key'] == 'seebeck_doping':
                            # S² saved for the power-factor computation below;
                            # relies on seebeck preceding cond in `variables`
                            S2 = np.dot(d['tensor'], d['tensor'])
                        elif v['key'] == 'cond_doping':
                            # power factor S²σ (value labeled μW/(cmK²s))
                            pf = np.mean(
                                np.linalg.eigh(np.dot(S2, d['tensor']))[0]) * 1e-8
                            if pf_key not in hdata:
                                hdata[pf_key] = RecursiveDict()
                            hdata[pf_key][doping_type] = {
                                eigs_keys[-1]: clean_value(pf, 'μW/(cmK²s)')
                            }
            mpfile_data = nest_dict(hdata, ['data'])
            # build data and max values for seebeck, conductivity and kappa
            # max/min values computed using numpy. It may be better to code
            # it in pure python.
            keys = ['pretty_formula', 'volume']
            hdata = RecursiveDict((k, data[k]) for k in keys)
            hdata['volume'] = clean_value(hdata['volume'], 'ų')
            hdata['bandgap'] = clean_value(data['gap']['GGA'], 'eV')
            cols = ['value', 'temperature', 'doping']
            tables = RecursiveDict()
            props = RecursiveDict()
            props['seebeck_doping'] = ['S', 'μV/K']
            props['cond_doping'] = ['σ', '(Ωms)⁻¹']
            props['kappa_doping'] = ['κₑ', 'W/(mKs)']
            for prop_name, (lbl, unit) in props.iteritems():
                # TODO install Symbola font if you see squares here (https://fonts2u.com/symbola.font)
                # and select it as standard font in your browser (leave other fonts as is, esp. fixed width)
                tables[lbl] = RecursiveDict()
                # label for the extremum entry, e.g. 'Sₑₓₜᵣ' or 'κₑ₋ₑₓₜᵣ'
                hlbl = lbl + '₋' if len(lbl) > 1 else lbl
                hlbl += 'ₑₓₜᵣ'
                hdata[hlbl] = RecursiveDict()
                for doping_type in ['p', 'n']:
                    prop = data['GGA'][prop_name][doping_type]
                    prop_averages, dopings, columns = [], None, ['T [K]']
                    temps = sorted(map(int, prop.keys()))
                    for temp in temps:
                        row = [temp]
                        if dopings is None:
                            # assumes identical doping levels at every
                            # temperature — TODO confirm against the data
                            dopings = sorted(map(float, prop[str(temp)].keys()))
                        for doping in dopings:
                            doping_str = '%.0e' % doping
                            if len(columns) <= len(dopings):
                                # one column per doping level (built once)
                                columns.append('{} cm⁻³ [{}]'.format(
                                    doping_str, unit))
                            eigs = prop[str(temp)][doping_str]['eigs']
                            row.append(np.mean(eigs))
                        prop_averages.append((temp, row))
                    tables[lbl][doping_type] = Table.from_items(
                        prop_averages, orient='index', columns=columns)
                    # drop the leading temperature column before the search
                    arr_prop_avg = np.array(
                        [item[1] for item in prop_averages])[:, 1:]
                    max_v = np.max(arr_prop_avg)
                    # n-type Seebeck and kappa: the relevant extremum is the minimum
                    if prop_name[0] == 's' and doping_type == 'n':
                        max_v = np.min(arr_prop_avg)
                    if prop_name[0] == 'k':
                        max_v = np.min(arr_prop_avg)
                    # (row, col) of the extremum -> temperature and doping level
                    arg_max = np.argwhere(arr_prop_avg == max_v)[0]
                    vals = [
                        clean_value(max_v, unit),
                        clean_value(temps[arg_max[0]], 'K'),
                        clean_value(dopings[arg_max[1]], 'cm⁻³')
                    ]
                    hdata[hlbl][doping_type] = RecursiveDict(
                        (k, v) for k, v in zip(cols, vals))
            mpfile_data.rec_update(nest_dict(hdata, ['extra_data']))
            mpfile.add_hierarchical_data(mpfile_data, identifier=data['mp_id'])
            # register one data table per property and doping type
            for lbl, dct in tables.iteritems():
                for doping_type, table in dct.iteritems():
                    mpfile.add_data_table(data['mp_id'], table,
                                          name='{}({})'.format(
                                              lbl, doping_type))
        finally:
            input_file.close()