def get_structure(formula, prototype=None, base_dir="./", c=None, **filters):
    """Build an ``Atoms`` object for every database row matching *formula*.

    Rows are read from ``../c2db.db`` relative to the directory of this module.

    Args:
        formula: Chemical formula used in the database query.
        prototype: Optional prototype name that further restricts the query.
        base_dir: Not used by this function; kept so existing callers work.
        c: Optional new value for the last diagonal element of the cell.  It
            is only applied when it is an ``int`` or ``float``; in that case
            the structure is made fully periodic and the atoms are centered.
        **filters: Additional keyword filters forwarded to ``db.select``.

    Returns:
        dict mapping ``"<formula>-<prototype>"`` to the ``Atoms`` object built
        from the corresponding row.
    """
    db_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           "../c2db.db")
    db = ase.db.connect(db_file)

    # Always forward the extra filters; they used to be dropped silently
    # whenever a prototype was given.
    query = dict(filters, formula=formula)
    if prototype is not None:
        query["prototype"] = prototype

    resize = (c is not None) and isinstance(c, (float, int))
    candidates = {}
    for mol in db.select(**query):
        symbol = mol.formula
        pos = mol.positions
        cell = mol.cell
        pbc = mol.pbc
        if resize:
            # The array coming from the row may be read-only.
            cell.setflags(write=True)
            cell[-1][-1] = c
            pbc = (True, True, True)  # Use full periodic
        name = "{}-{}".format(symbol, mol.prototype)
        atoms = Atoms(symbol, positions=pos, cell=cell, pbc=pbc)
        if resize:
            atoms.center()  # center the atoms, although not really needed
        candidates[name] = atoms
    return candidates
def analyze(filename, tag='results'):
    """Summarise optimizer runs stored in an ASE database as two CSV files.

    Writes ``<tag>-iterations.csv`` (step counts) and ``<tag>-time.csv``
    (times divided by the smallest accepted time of the formula).  A run is
    accepted when its energy is within 0.01 of the lowest energy recorded for
    the same formula and it used fewer than 100 steps; every other run is
    stored as ``(9999, inf)`` and printed as a blank cell.

    Args:
        filename: Path handed to ``ase.db.connect``.
        tag: Prefix of the two output file names.
    """
    db = ase.db.connect(filename)

    # Pass 1: formulas in order of first appearance and all their energies.
    formulas = []
    energies = defaultdict(list)
    for row in db.select(sort='formula'):
        if row.formula not in formulas:
            formulas.append(row.formula)
        energies[row.formula].append(row.get('energy', inf))
    emin = {name: min(values) for name, values in energies.items()}

    # Pass 2: one (steps, time) pair per row, grouped by optimizer.
    mintimes = defaultdict(lambda: 999999)
    data = defaultdict(list)
    for row in db.select(sort='formula'):
        nsteps, t = 9999, inf
        if row.get('energy', inf) - emin[row.formula] < 0.01:
            elapsed = row.t
            if row.n < 100:
                nsteps, t = row.n, elapsed
                mintimes[row.formula] = min(mintimes[row.formula], elapsed)
        data[row.optimizer].append((nsteps, t))

    print(formulas)
    header = 'optimizer,' + ','.join(formulas)

    by_steps = sorted(data.items(),
                      key=lambda item: sum(pair[0] for pair in item[1]))
    with open(tag + '-iterations.csv', 'w') as out:
        print(header, file=out)
        for optimizer, results in by_steps:
            cells = ['{:3}'.format(pair[0]) if pair[0] < 100 else ' '
                     for pair in results]
            print('{:18},{}'.format(optimizer, ','.join(cells)), file=out)

    # Times relative to the best accepted time; results are paired with the
    # formulas by position.
    relative = {
        optimizer: [(steps, elapsed / mintimes[name])
                    for (steps, elapsed), name in zip(results, formulas)]
        for optimizer, results in data.items()
    }
    by_time = sorted(relative.items(),
                     key=lambda item: sum(min(pair[1], 999)
                                          for pair in item[1]))
    with open(tag + '-time.csv', 'w') as out:
        print(header, file=out)
        for optimizer, results in by_time:
            cells = ['{:8.1f}'.format(pair[1]) if pair[0] < 100 else ' '
                     for pair in results]
            print('{:18},{}'.format(optimizer, ','.join(cells)), file=out)
def main(db_filename, folder_name):
    """Write every row of an ASE database into a folder tree of .traj files.

    ``publication.txt`` is created in *folder_name* from the first row seen
    while the file does not exist yet.  Each structure is written to
    ``<folder>/<dft_code>/<dft_functional>/<reaction>/<substrate>/<facet>/
    <adsorbate>.traj`` with the values taken from the row's key-value pairs.

    Args:
        db_filename: Path handed to ``ase.db.connect``.
        folder_name: Root directory of the exported tree.
    """
    db = ase.db.connect(db_filename)
    mkdir_p(folder_name)
    publication_path = os.path.join(folder_name, 'publication.txt')

    for row in db.select():
        if not os.path.exists(publication_path):
            with open(publication_path, 'w') as outfile:
                json.dump(publication_data_from_row(row), outfile)

        atoms = row.toatoms()
        kvp = row.key_value_pairs
        path_parts = (
            folder_name,
            kvp.get('dft_code', ''),
            kvp.get('dft_functional', ''),
            kvp.get('reaction', ''),
            kvp.get('substrate', ''),
            kvp.get('facet', '').strip('()'),
        )
        adsorbate = kvp.get('adsorbate', '')

        # Path components that render as "None" are removed from the path.
        out_dirname = "{}/{}/{}/{}/{}/{}".format(*path_parts)
        out_dirname = out_dirname.replace('/None', '')
        print(out_dirname)

        out_trajname = "{}/{}.traj".format(out_dirname, adsorbate)
        mkdir_p(out_dirname)
        ase.io.write(out_trajname, atoms)
def main():
    """Plot lattice parameter against Mg concentration, computed vs. CSV data.

    Converged rows of ``ce_hydrostatic.db`` supply the computed points, a
    straight line is fitted through them, and the values read from
    ``almg_lattice_parameter.csv`` are drawn in the same axes.
    """
    db = ase.db.connect("ce_hydrostatic.db")
    volumes = []
    concs = []
    for row in db.select(converged=1):
        atoms = db.get_atoms(id=row.id)
        volumes.append(volume(atoms))
        concs.append(mg_conc(atoms))
    lattice_params = [
        fcc_lattice_parameter_from_volume_primitive_cell(vol, 64)
        for vol in volumes
    ]

    # From J. L. Murray, The Al-Mg system, 1982
    experiment = np.loadtxt("almg_lattice_parameter.csv", delimiter=",")
    mg_conc_exp = experiment[:, 0]
    lattice_param_exp = experiment[:, 1] * 10

    fit = linregress(concs, lattice_params)
    slope, intercept = fit[0], fit[1]
    print(slope, intercept)

    x = np.linspace(0.0, 0.6, 10)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(x, intercept + slope * x)
    ax.plot(concs, lattice_params, 'o', label="DFT", mfc="none")
    ax.plot(mg_conc_exp, lattice_param_exp, 'x', label="Exp")
    ax.legend(loc="best", labelspacing=0.05, frameon=False)
    ax.set_xlabel("Mg concentration")
    ax.set_ylabel("FCC lattice parameter")
    for side in ("right", "top"):
        ax.spines[side].set_visible(False)
    plt.show()
def choose_bulk(bulk_database, n_elems):
    '''
    Chooses a bulk from our database at random among the bulks that have the
    requested number of elements.

    Args:
        bulk_database   A string pointing to the ASE *.db object that contains
                        the bulks you want to consider.
        n_elems         An integer indicating how many elements should be
                        inside the bulk to be selected.
    Returns:
        atoms   `ase.Atoms` of the chosen bulk structure.
        mpid    A string indicating which MPID the bulk is
    Raises:
        ValueError  If the database holds no bulk with `n_elems` elements.
    '''
    db = ase.db.connect(bulk_database)
    rows = list(db.select(n_elements=n_elems))

    # Check explicitly: sampling from an empty range fails inside numpy before
    # any indexing happens, so the helpful message below was never reached
    # (and its format string had one argument too many).
    if not rows:
        raise ValueError('Randomly chose to look for a %i-component material, '
                         'but no such materials exist in %s. Please add one '
                         'to the database or change the weights to exclude '
                         'this number of components.'
                         % (n_elems, bulk_database))

    row = rows[np.random.choice(len(rows))]
    return row.toatoms(), row.mpid
def get_bulk(name, proto, id=None, method="gpaw"):
    """Return bulk properties ``(L, eps_para, eps_perp, gap)`` of a material.

    Args:
        name: Formula used to query the module-level ``db`` when *id* is None.
        proto: Prototype used together with *name* in the query.
        id: Optional row id; when given the row is fetched with ``db.get``.
        method: ``"gpaw"`` or ``"vasp"`` (case-insensitive); selects which row
            attributes are read.

    Returns:
        The 4-tuple, or ``None`` when no row matches, the method is unknown,
        reading the attributes raises, or one of the dielectric values is
        negative.
    """
    if id is not None:
        r = db.get(id)
    else:
        matches = list(db.select(formula=name, prototype=proto))
        if len(matches) == 0:
            return None
        r = matches[0]

    try:
        backend = method.lower()
        if backend == "gpaw":
            L = r.bulk_L
            eps_para = (r.bulk_eps_x + r.bulk_eps_y) / 2
            eps_perp = r.bulk_eps_z
            e = r.gap_hse
        elif backend == "vasp":
            L = r.bulk_L_vasp
            eps_para = (r.bulk_eps_x_vasp + r.bulk_eps_y_vasp) / 2
            eps_perp = r.bulk_eps_z_vasp
            # A negative bulk gap falls back to the HSE gap.
            e = r.gap_hse if r.bulk_gap < 0 else r.bulk_gap
        else:
            return None
        if eps_para < 0 or eps_perp < 0:
            return None
    except Exception:
        return None
    return L, eps_para, eps_perp, e
def choose_elements(bulk_database, n):
    '''
    Chooses `n` elements at random from the set of elements inside the given
    database.

    Args:
        bulk_database   A string pointing to the ASE *.db object that contains
                        the bulks you want to consider.
        n               A positive integer indicating how many elements you
                        want to choose.
    Returns:
        elements    A list of strings indicating the chosen elements
    '''
    db = ase.db.connect(bulk_database)
    # random.sample() needs a sequence: sampling straight from a set raises
    # TypeError on Python 3.11+, so the unique elements are put into a list.
    all_elements = sorted({ELEMENTS[number]
                           for row in db.select()
                           for number in row.numbers})
    elements = random.sample(all_elements, n)

    # Make sure we choose a combination of elements that exists in our bulk
    # database
    while db.count(elements) == 0:
        warnings.warn('Sampled the elements %s, but could not find any '
                      'matching bulks in the database (%s). Trying to '
                      're-sample' % (elements, bulk_database),
                      RuntimeWarning)
        elements = random.sample(all_elements, n)

    return elements
def prep_runfolders(dbname, query):
    """Create one run folder per selected database row.

    For each row matching *query* a folder named after the row id is created
    in the current directory (an existing folder is kept).  The folder gets a
    ``run.sh`` symlink pointing to ``../run.sh`` and a ``db_id`` file holding
    the row id.

    Args:
        dbname: Path handed to ``ase.db.connect``.
        query: Selection handed to ``db.select``.

    Returns:
        None.
    """
    import os
    from ase.db import connect

    db = connect(dbname)
    for row in db.select(query):
        folder = str(row.id)
        try:
            os.mkdir(folder)
        except FileExistsError:
            print(f'Keeping folder {folder}')
        else:
            print(f'Creating folder {folder}')

        # Explicit paths are used instead of chdir() so that a failure below
        # cannot leave the process in the wrong working directory.
        try:
            os.symlink('../run.sh', os.path.join(folder, 'run.sh'))
        except OSError:
            # Best effort, e.g. the link already exists from an earlier run.
            pass
        with open(os.path.join(folder, 'db_id'), 'w') as out:
            out.write(folder)

    print('Done')
    return None
def get_data():
    """Collect polarizabilities, HSE gaps and thicknesses from ``db``.

    Rows with ``gap_gw > 0.05`` are read from the module-level ``db``.  Rows
    whose formula contains ``"Cr"`` and rows missing any of ``gap``,
    ``gap_hse``, ``gap_gw``, ``alphax`` or ``alphaz`` are skipped (the latter
    with warnings).

    Returns:
        Tuple of numpy arrays ``(alpha_x, alpha_z, Eg_HSE, thick)`` with one
        entry per accepted row.
    """
    # Only one query is issued; an earlier "gap_gw>0.5" selection was
    # overwritten before use, and values that were never returned are no
    # longer accumulated.
    candidates = db.select(selection="gap_gw>0.05")
    required = ("gap", "gap_hse", "gap_gw", "alphax", "alphaz")

    alpha_x = []
    alpha_z = []
    Eg_HSE = []
    thick = []
    for mol in candidates:
        if "Cr" in mol.formula:  # CrS2 stuffs are not correct?
            continue
        print("{0}-{1}".format(mol.formula, mol.prototype))

        togo = True
        for attrib in required:
            if not hasattr(mol, attrib):
                warnings.warn("{0} doesn't have attribute {1}!".format(
                    mol.formula, attrib))
                togo = False
        if togo is not True:
            warnings.warn("{0} not calculated!".format(mol.formula))
            continue

        alpha_x.append(mol.alphax)
        alpha_z.append(mol.alphaz)
        Eg_HSE.append(mol.gap_hse)
        delta, _, _ = get_thick(mol)
        thick.append(delta)

    print(len(alpha_x))
    return (numpy.array(alpha_x), numpy.array(alpha_z),
            numpy.array(Eg_HSE), numpy.array(thick))
def tofile(query, type, limit=0):
    """Export the rows matching *query* to a temporary file and return its text.

    Args:
        query: Selection handed to ``select`` of the module-level ``db``.
        type: File extension (without the dot) of the temporary file that
            ``ase.db.connect`` is pointed at.
        limit: Forwarded to ``db.select``.

    Returns:
        The content of the temporary file, read in text mode.
    """
    fd, name = tempfile.mkstemp(suffix="." + type)
    # The descriptor itself is never used, only the path.
    os.close(fd)
    try:
        con = ase.db.connect(name, use_lock_file=False)
        for dct in db.select(query, limit=limit):
            con.write(dct,
                      data=dct.get("data", {}),
                      **dct.get("key_value_pairs", {}))
        with open(name) as exported:
            data = exported.read()
    finally:
        # Remove the temporary file even when the export fails.
        os.unlink(name)
    return data
def main():
    """Delete every row of ``ceTest.db`` whose ``gen`` value is missing."""
    db = ase.db.connect("ceTest.db")

    # Collect the ids first, then remove them in a single call.
    ids_without_gen = [row.id for row in db.select()
                       if row.get("gen") is None]
    db.delete(ids_without_gen)
def get_atoms(db, formula, phase):
    """Return the single PBE row for *formula* in the given *phase*.

    Args:
        db: Object with a ``select(query)`` method yielding rows.
        formula: Chemical formula inserted into the query.
        phase: Phase label inserted into the query.

    Returns:
        The only row matching ``formula=<formula>,xc=PBE,phase=<phase>``.

    Raises:
        ValueError: If more than one row matches.
        IndexError: If no row matches.
    """
    query = 'formula={},xc=PBE,phase={}'.format(formula, phase)
    system_list = list(db.select(query))

    if len(system_list) > 1:
        # TODO - handle this better
        raise ValueError("found multiple matches for {}, PBE, {} phase".format(
            formula, phase))
    if not system_list:
        # Same exception type as the bare list lookup, but with context.
        raise IndexError("found no matches for {}, PBE, {} phase".format(
            formula, phase))

    return system_list[0]
def tofile(query, type, limit=0):
    """Export the rows matching *query* to a temporary file and return its text.

    Args:
        query: Selection handed to ``select`` of the module-level ``db``.
        type: File extension (without the dot) of the temporary file that
            ``ase.db.connect`` is pointed at.
        limit: Forwarded to ``db.select``.

    Returns:
        The content of the temporary file, read in text mode.
    """
    fd, name = tempfile.mkstemp(suffix='.' + type)
    # The descriptor itself is never used, only the path.
    os.close(fd)
    try:
        con = ase.db.connect(name, use_lock_file=False)
        for dct in db.select(query, limit=limit):
            con.write(dct,
                      data=dct.get('data', {}),
                      **dct.get('key_value_pairs', {}))
        with open(name) as exported:
            data = exported.read()
    finally:
        # Remove the temporary file even when the export fails.
        os.unlink(name)
    return data
def tofile(project, query, type, limit=0):
    """Export selected rows of a project database and return the file bytes.

    Args:
        project: Key into the module-level ``databases`` mapping.
        query: Selection handed to ``db.select``.
        type: File extension (without the dot) of the temporary file that
            ``ase.db.connect`` is pointed at.
        limit: Forwarded to ``db.select``.

    Returns:
        The content of the temporary file as ``bytes``.
    """
    fd, name = tempfile.mkstemp(suffix='.' + type)
    # The descriptor itself is never used, only the path.
    os.close(fd)
    try:
        con = ase.db.connect(name, use_lock_file=False)
        db = databases[project]
        for row in db.select(query, limit=limit):
            con.write(row,
                      data=row.get('data', {}),
                      **row.get('key_value_pairs', {}))
        with open(name, 'rb') as exported:
            data = exported.read()
    finally:
        # Remove the temporary file even when the export fails.
        os.unlink(name)
    return data
def tofile(project, query, type, limit=0):
    """Export selected rows of a project database and return the file bytes.

    Args:
        project: Key into the module-level ``databases`` mapping.
        query: Selection handed to ``db.select``.
        type: File extension (without the dot) of the temporary file that
            ``ase.db.connect`` is pointed at.
        limit: Forwarded to ``db.select``.

    Returns:
        The content of the temporary file as ``bytes``.
    """
    fd, name = tempfile.mkstemp(suffix='.' + type)
    # The descriptor itself is never used, only the path.
    os.close(fd)
    try:
        con = ase.db.connect(name, use_lock_file=False)
        db = databases[project]
        for row in db.select(query, limit=limit):
            con.write(row,
                      data=row.get('data', {}),
                      **row.get('key_value_pairs', {}))
        with open(name, 'rb') as exported:
            data = exported.read()
    finally:
        # Remove the temporary file even when the export fails.
        os.unlink(name)
    return data
def read_db(filename, index, **kwargs):
    """Yield ``Atoms`` objects for the database rows selected by *index*.

    Args:
        filename: Database name handed to ``ase.db.connect``.
        index: An int, a slice, or a string.  A string is first passed to
            ``string2index``; if that raises ``ValueError`` the string is
            used as a database query.
        **kwargs: Forwarded to ``ase.db.connect``.

    Yields:
        ``row.toatoms()`` for every selected row.
    """
    db = ase.db.connect(filename, serial=True, **kwargs)

    if isinstance(index, basestring):
        try:
            index = string2index(index)
        except ValueError:
            pass  # left as a string: handled as a query below

    if isinstance(index, int):
        # ``index + 1 or None`` turns -1 into an open-ended slice.
        index = slice(index, index + 1 or None)

    if isinstance(index, basestring):
        # index is a database query string:
        selected = db.select(index)
    else:
        start, stop, step = index.indices(db.count())
        if start == stop:
            return
        assert step == 1
        selected = db.select(offset=start, limit=stop - start)

    for row in selected:
        yield row.toatoms()
def read_db(filename, index, **kwargs):
    """Yield ``Atoms`` objects for the database rows selected by *index*.

    Args:
        filename: Database name handed to ``ase.db.connect``.
        index: An int, a slice, or a string.  A string is first passed to
            ``string2index``; if that raises ``ValueError`` the string is
            used as a database query.
        **kwargs: Forwarded to ``ase.db.connect``.

    Yields:
        ``row.toatoms()`` for every selected row.
    """
    db = ase.db.connect(filename, serial=True, **kwargs)

    if isinstance(index, basestring):
        try:
            index = string2index(index)
        except ValueError:
            pass  # left as a string: handled as a query below

    if isinstance(index, int):
        # ``index + 1 or None`` turns -1 into an open-ended slice.
        index = slice(index, index + 1 or None)

    if isinstance(index, basestring):
        # index is a database query string:
        selected = db.select(index)
    else:
        start, stop, step = index.indices(db.count())
        if start == stop:
            return
        assert step == 1
        selected = db.select(offset=start, limit=stop - start)

    for row in selected:
        yield row.toatoms()
def get_layer_system(db, formula, phase):
    """Return the atoms of the unique PBE row for *formula* and *phase*.

    Args:
        db: Object with a ``select(query)`` method yielding rows.
        formula: Chemical formula inserted into the query.
        phase: Phase label inserted into the query.

    Returns:
        ``toatoms()`` of the single matching row.

    Raises:
        ValueError: If zero or more than one row matches.
    """
    query = 'formula={},xc=PBE,phase={}'.format(formula, phase)
    matches = list(db.select(query))
    n_found = len(matches)

    if n_found > 1:
        # TODO - handle this better
        raise ValueError("found multiple matches for {}, PBE, {} phase".format(
            formula, phase))
    if n_found == 0:
        raise ValueError("found no matches for {}, PBE, {} phase".format(
            formula, phase))

    return matches[0].toatoms()
def get_data(self):
    """Return atoms objects for the first ten ``ABO3`` rows of the database.

    Rows whose conversion to atoms raises ``AttributeError`` are left out.
    """
    db = ase.db.connect('data/cubic_perovskites.db')
    rows = list(db.select(combination='ABO3'))[:10]

    # Compile a list of atoms and target values.
    alist = []
    for row in rows:
        try:
            atoms = row.toatoms()
        except AttributeError:
            continue
        alist.append(atoms)
    return alist
def analyze():
    """Plot the trial energy against k-point count and plane-wave cutoff.

    Every row of the database returned by ``db_name()`` that carries
    ``n_kpt``, ``cutoff`` and ``trial_energy`` is used.  The first figure
    shows the rows with a cutoff of 500, the second the rows with one
    k-point; both are sorted along the x axis.
    """
    db = ase.db.connect(db_name())

    kpts = []
    cutoff = []
    energy = []
    for row in db.select():
        try:
            record = (row.n_kpt, row.cutoff, row.trial_energy)
        except AttributeError:
            # Rows without the three values are skipped.  Only the missing
            # attribute is tolerated, so unrelated errors are not hidden.
            continue
        kpts.append(record[0])
        cutoff.append(record[1])
        energy.append(record[2])

    # Energy vs. k-points at fixed cutoff.
    kpt_kpt = [k for k, cut in zip(kpts, cutoff) if cut == 500]
    eng_kpt = [e for e, cut in zip(energy, cutoff) if cut == 500]
    srt_indx = np.argsort(kpt_kpt)
    kpt_kpt = [kpt_kpt[indx] for indx in srt_indx]
    eng_kpt = [eng_kpt[indx] for indx in srt_indx]

    # Energy vs. cutoff at a single k-point.
    cut_cut = [cut for cut, k in zip(cutoff, kpts) if k == 1]
    eng_cut = [e for e, k in zip(energy, kpts) if k == 1]
    srt_indx = np.argsort(cut_cut)
    cut_cut = [cut_cut[indx] for indx in srt_indx]
    eng_cut = [eng_cut[indx] for indx in srt_indx]

    fig1 = plt.figure()
    ax1 = fig1.add_subplot(1, 1, 1)
    ax1.plot(kpt_kpt, eng_kpt, "-o")
    ax1.set_xlabel("Number of k-points")
    ax1.set_ylabel("Energy (eV)")

    fig2 = plt.figure()
    ax2 = fig2.add_subplot(1, 1, 1)
    ax2.plot(cut_cut, eng_cut, "-o")
    ax2.set_xlabel("Plane wave cutoff (eV)")
    ax2.set_ylabel("Energy (eV)")
    plt.show()
def get_bulk_inverted_index_1(input_bulk_database, max_num_elements):
    '''
    Converts an input ASE.db to an inverted index to efficiently sample bulks.

    One query is issued per element count from 1 to `max_num_elements`, and
    the number of rows found by each query is printed.

    Returns:
        index           dict mapping the element count to a list of
                        (atoms, mpid) tuples
        total_entries   Total number of tuples stored in the index
    '''
    assert max_num_elements > 0
    db = ase.db.connect(input_bulk_database)

    index = {}
    total_entries = 0
    for n_elements in range(1, max_num_elements + 1):
        rows = list(db.select(n_elements=n_elements))
        print(len(rows))
        index[n_elements] = [(row.toatoms(), row.mpid) for row in rows]
        total_entries += len(rows)
    return index, total_entries
def getJobIDs(self):
    """
    Returns the ids of all rows that should be run.

    New jobs (neither queued nor started) are selected by default; when
    ``self.args["restart"]`` is the string "True", the rows that are not
    converged are selected instead.
    """
    if not hasASE:
        raise ImportError("Could not find ASE")

    db = ase.db.connect(self.args["dbname"])

    restart = "restart" in self.args.keys() and self.args["restart"] == "True"
    if restart:
        # Re-start an old simulation that was not converged
        condition = "converged=False"
    else:
        # Default condition is to run new jobs
        condition = "queued=False, started=False"

    ids = []
    for row in db.select(condition):
        ids.append(row.id)
    return ids
def convert_adsorbate(input_adsorbate_database, output_pkl):
    '''
    Converts an input ASE.db to an inverted index to efficiently sample
    adsorbates.

    The index maps the position of each row to the tuple
    (atoms, SMILE string, bond indices) and is pickled to `output_pkl`.
    '''
    db = ase.db.connect(input_adsorbate_database)

    index = {}
    for position, row in enumerate(db.select()):
        atoms = row.toatoms()
        row_data = row.data
        index[position] = (atoms, row_data['SMILE'], row_data['bond_idx'])

    with open(output_pkl, 'wb') as f:
        pickle.dump(index, f)

    # As of adsorbates.db file in master on April 28 2020
    assert len(index) == 82
def convert_bulk(input_bulk_database, max_num_elements, output_pkl):
    '''
    Converts an input ASE.db to an inverted index to efficiently sample bulks.

    The index maps each element count from 1 to `max_num_elements` to a list
    of (atoms, mpid) tuples and is pickled to `output_pkl`.
    '''
    assert max_num_elements > 0
    db = ase.db.connect(input_bulk_database)

    index = {}
    total_entries = 0
    for n_elements in range(1, max_num_elements + 1):
        rows = list(db.select(n_elements=n_elements))
        index[n_elements] = [(row.toatoms(), row.mpid) for row in rows]
        total_entries += len(rows)

    with open(output_pkl, 'wb') as f:
        pickle.dump(index, f)

    # As of bulk.db file from Kevin on 01 May 2020
    assert total_entries == 11010
def choose_bulk(bulk_database, elements):
    '''
    Chooses a bulk from our database at random as long as the bulk contains
    all the specified elements.

    Args:
        bulk_database   A string pointing to the ASE *.db object that contains
                        the bulks you want to consider.
        elements        A list of strings indicating the elements you want to
                        show up in the bulk. The strings must match one of
                        the values in the `ELEMENTS` constant in this
                        submodule.
    Returns:
        atoms   `ase.Atoms` of the chosen bulk structure.
        mpid    String indicating the Materials Project ID number of the
                bulk that was selected.
    '''
    db = ase.db.connect(bulk_database)

    bulks_subset = []
    for row in db.select(elements):
        bulks_subset.append((row.toatoms(), row.mpid))

    chosen = random.choice(bulks_subset)
    atoms, mpid = chosen[0], chosen[1]
    return atoms, mpid
def train(self, label, dbfile, nepochs=10, learning_rate=0.001, shuffle=True,
          percenttest=0.1):
    """Train the potential against the data in a database.

    Parameters
    ----------
    label: string, used for saving the results (not referenced in this body).

    dbfile: the path to an ase database containing training examples.

    nepochs: int, number of passes over the training part of the data.

    learning_rate: float (not referenced in this body; the optimizer lines
    are commented out).

    shuffle: boolean, if True, shuffle the data.

    percenttest: float, fraction of data to use only for testing
    """
    with ase.db.connect(dbfile) as db:
        data = [(row.toatoms(), row.energy) for row in db.select()]

    if shuffle:
        import random
        random.shuffle(data)

    # The leading part of the data is used for training, the rest for testing.
    N_train = int(len(data) * (1 - percenttest))
    train_data, test_data = data[:N_train], data[N_train:]

    known_energies = tf.placeholder(tf.float64, None)
    tf_energies = tf.placeholder(tf.float64, None)

    # loss = tf.reduce_mean(tf.square(tf_energies - known_energies))
    # opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    for _epoch in range(nepochs):
        for atoms, _known in train_data:
            atoms.set_calculator(self)
            te = atoms.get_calculator()._energy
            _loss = self.sess.run([te])
def get_bulk_inverted_index_2(input_bulk_database, max_num_elements):
    '''
    Converts an input ASE.db to an inverted index to efficiently sample bulks.

    All rows are read with a single query.  The element count of a bulk is
    the number of upper-case characters in the string form of its symbols;
    bulks with more than `max_num_elements` elements are left out.

    Returns:
        index           dict mapping the element count to a list of
                        (atoms, mpid) tuples
        total_entries   Total number of tuples stored in the index
    '''
    assert max_num_elements > 0
    db = ase.db.connect(input_bulk_database)
    rows = list(db.select())

    index = {}
    total_entries = 0
    for row in rows:
        bulk = row.toatoms()
        mpid = row.mpid
        formula_str = str(bulk.symbols)
        num_ele = len([ch for ch in formula_str if ch.isupper()])
        if num_ele > max_num_elements:
            continue
        index.setdefault(num_ele, []).append((bulk, mpid))
        total_entries += 1
    return index, total_entries
emass = [] hmass = [] valence = numpy.load("../post_processing/valence.npy") pol = numpy.load("../post_processing/valence.npy") def get_thick(atom_row): pos = atom_row.positions[:, -1] diff = covalent_radii[atom_row.numbers] zmax = numpy.max(pos + diff) - numpy.min(pos - diff) vals = valence[atom_row.numbers] # valence electrons atom_pol = pol[atom_row.numbers] A = atom_row.cell_area return zmax, sum(vals) / A, sum(atom_pol) / A candidates = db.select(selection="gap_gw>0.5") for mol in candidates: if "Cr" in mol.formula: # CrS2 stuffs are not correct? continue print("{0}-{1}".format(mol.formula, mol.prototype)) togo = True for attrib in ("gap_hse", "emass1", "alphax", "alphaz", ): if not hasattr(mol, attrib): warnings.warn("{0} doesn't have attribute {1}!".format(mol.formula, attrib)) togo = False if togo is not True:
# creates: band_alignment.png
from math import floor, ceil
import re

import numpy as np
import matplotlib.pyplot as plt

import ase.db

# Connect to database
db = ase.db.connect('c2dm.db')

# Select the rows that have G0W0 results
rows = db.select('xc=LDA,ind_gap_g0w0>0')

# One tuple per row: (first species, second species, label, VBM, CBM).
data = []
for row in rows:
    name = row.name
    phase = row.phase

    # Use regular expressions to get the atomic species from the name
    m = re.search('([A-Z][a-z]?)([A-Z][a-z]?)2', name)
    M, X = m.group(1), m.group(2)

    # Phase prefix ('H' -> '2H-', 'T' -> '1T-', anything else -> none)
    # followed by the name with the "2" typeset as a subscript.
    label = {'H': '2H-', 'T': '1T-'}.get(phase, '')
    label += name.replace('2', '$_2$')

    # Store data as tuples - easier to sort
    data.append((M, X, label, row.vbm_g0w0, row.cbm_g0w0))
import sys sys.path.insert(0, '../../../') from src.discoverers.adsorption.values import calc_co2rr_activities from src.discoverers.adsorption.mms import MultiscaleDiscoverer from src.discoverers.adsorption.models import NullModel # Discoverer settings adsorbate = 'CO' initial_training_size = 1000 batch_size = 200 quantile_cutoff = 0.95 # Data loading db_dir = '../../pull_data/%s/' % adsorbate db = ase.db.connect(db_dir + '%s.db' % adsorbate) rows = list(db.select()) random.Random(42).shuffle(rows) def parse_rows(rows): features = [] labels = [] surfaces = [] for row in rows: features.append(row.id) data = row.data labels.append(data['adsorption_energy']) surface = (data['mpid'], data['miller'], data['shift'], data['top']) surfaces.append(surface)
# Copy the VASP bulk results from a CSV file into the matching database rows.
# Each line holds: "<formula>-<prototype>", gap, L, eps_x, eps_y, eps_z.
csv_file = "../../data/gpaw_data/gpaw_vasp_aa.csv"
with open(csv_file, "r", encoding="utf-8") as f:
    for line in f:
        # NOTE(review): the name ``sys`` hides the standard module of the
        # same name for the rest of this script.
        sys, gap, L, ex, ey, ez = line.strip().split(",")
        if "" in (ex, ey, ez):
            # bad results, discard
            continue
        try:
            gap = float(gap)
            L = float(L)
            ex = float(ex)
            ey = float(ey)
            ez = float(ez)
        except ValueError:
            continue
        formula, proto = sys.split("-")
        # print(sys.encode("utf8"), gap, L, ex, ey, ez)
        res = list(db.select(formula=formula, prototype=proto))
        if len(res) == 0:
            continue
        # Only the first matching row is updated.
        mol = res[0]
        db_id = mol.id
        db.update(db_id,
                  bulk_L_vasp=L,
                  bulk_gap_vasp=gap,
                  bulk_eps_x_vasp=ex,
                  bulk_eps_y_vasp=ey,
                  bulk_eps_z_vasp=ez)
        print(sys, "Suscessful!")
def transfer(self, filename_sqlite, block_size=1000, start_block=0,
             write_ase=True, write_publication=True, write_reaction=True,
             write_reaction_system=True, check=False):
    """ Transfer data from local sqlite3 .db file to the
    catalysis-hub postgreSQL server

    The work is done in three optional stages: atomic structures,
    publications (including the publication_system table) and reactions
    (including, optionally, the reaction_system table).  Progress messages
    are written to ``self.stdout``.  Nothing is returned.

    Parameters:
    filename_sqlite: str
        name of .db file
    block_size: int (default 1000)
        Number of atomic structures and reactions to write together
        in each block.
    start_block: int (default 0)
        Block to start with
    write_ase: bool
        whether or not to write atomic structures
    write_publication: bool
        whether or not to transfer publication table
    write_reaction: bool
        whether or not to transfer reaction table
    write_reaction_system: bool
        whether or not to write reaction_system table
    check: bool
        not referenced in the body of this method
    """

    self.stdout.write('Starting transfer\n')
    # Reuse the connection stored on the instance when there is one.
    con = self.connection or self._connect()
    self._initialize(con)
    self.stdout.write('Finished initialization\n')
    cur = con.cursor()
    self.stdout.write('Got a cursor\n')
    self.stdout.write('Connecting to {0}\n'.format(self.server_name))

    # Stage 1: atomic structures, copied block by block.
    nrows = 0
    if write_ase:
        self.stdout.write('Transfering atomic structures\n')
        db = ase.db.connect(filename_sqlite)
        n_structures = db.count()
        n_blocks = n_structures // block_size + 1
        t_av = 0
        for block_id in range(start_block, n_blocks):
            i = block_id - start_block
            t1 = time.time()
            # The query below uses strict inequalities, so the block covers
            # the ids b0 + 1 ... b1 - 1.
            b0 = block_id * block_size
            b1 = (block_id + 1) * block_size + 1
            if block_id + 1 == n_blocks:
                b1 = n_structures + 1

            rows = list(db.select('{}<id<{}'.format(b0, b1)))

            with ase.db.connect(self.server_name, type='postgresql') as db2:
                # write one row at the time until ase is updated
                # db2.write(rows)
                for row in rows:
                    db2.write(row)

            nrows += len(rows)
            t2 = time.time()
            dt = t2 - t1
            # Running mean of the time per block, used for the estimate.
            t_av = (t_av * i + dt) / (i + 1)

            self.stdout.write(
                ' Finnished Block {0} / {1} in {2} sec\n'.format(
                    block_id + 1, n_blocks, dt))
            self.stdout.write(
                ' Completed transfer of {0} atomic structures\n'.format(
                    nrows))
            self.stdout.write(' Estimated time left: {0} sec\n'.format(
                t_av * (n_blocks - block_id - 1)))

    # From here on ``db`` is the CathubSQLite wrapper around the same file.
    db = CathubSQLite(filename_sqlite)
    con_lite = db._connect()
    cur_lite = con_lite.cursor()

    # Stage 2: publications and the publication_system table.
    Npub = 0
    Npubstruc = 0
    if write_publication:
        self.stdout.write('Transfering publications\n')
        try:
            npub = db.get_last_pub_id(cur_lite)
        except BaseException:
            # Any failure to get the last publication id falls back to 1.
            npub = 1
        for id_lite in range(1, npub + 1):
            Npub += 1
            row = db.read(id=id_lite, table='publication')
            if len(row) == 0:
                continue
            values = row[0]
            pid, pub_id = self.write_publication(values)

        # Publication structures connection
        cur_lite.execute("""SELECT * from publication_system;""")
        publication_system_values = []
        rows = cur_lite.fetchall()
        for row in rows:
            Npubstruc += 1
            values = list(row)
            value_list = get_value_list(values)
            publication_system_values += [tuple(value_list)]

        # Insert into publication_system table
        key_str = get_key_str(table='publication_system')
        insert_command = """INSERT INTO publication_system ({0}) VALUES %s ON CONFLICT DO NOTHING;"""\
            .format(key_str)

        execute_values(cur=cur, sql=insert_command,
                       argslist=publication_system_values, page_size=1000)

        # Write pub_id to systems table
        # NOTE(review): as laid out here, pub_id is the value assigned by the
        # last publication written in the loop above, and it is interpolated
        # into the SQL text - confirm both are intended.
        cur.execute("""UPDATE systems SET key_value_pairs=jsonb_set(key_value_pairs, '{{"pub_id"}}', '"{pub_id}"') WHERE unique_id IN (SELECT ase_id from publication_system WHERE pub_id='{pub_id}')"""\
                    .format(pub_id=pub_id))

        con.commit()
        self.stdout.write(' Completed transfer of publications\n')

    # Stage 3: reactions (and reaction_system), copied block by block.
    Ncat = 0
    Ncatstruc = 0

    if write_reaction:
        self.stdout.write('Transfering reactions')
        # New reaction ids continue after the largest id on the server.
        cur.execute('SELECT max(id) from reaction;')
        ID = cur.fetchone()[0] or 0

        n_react = db.get_last_id(cur_lite)

        n_blocks = int(n_react / block_size) + 1
        t_av = 0
        for block_id in range(start_block, n_blocks):
            reaction_values = []
            reaction_system_values = []
            Ncat0 = Ncat
            Ncatstruc0 = Ncatstruc

            i = block_id - start_block
            t1 = time.time()
            b0 = block_id * block_size + 1
            b1 = (block_id + 1) * block_size + 1
            if block_id + 1 == n_blocks:
                b1 = n_react + 1

            for id_lite in range(b0, b1):
                row = db.read(id_lite)
                if len(row) == 0:
                    continue
                values = row[0]

                # id = self.check(values[13], values[1], values[6], values[7],
                #                 values[8], strict=True)
                # With the lookup above commented out, id is always None and
                # every reaction is inserted under a new ID.
                id = None
                update_rs = False
                if id is not None:
                    id = self.update(id, values)
                    self.stdout.write(
                        'Updated reaction db with row id = {}\n'.format(
                            id))
                    update_rs = True
                else:
                    ID += 1
                    Ncat += 1
                    value_list = get_value_list(values)
                    value_list[0] = ID  # set new ID
                    reaction_values += [tuple(value_list)]
                    if write_reaction_system:
                        cur_lite.execute(
                            "SELECT * from reaction_system where id={};".
                            format(id_lite))
                        rows = cur_lite.fetchall()
                        if update_rs:
                            cur.execute("""Delete from reaction_system where id={0}""".format(id))
                        for row in rows:
                            Ncatstruc += 1
                            values = list(row)
                            # Rows with three values get None inserted at
                            # position 1 before conversion.
                            if len(values) == 3:
                                values.insert(1, None)
                            value_list = get_value_list(values)
                            # Point the entry at the new reaction ID.
                            value_list[3] = ID
                            reaction_system_values += [tuple(value_list)]

            # Row template "(%s, %s, ...)" with 14 placeholders.
            q = ', '.join('?' * 14)
            q = '({})'.format(q.replace('?', '%s'))

            key_str = get_key_str()
            insert_command = """INSERT INTO reaction ({0}) VALUES %s;""".format(key_str)

            execute_values(cur=cur, sql=insert_command,
                           argslist=reaction_values,
                           template=q, page_size=block_size)

            key_str = get_key_str('reaction_system')
            insert_command = """INSERT INTO reaction_system ({0}) VALUES %s ON CONFLICT DO NOTHING;""".format(key_str)

            execute_values(cur=cur, sql=insert_command,
                           argslist=reaction_system_values, page_size=1000)
            con.commit()

            t2 = time.time()
            dt = t2 - t1
            # Running mean of the time per block, used for the estimate.
            t_av = (t_av * i + dt) / (i + 1)

            self.stdout.write(
                ' Finnished Block {0} / {1} in {2} sec \n'.format(
                    block_id + 1, n_blocks, dt))
            self.stdout.write(
                ' Completed transfer of {0} reactions. \n'.format(
                    Ncat - Ncat0))
            self.stdout.write(' Estimated time left: {0} sec \n'.format(
                t_av * (n_blocks - block_id - 1)))

        self.stdout.write(' Completed transfer of reactions\n')

    for statement in tsvector_update:
        cur.execute(statement)

    # Only a connection opened by this call is committed and closed here.
    if self.connection is None:
        con.commit()
        con.close()

    self.stdout.write('Inserted into:\n')
    self.stdout.write(' systems: {0}\n'.format(nrows))
    self.stdout.write(' publication: {0}\n'.format(Npub))
    self.stdout.write(' publication_system: {0}\n'.format(Npubstruc))
    self.stdout.write(' reaction: {0}\n'.format(Ncat))
    self.stdout.write(' reaction_system: {0}\n'.format(Ncatstruc))
import sys sys.path.insert(0, '../../../') from src.discoverers.adsorption.values import calc_co2rr_activities from src.discoverers.adsorption.mms import MultiscaleDiscoverer from src.discoverers.adsorption.models import PrimeModel # Discoverer settings adsorbate = 'CO' initial_training_size = 1000 batch_size = 200 quantile_cutoff = 0.9 # Data loading db_dir = '../../pull_data/%s_synthesized/' % adsorbate db = ase.db.connect(db_dir + '%s.db' % adsorbate) rows = list(tqdm(db.select(), desc='reading ASE db', total=db.count())) random.Random(42).shuffle(rows) def parse_row(row): feature = row.id data = row.data label = data['adsorption_energy'] surface = (data['mpid'], data['miller'], data['shift'], data['top']) return feature, label, surface def parse_rows(rows): with Pool(processes=32, maxtasksperchild=1000) as pool: iterator = pool.imap(parse_row, rows, chunksize=100) iterator_tracked = tqdm(iterator, desc='parsing rows', total=len(rows))
if row[4] != "": # Eps calculated name, proto = row[:2] print(name, proto) L, E, ex, ey, ez, E_direct, E_min = map(float, row[2:]) # Elements key = (name, proto) e_xy = numpy.sqrt(ex * ey) ax = (e_xy - 1) / (4 * pi) * L az = (1 - 1 / ez) * L / (4 * pi) if proto == "ABX3": # perovskite? delta = 14.24 n_2D = None mol = None else: mol = list(db.select(formula=name, prototype=proto))[0] delta, n_2D = get_thick(mol) # 3D try: L_3D, epsx, epsz, E_3D = get_bulk(name, proto) except TypeError: L_3D, epsx, epsz, E_3D = (None, None, None, None) # QC if (name, proto) in QC_res: qc_n, qc_p = QC_res[(name, proto)] else: qc_n = None qc_p = None # emass try: emass = mol.emass1