def update_sqlite(self, skip=None, goto_reaction=None, key_names='all'):
    """Update already-stored reactions in the local CathubSQLite database.

    Iterates over the reactions yielded by ``self.read`` and, for each one
    that already exists in ``self.cathub_db``, updates the stored row.
    Reactions not yet present are left untouched (use ``write`` for those).

    Parameters
    ----------
    skip : list of str, optional
        Forwarded to ``self.read``; entries to skip. Defaults to no skips.
    goto_reaction : optional
        Forwarded to ``self.read``; start reading from this reaction.
    key_names : str or list
        Columns to update; ``'all'`` updates every column.
    """
    # Avoid the shared-mutable-default pitfall of ``skip=[]``.
    if skip is None:
        skip = []
    for key_values in self.read(skip=skip, goto_reaction=goto_reaction):
        with CathubSQLite(self.cathub_db) as db:
            # Use the same two-argument check(chemical_composition,
            # reaction_energy) signature as self.write() — the original
            # passed only the reaction energy, which is inconsistent
            # with every other call site in this file.
            row_id = db.check(key_values['chemical_composition'],
                              key_values['reaction_energy'])
            if row_id is not None:
                db.update(row_id, key_values, key_names)
def write_publication(self, pub_data):
    """Write publication info to the local CathubSQLite database.

    Checks whether the publication (``self.pub_id``) is already stored;
    if not, writes it.

    Parameters
    ----------
    pub_data : dict
        Publication key/value data to store.

    Returns
    -------
    pid
        Row id of the (existing or newly written) publication.
    """
    with CathubSQLite(self.cathub_db) as db:
        pid = db.check_publication(self.pub_id)
        if pid is None:
            pid = db.write_publication(pub_data)
            print('Written to publications db row id = {}'.format(pid))
        else:
            # Message fix: this is the publications table (not the
            # reaction table) and 'Allready' was misspelled.
            print('Already in publications db with row id = {}'.format(pid))
    return pid
def write(self, skip=None, goto_reaction=None):
    """Write reactions from ``self.read`` to the local CathubSQLite db.

    For each reaction: write it if new, update it if already present and
    ``self.update`` is truthy, otherwise just report that it exists.

    Parameters
    ----------
    skip : list of str, optional
        Forwarded to ``self.read``; entries to skip. Defaults to no skips.
    goto_reaction : optional
        Forwarded to ``self.read``; start reading from this reaction.
    """
    # Avoid the shared-mutable-default pitfall of ``skip=[]``.
    if skip is None:
        skip = []
    for key_values in self.read(skip=skip, goto_reaction=goto_reaction):
        with CathubSQLite(self.cathub_db) as db:
            # ``row_id`` instead of ``id`` — don't shadow the builtin.
            row_id = db.check(key_values['chemical_composition'],
                              key_values['reaction_energy'])
            if row_id is None:
                row_id = db.write(key_values)
                print('Written to reaction db row id = {}'.format(row_id))
            elif self.update:
                db.update(row_id, key_values)
                print('Updated reaction db row id = {}'.format(row_id))
            else:
                # Typo fix: 'Allready' -> 'Already'.
                print('Already in reaction db with row id = {}'.format(row_id))
def get_reactions(columns='all', n_results=20, write_db=False, **kwargs):
    """Get reactions from the Catalysis-Hub server.

    Give key=value strings as keyword arguments; they are turned into
    GraphQL query filters.

    Parameters
    ----------
    columns : str or list
        Reaction columns to fetch; ``'all'`` (or ``write_db=True``)
        selects every known reaction column.
    n_results : int
        Maximum number of results to fetch.
    write_db : bool
        If True, also write the fetched reactions, publications and
        atomic structures into a local ``Reactions.db`` file.

    Returns
    -------
    dict
        Raw query result as returned by ``query``.
    """
    if write_db or columns == 'all':
        columns = all_columns['reactions']

    # Build the GraphQL query filters from the keyword arguments.
    queries = {}
    for key, value in kwargs.items():
        key = map_column_names(key)
        if key == 'distinct':
            if value in [True, 'True', 'true']:
                queries.update({key: True})
                continue
        if isinstance(value, (int, float)):
            queries.update({key: value})
        else:
            queries.update({key: '{0}'.format(value)})

    # Subtables are only needed when the result is written to disk.
    if write_db:
        subtables = ['reactionSystems', 'publication']
    else:
        subtables = []

    data = query(table='reactions', subtables=subtables,
                 columns=columns, n_results=n_results, queries=queries)

    if not write_db:
        return data

    print('Writing result to Reactions.db')
    unique_ids = []
    for row in data['reactions']['edges']:
        with CathubSQLite('Reactions.db') as db:
            row = row['node']
            key_values = {}
            for key in all_columns['reactions']:
                v = row[key]
                # Server returns JSON-encoded strings for some columns;
                # fall back to the raw value when decoding fails.
                try:
                    v = json.loads(v)
                except BaseException:
                    pass
                key_values[convert(key)] = v

            # Collect ase ids and energy corrections per reaction system.
            ase_ids = {}
            energy_corrections = {}
            for row_rs in row['reactionSystems']:
                if row_rs['name'] == 'N/A':
                    continue
                ase_ids[row_rs['name']] = row_rs['aseId']
                energy_corrections[row_rs['name']] = \
                    row_rs['energyCorrection']

            if not ase_ids:
                ase_ids = None
                energy_corrections = None
            else:
                unique_ids += ase_ids.values()
            key_values['ase_ids'] = ase_ids
            # BUG FIX: the original assigned ``ase_ids`` here, silently
            # discarding the energy corrections gathered above.
            key_values['energy_corrections'] = energy_corrections

            # Publications
            pub_key_values = {}
            row_p = row['publication']
            for key in all_columns['publications']:
                pub_key_values[convert(key)] = row_p[key]
            db.write_publication(pub_key_values)

            # Reactions and reaction_systems; ``row_id`` instead of the
            # builtin-shadowing ``id``.
            row_id = db.check(key_values['chemical_composition'],
                              key_values['reaction_energy'])
            if row_id is None:
                row_id = db.write(key_values)
            else:
                db.update(row_id, key_values)

            if ase_ids is not None:
                # Ase structures: only write structures whose unique_id
                # is not already present in the systems table.
                with ase.db.connect('Reactions.db') as ase_db:
                    con = ase_db.connection
                    cur = con.cursor()
                    cur.execute('SELECT unique_id from systems;')
                    unique_ids0 = cur.fetchall()
                    unique_ids0 = [un[0] for un in unique_ids0]
                    unique_ids = [un for un in unique_ids
                                  if un not in unique_ids0]
                    for unique_id in list(set(unique_ids)):
                        atomsrow = get_atomsrow_by_id(unique_id)
                        ase_db.write(atomsrow)
    print('Writing complete!')
    return data
def transfer(self, filename_sqlite, start_id=1, write_ase=True,
             write_publication=True, write_reaction=True,
             write_reaction_system=True, block_size=1000,
             start_block=0):
    """Transfer data from a local CathubSQLite file to the Postgres server.

    Runs up to four phases, each guarded by its flag:
    atomic structures (``write_ase``), publications
    (``write_publication``), reactions (``write_reaction``) and
    reaction/structure links (``write_reaction_system``). Finishes by
    refreshing the tsvector search indices and printing row counts.

    Parameters
    ----------
    filename_sqlite : str
        Path to the local SQLite file to read from.
    start_id : int
        First reaction row id to transfer.
    write_ase, write_publication, write_reaction, write_reaction_system : bool
        Enable/disable the corresponding transfer phase.
    block_size : int
        Number of atomic structures transferred per block.
    start_block : int
        Block index to resume the structure transfer from.
    """
    self.stdout.write('Starting transfer\n')
    con = self.connection or self._connect()
    self._initialize(con)
    self.stdout.write('Finished initialization\n')
    cur = con.cursor()
    self.stdout.write('Got a cursor\n')
    set_schema = 'SET search_path = {0};'.format(self.schema)
    cur.execute(set_schema)
    import os
    import time
    self.stdout.write('Imported os\n')
    import ase.db
    self.stdout.write('Imported ase.db\n')
    self.stdout.write('Building server_name\n')
    server_name = "postgres://{0}:{1}@{2}:5432/catalysishub".format(
        self.user, self.password, self.server)
    self.stdout.write('Connecting to {server_name}\n'.format(**locals()))

    nrows = 0
    if write_ase:
        print('Transfering atomic structures')
        db = ase.db.connect(filename_sqlite)
        n_structures = db.count()
        n_blocks = int(n_structures / block_size) + 1
        t_av = 0
        for block_id in range(start_block, n_blocks):
            i = block_id - start_block
            t1 = time.time()
            b0 = block_id * block_size + 1
            b1 = (block_id + 1) * block_size + 1
            self.stdout.write(str(block_id) + ' ' + 'from ' + str(b0) +
                              ' to ' + str(b1) + '\n')
            # Last block: include the remaining structures.
            if block_id + 1 == n_blocks:
                b1 = n_structures + 1
            rows = list(db.select('{}<id<{}'.format(b0 - 1, b1)))
            with ase.db.connect(server_name, type='postgresql') as db2:
                db2.write(rows)
            nrows += len(rows)
            t2 = time.time()
            dt = t2 - t1
            # Running average block time, used for the ETA below.
            t_av = (t_av * i + dt) / (i + 1)
            self.stdout.write(
                ' Finnished Block {0} / {1} in {2} sec'.format(
                    block_id, n_blocks, dt))
            self.stdout.write(
                ' Completed transfer of {0} atomic structures.'.format(
                    nrows))
            self.stdout.write(' Estimated time left: {0} sec'.format(
                t_av * (n_blocks - block_id)))

    from cathub.cathubsqlite import CathubSQLite
    db = CathubSQLite(filename_sqlite)
    con_lite = db._connect()
    cur_lite = con_lite.cursor()

    # write publication
    Npub = 0
    Npubstruc = 0
    if write_publication:
        # NOTE(review): narrowed the original bare ``except:`` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed; the
        # best-effort fallback to a single publication is kept.
        try:
            npub = db.get_last_pub_id(cur_lite)
        except Exception:
            npub = 1
        for id_lite in range(1, npub + 1):
            Npub += 1
            row = db.read(id=id_lite, table='publication')
            if len(row) == 0:
                continue
            values = row[0]
            pid, pub_id = self.write_publication(values)

        # Publication structures connection
        cur_lite.execute("""SELECT * from publication_system;""")
        rows = cur_lite.fetchall()
        for row in rows:
            Npubstruc += 1
            values = row[:]
            key_str, value_str = get_key_value_str(
                values, table='publication_system')
            set_schema = 'SET search_path = {0};'.format(self.schema)
            cur.execute(set_schema)
            print("[SET SCHEMA] {set_schema}".format(**locals()))
            insert_command = \
                'INSERT INTO publication_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;'.format(key_str, value_str)
            cur.execute(insert_command)
            # self.write(values, table='publication_system')
        con.commit()

    Ncat = 0
    Ncatstruc = 0
    if write_reaction:
        n = db.get_last_id(cur_lite)
        select_ase = """SELECT * from reaction_system where id={};"""
        for id_lite in range(start_id, n + 1):
            row = db.read(id_lite)
            if len(row) == 0:
                continue
            values = row[0]
            # check() identifies an existing reaction by composition,
            # energy and the reactant/product/site columns.
            id = self.check(values[13], values[1], values[6], values[7],
                            values[8], strict=True)
            update_rs = False
            if id is not None:
                id = self.update(id, values)
                self.stdout.write(
                    'Updated reaction db with row id = {}\n'.format(id))
                update_rs = True
            else:
                Ncat += 1
                id = self.write(values)
                self.stdout.write(
                    'Written to reaction db row id = {0}\n'.format(id))
            cur_lite.execute(select_ase.format(id_lite))
            rows = cur_lite.fetchall()
            if write_reaction_system:
                # On update, replace the old reaction_system links.
                if update_rs:
                    cur.execute(
                        'Delete from reaction_system where reaction_id={0}'.format(id))
                for row in rows:
                    Ncatstruc += 1
                    values = list(row)
                    # Older rows lack the energy-correction column;
                    # pad so the column positions line up.
                    if len(values) == 3:
                        values.insert(1, None)
                    values[3] = id
                    key_str, value_str = get_key_value_str(
                        values, table='reaction_system')
                    set_schema = 'SET search_path = {0};'.format(self.schema)
                    cur.execute(set_schema)
                    print("[SET SCHEMA] {set_schema}".format(**locals()))
                    insert_command = \
                        'INSERT INTO reaction_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;'.format(key_str, value_str)
                    print("[INSERT COMMAND] {insert_command}".format(**locals()))
                    cur.execute(insert_command)
            con.commit()  # Commit reaction_system for each row

    # Refresh full-text-search vectors.
    for statement in tsvector_update:
        cur.execute(statement)

    # Only commit/close when we own the connection.
    if self.connection is None:
        con.commit()
        con.close()
    self.stdout.write('Inserted into:\n')
    self.stdout.write(' systems: {0}\n'.format(nrows))
    self.stdout.write(' publication: {0}\n'.format(Npub))
    self.stdout.write(' publication_system: {0}\n'.format(Npubstruc))
    self.stdout.write(' reaction: {0}\n'.format(Ncat))
    self.stdout.write(' reaction_system: {0}\n'.format(Ncatstruc))