def transfer(self, filename_sqlite, start_id=1, write_ase=True,
             write_publication=True, write_reaction=True,
             write_reaction_system=True, block_size=1000, start_block=0):
    """Transfer data from a local sqlite3 .db file to the catalysis-hub
    postgreSQL server.

    Parameters
    ----------
    filename_sqlite : str
        Name of the local .db file to read from.
    start_id : int (default 1)
        First reaction id (in the sqlite db) to transfer.
    write_ase : bool
        Whether to transfer atomic structures (ase systems table).
    write_publication : bool
        Whether to transfer the publication table.
    write_reaction : bool
        Whether to transfer the reaction table.
    write_reaction_system : bool
        Whether to transfer the reaction_system table.
    block_size : int (default 1000)
        Number of atomic structures written together per block.
    start_block : int (default 0)
        Block index to start with (for resuming interrupted transfers).
    """
    self.stdout.write('Starting transfer\n')
    con = self.connection or self._connect()
    self._initialize(con)
    self.stdout.write('Finished initialization\n')
    cur = con.cursor()
    self.stdout.write('Got a cursor\n')
    set_schema = 'SET search_path = {0};'.format(self.schema)
    cur.execute(set_schema)
    # Local imports keep module import time low for commands that
    # never call transfer().
    import os
    import time
    self.stdout.write('Imported os\n')
    import ase.db
    self.stdout.write('Imported ase.db\n')
    self.stdout.write('Building server_name\n')
    server_name = "postgres://{0}:{1}@{2}:5432/catalysishub".format(
        self.user, self.password, self.server)
    self.stdout.write('Connecting to {server_name}\n'.format(**locals()))
    nrows = 0
    if write_ase:
        print('Transfering atomic structures')
        db = ase.db.connect(filename_sqlite)
        n_structures = db.count()
        n_blocks = int(n_structures / block_size) + 1
        t_av = 0
        for block_id in range(start_block, n_blocks):
            i = block_id - start_block
            t1 = time.time()
            # ids are 1-based in ase.db; select with exclusive bounds below.
            b0 = block_id * block_size + 1
            b1 = (block_id + 1) * block_size + 1
            self.stdout.write(
                str(block_id) + ' ' + 'from ' + str(b0) + ' to ' +
                str(b1) + '\n')
            if block_id + 1 == n_blocks:
                b1 = n_structures + 1
            rows = list(db.select('{}<id<{}'.format(b0 - 1, b1)))
            with ase.db.connect(server_name, type='postgresql') as db2:
                db2.write(rows)
            nrows += len(rows)
            t2 = time.time()
            dt = t2 - t1
            # Running average block time, used for the ETA estimate.
            t_av = (t_av * i + dt) / (i + 1)
            self.stdout.write(
                ' Finnished Block {0} / {1} in {2} sec'.format(
                    block_id, n_blocks, dt))
            self.stdout.write(
                ' Completed transfer of {0} atomic structures.'.format(
                    nrows))
            self.stdout.write(' Estimated time left: {0} sec'.format(
                t_av * (n_blocks - block_id)))
    from catkit.hub.cathubsqlite import CathubSQLite
    db = CathubSQLite(filename_sqlite)
    con_lite = db._connect()
    cur_lite = con_lite.cursor()
    # write publication
    Npub = 0
    Npubstruc = 0
    if write_publication:
        try:
            npub = db.get_last_pub_id(cur_lite)
        except BaseException:
            # Best-effort fallback when the local db has no pub-id counter.
            npub = 1
        for id_lite in range(1, npub + 1):
            Npub += 1
            row = db.read(id=id_lite, table='publication')
            if len(row) == 0:
                continue
            values = row[0]
            pid, pub_id = self.write_publication(values)
        # Publication structures connection
        cur_lite.execute("""SELECT * from publication_system;""")
        rows = cur_lite.fetchall()
        for row in rows:
            Npubstruc += 1
            values = row[:]
            key_str, value_str = get_key_value_str(
                values, table='publication_system')
            set_schema = 'SET search_path = {0};'.format(self.schema)
            cur.execute(set_schema)
            print("[SET SCHEMA] {set_schema}".format(**locals()))
            # NOTE(review): SQL built by string formatting; values come
            # from the local sqlite db, assumed trusted.
            insert_command = """INSERT INTO publication_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;"""\
                .format(key_str, value_str)
            cur.execute(insert_command)
            # self.write(values, table='publication_system')
        con.commit()
    Ncat = 0
    Ncatstruc = 0
    if write_reaction:
        n = db.get_last_id(cur_lite)
        select_ase = """SELECT * from reaction_system where id={};"""
        for id_lite in range(start_id, n + 1):
            row = db.read(id_lite)
            if len(row) == 0:
                continue
            values = row[0]
            id = self.check(values[13], values[1], values[6], values[7],
                            values[8], strict=True)
            update_rs = False
            if id is not None:
                id = self.update(id, values)
                self.stdout.write(
                    'Updated reaction db with row id = {}\n'.format(id))
                update_rs = True
            else:
                Ncat += 1
                id = self.write(values)
                self.stdout.write(
                    'Written to reaction db row id = {0}\n'.format(id))
            cur_lite.execute(select_ase.format(id_lite))
            rows = cur_lite.fetchall()
            if write_reaction_system:
                if update_rs:
                    # BUGFIX: table was misspelled 'reaction_system231';
                    # delete stale rows from the real reaction_system table
                    # before re-inserting (matches the other versions).
                    cur.execute("""Delete from reaction_system where
                    reaction_id={0}""".format(id))
                for row in rows:
                    Ncatstruc += 1
                    values = list(row)
                    # Older sqlite rows lack the optional second column.
                    if len(values) == 3:
                        values.insert(1, None)
                    values[3] = id
                    key_str, value_str = \
                        get_key_value_str(values, table='reaction_system')
                    set_schema = 'SET search_path = {0};'.format(
                        self.schema)
                    cur.execute(set_schema)
                    print("[SET SCHEMA] {set_schema}".format(**locals()))
                    insert_command = """INSERT INTO reaction_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;"""\
                        .format(key_str, value_str)
                    print("[INSERT COMMAND] {insert_command}".format(
                        **locals()))
                    cur.execute(insert_command)
            con.commit()  # Commit reaction_system for each row
    # Refresh full-text-search vectors after bulk inserts.
    for statement in tsvector_update:
        cur.execute(statement)
    if self.connection is None:
        # Only close connections we opened ourselves.
        con.commit()
        con.close()
    self.stdout.write('Inserted into:\n')
    self.stdout.write(' systems: {0}\n'.format(nrows))
    self.stdout.write(' publication: {0}\n'.format(Npub))
    self.stdout.write(' publication_system: {0}\n'.format(Npubstruc))
    self.stdout.write(' reaction: {0}\n'.format(Ncat))
    self.stdout.write(' reaction_system: {0}\n'.format(Ncatstruc))
def transfer(self, filename_sqlite, block_size=1000,
             start_block=0, write_ase=True,
             write_publication=True, write_reaction=True,
             write_reaction_system=True, check=False):
    """ Transfer data from local sqlite3 .db file to the
    catalysis-hub postgreSQL server

    Parameters:
    filename_sqlite: str
        name of .db file
    block_size: int (default 1000)
        Number of atomic structures and reactions to write together
        in each block.
    start_block: int (default 0)
        Block to start with
    write_ase: bool
        whether or not to write atomic structures
    write_publication: bool
        whether or not to transfer publication table
    write_reaction: bool
        whether or not to transfer reaction table
    write_reaction_system: bool
        whether or not to write reaction_system table
    """
    # NOTE(review): the 'check' parameter is accepted but never read in
    # this body — presumably kept for interface compatibility; confirm.
    self.stdout.write('Starting transfer\n')
    con = self.connection or self._connect()
    self._initialize(con)
    self.stdout.write('Finished initialization\n')
    cur = con.cursor()
    self.stdout.write('Got a cursor\n')
    self.stdout.write('Connecting to {0}\n'.format(self.server_name))
    nrows = 0
    # --- 1) atomic structures: copy ase rows block by block ---
    if write_ase:
        self.stdout.write('Transfering atomic structures\n')
        db = ase.db.connect(filename_sqlite)
        n_structures = db.count()
        n_blocks = n_structures // block_size + 1
        t_av = 0
        for block_id in range(start_block, n_blocks):
            i = block_id - start_block
            t1 = time.time()
            # exclusive bounds for the '{}<id<{}' select below
            b0 = block_id * block_size
            b1 = (block_id + 1) * block_size + 1
            if block_id + 1 == n_blocks:
                b1 = n_structures + 1
            rows = list(db.select('{}<id<{}'.format(b0, b1)))
            with ase.db.connect(self.server_name,
                                type='postgresql') as db2:
                # write one row at the time until ase is updated
                # db2.write(rows)
                for row in rows:
                    db2.write(row)
            nrows += len(rows)
            t2 = time.time()
            dt = t2 - t1
            # running average block time, used for the ETA estimate
            t_av = (t_av * i + dt) / (i + 1)
            self.stdout.write(
                ' Finnished Block {0} / {1} in {2} sec\n'.format(
                    block_id + 1, n_blocks, dt))
            self.stdout.write(
                ' Completed transfer of {0} atomic structures\n'.format(
                    nrows))
            self.stdout.write(' Estimated time left: {0} sec\n'.format(
                t_av * (n_blocks - block_id - 1)))
    db = CathubSQLite(filename_sqlite)
    con_lite = db._connect()
    cur_lite = con_lite.cursor()
    Npub = 0
    Npubstruc = 0
    # --- 2) publications and publication<->structure links ---
    if write_publication:
        self.stdout.write('Transfering publications\n')
        try:
            npub = db.get_last_pub_id(cur_lite)
        except BaseException:
            # best-effort fallback when the pub-id counter is missing
            npub = 1
        for id_lite in range(1, npub + 1):
            Npub += 1
            row = db.read(id=id_lite, table='publication')
            if len(row) == 0:
                continue
            values = row[0]
            pid, pub_id = self.write_publication(values)
        # Publication structures connection
        cur_lite.execute("""SELECT * from publication_system;""")
        publication_system_values = []
        rows = cur_lite.fetchall()
        for row in rows:
            Npubstruc += 1
            values = list(row)
            value_list = get_value_list(values)
            publication_system_values += [tuple(value_list)]
        # Insert into publication_system table
        key_str = get_key_str(table='publication_system')
        insert_command = """INSERT INTO publication_system ({0}) VALUES %s ON CONFLICT DO NOTHING;"""\
            .format(key_str)
        # batched insert via psycopg2.extras.execute_values
        execute_values(cur=cur, sql=insert_command,
                       argslist=publication_system_values,
                       page_size=1000)
        # Write pub_id to systems table
        # NOTE(review): uses pub_id from the last publication written
        # in the loop above — assumes one publication per .db file.
        cur.execute("""UPDATE systems SET key_value_pairs=jsonb_set(key_value_pairs, '{{"pub_id"}}', '"{pub_id}"') WHERE unique_id IN (SELECT ase_id from publication_system WHERE pub_id='{pub_id}')"""\
                    .format(pub_id=pub_id))
        con.commit()
        self.stdout.write(' Completed transfer of publications\n')
    Ncat = 0
    Ncatstruc = 0
    # --- 3) reactions and reaction_system, batched per block ---
    if write_reaction:
        self.stdout.write('Transfering reactions')
        # continue server-side ids after the current maximum
        cur.execute('SELECT max(id) from reaction;')
        ID = cur.fetchone()[0] or 0
        n_react = db.get_last_id(cur_lite)
        n_blocks = int(n_react / block_size) + 1
        t_av = 0
        for block_id in range(start_block, n_blocks):
            reaction_values = []
            reaction_system_values = []
            Ncat0 = Ncat
            Ncatstruc0 = Ncatstruc
            i = block_id - start_block
            t1 = time.time()
            b0 = block_id * block_size + 1
            b1 = (block_id + 1) * block_size + 1
            if block_id + 1 == n_blocks:
                b1 = n_react + 1
            for id_lite in range(b0, b1):
                row = db.read(id_lite)
                if len(row) == 0:
                    continue
                values = row[0]
                # id = self.check(values[13], values[1], values[6],
                #                 values[7], values[8], strict=True)
                # duplicate check disabled: id forced to None, so the
                # update branch below is currently dead code
                id = None
                update_rs = False
                if id is not None:
                    id = self.update(id, values)
                    self.stdout.write(
                        'Updated reaction db with row id = {}\n'.format(
                            id))
                    update_rs = True
                else:
                    ID += 1
                    Ncat += 1
                    value_list = get_value_list(values)
                    value_list[0] = ID  # set new ID
                    reaction_values += [tuple(value_list)]
                if write_reaction_system:
                    cur_lite.execute(
                        "SELECT * from reaction_system where id={};".
                        format(id_lite))
                    rows = cur_lite.fetchall()
                    if update_rs:
                        cur.execute("""Delete from reaction_system where
                        id={0}""".format(id))
                    for row in rows:
                        Ncatstruc += 1
                        values = list(row)
                        # older rows lack the optional second column
                        if len(values) == 3:
                            values.insert(1, None)
                        value_list = get_value_list(values)
                        value_list[3] = ID
                        reaction_system_values += [tuple(value_list)]
            # template of 14 '%s' placeholders for one reaction row
            q = ', '.join('?' * 14)
            q = '({})'.format(q.replace('?', '%s'))
            key_str = get_key_str()
            insert_command = """INSERT INTO reaction
            ({0}) VALUES %s;""".format(key_str)
            execute_values(cur=cur, sql=insert_command,
                           argslist=reaction_values, template=q,
                           page_size=block_size)
            key_str = get_key_str('reaction_system')
            insert_command = """INSERT INTO reaction_system
            ({0}) VALUES %s ON CONFLICT DO NOTHING;""".format(key_str)
            execute_values(cur=cur, sql=insert_command,
                           argslist=reaction_system_values,
                           page_size=1000)
            # one commit per block keeps transactions bounded
            con.commit()
            t2 = time.time()
            dt = t2 - t1
            t_av = (t_av * i + dt) / (i + 1)
            self.stdout.write(
                ' Finnished Block {0} / {1} in {2} sec \n'.format(
                    block_id + 1, n_blocks, dt))
            self.stdout.write(
                ' Completed transfer of {0} reactions. \n'.format(
                    Ncat - Ncat0))
            self.stdout.write(' Estimated time left: {0} sec \n'.format(
                t_av * (n_blocks - block_id - 1)))
        self.stdout.write(' Completed transfer of reactions\n')
    # refresh full-text-search vectors after the bulk inserts
    for statement in tsvector_update:
        cur.execute(statement)
    if self.connection is None:
        # only close connections this method opened itself
        con.commit()
        con.close()
    self.stdout.write('Inserted into:\n')
    self.stdout.write(' systems: {0}\n'.format(nrows))
    self.stdout.write(' publication: {0}\n'.format(Npub))
    self.stdout.write(' publication_system: {0}\n'.format(Npubstruc))
    self.stdout.write(' reaction: {0}\n'.format(Ncat))
    self.stdout.write(' reaction_system: {0}\n'.format(Ncatstruc))
def transfer(self, filename_sqlite, start_id=1, write_ase=True,
             write_publication=True, write_reaction=True,
             write_reaction_system=True, block_size=1000, start_block=0):
    """Transfer data from a local sqlite3 .db file to the catalysis-hub
    postgreSQL server, writing atomic structures one row at a time.

    Parameters
    ----------
    filename_sqlite : str
        Name of the local .db file to read from.
    start_id : int (default 1)
        First reaction id (in the sqlite db) to transfer.
    write_ase : bool
        Whether to transfer atomic structures (ase systems table).
    write_publication : bool
        Whether to transfer the publication table.
    write_reaction : bool
        Whether to transfer the reaction table.
    write_reaction_system : bool
        Whether to transfer the reaction_system table.
    block_size : int (default 1000)
        Number of atomic structures written together per block.
    start_block : int (default 0)
        Block index to start with (for resuming interrupted transfers).
    """
    self.stdout.write('Starting transfer\n')
    con = self.connection or self._connect()
    self._initialize(con)
    self.stdout.write('Finished initialization\n')
    cur = con.cursor()
    self.stdout.write('Got a cursor\n')
    set_schema = 'SET search_path = {0};'.format(self.schema)
    cur.execute(set_schema)
    # Local imports keep module import time low for commands that
    # never call transfer().
    import os
    self.stdout.write('Imported os\n')
    import ase.db
    self.stdout.write('Imported ase.db\n')
    self.stdout.write('Building server_name\n')
    server_name = "postgres://{0}:{1}@{2}:5432/catalysishub".format(
        self.user, self.password, self.server)
    self.stdout.write('Connecting to {server_name}\n'.format(**locals()))
    nkvp = 0
    nrows = 0
    if write_ase:
        db = ase.db.connect(filename_sqlite)
        n_structures = db.count()
        n_blocks = int(n_structures / block_size) + 1
        for block_id in range(start_block, n_blocks):
            # ids are 1-based in ase.db
            b0 = block_id * block_size + 1
            b1 = (block_id + 1) * block_size + 1
            self.stdout.write(
                str(block_id) + ' ' + 'from ' + str(b0) + ' to ' +
                str(b1) + '\n')
            if block_id + 1 == n_blocks:
                b1 = n_structures + 1
            with ase.db.connect(server_name, type='postgresql') as db2:
                for i in range(b0, b1):
                    # Progress dots: one per structure, same line.
                    self.stdout.write(' .' + str(i))
                    self.stdout.flush()
                    row = db.get(i)
                    kvp = row.get('key_value_pairs', {})
                    # nkvp bookkeeping kept for the (disabled) extra
                    # key-value-pair injection below.
                    nkvp -= len(kvp)
                    # kvp.update(add_key_value_pairs)
                    nkvp += len(kvp)
                    db2.write(row, data=row.get('data'), **kvp)
                    nrows += 1
            self.stdout.write('\n')
            # BUGFIX: colon was misplaced after the newline
            # ('...{0}\n:' -> '...{0}:\n').
            self.stdout.write('Finished Block {0}:\n'.format(block_id))
            self.stdout.write(
                ' Completed transfer of {0} atomic structures.\n'.format(
                    nrows))
    from cathub.cathubsqlite import CathubSQLite
    db = CathubSQLite(filename_sqlite)
    con_lite = db._connect()
    cur_lite = con_lite.cursor()
    # write publication
    Npub = 0
    Npubstruc = 0
    if write_publication:
        try:
            npub = db.get_last_pub_id(cur_lite)
        # BUGFIX: bare 'except:' also swallowed KeyboardInterrupt and
        # SystemExit; narrowed to match the sibling implementations.
        except BaseException:
            npub = 1
        for id_lite in range(1, npub + 1):
            Npub += 1
            row = db.read(id=id_lite, table='publication')
            if len(row) == 0:
                continue
            values = row[0]
            pid, pub_id = self.write_publication(values)
        # Publication structures connection
        cur_lite.execute("""SELECT * from publication_system;""")
        rows = cur_lite.fetchall()
        for row in rows:
            Npubstruc += 1
            values = row[:]
            key_str, value_str = get_key_value_str(
                values, table='publication_system')
            set_schema = 'SET search_path = {0};'.format(self.schema)
            cur.execute(set_schema)
            print("[SET SCHEMA] {set_schema}".format(**locals()))
            # NOTE(review): SQL built by string formatting; values come
            # from the local sqlite db, assumed trusted.
            insert_command = 'INSERT INTO publication_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;'.format(
                key_str, value_str)
            cur.execute(insert_command)
            # self.write(values, table='publication_system')
        con.commit()
    Ncat = 0
    Ncatstruc = 0
    if write_reaction:
        n = db.get_last_id(cur_lite)
        select_ase = """SELECT * from reaction_system where id={};"""
        for id_lite in range(start_id, n + 1):
            row = db.read(id_lite)
            if len(row) == 0:
                continue
            values = row[0]
            id = self.check(values[13], values[1], values[6], values[7],
                            values[8], strict=True)
            update_rs = False
            if id is not None:
                id = self.update(id, values)
                self.stdout.write(
                    'Updated reaction db with row id = {}\n'.format(id))
                update_rs = True
            else:
                Ncat += 1
                id = self.write(values)
                self.stdout.write(
                    'Written to reaction db row id = {0}\n'.format(id))
            cur_lite.execute(select_ase.format(id_lite))
            rows = cur_lite.fetchall()
            if write_reaction_system:
                if update_rs:
                    # Remove stale links before re-inserting updated rows.
                    cur.execute(
                        'Delete from reaction_system where reaction_id={0}'
                        .format(id))
                for row in rows:
                    Ncatstruc += 1
                    values = list(row)
                    # Older sqlite rows lack the optional second column.
                    if len(values) == 3:
                        values.insert(1, None)
                    values[3] = id
                    key_str, value_str = get_key_value_str(
                        values, table='reaction_system')
                    set_schema = 'SET search_path = {0};'.format(
                        self.schema)
                    cur.execute(set_schema)
                    print("[SET SCHEMA] {set_schema}".format(**locals()))
                    insert_command = 'INSERT INTO reaction_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;'.format(
                        key_str, value_str)
                    print("[INSERT COMMAND] {insert_command}".format(
                        **locals()))
                    cur.execute(insert_command)
            con.commit()  # Commit reaction_system for each row
    # Refresh full-text-search vectors after bulk inserts.
    for statement in tsvector_update:
        cur.execute(statement)
    if self.connection is None:
        # Only close connections we opened ourselves.
        con.commit()
        con.close()
    self.stdout.write('Inserted into:\n')
    self.stdout.write(' systems: {0}\n'.format(nrows))
    self.stdout.write(' publication: {0}\n'.format(Npub))
    self.stdout.write(' publication_system: {0}\n'.format(Npubstruc))
    self.stdout.write(' reaction: {0}\n'.format(Ncat))
    self.stdout.write(' reaction_system: {0}\n'.format(Ncatstruc))