def choose_elements(bulk_database, n):
    '''
    Chooses `n` elements at random from the set of elements inside the given
    database.

    Args:
        bulk_database   A string pointing to the ASE *.db object that contains
                        the bulks you want to consider.
        n               A positive integer indicating how many elements you
                        want to choose.
    Returns:
        elements    A list of strings indicating the chosen elements
    '''
    db = ase.db.connect(bulk_database)
    all_elements = {ELEMENTS[number] for row in db.select()
                    for number in row.numbers}
    # `random.sample` needs a sequence; sampling a set directly was
    # deprecated in Python 3.9 and removed in 3.11.
    elements = random.sample(sorted(all_elements), n)

    # Make sure we choose a combination of elements that exists in our bulk
    # database
    while db.count(elements) == 0:
        warnings.warn('Sampled the elements %s, but could not find any '
                      'matching bulks in the database (%s). Trying to '
                      're-sample' % (elements, bulk_database), RuntimeWarning)
        elements = random.sample(sorted(all_elements), n)

    return elements
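# Usage sketch for `choose_elements` (not from the original module). The
# 'bulks.db' filename is hypothetical; `ELEMENTS`, `ase.db`, `random`, and
# `warnings` are assumed to be available as in the module above.
if __name__ == '__main__':
    sampled = choose_elements('bulks.db', n=2)
    print('Chose elements:', sampled)   # e.g. ['Cu', 'Pd'], db-dependent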
def read_db(filename, index, **kwargs):
    db = ase.db.connect(filename, serial=True, **kwargs)

    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            pass

    if isinstance(index, int):
        index = slice(index, index + 1 or None)

    if isinstance(index, str):
        # index is a database query string:
        for row in db.select(index):
            yield row.toatoms()
    else:
        start, stop, step = index.indices(db.count())
        if start == stop:
            return
        assert step == 1
        for row in db.select(offset=start, limit=stop - start):
            yield row.toatoms()
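# Usage sketch for `read_db` (not from the original module; 'structures.db'
# is a hypothetical file). `index` may be an integer, a slice string such as
# ':5', or an ASE database query string such as 'natoms>2'.
if __name__ == '__main__':
    for atoms in read_db('structures.db', ':5'):
        print(atoms.get_chemical_formula())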
def choose_adsorbate(adsorbate_database):
    '''
    Chooses an adsorbate from our database at random.

    Args:
        adsorbate_database  A string pointing to the ASE *.db object that
                            contains the adsorbates you want to consider.
    Returns:
        atoms           `ase.Atoms` object of the adsorbate
        smiles          SMILES-formatted representation of the adsorbate
        bond_indices    list of integers indicating the indices of the atoms
                        in the adsorbate that are meant to be bonded to the
                        surface
    '''
    db = ase.db.connect(adsorbate_database)
    ads_idx = random.choice(list(range(db.count())))
    row = db.get(ads_idx + 1)  # ase.db's don't 0-index

    atoms = row.toatoms()
    data = row.data
    smiles = data['SMILE']
    bond_indices = data['bond_idx']
    return atoms, smiles, bond_indices
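# Usage sketch for `choose_adsorbate` (not from the original module);
# 'adsorbates.db' is a hypothetical database whose rows carry 'SMILE' and
# 'bond_idx' entries in `row.data`, as assumed by the function above.
if __name__ == '__main__':
    atoms, smiles, bond_indices = choose_adsorbate('adsorbates.db')
    print(smiles, bond_indices)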
#     mol = list(db.select(formula=name, prototype=proto))[0]
#     thick.append(get_thick(mol))

alpha_x = numpy.array(alpha_x)
alpha_z = numpy.array(alpha_z)
eps_x_3D = numpy.array(eps_x_3D)
eps_z_3D = numpy.array(eps_z_3D)
Eg_HSE = numpy.array(Eg_HSE)
# thick = numpy.array(thick)

eps_x_gpaw = []
eps_z_gpaw = []
alpha_z_gpaw = []
Eg_gpaw = []
L_gpaw = []

for db_id in range(1, db.count() + 1):  # db index starts with 1
    mol = db.get(db_id)
    # if any(hasattr(mol, key) is False for key in ["alphax", "alphay",
    #                                               "alphaz", "bulk_L",
    #                                               "bulk_eps_x", "bulk_eps_y",
    #                                               "bulk_eps_z"]):
    #     continue
    # if mol.bulk_calculated is False:
    #     continue
    try:
        ax = (mol.alphax + mol.alphay) / 2
        az = mol.alphaz
        L, ex, ez, e = get_bulk(None, None, db_id, method="gpaw")
        ex_simu = 1 + 4 * pi * ax / L
        ez_simu = 1 / (1 - 4 * pi * az / L)
        # ez_simu = 4 * pi * az / L
        eps_x_gpaw.append((ex, ex_simu))
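# Hedged helper (not from the original script) restating the model used in
# the loop above: for a layered stack with period L and 2D sheet
# polarizabilities alpha, the in-plane response adds linearly while the
# out-of-plane response follows the series (depolarization) form.
from math import pi

def eps_from_alpha(alpha_x, alpha_z, L):
    """Return (eps_x, eps_z) of the bulk stack from 2D polarizabilities."""
    eps_x = 1 + 4 * pi * alpha_x / L        # parallel: fields add up
    eps_z = 1 / (1 - 4 * pi * alpha_z / L)  # perpendicular: D is continuous
    return eps_x, eps_z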
def index(project):
    global next_con_id

    # Backwards compatibility:
    project = request.args.get('project') or project

    if not projects:
        # First time: initialize list of projects
        for proj, db in sorted(databases.items()):
            meta = ase.db.web.process_metadata(db)
            db.meta = meta
            nrows = len(db)
            projects.append((proj, db.meta.get('title', proj), nrows))
            print('Initialized {proj}: {nrows} rows'.format(proj=proj,
                                                            nrows=nrows))

    if project is None and len(projects) > 1:
        return render_template('projects.html',
                               projects=projects,
                               home=home,
                               md=None,
                               ase_db_footer=ase_db_footer)

    if project is None:
        project = list(databases)[0]

    con_id = int(request.args.get('x', '0'))
    if con_id in connections:
        query, nrows, page, columns, sort, limit = connections[con_id]

    if con_id not in connections:
        # Give this connection a new id:
        con_id = next_con_id
        next_con_id += 1
        query = ['', {}, '']
        nrows = None
        page = 0
        columns = None
        sort = 'id'
        limit = 25

    db = databases.get(project)
    if db is None:
        return 'No such project: ' + project

    meta = db.meta

    if columns is None:
        columns = meta.get('default_columns')[:] or list(all_columns)

    if 'sort' in request.args:
        column = request.args['sort']
        if column == sort:
            sort = '-' + column
        elif '-' + column == sort:
            sort = 'id'
        else:
            sort = column
        page = 0
    elif 'query' in request.args:
        dct = {}
        query = [request.args['query']]
        q = query[0]
        for special in meta['special_keys']:
            kind, key = special[:2]
            if kind == 'SELECT':
                value = request.args['select_' + key]
                dct[key] = convert_str_to_int_float_or_str(value)
                if value:
                    q += ',{}={}'.format(key, value)
            elif kind == 'BOOL':
                value = request.args['bool_' + key]
                dct[key] = convert_str_to_int_float_or_str(value)
                if value:
                    q += ',{}={}'.format(key, value)
            else:
                v1 = request.args['from_' + key]
                v2 = request.args['to_' + key]
                var = request.args['range_' + key]
                dct[key] = (v1, v2, var)
                if v1:
                    q += ',{}>={}'.format(var, v1)
                if v2:
                    q += ',{}<={}'.format(var, v2)
        q = q.lstrip(',')
        query += [dct, q]
        sort = 'id'
        page = 0
        nrows = None
    elif 'limit' in request.args:
        limit = int(request.args['limit'])
        page = 0
    elif 'page' in request.args:
        page = int(request.args['page'])

    if 'toggle' in request.args:
        column = request.args['toggle']
        if column == 'reset':
            columns = meta.get('default_columns')[:] or list(all_columns)
        else:
            if column in columns:
                columns.remove(column)
                if column == sort.lstrip('-'):
                    sort = 'id'
                    page = 0
            else:
                columns.append(column)

    okquery = query

    if nrows is None:
        try:
            nrows = db.count(query[2])
        except (ValueError, KeyError) as e:
            flash(', '.join(['Bad query'] + list(e.args)))
            okquery = ('', {}, 'id=0')  # this will return no rows
            nrows = 0

    table = Table(db, meta.get('unique_key', 'id'))
    table.select(okquery[2], columns, sort, limit, offset=page * limit)

    con = Connection(query, nrows, page, columns, sort, limit)
    connections[con_id] = con

    if len(connections) > 1000:
        # Forget old connections:
        for cid in sorted(connections)[:200]:
            del connections[cid]

    table.format(SUBSCRIPT)

    addcolumns = [column for column in all_columns + table.keys
                  if column not in table.columns]

    return render_template('table.html',
                           project=project,
                           t=table,
                           md=meta,
                           con=con,
                           x=con_id,
                           home=home,
                           ase_db_footer=ase_db_footer,
                           pages=pages(page, nrows, limit),
                           nrows=nrows,
                           addcolumns=addcolumns,
                           row1=page * limit + 1,
                           row2=min((page + 1) * limit, nrows),
                           download_button=download_button)
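# Hedged sketch (not part of the original view): the pagination bounds
# passed to the template above, extracted as a standalone helper. `pages()`
# itself is defined elsewhere in the module.
def row_bounds(page, limit, nrows):
    """Return the 1-indexed first and last row shown on `page`."""
    row1 = page * limit + 1
    row2 = min((page + 1) * limit, nrows)
    return row1, row2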
def transfer(self, filename_sqlite, block_size=1000, start_block=0,
             write_ase=True, write_publication=True, write_reaction=True,
             write_reaction_system=True, check=False):
    """Transfer data from a local sqlite3 .db file to the Catalysis-Hub
    PostgreSQL server.

    Parameters:

    filename_sqlite: str
        name of the .db file
    block_size: int (default 1000)
        number of atomic structures and reactions to write together in
        each block
    start_block: int (default 0)
        block to start with
    write_ase: bool
        whether or not to write atomic structures
    write_publication: bool
        whether or not to transfer the publication table
    write_reaction: bool
        whether or not to transfer the reaction table
    write_reaction_system: bool
        whether or not to write the reaction_system table
    """
    self.stdout.write('Starting transfer\n')
    con = self.connection or self._connect()
    self._initialize(con)
    self.stdout.write('Finished initialization\n')
    cur = con.cursor()
    self.stdout.write('Got a cursor\n')
    self.stdout.write('Connecting to {0}\n'.format(self.server_name))

    nrows = 0
    if write_ase:
        self.stdout.write('Transferring atomic structures\n')
        db = ase.db.connect(filename_sqlite)
        n_structures = db.count()
        n_blocks = n_structures // block_size + 1
        t_av = 0
        for block_id in range(start_block, n_blocks):
            i = block_id - start_block
            t1 = time.time()
            b0 = block_id * block_size
            b1 = (block_id + 1) * block_size + 1
            if block_id + 1 == n_blocks:
                b1 = n_structures + 1
            rows = list(db.select('{}<id<{}'.format(b0, b1)))
            with ase.db.connect(self.server_name, type='postgresql') as db2:
                # Write one row at a time until ase is updated:
                # db2.write(rows)
                for row in rows:
                    db2.write(row)
            nrows += len(rows)
            t2 = time.time()
            dt = t2 - t1
            t_av = (t_av * i + dt) / (i + 1)
            self.stdout.write(
                '  Finished block {0} / {1} in {2} sec\n'.format(
                    block_id + 1, n_blocks, dt))
            self.stdout.write(
                '  Completed transfer of {0} atomic structures\n'.format(
                    nrows))
            self.stdout.write('  Estimated time left: {0} sec\n'.format(
                t_av * (n_blocks - block_id - 1)))

    db = CathubSQLite(filename_sqlite)
    con_lite = db._connect()
    cur_lite = con_lite.cursor()

    Npub = 0
    Npubstruc = 0
    if write_publication:
        self.stdout.write('Transferring publications\n')
        try:
            npub = db.get_last_pub_id(cur_lite)
        except BaseException:
            npub = 1
        for id_lite in range(1, npub + 1):
            Npub += 1
            row = db.read(id=id_lite, table='publication')
            if len(row) == 0:
                continue
            values = row[0]
            pid, pub_id = self.write_publication(values)

        # Publication-structures connection
        cur_lite.execute("""SELECT * from publication_system;""")
        publication_system_values = []
        rows = cur_lite.fetchall()
        for row in rows:
            Npubstruc += 1
            values = list(row)
            value_list = get_value_list(values)
            publication_system_values += [tuple(value_list)]

        # Insert into the publication_system table
        key_str = get_key_str(table='publication_system')
        insert_command = """INSERT INTO publication_system ({0})
        VALUES %s ON CONFLICT DO NOTHING;""".format(key_str)

        execute_values(cur=cur, sql=insert_command,
                       argslist=publication_system_values, page_size=1000)

        # Write pub_id to the systems table
        cur.execute("""UPDATE systems SET
        key_value_pairs=jsonb_set(key_value_pairs, '{{"pub_id"}}', '"{pub_id}"')
        WHERE unique_id IN
        (SELECT ase_id from publication_system WHERE pub_id='{pub_id}')"""
                    .format(pub_id=pub_id))

        con.commit()
        self.stdout.write('  Completed transfer of publications\n')

    Ncat = 0
    Ncatstruc = 0
    if write_reaction:
        self.stdout.write('Transferring reactions\n')
        cur.execute('SELECT max(id) from reaction;')
        ID = cur.fetchone()[0] or 0
        n_react = db.get_last_id(cur_lite)
        n_blocks = int(n_react / block_size) + 1
        t_av = 0
        for block_id in range(start_block, n_blocks):
            reaction_values = []
            reaction_system_values = []
            Ncat0 = Ncat
            Ncatstruc0 = Ncatstruc

            i = block_id - start_block
            t1 = time.time()
            b0 = block_id * block_size + 1
            b1 = (block_id + 1) * block_size + 1
            if block_id + 1 == n_blocks:
                b1 = n_react + 1

            for id_lite in range(b0, b1):
                row = db.read(id_lite)
                if len(row) == 0:
                    continue
                values = row[0]
                # id = self.check(values[13], values[1], values[6],
                #                 values[7], values[8], strict=True)
                id = None
                update_rs = False
                if id is not None:
                    id = self.update(id, values)
                    self.stdout.write(
                        'Updated reaction db with row id = {}\n'.format(id))
                    update_rs = True
                else:
                    ID += 1
                    Ncat += 1
                    value_list = get_value_list(values)
                    value_list[0] = ID  # set new ID
                    reaction_values += [tuple(value_list)]

                if write_reaction_system:
                    cur_lite.execute(
                        "SELECT * from reaction_system where id={};"
                        .format(id_lite))
                    rows = cur_lite.fetchall()
                    if update_rs:
                        cur.execute("""Delete from reaction_system
                        where id={0}""".format(id))
                    for row in rows:
                        Ncatstruc += 1
                        values = list(row)
                        if len(values) == 3:
                            values.insert(1, None)
                        value_list = get_value_list(values)
                        value_list[3] = ID
                        reaction_system_values += [tuple(value_list)]

            q = ', '.join('?' * 14)
            q = '({})'.format(q.replace('?', '%s'))
            key_str = get_key_str()
            insert_command = """INSERT INTO reaction
            ({0}) VALUES %s;""".format(key_str)

            execute_values(cur=cur, sql=insert_command,
                           argslist=reaction_values, template=q,
                           page_size=block_size)

            key_str = get_key_str('reaction_system')
            insert_command = """INSERT INTO reaction_system ({0})
            VALUES %s ON CONFLICT DO NOTHING;""".format(key_str)

            execute_values(cur=cur, sql=insert_command,
                           argslist=reaction_system_values,
                           page_size=1000)

            con.commit()
            t2 = time.time()
            dt = t2 - t1
            t_av = (t_av * i + dt) / (i + 1)
            self.stdout.write(
                '  Finished block {0} / {1} in {2} sec\n'.format(
                    block_id + 1, n_blocks, dt))
            self.stdout.write(
                '  Completed transfer of {0} reactions.\n'.format(
                    Ncat - Ncat0))
            self.stdout.write('  Estimated time left: {0} sec\n'.format(
                t_av * (n_blocks - block_id - 1)))

        self.stdout.write('  Completed transfer of reactions\n')

    for statement in tsvector_update:
        cur.execute(statement)

    if self.connection is None:
        con.commit()
        con.close()

    self.stdout.write('Inserted into:\n')
    self.stdout.write('  systems: {0}\n'.format(nrows))
    self.stdout.write('  publication: {0}\n'.format(Npub))
    self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
    self.stdout.write('  reaction: {0}\n'.format(Ncat))
    self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))
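# Sanity-check sketch (not from the original module): the block bounds used
# in the structure-transfer loop above, b0 = block_id * block_size and
# b1 = (block_id + 1) * block_size + 1 combined with the '{}<id<{}'
# selection, cover the ids 1..n_structures exactly once.
def block_ids(n_structures, block_size):
    n_blocks = n_structures // block_size + 1
    covered = []
    for block_id in range(n_blocks):
        b0 = block_id * block_size
        b1 = (block_id + 1) * block_size + 1
        if block_id + 1 == n_blocks:
            b1 = n_structures + 1
        covered.extend(range(b0 + 1, b1))  # ids with b0 < id < b1
    return covered

assert block_ids(2500, 1000) == list(range(1, 2501))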
import sys
sys.path.insert(0, '../../../')

import random
from multiprocessing import Pool

import ase.db
from tqdm import tqdm

from src.discoverers.adsorption.values import calc_co2rr_activities
from src.discoverers.adsorption.mms import MultiscaleDiscoverer
from src.discoverers.adsorption.models import PrimeModel


# Discoverer settings
adsorbate = 'CO'
initial_training_size = 1000
batch_size = 200
quantile_cutoff = 0.9

# Data loading
db_dir = '../../pull_data/%s_synthesized/' % adsorbate
db = ase.db.connect(db_dir + '%s.db' % adsorbate)
rows = list(tqdm(db.select(), desc='reading ASE db', total=db.count()))
random.Random(42).shuffle(rows)


def parse_row(row):
    feature = row.id
    data = row.data
    label = data['adsorption_energy']
    surface = (data['mpid'], data['miller'], data['shift'], data['top'])
    return feature, label, surface


def parse_rows(rows):
    with Pool(processes=32, maxtasksperchild=1000) as pool:
        iterator = pool.imap(parse_row, rows, chunksize=100)
        iterator_tracked = tqdm(iterator, desc='parsing rows', total=len(rows))
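# Hedged sketch: `parse_rows` above is truncated. A plausible, simpler
# sequential equivalent (no multiprocessing) that returns the same
# (feature, label, surface) tuples produced by `parse_row`:
def parse_rows_serial(rows):
    return [parse_row(row) for row in tqdm(rows, desc='parsing rows')]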
def index():
    global next_con_id
    con_id = int(request.args.get('x', '0'))
    if con_id not in connections:
        con_id = next_con_id
        next_con_id += 1
        query = ''
        columns = list(all_columns)
        sort = 'id'
        limit = 25
        opened = set()
        nrows = None
        page = 0
    else:
        query, nrows, page, columns, sort, limit, opened = connections[con_id]

    if 'sort' in request.args:
        column = request.args['sort']
        if column == sort:
            sort = '-' + column
        elif '-' + column == sort:
            sort = 'id'
        else:
            sort = column
        page = 0
    elif 'query' in request.args:
        query = request.args['query']
        try:
            limit = max(1, min(int(request.args.get('limit', limit)), 200))
        except ValueError:
            pass
        sort = 'id'
        opened = set()
        page = 0
        nrows = None
    elif 'page' in request.args:
        page = int(request.args['page'])

    if 'toggle' in request.args:
        tcolumns = request.args['toggle'].split(',')
        if tcolumns == ['reset']:
            columns = list(all_columns)
        else:
            for column in tcolumns:
                if column in columns:
                    columns.remove(column)
                    if column == sort.lstrip('-'):
                        sort = 'id'
                        page = 0
                else:
                    columns.append(column)

    if nrows is None:
        nrows = db.count(query)

    table = Table(db)
    table.select(query, columns, sort, limit, offset=page * limit)

    con = Connection(query, nrows, page, columns, sort, limit, opened)
    connections[con_id] = con

    table.format(SUBSCRIPT)

    addcolumns = [column for column in all_columns + table.keys
                  if column not in table.columns]

    return render_template('table.html', t=table, con=con, cid=con_id,
                           home=home, pages=pages(page, nrows, limit),
                           nrows=nrows, addcolumns=addcolumns,
                           row1=page * limit + 1,
                           row2=min((page + 1) * limit, nrows))
def index():
    global next_con_id

    if not projects:
        # First time: initialize list of projects
        projects[:] = [(proj, d.metadata.get('title', proj))
                       for proj, d in sorted(databases.items())]

    con_id = int(request.args.get('x', '0'))
    if con_id in connections:
        project, query, nrows, page, columns, sort, limit = \
            connections[con_id]
        newproject = request.args.get('project')
        if newproject is not None and newproject != project:
            con_id = 0

    if con_id not in connections:
        # Give this connection a new id:
        con_id = next_con_id
        next_con_id += 1
        project = request.args.get('project', projects[0][0])
        query = ['', {}, '']
        nrows = None
        page = 0
        columns = None
        sort = 'id'
        limit = 25

    db = databases[project]
    if not hasattr(db, 'meta'):
        meta = ase.db.web.process_metadata(db)
        db.meta = meta
    else:
        meta = db.meta

    if columns is None:
        columns = meta.get('default_columns')[:] or list(all_columns)

    if 'sort' in request.args:
        column = request.args['sort']
        if column == sort:
            sort = '-' + column
        elif '-' + column == sort:
            sort = 'id'
        else:
            sort = column
        page = 0
    elif 'query' in request.args:
        dct = {}
        query = [request.args['query']]
        q = query[0]
        for special in meta['special_keys']:
            kind, key = special[:2]
            if kind == 'SELECT':
                value = request.args['select_' + key]
                dct[key] = value
                if value:
                    q += ',{}={}'.format(key, value)
            elif kind == 'BOOL':
                value = request.args['bool_' + key]
                dct[key] = value
                if value:
                    q += ',{}={}'.format(key, value)
            else:
                v1 = request.args['from_' + key]
                v2 = request.args['to_' + key]
                var = request.args['range_' + key]
                dct[key] = (v1, v2, var)
                if v1:
                    q += ',{}>={}'.format(var, v1)
                if v2:
                    q += ',{}<={}'.format(var, v2)
        q = q.lstrip(',')
        query += [dct, q]
        sort = 'id'
        page = 0
        nrows = None
    elif 'limit' in request.args:
        limit = int(request.args['limit'])
        page = 0
    elif 'page' in request.args:
        page = int(request.args['page'])

    if 'toggle' in request.args:
        column = request.args['toggle']
        if column == 'reset':
            columns = meta.get('default_columns')[:] or list(all_columns)
        else:
            if column in columns:
                columns.remove(column)
                if column == sort.lstrip('-'):
                    sort = 'id'
                    page = 0
            else:
                columns.append(column)

    okquery = query

    if nrows is None:
        try:
            nrows = db.count(query[2])
        except (ValueError, KeyError) as e:
            flash(', '.join(['Bad query'] + list(e.args)))
            okquery = ('', {}, 'id=0')  # this will return no rows
            nrows = 0

    table = Table(db)
    table.select(okquery[2], columns, sort, limit, offset=page * limit)

    con = Connection(project, query, nrows, page, columns, sort, limit)
    connections[con_id] = con

    if len(connections) > 1000:
        # Forget old connections:
        for cid in sorted(connections)[:200]:
            del connections[cid]

    table.format(SUBSCRIPT)

    addcolumns = [column for column in all_columns + table.keys
                  if column not in table.columns]

    return render_template('table.html',
                           project=project,
                           projects=projects,
                           t=table,
                           md=meta,
                           con=con,
                           x=con_id,
                           home=home,
                           pages=pages(page, nrows, limit),
                           nrows=nrows,
                           addcolumns=addcolumns,
                           row1=page * limit + 1,
                           row2=min((page + 1) * limit, nrows))
def transfer(self, filename_sqlite, start_id=1, write_ase=True,
             write_publication=True, write_reaction=True,
             write_reaction_system=True, block_size=1000, start_block=0):
    self.stdout.write('Starting transfer\n')
    con = self.connection or self._connect()
    self._initialize(con)
    self.stdout.write('Finished initialization\n')
    cur = con.cursor()
    self.stdout.write('Got a cursor\n')
    set_schema = 'SET search_path = {0};'.format(self.schema)
    cur.execute(set_schema)

    import os
    import time
    self.stdout.write('Imported os\n')
    import ase.db
    self.stdout.write('Imported ase.db\n')
    self.stdout.write('Building server_name\n')
    server_name = "postgres://{0}:{1}@{2}:5432/catalysishub".format(
        self.user, self.password, self.server)
    self.stdout.write('Connecting to {server_name}\n'.format(**locals()))

    nrows = 0
    if write_ase:
        print('Transferring atomic structures')
        db = ase.db.connect(filename_sqlite)
        n_structures = db.count()
        n_blocks = int(n_structures / block_size) + 1
        t_av = 0
        for block_id in range(start_block, n_blocks):
            i = block_id - start_block
            t1 = time.time()
            b0 = block_id * block_size + 1
            b1 = (block_id + 1) * block_size + 1
            self.stdout.write(str(block_id) + ' ' + 'from ' + str(b0) +
                              ' to ' + str(b1) + '\n')
            if block_id + 1 == n_blocks:
                b1 = n_structures + 1
            rows = list(db.select('{}<id<{}'.format(b0 - 1, b1)))
            with ase.db.connect(server_name, type='postgresql') as db2:
                db2.write(rows)
            nrows += len(rows)
            t2 = time.time()
            dt = t2 - t1
            t_av = (t_av * i + dt) / (i + 1)
            self.stdout.write(
                '  Finished block {0} / {1} in {2} sec'.format(
                    block_id, n_blocks, dt))
            self.stdout.write(
                '  Completed transfer of {0} atomic structures.'.format(
                    nrows))
            self.stdout.write('  Estimated time left: {0} sec'.format(
                t_av * (n_blocks - block_id)))

    from catkit.hub.cathubsqlite import CathubSQLite
    db = CathubSQLite(filename_sqlite)
    con_lite = db._connect()
    cur_lite = con_lite.cursor()

    # Write publications
    Npub = 0
    Npubstruc = 0
    if write_publication:
        try:
            npub = db.get_last_pub_id(cur_lite)
        except BaseException:
            npub = 1
        for id_lite in range(1, npub + 1):
            Npub += 1
            row = db.read(id=id_lite, table='publication')
            if len(row) == 0:
                continue
            values = row[0]
            pid, pub_id = self.write_publication(values)

        # Publication-structures connection
        cur_lite.execute("""SELECT * from publication_system;""")
        rows = cur_lite.fetchall()
        for row in rows:
            Npubstruc += 1
            values = row[:]
            key_str, value_str = get_key_value_str(
                values, table='publication_system')
            set_schema = 'SET search_path = {0};'.format(self.schema)
            cur.execute(set_schema)
            print("[SET SCHEMA] {set_schema}".format(**locals()))
            insert_command = """INSERT INTO publication_system ({0})
            VALUES ({1}) ON CONFLICT DO NOTHING;""".format(key_str, value_str)
            cur.execute(insert_command)
            # self.write(values, table='publication_system')
        con.commit()

    Ncat = 0
    Ncatstruc = 0
    if write_reaction:
        n = db.get_last_id(cur_lite)
        select_ase = """SELECT * from reaction_system where id={};"""
        for id_lite in range(start_id, n + 1):
            row = db.read(id_lite)
            if len(row) == 0:
                continue
            values = row[0]
            id = self.check(values[13], values[1], values[6], values[7],
                            values[8], strict=True)
            update_rs = False
            if id is not None:
                id = self.update(id, values)
                self.stdout.write(
                    'Updated reaction db with row id = {}\n'.format(id))
                update_rs = True
            else:
                Ncat += 1
                id = self.write(values)
                self.stdout.write(
                    'Written to reaction db row id = {0}\n'.format(id))

            cur_lite.execute(select_ase.format(id_lite))
            rows = cur_lite.fetchall()
            if write_reaction_system:
                if update_rs:
                    cur.execute("""Delete from reaction_system where
                    reaction_id={0}""".format(id))
                for row in rows:
                    Ncatstruc += 1
                    values = list(row)
                    if len(values) == 3:
                        values.insert(1, None)
                    values[3] = id
                    key_str, value_str = \
                        get_key_value_str(values, table='reaction_system')
                    set_schema = 'SET search_path = {0};'.format(self.schema)
                    cur.execute(set_schema)
                    print("[SET SCHEMA] {set_schema}".format(**locals()))
                    insert_command = """INSERT INTO reaction_system ({0})
                    VALUES ({1}) ON CONFLICT DO NOTHING;""".format(
                        key_str, value_str)
                    print("[INSERT COMMAND] {insert_command}".format(
                        **locals()))
                    cur.execute(insert_command)
            con.commit()  # Commit reaction_system for each row

    for statement in tsvector_update:
        cur.execute(statement)

    if self.connection is None:
        con.commit()
        con.close()

    self.stdout.write('Inserted into:\n')
    self.stdout.write('  systems: {0}\n'.format(nrows))
    self.stdout.write('  publication: {0}\n'.format(Npub))
    self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
    self.stdout.write('  reaction: {0}\n'.format(Ncat))
    self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))
def index():
    global next_con_id
    con_id = int(request.args.get('x', '0'))
    if con_id not in connections:
        con_id = next_con_id
        next_con_id += 1
        query = ''
        columns = list(all_columns)
        sort = 'id'
        limit = 25
        opened = set()
        nrows = None
        page = 0
    else:
        query, nrows, page, columns, sort, limit, opened = connections[con_id]

    if 'sort' in request.args:
        column = request.args['sort']
        if column == sort:
            sort = '-' + column
        elif '-' + column == sort:
            sort = 'id'
        else:
            sort = column
        page = 0
    elif 'query' in request.args:
        query = request.args['query']
        try:
            limit = max(1, min(int(request.args.get('limit', limit)), 200))
        except ValueError:
            pass
        sort = 'id'
        opened = set()
        page = 0
        nrows = None
    elif 'page' in request.args:
        page = int(request.args['page'])

    if 'toggle' in request.args:
        tcolumns = request.args['toggle'].split(',')
        if tcolumns == ['reset']:
            columns = list(all_columns)
        else:
            for column in tcolumns:
                if column in columns:
                    columns.remove(column)
                    if column == sort.lstrip('-'):
                        sort = 'id'
                        page = 0
                else:
                    columns.append(column)

    if nrows is None:
        nrows = db.count(query)

    table = Table(db)
    table.select(query, columns, sort, limit, offset=page * limit)

    con = Connection(query, nrows, page, columns, sort, limit, opened)
    connections[con_id] = con

    table.format(SUBSCRIPT)

    addcolumns = [column for column in all_columns + table.keys
                  if column not in table.columns]

    return render_template('table.html', t=table, con=con, cid=con_id,
                           home=home, pages=pages(page, nrows, limit),
                           nrows=nrows, addcolumns=addcolumns,
                           row1=page * limit + 1,
                           row2=min((page + 1) * limit, nrows))
def index():
    global next_con_id
    con_id = int(request.args.get("x", "0"))
    if con_id not in connections:
        con_id = next_con_id
        next_con_id += 1
        query = ""
        columns = list(all_columns)
        sort = "id"
        limit = 25
        opened = set()
        nrows = None
        page = 0
    else:
        query, nrows, page, columns, sort, limit, opened = connections[con_id]

    if "sort" in request.args:
        column = request.args["sort"]
        if column == sort:
            sort = "-" + column
        elif "-" + column == sort:
            sort = "id"
        else:
            sort = column
        page = 0
    elif "query" in request.args:
        query = request.args["query"]
        try:
            limit = max(1, min(int(request.args.get("limit", limit)), 200))
        except ValueError:
            pass
        sort = "id"
        opened = set()
        page = 0
        nrows = None
    elif "page" in request.args:
        page = int(request.args["page"])

    if "toggle" in request.args:
        tcolumns = request.args["toggle"].split(",")
        if tcolumns == ["reset"]:
            columns = list(all_columns)
        else:
            for column in tcolumns:
                if column in columns:
                    columns.remove(column)
                    if column == sort.lstrip("-"):
                        sort = "id"
                        page = 0
                else:
                    columns.append(column)

    if nrows is None:
        nrows = db.count(query)

    table = Table(db)
    table.select(query, columns, sort, limit, offset=page * limit)

    con = Connection(query, nrows, page, columns, sort, limit, opened)
    connections[con_id] = con

    table.format(SUBSCRIPT)

    addcolumns = [column for column in all_columns + table.keys
                  if column not in table.columns]

    return render_template(
        "table.html",
        t=table,
        con=con,
        cid=con_id,
        home=home,
        pages=pages(page, nrows, limit),
        nrows=nrows,
        addcolumns=addcolumns,
        row1=page * limit + 1,
        row2=min((page + 1) * limit, nrows),
    )
def transfer(self, filename_sqlite, start_id=1, write_ase=True,
             write_publication=True, write_reaction=True,
             write_reaction_system=True, block_size=1000, start_block=0):
    self.stdout.write('Starting transfer\n')
    con = self.connection or self._connect()
    self._initialize(con)
    self.stdout.write('Finished initialization\n')
    cur = con.cursor()
    self.stdout.write('Got a cursor\n')
    set_schema = 'SET search_path = {0};'.format(self.schema)
    cur.execute(set_schema)

    import os
    self.stdout.write('Imported os\n')
    import ase.db
    self.stdout.write('Imported ase.db\n')
    self.stdout.write('Building server_name\n')
    server_name = "postgres://{0}:{1}@{2}:5432/catalysishub".format(
        self.user, self.password, self.server)
    self.stdout.write('Connecting to {server_name}\n'.format(**locals()))

    nkvp = 0
    nrows = 0
    if write_ase:
        db = ase.db.connect(filename_sqlite)
        n_structures = db.count()
        n_blocks = int(n_structures / block_size) + 1
        for block_id in range(start_block, n_blocks):
            b0 = block_id * block_size + 1
            b1 = (block_id + 1) * block_size + 1
            self.stdout.write(str(block_id) + ' ' + 'from ' + str(b0) +
                              ' to ' + str(b1) + '\n')
            if block_id + 1 == n_blocks:
                b1 = n_structures + 1
            # rows = [db._get_row(i) for i in range(b0, b1)]
            # db2 = ase.db.connect(server_name, type='postgresql')
            # for lala in [0]:
            with ase.db.connect(server_name, type='postgresql') as db2:
                for i in range(b0, b1):
                    self.stdout.write(' .' + str(i))
                    self.stdout.flush()
                    row = db.get(i)
                    kvp = row.get('key_value_pairs', {})
                    nkvp -= len(kvp)
                    # kvp.update(add_key_value_pairs)
                    nkvp += len(kvp)
                    db2.write(row, data=row.get('data'), **kvp)
                    nrows += 1
            self.stdout.write('\n')
            self.stdout.write('Finished Block {0}:\n'.format(block_id))
            self.stdout.write(
                '  Completed transfer of {0} atomic structures.\n'.format(
                    nrows))

    from cathub.cathubsqlite import CathubSQLite
    db = CathubSQLite(filename_sqlite)
    con_lite = db._connect()
    cur_lite = con_lite.cursor()

    # Write publications
    Npub = 0
    Npubstruc = 0
    if write_publication:
        try:
            npub = db.get_last_pub_id(cur_lite)
        except BaseException:
            npub = 1
        for id_lite in range(1, npub + 1):
            Npub += 1
            row = db.read(id=id_lite, table='publication')
            if len(row) == 0:
                continue
            values = row[0]
            pid, pub_id = self.write_publication(values)

        # Publication-structures connection
        cur_lite.execute("""SELECT * from publication_system;""")
        rows = cur_lite.fetchall()
        for row in rows:
            Npubstruc += 1
            values = row[:]
            key_str, value_str = get_key_value_str(
                values, table='publication_system')
            set_schema = 'SET search_path = {0};'.format(self.schema)
            cur.execute(set_schema)
            print("[SET SCHEMA] {set_schema}".format(**locals()))
            insert_command = ('INSERT INTO publication_system ({0}) '
                              'VALUES ({1}) ON CONFLICT DO '
                              'NOTHING;'.format(key_str, value_str))
            cur.execute(insert_command)
            # self.write(values, table='publication_system')
        con.commit()

    Ncat = 0
    Ncatstruc = 0
    if write_reaction:
        n = db.get_last_id(cur_lite)
        select_ase = """SELECT * from reaction_system where id={};"""
        for id_lite in range(start_id, n + 1):
            row = db.read(id_lite)
            if len(row) == 0:
                continue
            values = row[0]
            id = self.check(values[13], values[1], values[6], values[7],
                            values[8], strict=True)
            update_rs = False
            if id is not None:
                id = self.update(id, values)
                self.stdout.write(
                    'Updated reaction db with row id = {}\n'.format(id))
                update_rs = True
            else:
                Ncat += 1
                id = self.write(values)
                self.stdout.write(
                    'Written to reaction db row id = {0}\n'.format(id))

            cur_lite.execute(select_ase.format(id_lite))
            rows = cur_lite.fetchall()
            if write_reaction_system:
                if update_rs:
                    cur.execute('Delete from reaction_system where '
                                'reaction_id={0}'.format(id))
                for row in rows:
                    Ncatstruc += 1
                    values = list(row)
                    if len(values) == 3:
                        values.insert(1, None)
                    values[3] = id
                    key_str, value_str = get_key_value_str(
                        values, table='reaction_system')
                    set_schema = 'SET search_path = {0};'.format(self.schema)
                    cur.execute(set_schema)
                    print("[SET SCHEMA] {set_schema}".format(**locals()))
                    insert_command = ('INSERT INTO reaction_system ({0}) '
                                      'VALUES ({1}) ON CONFLICT DO '
                                      'NOTHING;'.format(key_str, value_str))
                    print("[INSERT COMMAND] {insert_command}".format(
                        **locals()))
                    cur.execute(insert_command)
            con.commit()  # Commit reaction_system for each row

    for statement in tsvector_update:
        cur.execute(statement)

    if self.connection is None:
        con.commit()
        con.close()

    self.stdout.write('Inserted into:\n')
    self.stdout.write('  systems: {0}\n'.format(nrows))
    self.stdout.write('  publication: {0}\n'.format(Npub))
    self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
    self.stdout.write('  reaction: {0}\n'.format(Ncat))
    self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))