def _write(self, atoms, key_value_pairs, data):
    """Write one row to the JSON database file and return its id.

    When ``atoms`` is an ``AtomsRow`` whose ``unique_id`` matches an entry
    already read from the file, that entry is overwritten.  In every other
    case a fresh id is taken from ``nextid``.

    Explicit ``key_value_pairs`` / ``data`` arguments win over the values
    stored on the row; empty values are simply not written.
    """
    Database._write(self, atoms, key_value_pairs, data)

    # These defaults describe an empty database.  They are kept when the
    # file does not exist or cannot be parsed.
    bigdct, ids, nextid = {}, [], 1
    if isinstance(self.filename, basestring) and os.path.isfile(self.filename):
        try:
            bigdct, ids, nextid = self._read_json()
        except (SyntaxError, ValueError):
            pass

    if not isinstance(atoms, AtomsRow):
        # Brand new row: stamp creation time and user.
        row = AtomsRow(atoms)
        mtime = now()
        row.ctime = mtime
        row.user = os.getenv('USER')
        row_id = None
    else:
        row = atoms
        wanted = row.unique_id
        # First stored id carrying the same unique_id, or None.
        row_id = next(
            (known for known in ids
             if bigdct[known]['unique_id'] == wanted),
            None)
        mtime = now()

    # Copy the row's attributes, leaving out private ones, the
    # key-value-pair keys and the id itself.
    dct = {}
    for key in row.__dict__:
        if key[0] != '_' and key not in row._keys and key != 'id':
            dct[key] = row[key]
    dct['mtime'] = mtime

    kvp = key_value_pairs if key_value_pairs else row.key_value_pairs
    if kvp:
        dct['key_value_pairs'] = kvp

    if not data:
        data = row.get('data')
    if data:
        dct['data'] = data

    constraints = row.get('constraints')
    if constraints:
        dct['constraints'] = constraints

    if row_id is None:
        row_id = nextid
        ids.append(row_id)
        nextid += 1

    bigdct[row_id] = dct
    self._write_json(bigdct, ids, nextid)
    return row_id
def _write(self, atoms, key_value_pairs, data, id):
    """Write one row to the JSON database file and return its id.

    With ``id`` of None a new id is allocated from ``nextid``.  Otherwise
    the entry stored under ``id`` is replaced; ``id`` must already be
    present in the file (checked with an assertion).
    """
    Database._write(self, atoms, key_value_pairs, data)

    # These defaults describe an empty database.  They are kept when the
    # file does not exist or cannot be parsed.
    bigdct, ids, nextid = {}, [], 1
    if isinstance(self.filename, str) and os.path.isfile(self.filename):
        try:
            bigdct, ids, nextid = self._read_json()
        except (SyntaxError, ValueError):
            pass

    mtime = now()

    if not isinstance(atoms, AtomsRow):
        # Brand new row: stamp creation time and user.
        row = AtomsRow(atoms)
        row.ctime = mtime
        row.user = os.getenv('USER')
    else:
        row = atoms

    # Copy the row's attributes, leaving out private ones, the
    # key-value-pair keys and the id itself.
    dct = {}
    for key in row.__dict__:
        if key[0] != '_' and key not in row._keys and key != 'id':
            dct[key] = row[key]
    dct['mtime'] = mtime

    if key_value_pairs:
        dct['key_value_pairs'] = key_value_pairs

    if data:
        dct['data'] = data

    constraints = row.get('constraints')
    if constraints:
        dct['constraints'] = constraints

    if id is not None:
        assert id in bigdct
    else:
        id = nextid
        ids.append(id)
        nextid += 1

    bigdct[id] = dct
    self._write_json(bigdct, ids, nextid)
    return id
def add_project(db: Database) -> None:
    """Register ``db`` in ``projects`` under the name 'default'.

    Key descriptions start out as ``(key, '', '')`` for every key found on
    the rows and are then overridden by ``db.metadata['key_descriptions']``
    when present.  Default columns come from the metadata too, falling back
    to a copy of ``all_columns``.
    """
    # Collect every key-value-pair key used by any row.
    all_keys: Set[str] = set()
    rows = db.select(columns=['key_value_pairs'], include_data=False)
    for row in rows:
        all_keys.update(row._keys)

    key_descriptions = {}
    for key in all_keys:
        key_descriptions[key] = (key, '', '')

    meta: Dict[str, Any] = db.metadata

    if 'key_descriptions' in meta:
        key_descriptions.update(meta['key_descriptions'])

    default_columns = meta.get('default_columns')
    if default_columns is None:
        default_columns = all_columns[:]

    project = {
        'name': 'default',
        'title': meta.get('title', ''),
        'uid_key': 'id',
        'key_descriptions': create_key_descriptions(key_descriptions),
        'database': db,
        'row_to_dict_function': row_to_dict,
        'handle_query_function': handle_query,
        'default_columns': default_columns,
        'search_template': 'ase/db/templates/search.html',
        'row_template': 'ase/db/templates/row.html',
        'table_template': 'ase/db/templates/table.html',
    }
    projects['default'] = project
def create_table(self, db: Database, uid_key: str,
                 keys: List[str]) -> Table:
    """Build the ``Table`` for the current query, sort order and page.

    If the row count is not known yet it is obtained from ``db.count``.
    A query rejected with ``ValueError`` or ``KeyError`` is reported via
    flask's ``flash`` and replaced by one that matches nothing.
    """
    query = self.query

    if self.nrows is None:
        try:
            self.nrows = db.count(query)
        except (ValueError, KeyError) as e:
            message_parts = ['Bad query'] + list(e.args)
            error = ', '.join(message_parts)
            from flask import flash
            flash(error)
            query = 'id=0'  # this will return no rows
            self.nrows = 0

    first_row = self.page * self.limit
    table = Table(db, uid_key)
    table.select(query, self.columns, self.sort, self.limit,
                 offset=first_row,
                 show_empty_columns=True)
    table.format()

    assert self.columns is not None

    # Columns that can still be added: everything known but not shown.
    hidden = [column for column in all_columns + keys
              if column not in self.columns]
    hidden.sort()
    table.addcolumns = hidden
    return table
def _write(self, atoms, keywords, key_value_pairs, data):
    """Write one row to the JSON database file and return its id.

    ``atoms`` may be a dict: it is reduced to the keys listed in
    ``reserved_keys`` (minus 'id') and matched against stored entries by
    'unique_id'.  Anything else is converted with ``self.collect_data``
    and always stored under a new id.
    """
    Database._write(self, atoms, keywords, key_value_pairs, data)

    # These defaults describe an empty database.  They are kept when the
    # file does not exist or cannot be parsed.
    bigdct, ids, nextid = {}, [], 1
    if isinstance(self.filename, str) and os.path.isfile(self.filename):
        try:
            bigdct, ids, nextid = self._read_json()
        except (SyntaxError, ValueError):
            pass

    if not isinstance(atoms, dict):
        dct = self.collect_data(atoms)
        row_id = None
    else:
        dct = {key: atoms[key]
               for key in reserved_keys
               if key in atoms and key != 'id'}
        wanted = dct['unique_id']
        # First stored id carrying the same unique_id, or None.
        row_id = next(
            (known for known in ids
             if bigdct[known]['unique_id'] == wanted),
            None)
        dct['mtime'] = now()

    # Store the optional sections only when they are non-empty; otherwise
    # make sure no stale value remains in the dict.
    optional = (('keywords', keywords),
                ('key_value_pairs', key_value_pairs),
                ('data', data))
    for name, value in optional:
        if not value:
            dct.pop(name, None)
        else:
            dct[name] = value

    if row_id is None:
        row_id = nextid
        ids.append(row_id)
        nextid += 1

    bigdct[row_id] = dct
    self._write_json(bigdct, ids, nextid)
    return row_id
def add_project(db: Database) -> None:
    """Register ``db`` in ``projects`` under the name 'default'.

    Every key found on the rows gets the placeholder description
    ``(key, '', '')``; the default columns are a copy of ``all_columns``.
    """
    # Collect every key-value-pair key used by any row.
    all_keys = set()
    rows = db.select(columns=['key_value_pairs'], include_data=False)
    for row in rows:
        all_keys.update(row._keys)

    kd = {}
    for key in all_keys:
        kd[key] = (key, '', '')

    project = {
        'name': 'default',
        'uid_key': 'id',
        'key_descriptions': create_key_descriptions(kd),
        'database': db,
        'row_to_dict_function': row_to_dict,
        'handle_query_function': handle_query,
        'default_columns': all_columns[:],
        'search_template': 'ase/db/templates/search.html',
        'row_template': 'ase/db/templates/row.html',
    }
    projects['default'] = project
def create_table(self, db: Database, uid_key: str) -> Table:
    """Build the ``Table`` for the current query, sort order and page.

    If the row count is not known yet it is obtained from ``db.count``.
    A query rejected with ``ValueError`` or ``KeyError`` is reported via
    ``flash`` and replaced by one that matches nothing.
    """
    query = self.query

    if self.nrows is None:
        try:
            self.nrows = db.count(query)
        except (ValueError, KeyError) as e:
            message_parts = ['Bad query'] + list(e.args)
            error = ', '.join(message_parts)
            flash(error)
            query = 'id=0'  # this will return no rows
            self.nrows = 0

    first_row = self.page * self.limit
    table = Table(db, uid_key)
    table.select(query, self.columns, self.sort, self.limit,
                 offset=first_row)
    table.format()

    # Columns that can still be added: everything known but not shown.
    hidden = [column for column in all_columns + table.keys
              if column not in table.columns]
    hidden.sort()
    table.addcolumns = hidden
    return table
def _write(self, atoms, key_value_pairs, data, id):
    """Insert a row into, or update a row of, the ``systems`` table.

    With ``id`` of None a new row is inserted and its id is read back with
    ``self.get_last_id``.  Otherwise the row with that id is updated, after
    its entries in the keys, text_key_values, number_key_values and species
    tables have been deleted.

    Returns the id of the written row.
    """
    # NOTE: this pops "external_tables" out of the caller's dict.
    ext_tables = key_value_pairs.pop("external_tables", {})
    Database._write(self, atoms, key_value_pairs, data)

    mtime = now()

    encode = self.encode
    blob = self.blob

    if not isinstance(atoms, AtomsRow):
        # New row: stamp creation time and user.
        row = AtomsRow(atoms)
        row.ctime = mtime
        row.user = os.getenv('USER')
    else:
        row = atoms
        # Extract the external tables from AtomsRow
        names = self._get_external_table_names()
        for name in names:
            new_table = row.get(name, {})
            if new_table:
                ext_tables[name] = new_table

    # Fall back to the row's own key-value pairs only for an insert where
    # the caller supplied neither key-value pairs nor external tables.
    if not id and not key_value_pairs and not ext_tables:
        key_value_pairs = row.key_value_pairs

    # Make sure every external table exists before opening the write
    # connection below.
    for k, v in ext_tables.items():
        dtype = self._guess_type(v)
        self._create_table_if_not_exists(k, dtype)

    # Constraints given as a list are encoded; other truthy values are
    # passed through unchanged; falsy values become None.
    constraints = row._constraints
    if constraints:
        if isinstance(constraints, list):
            constraints = encode(constraints)
    else:
        constraints = None

    # The tuple is built up positionally; for updates it is paired with
    # self.columnnames[1:] below, so the order here matters.
    values = (row.unique_id,
              row.ctime,
              mtime,
              row.user,
              blob(row.numbers),
              blob(row.positions),
              blob(row.cell),
              int(np.dot(row.pbc, [1, 2, 4])),  # pack the pbc flags into one int
              blob(row.get('initial_magmoms')),
              blob(row.get('initial_charges')),
              blob(row.get('masses')),
              blob(row.get('tags')),
              blob(row.get('momenta')),
              constraints)

    if 'calculator' in row:
        values += (row.calculator, encode(row.calculator_parameters))
    else:
        values += (None, None)

    if not data:
        data = row._data

    with self.managed_connection() as con:
        # Data that is already str/bytes is stored as is.
        if not isinstance(data, (str, bytes)):
            data = encode(data, binary=self.version >= 9)

        values += (row.get('energy'),
                   row.get('free_energy'),
                   blob(row.get('forces')),
                   blob(row.get('stress')),
                   blob(row.get('dipole')),
                   blob(row.get('magmoms')),
                   row.get('magmom'),
                   blob(row.get('charges')),
                   encode(key_value_pairs),
                   data,
                   len(row.numbers),
                   float_if_not_none(row.get('fmax')),
                   float_if_not_none(row.get('smax')),
                   float_if_not_none(row.get('volume')),
                   float(row.mass),
                   float(row.charge))

        cur = con.cursor()
        if id is None:
            # Insert: self.default followed by one '?' per value.
            q = self.default + ', ' + ', '.join('?' * len(values))
            cur.execute('INSERT INTO systems VALUES ({})'.format(q),
                        values)
            id = self.get_last_id(cur)
        else:
            # Update: clear the dependent tables first; they are
            # repopulated below.
            self._delete(cur, [id], [
                'keys', 'text_key_values', 'number_key_values', 'species'
            ])
            q = ', '.join(name + '=?' for name in self.columnnames[1:])
            cur.execute('UPDATE systems SET {} WHERE id=?'.format(q),
                        values + (id, ))

        count = row.count_atoms()
        if count:
            species = [(atomic_numbers[symbol], n, id)
                       for symbol, n in count.items()]
            cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                            species)

        # Split the key-value pairs into numeric and text tables.
        text_key_values = []
        number_key_values = []
        for key, value in key_value_pairs.items():
            if isinstance(value, (numbers.Real, np.bool_)):
                number_key_values.append([key, float(value), id])
            else:
                assert isinstance(value, str)
                text_key_values.append([key, value, id])

        cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                        text_key_values)
        cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                        number_key_values)
        cur.executemany('INSERT INTO keys VALUES (?, ?)',
                        [(key, id) for key in key_value_pairs])

        # Insert entries in the valid tables
        for tabname in ext_tables.keys():
            entries = ext_tables[tabname]
            # Tag the entries with the row id (mutates the table dict).
            entries['id'] = id
            self._insert_in_external_table(cur, name=tabname,
                                           entries=ext_tables[tabname])

    return id
def _write(self, atoms, key_value_pairs, data, id):
    """Insert a row into, or update a row of, the ``systems`` table.

    With ``id`` of None a new row is inserted and its id is read back with
    ``self.get_last_id``.  Otherwise the row with that id is updated, after
    its entries in the keys, text_key_values, number_key_values and species
    tables have been deleted.

    Uses ``self.connection`` when set; otherwise opens a connection that
    is committed and closed at the end of the call.

    Returns the id of the written row.
    """
    Database._write(self, atoms, key_value_pairs, data)
    encode = self.encode

    con = self.connection or self._connect()
    self._initialize(con)
    cur = con.cursor()

    mtime = now()

    blob = self.blob

    # NOTE(review): both lists are re-initialised further down before use.
    text_key_values = []
    number_key_values = []
    if not isinstance(atoms, AtomsRow):
        # New row: stamp creation time and user.
        row = AtomsRow(atoms)
        row.ctime = mtime
        row.user = os.getenv('USER')
    else:
        row = atoms

    if id:
        # Update: clear the dependent tables; they are repopulated below.
        self._delete(cur, [id], ['keys', 'text_key_values',
                                 'number_key_values', 'species'])
    else:
        # No (truthy) id: fall back to the row's own key-value pairs when
        # none were passed in.
        if not key_value_pairs:
            key_value_pairs = row.key_value_pairs

    # Constraints given as a list are encoded; other truthy values are
    # passed through unchanged; falsy values become None.
    constraints = row._constraints
    if constraints:
        if isinstance(constraints, list):
            constraints = encode(constraints)
    else:
        constraints = None

    # The tuple is built up positionally; for updates it is paired with
    # self.columnnames[1:] below, so the order here matters.
    values = (row.unique_id,
              row.ctime,
              mtime,
              row.user,
              blob(row.numbers),
              blob(row.positions),
              blob(row.cell),
              int(np.dot(row.pbc, [1, 2, 4])),  # pack the pbc flags into one int
              blob(row.get('initial_magmoms')),
              blob(row.get('initial_charges')),
              blob(row.get('masses')),
              blob(row.get('tags')),
              blob(row.get('momenta')),
              constraints)

    if 'calculator' in row:
        values += (row.calculator, encode(row.calculator_parameters))
    else:
        values += (None, None)

    if not data:
        data = row._data
    # Data that is already a string is stored as is.
    if not isinstance(data, basestring):
        data = encode(data)

    values += (row.get('energy'),
               row.get('free_energy'),
               blob(row.get('forces')),
               blob(row.get('stress')),
               blob(row.get('dipole')),
               blob(row.get('magmoms')),
               row.get('magmom'),
               blob(row.get('charges')),
               encode(key_value_pairs),
               data,
               len(row.numbers),
               float_if_not_none(row.get('fmax')),
               float_if_not_none(row.get('smax')),
               float_if_not_none(row.get('volume')),
               float(row.mass),
               float(row.charge))

    if id is None:
        # Insert: self.default followed by one '?' per value.
        q = self.default + ', ' + ', '.join('?' * len(values))
        cur.execute('INSERT INTO systems VALUES ({})'.format(q),
                    values)
        id = self.get_last_id(cur)
    else:
        q = ', '.join(name + '=?' for name in self.columnnames[1:])
        cur.execute('UPDATE systems SET {} WHERE id=?'.format(q),
                    values + (id,))

    count = row.count_atoms()
    if count:
        species = [(atomic_numbers[symbol], n, id)
                   for symbol, n in count.items()]
        cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                        species)

    # Split the key-value pairs into numeric and text tables.
    text_key_values = []
    number_key_values = []
    for key, value in key_value_pairs.items():
        if isinstance(value, (numbers.Real, np.bool_)):
            number_key_values.append([key, float(value), id])
        else:
            assert isinstance(value, basestring)
            text_key_values.append([key, value, id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_key_values)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_key_values)
    cur.executemany('INSERT INTO keys VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    # Only commit and close a connection that this call opened itself.
    if self.connection is None:
        con.commit()
        con.close()

    return id
def _write(self, atoms, keywords, key_value_pairs, data):
    """Insert a row into, or update a row of, the ``systems`` table.

    ``atoms`` may be a dict: it is then matched against existing rows by
    its 'unique_id' and, if a row is found, that row is updated in place.
    Anything else is converted with ``self.collect_data`` and inserted as
    a new row.

    Returns the id of the written row.
    """
    Database._write(self, atoms, keywords, key_value_pairs, data)

    con = self._connect()
    self._initialize(con)
    cur = con.cursor()

    id = None

    if isinstance(atoms, dict):
        dct = atoms
        unique_id = dct['unique_id']
        cur.execute('SELECT id FROM systems WHERE unique_id=?',
                    (unique_id,))
        rows = cur.fetchall()
        if rows:
            id = rows[0][0]
            # Clear every dependent table that is repopulated below.
            # 'species' is included so an update does not leave the old
            # species rows behind next to the newly inserted ones.
            self._delete(cur, [id], ['keywords', 'text_key_values',
                                     'number_key_values', 'species'])
        dct['mtime'] = now()
    else:
        dct = self.collect_data(atoms)

    if 'constraints' in dct:
        constraints = encode(dct['constraints'])
    else:
        constraints = None

    numbers = dct.get('numbers')

    # The tuple is built up positionally and must follow the column order
    # of the systems table.
    row = (dct['unique_id'],
           dct['ctime'],
           dct['mtime'],
           dct['user'],
           blob(numbers),
           blob(dct.get('positions')),
           blob(dct.get('cell')),
           int(np.dot(dct.get('pbc'), [1, 2, 4])),  # pack pbc flags into one int
           blob(dct.get('initial_magmoms')),
           blob(dct.get('initial_charges')),
           blob(dct.get('masses')),
           blob(dct.get('tags')),
           blob(dct.get('momenta')),
           constraints)

    if 'calculator' in dct:
        row += (dct['calculator'],
                encode(dct['calculator_parameters']))
    else:
        row += (None, None)

    magmom = dct.get('magmom')
    if magmom is not None:
        # magmom can be one or three numbers (non-collinear case)
        magmom = np.array(magmom)
    row += (dct.get('energy'),
            dct.get('free_energy'),
            blob(dct.get('forces')),
            blob(dct.get('stress')),
            blob(dct.get('dipole')),
            blob(dct.get('magmoms')),
            blob(magmom),
            blob(dct.get('charges')),
            encode(keywords),
            encode(key_value_pairs),
            encode(data),
            len(numbers))

    if id is None:
        # Insert: self.default followed by one '?' per value.
        q = self.default + ', ' + ', '.join('?' * len(row))
        cur.execute('INSERT INTO systems VALUES ({0})'.format(q),
                    row)
    else:
        # Update: the assignment list is the first word of every line of
        # init_statements[0] after its first two lines.
        q = ', '.join(line.split()[0].lstrip() + '=?'
                      for line in init_statements[0].splitlines()[2:])
        cur.execute('UPDATE systems SET {0} WHERE id=?'.format(q),
                    row + (id,))

    if id is None:
        id = self.get_last_id(cur)

    if len(numbers) > 0:
        # One species row per distinct atomic number: (Z, count, id).
        count = np.bincount(numbers)
        unique_numbers = count.nonzero()[0]
        species = [(int(Z), int(count[Z]), id) for Z in unique_numbers]
        cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                        species)

    # Split the key-value pairs into numeric and text tables.
    text_key_values = []
    number_key_values = []
    for key, value in key_value_pairs.items():
        if isinstance(value, (float, int)):
            number_key_values.append([key, float(value), id])
        else:
            assert isinstance(value, (str, unicode))
            text_key_values.append([key, value, id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_key_values)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_key_values)
    cur.executemany('INSERT INTO keywords VALUES (?, ?)',
                    [(keyword, id) for keyword in keywords])

    # Insert keys in keywords table also so that it is easy to query
    # for the existence of keys:
    cur.executemany('INSERT INTO keywords VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    con.commit()
    con.close()
    return id
def _write(self, atoms, key_value_pairs, data, id):
    """Insert a row into, or update a row of, the ``systems`` table.

    With ``id`` of None a new row is inserted and its id is read back with
    ``self.get_last_id``.  Otherwise the row with that id is updated, after
    its entries in the keys, text_key_values, number_key_values and species
    tables have been deleted.  External tables are created on demand and
    filled afterwards.

    Uses ``self.connection`` when set; otherwise opens a connection that
    is committed and closed at the end of the call.

    Returns the id of the written row.  A ``ValueError`` raised while
    handling external tables is re-raised as a new ``ValueError`` after
    closing a connection opened by this call, without committing.
    """
    # NOTE: this pops "external_tables" out of the caller's dict.
    ext_tables = key_value_pairs.pop("external_tables", {})
    Database._write(self, atoms, key_value_pairs, data)
    encode = self.encode

    con = self.connection or self._connect()
    self._initialize(con)
    cur = con.cursor()

    mtime = now()

    blob = self.blob

    # NOTE(review): both lists are re-initialised further down before use.
    text_key_values = []
    number_key_values = []
    if not isinstance(atoms, AtomsRow):
        # New row: stamp creation time and user.
        row = AtomsRow(atoms)
        row.ctime = mtime
        row.user = os.getenv('USER')
    else:
        row = atoms

        # Extract the external tables from AtomsRow
        names = self._get_external_table_names(db_con=con)
        for name in names:
            new_table = row.get(name, {})
            if new_table:
                ext_tables[name] = new_table

    if id:
        # Update: clear the dependent tables; they are repopulated below.
        self._delete(
            cur, [id],
            ['keys', 'text_key_values', 'number_key_values', 'species'])
    else:
        # No (truthy) id: fall back to the row's own key-value pairs when
        # none were passed in.
        if not key_value_pairs:
            key_value_pairs = row.key_value_pairs

    # Constraints given as a list are encoded; other truthy values are
    # passed through unchanged; falsy values become None.
    constraints = row._constraints
    if constraints:
        if isinstance(constraints, list):
            constraints = encode(constraints)
    else:
        constraints = None

    # The tuple is built up positionally; for updates it is paired with
    # self.columnnames[1:] below, so the order here matters.
    values = (row.unique_id,
              row.ctime,
              mtime,
              row.user,
              blob(row.numbers),
              blob(row.positions),
              blob(row.cell),
              int(np.dot(row.pbc, [1, 2, 4])),  # pack the pbc flags into one int
              blob(row.get('initial_magmoms')),
              blob(row.get('initial_charges')),
              blob(row.get('masses')),
              blob(row.get('tags')),
              blob(row.get('momenta')),
              constraints)

    if 'calculator' in row:
        values += (row.calculator, encode(row.calculator_parameters))
    else:
        values += (None, None)

    if not data:
        data = row._data
    # Data that is already a string is stored as is.
    if not isinstance(data, basestring):
        data = encode(data)

    values += (row.get('energy'),
               row.get('free_energy'),
               blob(row.get('forces')),
               blob(row.get('stress')),
               blob(row.get('dipole')),
               blob(row.get('magmoms')),
               row.get('magmom'),
               blob(row.get('charges')),
               encode(key_value_pairs),
               data,
               len(row.numbers),
               float_if_not_none(row.get('fmax')),
               float_if_not_none(row.get('smax')),
               float_if_not_none(row.get('volume')),
               float(row.mass),
               float(row.charge))

    if id is None:
        # Insert: self.default followed by one '?' per value.
        q = self.default + ', ' + ', '.join('?' * len(values))
        cur.execute('INSERT INTO systems VALUES ({})'.format(q),
                    values)
        id = self.get_last_id(cur)
    else:
        q = ', '.join(name + '=?' for name in self.columnnames[1:])
        cur.execute('UPDATE systems SET {} WHERE id=?'.format(q),
                    values + (id, ))

    count = row.count_atoms()
    if count:
        species = [(atomic_numbers[symbol], n, id)
                   for symbol, n in count.items()]
        cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                        species)

    # Split the key-value pairs into numeric and text tables.
    text_key_values = []
    number_key_values = []
    for key, value in key_value_pairs.items():
        if isinstance(value, (numbers.Real, np.bool_)):
            number_key_values.append([key, float(value), id])
        else:
            assert isinstance(value, basestring)
            text_key_values.append([key, value, id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_key_values)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_key_values)
    cur.executemany('INSERT INTO keys VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    # Update external tables
    valid_entries = []
    for k, v in ext_tables.items():
        try:
            # Guess the type of the value
            dtype = self._guess_type(v)
            self._create_table_if_not_exists(k, dtype, db_con=con)
            # Tag the entries with the row id (mutates the table dict).
            v["id"] = id
            valid_entries.append(k)
        except ValueError as exc:
            # Close the connection without committing
            if self.connection is None:
                con.close()
            # Raise error again
            raise ValueError(exc)

    # Insert entries in the valid tables
    for tabname in valid_entries:
        try:
            self._insert_in_external_table(cur, name=tabname,
                                           entries=ext_tables[tabname])
        except ValueError as exc:
            # Close the connection without committing
            if self.connection is None:
                con.close()
            # Raise the error again
            raise ValueError(exc)

    # Only commit and close a connection that this call opened itself.
    if self.connection is None:
        con.commit()
        con.close()

    return id
def _write(self, atoms, key_value_pairs, data):
    """Insert a row into, or update a row of, the ``systems`` table.

    An ``AtomsRow`` is matched against existing rows by its ``unique_id``;
    if a row is found it is updated in place.  Anything else is wrapped in
    a new ``AtomsRow`` and inserted.

    Uses ``self.connection`` when set; otherwise opens a connection that
    is committed and closed at the end of the call.

    Returns the id of the written row.
    """
    Database._write(self, atoms, key_value_pairs, data)

    con = self.connection or self._connect()
    self._initialize(con)
    cur = con.cursor()

    id = None

    if not isinstance(atoms, AtomsRow):
        # New row: stamp creation time and user.
        row = AtomsRow(atoms)
        row.ctime = mtime = now()
        row.user = os.getenv('USER')
    else:
        row = atoms
        cur.execute('SELECT id FROM systems WHERE unique_id=?',
                    (row.unique_id,))
        results = cur.fetchall()
        if results:
            id = results[0][0]
            # Clear every dependent table that is repopulated below.
            # 'species' is included so an update does not leave the old
            # species rows behind next to the newly inserted ones.
            self._delete(cur, [id], ['keys', 'text_key_values',
                                     'number_key_values', 'species'])
        mtime = now()

    # Constraints given as a list are encoded; other truthy values are
    # passed through unchanged; falsy values become None.
    constraints = row._constraints
    if constraints:
        if isinstance(constraints, list):
            constraints = encode(constraints)
    else:
        constraints = None

    # The tuple is built up positionally and must follow the column order
    # of the systems table.
    values = (row.unique_id,
              row.ctime,
              mtime,
              row.user,
              blob(row.numbers),
              blob(row.positions),
              blob(row.cell),
              int(np.dot(row.pbc, [1, 2, 4])),  # pack the pbc flags into one int
              blob(row.get('initial_magmoms')),
              blob(row.get('initial_charges')),
              blob(row.get('masses')),
              blob(row.get('tags')),
              blob(row.get('momenta')),
              constraints)

    if 'calculator' in row:
        # NOTE: the encoded parameters are written back onto the row.
        if not isinstance(row.calculator_parameters, basestring):
            row.calculator_parameters = encode(row.calculator_parameters)
        values += (row.calculator, row.calculator_parameters)
    else:
        values += (None, None)

    if key_value_pairs is None:
        key_value_pairs = row.key_value_pairs

    if not data:
        data = row._data
    # Data that is already a string is stored as is.
    if not isinstance(data, basestring):
        data = encode(data)

    values += (row.get('energy'),
               row.get('free_energy'),
               blob(row.get('forces')),
               blob(row.get('stress')),
               blob(row.get('dipole')),
               blob(row.get('magmoms')),
               row.get('magmom'),
               blob(row.get('charges')),
               encode(key_value_pairs),
               data,
               len(row.numbers),
               float_if_not_none(row.get('fmax')),
               float_if_not_none(row.get('smax')),
               float_if_not_none(row.get('volume')),
               float(row.mass),
               float(row.charge))

    if id is None:
        # Insert: self.default followed by one '?' per value.
        q = self.default + ', ' + ', '.join('?' * len(values))
        cur.execute('INSERT INTO systems VALUES ({0})'.format(q),
                    values)
    else:
        # Update: the assignment list is the first word of every line of
        # init_statements[0] after its first two lines.
        q = ', '.join(line.split()[0].lstrip() + '=?'
                      for line in init_statements[0].splitlines()[2:])
        cur.execute('UPDATE systems SET {0} WHERE id=?'.format(q),
                    values + (id,))

    if id is None:
        id = self.get_last_id(cur)

    count = row.count_atoms()
    if count:
        species = [(atomic_numbers[symbol], n, id)
                   for symbol, n in count.items()]
        cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                        species)

    # Split the key-value pairs into numeric and text tables.
    text_key_values = []
    number_key_values = []
    for key, value in key_value_pairs.items():
        if isinstance(value, (float, int)):
            number_key_values.append([key, float(value), id])
        else:
            assert isinstance(value, basestring)
            text_key_values.append([key, value, id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_key_values)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_key_values)
    cur.executemany('INSERT INTO keys VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    # Only commit and close a connection that this call opened itself.
    if self.connection is None:
        con.commit()
        con.close()

    return id