Esempio n. 1
0
    def _select(self, keys, cmps, explain=False, verbosity=0,
                limit=None, offset=0, sort=None, include_data=True,
                columns='all'):
        if explain:
            yield {'explain': (0, 0, 0, 'scan table')}
            return

        if sort:
            if sort[0] == '-':
                reverse = True
                sort = sort[1:]
            else:
                reverse = False

            def f(row):
                return row.get(sort, missing)

            rows = []
            missing = []
            for row in self._select(keys, cmps):
                key = row.get(sort)
                if key is None:
                    missing.append((0, row))
                else:
                    rows.append((key, row))

            rows.sort(reverse=reverse, key=lambda x: x[0])
            rows += missing

            if limit:
                rows = rows[offset:offset + limit]
            for key, row in rows:
                yield row
            return

        try:
            bigdct, ids, nextid = self._read_json()
        except IOError:
            return

        if not limit:
            limit = -offset - 1

        cmps = [(key, ops[op], val) for key, op, val in cmps]
        n = 0
        for id in ids:
            if n - offset == limit:
                return
            dct = bigdct[id]
            if not include_data:
                dct.pop('data', None)
            row = AtomsRow(dct)
            row.id = id
            for key in keys:
                if key not in row:
                    break
            else:
                for key, op, val in cmps:
                    if isinstance(key, int):
                        value = np.equal(row.numbers, key).sum()
                    else:
                        value = row.get(key)
                        if key == 'pbc':
                            assert op in [ops['='], ops['!=']]
                            value = ''.join('FT'[x] for x in value)
                    if value is None or not op(value, val):
                        break
                else:
                    if n >= offset:
                        yield row
                    n += 1
Esempio n. 2
0
    def _convert_tuple_to_row(self, values):
        """Turn a tuple of stored column values into an ``AtomsRow``.

        *values* is first normalised with ``self._old2new``.  The first
        eight columns are always copied; the remaining columns are only
        added when they hold a value.  Finally, one entry per external
        table (as listed by ``self._get_external_table_names``) is merged
        into the result.
        """
        deblob = self.deblob
        decode = self.decode

        def unchanged(value):
            return value

        def int32_array(value):
            return deblob(value, np.int32)

        def vectors(value):
            return deblob(value, shape=(-1, 3))

        def pbc_flags(value):
            # Stored as an integer bit mask; expand bits 1, 2 and 4 into
            # three booleans.
            return (value & np.array([1, 2, 4])).astype(bool)

        values = self._old2new(values)
        dct = {
            'id': values[0],
            'unique_id': values[1],
            'ctime': values[2],
            'mtime': values[3],
            'user': values[4],
            'numbers': int32_array(values[5]),
            'positions': vectors(values[6]),
            'cell': deblob(values[7], shape=(3, 3))
        }

        # (column index, row key, converter) for every column that is
        # skipped when it holds None.  The order matches the column order.
        optional_columns = (
            (8, 'pbc', pbc_flags),
            (9, 'initial_magmoms', deblob),
            (10, 'initial_charges', deblob),
            (11, 'masses', deblob),
            (12, 'tags', int32_array),
            (13, 'momenta', vectors),
            (14, 'constraints', unchanged),
            (15, 'calculator', unchanged),
            (16, 'calculator_parameters', decode),
            (17, 'energy', unchanged),
            (18, 'free_energy', unchanged),
            (19, 'forces', vectors),
            (20, 'stress', deblob),
            (21, 'dipole', deblob),
            (22, 'magmoms', deblob),
            (23, 'magmom', unchanged),
            (24, 'charges', deblob),
        )
        for index, name, convert in optional_columns:
            raw = values[index]
            if raw is not None:
                dct[name] = convert(raw)

        # An empty mapping is stored as the literal string '{}'.
        if values[25] != '{}':
            dct['key_value_pairs'] = decode(values[25])
        # The data column may be absent altogether; 'null' marks no data.
        if len(values) >= 27 and values[26] != 'null':
            dct['data'] = decode(values[26], lazy=True)

        # Read every external table for this id before merging, so the
        # lookups all use the id taken from the main columns.
        tables = {
            tab: self._read_external_table(tab, dct["id"])
            for tab in self._get_external_table_names()
        }
        dct.update(tables)
        return AtomsRow(dct)
Esempio n. 3
0
    def _write(self, atoms, key_value_pairs, data, id):
        """Insert a new row, or overwrite row *id*, and return the row id.

        atoms: Atoms-like object or AtomsRow
            Anything that is not already an ``AtomsRow`` is wrapped in one
            and stamped with the current time and the ``USER`` environment
            variable.
        key_value_pairs: dict
            Key-value pairs to store.  An ``"external_tables"`` entry, if
            present, is removed from this dict and written to the external
            tables instead.
        data: object
            Stored as-is when it is ``str``/``bytes``, otherwise encoded.
            A false value falls back to ``row._data``.
        id: int or None
            ``None`` inserts a new row; otherwise that row is updated.
        """
        # Take the external tables out before the base class sees the
        # key-value pairs (note: this mutates the caller's dict).
        ext_tables = key_value_pairs.pop("external_tables", {})
        Database._write(self, atoms, key_value_pairs, data)

        mtime = now()

        encode = self.encode
        blob = self.blob

        if isinstance(atoms, AtomsRow):
            row = atoms
            # A row may carry external tables as attributes; non-empty
            # ones take precedence over what was passed explicitly.
            for table_name in self._get_external_table_names():
                table = row.get(table_name, {})
                if table:
                    ext_tables[table_name] = table
        else:
            row = AtomsRow(atoms)
            row.ctime = mtime
            row.user = os.getenv('USER')

        # With nothing else to go on, keep the pairs the row already has.
        if not (id or key_value_pairs or ext_tables):
            key_value_pairs = row.key_value_pairs

        for table_name, table in ext_tables.items():
            self._create_table_if_not_exists(table_name,
                                             self._guess_type(table))

        constraints = row._constraints
        if not constraints:
            constraints = None
        elif isinstance(constraints, list):
            constraints = encode(constraints)

        values = (
            row.unique_id,
            row.ctime,
            mtime,
            row.user,
            blob(row.numbers),
            blob(row.positions),
            blob(row.cell),
            int(np.dot(row.pbc, [1, 2, 4])),
            blob(row.get('initial_magmoms')),
            blob(row.get('initial_charges')),
            blob(row.get('masses')),
            blob(row.get('tags')),
            blob(row.get('momenta')),
            constraints,
        )

        if 'calculator' in row:
            calculator_columns = (row.calculator,
                                  encode(row.calculator_parameters))
        else:
            calculator_columns = (None, None)
        values += calculator_columns

        if not data:
            data = row._data

        with self.managed_connection() as con:
            if not isinstance(data, (str, bytes)):
                data = encode(data, binary=self.version >= 9)

            values += (
                row.get('energy'),
                row.get('free_energy'),
                blob(row.get('forces')),
                blob(row.get('stress')),
                blob(row.get('dipole')),
                blob(row.get('magmoms')),
                row.get('magmom'),
                blob(row.get('charges')),
                encode(key_value_pairs),
                data,
                len(row.numbers),
                float_if_not_none(row.get('fmax')),
                float_if_not_none(row.get('smax')),
                float_if_not_none(row.get('volume')),
                float(row.mass),
                float(row.charge),
            )

            cursor = con.cursor()
            if id is None:
                placeholders = ', '.join(['?'] * len(values))
                column_values = self.default + ', ' + placeholders
                cursor.execute(
                    'INSERT INTO systems VALUES ({})'.format(column_values),
                    values)
                id = self.get_last_id(cursor)
            else:
                # Drop the old index entries; they are rebuilt below.
                self._delete(cursor, [id], [
                    'keys', 'text_key_values', 'number_key_values', 'species'
                ])
                assignments = ', '.join(
                    name + '=?' for name in self.columnnames[1:])
                cursor.execute(
                    'UPDATE systems SET {} WHERE id=?'.format(assignments),
                    values + (id, ))

            count = row.count_atoms()
            if count:
                cursor.executemany(
                    'INSERT INTO species VALUES (?, ?, ?)',
                    [(atomic_numbers[symbol], n, id)
                     for symbol, n in count.items()])

            # Numbers (including numpy booleans) and strings are indexed
            # in separate tables.
            text_rows = []
            number_rows = []
            for key, value in key_value_pairs.items():
                if isinstance(value, (numbers.Real, np.bool_)):
                    number_rows.append([key, float(value), id])
                else:
                    assert isinstance(value, str)
                    text_rows.append([key, value, id])

            cursor.executemany(
                'INSERT INTO text_key_values VALUES (?, ?, ?)', text_rows)
            cursor.executemany(
                'INSERT INTO number_key_values VALUES (?, ?, ?)',
                number_rows)
            cursor.executemany('INSERT INTO keys VALUES (?, ?)',
                               [(key, id) for key in key_value_pairs])

            # Each external table entry is tagged with the row id before
            # it is written (this mutates the table dict).
            for table_name, entries in ext_tables.items():
                entries['id'] = id
                self._insert_in_external_table(cursor,
                                               name=table_name,
                                               entries=entries)

        return id
Esempio n. 4
0
    def update(self,
               id,
               atoms=None,
               delete_keys=(),
               data=None,
               **add_key_value_pairs):
        """Update and/or delete key-value pairs of row(s).

        id: int
            ID of row to update.
        atoms: Atoms object
            Optionally update the Atoms data (positions, cell, ...).
        data: dict
            Data dict to be added to the existing data.
        delete_keys: list of str
            Keys to remove.

        Use keyword arguments to add new key-value pairs.

        Returns a tuple ``(m, n)``: the number of key-value pairs added
        and the number removed.

        Raises ValueError if *id* is a list and TypeError if it is any
        other non-integer.
        """

        if not isinstance(id, numbers.Integral):
            if isinstance(id, list):
                err = ('First argument must be an int and not a list.\n'
                       'Do something like this instead:\n\n'
                       'with db:\n'
                       '    for id in ids:\n'
                       '        db.update(id, ...)')
                raise ValueError(err)
            raise TypeError('id must be an int')

        check(add_key_value_pairs)

        row = self._get_row(id)

        # Compare against None instead of relying on truthiness, so an
        # atoms object that evaluates as false (e.g. an empty container)
        # is still applied.
        if atoms is not None:
            oldrow = row
            row = AtomsRow(atoms)

            # Copy over data, kvp, ctime, user and id
            row._data = oldrow._data
            kvp = oldrow.key_value_pairs
            row.__dict__.update(kvp)
            row._keys = list(kvp)
            row.ctime = oldrow.ctime
            row.user = oldrow.user
            row.id = id

        kvp = row.key_value_pairs

        # n: how many existing keys were actually removed.
        n = len(kvp)
        for key in delete_keys:
            kvp.pop(key, None)
        n -= len(kvp)
        # m: how many keys are new (overwriting a key does not count).
        m = -len(kvp)
        kvp.update(add_key_value_pairs)
        m += len(kvp)

        moredata = data
        data = row.get('data', {})
        if moredata:
            data.update(moredata)
        if not data:
            data = None

        self._write(row, kvp, data, row.id)

        return m, n
Esempio n. 5
0
    def _write(self, atoms, key_value_pairs, data, id):
        """Insert a new row, or overwrite row *id*, and return the row id.

        atoms: Atoms-like object or AtomsRow
            Anything that is not already an ``AtomsRow`` is wrapped in one
            and stamped with the current time and the ``USER`` environment
            variable.
        key_value_pairs: dict
            Values must be real numbers, numpy booleans or strings.
        data: object
            ``None`` is stored as an empty dict; anything that is not a
            string is encoded first.
        id: int or None
            ``None`` inserts a new row; otherwise that row is updated.

        When ``self.connection`` is not set, a connection is opened for
        this call, committed on success and closed in all cases.
        """
        Database._write(self, atoms, key_value_pairs, data)

        con = self.connection or self._connect()
        try:
            self._initialize(con)
            cur = con.cursor()

            mtime = now()

            if not isinstance(atoms, AtomsRow):
                row = AtomsRow(atoms)
                row.ctime = mtime
                row.user = os.getenv('USER')
            else:
                row = atoms

            if id:
                # Drop the old index entries; they are rebuilt below.
                self._delete(cur, [id], ['keys', 'text_key_values',
                                         'number_key_values', 'species'])

            constraints = row._constraints
            if constraints:
                if isinstance(constraints, list):
                    constraints = encode(constraints)
            else:
                constraints = None

            values = (row.unique_id,
                      row.ctime,
                      mtime,
                      row.user,
                      blob(row.numbers),
                      blob(row.positions),
                      blob(row.cell),
                      int(np.dot(row.pbc, [1, 2, 4])),
                      blob(row.get('initial_magmoms')),
                      blob(row.get('initial_charges')),
                      blob(row.get('masses')),
                      blob(row.get('tags')),
                      blob(row.get('momenta')),
                      constraints)

            if 'calculator' in row:
                values += (row.calculator, encode(row.calculator_parameters))
            else:
                values += (None, None)

            if data is None:
                data = {}
            if not isinstance(data, basestring):
                data = encode(data)

            values += (row.get('energy'),
                       row.get('free_energy'),
                       blob(row.get('forces')),
                       blob(row.get('stress')),
                       blob(row.get('dipole')),
                       blob(row.get('magmoms')),
                       row.get('magmom'),
                       blob(row.get('charges')),
                       encode(key_value_pairs),
                       data,
                       len(row.numbers),
                       float_if_not_none(row.get('fmax')),
                       float_if_not_none(row.get('smax')),
                       float_if_not_none(row.get('volume')),
                       float(row.mass),
                       float(row.charge))

            if id is None:
                q = self.default + ', ' + ', '.join('?' * len(values))
                cur.execute('INSERT INTO systems VALUES ({})'.format(q),
                            values)
                id = self.get_last_id(cur)
            else:
                q = ', '.join(name + '=?' for name in self.columnnames[1:])
                cur.execute('UPDATE systems SET {} WHERE id=?'.format(q),
                            values + (id,))

            count = row.count_atoms()
            if count:
                species = [(atomic_numbers[symbol], n, id)
                           for symbol, n in count.items()]
                cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                                species)

            # Numbers (including numpy booleans) and strings are indexed
            # in separate tables.
            text_key_values = []
            number_key_values = []
            for key, value in key_value_pairs.items():
                if isinstance(value, (numbers.Real, np.bool_)):
                    number_key_values.append([key, float(value), id])
                else:
                    assert isinstance(value, basestring)
                    text_key_values.append([key, value, id])

            cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                            text_key_values)
            cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                            number_key_values)
            cur.executemany('INSERT INTO keys VALUES (?, ?)',
                            [(key, id) for key in key_value_pairs])

            if self.connection is None:
                con.commit()
        finally:
            # A connection opened just for this call must not leak when
            # something above raises; it is closed without committing.
            if self.connection is None:
                con.close()

        return id
Esempio n. 6
0
    def _select(self,
                keys,
                cmps,
                explain=False,
                verbosity=0,
                limit=None,
                offset=0,
                sort=None):
        if explain:
            yield {'explain': (0, 0, 0, 'scan table')}
            return

        if sort:
            if sort[0] == '-':
                reverse = True
                sort = sort[1:]
            else:
                reverse = False

            def f(row):
                return row[sort]

            rows = sorted(self._select(keys + [sort], cmps),
                          key=f,
                          reverse=reverse)
            if limit:
                rows = rows[offset:offset + limit]
            for row in rows:
                yield row
            return

        try:
            bigdct, ids, nextid = self._read_json()
        except IOError:
            return

        if not limit:
            limit = -offset - 1

        cmps = [(key, ops[op], val) for key, op, val in cmps]
        n = 0
        for id in ids:
            if n - offset == limit:
                return
            row = AtomsRow(bigdct[id])
            row.id = id
            for key in keys:
                if key not in row:
                    break
            else:
                for key, op, val in cmps:
                    if isinstance(key, int):
                        value = np.equal(row.numbers, key).sum()
                    else:
                        value = row.get(key)
                        if key == 'pbc':
                            assert op in [ops['='], ops['!=']]
                            value = ''.join('FT'[x] for x in value)
                    if value is None or not op(value, val):
                        break
                else:
                    if n >= offset:
                        yield row
                    n += 1