Exemple #1
0
    def transfer(self,
                 filename_sqlite,
                 start_id=1,
                 write_ase=True,
                 write_publication=True,
                 write_reaction=True,
                 write_reaction_system=True,
                 block_size=1000,
                 start_block=0):

        self.stdout.write('Starting transfer\n')
        con = self.connection or self._connect()
        self._initialize(con)
        self.stdout.write('Finished initialization\n')
        cur = con.cursor()
        self.stdout.write('Got a cursor\n')

        set_schema = 'SET search_path = {0};'.format(self.schema)
        cur.execute(set_schema)

        import os
        import time
        self.stdout.write('Imported os\n')

        import ase.db
        self.stdout.write('Imported ase.db\n')
        self.stdout.write('Building server_name\n')
        server_name = "postgres://{0}:{1}@{2}:5432/catalysishub".format(
            self.user, self.password, self.server)

        self.stdout.write('Connecting to {server_name}\n'.format(**locals()))

        nrows = 0
        if write_ase:
            print('Transfering atomic structures')
            db = ase.db.connect(filename_sqlite)
            n_structures = db.count()
            n_blocks = int(n_structures / block_size) + 1
            t_av = 0
            for block_id in range(start_block, n_blocks):
                i = block_id - start_block
                t1 = time.time()
                b0 = block_id * block_size + 1
                b1 = (block_id + 1) * block_size + 1
                self.stdout.write(
                    str(block_id) + ' ' + 'from ' + str(b0) + ' to ' +
                    str(b1) + '\n')
                if block_id + 1 == n_blocks:
                    b1 = n_structures + 1

                rows = list(db.select('{}<id<{}'.format(b0 - 1, b1)))

                with ase.db.connect(server_name, type='postgresql') as db2:

                    db2.write(rows)

                nrows += len(rows)
                t2 = time.time()
                dt = t2 - t1
                t_av = (t_av * i + dt) / (i + 1)

                self.stdout.write(
                    '  Finnished Block {0} / {1} in {2} sec'.format(
                        block_id, n_blocks, dt))
                self.stdout.write(
                    '    Completed transfer of {0} atomic structures.'.format(
                        nrows))
                self.stdout.write('    Estimated time left: {0} sec'.format(
                    t_av * (n_blocks - block_id)))

        from catkit.hub.cathubsqlite import CathubSQLite
        db = CathubSQLite(filename_sqlite)
        con_lite = db._connect()
        cur_lite = con_lite.cursor()

        # write publication
        Npub = 0
        Npubstruc = 0
        if write_publication:
            try:
                npub = db.get_last_pub_id(cur_lite)
            except BaseException:
                npub = 1
            for id_lite in range(1, npub + 1):
                Npub += 1
                row = db.read(id=id_lite, table='publication')
                if len(row) == 0:
                    continue
                values = row[0]
                pid, pub_id = self.write_publication(values)

            # Publication structures connection
            cur_lite.execute("""SELECT * from publication_system;""")
            rows = cur_lite.fetchall()
            for row in rows:
                Npubstruc += 1
                values = row[:]
                key_str, value_str = get_key_value_str(
                    values, table='publication_system')

                set_schema = 'SET search_path = {0};'.format(self.schema)
                cur.execute(set_schema)
                print("[SET SCHEMA] {set_schema}".format(**locals()))

                insert_command = """INSERT INTO publication_system ({0})
                VALUES ({1}) ON CONFLICT DO NOTHING;"""\
                    .format(key_str, value_str)

                cur.execute(insert_command)
                # self.write(values, table='publication_system')
            con.commit()

        Ncat = 0
        Ncatstruc = 0

        if write_reaction:
            n = db.get_last_id(cur_lite)
            select_ase = """SELECT * from reaction_system where id={};"""
            for id_lite in range(start_id, n + 1):
                row = db.read(id_lite)
                if len(row) == 0:
                    continue
                values = row[0]

                id = self.check(values[13],
                                values[1],
                                values[6],
                                values[7],
                                values[8],
                                strict=True)
                update_rs = False

                if id is not None:
                    id = self.update(id, values)
                    self.stdout.write(
                        'Updated reaction db with row id = {}\n'.format(id))
                    update_rs = True
                else:
                    Ncat += 1
                    id = self.write(values)
                    self.stdout.write(
                        'Written to reaction db row id = {0}\n'.format(id))

                cur_lite.execute(select_ase.format(id_lite))
                rows = cur_lite.fetchall()
                if write_reaction_system:
                    if update_rs:
                        cur.execute("""Delete from reaction_system231
                        where reaction_id={0}""".format(id))
                    for row in rows:
                        Ncatstruc += 1
                        values = list(row)
                        if len(values) == 3:
                            values.insert(1, None)

                        values[3] = id

                        key_str, value_str = \
                            get_key_value_str(values, table='reaction_system')

                        set_schema = 'SET search_path = {0};'.format(
                            self.schema)
                        cur.execute(set_schema)
                        print("[SET SCHEMA] {set_schema}".format(**locals()))

                        insert_command = """INSERT INTO reaction_system
                        ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;"""\
                            .format(key_str, value_str)

                        print("[INSERT COMMAND] {insert_command}".format(
                            **locals()))
                        cur.execute(insert_command)

                con.commit()  # Commit reaction_system for each row

        for statement in tsvector_update:
            cur.execute(statement)

        if self.connection is None:
            con.commit()
            con.close()

        self.stdout.write('Inserted into:\n')
        self.stdout.write('  systems: {0}\n'.format(nrows))
        self.stdout.write('  publication: {0}\n'.format(Npub))
        self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
        self.stdout.write('  reaction: {0}\n'.format(Ncat))
        self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))
Exemple #2
0
    def transfer(self,
                 filename_sqlite,
                 block_size=1000,
                 start_block=0,
                 write_ase=True,
                 write_publication=True,
                 write_reaction=True,
                 write_reaction_system=True,
                 check=False):
        """ Transfer data from local sqlite3 .db file to the
        catalysis-hub postgreSQL server

        Parameters:
        filename_sqlite: str
            name of .db file
        block_size: int (default 1000)
            Number of atomic structures and reactions to write together
            in each block.
        start_block: int (default 0)
            Block to start with
        write_ase: bool
            whether or not to write atomic structures
        write_publication: bool
            whether or not to transfer publication table
        write_reaction: bool
            whether or not to transfer reaction table
        write_reaction_system: bool
            whether or not to write reaction_system table
        """

        self.stdout.write('Starting transfer\n')
        con = self.connection or self._connect()
        self._initialize(con)
        self.stdout.write('Finished initialization\n')
        cur = con.cursor()
        self.stdout.write('Got a cursor\n')
        self.stdout.write('Connecting to {0}\n'.format(self.server_name))

        nrows = 0
        if write_ase:
            self.stdout.write('Transfering atomic structures\n')
            db = ase.db.connect(filename_sqlite)
            n_structures = db.count()
            n_blocks = n_structures // block_size + 1
            t_av = 0
            for block_id in range(start_block, n_blocks):
                i = block_id - start_block
                t1 = time.time()
                b0 = block_id * block_size
                b1 = (block_id + 1) * block_size + 1

                if block_id + 1 == n_blocks:
                    b1 = n_structures + 1

                rows = list(db.select('{}<id<{}'.format(b0, b1)))

                with ase.db.connect(self.server_name,
                                    type='postgresql') as db2:
                    # write one row at the time until ase is updated
                    # db2.write(rows)
                    for row in rows:
                        db2.write(row)

                nrows += len(rows)
                t2 = time.time()
                dt = t2 - t1
                t_av = (t_av * i + dt) / (i + 1)

                self.stdout.write(
                    '  Finnished Block {0} / {1} in {2} sec\n'.format(
                        block_id + 1, n_blocks, dt))
                self.stdout.write(
                    '    Completed transfer of {0} atomic structures\n'.format(
                        nrows))
                self.stdout.write('    Estimated time left: {0} sec\n'.format(
                    t_av * (n_blocks - block_id - 1)))

        db = CathubSQLite(filename_sqlite)
        con_lite = db._connect()
        cur_lite = con_lite.cursor()

        Npub = 0
        Npubstruc = 0
        if write_publication:
            self.stdout.write('Transfering publications\n')
            try:
                npub = db.get_last_pub_id(cur_lite)
            except BaseException:
                npub = 1
            for id_lite in range(1, npub + 1):
                Npub += 1
                row = db.read(id=id_lite, table='publication')
                if len(row) == 0:
                    continue
                values = row[0]
                pid, pub_id = self.write_publication(values)

            # Publication structures connection
            cur_lite.execute("""SELECT * from publication_system;""")
            publication_system_values = []
            rows = cur_lite.fetchall()
            for row in rows:
                Npubstruc += 1
                values = list(row)
                value_list = get_value_list(values)
                publication_system_values += [tuple(value_list)]

            # Insert into publication_system table
            key_str = get_key_str(table='publication_system')
            insert_command = """INSERT INTO publication_system ({0})
            VALUES %s ON CONFLICT DO NOTHING;"""\
                .format(key_str)

            execute_values(cur=cur,
                           sql=insert_command,
                           argslist=publication_system_values,
                           page_size=1000)

            # Write pub_id to systems table
            cur.execute("""UPDATE systems SET
            key_value_pairs=jsonb_set(key_value_pairs, '{{"pub_id"}}', '"{pub_id}"')
            WHERE unique_id IN
            (SELECT ase_id from publication_system WHERE pub_id='{pub_id}')"""\
                        .format(pub_id=pub_id))

            con.commit()
            self.stdout.write('  Completed transfer of publications\n')

        Ncat = 0
        Ncatstruc = 0

        if write_reaction:
            self.stdout.write('Transfering reactions')
            cur.execute('SELECT max(id) from reaction;')
            ID = cur.fetchone()[0] or 0

            n_react = db.get_last_id(cur_lite)

            n_blocks = int(n_react / block_size) + 1
            t_av = 0
            for block_id in range(start_block, n_blocks):
                reaction_values = []
                reaction_system_values = []
                Ncat0 = Ncat
                Ncatstruc0 = Ncatstruc

                i = block_id - start_block
                t1 = time.time()
                b0 = block_id * block_size + 1
                b1 = (block_id + 1) * block_size + 1
                if block_id + 1 == n_blocks:
                    b1 = n_react + 1

                for id_lite in range(b0, b1):
                    row = db.read(id_lite)
                    if len(row) == 0:
                        continue
                    values = row[0]

                    # id = self.check(values[13], values[1], values[6], values[7],
                    #                values[8], strict=True)
                    id = None
                    update_rs = False
                    if id is not None:
                        id = self.update(id, values)
                        self.stdout.write(
                            'Updated reaction db with row id = {}\n'.format(
                                id))
                        update_rs = True
                    else:
                        ID += 1
                        Ncat += 1
                        value_list = get_value_list(values)
                        value_list[0] = ID  # set new ID
                        reaction_values += [tuple(value_list)]
                        if write_reaction_system:
                            cur_lite.execute(
                                "SELECT * from reaction_system where id={};".
                                format(id_lite))
                            rows = cur_lite.fetchall()
                            if update_rs:
                                cur.execute("""Delete from reaction_system
                                where id={0}""".format(id))
                            for row in rows:
                                Ncatstruc += 1
                                values = list(row)
                                if len(values) == 3:
                                    values.insert(1, None)
                                value_list = get_value_list(values)
                                value_list[3] = ID
                                reaction_system_values += [tuple(value_list)]

                q = ', '.join('?' * 14)
                q = '({})'.format(q.replace('?', '%s'))

                key_str = get_key_str()
                insert_command = """INSERT INTO reaction
                ({0}) VALUES %s;""".format(key_str)

                execute_values(cur=cur,
                               sql=insert_command,
                               argslist=reaction_values,
                               template=q,
                               page_size=block_size)

                key_str = get_key_str('reaction_system')
                insert_command = """INSERT INTO reaction_system
                ({0}) VALUES %s ON CONFLICT DO NOTHING;""".format(key_str)

                execute_values(cur=cur,
                               sql=insert_command,
                               argslist=reaction_system_values,
                               page_size=1000)
                con.commit()

                t2 = time.time()
                dt = t2 - t1
                t_av = (t_av * i + dt) / (i + 1)

                self.stdout.write(
                    '  Finnished Block {0} / {1} in {2} sec \n'.format(
                        block_id + 1, n_blocks, dt))
                self.stdout.write(
                    '    Completed transfer of {0} reactions. \n'.format(
                        Ncat - Ncat0))
                self.stdout.write('    Estimated time left: {0} sec \n'.format(
                    t_av * (n_blocks - block_id - 1)))

            self.stdout.write('  Completed transfer of reactions\n')

        for statement in tsvector_update:
            cur.execute(statement)

        if self.connection is None:
            con.commit()
            con.close()

        self.stdout.write('Inserted into:\n')
        self.stdout.write('  systems: {0}\n'.format(nrows))
        self.stdout.write('  publication: {0}\n'.format(Npub))
        self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
        self.stdout.write('  reaction: {0}\n'.format(Ncat))
        self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))
    def transfer(self,
                 filename_sqlite,
                 start_id=1,
                 write_ase=True,
                 write_publication=True,
                 write_reaction=True,
                 write_reaction_system=True,
                 block_size=1000,
                 start_block=0):
        self.stdout.write('Starting transfer\n')
        con = self.connection or self._connect()
        self._initialize(con)
        self.stdout.write('Finished initialization\n')
        cur = con.cursor()
        self.stdout.write('Got a cursor\n')

        set_schema = 'SET search_path = {0};'.format(self.schema)
        cur.execute(set_schema)

        import os
        self.stdout.write('Imported os\n')
        import ase.db
        self.stdout.write('Imported ase.db\n')
        self.stdout.write('Building server_name\n')
        server_name = "postgres://{0}:{1}@{2}:5432/catalysishub".format(
            self.user, self.password, self.server)
        self.stdout.write('Connecting to {server_name}\n'.format(**locals()))

        nkvp = 0
        nrows = 0
        if write_ase:
            db = ase.db.connect(filename_sqlite)
            n_structures = db.count()
            n_blocks = int(n_structures / block_size) + 1
            for block_id in range(start_block, n_blocks):
                b0 = block_id * block_size + 1
                b1 = (block_id + 1) * block_size + 1
                self.stdout.write(
                    str(block_id) + ' ' + 'from ' + str(b0) + ' to ' +
                    str(b1) + '\n')
                if block_id + 1 == n_blocks:
                    b1 = n_structures + 1
                #rows = [db._get_row(i) for i in range(b0, b1]
                #db2 = ase.db.connect(server_name, type='postgresql')
                #for lala in [0]:
                with ase.db.connect(server_name, type='postgresql') as db2:
                    for i in range(b0, b1):
                        self.stdout.write('  .' + str(i))
                        self.stdout.flush()
                        row = db.get(i)
                        kvp = row.get('key_value_pairs', {})
                        nkvp -= len(kvp)
                        # kvp.update(add_key_value_pairs)
                        nkvp += len(kvp)
                        db2.write(row, data=row.get('data'), **kvp)
                        nrows += 1
                    self.stdout.write('\n')

                self.stdout.write('Finished Block {0}\n:'.format(block_id))
                self.stdout.write(
                    '  Completed transfer of {0} atomic structures.\n'.format(
                        nrows))

        from cathub.cathubsqlite import CathubSQLite
        db = CathubSQLite(filename_sqlite)
        con_lite = db._connect()
        cur_lite = con_lite.cursor()

        # write publication
        Npub = 0
        Npubstruc = 0
        if write_publication:
            try:
                npub = db.get_last_pub_id(cur_lite)
            except:
                npub = 1
            for id_lite in range(1, npub + 1):
                Npub += 1
                row = db.read(id=id_lite, table='publication')
                if len(row) == 0:
                    continue
                values = row[0]
                pid, pub_id = self.write_publication(values)

            # Publication structures connection
            cur_lite.execute("""SELECT * from publication_system;""")
            rows = cur_lite.fetchall()
            for row in rows:
                Npubstruc += 1
                values = row[:]
                key_str, value_str = get_key_value_str(
                    values, table='publication_system')

                set_schema = 'SET search_path = {0};'.format(self.schema)
                cur.execute(set_schema)
                print("[SET SCHEMA] {set_schema}".format(**locals()))

                insert_command = 'INSERT INTO publication_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;'.format(
                    key_str, value_str)

                cur.execute(insert_command)
                # self.write(values, table='publication_system')
            con.commit()

        Ncat = 0
        Ncatstruc = 0

        if write_reaction:
            n = db.get_last_id(cur_lite)
            select_ase = """SELECT * from reaction_system where id={};"""
            for id_lite in range(start_id, n + 1):
                row = db.read(id_lite)
                if len(row) == 0:
                    continue
                values = row[0]

                id = self.check(values[13],
                                values[1],
                                values[6],
                                values[7],
                                values[8],
                                strict=True)
                update_rs = False

                if id is not None:
                    id = self.update(id, values)
                    self.stdout.write(
                        'Updated reaction db with row id = {}\n'.format(id))
                    update_rs = True
                else:
                    Ncat += 1
                    id = self.write(values)
                    self.stdout.write(
                        'Written to reaction db row id = {0}\n'.format(id))

                cur_lite.execute(select_ase.format(id_lite))
                rows = cur_lite.fetchall()
                if write_reaction_system:
                    if update_rs:
                        cur.execute(
                            'Delete from reaction_system where reaction_id={0}'
                            .format(id))
                    for row in rows:
                        Ncatstruc += 1
                        values = list(row)
                        if len(values) == 3:
                            values.insert(1, None)

                        values[3] = id

                        key_str, value_str = get_key_value_str(
                            values, table='reaction_system')

                        set_schema = 'SET search_path = {0};'.format(
                            self.schema)
                        cur.execute(set_schema)
                        print("[SET SCHEMA] {set_schema}".format(**locals()))

                        insert_command = 'INSERT INTO reaction_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;'.format(
                            key_str, value_str)

                        print("[INSERT COMMAND] {insert_command}".format(
                            **locals()))
                        cur.execute(insert_command)

                con.commit()  # Commit reaction_system for each row

        for statement in tsvector_update:
            cur.execute(statement)

        if self.connection is None:
            con.commit()
            con.close()

        self.stdout.write('Inserted into:\n')
        self.stdout.write('  systems: {0}\n'.format(nrows))
        self.stdout.write('  publication: {0}\n'.format(Npub))
        self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
        self.stdout.write('  reaction: {0}\n'.format(Ncat))
        self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))