def build_and_then_import_data(connection,
                               table,
                               primary_key,
                               columns,
                               excludes,
                               search,
                               total_count,
                               chunk_size,
                               verbose=False,
                               dry_run=False):
    """
    Select all data from a table, anonymize it in batches and import it back.

    :param connection: A database connection instance.
    :param str table: Name of the table to retrieve the data.
    :param str primary_key: Table primary key
    :param list columns: A list of table fields
    :param list[dict] excludes: A list of exclude definitions.
    :param str search: A SQL WHERE (search_condition) to filter and keep only the searched rows.
    :param int total_count: The amount of rows for the current table
    :param int chunk_size: Number of data rows to fetch with the cursor
    :param bool verbose: Display logging information and a progress bar.
    :param bool dry_run: Script is running in dry-run mode, no commit expected.
    """
    column_names = get_column_names(columns)
    sql_columns = SQL(', ').join([
        Identifier(column_name) for column_name in [primary_key] + column_names
    ])
    sql_select = SQL('SELECT {columns} FROM {table}').format(
        table=Identifier(table), columns=sql_columns)
    if search:
        # NOTE: `search` is interpolated as raw SQL text, so it must come
        # from a trusted configuration source, never from untrusted input.
        sql_select = Composed([
            sql_select,
            SQL(" WHERE {search_condition}".format(search_condition=search))
        ])
    if dry_run:
        # Keep dry runs cheap by scanning only a sample of the table.
        sql_select = Composed([sql_select, SQL(" LIMIT 100")])
    # Named (server-side) cursor so large result sets are streamed in
    # chunks instead of being materialized client-side in one go.
    cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor,
                               name='fetch_large_result')
    try:
        cursor.execute(sql_select.as_string(connection))
        temp_table = 'tmp_{table}'.format(table=table)
        create_temporary_table(connection, columns, table, temp_table,
                               primary_key)
        batches = int(math.ceil((1.0 * total_count) / (1.0 * chunk_size)))
        for _ in trange(batches,
                        desc="Processing {} batches for {}".format(
                            batches, table),
                        disable=not verbose):
            records = cursor.fetchmany(size=chunk_size)
            if records:
                data = parmap.map(process_row,
                                  records,
                                  columns,
                                  excludes,
                                  pm_pbar=verbose)
                # Drop rows that process_row rejected (returned None).
                import_data(connection, temp_table,
                            [primary_key] + column_names,
                            filter(None, data))
        apply_anonymized_data(connection, temp_table, table, primary_key,
                              columns)
    finally:
        # Always release the server-side cursor, even when a batch fails.
        cursor.close()
Exemple #2
0
 def load(self):
     """Bulk-load the proxied CSV stream into the target table via COPY."""
     template = SQL("COPY {} ({}) FROM STDIN WITH CSV HEADER NULL AS ''")
     column_sql = Composed(
         [Identifier(header) for header in self.headers.keys()])
     column_sql = column_sql.join(', ')
     statement = template.format(Identifier(self.table), column_sql)
     with self.conn.cursor() as cursor:
         cursor.copy_expert(statement, self.proxy)
     self.conn.commit()
Exemple #3
0
 def load(self, fh, delimiter):
     """Bulk-load CSV data from *fh* into the target table via COPY.

     :param fh: File-like object positioned at a CSV header row.
     :param delimiter: Field delimiter used in the CSV stream.
     """
     with self.conn.cursor() as cursor:
         headers = list(self.headers.keys())
         stmt = SQL("COPY {} ({}) FROM STDIN "
                    "WITH CSV HEADER DELIMITER AS {} NULL AS ''")
         columns = Composed([Identifier(c) for c in headers])
         columns = columns.join(', ')
         # Literal() quotes the delimiter safely inside the COPY options.
         stmt = stmt.format(Identifier(self.table), columns,
                            Literal(delimiter))
         cursor.copy_expert(stmt, fh)
     self.conn.commit()
Exemple #4
0
def sql_with_cte(table: Table, cte_deps: List[Table], **kwargs) -> Composed:
    """Build Table query with CTEs. cte_deps must be topologically sorted"""
    # Assign each dependency a stable positional alias (_cte0, _cte1, ...).
    aliases = {dep: Identifier("_cte%d" % index)
               for index, dep in enumerate(cte_deps)}
    cte_clauses = Composed(
        Composed([alias,
                  SQL(" AS ("),
                  sql_with_named_deps(dep, aliases, **kwargs),
                  SQL(")")])
        for dep, alias in aliases.items())
    return Composed([
        SQL("WITH "),
        SQL("\n   , ").join(cte_clauses),
        SQL("\n"),
        sql_with_named_deps(table, aliases, **kwargs),
    ])
Exemple #5
0
def sql_with_named_deps(table: Table, deps: Mapping[Table, Identifier],
                        **kwargs) -> Composed:
    """Build Table query relying on given Identifiers for dependencies"""
    substitutions = {}
    for slot, source in table.source_tables.items():
        # Replace each source slot with "<alias> AS <slot>".
        substitutions[slot] = Composed(
            [deps[source], SQL(" AS "), Identifier(slot)])
    return table.sql.format(**substitutions, **kwargs)
Exemple #6
0
def sql_with_subqueries(table: Table, **kwargs) -> Composed:
    """Recursively build Table query with dependencies as sub-queries"""
    substitutions = {
        slot: Composed([SQL("("),
                        sql_with_subqueries(source, **kwargs),
                        SQL(") AS "),
                        Identifier(slot)])
        for slot, source in table.source_tables.items()
    }
    return table.sql.format(**substitutions, **kwargs)
Exemple #7
0
    def index_action(self, columns, action='add', where='', table=None):
        """Create or drop a database index covering *columns*.

        :param columns: a column name (str), an SQL expression object with
            a ``params`` attribute, or a list of either.
        :param action: ``'add'`` to create the index, ``'remove'`` to drop it.
        :param where: optional SQL condition for a partial index.
        :param table: override for the table name used in the index name;
            defaults to ``self.table_name``.
        """
        if isinstance(columns, str):
            columns = [columns]

        def stringify(column):
            # Plain column names are used verbatim; expression objects are
            # flattened (expression + params) into an identifier-safe string.
            if isinstance(column, str):
                return column
            else:
                return ('_'.join(map(str, (column, ) + column.params)).replace(
                    '"', '').replace('%s', '__'))

        # Build a deterministic index name from table, columns and condition,
        # then normalize it through convert_name.
        name = [table or self.table_name]
        name.append('_'.join(map(stringify, columns)))
        if where:
            name.append('+where')
            name.append(stringify(where))
        name.append('index')
        index_name = self.convert_name('_'.join(name))

        with Transaction().connection.cursor() as cursor:
            if action == 'add':
                # Idempotent: do nothing if the index already exists.
                if index_name in self._indexes:
                    return
                # Gather bind parameters from expression columns, then from
                # the optional WHERE condition.
                params = sum(
                    (c.params for c in columns if hasattr(c, 'params')), ())
                if where:
                    params += where.params
                    where = ' WHERE %s' % where
                cursor.execute(
                    Composed([
                        SQL('CREATE INDEX {name} ON {table} ({columns})').
                        format(name=Identifier(index_name),
                               table=Identifier(self.table_name),
                               columns=SQL(',').join((Identifier(
                                   c) if isinstance(c, str) else SQL(str(c)))
                                                     for c in columns)),
                        SQL(where)
                    ]), params)
                self._update_definitions(indexes=True)
            elif action == 'remove':
                if len(columns) == 1 and isinstance(columns[0], str):
                    # Do not drop an index whose field belongs to another
                    # module than this one.
                    if self._field2module.get(
                            columns[0], self.module_name) != self.module_name:
                        return

                if index_name in self._indexes:
                    cursor.execute(
                        SQL('DROP INDEX {}').format(Identifier(index_name)))
                    self._update_definitions(indexes=True)
            else:
                raise Exception('Index action not supported!')
    def update(self, an_id: Optional[int] = None, where_key: str = None, name: str = None, data=None,
               notes: str = None, modified_by: str = None, created_by: str = None,
               my_conn: Optional[dict] = None, t_log: Optional[TimeLogger] = None,
               verbose: bool = None):
        """Update an existing record in grouped_physics_object.

        :param an_id: Value matched against *where_key* to select the record;
            warns and does nothing when omitted.
        :param where_key: Column used to locate the record; defaults to the
            table's id column (``self.id_name()``).
        :param name: New value for the ``name`` field, if given.
        :param data: Extra column/value pairs to set; mutated in place.
        :param notes: New value for the ``notes`` field, if given.
        :param modified_by: Recorded modifier; defaults to the DB user.
        :param created_by: New value for ``created_by``, if given.
        :param my_conn: Connection bundle dict; defaults to ``self.my_conn``.
        :param t_log: Optional time logger; created when *verbose* is True.
        :param verbose: Print the query and bound data for debugging.
        """

        if my_conn is None:
            my_conn = self.my_conn
        else:
            self.my_conn = my_conn

        if verbose is True and t_log is None:
            t_log = TimeLogger()

        my_conn = my_connect(my_conn=my_conn, t_log=t_log, verbose=verbose)
        conn = my_conn['conn']
        db_params = my_conn['db_params']

        if where_key is None:
            where_key = self.id_name()

        if an_id is None:
            warn("No Record ID Specified", NoRecordIDError)
        else:
            if data is None:
                data = {}

            data.update(add_field('name', name))
            data.update(add_field('notes', notes))
            data.update(add_field('created_by', created_by))

            # If there is no data, then skip. Of course one could still change modified by:
            if len(data) > 0 or modified_by is not None:

                # Always require a modified by and because one can change data without specifying a modifer,
                # this is necessary. We don't check it before the previous if, because we don't want to create
                # a modified_by if not data was set and no modified_by was set.
                if modified_by is None:
                    modified_by = db_params['user']

                data.update(modified_by=modified_by)

                fields = data.keys()

                sql = "UPDATE {table} SET {fields} WHERE {pkey} = {a_value}"

                if verbose:
                    print('Data:\n', data)
                    print('\nFields:\n', fields)

                query = SQL(sql).format(
                    table=Identifier(self.table_name),
                    fields=SQL(', ').join(
                        Composed([Identifier(k), SQL(' = '), Placeholder(k)]) for k in fields
                    ),
                    pkey=Identifier(where_key),
                    a_value=Placeholder('where_key')
                )

                # Added only after the query is built so 'where_key' does not
                # appear in the SET clause (`fields` is a live keys view).
                data.update(where_key=an_id)

                cur = conn.cursor(cursor_factory=NamedTupleCursor)

                if verbose:
                    print(query.as_string(conn))
                    print(cur.mogrify(query, data))

                try:
                    cur.execute(query, data)
                except OperationalError as error:
                    # Best-effort: log the failure and continue to commit/close.
                    print(error)

                conn.commit()

                cur.close()

                self.pull_data()
    def update(self,
               an_id: int = None,
               where_key: str = None,
               name: str = None,
               data=None,
               latex: str = None,
               notes: str = None,
               unit_id: int = None,
               image: bytes = None,
               template_id: int = None,
               dimensions: int = None,
               modified_by: str = None,
               created_by: str = None,
               verbose: bool = None):
        """Update an existing record in math_object and refresh caches.

        :param an_id: Value matched against *where_key* to select the record;
            warns and does nothing when omitted.
        :param where_key: Column used to locate the record; defaults to
            ``self.id_name``.
        :param name: New value for the ``name`` field, if given.
        :param data: Extra column/value pairs to set; mutated in place.
        :param latex: LaTeX source processed together with *image* and
            *template_id* by ``_process_latex``.
        :param notes: New value for the ``notes`` field, if given.
        :param unit_id: New unit reference, if given.
        :param image: Rendered image bytes passed to ``_process_latex``.
        :param template_id: Template reference passed to ``_process_latex``.
        :param dimensions: New value for ``dimensions``, if given.
        :param modified_by: Recorded modifier; defaults to the DB user.
        :param created_by: New value for ``created_by``, if given.
        :param verbose: Print the query and bound data for debugging.
        """

        if where_key is None:
            # NOTE(review): the sibling class calls self.id_name() here;
            # confirm whether id_name is an attribute or a method.
            where_key = self.id_name

        if an_id is None:
            warn("No Record ID Specified", NoRecordIDError)
        else:
            if data is None:
                data = {}

            db_params = config()

            data.update(self._add_field('name', name))
            data.update(self._add_field('notes', notes))
            data.update(self._process_latex(latex, image, template_id,
                                            verbose))
            data.update(self._add_field('dimensions', dimensions))
            data.update(self._add_field('unit_id', unit_id))
            data.update(self._add_field('created_by', created_by))

            # If there is no data, then skip. Of course one could still change modified by:
            if len(data) > 0 or modified_by is not None:

                # Always require a modified by and because one can change data without specifying a modifer,
                # this is necessary. We don't check it before the previous if, because we don't want to create
                # a modified_by if not data was set and no modified_by was set.
                if modified_by is None:
                    modified_by = db_params['user']

                data.update(modified_by=modified_by)

                fields = data.keys()

                sql = "UPDATE {table} SET {fields} WHERE {pkey} = {a_value} RETURNING *"

                query = SQL(sql).format(
                    table=Identifier(self.table),
                    fields=SQL(', ').join(
                        Composed([Identifier(k),
                                  SQL(' = '),
                                  Placeholder(k)]) for k in fields),
                    pkey=Identifier(where_key),
                    a_value=Placeholder('where_key'))

                # Added only after the query is built so 'where_key' does not
                # appear in the SET clause (`fields` is a live keys view).
                data.update(where_key=an_id)

                conn = connect(**db_params)
                cur = conn.cursor(cursor_factory=NamedTupleCursor)

                if verbose:
                    print(query.as_string(conn))
                    print(cur.mogrify(query, data))

                try:
                    cur.execute(query, data)
                except OperationalError as error:
                    # NOTE(review): the error is only printed; fetchall()
                    # below will still raise if the execute failed.
                    print(error)

                new_record = cur.fetchall()

                conn.commit()

                cur.close()
                conn.close()

                self.last_inserted = self.as_columns(new_record)

                self.records = self.all_records()