Example #1
0
    def get_table_metadata(self):
        """Mark the table as in progress and collect its row count and chunk range.

        Over a source-shard connection opened with ``read=True`` this method:

        1. stores the source's current timestamp and the local wall-clock time
           (milliseconds) on ``self.table``, sets its status to
           ``'in_progress'`` and calls ``self.table.update()``;
        2. loads column metadata via ``self.get_column_metadata``;
        3. stores the WHERE clauses/values from ``worker.generate_where`` on
           ``self.c``;
        4. runs one aggregate query and stores ``COUNT(*)`` and the MIN/MAX of
           the chunk column on ``self.num_rows``, ``self.min_id`` and
           ``self.max_id``, which are then logged.

        For a delta migration whose table config has a join clause and whose
        source type is ``'crate'``, an error is logged and the method returns
        right after step 3 without running the query, so the three attributes
        are not assigned by this call.
        """
        with db.shard_connection(self.c.source_shard, read=True) as conn:
            self.table.source_start_time = conn.get_current_timestamp()
            self.table.start_time = int(time.time() * 1000)
            self.table.status = 'in_progress'
            self.table.update()

            self.get_column_metadata(conn)

            self.c.where_clauses, self.c.where_values = worker.generate_where(conn, self.c, self.c.table_config)

            table_config = self.c.table_config

            # We only need the join clause for delta and direct currently, so
            # default to no join and only fill it in for delta migrations.
            join_clause = ''
            if self.c.migration_type == orm.MigrationType.DELTA and table_config.join:
                # TODO: come up with a different way to do deltas for crate with a join clause. We don't need the
                # chunking if we do it via json export
                if self.c.source_type == 'crate':
                    self.log_error(
                        'The %s table specifies a join clause but joins are not supported for crate due to lack of '
                        'aggregation support for JOIN queries. This table will not have any delta migrations '
                        'performed.',
                        table_config.table_name
                    )
                    return
                join_clause = table_config.join % {'schema': self.c.source_schema}

            if self.c.where_clauses:
                where_sql = ' WHERE ' + ' AND '.join(self.c.where_clauses)
            else:
                where_sql = ''

            stats_template = (
                'SELECT COUNT(*), MIN(%(chunk_col)s), MAX(%(chunk_col)s) '
                'FROM %(schema)s.%(table)s %(table_alias)s %(join)s %(where)s'
            )
            sql = stats_template % {
                'chunk_col': table_config.chunk_col,
                'schema': self.c.source_schema,
                'table': table_config.table_name,
                'table_alias': table_config.table_alias,
                'join': join_clause,
                'where': where_sql,
            }
            with db.cursor(conn) as cur:
                # Second formatting pass: swap the '%(?)s' markers left in the
                # statement for the connection's own parameter placeholder.
                cur.execute(sql % {'?': conn.PARAMETER_PLACEHOLDER}, self.c.where_values)
                stats_row = cur.fetchone()
                (self.num_rows, self.min_id, self.max_id) = stats_row
        self.log('num_rows=%r min_id=%r max_id=%r', self.num_rows, self.min_id, self.max_id)
Example #2
0
    def check_table(self, table_config, conn):
        """Verify one table by comparing the queued row count with the destination's.

        The stored table record is looked up for the current migration,
        partition value, namespace, source/destination shards and table name.
        Its ``num_records`` is compared with a ``COUNT(*)`` run through
        ``conn`` against the destination schema, filtered by the WHERE clauses
        from ``worker.generate_where``.

        Args:
            table_config: config of the table to verify; it is also assigned
                to ``self.c.table_config``.
            conn: connection used for the cursor and for generating the WHERE
                clauses.

        Returns:
            ``None`` if no table record is found. Otherwise the table record,
            after ``update()`` has been called on it with
            ``verification_status`` set to ``'unknown'`` (delta migration with
            a join clause; the count is not checked), ``'verified'`` (counts
            match) or ``'failed'`` (counts differ).

        Raises:
            shinkansen.UnrecoverableError: if the migration type is neither
                DELTA nor FULL.
        """
        record = orm.Table.get(
            self.redis_conn,
            migration_id=self.c.migration_id,
            partition_val=self.c.partition_val,
            namespace=self.c.namespace,
            source_shard=self.c.source_shard,
            destination_shard=self.c.destination_shard,
            table_name=table_config.table_name,
        )
        if record is None:
            return None

        with db.cursor(conn) as cur:
            # update the table_config so logging is correct
            self.c.table_config = table_config
            # TODO(jpatrin): Add join support for non-crate destinations

            # TODO(jpatrin): Disabling min and max checks for now as the query is different for crate vs. mysql
            is_delta = self.c.migration_type == orm.MigrationType.DELTA
            if not is_delta and self.c.migration_type != orm.MigrationType.FULL:
                raise shinkansen.UnrecoverableError('Migration type %r unknown' % (self.c.migration_type,))
            if is_delta and table_config.join:
                self.log_warning('Verification is unsupported for tables in delta migrations with a join clause')
                record.verification_status = 'unknown'
                record.update()
                return record
            # TODO(jpatrin): The verifier should technically take the join clause into account so it gets the same
            # result as the queuer and exporter, but crate doesn't support joins with aggregation. As long as the
            # destination only has the records we have inserted into it the join shouldn't be needed, though.

            (where_clauses, where_values) = worker.generate_where(conn, self.c, table_config)
            if where_clauses:
                where_sql = ' WHERE ' + ' AND '.join(where_clauses)
            else:
                where_sql = ''

            # Only the row count is selected; MIN/MAX of the chunk column and
            # the join clause are left out of the query (see the TODOs above).
            count_template = (
                'SELECT COUNT(*) '
                'FROM %(schema)s.%(table)s %(table_alias)s %(where)s'
            )
            count_sql = count_template % {
                'schema': self.c.destination_schema,
                'table': table_config.table_name,
                'table_alias': table_config.table_alias,
                'where': where_sql,
            }
            # Second formatting pass: swap the '%(?)s' markers left in the
            # statement for the connection's own parameter placeholder.
            cur.execute(count_sql % {'?': conn.PARAMETER_PLACEHOLDER}, where_values)
            (num_rows,) = cur.fetchone()

        # Collected as a list so further checks (e.g. min_id / max_id) can be
        # added later and reported together in a single log line.
        mismatches = []
        if record.num_records != num_rows:
            mismatches.append('The queued number of rows (%r) and the resulting number of rows (%r) do not match' % (
                record.num_records, num_rows))

        if mismatches:
            self.log_error('Verification errors: %s', ', '.join(mismatches))
            record.verification_status = 'failed'
        else:
            self.log('Verification succeeded')
            record.verification_status = 'verified'
        record.update()
        return record