def get_table_metadata(self):
    """Mark the table as in progress and measure the source rows to be migrated.

    Opens a read connection to the source shard, stamps the table record with
    its start times and an ``'in_progress'`` status, loads the column metadata,
    builds the WHERE filter, and runs one COUNT/MIN/MAX query over the
    configured chunk column. The results are stored on ``self.num_rows``,
    ``self.min_id`` and ``self.max_id`` and then logged.

    For a delta migration whose table config has a join clause and whose source
    type is ``'crate'``, an error is logged and the method returns early without
    running the query, so the three attributes above are not assigned here.
    """
    with db.shard_connection(self.c.source_shard, read=True) as source_conn:
        # Record both the source's own clock and the local wall clock (in milliseconds),
        # then persist the status change before doing any heavier work.
        self.table.source_start_time = source_conn.get_current_timestamp()
        self.table.start_time = int(time.time() * 1000)
        self.table.status = 'in_progress'
        self.table.update()

        self.get_column_metadata(source_conn)

        clauses, values = worker.generate_where(source_conn, self.c, self.c.table_config)
        self.c.where_clauses = clauses
        self.c.where_values = values

        # We only need the join clause for delta and direct currently
        join_sql = ''
        if self.c.migration_type == orm.MigrationType.DELTA and self.c.table_config.join:
            # TODO: come up with a different way to do deltas for crate with a join clause.
            # We don't need the chunking if we do it via json export
            if self.c.source_type == 'crate':
                self.log_error(
                    'The %s table specifies a join clause but joins are not supported for crate due to lack of '
                    'aggregation support for JOIN queries. This table will not have any delta migrations '
                    'performed.',
                    self.c.table_config.table_name
                )
                return
            join_sql = self.c.table_config.join % {'schema': self.c.source_schema}

        where_sql = ''
        if self.c.where_clauses:
            where_sql = ' WHERE ' + ' AND '.join(self.c.where_clauses)

        query_template = (
            'SELECT COUNT(*), MIN(%(chunk_col)s), MAX(%(chunk_col)s) '
            'FROM %(schema)s.%(table)s %(table_alias)s %(join)s %(where)s'
        )
        query = query_template % dict(
            chunk_col=self.c.table_config.chunk_col,
            schema=self.c.source_schema,
            table=self.c.table_config.table_name,
            table_alias=self.c.table_config.table_alias,
            join=join_sql,
            where=where_sql,
        )

        with db.cursor(source_conn) as cursor:
            # A second formatting pass swaps any %(?)s markers (presumably emitted by
            # worker.generate_where) for this connection's parameter placeholder.
            cursor.execute(query % {'?': source_conn.PARAMETER_PLACEHOLDER}, self.c.where_values)
            summary_row = cursor.fetchone()
            self.num_rows, self.min_id, self.max_id = summary_row
        self.log('num_rows=%r min_id=%r max_id=%r', self.num_rows, self.min_id, self.max_id)
def check_table(self, table_config, conn):
    """Compare the destination row count against the count recorded when the table was queued.

    Args:
        table_config: Config of the table to verify; its ``table_name``,
            ``table_alias`` and ``join`` attributes are read here.
        conn: Database connection used to build the WHERE filter and run the
            count query (presumably a destination connection, since the query
            targets ``self.c.destination_schema`` -- confirm against the caller).

    Returns:
        The ``orm.Table`` record with ``verification_status`` set to
        ``'verified'``, ``'failed'`` or ``'unknown'`` and saved via ``update()``,
        or ``None`` when no record exists for this table.

    Raises:
        shinkansen.UnrecoverableError: If the migration type is neither DELTA
            nor FULL.
    """
    record = orm.Table.get(
        self.redis_conn,
        migration_id=self.c.migration_id,
        partition_val=self.c.partition_val,
        namespace=self.c.namespace,
        source_shard=self.c.source_shard,
        destination_shard=self.c.destination_shard,
        table_name=table_config.table_name)
    if record is None:
        return None

    with db.cursor(conn) as cursor:
        # update the table_config so logging is correct
        self.c.table_config = table_config

        is_delta = self.c.migration_type == orm.MigrationType.DELTA
        if not is_delta and self.c.migration_type != orm.MigrationType.FULL:
            raise shinkansen.UnrecoverableError('Migration type %r unknown' % (self.c.migration_type,))

        # TODO(jpatrin): Add join support for non-crate destinations
        if is_delta and table_config.join:
            self.log_warning('Verification is unsupported for tables in delta migrations with a join clause')
            record.verification_status = 'unknown'
            record.update()
            return record

        # TODO(jpatrin): The verifier should technically take the join clause into account so it gets the
        # same result as the queuer and exporter, but crate doesn't support joins with aggregation. As long
        # as the destination only has the records we have inserted into it the join shouldn't be needed,
        # though.
        where_clauses, where_values = worker.generate_where(conn, self.c, table_config)
        where_sql = ''
        if where_clauses:
            where_sql = ' WHERE ' + ' AND '.join(where_clauses)

        # TODO(jpatrin): Disabling min and max checks for now as the query is different for crate vs. mysql,
        # so only COUNT(*) is selected and compared below.
        count_template = (
            'SELECT COUNT(*) '
            'FROM %(schema)s.%(table)s %(table_alias)s %(where)s'
        )
        count_sql = count_template % dict(
            schema=self.c.destination_schema,
            table=table_config.table_name,
            table_alias=table_config.table_alias,
            where=where_sql,
        )
        # A second formatting pass swaps any %(?)s markers for this connection's parameter placeholder.
        cursor.execute(count_sql % {'?': conn.PARAMETER_PLACEHOLDER}, where_values)
        [actual_rows] = cursor.fetchone()

        problems = []
        if record.num_records != actual_rows:
            problems.append(
                'The queued number of rows (%r) and the resulting number of rows (%r) do not match' % (
                    record.num_records, actual_rows))

        if problems:
            self.log_error('Verification errors: %s', ', '.join(problems))
            outcome = 'failed'
        else:
            self.log('Verification succeeded')
            outcome = 'verified'

        record.verification_status = outcome
        record.update()
        return record