def mapper_init(self):
    """
    Set up per-mapper state for the mrjob run.

    If you override this hook, call it through 'super' so the attributes
    below are still populated.

    Reads the YAML document named by ``self.options.extractions``, wraps
    the parsed result in a ``RedShiftLogSchema`` and stores on ``self``:

    * ``schema`` -- the schema object itself
    * ``table_name_to_columns`` -- table name -> list of ``Column`` objects
    * ``table_name_to_table`` -- table name -> ``Table`` object
    * ``table_name_to_output_order`` -- table name -> names of the columns
      whose ``is_noop`` flag is falsy
    * ``redshift_export`` -- a ``RedshiftExportProtocol`` built with
      ``self.options.column_delimiter``
    * ``error_tbl_name`` / ``error_tbl_output_order`` -- name of the error
      table and the ``'log_key'`` of each of its columns
    """
    # NOTE(review): yaml.load is called without an explicit Loader here;
    # confirm the extractions file is trusted, or consider yaml.safe_load.
    raw_yaml = load_from_file(self.options.extractions)
    log_schema = RedShiftLogSchema(yaml.load(raw_yaml))
    self.schema = log_schema

    # Pass 1: build the Column objects for every table.
    columns_by_table = {}
    for tbl_name, tbl_def in log_schema.tables().iteritems():
        built_columns = []
        for column_def in tbl_def['columns']:
            built_columns.append(Column.create_from_table(tbl_def, column_def))
        columns_by_table[tbl_name] = built_columns
    self.table_name_to_columns = columns_by_table

    # Pass 2: build the Table objects on top of the columns from pass 1.
    tables_by_name = {}
    for tbl_name, tbl_def in log_schema.tables().iteritems():
        tables_by_name[tbl_name] = Table.create(
            tbl_def, columns=self.table_name_to_columns[tbl_name])
    self.table_name_to_table = tables_by_name

    # Only columns that are not flagged as no-ops take part in the output.
    output_order_by_table = {}
    for tbl_name, tbl_columns in self.table_name_to_columns.iteritems():
        emitted_names = []
        for col in tbl_columns:
            if col.is_noop:
                continue
            emitted_names.append(col.name)
        output_order_by_table[tbl_name] = emitted_names
    self.table_name_to_output_order = output_order_by_table

    self.redshift_export = RedshiftExportProtocol(
        delimiter=self.options.column_delimiter)

    err_name, err_table = self.schema.get_error_table()
    self.error_tbl_name = err_name
    error_keys = []
    for err_column in err_table['columns']:
        error_keys.append(err_column['log_key'])
    self.error_tbl_output_order = error_keys
def mapper_init(self):
    """
    Initialise the mapper for an mrjob run.

    Overriding implementations should invoke this method via 'super'.

    The schema is parsed from the YAML file referenced by
    ``self.options.extractions``. The following attributes are then set,
    in this order: ``schema``, ``table_name_to_columns``,
    ``table_name_to_table``, ``table_name_to_output_order``,
    ``redshift_export``, ``error_tbl_name`` and
    ``error_tbl_output_order``.
    """
    # NOTE(review): yaml.load without an explicit Loader -- verify that the
    # extractions file is trusted input, or consider yaml.safe_load.
    schema_text = load_from_file(self.options.extractions)
    parsed_schema = RedShiftLogSchema(yaml.load(schema_text))
    self.schema = parsed_schema

    # Table name -> list of Column objects, one per entry in
    # table['columns'].
    name_to_cols = {}
    for name, table_spec in parsed_schema.tables().iteritems():
        name_to_cols[name] = [
            Column.create_from_table(table_spec, col_spec)
            for col_spec in table_spec['columns']
        ]
    self.table_name_to_columns = name_to_cols

    # Table name -> Table object, reusing the columns created above.
    name_to_tbl = {}
    for name, table_spec in parsed_schema.tables().iteritems():
        cols_for_table = self.table_name_to_columns[name]
        name_to_tbl[name] = Table.create(table_spec, columns=cols_for_table)
    self.table_name_to_table = name_to_tbl

    # Table name -> names of columns whose is_noop flag is falsy.
    name_to_order = {}
    for name, cols in self.table_name_to_columns.iteritems():
        name_to_order[name] = [c.name for c in cols if not c.is_noop]
    self.table_name_to_output_order = name_to_order

    delimiter = self.options.column_delimiter
    self.redshift_export = RedshiftExportProtocol(delimiter=delimiter)

    error_info = self.schema.get_error_table()
    error_name, error_spec = error_info
    self.error_tbl_name = error_name
    self.error_tbl_output_order = [
        entry['log_key'] for entry in error_spec['columns']
    ]
def create_schema(file_path):
    """
    Load the schema stored at ``file_path`` and sanity-check it.

    Builds a ``Column`` list and a ``Table`` for every table in the schema
    and asserts that each column list is non-empty and that the resulting
    table mapping is non-empty. Nothing is returned; a failed check raises
    ``AssertionError``.
    """
    # NOTE(review): yaml.load is called without an explicit Loader here;
    # confirm the file is trusted, or consider yaml.safe_load.
    raw_yaml = load_from_file(file_path)
    log_schema = RedShiftLogSchema(yaml.load(raw_yaml))

    columns_by_name = {}
    for tbl_name, tbl_def in log_schema.tables().iteritems():
        built = []
        for column_def in tbl_def['columns']:
            built.append(Column.create_from_table(tbl_def, column_def))
        columns_by_name[tbl_name] = built

    # Every table must have produced at least one column.
    for built_columns in columns_by_name.itervalues():
        assert built_columns

    tables_by_name = {}
    for tbl_name, tbl_def in log_schema.tables().iteritems():
        tables_by_name[tbl_name] = Table.create(
            tbl_def, columns=columns_by_name[tbl_name])

    # The schema must have defined at least one table.
    assert tables_by_name
def create_schema(file_path):
    """
    Parse the schema file at ``file_path`` and verify it can be built.

    For each table in the schema this creates its ``Column`` objects and a
    ``Table``. It asserts that no table ends up with an empty column list
    and that at least one table was created. The function returns nothing.
    """
    # NOTE(review): yaml.load without an explicit Loader -- verify that the
    # file is trusted input, or consider yaml.safe_load.
    schema_text = load_from_file(file_path)
    parsed_schema = RedShiftLogSchema(yaml.load(schema_text))

    # Step 1: columns for each table.
    cols_lookup = {}
    for name, table_spec in parsed_schema.tables().iteritems():
        cols_lookup[name] = [
            Column.create_from_table(table_spec, col_spec)
            for col_spec in table_spec['columns']
        ]

    # Step 2: no table may have an empty column list.
    for name in cols_lookup:
        assert cols_lookup[name]

    # Step 3: tables built from the columns of step 1.
    table_lookup = {}
    for name, table_spec in parsed_schema.tables().iteritems():
        table_cols = cols_lookup[name]
        table_lookup[name] = Table.create(table_spec, columns=table_cols)

    # Step 4: at least one table must exist.
    assert table_lookup