def _fix_primitive(record, field):
    """
    Converts a single field of the record for parquet compatibility.

    TIMESTAMP and DATETIME values are passed to datetime_to_epoch_timestamp
    and DATE values to date_to_epoch_date, with the result cast to int.
    TIME values are parsed from an '%H:%M:%S' string (falling back to
    '%H:%M:%S.%f') into a datetime.time. Values of any other type are not
    modified.

    :param record: record of data from the beam pipeline; the converted value
        is written back under the field's name.
    :param field: schema field description, read through its 'name' and
        'type' keys.
    :return: the value stored under the field's name after conversion.
    """
    key = field['name']
    kind = field['type']
    raw = record[key]

    if kind in ('TIMESTAMP', 'DATETIME'):
        converted = int(datetime_to_epoch_timestamp(raw))
    elif kind == 'DATE':
        converted = int(date_to_epoch_date(raw))
    elif kind == 'TIME':
        try:
            parsed = datetime.datetime.strptime(raw, '%H:%M:%S')
        except ValueError:
            # Second attempt: the same layout with fractional seconds.
            parsed = datetime.datetime.strptime(raw, '%H:%M:%S.%f')
        converted = parsed.time()
    else:
        # Not a date/time type: nothing to rewrite.
        return raw

    record[key] = converted
    return record[key]
def fix_record_for_avro(record, avro_schema):
    """
    Converts date/time values of a record, in place, for avro compatibility.

    Every field of the schema is inspected through ``field.type.to_json()``:

    * a dict that carries ``fields`` is a nested record definition; the
      nested value is fixed recursively against that definition.
    * any other dict, or the second member of a union list when that member
      is a dict, is checked for a ``logicalType``. ``timestamp[-precision]``
      values are passed to datetime_to_epoch_timestamp, ``date`` values to
      date_to_epoch_date and ``time[-precision]`` values to
      time_to_epoch_time. The ``micros`` flag handed to the helpers is True
      only when the precision suffix is ``micros``.
    * everything else (plain type names, dicts without a logical type,
      unions with fewer than two members, other logical types) is left
      untouched.

    :param record: record of data from the beam pipeline, indexed by field
        name.
    :param avro_schema: avro schema object exposing ``fields``.
    :return: single element list holding the (mutated) record.
    """
    for field in avro_schema.fields:
        field_name = field.name
        datatype = field.type.to_json()

        if isinstance(datatype, dict):
            if u'fields' in datatype:
                # Record type definition: recurse a level deeper. Only dicts
                # with fields qualify, because the recursion iterates
                # ``avro_schema.fields``; arrays, maps, enums and directly
                # typed logical fields would fail there.
                record[field_name] = fix_record_for_avro(
                    record[field_name],
                    avro.schema.parse(json.dumps(datatype))
                )[0]
                continue
            type_definition = datatype
        elif (isinstance(datatype, list) and len(datatype) > 1
                and isinstance(datatype[1], dict)):
            type_definition = datatype[1]
        else:
            continue

        logical_type = type_definition.get(u'logicalType', None)
        if not logical_type:
            continue

        # partition() never fails to unpack, unlike split('-') on names
        # holding more than one hyphen (e.g. 'local-timestamp-micros').
        logical_prefix, _, precision = logical_type.partition('-')
        is_micros = (precision == u'micros')

        if logical_prefix == u'timestamp':
            record[field_name] = datetime_to_epoch_timestamp(
                record[field_name], micros=is_micros
            )
        elif logical_type == u'date':
            record[field_name] = date_to_epoch_date(record[field_name])
        elif logical_prefix == u'time':
            record[field_name] = time_to_epoch_time(
                record[field_name], micros=is_micros
            )
    return [record]
def fix_record_for_parquet(record, schema):
    """
    Rewrites the date/time fields of a record for parquet compatibility.

    For every field description in the schema: TIMESTAMP and DATETIME values
    are passed to datetime_to_epoch_timestamp and DATE values to
    date_to_epoch_date, with the result cast to int; TIME values are parsed
    from an '%H:%M:%S' string (falling back to '%H:%M:%S.%f') into a
    datetime.time. Fields of any other type are not read or modified.

    :param record: record of data from the beam pipeline; updated in place.
    :param schema: iterable of field descriptions, each read through its
        "name" and "type" keys.
    :return: single element list holding the updated record.
    """
    for column in schema:
        key = column["name"]
        kind = column["type"]

        if kind == "TIME":
            text = record[key]
            try:
                parsed = datetime.datetime.strptime(text, '%H:%M:%S')
            except ValueError:
                # Second attempt: the same layout with fractional seconds.
                parsed = datetime.datetime.strptime(text, '%H:%M:%S.%f')
            record[key] = parsed.time()
        elif kind == "DATE":
            record[key] = int(date_to_epoch_date(record[key]))
        elif kind in ("TIMESTAMP", "DATETIME"):
            record[key] = int(datetime_to_epoch_timestamp(record[key]))

    return [record]
def fix_record_for_avro(record, avro_schema):
    """
    Converts date/time values of a record, in place, for avro compatibility.

    Every field of the schema is inspected through ``field.type.to_json()``.
    The type definition examined is either the value itself when it is a
    dict, or the second member of a union list when that member is a dict.
    If that definition carries a ``logicalType``:

    * ``timestamp[-precision]`` values are passed to
      datetime_to_epoch_timestamp,
    * ``date`` values are passed to date_to_epoch_date,
    * ``time[-precision]`` values are passed to time_to_epoch_time.

    The ``micros`` flag handed to the helpers is True only when the precision
    suffix is ``micros``. Fields of any other shape or logical type are left
    untouched; nested record definitions are not descended into.

    :param record: record of data from the beam pipeline, indexed by field
        name.
    :param avro_schema: avro schema object exposing ``fields``.
    :return: single element list holding the (mutated) record.
    """
    for field in avro_schema.fields:
        field_name = field.name
        datatype = field.type.to_json()

        # Pick the type definition without assuming a two-member union:
        # indexing [1] blindly fails on dict types and one-member unions.
        if isinstance(datatype, dict):
            type_definition = datatype
        elif (isinstance(datatype, list) and len(datatype) > 1
                and isinstance(datatype[1], dict)):
            type_definition = datatype[1]
        else:
            continue

        logical_type = type_definition.get(u'logicalType', None)
        if not logical_type:
            continue

        # partition() never fails to unpack, unlike split('-') on names
        # holding more than one hyphen (e.g. 'local-timestamp-micros').
        logical_prefix, _, precision = logical_type.partition('-')
        is_micros = (precision == u'micros')

        if logical_prefix == u'timestamp':
            record[field_name] = datetime_to_epoch_timestamp(
                record[field_name], micros=is_micros)
        elif logical_type == u'date':
            record[field_name] = date_to_epoch_date(record[field_name])
        elif logical_prefix == u'time':
            record[field_name] = time_to_epoch_time(
                record[field_name], micros=is_micros)
    return [record]