def _fix_primitive(record, field):
    """
    Convert the value stored under a single field of the record, in place,
    for parquet compatibility.

    TIMESTAMP/DATETIME values are passed through
    ``datetime_to_epoch_timestamp`` and DATE values through
    ``date_to_epoch_date`` (both defined outside this block) and then cast
    to ``int``. TIME strings are parsed into ``datetime.time`` objects.
    Values of any other type are left untouched.

    :param record: record of data from the beam pipeline; updated in place.
    :param field: schema field description, read as a mapping with
        ``name`` and ``type`` keys.
    :return: the (possibly converted) value now stored in the record.
    :raises ValueError: if a TIME value matches neither ``%H:%M:%S`` nor
        ``%H:%M:%S.%f``.
    """
    key = field['name']
    bq_type = field['type']

    if bq_type == 'TIME':
        text = record[key]
        try:
            parsed = datetime.datetime.strptime(text, '%H:%M:%S')
        except ValueError:
            # Whole-second format did not match; retry with fractional
            # seconds.
            parsed = datetime.datetime.strptime(text, '%H:%M:%S.%f')
        record[key] = parsed.time()
    elif bq_type == 'DATE':
        epoch_date = date_to_epoch_date(record[key])
        record[key] = int(epoch_date)
    elif bq_type in ('TIMESTAMP', 'DATETIME'):
        epoch_timestamp = datetime_to_epoch_timestamp(record[key])
        record[key] = int(epoch_timestamp)

    return record[key]
Example #2
0
def fix_record_for_avro(record, avro_schema):
    """
    Convert temporal values of a record, in place, for Avro logical types.

    Fields whose type is a dict are treated as nested records and handled
    recursively. Fields whose type is a union with a dict in its second
    position (e.g. ``["null", {...}]``) are converted according to that
    dict's ``logicalType``: ``timestamp`` / ``timestamp-*`` via
    ``datetime_to_epoch_timestamp``, ``date`` via ``date_to_epoch_date`` and
    ``time`` / ``time-*`` via ``time_to_epoch_time`` (helpers defined
    outside this block). Every other field is left untouched.

    :param record: dict-like record; updated in place.
    :param avro_schema: schema object exposing ``fields``; every field must
        provide ``name`` and ``type.to_json()``.
    :return: single-element list holding the updated record.
    """
    for field in avro_schema.fields:
        field_name = field.name
        datatype = field.type.to_json()

        if isinstance(datatype, dict):
            # NOTE(review): every dict-typed field is assumed to be a record
            # definition; it is re-parsed so its nested fields can be walked.
            record[field_name] = fix_record_for_avro(
                record[field_name],
                avro.schema.parse(json.dumps(datatype)))[0]
            continue

        # Only a union whose second branch is a complex (dict) type is
        # inspected. The length check keeps one-branch unions from raising
        # IndexError.
        if not (isinstance(datatype, list) and len(datatype) > 1
                and isinstance(datatype[1], dict)):
            continue

        logical_type = datatype[1].get('logicalType')
        if not logical_type:
            continue

        # Split on the LAST hyphen only, so names with several hyphens
        # (e.g. 'local-timestamp-micros') no longer break the two-value
        # unpacking; they simply match none of the branches below.
        if '-' in logical_type:
            logical_prefix, precision = logical_type.rsplit('-', 1)
        else:
            logical_prefix, precision = logical_type, None
        is_micros = (precision == 'micros')

        if logical_prefix == 'timestamp':
            record[field_name] = datetime_to_epoch_timestamp(
                record[field_name],
                micros=is_micros
            )
        elif logical_type == 'date':
            record[field_name] = date_to_epoch_date(record[field_name])
        elif logical_prefix == 'time':
            record[field_name] = time_to_epoch_time(
                record[field_name],
                micros=is_micros
            )
    return [record]
Example #3
0
def fix_record_for_parquet(record, schema):
    """
    Rewrite the temporal fields of a record, in place, for parquet
    compatibility.

    For every schema entry the matching record value is converted by type:
    TIMESTAMP/DATETIME through ``datetime_to_epoch_timestamp`` and DATE
    through ``date_to_epoch_date`` (both defined outside this block), each
    cast to ``int``; TIME strings become ``datetime.time`` objects. Fields
    of any other type are not touched.

    :param record: record of data from beam pipeline; updated in place.
    :param schema: iterable of field descriptions, each a mapping with
        ``name`` and ``type`` keys.
    :return: single-element list containing the updated record.
    :raises ValueError: if a TIME value matches neither ``%H:%M:%S`` nor
        ``%H:%M:%S.%f``.
    """
    for column in schema:
        key = column["name"]
        bq_type = column["type"]

        if bq_type == "TIME":
            raw_time = record[key]
            try:
                moment = datetime.datetime.strptime(raw_time, '%H:%M:%S')
            except ValueError:
                # Fall back to the format carrying fractional seconds.
                moment = datetime.datetime.strptime(raw_time, '%H:%M:%S.%f')
            record[key] = moment.time()
            continue

        if bq_type == "DATE":
            record[key] = int(date_to_epoch_date(record[key]))
            continue

        if bq_type in ("TIMESTAMP", "DATETIME"):
            record[key] = int(datetime_to_epoch_timestamp(record[key]))

    return [record]
Example #4
0
def fix_record_for_avro(record, avro_schema):
    """
    Convert temporal values of a record, in place, for Avro logical types.

    Only fields whose type is a union with a dict in its second position
    (e.g. ``["null", {...}]``) are inspected. Depending on that dict's
    ``logicalType`` the value is converted: ``timestamp`` / ``timestamp-*``
    via ``datetime_to_epoch_timestamp``, ``date`` via ``date_to_epoch_date``
    and ``time`` / ``time-*`` via ``time_to_epoch_time`` (helpers defined
    outside this block). Every other field is left untouched.

    :param record: dict-like record; updated in place.
    :param avro_schema: schema object exposing ``fields``; every field must
        provide ``name`` and ``type.to_json()``.
    :return: single-element list holding the updated record.
    """
    for field in avro_schema.fields:
        field_name = field.name
        datatype_union = field.type.to_json()

        # to_json() may also yield a plain type name or a dict; indexing
        # those with [1] is meaningless (and raises for dicts and short
        # values), so only real unions with a second, dict-typed branch
        # are considered.
        if not (isinstance(datatype_union, list)
                and len(datatype_union) > 1
                and isinstance(datatype_union[1], dict)):
            continue

        logical_type = datatype_union[1].get('logicalType')
        if not logical_type:
            continue

        # Split on the LAST hyphen only, so names with several hyphens
        # (e.g. 'local-timestamp-micros') no longer break the two-value
        # unpacking; they simply match none of the branches below.
        if '-' in logical_type:
            logical_prefix, precision = logical_type.rsplit('-', 1)
        else:
            logical_prefix, precision = logical_type, None
        is_micros = (precision == 'micros')

        if logical_prefix == 'timestamp':
            record[field_name] = datetime_to_epoch_timestamp(
                record[field_name], micros=is_micros)
        elif logical_type == 'date':
            record[field_name] = date_to_epoch_date(record[field_name])
        elif logical_prefix == 'time':
            record[field_name] = time_to_epoch_time(record[field_name],
                                                    micros=is_micros)
    return [record]