Exemple #1
0
 def record_primary_key_string(self, record):
     """Build a comma-separated string of the record's primary key values.

     The record is flattened first (using ``self.flatten_schema`` and
     ``self.data_flattening_max_level``), then every name listed in
     ``self.stream_schema_message['key_properties']`` is looked up in the
     flattened record and converted with ``str``.

     Args:
         record: Record to read the primary key values from.

     Returns:
         The key values joined with ``,`` in ``key_properties`` order, or
         ``None`` when the stream declares no key properties.

     Raises:
         KeyError: If a key property is missing from the flattened record.
     """
     key_properties = self.stream_schema_message['key_properties']
     # An empty or null key list both mean "this stream has no primary key".
     if not key_properties:
         return None
     flatten = flattening.flatten_record(record, self.flatten_schema, max_level=self.data_flattening_max_level)
     try:
         key_props = [str(flatten[p]) for p in key_properties]
     except KeyError:
         # Only a missing key matches this message; any other error
         # propagates untouched instead of being logged misleadingly.
         self.logger.error(
             'Cannot find %s primary key(s) in record: %s', key_properties, flatten)
         raise
     return ','.join(key_props)
    def record_primary_key_string(self, record):
        """Return the record's primary key values as one comma-separated string.

        The record is flattened with ``self.flatten_schema`` and
        ``self.data_flattening_max_level``; each name in
        ``self.stream_schema_message['key_properties']`` must then be present
        in the flattened record with a non-null value.

        Returns ``None`` when no key properties are declared. Raises
        ``PrimaryKeyNotFoundException`` when a key property is missing from
        the flattened record or its value is ``None``.
        """
        pk_names = self.stream_schema_message['key_properties']
        if len(pk_names) == 0:
            return None

        flatten = flattening.flatten_record(
            record, self.flatten_schema, max_level=self.data_flattening_max_level)

        pk_values = []
        for key_prop in pk_names:
            # A missing key and an explicit null are rejected the same way.
            value = flatten[key_prop] if key_prop in flatten else None
            if value is None:
                raise PrimaryKeyNotFoundException(
                    f"Primary key '{key_prop}' does not exist in record or is null. "
                    f"Available fields: {list(flatten.keys())}")
            pk_values.append(str(value))

        return ','.join(pk_values)
Exemple #3
0
def records_to_dataframe(
        records: Dict,
        schema: Dict,
        data_flattening_max_level: int = 0) -> pandas.DataFrame:
    """
    Builds a pandas dataframe from a batch of record messages, flattening every record first

    Args:
        records: Dictionary whose values are the singer record messages of the batch
        schema: Schema handed to the flattening step together with each record
        data_flattening_max_level: Max level of auto flattening if a record message has nested objects. (Default: 0)

    Returns:
        Pandas dataframe built from the list of flattened records
    """
    # Only the dictionary values are used; the keys are ignored.
    rows = [
        flattening.flatten_record(
            message, schema, max_level=data_flattening_max_level)
        for message in records.values()
    ]
    return pandas.DataFrame(data=rows)
Exemple #4
0
def record_to_csv_line(record: dict,
                       schema: dict,
                       data_flattening_max_level: int = 0) -> str:
    """
    Renders a record message as a single comma-separated line

    One cell is produced per column of ``schema``, in the schema's iteration
    order. A cell is the JSON encoding of the flattened record's value for
    that column; it is left empty when the column is absent from the
    flattened record or when the value is falsy without being equal to 0
    (for example ``None`` or an empty string).

    Args:
        record: Dictionary that represents a csv line. Dict key is column name, value is the column value
        schema: JSONSchema of the record
        data_flattening_max_level: Max level of auto flattening if a record message has nested objects. (Default: 0)

    Returns:
        string of csv line
    """
    flat = flattening.flatten_record(
        record, schema, max_level=data_flattening_max_level)

    cells = []
    for column in schema:
        cell = ''
        if column in flat:
            value = flat[column]
            # Values equal to 0 are kept; every other falsy value becomes an empty cell.
            if value == 0 or value:
                cell = json.dumps(value, ensure_ascii=False)
        cells.append(cell)

    return ','.join(cells)