def record_primary_key_string(self, record):
    """Build the primary-key string that identifies ``record``.

    The record is flattened with ``flattening.flatten_record`` (using
    ``self.flatten_schema`` and ``self.data_flattening_max_level``); the
    values of the stream's ``key_properties`` are then stringified and
    joined with commas, in the order the key properties are listed.

    Args:
        record: Record payload to extract the key values from.

    Returns:
        The comma-separated key values, or ``None`` when the stream schema
        declares no key properties (an empty list or ``None``).

    Raises:
        Exception: Whatever the key lookup raised (a ``KeyError`` when a
            key column is absent from the flattened record). The failure
            is logged before being re-raised unchanged.
    """
    key_names = self.stream_schema_message['key_properties']
    # Truthiness covers both an empty list and an explicit null; calling
    # len() on the latter would raise TypeError.
    if not key_names:
        return None

    flatten = flattening.flatten_record(
        record, self.flatten_schema, max_level=self.data_flattening_max_level)

    try:
        key_props = [str(flatten[name]) for name in key_names]
    except Exception:
        self.logger.error(
            'Cannot find %s primary key(s) in record: %s', key_names, flatten)
        # Bare raise re-raises the active exception as-is.
        raise

    return ','.join(key_props)
def record_primary_key_string(self, record):
    """Build the primary-key string that identifies ``record``.

    The record is flattened with ``flattening.flatten_record`` (using
    ``self.flatten_schema`` and ``self.data_flattening_max_level``); the
    values of the stream's ``key_properties`` are then stringified and
    joined with commas, in the order the key properties are listed.

    Args:
        record: Record payload to extract the key values from.

    Returns:
        The comma-separated key values, or ``None`` when the stream schema
        declares no key properties (an empty list or ``None``).

    Raises:
        PrimaryKeyNotFoundException: If a key property is absent from the
            flattened record or its value is ``None``.
    """
    key_names = self.stream_schema_message['key_properties']
    # Truthiness covers both an empty list and an explicit null; calling
    # len() on the latter would raise TypeError.
    if not key_names:
        return None

    flatten = flattening.flatten_record(
        record, self.flatten_schema, max_level=self.data_flattening_max_level)

    key_props = []
    for key_prop in key_names:
        # A missing column and a null value are rejected the same way, so a
        # single .get() lookup covers both cases.
        value = flatten.get(key_prop)
        if value is None:
            raise PrimaryKeyNotFoundException(
                f"Primary key '{key_prop}' does not exist in record or is null. "
                f"Available fields: {list(flatten.keys())}")
        key_props.append(str(value))

    return ','.join(key_props)
def records_to_dataframe(
        records: Dict, schema: Dict, data_flattening_max_level: int = 0) -> pandas.DataFrame:
    """
    Build a pandas dataframe out of a batch of flattened record messages

    Args:
        records: Dictionary whose values are the record messages of the batch;
            only the values are read, the keys are ignored here
        schema: Schema handed to ``flattening.flatten_record`` for every record
        data_flattening_max_level: Max level of auto flattening if a record
            message has nested objects. (Default: 0)

    Returns:
        Pandas dataframe with one row per flattened record
    """
    rows = [
        flattening.flatten_record(item, schema, max_level=data_flattening_max_level)
        for item in records.values()
    ]
    return pandas.DataFrame(data=rows)
def record_to_csv_line(record: dict, schema: dict, data_flattening_max_level: int = 0) -> str:
    """
    Render one record message as a comma-separated line

    Args:
        record: Dictionary that represents a csv line. Dict key is column name,
            value is the column value
        schema: JSONSchema of the record; iterating it yields the output
            columns, in order
        data_flattening_max_level: Max level of auto flattening if a record
            message has nested objects. (Default: 0)

    Returns:
        string of csv line; each cell is the JSON encoding of the column
        value, or an empty string when the column is absent or its value is
        falsy and not equal to 0
    """
    flat = flattening.flatten_record(
        record, schema, max_level=data_flattening_max_level)

    cells = []
    for column in schema:
        cell = ''
        if column in flat:
            value = flat[column]
            # Values equal to 0 are kept even though they are falsy; other
            # falsy values (None, '', empty containers) become empty cells.
            if value == 0 or value:
                cell = json.dumps(value, ensure_ascii=False)
        cells.append(cell)

    return ','.join(cells)