def test_convert_columns_to_str(self):
    # Test that every column is reported as type 'str' after conversion.
    # The rows deliberately mix ints, strings, a list, a dict and None.
    mixed_raw = [
        {'col1': 1, 'col2': 2, 'col3': 3},
        {'col1': 'one', 'col2': 2, 'col3': [3, 'three', 3.0]},
        {'col1': {'one': 1, 'two': 2.0}, 'col2': None, 'col3': 'three'},
    ]
    tbl = Table(mixed_raw)
    tbl.convert_columns_to_str()

    # Flatten the per-column type lists into one set of type names.
    cols = tbl.get_columns_type_stats()
    type_set = {i for x in cols for i in x['type']}

    # Comparing against the exact expected set (rather than assertTrue on a
    # compound condition) makes a failure show which unexpected types remain.
    self.assertEqual(type_set, {'str'})
def process_json(self, json_blob, obj_type, tidy=False):
    """Convert a JSON response into a list of named Parsons tables.

    Internal method for converting most types of json responses.

    `Args:`
        json_blob:
            The data passed to ``Table(...)`` to build the original table.
        obj_type:
            Name used for the main table and as the prefix
            (``f'{obj_type}_{column}'``) for every table split out of it.
        tidy:
            When ``False`` (default), plain list columns and dict columns
            are unpacked in place on the main table. Any other value is
            forwarded as ``expand_original`` to
            ``unpack_nested_columns_as_rows`` for each packed column.

    `Returns:`
        A list of ``{'name': str, 'tbl': Table}`` dicts. The main table is
        appended last, and only if it still has more than one column.
    """
    # Output goes here
    table_list = []

    # Original table & columns
    obj_table = Table(json_blob)
    cols = obj_table.get_columns_type_stats()
    list_cols = [x['name'] for x in cols if 'list' in x['type']]
    dict_cols = [x['name'] for x in cols if 'dict' in x['type']]

    # Unpack all list columns
    for list_col in list_cols:
        # Check for nested data: rows whose list value contains a dict
        list_rows = obj_table.select_rows(
            lambda row: isinstance(row[list_col], list)
            and any(isinstance(x, dict) for x in row[list_col]))

        if list_rows.num_rows > 0:
            # NOTE: logging treats the first argument as a %-format string,
            # so the column name must be passed as an argument, not as msg.
            logger.debug('%s is a nested column', list_col)

            # Add a separate long table (keyed on 'id') for each column with
            # nested data. Ignore the column if its name does not appear
            # exactly once in the column stats.
            if sum(1 for x in cols if x['name'] == list_col) == 1:
                table_list.append({
                    'name': f'{obj_type}_{list_col}',
                    'tbl': obj_table.long_table(['id'], list_col),
                })
        elif tidy is False:
            logger.debug('%s is a normal list column', list_col)
            obj_table.unpack_list(list_col)

    if tidy is False:
        # Unpack all dict columns
        for dict_col in dict_cols:
            logger.debug('%s is a dict column', dict_col)
            obj_table.unpack_dict(dict_col)
    else:
        for packed_col in list_cols + dict_cols:
            # Skip columns no longer present on the table
            if packed_col not in obj_table.columns:
                continue

            logger.debug('%s needs to be unpacked into rows', packed_col)

            # Determine whether or not to expand based on tidy
            unpacked_tidy = obj_table.unpack_nested_columns_as_rows(
                packed_col, expand_original=tidy)

            # Check if column was removed as sign it was unpacked into a
            # separate table; otherwise the result replaces the main table
            if packed_col not in obj_table.columns:
                table_list.append({
                    'name': f'{obj_type}_{packed_col}',
                    'tbl': unpacked_tidy,
                })
            else:
                obj_table = unpacked_tidy

    # Original table will have had all nested columns removed
    if len(obj_table.columns) > 1:
        table_list.append({'name': obj_type, 'tbl': obj_table})

    return table_list