# Tail of a preceding example: ``rf`` and ``look`` are presumably set up
# earlier in the file (not visible here) -- TODO confirm.
look(rf[(1, 3)])
look(rf[(7, 9)])


# transpose

from petl import transpose, look

# Small two-column table used to demonstrate transposition.
table1 = (
    ('id', 'colour'),
    (1, 'blue'),
    (2, 'red'),
    (3, 'purple'),
    (5, 'yellow'),
    (7, 'orange'),
)
look(table1)

table2 = transpose(table1)
look(table2)


# intersection

# Two three-column tables with different headers; they share some data rows.
table1 = (
    ('foo', 'bar', 'baz'),
    ('A', 1, True),
    ('C', 7, False),
    ('B', 2, False),
    ('C', 9, True),
)
table2 = (
    ('x', 'y', 'z'),
    ('B', 2, False),
    ('A', 9, False),
    ('B', 3, True),
    ('C', 9, True),
)
[3, 'gender', 'M']] table10 = etl.recast(table9, key='id') table10 # transpose() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple'], [5, 'yellow'], [7, 'orange']] table2 = etl.transpose(table1) table2 # pivot() ######### import petl as etl table1 = [['region', 'gender', 'style', 'units'], ['east', 'boy', 'tee', 12], ['east', 'boy', 'golf', 14], ['east', 'boy', 'fancy', 7], ['east', 'girl', 'tee', 3], ['east', 'girl', 'golf', 8], ['east', 'girl', 'fancy', 18], ['west', 'boy', 'tee', 12],
table8 = etl.recast(table6, key="id", reducers={"weight": mean}) table8 # missing values are padded with whatever is provided via the # missing keyword argument (None by default) table9 = [["id", "variable", "value"], [1, "gender", "F"], [2, "age", 17], [1, "age", 12], [3, "gender", "M"]] table10 = etl.recast(table9, key="id") table10 # transpose() ############# import petl as etl table1 = [["id", "colour"], [1, "blue"], [2, "red"], [3, "purple"], [5, "yellow"], [7, "orange"]] table2 = etl.transpose(table1) table2 # pivot() ######### import petl as etl table1 = [ ["region", "gender", "style", "units"], ["east", "boy", "tee", 12], ["east", "boy", "golf", 14], ["east", "boy", "fancy", 7], ["east", "girl", "tee", 3], ["east", "girl", "golf", 8],
def _coerce_reading(value):
    """Return *value* as a float when it is truthy, otherwise unchanged.

    Falsy values (``None``, the empty string) are passed through as-is so
    that missing readings are not turned into numbers.
    """
    return float(value) if value else value


def _iter_readings(row):
    """Yield ``(field, reading)`` pairs for every field of *row* except
    'Timestamp', with each reading passed through ``_coerce_reading``.
    """
    for field, value in row.items():
        if field != 'Timestamp':
            yield field, _coerce_reading(value)


def transform_teragon_csv(teragon_csv, transpose=False, indexed=False):
    """transform Teragon's CSV response into python data structures that
    mirror the JSON response we want to provide to API clients

    The table is expected to have a leading 'Timestamp' column followed by
    pairs of columns, where the first column of each pair carries the data
    and the second carries notes. The notes columns are dropped, rows whose
    Timestamp is 'TOTAL' (case-insensitive) are removed, timestamps are
    run through ``parse(...).isoformat()``, and 'N/D' values become None.

    Arguments:
        teragon_csv {reference} -- reference to a CSV table on disk or in
            memory
        transpose {boolean} -- transpose Teragon table
        indexed {boolean} -- return data in indexed format or as records

    Returns:
        {SortedDict} -- when indexed is true: maps each row's 'Timestamp'
            value to a SortedDict of {column: reading}
        {list} -- otherwise: one ``{"id": <Timestamp>, "d": [...]}`` record
            per row, where "d" is a list of ``{'id': column, 'v': reading}``

        Readings are converted to float when truthy and left unchanged
        otherwise.
    """
    petl_table = etl.fromcsv(teragon_csv)

    # Group the columns after 'Timestamp' into (data, notes) pairs; each
    # pair represents a single data point in Teragon's CSV. A trailing
    # unpaired column is ignored, exactly as zip() truncates.
    header = list(etl.header(petl_table))
    column_pairs = zip(header[1::2], header[2::2])

    # Build the replacement header: the data column keeps its name and the
    # notes column is renamed to "<data column>-n" so it can be cut out.
    new_header = ['Timestamp']
    fields_to_cut = []
    for id_col, _ in column_pairs:
        notes_col = "{0}-n".format(id_col)
        new_header.extend([id_col, notes_col])
        fields_to_cut.append(notes_col)

    # transform the table
    table = (
        etl.setheader(petl_table, new_header)
        .cutout(*fields_to_cut)
        .select('Timestamp', lambda v: v.upper() != 'TOTAL')
        .convert('Timestamp', lambda t: parse(t).isoformat())
        .replaceall('N/D', None)
    )

    # transpose the table, so that rows are cells/gauges and columns are times
    # (note that this operation can take a while)
    if transpose:
        table = etl.transpose(table)

    # if indexed: format data where cells/gauges or times are keys, and
    # rainfall amounts are values
    if indexed:
        data = SortedDict()
        for row in etl.dicts(table):
            inside = SortedDict()
            for field, reading in _iter_readings(row):
                inside[field] = reading
            data[row['Timestamp']] = inside
        return data

    # otherwise, format as nested records (arrays of dicts)
    return [
        {
            "id": row['Timestamp'],
            "d": [
                {'id': field, 'v': reading}
                for field, reading in _iter_readings(row)
            ],
        }
        for row in etl.dicts(table)
    ]