Esempio n. 1
0
look(rf[(1, 3)])
look(rf[(7, 9)])


# transpose

from petl import look, transpose

# Sample table: a header row ('id', 'colour') followed by five data rows.
table1 = (
    ('id', 'colour'),
    (1, 'blue'),
    (2, 'red'),
    (3, 'purple'),
    (5, 'yellow'),
    (7, 'orange'),
)

# Preview the source table, then build and preview its transposed form.
look(table1)
table2 = transpose(table1)
look(table2)


# intersection

table1 = (('foo', 'bar', 'baz'),
          ('A', 1, True),
          ('C', 7, False),
          ('B', 2, False),
          ('C', 9, True))
table2 = (('x', 'y', 'z'),
          ('B', 2, False),
          ('A', 9, False),
          ('B', 3, True),
          ('C', 9, True))
Esempio n. 2
0
look(rf[(1, 3)])
look(rf[(7, 9)])


# transpose

from petl import look, transpose

# Input for the demo: the first tuple is the header, the rest are records.
table1 = (
    ('id', 'colour'),
    (1, 'blue'),
    (2, 'red'),
    (3, 'purple'),
    (5, 'yellow'),
    (7, 'orange'),
)

# Display the input, transpose it, and display the result.
look(table1)
table2 = transpose(table1)
look(table2)


# intersection

table1 = (('foo', 'bar', 'baz'),
          ('A', 1, True),
          ('C', 7, False),
          ('B', 2, False),
          ('C', 9, True))
table2 = (('x', 'y', 'z'),
          ('B', 2, False),
          ('A', 9, False),
          ('B', 3, True),
          ('C', 9, True))
Esempio n. 3
0
          [3, 'gender', 'M']]
table10 = etl.recast(table9, key='id')
table10


# transpose()
#############

import petl as etl

# Two-column input table; the first inner list is the header row.
table1 = [
    ['id', 'colour'],
    [1, 'blue'],
    [2, 'red'],
    [3, 'purple'],
    [5, 'yellow'],
    [7, 'orange'],
]

# Build the transposed table; the bare name on the last line is there so an
# interactive session echoes it.
table2 = etl.transpose(table1)
table2


# pivot()
#########

import petl as etl
table1 = [['region', 'gender', 'style', 'units'],
          ['east', 'boy', 'tee', 12],
          ['east', 'boy', 'golf', 14],
          ['east', 'boy', 'fancy', 7],
          ['east', 'girl', 'tee', 3],
          ['east', 'girl', 'golf', 8],
          ['east', 'girl', 'fancy', 18],
          ['west', 'boy', 'tee', 12],
Esempio n. 4
0
table8 = etl.recast(table6, key="id", reducers={"weight": mean})
table8
# missing values are padded with whatever is provided via the
# missing keyword argument (None by default)
table9 = [["id", "variable", "value"], [1, "gender", "F"], [2, "age", 17], [1, "age", 12], [3, "gender", "M"]]
table10 = etl.recast(table9, key="id")
table10


# transpose()
#############

import petl as etl

# Two-column input table; the first inner list is the header row.
table1 = [
    ["id", "colour"],
    [1, "blue"],
    [2, "red"],
    [3, "purple"],
    [5, "yellow"],
    [7, "orange"],
]

# Build the transposed table; the trailing bare name lets an interactive
# session echo it.
table2 = etl.transpose(table1)
table2


# pivot()
#########

import petl as etl

table1 = [
    ["region", "gender", "style", "units"],
    ["east", "boy", "tee", 12],
    ["east", "boy", "golf", 14],
    ["east", "boy", "fancy", 7],
    ["east", "girl", "tee", 3],
    ["east", "girl", "golf", 8],
Esempio n. 5
0
def _coerce_reading(raw):
    """Return *raw* converted to float when truthy, otherwise unchanged."""
    # Falsy cells (None left by the 'N/D' replacement, or empty strings) are
    # passed through untouched so missing readings are not turned into 0.0.
    return float(raw) if raw else raw


def _iter_readings(row):
    """Yield (field, value) for every field of *row* except 'Timestamp'."""
    for field, raw in row.items():
        if field != 'Timestamp':
            yield field, _coerce_reading(raw)


def transform_teragon_csv(teragon_csv, transpose=False, indexed=False):
    """Transform Teragon's CSV response into plain Python containers.

    The result mirrors the JSON response we want to provide to API clients.

    Arguments:
        teragon_csv {reference} -- reference to a CSV table on disk
        or in memory (passed straight to ``etl.fromcsv``)
        transpose {boolean} -- transpose the Teragon table, so that rows are
        cells/gauges and columns are times
        indexed {boolean} -- return the data in indexed format (nested
        mappings) instead of as records

    Returns:
        When ``indexed`` is true: a ``SortedDict`` keyed by each row's
        'Timestamp' value, whose values are ``SortedDict`` objects mapping
        every other field name to its reading.
        Otherwise: a list of ``{"id": <Timestamp value>, "d": [...]}``
        dicts, where "d" is a list of ``{'id': <field>, 'v': <reading>}``.
        In both shapes a truthy reading is converted with ``float`` and a
        falsy one (e.g. ``None`` for 'N/D') is kept as-is.

    Raises:
        ValueError -- from ``float`` if a truthy reading is not numeric.
    """
    petl_table = etl.fromcsv(teragon_csv)

    # Everything after the first header field is consumed two columns at a
    # time: per the original notes, Teragon uses a pair of columns for one
    # data point (the first carries the data, the second carries notes).
    # A trailing unpaired column is ignored, since zip stops at the shorter
    # of the two slices.
    source_cols = list(etl.header(petl_table))[1:]
    column_pairs = zip(source_cols[0::2], source_cols[1::2])

    # Build the replacement header and remember which columns to drop.
    new_header = ['Timestamp']
    fields_to_cut = []
    for id_col, _ in column_pairs:
        # The second column of each pair is renamed "<id>-n" so that it can
        # be removed by name below.
        notes_col = "{0}-n".format(id_col)
        new_header.extend([id_col, notes_col])
        fields_to_cut.append(notes_col)

    # Rename the columns, drop the notes columns, skip the 'TOTAL' row,
    # normalise timestamps to ISO 8601 and turn 'N/D' cells into None.
    table = (
        etl.setheader(petl_table, new_header)
        .cutout(*fields_to_cut)
        .select('Timestamp', lambda v: v.upper() != 'TOTAL')
        .convert('Timestamp', lambda t: parse(t).isoformat())
        .replaceall('N/D', None)
    )

    # Transpose the table, so that rows are cells/gauges and columns are
    # times (note that this operation can take a while).
    if transpose:
        table = etl.transpose(table)

    if indexed:
        # Indexed format: the 'Timestamp' field of each row is the outer
        # key, the remaining field names are the inner keys.
        data = SortedDict()
        for row in etl.dicts(table):
            inside = SortedDict()
            for field, value in _iter_readings(row):
                inside[field] = value
            data[row['Timestamp']] = inside
        return data

    # Record format: one dict per row, holding a list of per-field dicts.
    return [
        {
            "id": row['Timestamp'],
            "d": [{'id': field, 'v': value}
                  for field, value in _iter_readings(row)],
        }
        for row in etl.dicts(table)
    ]