Exemple #1
0
def join(keys, tables):
    """Merge a list of `Table` objects using `keys` to group rows.

    Rows from all `tables` that share the same tuple of key values are
    merged into one row; later tables' values overwrite earlier ones.
    Raises `ValueError` if any key is not a field of the merged tables.
    """

    # Make new (merged) Table fields; later tables win on name clashes.
    fields = OrderedDict()
    for table in tables:
        fields.update(table.fields)
    # TODO: may raise an error if a same field is different in some tables

    # Check if all keys are inside merged Table's fields
    missing = [key for key in keys if key not in fields]
    if missing:
        raise ValueError('Invalid key: "{}"'.format(missing[0]))

    # Group rows by key tuple, preserving first-seen order of each group
    data = OrderedDict()
    for table in tables:
        for row in table:
            row_key = tuple(getattr(row, key) for key in keys)
            # Fresh template with every field set to None, in field order
            group = data.setdefault(row_key,
                                    OrderedDict.fromkeys(fields, None))
            group.update(row._asdict())

    merged = Table(fields=fields)
    merged.extend(data.values())
    return merged
Exemple #2
0
def create_table(data, meta=None, force_headers=None, fields=None,
                 skip_header=True, *args, **kwargs):
    """Create a `Table` from `data` rows.

    - If `fields` is None, field types are autodetected; the header comes
      from the first data row unless `force_headers` is given.
    - If `fields` is given it must be a `collections.OrderedDict` (its order
      must match the data columns); `skip_header` drops the first row.

    Raises `ValueError` when `fields` is not an `OrderedDict` or its length
    does not match the widest data row.
    """
    # TODO: add auto_detect_types=True parameter
    table_rows = list(data)

    if fields is None:
        if force_headers is None:
            # Header is the first data row; remaining rows are the data.
            header = make_header(table_rows[0])
            table_rows = table_rows[1:]
        else:
            header = force_headers
        fields = detect_types(header, table_rows, *args, **kwargs)
    else:
        if skip_header:
            # Validate before consuming rows (was a bare `assert`, which is
            # stripped under `python -O`; `ValueError` matches the other
            # `create_table` implementations in this project).
            if not isinstance(fields, collections.OrderedDict):
                raise ValueError('`fields` must be an `OrderedDict`')
            table_rows = table_rows[1:]
            header = make_header(fields.keys())
            # Re-key `fields` by the normalized header names.
            fields = {field_name: fields[key]
                      for field_name, key in zip(header, fields)}
        else:
            header = make_header(fields.keys())

        # TODO: may reuse max_columns from html
        max_columns = max(len(row) for row in table_rows)
        if len(fields) != max_columns:
            raise ValueError('`fields` must have one entry per data column')

    # TODO: put this inside Table.__init__
    table = Table(fields=fields, meta=meta)
    for row in table_rows:
        table.append({field_name: value
                      for field_name, value in zip(header, row)})

    return table
Exemple #3
0
def transform(fields, function, *tables):
    """Return a new table built by applying `function` to every row.

    `function(row, table)` is called for each row of each table; truthy
    results are appended to the new table, falsy results are dropped.
    """
    new_table = Table(fields=fields)

    for table in tables:
        for row in table:
            transformed = function(row, table)
            if transformed:
                new_table.append(transformed)

    return new_table
Exemple #4
0
 def setUp(self):
     """Create a two-field fixture table with three sample rows."""
     fields = {'name': rows.fields.UnicodeField,
               'birthdate': rows.fields.DateField}
     self.table = Table(fields=fields)
     self.first_row = {'name': u'Álvaro Justen',
                       'birthdate': datetime.date(1987, 4, 29)}
     extra_rows = [
         {'name': u'Somebody',
          'birthdate': datetime.date(1990, 2, 1)},
         # birthdate is a plain string here, unlike the rows above
         {'name': u'Douglas Adams',
          'birthdate': '1952-03-11'},
     ]
     self.table.append(self.first_row)
     for row in extra_rows:
         self.table.append(row)
Exemple #5
0
 def setUp(self):
     """Create a two-field fixture table with three sample rows."""
     table_fields = {"name": rows.fields.TextField,
                     "birthdate": rows.fields.DateField}
     self.table = Table(fields=table_fields)
     self.first_row = {"name": "Álvaro Justen",
                       "birthdate": datetime.date(1987, 4, 29)}
     self.table.append(self.first_row)
     self.table.append({"name": "Somebody",
                        "birthdate": datetime.date(1990, 2, 1)})
     # birthdate is a plain string here, unlike the rows above
     self.table.append({"name": "Douglas Adams",
                        "birthdate": "1952-03-11"})
Exemple #6
0
def import_from_uwsgi_log(filename):
    """Parse a uWSGI log file into a `Table`, one row per matching line."""
    field_names = UWSGI_FIELDS.keys()
    table = Table(fields=UWSGI_FIELDS)
    with open(filename) as fobj:
        for line in fobj:
            matches = REGEXP_UWSGI_LOG.findall(line)
            if not matches:
                continue
            values = list(matches[0])
            # Parse the timestamp column into a datetime
            values[2] = strptime(values[2], UWSGI_DATETIME_FORMAT)
            # Generation time is logged in microseconds; store seconds
            values[5] = float(values[5]) / 1000000
            table.append(dict(zip(field_names, values)))
    return table
Exemple #7
0
def create_table(data,
                 meta=None,
                 fields=None,
                 skip_header=True,
                 import_fields=None,
                 samples=None,
                 force_types=None,
                 *args,
                 **kwargs):
    """Create a rows.Table object based on data rows and some configurations

    - `skip_header` is only used if `fields` is set
    - `samples` is only used if `fields` is `None`. If samples=None, all data
      is filled in memory - use with caution.
    - `force_types` is only used if `fields` is `None`
    - `import_fields` can be used either if `fields` is set or not, the
      resulting fields will seek its order
    - `fields` must always be in the same order as the data
    """

    table_rows = iter(data)
    force_types = force_types or {}
    if import_fields is not None:
        # Normalize requested field names the same way header names are.
        import_fields = make_header(import_fields)

    if fields is None:  # autodetect field types
        # TODO: may add `type_hints` parameter so autodetection can be easier
        #       (plugins may specify some possible field types).
        header = make_header(next(table_rows))

        if samples is not None:
            # Materialize only `samples` rows for type detection, then chain
            # them back so the data stream still yields every row.
            sample_rows = list(islice(table_rows, 0, samples))
            table_rows = chain(sample_rows, table_rows)
        else:
            # No sample limit: load all remaining rows into memory and use
            # the same list both for detection and for filling the table.
            sample_rows = table_rows = list(table_rows)

        # Detect field types using only the desired columns
        detected_fields = detect_types(
            header,
            sample_rows,
            # Skip detection for columns whose type is forced and for
            # columns that are not going to be imported anyway.
            skip_indexes=[
                index for index, field in enumerate(header)
                if field in force_types or field not in (
                    import_fields or header)
            ],
            *args,
            **kwargs)
        # Check if any field was added during detecting process
        new_fields = [
            field_name for field_name in detected_fields.keys()
            if field_name not in header
        ]
        # Finally create the `fields` with both header and new field names,
        # based on detected fields `and force_types`
        # (skipped columns fall back to TextField).
        fields = OrderedDict([(field_name,
                               detected_fields.get(field_name, TextField))
                              for field_name in header + new_fields])
        fields.update(force_types)

        # Update `header` and `import_fields` based on new `fields`
        header = list(fields.keys())
        if import_fields is None:
            import_fields = header

    else:  # using provided field types
        if not isinstance(fields, OrderedDict):
            raise ValueError("`fields` must be an `OrderedDict`")

        if skip_header:
            # If we're skipping the header probably this row is not trustable
            # (can be data or garbage).
            _ = next(table_rows)

        header = make_header(list(fields.keys()))
        if import_fields is None:
            import_fields = header

        # Re-key `fields` by the normalized header names, keeping order.
        fields = OrderedDict([(field_name, fields[key])
                              for field_name, key in zip(header, fields)])

    # `import_fields` may only reference known columns.
    diff = set(import_fields) - set(header)
    if diff:
        field_names = ", ".join('"{}"'.format(field) for field in diff)
        raise ValueError("Invalid field names: {}".format(field_names))
    # Restrict (and reorder) `fields` to the requested import order.
    fields = OrderedDict([(field_name, fields[field_name])
                          for field_name in import_fields])

    # `get_row` extracts only the imported columns, in `import_fields` order.
    get_row = get_items(*map(header.index, import_fields))
    table = Table(fields=fields, meta=meta)
    table.extend(dict(zip(import_fields, get_row(row))) for row in table_rows)

    return table
Exemple #8
0
            'datetime_column': datetime.datetime(2015, 5, 6, 12, 1, 2),
            'percent_column': Decimal('0.02'),
            'unicode_column': 'test',
        },
        {
            'float_column': None,
            'decimal_column': None,
            'bool_column': None,
            'integer_column': None,
            'date_column': None,
            'datetime_column': None,
            'percent_column': None,
            'unicode_column': '',
        }
]
table = Table(fields=FIELDS)
for row in EXPECTED_ROWS:
    table.append(row)
table._meta = {'test': 123}


class RowsTestMixIn(object):

    maxDiff = None
    override_fields = None

    def setUp(self):
        # Files registered here are removed after each test by tearDown.
        self.files_to_delete = []

    def tearDown(self):
        for filename in self.files_to_delete:
Exemple #9
0
def create_table(data,
                 meta=None,
                 fields=None,
                 skip_header=True,
                 import_fields=None,
                 samples=None,
                 force_types=None,
                 *args,
                 **kwargs):
    """Build a `Table` from `data` rows, autodetecting types when needed.

    When `fields` is None the first row is taken as the header and up to
    `samples` rows (all of them if `samples` is None) feed the type
    detector; `force_types` then overrides detected types.  When `fields`
    is given it must be an `OrderedDict` and `skip_header` controls whether
    the first row is dropped.  `import_fields` restricts and reorders the
    resulting columns.
    """
    # TODO: add auto_detect_types=True parameter
    table_rows = iter(data)
    sample_rows = []

    if fields is None:
        header = make_header(next(table_rows))

        # Collect rows for detection; they are replayed later via chain().
        if samples is None:
            sample_rows = list(table_rows)
        else:
            sample_rows = list(islice(table_rows, 0, samples))

        fields = detect_types(header, sample_rows, *args, **kwargs)

        if force_types is not None:
            # TODO: optimize field detection (ignore fields on `force_types`)
            fields.update(force_types)
    else:
        if not isinstance(fields, OrderedDict):
            raise ValueError('`fields` must be an `OrderedDict`')

        if skip_header:
            next(table_rows)

        # Re-key `fields` by the normalized header names, keeping order.
        header = make_header(list(fields.keys()))
        fields = OrderedDict([(name, fields[key])
                              for name, key in zip(header, fields)])

    if import_fields is not None:
        # TODO: can optimize if import_fields is not None.
        #       Example: do not detect all columns
        import_fields = make_header(import_fields)

        unknown = set(import_fields) - set(header)
        if unknown:
            field_names = ', '.join('"{}"'.format(field) for field in unknown)
            raise ValueError("Invalid field names: {}".format(field_names))

        # Restrict (and reorder) `fields` to the requested import order.
        fields = OrderedDict([(name, fields[name])
                              for name in import_fields])

    table = Table(fields=fields, meta=meta)
    # TODO: put this inside Table.__init__
    for row in chain(sample_rows, table_rows):
        table.append(dict(zip(header, row)))

    return table
Exemple #10
0
        'unicode_column': 'álvaro',
        'null_column': 'none'.encode('utf-8')
    },
    {
        'float_column': 1.2345,
        'decimal_column': 1.2345,
        'bool_column': False,
        'integer_column': 6,
        'date_column': datetime.date(2015, 5, 6),
        'datetime_column': datetime.datetime(2015, 5, 6, 12, 1, 2),
        'percent_column': Decimal('0.02'),
        'unicode_column': 'test',
        'null_column': ''.encode('utf-8')
    },
]
table = Table(fields=expected_fields)
for row in expected_rows:
    table.append(row)
table._meta = {'test': 123}


class RowsTestMixIn(object):

    maxDiff = None

    def setUp(self):
        # Files registered here are removed after each test by tearDown.
        self.files_to_delete = []

    def tearDown(self):
        for filename in self.files_to_delete:
            os.unlink(filename)