def join(keys, tables):
    """Merge a list of `Table` objects using `keys` to group rows.

    Rows from later tables overwrite fields of earlier rows sharing the same
    key tuple; missing fields are left as `None`.
    """
    # Make new (merged) Table fields
    fields = OrderedDict()
    for table in tables:
        fields.update(table.fields)
        # TODO: may raise an error if a same field is different in some tables

    # Check if all keys are inside merged Table's fields
    fields_keys = set(fields.keys())
    for key in keys:
        if key not in fields_keys:
            raise ValueError('Invalid key: "{}"'.format(key))

    def empty_row():
        # Template row: every merged field present, all values None
        return OrderedDict.fromkeys(fields)

    # Group rows by key, without missing ordering
    data = OrderedDict()
    for table in tables:
        for row in table:
            row_key = tuple(getattr(row, key) for key in keys)
            if row_key not in data:
                data[row_key] = empty_row()
            data[row_key].update(row._asdict())

    merged = Table(fields=fields)
    merged.extend(data.values())
    return merged
def create_table(data, meta=None, force_headers=None, fields=None,
                 skip_header=True, *args, **kwargs):
    """Create a `Table` from an iterable of rows.

    - If `fields` is None, the header comes from the first data row (or from
      `force_headers`) and field types are autodetected from the data.
    - If `fields` is given it must be an `OrderedDict` (field name -> field
      type) in the same column order as the data; `skip_header` controls
      whether the first data row is discarded.
    """
    # TODO: add auto_detect_types=True parameter
    table_rows = list(data)
    if fields is None:
        if force_headers is None:
            header = make_header(table_rows[0])
            table_rows = table_rows[1:]
        else:
            header = force_headers
        fields = detect_types(header, table_rows, *args, **kwargs)
    else:
        if skip_header:
            table_rows = table_rows[1:]
            header = make_header(fields.keys())
            # Field order is meaningful here, so a plain dict is not enough
            # (was an `assert`, which is stripped under `python -O`)
            if not isinstance(fields, collections.OrderedDict):
                raise ValueError('`fields` must be an `OrderedDict`')
            fields = {field_name: fields[key]
                      for field_name, key in zip(header, fields)}
        else:
            header = make_header(fields.keys())

    # TODO: may reuse max_columns from html
    max_columns = max(len(row) for row in table_rows)
    if len(fields) != max_columns:
        # Was an `assert`; raise explicitly so the check survives `-O`
        raise ValueError('Number of fields is different from number of columns')

    # TODO: put this inside Table.__init__
    table = Table(fields=fields, meta=meta)
    for row in table_rows:
        table.append({field_name: value
                      for field_name, value in zip(header, row)})
    return table
def transform(fields, function, *tables):
    "Return a new table based on other tables and a transformation function"
    new_table = Table(fields=fields)
    for table in tables:
        for row in table:
            transformed = function(row, table)
            # Falsy results (None, empty dict, ...) are filtered out
            if transformed:
                new_table.append(transformed)
    return new_table
def setUp(self):
    # Fixture: a two-field table with three rows (last birthdate is a raw
    # string on purpose)
    table_fields = {'name': rows.fields.UnicodeField,
                    'birthdate': rows.fields.DateField, }
    self.table = Table(fields=table_fields)
    self.first_row = {'name': u'Álvaro Justen',
                      'birthdate': datetime.date(1987, 4, 29)}
    initial_rows = [
        self.first_row,
        {'name': u'Somebody', 'birthdate': datetime.date(1990, 2, 1)},
        {'name': u'Douglas Adams', 'birthdate': '1952-03-11'},
    ]
    for row in initial_rows:
        self.table.append(row)
def setUp(self):
    # Fixture: a two-field table with three rows (last birthdate is a raw
    # string on purpose)
    table_fields = {
        "name": rows.fields.TextField,
        "birthdate": rows.fields.DateField,
    }
    self.table = Table(fields=table_fields)
    self.first_row = {
        "name": "Álvaro Justen",
        "birthdate": datetime.date(1987, 4, 29),
    }
    initial_rows = [
        self.first_row,
        {"name": "Somebody", "birthdate": datetime.date(1990, 2, 1)},
        {"name": "Douglas Adams", "birthdate": "1952-03-11"},
    ]
    for row in initial_rows:
        self.table.append(row)
def import_from_uwsgi_log(filename):
    """Parse a uWSGI log file into a `Table` with `UWSGI_FIELDS` columns."""
    field_names = list(UWSGI_FIELDS.keys())
    table = Table(fields=UWSGI_FIELDS)
    with open(filename) as fobj:
        for line in fobj:
            match = REGEXP_UWSGI_LOG.findall(line)
            if not match:
                # Line does not look like a request entry; skip it
                continue
            data = list(match[0])
            # Convert datetime
            data[2] = strptime(data[2], UWSGI_DATETIME_FORMAT)
            # Convert generation time (micros -> seconds)
            data[5] = float(data[5]) / 1000000
            table.append(dict(zip(field_names, data)))
    return table
def create_table(data, meta=None, fields=None, skip_header=True,
                 import_fields=None, samples=None, force_types=None,
                 *args, **kwargs):
    """Create a rows.Table object based on data rows and some configurations

    - `skip_header` is only used if `fields` is set
    - `samples` is only used if `fields` is `None`. If samples=None, all data
      is filled in memory - use with caution.
    - `force_types` is only used if `fields` is `None`
    - `import_fields` can be used either if `fields` is set or not, the
      resulting fields will seek its order
    - `fields` must always be in the same order as the data
    """
    table_rows = iter(data)
    force_types = force_types or {}
    if import_fields is not None:
        import_fields = make_header(import_fields)

    if fields is None:  # autodetect field types
        # TODO: may add `type_hints` parameter so autodetection can be easier
        # (plugins may specify some possible field types).
        header = make_header(next(table_rows))

        if samples is not None:
            # Peek at `samples` rows for detection, then replay them
            sample_rows = list(islice(table_rows, 0, samples))
            table_rows = chain(sample_rows, table_rows)
        else:
            # No sample limit: materialize everything (may use lots of memory)
            sample_rows = table_rows = list(table_rows)

        # Detect field types using only the desired columns
        detected_fields = detect_types(
            header,
            sample_rows,
            skip_indexes=[
                index
                for index, field in enumerate(header)
                if field in force_types or
                field not in (import_fields or header)
            ],
            *args,
            **kwargs)
        # Check if any field was added during detecting process
        new_fields = [
            field_name
            for field_name in detected_fields.keys()
            if field_name not in header
        ]
        # Finally create the `fields` with both header and new field names,
        # based on detected fields `and force_types`
        fields = OrderedDict([
            (field_name, detected_fields.get(field_name, TextField))
            for field_name in header + new_fields
        ])
        fields.update(force_types)

        # Update `header` and `import_fields` based on new `fields`
        header = list(fields.keys())
        if import_fields is None:
            import_fields = header

    else:  # using provided field types
        if not isinstance(fields, OrderedDict):
            # NOTE(review): this string literal was broken across lines in the
            # original; reconstructed to match the sibling implementation.
            raise ValueError("`fields` must be an `OrderedDict`")

        if skip_header:
            # If we're skipping the header probably this row is not trustable
            # (can be data or garbage).
            _ = next(table_rows)

        header = make_header(list(fields.keys()))
        if import_fields is None:
            import_fields = header

        # Re-key `fields` with the slugified header names, keeping order
        fields = OrderedDict([
            (field_name, fields[key])
            for field_name, key in zip(header, fields)
        ])

    diff = set(import_fields) - set(header)
    if diff:
        field_names = ", ".join('"{}"'.format(field) for field in diff)
        raise ValueError("Invalid field names: {}".format(field_names))

    # Restrict (and order) the final fields to `import_fields`
    fields = OrderedDict([
        (field_name, fields[field_name]) for field_name in import_fields
    ])

    get_row = get_items(*map(header.index, import_fields))
    table = Table(fields=fields, meta=meta)
    table.extend(dict(zip(import_fields, get_row(row))) for row in table_rows)

    return table
'datetime_column': datetime.datetime(2015, 5, 6, 12, 1, 2), 'percent_column': Decimal('0.02'), 'unicode_column': 'test', }, { 'float_column': None, 'decimal_column': None, 'bool_column': None, 'integer_column': None, 'date_column': None, 'datetime_column': None, 'percent_column': None, 'unicode_column': '', } ] table = Table(fields=FIELDS) for row in EXPECTED_ROWS: table.append(row) table._meta = {'test': 123} class RowsTestMixIn(object): maxDiff = None override_fields = None def setUp(self): self.files_to_delete = [] def tearDown(self): for filename in self.files_to_delete:
def create_table(data, meta=None, fields=None, skip_header=True,
                 import_fields=None, samples=None, force_types=None,
                 *args, **kwargs):
    """Build a `Table` from `data` rows, detecting field types if needed."""
    # TODO: add auto_detect_types=True parameter
    table_rows = iter(data)
    sample_rows = []

    if fields is None:
        # Autodetect: first row is the header, then inspect a data sample
        header = make_header(next(table_rows))
        if samples is None:
            sample_rows = list(table_rows)
        else:
            sample_rows = list(islice(table_rows, 0, samples))
        fields = detect_types(header, sample_rows, *args, **kwargs)
        if force_types is not None:
            # TODO: optimize field detection (ignore fields on `force_types`)
            fields.update(force_types)
    else:
        if not isinstance(fields, OrderedDict):
            raise ValueError('`fields` must be an `OrderedDict`')
        if skip_header:
            next(table_rows)
        # Re-key the provided fields with slugified header names, in order
        header = make_header(list(fields.keys()))
        fields = OrderedDict(zip(header, fields.values()))

    if import_fields is not None:
        # TODO: can optimize if import_fields is not None.
        # Example: do not detect all columns
        import_fields = make_header(import_fields)
        unknown = set(import_fields) - set(header)
        if unknown:
            field_names = ', '.join('"{}"'.format(field) for field in unknown)
            raise ValueError("Invalid field names: {}".format(field_names))
        fields = OrderedDict([(field_name, fields[field_name])
                              for field_name in import_fields])

    table = Table(fields=fields, meta=meta)
    # TODO: put this inside Table.__init__
    # Replay any sampled rows before the remaining ones
    for row in chain(sample_rows, table_rows):
        table.append(dict(zip(header, row)))
    return table
'unicode_column': 'álvaro', 'null_column': 'none'.encode('utf-8') }, { 'float_column': 1.2345, 'decimal_column': 1.2345, 'bool_column': False, 'integer_column': 6, 'date_column': datetime.date(2015, 5, 6), 'datetime_column': datetime.datetime(2015, 5, 6, 12, 1, 2), 'percent_column': Decimal('0.02'), 'unicode_column': 'test', 'null_column': ''.encode('utf-8') }, ] table = Table(fields=expected_fields) for row in expected_rows: table.append(row) table._meta = {'test': 123} class RowsTestMixIn(object): maxDiff = None def setUp(self): self.files_to_delete = [] def tearDown(self): for filename in self.files_to_delete: os.unlink(filename)