Example #1
    def test_table_add_should_not_iterate_over_rows(self):
        table1 = rows.Table(fields={
            "f1": rows.fields.IntegerField,
            "f2": rows.fields.FloatField
        })
        table2 = rows.Table(fields={
            "f1": rows.fields.IntegerField,
            "f2": rows.fields.FloatField
        })
        table1._rows = mock.Mock()
        table1._rows.__add__ = mock.Mock()
        table1._rows.__iter__ = mock.Mock()
        table2._rows = mock.Mock()
        table2._rows.__add__ = mock.Mock()
        table2._rows.__iter__ = mock.Mock()

        self.assertFalse(table1._rows.__add__.called)
        self.assertFalse(table2._rows.__add__.called)
        self.assertFalse(table1._rows.__iter__.called)
        self.assertFalse(table2._rows.__iter__.called)
        table1 + table2
        self.assertTrue(table1._rows.__add__.called)
        self.assertFalse(table2._rows.__add__.called)
        self.assertFalse(table1._rows.__iter__.called)
        self.assertFalse(table2._rows.__iter__.called)
Example #2
 def test_export_to_html_unescaped_content(self):
     table = rows.Table(
         fields=OrderedDict([("unescaped_content", rows.fields.TextField)])
     )
     table.append({"unescaped_content": "<&>"})
     output = rows.export_to_html(table)
     self.assertIn(b"<td> &lt;&amp;&gt; </td>", output)
Example #3
def sum_iof_into_entries(table):
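    # Merge standalone IOF tax rows (description like 'IOF de "<entry>"')
    # into the matching entry, adding the tax amount to the entry value.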
    entries, iofs = [], {}
    for row in table:
        description = row.description.lower()
        if description.startswith('iof de "'):
            entry_description = description.split('"')[1].strip()
            iofs[entry_description] = row
        else:
            entries.append(row)

    table = rows.Table(fields=FIELDS)
    for entry in entries:
        description = entry.description.lower().strip()
        entry = {
            'description': entry.description.strip(),
            'value': entry.value,
            'category': entry.category,
            'date': entry.date,
        }
        if description in iofs:
            iof = iofs[description]
            entry['description'] += ' (+ IOF)'
            entry['value'] += iof.value
        table.append(entry)

    table.order_by('date')
    return table
Example #4
    def test_detect_dialect_using_json(self):
        temp = tempfile.NamedTemporaryFile(delete=False)
        filename = '{}.{}'.format(temp.name, self.file_extension)
        encoding = 'utf-8'
        self.files_to_delete.append(filename)

        # Using JSON forces the sniffer not to include ':' and '}' in the
        # possible delimiters
        table = rows.Table(fields=OrderedDict([
            ('jsoncolumn1', rows.fields.JSONField),
            ('jsoncolumn2', rows.fields.JSONField),
        ]))
        table.append({
            'jsoncolumn1': '{"a": 42}',
            'jsoncolumn2': '{"b": 43}',
        })
        table.append({
            'jsoncolumn1': '{"c": 44}',
            'jsoncolumn2': '{"d": 45}',
        })
        rows.export_to_csv(table, filename, encoding=encoding)

        table = rows.import_from_csv(filename, encoding=encoding)

        self.assertEqual(table.field_names, ['jsoncolumn1', 'jsoncolumn2'])
        self.assertDictEqual(table[0].jsoncolumn1, {'a': 42})
        self.assertDictEqual(table[0].jsoncolumn2, {'b': 43})
        self.assertDictEqual(table[1].jsoncolumn1, {'c': 44})
        self.assertDictEqual(table[1].jsoncolumn2, {'d': 45})
Example #5
    def test_quotes(self):
        temp = tempfile.NamedTemporaryFile(delete=False)
        filename = '{}.{}'.format(temp.name, self.file_extension)
        self.files_to_delete.append(filename)

        table = rows.Table(fields=OrderedDict([
            ('field_1', rows.fields.TextField),
            ('field_2', rows.fields.TextField),
            ('field_3', rows.fields.TextField),
            ('field_4', rows.fields.TextField),
        ]))
        table.append({
            'field_1': '"quotes"',
            'field_2': 'test "quotes"',
            'field_3': '"quotes" test',
            'field_4': 'test "quotes" test',
        })
        # We need this second row since `"quotes"` in `field_1` alone could be
        # detected as either `JSONField` or `TextField`
        table.append({
            'field_1': 'noquotes',
            'field_2': 'test "quotes"',
            'field_3': '"quotes" test',
            'field_4': 'test "quotes" test',
        })
        rows.export_to_csv(table, filename)

        table2 = rows.import_from_csv(filename)
        self.assert_table_equal(table, table2)
Example #6
def csv2sqlite(input_filename,
               output_filename,
               samples=None,
               batch_size=10000,
               encoding='utf-8',
               callback=None,
               force_types=None,
               table_name='table1'):
    'Export a CSV file to SQLite, based on field type detection from samples'

    # Identify data types
    fobj = open_compressed(input_filename, encoding=encoding)
    data = list(islice(csv.DictReader(fobj), samples))
    fields = rows.import_from_dicts(data).fields
    if force_types is not None:
        fields.update(force_types)

    # Create a lazy table object to be converted
    # TODO: this laziness feature will be incorporated into the library soon
    reader = csv.reader(open_compressed(input_filename, encoding=encoding))
    header = next(reader)  # read the header row
    table = rows.Table(fields=OrderedDict([(field, fields[field])
                                           for field in header]))
    table._rows = reader

    # Export to SQLite
    return rows.export_to_sqlite(table,
                                 output_filename,
                                 table_name=table_name,
                                 batch_size=batch_size,
                                 callback=callback)
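
A hedged call sketch for the helper above; the file names and the forced
column name are hypothetical:

csv2sqlite('data.csv.gz', 'data.sqlite',
           force_types={'id': rows.fields.IntegerField},
           table_name='data')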
Example #7
def merge_files(filenames, output):
    'Merge all game files into one CSV file, adding year and country columns'

    if not output.parent.exists():
        output.parent.mkdir()

    countries_by_code = download_countries()
    games = rows.Table(fields=FULL_FIELDS)

    for filename in filenames:
        year, country_code = _parse_filename(filename)
        country = countries_by_code[country_code]
        print('Merging year: {}, country: {}...'.format(year, country.name))
        game = rows.import_from_csv(str(filename.absolute()),
                                    fields=FIELDS,
                                    dialect=csv.excel,
                                    encoding='utf-8')
        for row in game:
            data = row._asdict()
            data['year'] = year
            data['country_code'] = country_code
            data['country_name'] = country.name
            del data['rk']
            games.append(data)
    games.order_by('-year')
    rows.utils.export_to_uri(games, str(output.absolute()))
Example #8
def csv2sqlite(input_filename,
               output_filename,
               table_name,
               samples=30000,
               batch_size=10000,
               encoding='utf-8',
               callback=None,
               force_types=None):

    # Identify data types
    fobj = open_compressed(input_filename, encoding)
    reader = csv.reader(fobj)
    header = next(reader)
    data = []
    for index, row in enumerate(reader):
        row = dict(zip(header, row))
        if index == samples:
            break
        data.append(row)
    fields = rows.import_from_dicts(data, import_fields=header).fields
    if force_types is not None:
        fields.update(force_types)

    # Create lazy table object to be converted
    table = rows.Table(fields=fields)
    reader = csv.reader(open_compressed(input_filename, encoding))
    next(reader)  # skip header
    table._rows = reader

    # Export to SQLite
    rows.export_to_sqlite(table,
                          output_filename,
                          table_name=table_name,
                          callback=callback,
                          batch_size=batch_size)
Example #9
 def test_export_to_html_unescaped_content(self):
     table = rows.Table(fields=OrderedDict([
         ('unescaped_content', rows.fields.TextField)
     ]))
     table.append({'unescaped_content': '<&>'})
     output = rows.export_to_html(table)
     self.assertIn(b'<td> &lt;&amp;&gt; </td>', output)
Example #10
    def test_transpose_feature(self):
        new_fields = OrderedDict([
            ("key", rows.fields.TextField),
            ("value_1", rows.fields.TextField),
            ("value_2", rows.fields.TextField),
        ])
        table = rows.Table(fields=new_fields)
        table.append({
            "key": "first_key",
            "value_1": "first_value_1",
            "value_2": "first_value_2"
        })
        table.append({"key": "second_key", "value_1": 1, "value_2": 2})
        table.append({"key": "third_key", "value_1": 3.14, "value_2": 2.71})
        table.append({
            "key": "fourth_key",
            "value_1": "2015-09-04",
            "value_2": "2015-08-29"
        })

        new_table = rows.transpose(table, fields_column="key")

        self.assertEqual(len(new_table), 2)
        self.assertEqual(len(new_table.fields), len(table))
        self.assertEqual(new_table.field_names, [row.key for row in table])
        self.assertEqual(new_table[0].first_key, "first_value_1")
        self.assertEqual(new_table[0].second_key, 1)
        self.assertEqual(new_table[0].third_key, 3.14)
        self.assertEqual(new_table[0].fourth_key, datetime.date(2015, 9, 4))
        self.assertEqual(new_table[1].first_key, "first_value_2")
        self.assertEqual(new_table[1].second_key, 2)
        self.assertEqual(new_table[1].third_key, 2.71)
        self.assertEqual(new_table[1].fourth_key, datetime.date(2015, 8, 29))
Example #11
def import_from_dicts(counter):
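    # Build a two-column (label, count) rows.Table from a mapping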
    keys = ('label', 'count')
    t = rows.Table(fields=OrderedDict([(
        keys[0], rows.fields.TextField), (keys[1], rows.fields.IntegerField)]))
    for key, value in counter.items():
        t.append({keys[0]: key, keys[1]: value})
    return t
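
A minimal usage sketch for the helper above; the Counter contents and the
output path are illustrative:

from collections import Counter

counter = Counter(['cat', 'dog', 'cat'])
table = import_from_dicts(counter)
rows.export_to_csv(table, 'label_counts.csv')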
Example #12
def create_final_headers(header_type, order_columns, final_filename):
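    # Consolidate per-year header CSVs into a single header table, recording
    # when each final column name was introduced and every original TSE name
    # it maps to.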
    final_headers = {}
    filenames = sorted(
        [
            (REGEXP_HEADER_YEAR.findall(filename)[0], filename)
            for filename in glob(str(settings.HEADERS_PATH / f"{header_type}-*.csv"))
            if REGEXP_HEADER_YEAR.findall(filename)
        ]
    )
    # TODO: check if schema is according to final header. if there are diffs,
    # warn user.
    for index, (header_year, filename) in enumerate(filenames):
        header = read_header(filename)
        for row in header:
            if not row.nome_final:
                continue
            if row.nome_final not in final_headers:
                row_data = row._asdict()
                if index > 0:
                    row_data["introduced_on"] = header_year
                row_data["original_names"] = [(header_year, row_data.pop("nome_tse"))]
                final_headers[row.nome_final] = row_data
            else:
                original_name = (header_year, row.nome_tse)
                original_names = final_headers[row.nome_final]["original_names"]
                should_add = True
                for original in original_names:
                    if original[1] == original_name[1]:
                        should_add = False
                        break
                if should_add:
                    original_names.append(original_name)

    table = rows.Table(
        fields=OrderedDict(
            [
                ("nome_final", rows.fields.TextField),
                ("descricao", rows.fields.TextField),
            ]
        )
    )

    header_list = sorted(
        final_headers.values(), key=lambda row: order_columns(row["nome_final"])
    )
    for row in header_list:
        row_data = {"descricao": row["descricao"] or "", "nome_final": row["nome_final"]}
        introduced_on = row.get("introduced_on", None)
        original_names = ", ".join(
            f"{item[1]} ({item[0]})" for item in row.get("original_names")
        )
        row_data["descricao"] += f". Aparece no TSE como: {original_names}"
        if introduced_on:
            row_data["descricao"] += f". Coluna adicionada em {introduced_on}"
        if row_data["descricao"][-1] != ".":
            row_data["descricao"] += "."
        table.append(row_data)
    rows.export_to_csv(table, final_filename)
Example #13
    def test_table_name(self):
        table = rows.Table(
            fields=collections.OrderedDict([("a", fields.TextField)]))

        self.assertTrue("filename" not in table.meta)
        self.assertEqual(table.name, "table1")

        table.meta["filename"] = "This is THE name.csv"
        self.assertTrue("filename" in table.meta)
        self.assertEqual(table.name, "this_is_the_name")
Example #14
    def test_table_name(self):
        table = rows.Table(fields=collections.OrderedDict([
            ('a', fields.TextField),
        ]))

        self.assertTrue('filename' not in table.meta)
        self.assertEqual(table.name, 'table1')

        table.meta['filename'] = 'This is THE name.csv'
        self.assertTrue('filename' in table.meta)
        self.assertEqual(table.name, 'this_is_the_name')
Example #15
    def test_issue_168(self):
        temp = tempfile.NamedTemporaryFile(delete=False)
        filename = "{}.{}".format(temp.name, self.file_extension)
        self.files_to_delete.append(filename)

        table = rows.Table(fields=OrderedDict([("jsoncolumn", rows.fields.JSONField)]))
        table.append({"jsoncolumn": '{"python": 42}'})
        rows.export_to_html(table, filename)

        table2 = rows.import_from_html(filename)
        self.assert_table_equal(table, table2)
Example #16
    def export_prob_predictions_to_csv(self, filename, ids, predictions):
        new_rows = []
        
        # classifier = {
        #     'Return_to_owner': 0,
        #     'Euthanasia': 1,
        #     'Adoption': 2,
        #     'Transfer': 3,
        #     'Died': 4
        # }
        for i, prediction in enumerate(predictions):
            # Output column order: ID, Adoption, Died, Euthanasia,
            # Return_to_owner, Transfer
            
            new_row = OrderedDict()
            
            new_row['ID'] = ids[i]
            new_row['Adoption'] = prediction[2]
            new_row['Died'] = prediction[4]
            new_row['Euthanasia'] = prediction[1]
            new_row['Return_to_owner'] = prediction[0]
            new_row['Transfer'] = prediction[3]
            
            new_rows.append(new_row)
        new_rows.sort(key=lambda e: e['ID'])
        
        new_fields = [(key, rows.fields.UnicodeField) for key in new_rows[0].keys()]
        table_to = rows.Table(fields=OrderedDict(new_fields))
        for row in new_rows:
            table_to.append(row)
            
        rows.export_to_csv(table_to, filename)
Example #17
    def test_issue_170(self):
        temp = tempfile.NamedTemporaryFile(delete=False)
        self.files_to_delete.append(temp.name)

        table = rows.Table(fields=OrderedDict([
            ('intvalue', rows.fields.IntegerField),
            ('floatvalue', rows.fields.FloatField),
        ]))
        table.append({'intvalue': 42, 'floatvalue': 3.14})
        table.append({'intvalue': None, 'floatvalue': None})

        # should not raise an exception
        rows.export_to_sqlite(table, temp.name)
Example #18
    def test_issue_168(self):
        temp = tempfile.NamedTemporaryFile(delete=False)
        filename = '{}.{}'.format(temp.name, self.file_extension)
        self.files_to_delete.append(filename)

        table = rows.Table(fields=OrderedDict([('jsoncolumn',
                                                rows.fields.JSONField)]))
        table.append({'jsoncolumn': '{"python": 42}'})
        rows.export_to_txt(table, filename, encoding='utf-8')

        table2 = rows.import_from_txt(filename, encoding='utf-8')
        self.assert_table_equal(table, table2)
Example #19
    def test_table_iadd(self):
        table = rows.Table(fields={
            "f1": rows.fields.IntegerField,
            "f2": rows.fields.FloatField
        })
        table.append({"f1": 1, "f2": 2})
        table.append({"f1": 3, "f2": 4})

        self.assertEqual(len(table), 2)
        table += table
        self.assertEqual(len(table), 4)
        data_rows = list(table)
        self.assertEqual(data_rows[0], data_rows[2])
        self.assertEqual(data_rows[1], data_rows[3])
Example #20
def csv_to_sqlite(
    input_filename,
    output_filename,
    samples=None,
    dialect=None,
    batch_size=10000,
    encoding="utf-8",
    callback=None,
    force_types=None,
    chunk_size=8388608,
    table_name="table1",
    schema=None,
):
    "Export a CSV file to SQLite, based on field type detection from samples"

    # TODO: automatically detect encoding if encoding == `None`
    # TODO: should be able to specify fields

    if dialect is None:  # Get a sample to detect dialect
        fobj = open_compressed(input_filename, mode="rb")
        sample = fobj.read(chunk_size)
        dialect = rows.plugins.csv.discover_dialect(sample, encoding=encoding)
    elif isinstance(dialect, six.text_type):
        dialect = csv.get_dialect(dialect)

    if schema is None:  # Identify data types
        fobj = open_compressed(input_filename, encoding=encoding)
        data = list(islice(csv.DictReader(fobj, dialect=dialect), samples))
        schema = rows.import_from_dicts(data).fields
        if force_types is not None:
            schema.update(force_types)

    # Create a lazy table object to be converted
    # TODO: this laziness feature will be incorporated into the library soon,
    #       so we can call `rows.import_from_csv` here instead of `csv.reader`.
    reader = csv.reader(
        open_compressed(input_filename, encoding=encoding), dialect=dialect
    )
    header = make_header(next(reader))  # read and normalize the header row
    table = rows.Table(fields=OrderedDict([(field, schema[field]) for field in header]))
    table._rows = reader

    # Export to SQLite
    return rows.export_to_sqlite(
        table,
        output_filename,
        table_name=table_name,
        batch_size=batch_size,
        callback=callback,
    )
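
A minimal call sketch; the paths are hypothetical and 'excel' is a dialect
name registered in the csv module:

csv_to_sqlite('balance.csv.gz', 'balance.sqlite',
              dialect='excel',
              table_name='balance')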
Example #21
def html_to_table(input_filename, encoding='utf-8'):
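    # Parse an invoice HTML page: skip everything before the 'Valores...R$'
    # marker, drop page/header noise, then group the remaining <b> texts into
    # 4-field rows (category, description, value, date).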
    with open(input_filename, 'rb') as fobj:
        html = fobj.read().decode(encoding).replace('\xa0', ' ')
    tree = HTML(html)

    data = tree.xpath('//body/b')
    for index, element in enumerate(data):
        text = element.text
        if text.startswith('Valores') and text.endswith('R$'):
            break
    new = []
    for element in data[index + 1:]:
        text = element.text
        if text.startswith('FATURA DE '):
            continue
        elif REGEXP_PAGE.findall(text):
            continue
        else:
            new.append(element.text)
    data = new

    chunks = [[value.strip() for value in row] for row in partition(data, 4)
              if len(row) == 4]
    table = rows.Table(fields=FIELDS)
    current_year = datetime.datetime.now().year
    months = set(extract_month(row) for row in chunks)
    subtract_year = 'DEZ' in months and 'JAN' in months
    for row in chunks:
        try:
            category = convert_text(row[0])
            description = convert_text(row[1])
            value = convert_value(row[2])
        except Exception:
            print('WARNING: Ignoring row: {}'.format(row))
            continue
        year = current_year
        month = extract_month(row)
        if subtract_year and month in ('NOV', 'DEZ'):
            year = current_year - 1
        date = convert_date(row[3], year)
        table.append({
            'category': category,
            'description': description,
            'value': value,
            'date': date,
        })

    return table
Example #22
    def test_export_to_json_indent(self):
        temp = tempfile.NamedTemporaryFile(delete=False, mode='rb+')
        self.files_to_delete.append(temp.name)

        table = rows.Table(fields=utils.table.fields)
        table.append(utils.table[0]._asdict())
        rows.export_to_json(table, temp.name, indent=2)

        temp.file.seek(0)
        result = temp.file.read().strip().replace(b'\r\n', b'\n').splitlines()
        self.assertEqual(result[0], b'[')
        self.assertEqual(result[1], b'  {')
        for line in result[2:-2]:
            self.assertTrue(line.startswith(b'    '))
        self.assertEqual(result[-2], b'  }')
        self.assertEqual(result[-1], b']')
Example #23
    def assert_generate_schema(self, fmt, expected, export_fields=None):
        # prepare a consistent table so we can test all formats using it
        table_fields = utils.table.fields.copy()
        table_fields['json_column'] = fields.JSONField
        table_fields['decimal_column'] = fields.DecimalField
        table_fields['percent_column'] = fields.DecimalField
        if export_fields is None:
            export_fields = list(table_fields.keys())
        table = rows.Table(fields=table_fields)

        for row in utils.table:
            data = row._asdict()
            data['json_column'] = {}
            table.append(data)
        table.meta['filename'] = 'this is my table.csv'

        obj = io.StringIO()
        fields.generate_schema(table, export_fields, fmt, obj)
        obj.seek(0)
        result = obj.read()

        self.assertEqual(expected.strip(), result.strip())
Example #24
    def test_transpose_feature(self):
        new_fields = OrderedDict([('key', rows.fields.TextField),
                                  ('value_1', rows.fields.TextField),
                                  ('value_2', rows.fields.TextField)])
        table = rows.Table(fields=new_fields)
        table.append({
            'key': 'first_key',
            'value_1': 'first_value_1',
            'value_2': 'first_value_2',
        })
        table.append({
            'key': 'second_key',
            'value_1': 1,
            'value_2': 2,
        })
        table.append({
            'key': 'third_key',
            'value_1': 3.14,
            'value_2': 2.71,
        })
        table.append({
            'key': 'fourth_key',
            'value_1': '2015-09-04',
            'value_2': '2015-08-29',
        })

        new_table = rows.transpose(table, fields_column='key')

        self.assertEqual(len(new_table), 2)
        self.assertEqual(len(new_table.fields), len(table))
        self.assertEqual(new_table.field_names, [row.key for row in table])
        self.assertEqual(new_table[0].first_key, 'first_value_1')
        self.assertEqual(new_table[0].second_key, 1)
        self.assertEqual(new_table[0].third_key, 3.14)
        self.assertEqual(new_table[0].fourth_key, datetime.date(2015, 9, 4))
        self.assertEqual(new_table[1].first_key, 'first_value_2')
        self.assertEqual(new_table[1].second_key, 2)
        self.assertEqual(new_table[1].third_key, 2.71)
        self.assertEqual(new_table[1].fourth_key, datetime.date(2015, 8, 29))
Example #25
    def export_exact_predictions_to_csv(self, filename, ids, predictions):
        new_rows = []
        
        # classifier = {
        #     'Return_to_owner': 0,
        #     'Euthanasia': 1,
        #     'Adoption': 2,
        #     'Transfer': 3,
        #     'Died': 4
        # }

        for i, prediction in enumerate(predictions):
            # Output column order: ID, Adoption, Died, Euthanasia,
            # Return_to_owner, Transfer
            
            new_row = OrderedDict()
            new_row['ID'] = ids[i]
            new_row['Adoption'] = int(prediction == '2')
            new_row['Died'] = int(prediction == '4')
            new_row['Euthanasia'] = int(prediction == '1')
            new_row['Return_to_owner'] = int(prediction == '0')
            new_row['Transfer'] = int(prediction == '3')
            
            new_rows.append(new_row)
            
        new_rows.sort(key=lambda e: e['ID'])
        
        new_fields = [(key, rows.fields.UnicodeField) for key in new_rows[0].keys()]
        table_to = rows.Table(fields=OrderedDict(new_fields))
        for row in new_rows:
            table_to.append(row)
            
        rows.export_to_csv(table_to, filename)
Example #26
    def test_table_init_slug_creation_on_fields(self):
        table = rows.Table(fields=collections.OrderedDict([(
            'Query Occurrence"( % ),"First Seen', rows.fields.FloatField)]))

        self.assertIn("query_occurrence_first_seen", table.fields)
Example #27
def create_table(year, download_path):

    # TODO: may read the schema from a file
    schema = OrderedDict([
        ('codLegislatura', rows.fields.IntegerField),
        ('datEmissao', rows.fields.DatetimeField),
        ('ideDocumento', rows.fields.IntegerField),
        ('idecadastro', rows.fields.IntegerField),
        ('indTipoDocumento', rows.fields.IntegerField),
        ('nuCarteiraParlamentar', rows.fields.IntegerField),
        ('nuDeputadoId', rows.fields.IntegerField),
        ('nuLegislatura', rows.fields.IntegerField),
        ('numAno', rows.fields.IntegerField),
        ('numEspecificacaoSubCota', rows.fields.IntegerField),
        ('numLote', rows.fields.IntegerField),
        ('numMes', rows.fields.IntegerField),
        ('numParcela', rows.fields.IntegerField),
        ('numRessarcimento', rows.fields.IntegerField),
        ('numSubCota', rows.fields.IntegerField),
        ('sgPartido', rows.fields.TextField),
        ('sgUF', rows.fields.TextField),
        ('txNomeParlamentar', rows.fields.TextField),
        ('txtCNPJCPF', DocumentField),
        ('txtDescricao', rows.fields.TextField),
        ('txtDescricaoEspecificacao', rows.fields.TextField),
        ('txtFornecedor', rows.fields.TextField),
        ('txtNumero', rows.fields.TextField),
        ('txtPassageiro', rows.fields.TextField),
        ('txtTrecho', rows.fields.TextField),
        ('vlrDocumento', MoneyField),
        ('vlrGlosa', MoneyField),
        ('vlrLiquido', MoneyField),
        ('vlrRestituicao', MoneyField),
    ])

    def convert_field(FieldClass):
        if FieldClass is MoneyField:
            return rows.fields.DecimalField
        elif FieldClass is DocumentField:
            return rows.fields.TextField
        else:
            return FieldClass

    # The current rows implementation does not know how to export `MoneyField`
    # and `DocumentField` to SQLite (only knows rows.fields.*Field classes), so
    # we need to have a specific schema for the `Table` object. In the future,
    # the library should detect from the values produced by the class or by
    # inspecting it.
    schema_rows = OrderedDict([(field_name, convert_field(Field))
                               for field_name, Field in schema.items()])

    def read_file(fobj):
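        # Deserialize each CSV row in schema order; rows with empty party
        # initials get them extracted from the 'LIDERANÇA DO <party>' name.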
        year = fobj.name.split('.csv')[0].split('-')[1]
        reader = csv.DictReader(fobj, delimiter=';')
        for row in reader:
            assert row['numAno'] == year
            if not row['sgPartido']:
                row['sgPartido'] = \
                    row['txNomeParlamentar'].replace('LIDERANÇA DO ', '')
            yield [
                Field.deserialize(row[field_name])
                for field_name, Field in schema.items()
            ]

    table = rows.Table(fields=schema_rows)
    table._rows = read_file(load_file(year, download_path))
    return table
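
The example assumes custom `MoneyField` and `DocumentField` classes defined
elsewhere; a plausible sketch of the money parser, assuming
'R$ 1.234,56'-style values:

class MoneyField(rows.fields.DecimalField):
    @classmethod
    def deserialize(cls, value, *args, **kwargs):
        # strip the currency symbol and Brazilian separators:
        # 'R$ 1.234,56' -> '1234.56'
        value = value.replace('R$', '').replace('.', '').replace(',', '.')
        return super(MoneyField, cls).deserialize(value.strip(), *args, **kwargs)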
Example #28
# Note: this snippet begins inside a loop that tallies, per column, how many
# times each value occurs (into `statistics`); `drow` is one row as a dict.
    for key, value in drow.items():
        if key not in statistics:
            statistics[key] = {}

        if value not in statistics[key]:
            statistics[key][value] = 0
        statistics[key][value] += 1

string = rows.fields.UnicodeField

columns = {}
columns['value'] = string
for key in statistics.keys():
    for value in statistics[key].keys():
        columns[key + '_' + value] = string

table_2d_analize = rows.Table(fields=columns)
drows = [r._asdict() for r in table]  # materialize: reused in the nested loops
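# Cross-tabulate: for each (column, value) pair, count how often it co-occurs
# with every other (column, value) pair across all rows.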
for key in statistics.keys():
    for value in statistics[key].keys():
        data = {}
        data['value'] = key + '_' + value
        for key2 in statistics.keys():
            for value2 in statistics[key2].keys():
                data[key2 + '_' + value2] = sum(
                    1 for d in drows
                    if d[key] == value and d[key2] == value2)
        table_2d_analize.append(data)

rows.export_to_csv(table_2d_analize, '2d_column_analize.csv')
Example #29
# Note: this snippet begins mid-definition; the opening line below is
# reconstructed from the `table3_fields` reference further down.
table3_fields = OrderedDict([
    #('name', rows.fields.UnicodeField),
    ('has_name', rows.fields.UnicodeField),
    #('datetime', rows.fields.UnicodeField),
    #('holiday', rows.fields.UnicodeField),
    ('free_day', rows.fields.UnicodeField),
    ('outcometype', rows.fields.UnicodeField),
    #('outcomesubtype', rows.fields.UnicodeField),
    ('animaltype', rows.fields.UnicodeField),
    ('sex', rows.fields.UnicodeField),
    ('castration', rows.fields.UnicodeField),
    #('ageuponoutcome', rows.fields.UnicodeField),
    #('breed', rows.fields.UnicodeField),
    #('color', rows.fields.UnicodeField)
])

table_3 = rows.Table(fields=table3_fields)
for row in table_2:
    has_name = 'Yes' if row.name else 'No'
    free_day = (row.datetime in ('Saturday', 'Sunday')
                or row.holiday == 'True')

    table_3.append({
        'has_name': has_name,
        'free_day': free_day,
        # snippet truncated in the original; the remaining fields
        # (outcometype, animaltype, sex, castration) are presumably
        # copied from `row`
    })
Example #30
# Note: this snippet begins inside a loop over rows; the Cat branch that
# builds and appends `new_row` precedes the fragment below.
        new_cat_rows.append(new_row)

    if row.animaltype == 'Dog':
        new_row.update(get_dog_age_columns(row))
        # Skipped: computing breed columns takes too long
        #new_row.update(get_dog_breed_columns(row))
        new_row.update(get_dog_color_columns(row))

        new_row['outcome'] = get_animal_outcome(row)

        new_dog_rows.append(new_row)

new_fields = [(key, rows.fields.UnicodeField)
              for key in new_cat_rows[0].keys()]
table_to = rows.Table(fields=OrderedDict(new_fields))
for row in new_cat_rows:
    table_to.append(row)

rows.export_to_csv(table_to, "clean_data3_no_breed_cat.csv")

new_fields = [(key, rows.fields.UnicodeField)
              for key in new_dog_rows[0].keys()]
table_to = rows.Table(fields=OrderedDict(new_fields))
for row in new_dog_rows:
    table_to.append(row)

rows.export_to_csv(table_to, "clean_data3_no_breed_dog.csv")
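
The build-and-export pattern above repeats for cats and dogs; a small helper
sketch (the function name is ours):

def dicts_to_table(dicts):
    # field names mirror the keys of the first dict, all typed UnicodeField
    fields = OrderedDict((key, rows.fields.UnicodeField) for key in dicts[0])
    table = rows.Table(fields=fields)
    for row in dicts:
        table.append(row)
    return table

rows.export_to_csv(dicts_to_table(new_cat_rows), 'clean_data3_no_breed_cat.csv')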

##################
# Cleaning the test data