Example #1
    def test_import_from_xlsx_retrieve_desired_data(self, mocked_create_table):
        mocked_create_table.return_value = 42

        # import using filename
        rows.import_from_xlsx(self.filename)
        call_args = mocked_create_table.call_args_list[0]
        self.assert_create_table_data(
            call_args,
            expected_meta={
                "imported_from": "xlsx",
                "filename": self.filename,
                "sheet_name": "Sheet1",
            },
        )

        # import using fobj
        with open(self.filename, "rb") as fobj:
            rows.import_from_xlsx(fobj)
        call_args = mocked_create_table.call_args_list[1]
        self.assert_create_table_data(
            call_args,
            expected_meta={
                "imported_from": "xlsx",
                "filename": self.filename,
                "sheet_name": "Sheet1",
            },
        )
Example #2
 def test_start_and_end_row(self, mocked_create_table):
     rows.import_from_xlsx(
         self.filename, start_row=6, end_row=8, start_column=4, end_column=7
     )
     self.assertTrue(mocked_create_table.called)
     self.assertEqual(mocked_create_table.call_count, 1)
     call_args = mocked_create_table.call_args_list[0]
     expected_data = [
         [4.56, 4.56, "12%", datetime.datetime(2050, 1, 2, 0, 0)],
         [7.89, 7.89, "13.64%", datetime.datetime(2015, 8, 18, 0, 0)],
         [9.87, 9.87, "13.14%", datetime.datetime(2015, 3, 4, 0, 0)],
     ]
     self.assertEqual(expected_data, call_args[0][0])
Example #3
    def test_import_from_xlsx_retrieve_desired_data(self, mocked_create_table):
        mocked_create_table.return_value = 42

        # import using filename
        table_1 = rows.import_from_xlsx(self.filename)
        call_args = mocked_create_table.call_args_list[0]
        self.assert_create_table_data(call_args)

        # import using fobj
        with open(self.filename, 'rb') as fobj:
            table_2 = rows.import_from_xlsx(fobj)
        call_args = mocked_create_table.call_args_list[1]
        self.assert_create_table_data(call_args)
Example #4
def get_state_data_from_google_spreadsheets(state, timeout=5):
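    # Download the state's spreadsheet as XLSX and parse the bulletin
    # ("reports") and cases sheets into lists of dicts.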
    state_spreadsheet_url = import_info_by_state(state).planilha_brasilio
    state_spreadsheet_download_url = spreadsheet_download_url(
        state_spreadsheet_url, "xlsx")
    data = http_get(state_spreadsheet_download_url, timeout)
    reports = rows.import_from_xlsx(io.BytesIO(data),
                                    sheet_name=BOLETIM_SPREADSHEET,
                                    force_types=FIELDS_BOLETIM)
    cases = rows.import_from_xlsx(io.BytesIO(data),
                                  sheet_name=CASOS_SPREADSHEET)
    return {
        "reports": [dict(row._asdict()) for row in reports if row.date],
        "cases": [dict(row._asdict()) for row in cases if row.municipio],
    }
Example #5
 def parse_boletim(self, state, data):
     self.logger.info(f"Parsing {state} boletim")
     try:
         boletins = rows.import_from_xlsx(
             io.BytesIO(data),
             sheet_name="Boletins (FINAL)",
             force_types={
                 "date": rows.fields.DateField,
                 "url": rows.fields.TextField,
                 "notes": rows.fields.TextField,
             },
         )
     except Exception as exp:
         self.errors[state].append(
             ("boletim", state, f"{exp.__class__.__name__}: {exp}"))
         return
     for boletim in boletins:
         boletim = boletim._asdict()
         boletim_data = [item for item in boletim.values() if item]
         if not boletim_data:
             continue
         date = boletim["date"]
         url = (boletim["url"] or "").strip()
         if not url:
             message = f"Boletim URL not found for {state} on {date}"
             self.errors[state].append(("boletim", state, message))
         else:
             boletim = {
                 "date": date,
                 "state": state,
                 "url": url,
                 "notes": boletim["notes"],
             }
             self.logger.debug(boletim)
             self.boletim_writer.writerow(boletim)
Example #6
    def handle(self, *args, **kwargs):
        truncate = kwargs.get("truncate", False)
        update_functions = [
            (Dataset, dataset_update_data),
            (Link, link_update_data),
            (Version, version_update_data),
            (Table, table_update_data),
            (Field, field_update_data),
        ]

        if truncate:
            print("Deleting metadata to create new objects...")
            for Model, _ in update_functions:
                Model.objects.all().delete()
        else:
            print("WARNING: updating data only. If some field was removed "
                  "this change will not be reflected on your database. "
                  "Consider using --truncate")

        self.datasets, self.tables, self.versions = {}, {}, {}
        response = urlopen(settings.DATA_URL)
        data = response.read()
        for Model, update_data_function in update_functions:
            table = rows.import_from_xlsx(
                io.BytesIO(data),
                sheet_name=Model.__name__,
                workbook_kwargs={"read_only": False},
            )
            self._update_data(Model, table, update_data_function)
Example #7
    def test_export_to_xlsx_filename(self):
        temp = tempfile.NamedTemporaryFile()
        filename = temp.name + '.xlsx'
        temp.close()
        self.files_to_delete.append(filename)
        rows.export_to_xlsx(utils.table, filename)

        table = rows.import_from_xlsx(filename)
        self.assert_table_equal(table, utils.table)

        export_in_memory = rows.export_to_xlsx(utils.table, None)
        result_fobj = BytesIO()
        result_fobj.write(export_in_memory)
        result_fobj.seek(0)
        result_table = rows.import_from_xlsx(result_fobj)
        self.assert_table_equal(result_table, utils.table)
Example #8
    def handle(self, *args, **kwargs):
        truncate = kwargs.get("truncate", False)
        update_functions = [
            (Dataset, dataset_update_data),
            (Link, link_update_data),
            (Version, version_update_data),
            (Table, table_update_data),
            (Field, field_update_data),
        ]

        data_tables_map = {}
        for table in Table.with_hidden.all():
            key = (table.dataset.slug, table.name)
            data_tables_map[key] = table.data_table

        if truncate:
            print("Deleting metadata to create new objects...")
            for Model, _ in update_functions:
                Model.objects.all().delete()
        else:
            print("WARNING: updating data only. If some field was removed "
                  "this change will not be reflected on your database. "
                  "Consider using --truncate")

        self.datasets, self.tables, self.versions = {}, {}, {}
        response_data = http_get(settings.DATA_URL, 5)
        if response_data is None:
            raise RuntimeError(f"Cannot download {settings.DATA_URL}")
        for Model, update_data_function in update_functions:
            table = rows.import_from_xlsx(io.BytesIO(response_data),
                                          sheet_name=Model.__name__,
                                          workbook_kwargs={"read_only": False})
            self._update_data(Model, table, update_data_function)

        print("Updating DataTable...", end="", flush=True)
        total_created, total_updated, total_skipped = 0, 0, 0
        for table in Table.with_hidden.select_related("dataset"):
            key = (table.dataset.slug, table.name)
            data_table = data_tables_map.get(key, None)
            if data_table is None:  # create DataTable if new Table or if previous was None
                data_table = DataTable.new_data_table(table, suffix_size=0)
                data_table.activate()
                total_created += 1
            elif data_table.table != table:  # Tables were truncated so previous DataTables get updated
                total_updated += 1
                data_table.table = table
                data_table.save()
            else:  # Same table as before, so no need to update
                total_skipped += 1

            if table.filtering_fields:  # avoid None
                table.fields.filter(name__in=table.filtering_fields).update(
                    frontend_filter=True)
            if table.search_fields:
                table.fields.filter(name__in=table.search_fields).update(
                    searchable=True)

        print(" created: {}, updated: {}, skipped: {}.".format(
            total_created, total_updated, total_skipped))
Example #9
 def test_start_and_end_row(self, mocked_create_table):
     rows.import_from_xlsx(self.filename,
                           start_row=6,
                           end_row=8,
                           start_column=4,
                           end_column=7)
     self.assertTrue(mocked_create_table.called)
     self.assertEqual(mocked_create_table.call_count, 1)
     call_args = mocked_create_table.call_args_list[0]
     expected_data = [
         [4.56, 4.56, "12%",
          datetime.datetime(2050, 1, 2, 0, 0)],
         [7.89, 7.89, "13.64%",
          datetime.datetime(2015, 8, 18, 0, 0)],
         [9.87, 9.87, "13.14%",
          datetime.datetime(2015, 3, 4, 0, 0)],
     ]
     self.assertEqual(expected_data, call_args[0][0])
Example #10
 def abre_doc(self):
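     # Dispatch to the matching rows importer based on the document type.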
     if self.doc_tipo == 'xls':
         self.doc = rows.import_from_xls(self.doc)
     elif self.doc_tipo == 'xlsx':
         self.doc = rows.import_from_xlsx(self.doc)
     elif self.doc_tipo == 'ods':
         self.doc = rows.import_from_ods(self.doc)
     else:
         raise Exception('Erro ao abrir o arquivo.')
Example #11
 def test_issue_290_one_hundred_read_as_1(self):
     result = rows.import_from_xlsx("tests/data/text_in_percent_cell.xlsx")
     # As this test is written, the numeric contents of the file's first
     # column are 100%, 23.20%, 1.00%, 10.00%, 100.00%
     assert result[0][0] == Decimal("1")
     assert result[1][0] == Decimal("0.2320")
     assert result[2][0] == Decimal("0.01")
     assert result[3][0] == Decimal("0.1")
     assert result[4][0] == Decimal("1")
Example #12
    def test_import_from_xlsx_retrieve_desired_data(self, mocked_create_table):
        mocked_create_table.return_value = 42

        # import using filename
        table_1 = rows.import_from_xlsx(self.filename)
        call_args = mocked_create_table.call_args_list[0]
        self.assert_create_table_data(call_args,
                expected_meta={'imported_from': 'xlsx',
                               'filename': self.filename,
                               'sheet_name': 'Sheet1',})

        # import using fobj
        with open(self.filename, 'rb') as fobj:
            table_2 = rows.import_from_xlsx(fobj)
        call_args = mocked_create_table.call_args_list[1]
        self.assert_create_table_data(call_args,
                expected_meta={'imported_from': 'xlsx',
                               'filename': self.filename,
                               'sheet_name': 'Sheet1',})
Example #13
    def test_import_from_xlsx_uses_create_table(self, mocked_create_table):
        mocked_create_table.return_value = 42
        kwargs = {'encoding': 'iso-8859-15', 'some_key': 123, 'other': 456, }
        result = rows.import_from_xlsx(self.filename, **kwargs)
        self.assertTrue(mocked_create_table.called)
        self.assertEqual(mocked_create_table.call_count, 1)
        self.assertEqual(result, 42)

        call = mocked_create_table.call_args
        kwargs['meta'] = {'imported_from': 'xlsx', 'filename': self.filename, }
        self.assertEqual(call[1], kwargs)
Example #14
    def test_issue_168(self):
        temp = tempfile.NamedTemporaryFile(delete=False)
        filename = "{}.{}".format(temp.name, self.file_extension)
        self.files_to_delete.append(filename)

        table = rows.Table(fields=OrderedDict([("jsoncolumn", rows.fields.JSONField)]))
        table.append({"jsoncolumn": '{"python": 42}'})
        rows.export_to_xlsx(table, filename)

        table2 = rows.import_from_xlsx(filename)
        self.assert_table_equal(table, table2)
Example #15
    def test_issue_168(self):
        temp = tempfile.NamedTemporaryFile(delete=False)
        filename = '{}.{}'.format(temp.name, self.file_extension)
        self.files_to_delete.append(filename)

        table = rows.Table(fields=
                OrderedDict([('jsoncolumn', rows.fields.JSONField)]))
        table.append({'jsoncolumn': '{"python": 42}'})
        rows.export_to_xlsx(table, filename)

        table2 = rows.import_from_xlsx(filename)
        self.assert_table_equal(table, table2)
Example #16
    def test_export_to_xlsx_fobj(self):
        temp = tempfile.NamedTemporaryFile()
        filename = temp.name + '.xlsx'
        temp.close()
        fobj = open(filename, 'wb')
        self.files_to_delete.append(filename)

        rows.export_to_xlsx(utils.table, fobj)
        fobj.close()

        table = rows.import_from_xlsx(filename)
        self.assert_table_equal(table, utils.table)
Example #17
    def test_import_from_xlsx_uses_create_table(self, mocked_create_table):
        mocked_create_table.return_value = 42
        kwargs = {'encoding': 'iso-8859-15', 'some_key': 123, 'other': 456, }
        result = rows.import_from_xlsx(self.filename, **kwargs)
        self.assertTrue(mocked_create_table.called)
        self.assertEqual(mocked_create_table.call_count, 1)
        self.assertEqual(result, 42)

        call = mocked_create_table.call_args
        kwargs['meta'] = {'imported_from': 'xlsx',
                          'filename': self.filename,
                          'sheet_name': 'Sheet1',}
        self.assertEqual(call[1], kwargs)
Example #18
    def read_data(self, *args, **kwargs):
        if kwargs["sheet_name"] is None:
            return []
        else:
            kwargs["skip_header"] = False

            if "end_column" not in kwargs:
                # Avoid reading all columns (even if blank)
                kwargs["end_column"] = 50

        return rows.import_from_xlsx(self.filename,
                                     workbook_kwargs={"data_only": True},
                                     *args,
                                     **kwargs)
Example #19
    def test_import_from_xlsx_uses_create_table(self, mocked_create_table):
        mocked_create_table.return_value = 42
        kwargs = {"encoding": "iso-8859-15", "some_key": 123, "other": 456}
        result = rows.import_from_xlsx(self.filename, **kwargs)
        self.assertTrue(mocked_create_table.called)
        self.assertEqual(mocked_create_table.call_count, 1)
        self.assertEqual(result, 42)

        call = mocked_create_table.call_args
        kwargs["meta"] = {
            "imported_from": "xlsx",
            "filename": self.filename,
            "sheet_name": "Sheet1",
        }
        self.assertEqual(call[1], kwargs)
Example #20
    def handle(self, *args, **kwargs):
        self.datasets, self.tables, self.versions = {}, {}, {}
        response = urlopen(settings.DATA_URL)
        data = response.read()

        update_functions = [
            (Dataset, dataset_update_data),
            (Link, link_update_data),
            (Version, version_update_data),
            (Table, table_update_data),
            (Field, field_update_data),
        ]
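        # The downloaded workbook has one sheet per model, named after the model class.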
        for Model, update_data_function in update_functions:
            table = rows.import_from_xlsx(
                io.BytesIO(data),
                sheet_name=Model.__name__,
            )
            self._update_data(Model, table, update_data_function)
Example #21
 def test_issue_290_can_read_sheet(self):
     rows.import_from_xlsx("tests/data/text_in_percent_cell.xlsx")
     # Before fixing the first part of #290, this would simply crash
     assert True
Example #22
    def parse_caso(self, state, data):
        self.logger.info(f"Parsing {state} caso")
        casos = rows.import_from_xlsx(io.BytesIO(data),
                                      sheet_name="Casos (FINAL)")
        cities = defaultdict(dict)
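        # cities maps city name -> date -> {"confirmed": ..., "deaths": ...}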
        for caso in casos:
            caso = caso._asdict()
            caso_data = [
                value for key, value in caso.items()
                if key != "municipio" and value is not None
            ]
            if not caso_data:
                continue
            for key, value in caso.items():
                if key == "municipio":
                    continue
                elif key.startswith("confirmados_") or key.startswith(
                        "mortes_"):
                    try:
                        _, day, month = key.split("_")
                    except ValueError:
                        message = f"ERROR PARSING {repr(key)} - {repr(value)} - {caso}"
                        self.errors[state].append(("caso", state, message))
                        self.logger.error(message)
                        continue
                    date = f"2020-{int(month):02d}-{int(day):02d}"
                    if key.startswith("confirmados_"):
                        number_type = "confirmed"
                    elif key.startswith("mortes_"):
                        number_type = "deaths"
                else:
                    continue
                if date not in cities[caso["municipio"]]:
                    cities[caso["municipio"]][date] = {}
                if value in (None, ""):
                    value = None
                else:
                    value = str(value)
                    if value.endswith(".0"):
                        value = value[:-2]
                    if value.startswith("=") and value[1:].isdigit():
                        value = value[1:]
                    try:
                        value = int(value)
                    except ValueError:
                        message = f"ERROR converting to int: {date} {number_type} {value} {caso}"
                        self.errors[state].append(("caso", state, message))
                        self.logger.error(message)
                        continue
                cities[caso["municipio"]][date][number_type] = value
        result = []
        for city, city_data in cities.items():
            for date, date_data in city_data.items():
                confirmed = date_data["confirmed"]
                deaths = date_data["deaths"]
                if confirmed is None and deaths is None:
                    continue
                confirmed = int(confirmed) if confirmed is not None else None
                deaths = int(deaths) if deaths is not None else None
                row = {
                    "date": date,
                    "state": state,
                    "city": city if city != "TOTAL NO ESTADO" else "",
                    "place_type":
                    "city" if city != "TOTAL NO ESTADO" else "state",
                    "confirmed": confirmed,
                    "deaths": deaths,
                }
                confirmed = row["confirmed"]
                deaths = row["deaths"]
                NULL = (None, "")
                if (confirmed in NULL
                        and deaths not in NULL) or (deaths in NULL
                                                    and confirmed not in NULL):
                    message = f"ERROR: only one field is filled for {date}, {state}, {city}"
                    self.errors[state].append(("caso", state, message))
                result.append(row)

        row_key = lambda row: (row["state"], row["city"], row["place_type"])
        result.sort(key=row_key)
        groups = groupby(result, key=row_key)
        for key, row_list_it in groups:
            row_list = list(row_list_it)
            row_list.sort(key=lambda row: row["date"])
            for order_for_place, row in enumerate(row_list, start=1):
                row["order_for_place"] = order_for_place
                row["is_last"] = False
            if row_list:
                row_list[-1]["is_last"] = True

        for row in result:
            if row["place_type"] == "city":
                if row["city"] == "Importados/Indefinidos":
                    row_population = None
                    row_city_code = None
                else:
                    row_city_code = get_city_code(row["state"], row["city"])
                    row_population = get_city_population(
                        row["state"], row["city"])
            elif row["place_type"] == "state":
                row_city_code = get_state_code(row["state"])
                row_population = get_state_population(row["state"])
            else:
                message = f"Invalid row: {row}"
                self.errors[state].append(("caso", state, message))
                self.logger.error(message)
                continue
            row_deaths = row["deaths"]
            row_confirmed = row["confirmed"]
            confirmed_per_100k = (100_000 * (row_confirmed / row_population) if
                                  row_confirmed and row_population else None)
            death_rate = (row_deaths / row_confirmed if row_deaths is not None
                          and row_confirmed not in (None, 0) else 0)
            row["estimated_population_2019"] = row_population
            row["city_ibge_code"] = row_city_code
            row["confirmed_per_100k_inhabitants"] = (
                f"{confirmed_per_100k:.5f}" if confirmed_per_100k else None)
            row["death_rate"] = f"{death_rate:.4f}"
            self.logger.debug(row)
            self.caso_writer.writerow(row)
Example #23
 def test_issue_290_textual_value_in_percent_col_is_preserved(self):
     result = rows.import_from_xlsx("tests/data/text_in_percent_cell.xlsx")
     # As this test is written, file contents on first column are
     # 100%, 23.20%, 1.00%, 10.00%, 100.00%
     assert result[5][1] == "text"
Example #24
def teladduser(file, time_sleep):
    """
    Log in on a Telegram account and add a users in a Supergroup from a SpreadSheet
    which the account logged in is admin.
    """

    # Verify that the Excel spreadsheet filename was given
    if not file:
        print('Need to pass the Excel SpreadSheet Filename!\n')
        click.Context(teladduser).exit(code=1)

    # Log in to a Telegram account
    try:
        api_id = config('API_ID')
        api_hash = config('API_HASH')
        phone = config('PHONE')
        client = TelegramClient(phone, api_id, api_hash)
        client.connect()
        if not client.is_user_authorized():
            client.send_code_request(phone)
            login_code = click.prompt(
                'Enter the login code that was sent to your Telegram app',
                type=int)
            client.sign_in(phone, login_code)
    except UndefinedValueError:
        print(
            'The environment variables API_ID, API_HASH or PHONE were not defined. '
            'Please create a .env file with them!\n')
        click.Context(teladduser).exit(code=1)

    # Get all groups of the logged-in user
    chats = []
    last_date = None
    chunk_size = 100
    groups = []
    result = client(
        GetDialogsRequest(offset_date=last_date,
                          offset_id=0,
                          offset_peer=InputPeerEmpty(),
                          limit=chunk_size,
                          hash=0))

    # Keep only the supergroups of the logged-in user
    chats.extend(result.chats)
    for chat in chats:
        try:
            if chat.megagroup:
                groups.append(chat)
        except:
            continue

    # Select a group to add users
    for i, g in enumerate(groups):
        print(f"{i + 1} - {g.title}")
    g_index = click.prompt("\nEnter Number of Group you want add users",
                           type=int)
    try:
        target_group = groups[int(g_index) - 1]
    except IndexError:
        print(
            '\nThe number selected was not of a valid Group number! Please try again!\n'
        )
        click.Context(teladduser).exit(code=1)

    target_group_entity = InputPeerChannel(target_group.id,
                                           target_group.access_hash)

    print(f'\nReading the file {file}, this will take a while ...\n')
    users_to_add = rows.import_from_xlsx(file)

    # Create a new Rows Table to save processed data
    fields = OrderedDict([('username_normal', rows.fields.TextField),
                          ('nome', rows.fields.TextField),
                          ('grupocanal', rows.fields.TextField),
                          ('conta_de_envio', rows.fields.IntegerField),
                          ('log', rows.fields.TextField)])
    users_added = rows.Table(fields=fields)

    n = 0
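    # n counts users actually added; after every 20 additions the script
    # sleeps for `time_sleep` seconds to avoid Telegram flood limits.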
    for i, user in enumerate(users_to_add):
        if user.log:
            users_added.append({
                'username_normal': user.username_normal,
                'nome': user.nome,
                'grupocanal': user.grupocanal,
                'conta_de_envio': user.conta_de_envio,
                'log': user.log,
            })
        elif i >= 45:
            try:
                print(f'Adicionando usuário: {i} - {user.nome}')
                user_to_add = client.get_input_entity(user.username_normal)
                client(
                    InviteToChannelRequest(target_group_entity, [user_to_add]))
                log = f"Usuário inserido em: {datetime.strftime(datetime.today(), '%Y-%m-%d às %H:%M:%S')}"
                users_added.append({
                    'username_normal': user.username_normal,
                    'nome': user.nome,
                    'grupocanal': target_group.title,
                    'conta_de_envio': user.conta_de_envio,
                    'log': log,
                })
                n += 1
                if n % 20 == 0:
                    print(
                        f'\nWaiting {time_sleep / 60} minutes to avoid Flood Error.\n'
                    )
                    time.sleep(time_sleep)
                else:
                    time.sleep(time_sleep / 15)
            except PeerFloodError:
                print(
                    "\nGetting Flood Error from telegram. Script is stopping now. Please try again after some time.\n"
                )
                try:
                    rows.export_to_xlsx(users_added,
                                        "usersAddedBeforeFloodError.xlsx")
                except:
                    print('\nCould not write to the file provided!\n')
                click.Context(teladduser).exit(code=1)
            except UserPrivacyRestrictedError:
                print(
                    "\nThe user's privacy settings do not allow you to do this. Skipping.\n"
                )
            except ValueError as err:
                print(f'\n{err} - Skipping.\n')
            except UserChannelsTooMuchError:
                print(
                    f'\nThe user {user.username_normal} you tried to add is already in too many channels/supergroups\n'
                )
            except FloodWaitError as err:
                print('\nHave to sleep', err.seconds, 'seconds\n')
                time.sleep(err.seconds)
            except KeyboardInterrupt:
                print('\nExecution was interrupted by user.\n')
                click.Context(teladduser).exit(code=1)
            except:
                traceback.print_exc()
                print("\nUnexpected Error\n")
                continue
        else:
            users_added.append({
                'username_normal': user.username_normal,
                'nome': user.nome,
                'grupocanal': user.grupocanal,
                'conta_de_envio': user.conta_de_envio,
                'log': user.log,
            })
    try:
        rows.export_to_xlsx(users_added, file)
    except:
        traceback.print_exc()
        print('\nCould not write to the file provided!\n')
Example #25
import rows
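# Read the XLSX into a rows.Table and write it back out as CSV
# (rows' CSV exporter is handed a binary-mode file object).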
data = rows.import_from_xlsx("Phishlabs_Malicious_URLs.xlsx")
rows.export_to_csv(data, open("Phishlabs_Malicious_URLs.csv", "wb"))
Example #26
    def parse_state_file(self, response):
        state = response.meta["state"]

        self.logger.info(f"Parsing {state} boletim")
        boletins = rows.import_from_xlsx(
            io.BytesIO(response.body),
            sheet_name="Boletins (FINAL)",
            force_types={
                "date": rows.fields.DateField,
                "url": rows.fields.TextField,
                "notes": rows.fields.TextField,
            },
        )
        for boletim in boletins:
            boletim = boletim._asdict()
            boletim_data = [item for item in boletim.values() if item]
            if not boletim_data:
                continue
            boletim = {
                "date": boletim["date"],
                "state": state,
                "url": boletim["url"],
                "notes": boletim["notes"],
            }
            self.logger.debug(boletim)
            self.boletim_writer.writerow(boletim)

        self.logger.info(f"Parsing {state} caso")
        casos = rows.import_from_xlsx(
            io.BytesIO(response.body), sheet_name="Casos (FINAL)"
        )
        cities = defaultdict(dict)
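        # Accumulate per-city, per-date counts keyed by "confirmed"/"deaths".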
        for caso in casos:
            caso = caso._asdict()
            caso_data = [
                value
                for key, value in caso.items()
                if key != "municipio" and value is not None
            ]
            if not caso_data:
                continue
            for key, value in caso.items():
                if key == "municipio":
                    continue
                elif key.startswith("confirmados_") or key.startswith("mortes_"):
                    try:
                        _, day, month = key.split("_")
                    except ValueError:
                        self.logger.error(
                            f"ERROR PARSING {repr(key)} - {repr(value)} - {caso}"
                        )
                        continue
                    date = f"2020-{int(month):02d}-{int(day):02d}"
                    if key.startswith("confirmados_"):
                        number_type = "confirmed"
                    elif key.startswith("mortes_"):
                        number_type = "deaths"
                else:
                    continue
                if date not in cities[caso["municipio"]]:
                    cities[caso["municipio"]][date] = {}
                try:
                    value = int(value) if value not in (None, "") else None
                except ValueError:
                    self.logger.error(
                        f"ERROR converting to int: {date} {number_type} {value} {caso}"
                    )
                    continue
                cities[caso["municipio"]][date][number_type] = value
        result = []
        for city, city_data in cities.items():
            for date, date_data in city_data.items():
                confirmed = date_data["confirmed"]
                deaths = date_data["deaths"]
                if confirmed is None and deaths is None:
                    continue
                confirmed = int(confirmed) if confirmed is not None else None
                deaths = int(deaths) if deaths is not None else None
                row = {
                    "date": date,
                    "state": state,
                    "city": city if city != "TOTAL NO ESTADO" else "",
                    "place_type": "city" if city != "TOTAL NO ESTADO" else "state",
                    "confirmed": confirmed,
                    "deaths": deaths,
                }
                result.append(row)
        row_key = lambda row: (row["state"], row["city"], row["place_type"])
        groups = groupby(result, key=row_key)
        is_last = {}
        for key, row_list in groups:
            is_last[key] = max(row["date"] for row in row_list)
        for row in result:
            row["is_last"] = row["date"] == is_last[row_key(row)]
            if row["place_type"] == "city":
                if row["city"] == "Importados/Indefinidos":
                    row_population = None
                    row_city_code = None
                else:
                    state_code = int(self.state_code[row["state"]])
                    city_code = int(self.city_code[(row["state"], row["city"])])
                    row_population = self.population_per_city[
                        (row["state"], row["city"])
                    ]
                    row_city_code = f"{state_code:02d}{city_code:05d}"
            elif row["place_type"] == "state":
                state_code = int(self.state_code[row["state"]])
                row_population = self.population_per_state[row["state"]]
                row_city_code = f"{state_code:02d}"
            else:
                self.logger.error(f"Invalid row: {row}")
                continue
            row_deaths = row["deaths"]
            row_confirmed = row["confirmed"]
            confirmed_per_100k = (
                100_000 * (row_confirmed / row_population)
                if row_confirmed and row_population
                else None
            )
            death_rate = (
                row_deaths / row_confirmed if row_deaths and row_confirmed else None
            )
            row["estimated_population_2019"] = row_population
            row["city_ibge_code"] = row_city_code
            row["confirmed_per_100k_inhabitants"] = (
                f"{confirmed_per_100k:.5f}" if confirmed_per_100k else None
            )
            row["death_rate"] = f"{death_rate:.4f}" if death_rate else None
            self.logger.debug(row)
            self.caso_writer.writerow(row)
Example #27
if __name__ == '__main__':
    # `estruturar_dados`, `generate_graph` and `UFS` come from earlier in the original script.
    tabela = rows.import_from_xlsx("dados.xlsx")  # avoid shadowing the `rows` module
    filtrado = tabela
    dados = estruturar_dados(filtrado)

    arq = open('build/grafo-gc.json', 'w')
    json.dump(generate_graph(dados), arq)
    arq.close()

    for uf in UFS:
        filtrado2 = [r for r in filtrado if r.uf == uf]
        filename = 'build/grafo-gc-' + uf + '.json'
        print(filename)
        arq = open(filename, 'w')
        dados = estruturar_dados(filtrado2)
        json.dump(generate_graph(dados), arq)
        arq.close()