Example #1
0
 async def ingest_file_helper(self):
     """Upsert every data row of ``self.csv_filepath`` into ``products``.

     Opens a connection through ``DBconnection``, checks that a table
     named ``products`` exists, then streams the CSV file and executes one
     ``insert ... on duplicate key update`` statement per row.  A single
     commit is issued after all rows have been executed.

     Rows in which any of the first three columns equals its header word
     (``name``, ``sku``, ``description``) are skipped.

     Raises:
         Exception: if the ``products`` table does not exist.  Errors
             raised while reading the file or talking to the database
             propagate unchanged.
     """
     connection_object = DBconnection()
     self.db_connection = connection_object.get_connection()
     try:
         self.cursor = self.db_connection.cursor()
         # Exact-name match: a '%products%' pattern would also accept
         # unrelated tables such as `old_products`.
         self.cursor.execute("SHOW TABLES LIKE 'products'")
         # fetchone() yields a row (not a bare string) or None when no
         # table matched, so test for None rather than comparing to a str.
         result = self.cursor.fetchone()
         print(result)
         if result is None:
             raise Exception(
                 "table `products` does not exit!! stopping the process, create table products and start the process again"
             )

         query = "insert into products(name, sku, description) values(%s,%s,%s) on duplicate key update name=values(name), description=values(description)"
         async with aiofiles.open(self.csv_filepath,
                                  mode="r",
                                  encoding="utf-8",
                                  newline="") as afp:
             async for row in AsyncReader(afp):
                 if (row[0] != "name" and row[1] != "sku"
                         and row[2] != "description"):
                     self.cursor.execute(query, (row[0], row[1], row[2]))
             self.db_connection.commit()
     finally:
         # Release the connection on failure as well as on success.
         connection_object.close_connection(self.db_connection)
Example #2
0
 async def ingest_file_helper(self):
     """Stream ``self.csv_filepath`` and upsert its rows into ``products``.

     Uses the already-initialised ``self.cursor`` / ``self.db_connection``.
     Rows in which any of the first three columns equals its header word
     (``name``, ``sku``, ``description``) are skipped.  One commit is
     issued after all rows have been executed; any error propagates to
     the caller.
     """
     print("Starting Ingestion")
     upsert_sql = "insert into products(name, sku, description) values(%s,%s,%s) on duplicate key update name=values(name), description=values(description)"
     async with aiofiles.open(self.csv_filepath,
                              mode="r",
                              encoding="utf-8",
                              newline="") as csv_file:
         async for record in AsyncReader(csv_file):
             # Guard clause: skip anything that looks like the header.
             if (record[0] == "name" or record[1] == "sku"
                     or record[2] == "description"):
                 continue
             params = (record[0], record[1], record[2])
             self.cursor.execute(upsert_sql, params)
         self.db_connection.commit()
         print("Ingestion Completed")
Example #3
0
    async def read_from_file(self, file_name):
        """
        Read all rows of a CSV file into a list

        Parameters
        --------------

            file_name : str,
                Path of the CSV file to read

        Returns
        ------------
            list
                Every row yielded by ``AsyncReader`` (``unix`` dialect),
                in file order

        """
        collected = []
        async with aiofiles.open(file_name,
                                 mode="r",
                                 encoding="UTF-8",
                                 newline="") as handle:
            reader = AsyncReader(handle, dialect='unix')
            async for record in reader:
                collected.append(record)
        return collected
Example #4
0
    async def upload_data(self, csv_file_path):
        """Run a captcha-guarded lookup for the first column of each CSV row.

        The page HTML is fetched once via ``self.get_html()`` and reused for
        every row.  For each row the method keeps retrying (one second
        apart) until ``get_captcha_text`` returns a 5-character string,
        then queries ``get_person_tables`` with the first three characters
        of that string.  If the extracted tables are empty, one more
        attempt is made.  The extracted tables are not returned or stored
        by this method.

        Prints a message instead of raising when ``FileNotFoundError``
        occurs inside the processing block.
        """
        html = await self.get_html()

        async def attempt(record):
            """Do one token/captcha/query round; return (ran, tables)."""
            token = await self.get_token(html)
            link = await self.extract_captcha_link(html)
            captcha_text = await self.get_captcha_text(link)
            if len(captcha_text) != 5:
                return False, None
            # NOTE(review): only the first 3 of the 5 characters are sent
            # -- confirm this is what the remote form expects.
            person_data = await self.get_person_tables(
                record[0], captcha_text[:3], token)
            tables = await self.extract_data_from_tables(person_data)
            return True, tables

        try:
            async with aiofiles.open(csv_file_path, mode="r",
                                     encoding="utf-8") as afp:
                async for row in AsyncReader(afp):
                    await asyncio.sleep(1)
                    while True:
                        await asyncio.sleep(1)
                        ran, tables_data = await attempt(row)
                        if not ran:
                            # Captcha text had the wrong length; try again.
                            continue
                        if not tables_data:
                            # Single extra attempt when nothing was extracted.
                            await attempt(row)
                        break
        except FileNotFoundError:
            print('File not found! Try again...')
Example #5
0
async def test_simple_read():
    """Reading FILENAME with default reader settings yields VALUES."""
    async with aiofiles.open(
        FILENAME, mode="r", encoding="ascii", newline=""
    ) as handle:
        parsed = []
        async for record in AsyncReader(handle):
            parsed.append(record)
        assert parsed == VALUES
Example #6
0
async def test_newline_read():
    """Reading FILENAME with DIALECT_PARAMS yields HEADER then READ_VALUES."""
    async with aiofiles.open(
        FILENAME, mode="r", encoding="ascii", newline=""
    ) as handle:
        parsed = []
        async for record in AsyncReader(handle, **DIALECT_PARAMS):
            parsed.append(record)
        expected = [HEADER] + READ_VALUES
        assert parsed == expected
Example #7
0
async def test_dialect_read():
    """Reading FILENAME with the PARAMS reader options yields VALUES."""
    async with aiofiles.open(
        FILENAME, mode="r", encoding="ascii", newline=""
    ) as handle:
        parsed = []
        async for record in AsyncReader(handle, **PARAMS):
            parsed.append(record)
        assert parsed == VALUES