async def ingest_file_helper(self):
    """
    Load the CSV at ``self.csv_filepath`` into the ``products`` table.

    Obtains a connection through ``DBconnection``, stores it and a cursor on
    ``self``, verifies that a table named ``products`` exists, then executes
    one ``insert ... on duplicate key update`` per CSV record using the
    first three columns as ``name``, ``sku`` and ``description``.  One
    commit is issued after every record has been executed.  The connection
    is handed back to ``close_connection`` whether or not ingestion
    succeeded.

    Raises
    ------
    Exception
        If no ``products`` table is found.
    IndexError
        If a non-empty record has fewer than three columns.

    Errors raised while opening/reading the file or by the database calls
    propagate unchanged.
    """
    header = ["name", "sku", "description"]
    upsert = (
        "insert into products(name, sku, description) values(%s,%s,%s) "
        "on duplicate key update name=values(name), description=values(description)"
    )

    connection_object = DBconnection()
    self.db_connection = connection_object.get_connection()
    try:
        self.cursor = self.db_connection.cursor()

        # Match the exact table name: with a wildcard pattern fetchone()
        # could hand back a different table that merely contains "products".
        self.cursor.execute("SHOW TABLES LIKE 'products'")
        # fetchone() yields a row object (or None when nothing matched),
        # never a bare string, so test for presence rather than equality.
        if not self.cursor.fetchone():
            raise Exception(
                "table `products` does not exist!! stopping the process, create table products and start the process again"
            )

        async with aiofiles.open(
            self.csv_filepath, mode="r", encoding="utf-8", newline=""
        ) as afp:
            async for row in AsyncReader(afp):
                if not row:
                    # Presumably a blank line reported as an empty record;
                    # there is nothing to insert for it.
                    continue
                # Skip only a record that IS the header.  The leading BOM is
                # stripped for the comparison so the header of a BOM-prefixed
                # file is still recognised.
                if [row[0].lstrip("\ufeff"), *row[1:3]] == header:
                    continue
                self.cursor.execute(upsert, (row[0], row[1], row[2]))

        self.db_connection.commit()
    finally:
        # Release the connection on failure as well as on success.
        connection_object.close_connection(self.db_connection)
async def ingest_file_helper(self):
    """
    Upsert every record of the CSV at ``self.csv_filepath`` into ``products``.

    Uses the already-initialised ``self.cursor`` / ``self.db_connection``.
    The first three columns of each record are written as ``name``, ``sku``
    and ``description`` through ``insert ... on duplicate key update``.
    One commit is issued after all records have been executed, so this call
    commits nothing itself if a failure interrupts the file (unless the
    connection auto-commits — not visible from here).

    Raises
    ------
    IndexError
        If a non-empty record has fewer than three columns.

    Errors raised while opening/reading the file or by the database calls
    propagate unchanged.
    """
    header = ["name", "sku", "description"]
    upsert = (
        "insert into products(name, sku, description) values(%s,%s,%s) "
        "on duplicate key update name=values(name), description=values(description)"
    )

    print("Starting Ingestion")
    async with aiofiles.open(
        self.csv_filepath, mode="r", encoding="utf-8", newline=""
    ) as afp:
        async for row in AsyncReader(afp):
            if not row:
                # Presumably a blank line reported as an empty record;
                # there is nothing to insert for it.
                continue
            # Skip only a record that IS the header.  Testing each column
            # separately would also discard real products whose name, sku or
            # description happens to equal the header word.  The leading BOM
            # is stripped for the comparison so the header of a BOM-prefixed
            # file is still recognised.
            if [row[0].lstrip("\ufeff"), *row[1:3]] == header:
                continue
            self.cursor.execute(upsert, (row[0], row[1], row[2]))

    self.db_connection.commit()
    print("Ingestion Completed")
async def read_from_file(self, file_name):
    """
    Read all rows of a CSV file into a list

    Parameters
    --------------
    file_name : str,
        Path to crawl results

    Returns
    ------------
    list[List[str]]
    """
    collected = []
    async with aiofiles.open(
        file_name, mode="r", encoding="UTF-8", newline=""
    ) as handle:
        # Parse with the 'unix' dialect and gather one entry per record.
        async for record in AsyncReader(handle, dialect='unix'):
            collected.append(record)
    return collected
# upload_data(csv_file_path): for every row of the CSV file, look up the value
# in row[0] via self.get_person_tables() using a captcha and a token.
#
# Flow visible in the code below:
#   * self.get_html() is awaited once, before the file is opened; the same
#     `html` is reused for every get_token / extract_captcha_link call.
#   * Each row starts with a 1-second asyncio.sleep, then a `while True` loop
#     that sleeps another second and fetches token, captcha link and captcha
#     text.
#   * When the captcha text has length 5, only its first three characters are
#     passed (with row[0] and the token) to get_person_tables, and the result
#     goes through extract_data_from_tables.
#   * If `tables_data` is falsy, the token/captcha/lookup sequence is repeated.
#   * `tables_data` is not returned or stored anywhere in this function.
#   * FileNotFoundError is caught and reported with print(); it is not re-raised.
#
# NOTE(review): this definition has been collapsed onto a single line, so the
# nesting of the `if` blocks and which branch the `break` belongs to cannot be
# determined from here — restore the original indentation and confirm the
# retry semantics before changing any logic.
async def upload_data(self, csv_file_path): html = await self.get_html() try: async with aiofiles.open(csv_file_path, mode="r", encoding="utf-8") as afp: async for row in AsyncReader(afp): await asyncio.sleep(1) while True: await asyncio.sleep(1) token = await self.get_token(html) link = await self.extract_captcha_link(html) captcha_text = await self.get_captcha_text(link) if len(captcha_text) == 5: captcha = captcha_text[:3] person_data = await self.get_person_tables( row[0], captcha, token) tables_data = await self.extract_data_from_tables( person_data) if not tables_data: token = await self.get_token(html) link = await self.extract_captcha_link(html) captcha_text = await self.get_captcha_text(link ) if len(captcha_text) == 5: captcha = captcha_text[:3] person_data = await self.get_person_tables( row[0], captcha, token) tables_data = await self.extract_data_from_tables( person_data) break except FileNotFoundError: print('File not found! Try again...')
async def test_simple_read():
    """Rows read from FILENAME by a default AsyncReader must equal VALUES."""
    collected = []
    async with aiofiles.open(
        FILENAME, mode="r", encoding="ascii", newline=""
    ) as handle:
        async for record in AsyncReader(handle):
            collected.append(record)
    assert collected == VALUES
async def test_newline_read():
    """Rows read with DIALECT_PARAMS must equal HEADER followed by READ_VALUES."""
    collected = []
    async with aiofiles.open(
        FILENAME, mode="r", encoding="ascii", newline=""
    ) as handle:
        reader = AsyncReader(handle, **DIALECT_PARAMS)
        async for record in reader:
            collected.append(record)
    assert collected == [HEADER] + READ_VALUES
async def test_dialect_read():
    """Rows read from FILENAME with the PARAMS reader options must equal VALUES."""
    collected = []
    async with aiofiles.open(
        FILENAME, mode="r", encoding="ascii", newline=""
    ) as handle:
        reader = AsyncReader(handle, **PARAMS)
        async for record in reader:
            collected.append(record)
    assert collected == VALUES