def test_insert_many(connection_string_database): time1 = datetime.datetime.now() time2 = datetime.datetime.now() - datetime.timedelta(1) time3 = datetime.datetime.now() - datetime.timedelta(2) records_to_insert = [("Göteborg", time1, "boll", "annons/goteborg/aktiv_panther_lang__84/93336788", "kolla in värsta dealen!", 93336788, 123, "123kr"), ("Malmö", time2, "racket", "annons/malmo/aktiv_panther_lang__84/93336789", "kom och köp!", 93336789, 456, "456kr"), ("Stockholm", time3, "nät", "annons/stockholm/aktiv_panther_lang__84/93336787", "billigt, billigt billigt!", 93336787, 789, "789kr")] list_of_columns = [ "location", "time", "top_info", "href", "subject_text", "item_id", "price", "price_text" ] connection = mysql_scripts.create_connection(connection_string_database, "local_database") cursor = mysql_scripts.create_cursor(connection) mysql_scripts.insert_many_data(cursor, connection, list_of_columns, records_to_insert, table_name) # mysql_scripts.drop_table(cursor, table_name) # mysql_scripts.create_table(cursor, table_name, columns) mysql_scripts.close_cursor(cursor) mysql_scripts.close_connection(connection)
def test_delete_data_from_table(connection_string_database): table_name = "scrape_log" connection = mysql_scripts.create_connection(connection_string_database, "local_database") cursor = mysql_scripts.create_cursor(connection) mysql_scripts.delete_data(cursor, connection, table_name) mysql_scripts.close_cursor(cursor) mysql_scripts.close_connection(connection)
def alter_tables_add_id(connection): cursor = mysql_scripts.create_cursor(connection) cursor.execute( "ALTER TABLE scrape_log ADD id INT UNSIGNED NOT NULL AUTO_INCREMENT, ADD INDEX (id)" ) cursor.execute( "ALTER TABLE articles ADD id INT UNSIGNED NOT NULL AUTO_INCREMENT, ADD INDEX (id)" ) connection.commit() mysql_scripts.close_cursor(cursor)
def scrape_history_start_up_date(connection): # Desc: adding last scraped date as three days ago to avoid heavy load on inital scrape with new database # @Parms no parms # @output no output # Insert Scrape details to scrape log time_of_first_article = datetime.datetime.now() - datetime.timedelta(3) time_of_scrape = datetime.datetime.now() no_of_articles = 0 cursor = mysql_scripts.create_cursor(connection) temporary_scrape_history_insert(connection, cursor, time_of_scrape, time_of_first_article, no_of_articles) mysql_scripts.close_cursor(cursor)
def test_select_data(connection_string_database): connection = mysql_scripts.create_connection(connection_string_database, "local_database") cursor = mysql_scripts.create_cursor(connection) table_name = "scrape_log" condition_dict = None selection_columns_list = [ 'time_of_first_article', 'time_of_scrape', 'no_of_articles' ] data = mysql_scripts.select_data(cursor, connection, selection_columns_list, table_name, condition_dict) print(data[-1][0]) mysql_scripts.close_cursor(cursor) mysql_scripts.close_connection(connection)
def test_insert_single(connection_string_database): connection = mysql_scripts.create_connection(connection_string_database, "local_database") cursor = mysql_scripts.create_cursor(connection) table_name = "scrape_log" list_of_columns = ["time_of_scrape", "time_of_scrape", "no_of_articles"] time_of_first_article = datetime.datetime(2020, 12, 9, 9, 15, 00, 00) time_of_scrape = time_of_first_article = datetime.datetime( 2020, 12, 9, 9, 15, 00, 00) no_of_articles = 98276 list_of_values = [time_of_first_article, time_of_scrape, no_of_articles] cursor.execute( "INSERT INTO scrape_log (time_of_first_article, time_of_scrape, no_of_articles) VALUES (%s, %s, %s)", (time_of_first_article, time_of_scrape, '90000')) connection.commit() # mysql_scripts.insert_data(connection, cursor, table_name, list_of_columns, list_of_values) mysql_scripts.close_cursor(cursor) mysql_scripts.close_connection(connection)
def check_duplicates(connection_string_database): connection = mysql_scripts.create_connection(connection_string_database, "local_database") cursor = mysql_scripts.create_cursor(connection) table_name = "articles" condition_dict = None selection_columns_list = ['item_id', 'subject_text'] data = mysql_scripts.select_data(cursor, connection, selection_columns_list, table_name, condition_dict) mysql_scripts.close_cursor(cursor) mysql_scripts.close_connection(connection) duplicate_list = [] for item in data: counter = 0 for item_check in data: if item[0] == item_check[0]: counter += 1 if counter > 1: duplicate_list.append(item) print(duplicate_list)
def create_tables(connection): tables_and_columns = [ { 'articles': [{ "column_name": "location", "data_type": "VARCHAR", "column_lenght": 255, "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }, { "column_name": "time", "data_type": "DATETIME", "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }, { "column_name": "top_info", "data_type": "VARCHAR", "column_lenght": 255, "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }, { "column_name": "href", "data_type": "VARCHAR", "column_lenght": 255, "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }, { "column_name": "subject_text", "data_type": "VARCHAR", "column_lenght": 255, "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }, { "column_name": "item_id", "data_type": "INT", "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }, { "column_name": "price", "data_type": "INT", "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }, { "column_name": "price_text", "data_type": "VARCHAR", "column_lenght": 255, "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }] }, { 'scrape_log': [{ "column_name": "time_of_scrape", "data_type": "DATETIME", "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }, { "column_name": "time_of_first_article", "data_type": "DATETIME", "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }, { "column_name": "no_of_articles", "data_type": "INT", "primary_key": False, "auto_increment": False, "not_null": False, "unique": False }] }, ] cursor = mysql_scripts.create_cursor(connection) for table in tables_and_columns: for key, value in table.items(): mysql_scripts.create_table(cursor, key, value) mysql_scripts.close_cursor(cursor)