def extract_listed_url_hashes_from_db() -> list:
    """Fetch the url_hash column for every row in the listed_ads table
    and return the values as a list of hash strings."""
    conn = None
    listed_db_hashes = []
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute("SELECT url_hash FROM listed_ads ORDER BY url_hash")
        row = cur.fetchone()
        while row is not None:
            # Each row is a one-element tuple; keep only the hash string.
            listed_db_hashes.append(row[0])
            row = cur.fetchone()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(f'{error}')
    finally:
        if conn is not None:
            conn.close()
    logger.info(f'Extracted {len(listed_db_hashes)} hashes from database listed_ads table')
    logger.info(f'Extracted hash list: {listed_db_hashes}')
    return listed_db_hashes


def insert_data_to_removed_table(data: dict) -> None:
    """Insert each entry of the to_remove_msg_data dict into the
    removed_ads table (one row per url_hash)."""
    conn = None
    try:
        logger.info(f'Inserting {len(data)} messages to removed_ads table')
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        for url_hash, value in data.items():
            (room_count, house_floors, apt_floor, price, sqm, sqm_price,
             apt_address, listed_date, removed_date, days_listed) = value
            cur.execute("""
                INSERT INTO removed_ads
                    (url_hash, room_count, house_floors, apt_floor, price, sqm,
                     sqm_price, apt_address, listed_date, removed_date, days_listed)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """,
                (url_hash, room_count, house_floors, apt_floor, price, sqm,
                 sqm_price, apt_address, listed_date, removed_date, days_listed))
        conn.commit()
        cur.close()
        for k, v in data.items():
            logger.info(f'{k} {v}')
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(error)
    finally:
        if conn is not None:
            conn.close()


def insert_data_to_listed_table(data: dict) -> None:
    """Insert each entry of the scraped message data dict into the
    listed_ads table (one row per url_hash)."""
    conn = None
    try:
        logger.info(f'Inserting {len(data)} messages to listed_ads table')
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        for url_hash, value in data.items():
            (room_count, house_floors, apt_floor, price, sqm, sqm_price,
             apt_address, list_date, days_listed) = value
            cur.execute("""
                INSERT INTO listed_ads
                    (url_hash, room_count, house_floors, apt_floor, price, sqm,
                     sqm_price, apt_address, list_date, days_listed)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """,
                (url_hash, room_count, house_floors, apt_floor, price, sqm,
                 sqm_price, apt_address, list_date, days_listed))
        conn.commit()
        cur.close()
        for k, v in data.items():
            logger.info(f'{k} {v}')
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(error)
    finally:
        if conn is not None:
            conn.close()


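# Illustrative sketch (not part of the original module): the shape of the `data`
# dict that insert_data_to_listed_table() expects — each url_hash key maps to the
# nine column values in table order. The hash and the values below are made up.
def _example_insert_listed() -> None:
    example_listed_data = {
        'a1b2c': [2, 5, 3,                              # room_count, house_floors, apt_floor
                  45000, 50.5, 891.09,                  # price, sqm, sqm_price
                  'Brivibas iela 1', '2021.04.20', 0],  # apt_address, list_date, days_listed
    }
    insert_data_to_listed_table(example_listed_data)

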
def extract_to_remove_msg_data(delisted_hashes: list) -> dict:
    """Filter the listed_ads table rows by the delisted hashes and return a dict of
    url_hash: [delisted message elements] for inserting into the removed_ads table."""
    delisted_messages = {}
    conn = None
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute("SELECT * FROM listed_ads")
        table_rows = cur.fetchall()
        for delisted_hash in delisted_hashes:
            for row in table_rows:
                curr_row_hash = row[0]
                if delisted_hash == curr_row_hash:
                    # Copy the listed_ads columns and add the removal date.
                    data_values = [
                        row[1],              # room_count
                        row[2],              # house_floors
                        row[3],              # apt_floor
                        row[4],              # price
                        row[5],              # sqm
                        row[6],              # sqm_price
                        row[7],              # apt_address
                        row[8],              # list_date
                        gen_removed_date(),  # removed_date
                        row[9],              # days_listed
                    ]
                    delisted_messages[curr_row_hash] = data_values
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(error)
    finally:
        if conn is not None:
            conn.close()
    return delisted_messages


def extract_to_increment_msg_data(listed_url_hashes: list) -> dict:
    """Connect to the db and iterate over the listed_ads table for every hash in
    listed_url_hashes, extracting the list date and days-listed value per hash.

    Args:
        listed_url_hashes: list of url hash strings

    Returns:
        dict mapping url_hash to [list_date, days_listed], for example
        {'gjhdx': ['2021.04.20', 108], 'cecek': ['2021.04.17', 101]};
        None if the table or the hash list is empty (e.g. on the first run).
    """
    conn = None
    to_increment_msg_data = {}
    try:
        logger.info('Connecting to DB to fetch data from listed_ads table')
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute("SELECT * FROM listed_ads")
        table_rows = cur.fetchall()  # list of row tuples
        # Handle the first run: empty table or no hashes to look up.
        if len(table_rows) < 1 or len(listed_url_hashes) < 1:
            return None
        for luh in listed_url_hashes:
            for row in table_rows:
                curr_row_hash = row[0]
                if luh == curr_row_hash:
                    list_date = row[8]
                    days_listed = row[9]
                    to_increment_msg_data[curr_row_hash] = [list_date, days_listed]
        cur.close()
        logger.info(f'Extracted data from listed_ads table for {len(to_increment_msg_data)} messages')
        for k, v in to_increment_msg_data.items():
            logger.info(f'{k} {v}')
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(error)
    finally:
        if conn is not None:
            conn.close()
    return to_increment_msg_data


def list_rows_in_removed_table() -> None:
    """Iterate over records in the removed_ads table with price below 150000,
    ordered by price, and print them."""
    conn = None
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute("SELECT * FROM removed_ads WHERE price < 150000 ORDER BY price")
        print("The number of ads in removed_ads table (price < 150000): ", cur.rowcount)
        row = cur.fetchone()
        while row is not None:
            print(row)
            row = cur.fetchone()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()


def update_single_column_value(table_name: str, dlv: int, url_hash: str) -> None:
    """Connect to the db and update the days_listed value in the given table
    only for the row that matches url_hash."""
    conn = None
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        # The table name cannot be passed as a query parameter; the value and
        # the hash are parameterized instead of being quoted by hand.
        sql = f"UPDATE {table_name} SET days_listed = %s WHERE url_hash = %s;"
        cur.execute(sql, (dlv, url_hash))
        conn.commit()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(error)
    finally:
        if conn is not None:
            conn.close()


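# Illustrative sketch (not part of the original module): how the dict returned by
# extract_to_increment_msg_data() might feed update_single_column_value(). The
# "+ 1 per run" increment is an assumption; the real increment logic may differ
# (e.g. recalculated from the list date).
def _example_increment_days_listed(listed_url_hashes: list) -> None:
    increment_data = extract_to_increment_msg_data(listed_url_hashes)
    if not increment_data:
        return
    for url_hash, (list_date, days_listed) in increment_data.items():
        update_single_column_value('listed_ads', days_listed + 1, url_hash)

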
def delete_db_listed_table_rows(delisted_hashes: list) -> None:
    """Delete rows from the listed_ads table whose url_hash is in delisted_hashes."""
    conn = None
    try:
        logger.info(f'Deleting {len(delisted_hashes)} removed messages from listed_ads table')
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        for delisted_hash in delisted_hashes:
            cur.execute("DELETE FROM listed_ads WHERE url_hash = %s", (delisted_hash,))
        conn.commit()
        cur.close()
        logger.info(f'Deleted ads with hashes: {delisted_hashes} from listed_ads table')
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(error)
    finally:
        if conn is not None:
            conn.close()


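# Illustrative sketch (not part of the original module): one possible end-of-scrape
# flow that ties the functions above together. `scraped_hashes` (the hashes found
# in the current scrape) is an assumed input produced elsewhere in the pipeline.
def _example_sync_removed_ads(scraped_hashes: list) -> None:
    db_hashes = extract_listed_url_hashes_from_db()
    # Ads present in the db but missing from the current scrape count as delisted.
    delisted_hashes = [h for h in db_hashes if h not in scraped_hashes]
    if not delisted_hashes:
        return
    to_remove_msg_data = extract_to_remove_msg_data(delisted_hashes)
    insert_data_to_removed_table(to_remove_msg_data)
    delete_db_listed_table_rows(delisted_hashes)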