コード例 #1
0
def extract_listed_url_hashes_from_db() -> list:
    """Return every url_hash value stored in the listed_ads table.

    Runs ``SELECT url_hash FROM listed_ads ORDER BY url_hash`` and collects
    the single column of each returned row.

    Returns:
        list: url_hash values in the order the query returns them. If
        connecting or querying fails, the error is logged and whatever was
        collected before the failure (possibly nothing) is returned.
    """
    conn = None
    clean_hashes = []
    try:
        params = config()
        conn = psycopg2.connect(**params)
        # The context manager closes the cursor even if the query raises.
        with conn.cursor() as cur:
            cur.execute("SELECT url_hash FROM listed_ads ORDER BY url_hash")
            # Each row is a 1-tuple, so take the column value directly.
            # Scrubbing quote/paren/comma characters out of the text is not
            # needed and would alter any hash containing those characters.
            clean_hashes.extend(row[0] for row in cur)
    except (Exception, psycopg2.DatabaseError) as error:
        # Best effort: report the problem and fall through with partial data.
        logger.error(f'{error}')
    finally:
        if conn is not None:
            conn.close()
    logger.info(f'Extracted {len(clean_hashes)} hashes from database listed_ads table')
    logger.info(f'Extracted clean hash count: {len(clean_hashes)}')
    logger.info(f'Extracted clean hash list: {clean_hashes}')
    return clean_hashes
コード例 #2
0
def insert_data_to_removed_table(data: dict) -> None:
    """Write every entry of ``data`` into the removed_ads table.

    Args:
        data: mapping of url_hash to an indexable sequence whose items 0-9
            are, in order: room_count, house_floors, apt_floor, price, sqm,
            sqm_price, apt_address, listed_date, removed_date, days_listed.

    All rows are committed together after the last insert and each entry is
    then written to the log. Any exception is logged, printed and swallowed;
    an opened connection is always closed.
    """
    insert_sql = """ INSERT INTO removed_ads
                  (url_hash,
                  room_count,
                  house_floors,
                  apt_floor,
                  price,
                  sqm,
                  sqm_price,
                  apt_address,
                  listed_date,
                  removed_date,
                  days_listed)
                  VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """
    field_count = 10  # number of per-ad values that follow the hash
    conn = None
    try:
        logger.info(f'Inserting {len(data)} messages to removed_ads table')
        conn = psycopg2.connect(**config())
        cur = conn.cursor()
        for url_hash, fields in data.items():
            # Index item by item so a too-short sequence raises IndexError
            # before anything is sent to the database.
            row_values = (url_hash, *(fields[pos] for pos in range(field_count)))
            cur.execute(insert_sql, row_values)
        conn.commit()
        cur.close()
        for url_hash, fields in data.items():
            logger.info(f'{url_hash} {fields}')
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(error)
        print(error)
    finally:
        if conn is not None:
            conn.close()
コード例 #3
0
def insert_data_to_listed_table(data: dict) -> None:
    """Insert every entry of ``data`` into the listed_ads table.

    Args:
        data: mapping of url_hash to an indexable sequence whose items 0-8
            are, in order: room_count, house_floors, apt_floor, price, sqm,
            sqm_price, apt_address, list_date, days_listed.

    All rows are committed together after the last insert and each entry is
    then written to the log. Any exception is logged, printed and swallowed
    (nothing is committed in that case); an opened connection is always
    closed.
    """
    conn = None
    try:
        logger.info(f'Inserting {len(data)} messages to listed_ads table')
        params = config()
        conn = psycopg2.connect(**params)
        # The context manager closes the cursor even if an insert raises.
        with conn.cursor() as cur:
            for url_hash, values in data.items():
                (room_count, house_floors, apt_floor, price, sqm,
                 sqm_price, apt_address, list_date, days_listed) = (
                    values[pos] for pos in range(9))
                cur.execute(""" INSERT INTO listed_ads
                  (url_hash,
                  room_count,
                  house_floors,
                  apt_floor,
                  price,
                  sqm,
                  sqm_price,
                  apt_address,
                  list_date,
                  days_listed)
                  VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """,
                            (url_hash,
                             room_count,
                             house_floors,
                             apt_floor,
                             price,
                             sqm,
                             sqm_price,
                             apt_address,
                             list_date,
                             days_listed))
        conn.commit()
        for k, v in data.items():
            logger.info(f'{k} {v}')
    except (Exception, psycopg2.DatabaseError) as error:
        # Log as well as print so a failed insert is visible in the log
        # file, matching insert_data_to_removed_table.
        logger.error(error)
        print(error)
    finally:
        if conn is not None:
            conn.close()
コード例 #4
0
def extract_to_remove_msg_data(delisted_hashes: list) -> dict:
    """Collect listed_ads rows for the given hashes, shaped for removed_ads.

    Reads the whole listed_ads table and, for every hash in
    ``delisted_hashes`` that matches column 0 of a row, builds the value
    list used when inserting into the removed_ads table.

    Args:
        delisted_hashes: url_hash values of the ads to look up.

    Returns:
        dict: url_hash -> list of ten items: row columns 1-8 (treated here
        as room_count, house_floor_count, apt_floor, price, sqm, sqm_price,
        apt_address, list_date), then the value of ``gen_removed_date()``,
        then row column 9 (days_listed). Column positions follow
        ``SELECT *``, i.e. presumably the table's column order - verify
        against the schema. On any error the exception is printed and the
        entries gathered so far are returned.
    """
    delisted_messages = {}
    conn = None
    try:
        params = config()
        conn = psycopg2.connect(**params)
        # The context manager closes the cursor even if the query raises.
        with conn.cursor() as cur:
            cur.execute("SELECT * FROM listed_ads")
            table_rows = cur.fetchall()
        # Index the rows by hash once so each lookup is O(1) instead of
        # rescanning the whole table for every delisted hash. When several
        # rows share a hash the last one wins, as with the previous scan.
        rows_by_hash = {row[0]: row for row in table_rows}
        for delisted_hash in delisted_hashes:
            row = rows_by_hash.get(delisted_hash)
            if row is None:
                continue
            delisted_messages[row[0]] = [*row[1:9], gen_removed_date(), row[9]]
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
    return delisted_messages
コード例 #5
0
def extract_to_increment_msg_data(listed_url_hashes: list) -> "dict | None":
    """Fetch columns 8 and 9 of the listed_ads rows matching the given hashes.

    Reads the whole listed_ads table and keeps, for every hash in
    ``listed_url_hashes`` that matches column 0 of a row, columns 8 and 9 of
    that row (named pub_date and dlv in the original code; positions follow
    ``SELECT *``).

    Args:
        listed_url_hashes: list of url_hash strings to look up.

    Returns:
        dict: hash -> [column 8, column 9], for example
        ``{'gjhdx': ['2021.04.20', 108], 'cecek': ['2021.04.17', 101]}``.
        None: when the table has no rows (e.g. first run) or when
        ``listed_url_hashes`` is empty.
        On a database or other error the exception is logged and printed,
        and the (possibly empty) dict built so far is returned.
    """
    conn = None
    to_increment_msg_data = {}
    try:
        logger.info('Connecting to DB to fetch data from listed_ads table')
        params = config()
        conn = psycopg2.connect(**params)
        # The context manager closes the cursor on every path, including
        # the early return below.
        with conn.cursor() as cur:
            cur.execute("SELECT * FROM listed_ads")
            table_rows = cur.fetchall()
        # Nothing to match against, or nothing to match: signal with None.
        if not table_rows or not listed_url_hashes:
            return None
        # Index rows by hash once instead of rescanning the table for every
        # requested hash; the last row wins when a hash is duplicated.
        rows_by_hash = {row[0]: row for row in table_rows}
        for luh in listed_url_hashes:
            row = rows_by_hash.get(luh)
            if row is not None:
                to_increment_msg_data[row[0]] = [row[8], row[9]]
        logger.info(f'Extracted data from listed_ads table for {len(to_increment_msg_data)} messages')
        for k, v in to_increment_msg_data.items():
            logger.info(f'{k} {v}')
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(error)
        print(error)
    finally:
        if conn is not None:
            conn.close()
    return to_increment_msg_data
コード例 #6
0
def list_rows_in_removed_table() -> None:
    """Print the removed_ads rows priced below 150000, ordered by price.

    First prints the cursor's row count (the message text refers to the
    table as "delisted_ads", although the query reads removed_ads), then
    prints each row on its own line. Any exception is printed and
    swallowed; an opened connection is always closed.
    """
    conn = None
    try:
        conn = psycopg2.connect(**config())
        cur = conn.cursor()
        cur.execute("SELECT * FROM removed_ads WHERE price < 150000 ORDER BY price")
        print("The number of ads in delisted_ads table: ", cur.rowcount)
        # Two-argument iter() keeps calling fetchone() until it returns None.
        for record in iter(cur.fetchone, None):
            print(record)
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
コード例 #7
0
def update_single_column_value(table_name: str, dlv: int, url_hash: str) -> None:
    """Set days_listed to ``dlv`` on the row of ``table_name`` matching ``url_hash``.

    Args:
        table_name: table to update; may be schema-qualified
            (``schema.table``). It is emitted as a quoted identifier, so it
            must match the table's stored name exactly, including case.
        dlv: new value for the days_listed column.
        url_hash: value of the url_hash column identifying the row.

    The statement is committed on success. Any exception is printed and
    swallowed; an opened connection is always closed.
    """
    conn = None
    try:
        # Function-scope import so this block does not depend on the
        # psycopg2.sql submodule having been imported elsewhere in the file.
        from psycopg2 import sql as pg_sql

        params = config()
        conn = psycopg2.connect(**params)
        # Identifiers cannot be bound as query parameters, so the table name
        # is composed with psycopg2.sql, which quotes it safely. The values
        # are passed as bound parameters rather than formatted into the text,
        # so quotes in url_hash can neither break nor alter the statement.
        table_ident = pg_sql.SQL('.').join(
            [pg_sql.Identifier(part) for part in table_name.split('.')])
        statement = pg_sql.SQL(
            "UPDATE {} SET days_listed = %s WHERE url_hash = %s"
        ).format(table_ident)
        # The context manager closes the cursor even if the update raises.
        with conn.cursor() as cur:
            cur.execute(statement, (dlv, url_hash))
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
コード例 #8
0
def delete_db_listed_table_rows(delisted_hashes: list) -> None:
    """Delete the listed_ads rows whose url_hash is in ``delisted_hashes``.

    Args:
        delisted_hashes: url_hash values of the ads to remove.

    All deletions are committed together after the last statement. Any
    exception is logged, printed and swallowed (nothing is committed in that
    case); an opened connection is always closed.
    """
    conn = None
    try:
        logger.info(f'Deleting {len(delisted_hashes)} removed messages from listed_ads table')
        params = config()
        conn = psycopg2.connect(**params)
        # The context manager closes the cursor even if a delete raises.
        with conn.cursor() as cur:
            for delisted_hash in delisted_hashes:
                # Bind the hash as a parameter instead of concatenating it
                # into the statement, so quoting is handled by the driver.
                cur.execute("DELETE FROM listed_ads WHERE url_hash = %s",
                            (delisted_hash,))
        conn.commit()
        logger.info(f'Deleted ads with hashes: {delisted_hashes} from listed_ads table')
    except (Exception, psycopg2.DatabaseError) as error:
        logger.error(error)
        print(error)
    finally:
        if conn is not None:
            conn.close()