예제 #1
0
async def import_customer(directory_name, customer_file):
    """ import customer file into database """
    success_cnt, error_cnt, prev_cnt, curr_cnt = 0, 0, 0, 0
    start_cust = datetime.datetime.now()
    prev_cnt = DB.CUST.count_documents({})

    try:
        data = open(Path(directory_name, customer_file),
                    "r",
                    encoding="utf-8-sig")
        header = next(csv.reader(data))
        for row in csv.reader(data):

            DB.CUST.insert_one({
                header[0]: row[0],
                header[1]: row[1],
                header[2]: row[2],
                header[3]: row[3],
                header[4]: row[4],
                header[5]: row[5],
                header[6]: row[6],
                header[7]: row[7]
            })

            if DB.CUST.acknowledged:
                success_cnt += 1
                LOGGER.debug("Item added to customer collection: %s", row[0])
            else:
                error_cnt += 1
                LOGGER.error("Customer collection insert error: %s", row[0])

        await asyncio.sleep(0)

    except FileNotFoundError:
        LOGGER.error("File does not exist: %s",
                     Path(directory_name, customer_file))
        sys.exit(1)

    curr_cnt = DB.CUST.count_documents({})
    end_cust = datetime.datetime.now()

    result = (success_cnt, prev_cnt, curr_cnt, end_cust - start_cust)
    LOGGER.info("Customer result: %s", result)
    LOGGER.info("Customer load: start time %s, end time %s", start_cust,
                end_cust)

    return result
예제 #2
0
def import_rental(directory_name, rental_file):
    """ import rental file into database """
    success_cnt, error_cnt, prev_cnt, curr_cnt = 0, 0, 0, 0
    start_rntl = datetime.datetime.now()
    prev_cnt = DB.RNTL.count_documents({})

    try:
        data = open(Path(directory_name, rental_file),
                    "r",
                    encoding="utf-8-sig")
        header = next(csv.reader(data))
        for row in csv.reader(data):

            DB.RNTL.insert_one({
                header[0]: row[0],
                header[1]: row[1],
                header[2]: row[2],
                header[3]: row[3],
                header[4]: row[4],
                header[5]: row[5]
            })

            if DB.RNTL.acknowledged:
                success_cnt += 1
                LOGGER.debug("Item added to rental collection: %s", row[0])
            else:
                error_cnt += 1
                LOGGER.error("Rental collection insert error: %s", row[0])

    except FileNotFoundError:
        LOGGER.error("File does not exist: %s",
                     Path(directory_name, rental_file))
        sys.exit(1)

    curr_cnt = DB.RNTL.count_documents({})
    end_rntl = datetime.datetime.now()

    result = (success_cnt, prev_cnt, curr_cnt, end_rntl - start_rntl)
    LOGGER.info("Rental result: %s", result)
    LOGGER.info("Rental load: start time %s, end time %s", start_rntl,
                end_rntl)

    return result
예제 #3
0
def import_product(directory_name, product_file):
    """ import product file into database """
    success_cnt, error_cnt, prev_cnt, curr_cnt = 0, 0, 0, 0
    start_prdct = datetime.datetime.now()
    prev_cnt = DB.PRDCT.count_documents({})

    try:
        data = open(Path(directory_name, product_file),
                    "r",
                    encoding="utf-8-sig")
        header = next(csv.reader(data))
        for row in csv.reader(data):

            DB.PRDCT.insert_one({
                header[0]: row[0],
                header[1]: row[1],
                header[2]: row[2],
                header[3]: row[3]
            })

            if DB.PRDCT.acknowledged:
                success_cnt += 1
                LOGGER.debug("Item added to product collection: %s", row[0])
            else:
                error_cnt += 1
                LOGGER.error("Prodcut collection insert error: %s", row[0])

    except FileNotFoundError:
        LOGGER.error("File does not exist: %s",
                     Path(directory_name, product_file))
        sys.exit(1)

    curr_cnt = DB.PRDCT.count_documents({})
    end_prdct = datetime.datetime.now()

    result = (success_cnt, prev_cnt, curr_cnt, end_prdct - start_prdct)
    LOGGER.info("Product result: %s", result)
    LOGGER.info("Product load: start time %s, end time %s", start_prdct,
                end_prdct)

    return result
예제 #4
0
def delete_all():
    """ delete all data from collections """
    prdct_result = DB.PRDCT.delete_many({})
    cust_result = DB.CUST.delete_many({})
    rntl_result = DB.RNTL.delete_many({})

    LOGGER.info("Items deleted from product collection: %s",
                prdct_result.deleted_count)
    LOGGER.info("Items deleted from customer collection: %s",
                cust_result.deleted_count)
    LOGGER.info("Items deleted from rental collection: %s",
                rntl_result.deleted_count)
    return prdct_result.deleted_count, cust_result.deleted_count, rntl_result.deleted_count
예제 #5
0

def drop_all():
    """ delete all data from collections """
    DB.PRDCT.drop()
    DB.CUST.drop()
    DB.RNTL.drop()


if __name__ == "__main__":
    # Get current path
    DATA_DIR = os.path.dirname(os.path.abspath(__file__))

    # Configure and enable internal profiling
    PR = cProfile.Profile()
    PR.enable()

    # Start linear operations and measure total module run time
    START_TIME = datetime.datetime.now()
    import_data(DATA_DIR, 'products.csv', 'customers.csv', 'rentals.csv')
    END_TIME = datetime.datetime.now()

    # Log results of module run time
    LOGGER.info("Total module run time: %s", END_TIME - START_TIME)
    LOGGER.info("Module: start time %s, end time %s", START_TIME, END_TIME)

    # Disable internal profiling and return results
    PR.disable()
    PR.print_stats()
    drop_all()