Exemple #1
0
# URL suffix handed to Scrapper for each supported website key. The values
# look like page-number query/fragment prefixes -- TODO confirm with Scrapper.
_URL_SUFFIXES = {
    "cg": "#resultsPage=",
    "ed": "?pagenumber=",
}


def scrap_and_upload(vehicle_category):
    """Scrape listings for one vehicle category into a CSV and upload it to S3.

    For every ``(make, model, urls)`` entry returned by
    ``load_scrapping_links(vehicle_category)``, a ``Scrapper`` is built per
    website in ``urls`` and ``fetch_batch(NUM_OF_PAGES)`` is called on it.
    Any resulting ``listings`` are appended to
    ``{DIR_NAME}/tmp/{vehicle_category}/{YYYY-MM-DD}.csv`` (UTC date).  If that
    file exists once all sites were processed, it is uploaded to
    ``DESTINATION_BUCKET`` under the key ``{vehicle_category}/{YYYY-MM-DD}.csv``.

    Websites whose key has no entry in ``_URL_SUFFIXES`` are skipped with a
    message on stderr instead of failing with ``UnboundLocalError`` or
    silently reusing the previous site's suffix.

    Args:
        vehicle_category: Category identifier; used for the link lookup, the
            local directory name and the S3 key prefix.  Must not be ``None``.

    Raises:
        SystemExit: If ``vehicle_category`` is ``None``.
    """
    # Function-scope import: the file is only known to expose the `datetime`
    # class, and `timezone` is needed for the non-deprecated UTC clock call.
    from datetime import timezone

    if vehicle_category is None:
        sys.exit("vehicle category cannot be null")
    vehicles = load_scrapping_links(vehicle_category)

    # Same "YYYY-MM-DD" UTC date that the deprecated datetime.utcnow() gave.
    start_time = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    # NOTE(review): directories are created from relative names while
    # file_path is rooted at DIR_NAME -- confirm create_directory resolves
    # its argument against DIR_NAME.
    create_directory("tmp")
    create_directory(f"tmp/{vehicle_category}")
    file_path = f"{DIR_NAME}/tmp/{vehicle_category}/{start_time}.csv"

    # Pass a header only when starting a fresh file; when appending to an
    # existing file for the same day, no header is passed to `write`.
    if os.path.exists(file_path):
        header = None
    else:
        header = ["Make", "Model", "Trim", "Year", "Mileage", "Price"]

    for make, model, urls in vehicles:
        for website_name, link in urls.items():
            urlsuffix = _URL_SUFFIXES.get(website_name)
            if urlsuffix is None:
                print(
                    f"skipping unsupported website {website_name!r} "
                    f"for {make} {model}",
                    file=sys.stderr,
                )
                continue

            site_scrapper = Scrapper(website_name, link, urlsuffix, make,
                                     model, vehicle_category)
            site_scrapper.fetch_batch(NUM_OF_PAGES)
            if site_scrapper.listings:
                with open(file_path, "a") as csvfile:
                    write(csvfile, site_scrapper.listings, header)
                    # Header goes out with the first written batch only.
                    header = None

    # Nothing is uploaded when no site produced listings and no earlier run
    # left a file for today's date.
    if os.path.exists(file_path):
        s3_client = boto3.client('s3')
        s3_client.upload_file(file_path, DESTINATION_BUCKET,
                              f"{vehicle_category}/{start_time}.csv")