Example #1
0
def main():
    """Search eBay smartphone listings per brand and upsert the raw results.

    For each brand, `advanced_search` is called on `SMARTPHONE_CATEGORY`
    with a fixed set of item filters and a page count of 10. Every returned
    item is annotated in place with its extracted current price/currency
    and the UTC fetch time/date, then all items are handed to
    `database.upsert_many` for the `ebay_raw_data` collection.
    """
    search_filters = {
        "Condition": "1000",  # NEW
        "ListingType": ["Classified", "FixedPrice", "StoreInventory"],
        "HideDuplicateItems": "true",
        "TopRatedSellerOnly": "true",
    }
    page_count = 10
    brand_names = [
        "Samsung", "Xiaomi", "Motorola", "Apple", "Huawei",
        "Nokia", "LG", "Sony", "Honor", "Google",
    ]

    # Flatten the per-brand result lists into one list of listings.
    listings = [
        listing
        for brand_name in brand_names
        for listing in advanced_search(SMARTPHONE_CATEGORY, page_count,
                                       search_filters, brand_name)
    ]

    for listing in listings:
        price_node = listing['sellingStatus'][0]["currentPrice"][0]
        listing['extracted_current_price'] = price_node["__value__"]
        listing['extracted_current_currency'] = price_node["@currencyId"]
        fetched_at = datetime.datetime.utcnow()
        listing['api_fetch_time'] = fetched_at
        # Same instant truncated to midnight, i.e. the fetch day.
        listing['api_fetch_date'] = fetched_at.replace(
            hour=0, minute=0, second=0, microsecond=0)

    # The field list is presumably the upsert match key -- confirm against
    # database.upsert_many.
    upsert_fields = ['itemId', 'extracted_current_price', 'api_fetch_date']
    database.upsert_many(database.ebay_raw_data, listings, upsert_fields)
Example #2
0
def main():
    """Fetch MercadoLibre products for one category and upsert the raw data.

    Calls `ml_api.find_products_by_category` for category 'MLA1055' with no
    query and both boolean flags set, stamps each returned product in place
    with the UTC fetch time and fetch day, and passes them to
    `database.upsert_many` for the `mercadolibre_raw_data` collection.
    """
    category_id = 'MLA1055'
    search_query = None
    new_only = True
    most_sold = True
    products = ml_api.find_products_by_category(category_id, search_query,
                                                new_only, most_sold)

    for product in products:
        fetched_at = datetime.datetime.utcnow()
        product['api_fetch_time'] = fetched_at
        # Same instant truncated to midnight, i.e. the fetch day.
        product['api_fetch_date'] = fetched_at.replace(
            hour=0, minute=0, second=0, microsecond=0)

    # The field list is presumably the upsert match key -- confirm against
    # database.upsert_many.
    upsert_fields = ['id', 'price', 'api_fetch_date']
    database.upsert_many(database.mercadolibre_raw_data, products,
                         upsert_fields)
def main():
    """Summarize offer prices per phone, week and currency and upsert them.

    Reads every document from `database.offer_history`, then:

    1. Skips offers whose `visible_classification` is falsy, and offers
       whose ISO week number equals the current UTC ISO week number.
    2. Groups the remaining offers by (phone id, zero-based week, currency),
       keeping only the most recent offer for each link within a group.
    3. Builds one summary per group from `get_statistics` over the kept
       offers' amounts and upserts the summaries into
       `database.weekly_phone_price`.

    Finally calls `generate_last_7_days_prices` with the full offer history.

    NOTE: the year is not part of the grouping key and `end_of_week` is
    computed for a hard-coded 2020 (see TODO below).
    """
    date_format = "%d-%m-%Y"

    # Get phone prices from offer_history
    offer_history = list(database.offer_history.find())

    # Invariant for the whole run, so compute it once outside the loop.
    current_week = datetime.utcnow().isocalendar()[1]

    # Group offers by phone, week and currency; one (latest) offer per link
    grouped_offers = {}
    for offer in offer_history:
        if not offer['visible_classification']:
            continue  # Ignore offers that have low classification score to avoid noise in the data
        offer_mobile_phone_id = offer["classified_mobile_phone_id"]
        offer_date = datetime.strptime(offer["date"], date_format)
        week = offer_date.isocalendar()[1]
        if week == current_week:
            continue  # Skip current week until it is over
        week -= 1  # Current week starts in 1 in isocalendar
        idx = (offer_mobile_phone_id, week, offer["currency"])
        offers_by_link = grouped_offers.setdefault(idx, {})
        offer_link = offer['link']
        existing_offer = offers_by_link.get(offer_link)
        # Dates are stored as "dd-mm-YYYY" strings, so they must be parsed
        # before comparing: a plain string comparison orders by day-of-month
        # first and picks the wrong offer across month boundaries.
        if (existing_offer is None
                or datetime.strptime(existing_offer['date'], date_format)
                < offer_date):
            offers_by_link[offer_link] = offer

    weekly_price_summary = []
    for (phone_id, week, currency), offers_by_link in grouped_offers.items():
        prices = [offer['amount'] for offer in offers_by_link.values()]
        price_summary = {
            "phone_id": phone_id,
            "week_of_year": week,
            "currency": currency,
            # TODO: Mover year to column in data
            "end_of_week": datetime.strptime(
                f'2020-{week}-0', "%Y-%W-%w").strftime(date_format),
        }
        price_summary.update(get_statistics(prices))
        weekly_price_summary.append(price_summary)

    # Upsert prices in weekly phone prices
    if weekly_price_summary:
        database.upsert_many(database.weekly_phone_price, weekly_price_summary,
                             ['phone_id', 'week_of_year', 'currency'])
        logger.info("Weekly prices updated")
    else:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(
            "Nothing to summarize in weekly prices, are there no offers matching phones with good scores?"
        )

    logger.info("Collecting prices per phone for the last 7 days")

    generate_last_7_days_prices(offer_history)
def main():
    """Classify new offer titles and propagate classifications to offers.

    Steps:

    1. Load all offers from `database.offer_history` and collect their
       titles (offers with a non-string title are printed for inspection).
    2. Classify every title that has no entry yet in
       `database.phone_classifications` via
       `get_classified_titles_by_phone` and store the new classifications.
    3. For each offer whose title has a classification, copy the classified
       phone, phone id and score onto the offer when the phone or phone id
       is missing or differs, and set `visible_classification` to whether
       the score is above 0.5.
    4. Upsert the changed offers back into `database.offer_history`.
    """
    # Get list of titles from offers
    offers = list(database.offer_history.find())
    for offer in offers:
        if not isinstance(offer['title'], str):
            print(offer)
    offer_titles = {offer['title'] for offer in offers}

    # Check which titles are not classified yet
    classifications = list(database.phone_classifications.find())
    classified_titles = {
        classification['offer_title']
        for classification in classifications
    }
    unclassified_titles = offer_titles.difference(classified_titles)

    if unclassified_titles:
        # The original message lacked the f-prefix (and had misplaced
        # braces), so it logged the template text instead of the count.
        logger.info("%d new titles to classify", len(unclassified_titles))

        # Load phone data for classification
        phones = list(database.mobile_phone.find())

        # Classify titles
        new_phone_classifications = get_classified_titles_by_phone(
            unclassified_titles, phones)

        # Save classified titles
        database.insert_many_ignore_duplicates(database.phone_classifications,
                                               new_phone_classifications)

        # Update list of classifications in memory
        classifications.extend(new_phone_classifications)
    else:
        logger.info("There are no new titles to classify")

    # Update all offers with the classification
    classification_by_title = {
        classification["offer_title"]: classification
        for classification in classifications
    }
    # Fields whose absence or mismatch triggers a reclassification.
    compared_fields = ("classified_mobile_phone", "classified_mobile_phone_id")
    updated_offers = []
    for offer in offers:
        classification = classification_by_title.get(offer['title'])
        if classification is None:
            continue
        needs_update = any(
            field not in offer or offer[field] != classification[field]
            for field in compared_fields)
        if not needs_update:
            continue
        score = classification['classification_score']
        offer["classified_mobile_phone"] = classification[
            'classified_mobile_phone']
        offer["classified_mobile_phone_id"] = classification[
            'classified_mobile_phone_id']
        offer["classification_score"] = score
        offer["visible_classification"] = score > 0.5
        updated_offers.append(offer)

    if updated_offers:
        logger.info(
            f"Updating offer history with {len(updated_offers)} new reclassification"
        )
        database.upsert_many(database.offer_history, updated_offers)
        logger.info("Classification completed")
    else:
        logger.info("There are no offers to classify")
Example #5
0
        else:
            seen[obj_id] = obj
    return list(seen.values())


# Convert the CSV at `in_filename` into a list of objects, clean it up,
# drop duplicates by "dataset_unique_name", dump the result as JSON to
# `out_filename` and upsert it into the mobile_phone collection.
#
# newline='' is what the csv module requires of files passed to csv.reader;
# without it, newlines embedded in quoted fields are not parsed correctly.
with open(in_filename, 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    headers = next(reader)
    field_mapping = get_columns_from_fields(headers)

    logger.debug(field_mapping)
    object_dataset = [
        get_obj_from_row_and_mapping(row, field_mapping) for row in reader
    ]

# All rows are materialized above, so the input file can be closed before
# the remaining processing and output steps.
logger.debug("Before cleanup: %d", len(object_dataset))
object_dataset = dataset_cleanup(object_dataset)
logger.debug("After cleanup: %d", len(object_dataset))
object_dataset = remove_duplicates(object_dataset, "dataset_unique_name")

logger.debug("After removing duplicates: %d", len(object_dataset))

# logger.debug(object_dataset[:20])

with open(out_filename, 'w') as jsonfile:
    json.dump(object_dataset, jsonfile, indent=4)

# database.mobile_phone.drop()
# database.mobile_phone.insert_many(object_dataset)
database.upsert_many(database.mobile_phone, object_dataset,
                     ["dataset_unique_name"])