def main():
    """Fetch new-condition smartphone listings from eBay for a set of brands
    and upsert the raw results into the database.

    Each item's nested eBay price structure is flattened into
    ``extracted_current_price`` / ``extracted_current_currency``, and fetch
    timestamps are attached so repeated runs on the same day upsert rather
    than duplicate.
    """
    item_filters = {
        "Condition": "1000",  # NEW
        "ListingType": [
            "Classified",
            "FixedPrice",
            "StoreInventory",
        ],
        "HideDuplicateItems": "true",
        "TopRatedSellerOnly": "true",
    }
    pages = 10
    brands = ["Samsung", "Xiaomi", "Motorola", "Apple", "Huawei",
              "Nokia", "LG", "Sony", "Honor", "Google"]
    results = []
    for brand in brands:
        brand_results = advanced_search(SMARTPHONE_CATEGORY, pages,
                                        item_filters, brand)
        results.extend(brand_results)

    # BUGFIX: compute the timestamp ONCE for the whole batch. The original
    # called utcnow() per item inside the loop; since api_fetch_date is part
    # of the upsert key below, a run crossing midnight UTC would split one
    # batch across two dates and produce duplicate rows.
    date_now = datetime.datetime.utcnow()
    fetch_date = datetime.datetime(date_now.year, date_now.month, date_now.day)
    for item in results:
        # eBay's Finding API wraps every field in a single-element list.
        current_price = item['sellingStatus'][0]["currentPrice"][0]
        item['extracted_current_price'] = current_price["__value__"]
        item['extracted_current_currency'] = current_price["@currencyId"]
        item['api_fetch_time'] = date_now
        item['api_fetch_date'] = fetch_date

    database.upsert_many(database.ebay_raw_data, results,
                         ['itemId', 'extracted_current_price', 'api_fetch_date'])
def main():
    """Fetch the most-sold new listings from MercadoLibre category MLA1055
    (cellphones) and upsert them into the database.
    """
    category = 'MLA1055'
    query = None
    only_new = True
    find_most_sold = True
    products = ml_api.find_products_by_category(category, query, only_new,
                                                find_most_sold)

    # BUGFIX: single timestamp for the whole batch. api_fetch_date is part of
    # the upsert key, so calling utcnow() per item (as before) risked
    # splitting one run across two dates at midnight UTC.
    date_now = datetime.datetime.utcnow()
    fetch_date = datetime.datetime(date_now.year, date_now.month, date_now.day)
    for item in products:
        item['api_fetch_time'] = date_now
        item['api_fetch_date'] = fetch_date

    database.upsert_many(database.mercadolibre_raw_data, products,
                         ['id', 'price', 'api_fetch_date'])
def main():
    """Summarize offer prices into weekly per-phone statistics.

    Groups visible offers by (phone, ISO week, currency), keeps only the most
    recent offer per link within each group, computes price statistics and
    upserts them into ``weekly_phone_price``. Finally regenerates the rolling
    last-7-days prices.
    """
    # Get phone prices from offer_history
    offer_history = list(database.offer_history.find())

    # Group prices by phone, week and currency
    grouped_offers = {}
    today = datetime.utcnow()
    for offer in offer_history:
        if not offer['visible_classification']:
            continue  # Ignore offers that have low classification score to avoid noise in the data
        offer_mobile_phone_id = offer["classified_mobile_phone_id"]
        offer_date = datetime.strptime(offer["date"], "%d-%m-%Y")
        week = offer_date.isocalendar()[1]
        if (today.isocalendar()[1]) == week:
            continue  # Skip current week until it is over
        week -= 1  # Current week starts in 1 in isocalendar
        currency = offer["currency"]
        idx = (offer_mobile_phone_id, week, currency)
        offers_by_link = grouped_offers.get(idx, {})
        offer_link = offer['link']
        if offer_link in offers_by_link:
            existing_offer = offers_by_link[offer_link]
            # BUGFIX: compare parsed dates, not "%d-%m-%Y" strings.
            # Lexicographic comparison of day-first strings orders wrongly
            # across months/years (e.g. "02-01-2021" < "31-12-2020"), so the
            # "most recent offer per link" could be the older one.
            existing_date = datetime.strptime(existing_offer['date'], "%d-%m-%Y")
            if existing_date < offer_date:
                offers_by_link[offer_link] = offer
        else:
            offers_by_link[offer_link] = offer
        grouped_offers[idx] = offers_by_link

    grouped_prices = {}
    for idx, offers_by_link in grouped_offers.items():
        prices = [offer['amount'] for offer in offers_by_link.values()]
        grouped_prices[idx] = prices

    weekly_price_summary = []
    for (phone_id, week, currency), prices in grouped_prices.items():
        price_summary = {
            "phone_id": phone_id,
            "week_of_year": week,
            "currency": currency,
            # TODO: Mover year to column in data
            "end_of_week": datetime.strptime(f'2020-{week}-0', "%Y-%W-%w").strftime("%d-%m-%Y"),
        }
        price_statistics = get_statistics(prices)
        price_summary.update(price_statistics)
        weekly_price_summary.append(price_summary)

    # Upsert prices in weekly phone prices
    if weekly_price_summary:
        database.upsert_many(database.weekly_phone_price, weekly_price_summary,
                             ['phone_id', 'week_of_year', 'currency'])
        logger.info("Weekly prices updated")
    else:
        # logger.warn is a deprecated alias; logger.warning is the real API.
        logger.warning(
            "Nothing to summarize in weekly prices, are there no offers matching phones with good scores?"
        )

    logger.info("Collecting prices per phone for the last 7 days")
    generate_last_7_days_prices(offer_history)
def main():
    """Classify offer titles to mobile phones and propagate the
    classifications back onto the offer history.

    New (not yet classified) titles are matched against the phone catalogue
    and stored; every offer with a title match is then upserted back with its
    classification fields refreshed.
    """
    # Get list of titles from offers
    offers = list(database.offer_history.find())
    for offer in offers:
        title = offer['title']
        if not isinstance(title, str):
            # Debug aid: surface offers with malformed (non-string) titles.
            print(offer)
    offer_titles = {offer['title'] for offer in offers}

    # Check which titles are not classfied yet
    classifications = list(database.phone_classifications.find())
    classified_titles = {
        classification['offer_title']
        for classification in classifications
    }
    unclassified_titles = offer_titles.difference(classified_titles)
    if unclassified_titles:
        # BUGFIX: original string lacked the f-prefix and misplaced the
        # braces, so it logged the literal template text instead of the count.
        logger.info(f"{len(unclassified_titles)} new titles to classify")
        # Load phone data for classification
        phones = list(database.mobile_phone.find())
        # Classify titles
        new_phone_classifications = get_classified_titles_by_phone(
            unclassified_titles, phones)
        # Save classified titles
        database.insert_many_ignore_duplicates(database.phone_classifications,
                                               new_phone_classifications)
        # Update list of classifications in memory
        classifications.extend(new_phone_classifications)
    else:
        logger.info("There are no new titles to classify")

    # Update all offers with the classification
    classification_by_title = {
        classification["offer_title"]: classification
        for classification in classifications
    }
    reclassification_count = 0
    updated_offers = []
    for offer in offers:
        if offer['title'] not in classification_by_title:
            continue
        classification = classification_by_title[offer['title']]
        # Count offers whose assigned phone actually changed; all matched
        # offers are refreshed and upserted regardless.
        if ("classified_mobile_phone" not in offer
                or offer["classified_mobile_phone"] != classification['classified_mobile_phone']
                or "classified_mobile_phone_id" not in offer
                or offer["classified_mobile_phone_id"] != classification['classified_mobile_phone_id']):
            reclassification_count += 1
        offer["classified_mobile_phone"] = classification['classified_mobile_phone']
        offer["classified_mobile_phone_id"] = classification['classified_mobile_phone_id']
        offer["classification_score"] = classification['classification_score']
        # Offers scoring 0.5 or below are hidden from downstream summaries.
        offer["visible_classification"] = classification['classification_score'] > 0.5
        updated_offers.append(offer)

    if updated_offers:
        logger.info(
            f"Updating offer history with {reclassification_count} new reclassification"
        )
        database.upsert_many(database.offer_history, updated_offers)
        logger.info("Classification completed")
    else:
        logger.info("There are no offers to classify")
else: seen[obj_id] = obj return list(seen.values()) with open(in_filename, 'r') as csvfile: reader = csv.reader(csvfile) headers = next(reader) field_mapping = get_columns_from_fields(headers) logger.debug(field_mapping) object_dataset = [ get_obj_from_row_and_mapping(row, field_mapping) for row in reader ] logger.debug("Before cleanup: %d", len(object_dataset)) object_dataset = dataset_cleanup(object_dataset) logger.debug("After cleanup: %d", len(object_dataset)) object_dataset = remove_duplicates(object_dataset, "dataset_unique_name") logger.debug("After removing duplicates: %d", len(object_dataset)) # logger.debug(object_dataset[:20]) with open(out_filename, 'w') as jsonfile: json.dump(object_dataset, jsonfile, indent=4) # database.mobile_phone.drop() # database.mobile_phone.insert_many(object_dataset) database.upsert_many(database.mobile_phone, object_dataset, ["dataset_unique_name"])