def updated_product_add_category_insight(barcode: str, product: JSONType) -> bool:
    """Predict and import a category insight for a product lacking categories.

    The Elasticsearch-based predictor is tried first; when it yields nothing,
    the ML predictor (with blacklisted categories filtered out) is used as a
    fallback.

    :param barcode: barcode of the product being updated.
    :param product: the product document (JSON-like dict).
    :return: True if at least one category insight was imported.
    """
    # A product that already has category tags needs no prediction.
    if product.get('categories_tags', []):
        return False

    es_insight = predict_category_from_product_es(product)

    if es_insight is not None:
        insights = [es_insight]
    else:
        insights = predict_category_from_product_ml(product, filter_blacklisted=True)
        if not insights:
            return False
        predicted = [
            "{} ({})".format(candidate["category"], candidate["confidence"])
            for candidate in insights
        ]
        logger.info("Predicted categories for product {}: {}"
                    "".format(barcode, predicted))

    product_store = CACHED_PRODUCT_STORE.get()
    importer = InsightImporterFactory.create(InsightType.category.name,
                                             product_store)
    imported = importer.import_insights(insights, automatic=False)

    if imported:
        logger.info("Category insight imported for product {}".format(barcode))

    return bool(imported)
def refresh_insights(with_deletion: bool = False):
    """Refresh every pending (non-annotated) insight against the product store.

    For each insight with no annotation, a timestamp before today (UTC) and
    belonging to the configured server domain:

    - if the product no longer exists and ``with_deletion`` is True, the
      insight is deleted (the product was removed from OFF);
    - otherwise the insight is checked by a type-specific validator and
      deleted when invalid, or its product-derived attributes are refreshed.

    The job aborts when the minified JSONL dataset dump is not from today, to
    avoid deleting insights based on a stale product dump.

    :param with_deletion: when True, delete insights whose product has
        disappeared from the store.
    """
    deleted = 0
    updated = 0
    product_store = CACHED_PRODUCT_STORE.get()
    # Midnight UTC of the current day: insights older than this are candidates.
    datetime_threshold = datetime.datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0)
    dataset_datetime = datetime.datetime.fromtimestamp(
        os.path.getmtime(settings.JSONL_MIN_DATASET_PATH))

    if dataset_datetime.date() != datetime_threshold.date():
        # Fix: logger.warn is a deprecated alias; logger.warning is the
        # documented API.
        logger.warning(
            "Dataset version is not up to date, aborting insight removal job")
        return

    # Validators are created lazily and cached per insight type.
    validators: Dict[str, InsightValidator] = {}

    with db:
        with db.atomic():
            for insight in (ProductInsight.select().where(
                    ProductInsight.annotation.is_null(),
                    ProductInsight.timestamp <= datetime_threshold,
                    ProductInsight.server_domain == settings.OFF_SERVER_DOMAIN,
            ).iterator()):
                product: Product = product_store[insight.barcode]

                if product is None:
                    if with_deletion:
                        # Product has been deleted from OFF
                        logger.info("Product with barcode {} deleted"
                                    "".format(insight.barcode))
                        deleted += 1
                        insight.delete_instance()
                else:
                    if insight.type not in validators:
                        validators[insight.type] = InsightValidatorFactory.create(
                            insight.type, product_store)

                    validator = validators[insight.type]
                    insight_deleted = delete_invalid_insight(insight, validator)

                    if insight_deleted:
                        deleted += 1
                        logger.info(
                            "invalid insight {} (type: {}), deleting..."
                            "".format(insight.id, insight.type))
                        continue

                    insight_updated = update_insight_attributes(product, insight)

                    if insight_updated:
                        updated += 1

    logger.info("{} insights deleted".format(deleted))
    logger.info("{} insights updated".format(updated))
def import_insights(insight_type: str, items: List[str]):
    """Import insights of a given type from JSON-serialized items.

    :param insight_type: the insight type the importer should handle.
    :param items: JSON strings, one insight payload per item.
    """
    product_store = CACHED_PRODUCT_STORE.get()
    importer: InsightImporter = InsightImporterFactory.create(
        insight_type, product_store)

    # Decode lazily: the importer consumes the generator inside one transaction.
    insight_iter = (json.loads(item) for item in items)

    with db.atomic():
        imported = importer.import_insights(insight_iter, automatic=False)
        logger.info("Import finished, {} insights imported".format(imported))
def generate_insights():
    """Generate and import category insights from the latest dataset dump,
    restricted to products added the previous day."""
    logger.info("Generating new category insights")
    product_store: ProductStore = CACHED_PRODUCT_STORE.get()
    importer = CategoryImporter(product_store)

    # Midnight UTC of the previous day: only consider recently added products.
    yesterday_midnight = datetime.datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0) - datetime.timedelta(days=1)

    dataset = ProductDataset(settings.JSONL_DATASET_PATH)
    insight_iter = predict_from_dataset(dataset, yesterday_midnight)
    imported = importer.import_insights(insight_iter)
    logger.info("{} category insights imported".format(imported))
def updated_product_add_category_insight(barcode: str, product: JSONType) -> bool:
    """Predict a category for the product and import it as an insight.

    Nothing happens when the product already has category tags or when no
    prediction is available.

    :param barcode: barcode of the product being updated.
    :param product: the product document (JSON-like dict).
    :return: True when an insight was imported.
    """
    # Skip products that already carry category tags.
    if product.get('categories_tags', []):
        return False

    prediction = predict_from_product(product)
    if prediction is None:
        return False

    store = CACHED_PRODUCT_STORE.get()
    importer = InsightImporterFactory.create(InsightType.category.name, store)
    imported = importer.import_insights([prediction], automatic=False)

    if imported:
        logger.info("Category insight imported for product {}".format(barcode))

    return bool(imported)
def import_image(barcode: str, image_url: str, ocr_url: str):
    """Extract every insight type from a product image and import them.

    Image-flag insights trigger a notification instead of being stored; all
    other insight groups are imported automatically, each inside its own
    transaction.

    :param barcode: barcode of the product the image belongs to.
    :param image_url: URL of the product image.
    :param ocr_url: URL of the OCR result for that image.
    """
    logger.info("Detect insights for product {}, "
                "image {}".format(barcode, image_url))
    product_store = CACHED_PRODUCT_STORE.get()
    insights_all = get_insights_from_image(barcode, image_url, ocr_url)

    if insights_all is None:
        return

    for insight_type, insights in insights_all.items():
        # Image-flag insights are only notified, never persisted.
        if insight_type == InsightType.image_flag.name:
            notify_image_flag(insights['insights'],
                              insights['source'],
                              insights['barcode'])
            continue

        logger.info("Extracting {}".format(insight_type))
        importer: InsightImporter = InsightImporterFactory.create(
            insight_type, product_store)

        with db.atomic():
            imported = importer.import_insights([insights], automatic=True)
            logger.info("Import finished, {} insights imported".format(imported))