Beispiel #1
0
def updated_product_add_category_insight(barcode: str,
                                         product: JSONType) -> bool:
    """Predict and import category insights for an uncategorized product.

    Nothing is done when ``product`` already has a non-empty
    ``categories_tags`` entry. Otherwise the result of
    ``predict_category_from_product_es`` is used; when it is ``None`` the
    function falls back to ``predict_category_from_product_ml`` (called with
    ``filter_blacklisted=True``) and logs what was predicted.

    Returns ``False`` when the product is already categorized or when no
    prediction is available, otherwise the truthiness of what the importer
    returned.
    """
    already_categorized = product.get('categories_tags', [])
    if already_categorized:
        return False

    es_prediction = predict_category_from_product_es(product)

    if es_prediction is not None:
        candidates = [es_prediction]
    else:
        # No prediction from the first predictor: fall back to the second.
        candidates = predict_category_from_product_ml(
            product, filter_blacklisted=True)

        if not candidates:
            return False

        summary = []
        for candidate in candidates:
            summary.append("{} ({})".format(candidate["category"],
                                            candidate["confidence"]))
        logger.info(
            "Predicted categories for product {}: {}".format(barcode, summary))

    store = CACHED_PRODUCT_STORE.get()
    category_importer = InsightImporterFactory.create(
        InsightType.category.name, store)
    import_result = category_importer.import_insights(candidates,
                                                      automatic=False)

    succeeded = bool(import_result)
    if succeeded:
        logger.info("Category insight imported for product {}".format(barcode))

    return succeeded
Beispiel #2
0
def refresh_insights(with_deletion: bool = False):
    """Delete invalid pending insights and refresh the remaining ones.

    The job only runs when the file at ``settings.JSONL_MIN_DATASET_PATH``
    was last modified today (UTC); otherwise it logs a warning and returns.

    Every insight that has no annotation, whose timestamp is not later than
    today's UTC midnight and whose server domain is
    ``settings.OFF_SERVER_DOMAIN`` is examined:

    * if the product store has no product for its barcode, the insight is
      deleted, but only when ``with_deletion`` is true;
    * otherwise it is deleted if ``delete_invalid_insight`` reports it as
      invalid, else its attributes are refreshed through
      ``update_insight_attributes``.

    The numbers of deleted and updated insights are logged at the end.
    """
    deleted = 0
    updated = 0
    product_store = CACHED_PRODUCT_STORE.get()

    datetime_threshold = datetime.datetime.utcnow().replace(hour=0,
                                                            minute=0,
                                                            second=0,
                                                            microsecond=0)
    # The threshold is a UTC date, so the file modification time has to be
    # read in UTC as well: a local-time conversion would make the freshness
    # check wrong around midnight on hosts that do not run in UTC.
    dataset_datetime = datetime.datetime.fromtimestamp(
        os.path.getmtime(settings.JSONL_MIN_DATASET_PATH),
        tz=datetime.timezone.utc)

    if dataset_datetime.date() != datetime_threshold.date():
        logger.warning(
            "Dataset version is not up to date, aborting insight removal job")
        return

    # One validator per insight type, created lazily on first use.
    validators: Dict[str, InsightValidator] = {}

    with db:
        with db.atomic():
            for insight in (ProductInsight.select().where(
                    ProductInsight.annotation.is_null(),
                    ProductInsight.timestamp <= datetime_threshold,
                    ProductInsight.server_domain == settings.OFF_SERVER_DOMAIN,
            ).iterator()):
                product: Product = product_store[insight.barcode]

                if product is None:
                    if with_deletion:
                        # Product has been deleted from OFF
                        logger.info("Product with barcode {} deleted"
                                    "".format(insight.barcode))
                        deleted += 1
                        insight.delete_instance()
                    continue

                if insight.type not in validators:
                    validators[insight.type] = InsightValidatorFactory.create(
                        insight.type, product_store)

                validator = validators[insight.type]

                if delete_invalid_insight(insight, validator):
                    deleted += 1
                    logger.info(
                        "invalid insight {} (type: {}), deleting..."
                        "".format(insight.id, insight.type))
                    continue

                if update_insight_attributes(product, insight):
                    updated += 1

    logger.info("{} insights deleted".format(deleted))
    logger.info("{} insights updated".format(updated))
Beispiel #3
0
def import_insights(insight_type: str, items: List[str]):
    """Import JSON-encoded insights of the given type.

    Each element of ``items`` is decoded with ``json.loads`` and handed to
    the importer created for ``insight_type``, with ``automatic=False``.
    The import and the final log message both happen inside a single
    ``db.atomic()`` block.
    """
    store = CACHED_PRODUCT_STORE.get()
    insight_importer: InsightImporter = InsightImporterFactory.create(
        insight_type, store)

    with db.atomic():
        # Decoding stays lazy: items are parsed as the importer consumes them.
        decoded_items = (json.loads(raw_item) for raw_item in items)
        imported_count = insight_importer.import_insights(decoded_items,
                                                          automatic=False)
        logger.info(
            "Import finished, {} insights imported".format(imported_count))
Beispiel #4
0
def generate_insights():
    """Generate category insights from the dataset dump and import them.

    Predictions are computed by ``predict_from_dataset`` over the dataset at
    ``settings.JSONL_DATASET_PATH``, using yesterday at 00:00 (UTC) as the
    datetime threshold, so that products added at day-1 are covered. The
    result is imported through a ``CategoryImporter`` and the importer's
    return value is logged.
    """
    logger.info("Generating new category insights")
    store: ProductStore = CACHED_PRODUCT_STORE.get()
    category_importer = CategoryImporter(store)

    # Truncate "now" to midnight, then step back one full day.
    today_midnight = datetime.datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0)
    one_day = datetime.timedelta(days=1)
    datetime_threshold = today_midnight - one_day

    product_dataset = ProductDataset(settings.JSONL_DATASET_PATH)
    predictions = predict_from_dataset(product_dataset, datetime_threshold)

    imported_count = category_importer.import_insights(predictions)
    logger.info("{} category insights imported".format(imported_count))
Beispiel #5
0
def updated_product_add_category_insight(barcode: str,
                                         product: JSONType) -> bool:
    """Predict and import a category insight for an uncategorized product.

    Returns ``False`` without importing anything when ``product`` already
    has a non-empty ``categories_tags`` entry, or when
    ``predict_from_product`` returns ``None``. Otherwise the single
    predicted insight is imported with ``automatic=False`` and the
    truthiness of the importer's result is returned.
    """
    existing_categories = product.get('categories_tags', [])
    if existing_categories:
        return False

    prediction = predict_from_product(product)
    if prediction is None:
        return False

    store = CACHED_PRODUCT_STORE.get()
    category_importer = InsightImporterFactory.create(
        InsightType.category.name, store)
    import_result = category_importer.import_insights([prediction],
                                                      automatic=False)

    succeeded = bool(import_result)
    if succeeded:
        logger.info("Category insight imported for product {}".format(barcode))

    return succeeded
Beispiel #6
0
def import_image(barcode: str, image_url: str, ocr_url: str):
    """Extract insights from a product image and import them.

    The insights come from ``get_insights_from_image``; when it returns
    ``None`` nothing else happens. The result is iterated as a mapping from
    insight type to payload:

    * the ``image_flag`` payload is not imported: its ``insights``,
      ``source`` and ``barcode`` entries are passed to ``notify_image_flag``;
    * every other payload is imported, wrapped in a one-element list, by the
      importer created for its type, with ``automatic=True`` and inside its
      own ``db.atomic()`` block.
    """
    logger.info(
        "Detect insights for product {}, image {}".format(barcode, image_url))
    store = CACHED_PRODUCT_STORE.get()
    extracted = get_insights_from_image(barcode, image_url, ocr_url)

    if extracted is None:
        return

    for type_name, payload in extracted.items():
        if type_name == InsightType.image_flag.name:
            # Image flags trigger a notification instead of an import.
            notify_image_flag(payload['insights'],
                              payload['source'],
                              payload['barcode'])
        else:
            logger.info("Extracting {}".format(type_name))
            type_importer: InsightImporter = InsightImporterFactory.create(
                type_name, store)

            with db.atomic():
                imported_count = type_importer.import_insights(
                    [payload], automatic=True)
                logger.info("Import finished, {} insights imported".format(
                    imported_count))