コード例 #1
0
    def import_insights(
        self,
        data: Iterable[ProductInsights],
        server_domain: str,
        automatic: bool,
    ) -> int:
        """Process product insights and insert them in chunks of 50.

        The input is passed through ``self.process_insights`` and
        ``self.add_fields`` (with the current UTC timestamp), then consumed
        chunk by chunk. A latent insight is skipped when ``exist_latent``
        returns a truthy value for its dict form; every other insight is
        inserted.

        :param data: the product insights to import.
        :param server_domain: forwarded to ``process_insights`` and
            ``add_fields``.
        :param automatic: forwarded to ``process_insights``.
        :return: the sum of the values returned by ``batch_insert`` for each
            chunk.
        """
        timestamp = datetime.datetime.utcnow()
        processed: Iterator[Insight] = self.process_insights(
            data, server_domain, automatic)
        enriched = self.add_fields(processed, timestamp, server_domain)

        total_inserted = 0
        for chunk in chunked(enriched, 50):
            rows: List[JSONType] = []

            for insight in chunk:
                row = insight.to_dict()

                # Only latent insights are subject to the existence check;
                # non-latent ones are always kept.
                if insight.latent and exist_latent(row):
                    continue

                rows.append(row)

            total_inserted += batch_insert(ProductInsight, rows, 50)

        return total_inserted
コード例 #2
0
ファイル: importer.py プロジェクト: openfoodfacts/robotoff
    def import_insights(
        cls,
        predictions: List[Prediction],
        server_domain: str,
        automatic: bool,
        product_store: DBProductStore,
    ) -> int:
        """Import insights generated from predictions (main entry point).

        All predictions are validated first: each type must be among the
        types returned by ``cls.get_required_prediction_types()``. Then, for
        every ``(to_create, to_delete)`` pair yielded by
        ``cls.generate_insights``, the insights to delete are removed by ID
        and the insights to create are inserted in batches of 50.

        :param predictions: the predictions to generate insights from.
        :param server_domain: forwarded to ``generate_insights``.
        :param automatic: forwarded to ``generate_insights``.
        :param product_store: forwarded to ``generate_insights``.
        :raises ValueError: if a prediction has an unexpected type.
        :return: the number of insights that were imported.
        """
        allowed_types = cls.get_required_prediction_types()
        for prediction in predictions:
            if prediction.type in allowed_types:
                continue
            raise ValueError(
                f"unexpected prediction type: '{prediction.type}'")

        insert_count = 0
        insight_changes = cls.generate_insights(
            predictions, server_domain, automatic, product_store)

        for to_create, to_delete in insight_changes:
            if to_delete:
                to_delete_ids = [insight.id for insight in to_delete]
                logger.info(
                    f"Deleting insight IDs: {[str(x) for x in to_delete_ids]}")
                delete_query = ProductInsight.delete().where(
                    ProductInsight.id.in_(to_delete_ids))
                delete_query.execute()

            if to_create:
                # Rows are converted lazily as batch_insert consumes them.
                rows = (model_to_dict(insight) for insight in to_create)
                insert_count += batch_insert(ProductInsight, rows, 50)

        return insert_count
コード例 #3
0
    def import_insights(self,
                        data: Iterable[Dict],
                        automatic: bool = False) -> int:
        """Import ingredient spellcheck insights and product ingredients.

        Each item must provide the keys ``'barcode'``, ``'corrections'`` and
        ``'text'``. One ``ProductIngredient`` row is queued per distinct
        barcode, and one ``ProductInsight`` row per distinct
        ``(barcode, start_offset, end_offset)`` correction. Queued rows are
        flushed once a queue holds at least 50 entries, and once more at the
        end.

        :param data: iterable of item dicts as described above.
        :param automatic: not used by this implementation; the
            ``'automatic_processing'`` field is always set to ``False``.
        :return: the sum of the values returned by ``batch_insert`` for the
            ``ProductInsight`` rows.
        """
        timestamp = datetime.datetime.utcnow()
        seen_barcodes: Set[str] = set()
        seen_corrections: Set = set()
        pending_insights = []
        pending_ingredients = []
        inserted = 0

        for item in data:
            barcode = item['barcode']
            corrections = item['corrections']
            text = item['text']

            # Only the first item seen for a barcode contributes its text.
            if barcode not in seen_barcodes:
                ingredient_row = {
                    'barcode': barcode,
                    'ingredients': item['text'],
                }
                pending_ingredients.append(ingredient_row)
                seen_barcodes.add(barcode)

            for correction in corrections:
                start_offset = correction['start_offset']
                end_offset = correction['end_offset']
                dedup_key = (barcode, start_offset, end_offset)

                # A correction over the same span of the same product has
                # already been queued.
                if dedup_key in seen_corrections:
                    continue

                original_snippet = self.generate_snippet(
                    text, start_offset, end_offset, correction['original'])
                corrected_snippet = self.generate_snippet(
                    text, start_offset, end_offset,
                    correction['correction'])
                insight_data = {
                    **correction,
                    'original_snippet': original_snippet,
                    'corrected_snippet': corrected_snippet,
                }
                insight_row = {
                    'id': str(uuid.uuid4()),
                    'type': InsightType.ingredient_spellcheck.name,
                    'barcode': barcode,
                    'timestamp': timestamp,
                    'automatic_processing': False,
                    'data': insight_data,
                }
                pending_insights.append(insight_row)
                seen_corrections.add(dedup_key)

            if len(pending_ingredients) >= 50:
                batch_insert(ProductIngredient, pending_ingredients, 50)
                pending_ingredients = []

            if len(pending_insights) >= 50:
                inserted += batch_insert(ProductInsight, pending_insights, 50)
                pending_insights = []

        # Flush whatever is left in both queues.
        batch_insert(ProductIngredient, pending_ingredients, 50)
        inserted += batch_insert(ProductInsight, pending_insights, 50)
        return inserted
コード例 #4
0
    def import_insights(self,
                        data: Iterable[Dict],
                        automatic: bool = False) -> int:
        """Group insights by barcode, process them and insert the result.

        For each barcode group returned by ``self.group_by_barcode``, the
        insights are passed through ``self.deduplicate_insights`` and
        ``self.sort_by_priority``, then through
        ``self._process_product_insights``. All resulting rows are collected
        and inserted with a single ``batch_insert`` call (batch size 50).

        :param data: iterable of insight dicts.
        :param automatic: forwarded to ``_process_product_insights``.
        :return: the value returned by ``batch_insert``.
        """
        insights_by_barcode: GroupedByOCRInsights = self.group_by_barcode(data)
        rows: List[Dict] = []
        timestamp = datetime.datetime.utcnow()

        for barcode, raw_insights in insights_by_barcode.items():
            unique_insights = list(self.deduplicate_insights(raw_insights))
            ranked_insights = self.sort_by_priority(unique_insights)
            rows.extend(
                self._process_product_insights(barcode, ranked_insights,
                                               timestamp, automatic))

        return batch_insert(ProductInsight, rows, 50)
コード例 #5
0
    def on_post(self, req: falcon.Request, resp: falcon.Response):
        """Insert the image predictions found in the request body.

        Reads ``req.media["predictions"]``. For each prediction, the server
        domain defaults to ``settings.OFF_SERVER_DOMAIN``, and a source image
        path is built from the prediction's ``"barcode"`` and ``"image_id"``.
        ``"image_id"`` is removed from the prediction dict in the process.
        The resulting rows are inserted as ``ImagePrediction`` and the
        inserted count is logged. ``resp`` is not modified here.
        """
        timestamp = datetime.datetime.utcnow()
        rows = []

        for prediction in req.media["predictions"]:
            server_domain: str = prediction.get("server_domain",
                                                settings.OFF_SERVER_DOMAIN)
            server_type: str = get_server_type(server_domain).name
            barcode = prediction["barcode"]
            # pop() removes "image_id" so it is not copied into the row below.
            image_id = prediction.pop("image_id")
            source_image = generate_image_path(barcode, image_id)

            row = {
                "timestamp": timestamp,
                "server_domain": server_domain,
                "server_type": server_type,
                "source_image": source_image,
            }
            # Remaining prediction fields are merged last, so they take
            # precedence over the computed values on key collision.
            row.update(prediction)
            rows.append(row)

        inserted = batch_insert(ImagePrediction, rows)
        message = "{} image predictions inserted".format(inserted)
        logger.info(message)
コード例 #6
0
ファイル: importer.py プロジェクト: openfoodfacts/robotoff
def import_product_predictions(
    barcode: str,
    product_predictions_iter: Iterable[Prediction],
    server_domain: str,
):
    """Import the predictions of a single product.

    A prediction is skipped when a row with the same (type, server_domain,
    source_image, value_tag, value) already exists in DB for this barcode.

    :param barcode: Barcode of the product. All predictions in
        `product_predictions_iter` must have the same barcode.
    :param product_predictions_iter: Iterable of Predictions.
    :param server_domain: The server domain associated with the predictions.
    :return: The number of items imported in DB.
    """
    timestamp = datetime.datetime.utcnow()

    existing_query = PredictionModel.select(
        PredictionModel.type,
        PredictionModel.server_domain,
        PredictionModel.source_image,
        PredictionModel.value_tag,
        PredictionModel.value,
    ).where(PredictionModel.barcode == barcode)
    existing_keys = set(existing_query.tuples())

    def prediction_key(prediction):
        # Field order must match the columns selected above.
        return (
            prediction.type,
            server_domain,
            prediction.source_image,
            prediction.value_tag,
            prediction.value,
        )

    # note: there are some cases
    # when we could decide to replace old predictions of the same key.
    # It's not yet implemented.
    new_predictions = (prediction
                       for prediction in product_predictions_iter
                       if prediction_key(prediction) not in existing_keys)
    # Both generators are lazy: models are built as batch_insert consumes them.
    to_import = (create_prediction_model(prediction, server_domain, timestamp)
                 for prediction in new_predictions)
    return batch_insert(PredictionModel, to_import, 50)
コード例 #7
0
 def import_insights(self,
                     data: Iterable[Dict],
                     automatic: bool = False) -> int:
     """Process the given insights and insert them in batches of 50.

     :param data: iterable of insight dicts, forwarded to
         ``self.process_product_insights``.
     :param automatic: forwarded to ``self.process_product_insights``.
     :return: the value returned by ``batch_insert``.
     """
     return batch_insert(
         ProductInsight,
         self.process_product_insights(data, automatic),
         50,
     )
コード例 #8
0
ファイル: importer.py プロジェクト: Wauplin/robotoff
 def import_insights(
     self,
     data: Iterable[JSONType],
     server_domain: str,
     automatic: bool,
 ) -> int:
     """Process insights, add common fields, and insert in batches of 50.

     :param data: iterable of JSON-like insight items, forwarded to
         ``self.process_insights``.
     :param server_domain: forwarded to ``process_insights`` and
         ``add_fields``.
     :param automatic: forwarded to ``process_insights``.
     :return: the value returned by ``batch_insert``.
     """
     timestamp = datetime.datetime.utcnow()
     processed = self.process_insights(data, server_domain, automatic)
     enriched = self.add_fields(processed, timestamp, server_domain)
     return batch_insert(ProductInsight, enriched, 50)