def run_import_image_job(
    barcode: str, image_url: str, ocr_url: str, server_domain: str
):
    """Fetch an image, save it, import its insights and run object detection.

    The job stops early (returning ``None``) when the image cannot be
    fetched or when the product store has no entry for ``barcode``.

    Args:
        barcode: barcode of the product the image belongs to.
        image_url: URL passed to ``get_image_from_url``.
        ocr_url: URL forwarded to ``import_insights_from_image``.
        server_domain: server domain forwarded to the save/import helpers.
    """
    logger.info(
        f"Running `import_image` for product {barcode} ({server_domain}), image {image_url}"
    )

    fetched_image = get_image_from_url(
        image_url, error_raise=False, session=http_session
    )
    if fetched_image is None:
        # error_raise=False: a failed fetch is reported as None, not raised.
        return

    image_source = get_source_from_url(image_url)
    store = get_product_store()
    product = store[barcode]

    if product is None:
        logger.warning(
            "Product %s does not exist during image import (%s)",
            barcode,
            image_source,
        )
        return

    with db:
        # First transaction: persist the image and import its insights.
        with db.atomic():
            save_image(barcode, image_source, product, server_domain)
            import_insights_from_image(
                barcode, fetched_image, image_source, ocr_url, server_domain
            )

        # Launch object detection in a new SQL transaction
        with db.atomic():
            run_object_detection(barcode, fetched_image, image_source, server_domain)
def import_image(barcode: str, image_url: str, ocr_url: str, server_domain: str):
    """Extract insights from an image and import them, one type at a time.

    Image-flag insights are handed to ``handle_image_flag_insights``; every
    other insight type is imported through the importer built by
    ``InsightImporterFactory``, each inside its own ``db.atomic()`` block.
    Nothing is done when ``get_insights_from_image`` returns ``None``.
    """
    logger.info("Detect insights for product {}, image {}".format(barcode, image_url))
    store = get_product_store()
    extracted = get_insights_from_image(barcode, image_url, ocr_url)

    if extracted is None:
        return

    flag_type_name = InsightType.image_flag.name

    for type_name, type_insights in extracted.items():
        if type_name == flag_type_name:
            handle_image_flag_insights(type_insights)
        else:
            logger.info("Extracting {}".format(type_name))
            importer: InsightImporter = InsightImporterFactory.create(
                type_name, store
            )

            with db.atomic():
                count = importer.import_insights(
                    [type_insights], server_domain=server_domain, automatic=True
                )
                logger.info("Import finished, {} insights imported".format(count))
def mark_insights():
    """Schedule automatic processing for eligible insights.

    Selects insights that are flagged for automatic processing, are not
    latent, have no ``process_after`` value and no annotation, then sets
    their ``process_after`` to ten minutes after the current UTC time.
    The number of updated rows is logged at the end.
    """
    count = 0
    delay = datetime.timedelta(minutes=10)

    with db:
        with db.atomic():
            candidates = ProductInsight.select().where(
                ProductInsight.automatic_processing == True,  # noqa: E712
                ProductInsight.latent == False,  # noqa: E712
                ProductInsight.process_after.is_null(),
                ProductInsight.annotation.is_null(),
            )

            for candidate in candidates.iterator():
                logger.info(
                    "Marking insight {} as processable automatically "
                    "(product: {})".format(candidate.id, candidate.barcode)
                )
                # The timestamp is taken per row, as each insight is saved.
                candidate.process_after = datetime.datetime.utcnow() + delay
                candidate.save()
                count += 1

    logger.info("{} insights marked".format(count))
def refresh_insights(with_deletion: bool = False):
    """Delete invalid unannotated insights and refresh the remaining ones.

    The job only runs when the dataset file at
    ``settings.JSONL_MIN_DATASET_PATH`` was modified today; otherwise it logs
    a warning and returns without touching any insight.

    For each unannotated insight of ``settings.OFF_SERVER_DOMAIN`` whose
    timestamp is not later than today's midnight (UTC):

    * if the product is missing from the product store, the insight is
      deleted only when ``with_deletion`` is true;
    * otherwise the insight is deleted if ``delete_invalid_insight`` says so,
      or else refreshed through ``update_insight_attributes``.

    Args:
        with_deletion: whether insights of products missing from the product
            store should be deleted.
    """
    deleted = 0
    updated = 0
    product_store = CACHED_PRODUCT_STORE.get()

    datetime_threshold = datetime.datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    dataset_datetime = datetime.datetime.fromtimestamp(
        os.path.getmtime(settings.JSONL_MIN_DATASET_PATH)
    )

    # NOTE(review): `datetime_threshold` is UTC while `dataset_datetime` is
    # local time; the date comparison is only exact when the process runs in
    # UTC -- confirm this is intended.
    if dataset_datetime.date() != datetime_threshold.date():
        # `Logger.warn` is a deprecated alias; use `Logger.warning`.
        logger.warning(
            "Dataset version is not up to date, aborting insight removal job"
        )
        return

    # One validator per insight type, created lazily on first use.
    validators: Dict[str, InsightValidator] = {}

    with db:
        with db.atomic():
            for insight in (
                ProductInsight.select()
                .where(
                    ProductInsight.annotation.is_null(),
                    ProductInsight.timestamp <= datetime_threshold,
                    ProductInsight.server_domain == settings.OFF_SERVER_DOMAIN,
                )
                .iterator()
            ):
                product: Product = product_store[insight.barcode]

                if product is None:
                    if with_deletion:
                        # Product has been deleted from OFF
                        logger.info(
                            "Product with barcode {} deleted"
                            "".format(insight.barcode)
                        )
                        deleted += 1
                        insight.delete_instance()
                else:
                    if insight.type not in validators:
                        validators[insight.type] = InsightValidatorFactory.create(
                            insight.type, product_store
                        )

                    validator = validators[insight.type]
                    insight_deleted = delete_invalid_insight(insight, validator)

                    if insight_deleted:
                        deleted += 1
                        logger.info(
                            "invalid insight {} (type: {}), deleting..."
                            "".format(insight.id, insight.type)
                        )
                        continue

                    insight_updated = update_insight_attributes(product, insight)

                    if insight_updated:
                        updated += 1

    logger.info("{} insights deleted".format(deleted))
    logger.info("{} insights updated".format(updated))
def annotate(
    self,
    insight: ProductInsight,
    annotation: int,
    update=True,
    auth: Optional[OFFAuthentication] = None,
) -> AnnotationResult:
    """Store an annotation on ``insight`` and optionally update the product.

    Args:
        insight: the insight being annotated.
        annotation: annotation value to store; ``1`` triggers the product
            update when ``update`` is true.
        update: whether ``update_product`` may be called.
        auth: optional credentials; used to derive the annotating username.

    Returns:
        The result of ``update_product`` when it is called, otherwise
        ``SAVED_ANNOTATION_RESULT``.
    """
    # Prefer the username extracted from the session cookie, when present.
    username: Optional[str] = None
    if auth is not None:
        username = auth.username
        if auth.session_cookie:
            username = extract_username(auth.session_cookie)

    with db.atomic():
        insight.annotation = annotation
        insight.completed_at = datetime.datetime.utcnow()
        insight.save()

        if username:
            UserAnnotation.create(insight=insight, username=username)

    should_update_product = annotation == 1 and update
    if not should_update_product:
        return SAVED_ANNOTATION_RESULT

    return self.update_product(insight, auth=auth)
def import_image(barcode: str, image_url: str, ocr_url: str, server_domain: str):
    """Save an image, launch object detection and import its insights.

    Image-flag insights are forwarded to ``notify_image_flag``; every other
    insight type is imported through the importer built by
    ``InsightImporterFactory``, each inside its own ``db.atomic()`` block.
    """
    logger.info("Detect insights for product {}, image {}".format(barcode, image_url))

    store = get_product_store()
    save_image(barcode, image_url, store[barcode], server_domain)
    launch_object_detection_job(barcode, image_url, server_domain)

    extracted = get_insights_from_image(barcode, image_url, ocr_url)

    for kind, kind_insights in extracted.items():
        if kind == InsightType.image_flag:
            notify_image_flag(
                kind_insights.insights,
                kind_insights.source_image,  # type: ignore
                kind_insights.barcode,
            )
        else:
            logger.info("Extracting {}".format(kind.name))
            importer: BaseInsightImporter = InsightImporterFactory.create(
                kind, store
            )

            with db.atomic():
                count = importer.import_insights(
                    [kind_insights], server_domain=server_domain, automatic=True
                )
                logger.info("Import finished, {} insights imported".format(count))
def delete_product_insights(barcode: str):
    """Delete every insight attached to ``barcode`` and log how many were removed."""
    logger.info(
        "Product {} deleted, deleting associated insights...".format(barcode)
    )

    with db.atomic():
        delete_query = ProductInsight.delete().where(
            ProductInsight.barcode == barcode
        )
        removed = delete_query.execute()

    logger.info("{} insights deleted".format(removed))
def import_insights(insight_type: str, items: List[str]):
    """Import JSON-encoded insights of a single type.

    Args:
        insight_type: type name used to pick the importer.
        items: JSON documents, one insight payload per string; they are
            decoded lazily as the importer consumes them.
    """
    store = CACHED_PRODUCT_STORE.get()
    importer: InsightImporter = InsightImporterFactory.create(insight_type, store)

    with db.atomic():
        decoded_items = (json.loads(raw_item) for raw_item in items)
        count = importer.import_insights(decoded_items, automatic=False)
        logger.info("Import finished, {} insights imported".format(count))
def import_insights(insight_type: str, items: List[str], server_domain: str):
    """Import JSON-encoded insights of a single type for ``server_domain``.

    Args:
        insight_type: type name used to pick the importer.
        items: JSON documents, one insight payload per string; they are
            decoded lazily as the importer consumes them.
        server_domain: server domain forwarded to the importer.
    """
    store = get_product_store()
    importer: InsightImporter = InsightImporterFactory.create(insight_type, store)

    with db.atomic():
        decoded_items = (json.loads(raw_item) for raw_item in items)
        count = importer.import_insights(
            decoded_items, server_domain=server_domain, automatic=False
        )
        logger.info("Import finished, {} insights imported".format(count))
def delete_product_insights(barcode: str, server_domain: str):
    """Delete the unannotated insights of a product on ``server_domain``.

    Only rows matching the barcode, the server domain and a null annotation
    are deleted; the number of deleted rows is logged.
    """
    logger.info(
        "Product {} deleted, deleting associated insights...".format(barcode)
    )

    with db.atomic():
        delete_query = ProductInsight.delete().where(
            ProductInsight.barcode == barcode,
            ProductInsight.annotation.is_null(),
            ProductInsight.server_domain == server_domain,
        )
        removed = delete_query.execute()

    logger.info("{} insights deleted".format(removed))
def annotate(
    self,
    insight: ProductInsight,
    annotation: int,
    update: bool = True,
    data: Optional[Dict] = None,
    auth: Optional[OFFAuthentication] = None,
    automatic: bool = False,
) -> AnnotationResult:
    """Run ``_annotate`` inside a ``db.atomic()`` block and return its result.

    All arguments are forwarded positionally and unchanged to ``_annotate``.
    """
    with db.atomic():
        outcome = self._annotate(insight, annotation, update, data, auth, automatic)
        return outcome
def run_task(event_type: str, event_kwargs: Dict) -> None:
    """Run the task registered for ``event_type`` inside a DB transaction.

    Args:
        event_type: key looked up in ``EVENT_MAPPING``.
        event_kwargs: keyword arguments passed to the mapped function.

    Raises:
        ValueError: if ``event_type`` is not a key of ``EVENT_MAPPING``.

    Any exception raised by the task itself is logged with its traceback and
    not propagated.
    """
    if event_type not in EVENT_MAPPING:
        # The closing quote around the event type was missing in the message.
        raise ValueError(f"unknown event type: '{event_type}'")

    func = EVENT_MAPPING[event_type]

    try:
        # we run task inside transaction to avoid side effects
        with db:
            with db.atomic():
                func(**event_kwargs)
    except Exception as e:
        # Deliberate best-effort boundary: log and swallow the failure.
        logger.error(e, exc_info=True)
def update_related_insights(insight: ProductInsight):
    """Shift the offsets of sibling spellcheck insights after a correction.

    The shift is the length difference between ``insight.data['correction']``
    and ``insight.data['original']``. Every other ingredient-spellcheck
    insight of the same barcode whose ``start_offset`` is not before this
    insight's ``start_offset`` has both of its offsets moved by that amount
    and is saved. Nothing happens when the two lengths are equal.
    """
    corrected_len = len(insight.data['correction'])
    original_len = len(insight.data['original'])
    shift = corrected_len - original_len

    if shift == 0:
        return

    with db.atomic():
        siblings = ProductInsight.select().where(
            ProductInsight.barcode == insight.barcode,
            ProductInsight.id != insight.id,
            ProductInsight.type == InsightType.ingredient_spellcheck.name,
        )

        for sibling in siblings:
            if insight.data['start_offset'] > sibling.data['start_offset']:
                # Sibling starts before the corrected span: offsets unaffected.
                continue

            sibling.data['start_offset'] += shift
            sibling.data['end_offset'] += shift
            sibling.save()
def import_insights(
    predictions: Iterable[Prediction],
    server_domain: str,
    batch_size: int = 1024,
) -> int:
    """Import predictions in batches, one ``db.atomic()`` block per batch.

    Args:
        predictions: predictions to import.
        server_domain: server domain forwarded to ``import_insights_``.
        batch_size: chunk size passed to ``chunked``.

    Returns:
        The sum of the values returned by ``import_insights_`` for each batch.
    """
    store = get_product_store()
    total: int = 0

    batch: List[Prediction]
    for batch in chunked(predictions, batch_size):
        with db.atomic():
            batch_count = import_insights_(
                batch,
                server_domain,
                automatic=False,
                product_store=store,
            )
            total += batch_count

    return total
def annotate(
    self,
    insight: ProductInsight,
    annotation: int,
    update: bool = True,
    data: Optional[Dict] = None,
    auth: Optional[OFFAuthentication] = None,
    automatic: bool = False,
) -> AnnotationResult:
    """Annotate ``insight`` inside a transaction, rolling back on failure.

    Latent insights are not annotated: ``LATENT_INSIGHT_RESULT`` is returned
    for them. Otherwise the work is delegated to ``_annotate``; any exception
    it raises triggers an explicit rollback and is re-raised.
    """
    if insight.latent:
        return LATENT_INSIGHT_RESULT

    with db.atomic() as txn:
        try:
            outcome = self._annotate(
                insight, annotation, update, data, auth, automatic
            )
        except Exception as exc:
            txn.rollback()
            raise exc
        else:
            return outcome
def import_image(barcode: str, image_url: str, ocr_url: str):
    """Extract insights from an image and import them, one type at a time.

    Image-flag insights are forwarded to ``notify_image_flag``; every other
    insight type is imported through the importer built by
    ``InsightImporterFactory``, each inside its own ``db.atomic()`` block.
    Nothing is done when ``get_insights_from_image`` returns ``None``.
    """
    logger.info("Detect insights for product {}, image {}".format(barcode, image_url))
    store = CACHED_PRODUCT_STORE.get()
    extracted = get_insights_from_image(barcode, image_url, ocr_url)

    if extracted is None:
        return

    flag_type_name = InsightType.image_flag.name

    for type_name, payload in extracted.items():
        if type_name == flag_type_name:
            notify_image_flag(
                payload['insights'], payload['source'], payload['barcode']
            )
        else:
            logger.info("Extracting {}".format(type_name))
            importer: InsightImporter = InsightImporterFactory.create(
                type_name, store
            )

            with db.atomic():
                count = importer.import_insights([payload], automatic=True)
                logger.info("Import finished, {} insights imported".format(count))
def save_annotation(
    insight_id: str,
    annotation: int,
    device_id: str,
    update: bool = True,
    data: Optional[Dict] = None,
    auth: Optional[OFFAuthentication] = None,
    trusted_annotator: bool = False,
) -> AnnotationResult:
    """Saves annotation either by using a single response as ground truth or by using several responses.

    trusted_annotator: defines whether the given annotation comes from an authoritative source (e.g.
    a trusted user), or whether the annotation should be subject to the voting system.

    Returns ``UNKNOWN_INSIGHT_RESULT`` when the insight does not exist,
    ``ALREADY_ANNOTATED_RESULT`` when it already carries an annotation,
    ``SAVED_ANNOTATION_VOTE_RESULT`` when the vote was recorded but the votes
    do not yet settle the annotation, and otherwise the annotator's result.
    """
    try:
        insight: Union[ProductInsight, None] = ProductInsight.get_by_id(insight_id)
    except ProductInsight.DoesNotExist:
        insight = None

    if not insight:
        return UNKNOWN_INSIGHT_RESULT

    if insight.annotation is not None:
        return ALREADY_ANNOTATED_RESULT

    if not trusted_annotator:
        verified: bool = False

        # Record this vote first so it is included in the tally below.
        AnnotationVote.create(
            insight_id=insight_id,
            username=auth.get_username() if auth else None,
            value=annotation,
            device_id=device_id,
        )

        with db.atomic() as tx:
            try:
                # Votes grouped by value, most voted value first.
                existing_votes = list(
                    AnnotationVote.select(
                        AnnotationVote.value,
                        peewee.fn.COUNT(AnnotationVote.value).alias("num_votes"),
                    )
                    .where(AnnotationVote.insight_id == insight_id)
                    .group_by(AnnotationVote.value)
                    .order_by(peewee.SQL("num_votes").desc())
                )
                # Total number of votes across all values.
                insight.n_votes = sum(row.num_votes for row in existing_votes)
                insight.save()
            except Exception as e:
                tx.rollback()
                raise e

        # If the top annotation has more than 2 votes, consider applying it to the insight.
        if existing_votes[0].num_votes > 2:
            annotation = existing_votes[0].value
            verified = True

        # But first check for the following cases:
        #  1) The 1st place annotation has >2 votes, and the 2nd place annotation has >= 2 votes.
        #  2) 1st place and 2nd place have 2 votes each.
        #
        # In both cases, we consider this an ambiguous result and mark it with 'I don't know'.
        if (
            existing_votes[0].num_votes >= 2
            and len(existing_votes) > 1
            and existing_votes[1].num_votes >= 2
        ):
            # This code credits the last person to contribute a vote with a potentially not their annotation.
            annotation = 0
            verified = True

        if not verified:
            return SAVED_ANNOTATION_VOTE_RESULT

    annotator = InsightAnnotatorFactory.get(insight.type)
    result = annotator.annotate(insight, annotation, update, data=data, auth=auth)
    username = auth.get_username() if auth else "unknown annotator"
    events.event_processor.send_async(
        "question_answered", username, device_id, insight.barcode
    )
    return result