def start_task_and_return(self, data):
    """Kick off the similarity task, or ask the caller to confirm first.

    The task is started immediately when the caller already confirmed
    (``skip_confirmation`` in *data*) or when the estimated run time is
    below the warning threshold.  Otherwise a JSON payload with the
    formatted time estimate and ``confirm: True`` is returned so the UI
    can show a confirmation dialog.
    """
    if data.get('skip_confirmation'):
        self.start_task(data)
        return self.json_response(self.task_started_message)

    eta_seconds = ChunkSimilarity().estimate_time(**data)
    # Short-running jobs need no confirmation dialog - just run them.
    if eta_seconds < SimilarityLimits.WARN_CHUNK_SIMILARITY_SECONDS:
        self.start_task(data)
        return self.json_response(self.task_started_message)

    # Render the estimate as H:MM:SS for the confirmation prompt.
    minutes, seconds = divmod(eta_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    eta_str = "%d:%02d:%02d" % (hours, minutes, seconds)
    return self.json_response({
        'message': f'Processing may take about {eta_str}.',
        'confirm': True
    })
continue doc_ids_to_values[doc_a_pk].add(doc_b_pk) doc_ids_to_values[doc_b_pk].add(doc_a_pk) if x % 100 == 0: self.log_info('{field}: Checked for similarity {x} documents of {n}' .format(field=dst_field.code, x=x + 1, n=total_docs)) self.push() self.push() self.log_info('{field}: Found {n} similar documents. Storing links into the document fields.' .format(field=dst_field.code, n=len(doc_ids_to_values))) doc_ids_to_values = {doc_id: list(v) if v else None for doc_id, v in doc_ids_to_values} field_repo.store_values_one_field_many_docs_no_ants(field=dst_field, doc_ids_to_values=doc_ids_to_values) log = CeleryTaskLogger(self) for doc_id in doc_ids_to_values.keys(): try: doc = Document.objects.get(pk=doc_id) signals.fire_document_changed(log=log, document=doc, changed_by_user=None, system_fields_changed=False, generic_fields_changed=False, user_fields_changed=[dst_field.code]) except Exception as ex: self.log_error(f'Unable to fire doc id change event for doc #{doc_id}', exc_info=ex) app.register_task(PreconfiguredDocumentSimilaritySearch()) app.register_task(Similarity()) app.register_task(SimilarityByFeatures()) app.register_task(ChunkSimilarity()) app.register_task(PartySimilarity())