def index_dirty_continue(timestamp):
    """Continue indexing dirty documents if any remain"""
    # check how many documents we have left to re-index
    documents_left = (
        Document.objects.exclude(status=Status.deleted)
        .filter(solr_dirty=True)
        .count()
    )
    if documents_left > 0:
        # if there are any documents left, continue re-indexing
        logger.info(
            "[SOLR INDEX] continuing with %d dirty documents left", documents_left
        )
        celery_app.send_task(
            "documentcloud.documents.tasks.solr_index_dirty", args=[timestamp]
        )
    else:
        logger.info("[SOLR INDEX] done with dirty indexing")
        cache.delete("solr_index_dirty_status")
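# The "solr_index_dirty_status" cache key doubles as a kill switch: index_dirty
# below checks it for the literal value "cancel" on every continuation. A
# minimal sketch of stopping an in-progress run (hypothetical helper, not part
# of the original code):
def cancel_index_dirty():
    """Ask the running dirty-index process to stop at its next continuation"""
    cache.set("solr_index_dirty_status", "cancel", timeout=7200)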
def quickbooks_query(query_type, query_params):
    """
    args are the query type string and query_params, which should be a dict
    query types include purchase_order, item, check
    """
    qb = QuickBooks(**QB_LOOKUP)
    try:
        qb.begin_session()
        results = qb.quickbooks_query(query_type, query_params)
        celery_app.send_task(
            'quickbooks.tasks.process_quickbooks_entities',
            queue='quickbooks',
            args=[query_type, list(results)],
            expires=1800,
        )
    finally:
        # construct qb before the try block so this cleanup can never hit an
        # unbound name; deleting the wrapper ends the session and closes the file
        del qb
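# Sketch of the receiving side; the real body of process_quickbooks_entities
# is not shown in this file, only its name and its (query_type, results)
# arguments, so this handler is an assumption:
@celery_app.task(name='quickbooks.tasks.process_quickbooks_entities')
def process_quickbooks_entities(query_type, results):
    # hypothetical handler: route each raw QuickBooks record by query type
    # ('purchase_order', 'item', 'check', ...)
    for record in results:
        logger.info("importing %s record: %r", query_type, record)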
def index_dirty(timestamp=None):
    """Index dirty documents"""
    if timestamp is None:
        # starting a new dirty index process
        timestamp = timezone.now().isoformat()
        status = cache.get_or_set(
            "solr_index_dirty_status", timestamp, timeout=7200
        )
        if status != timestamp:
            # a previous run is still running, do not run two in parallel
            logger.info(
                "[SOLR INDEX] dirty run not starting, already in progress: %s",
                status,
            )
            return
        # on the initial invocation, kick off dirty deletion
        deleted = Document.objects.filter(status=Status.deleted).values_list(
            "pk", flat=True
        )
        deleted_count = deleted.count()
        logger.info("[SOLR DELETE] %d documents to delete", deleted_count)
        for document_pk in deleted:
            celery_app.send_task(
                "documentcloud.documents.tasks.solr_delete", args=[document_pk]
            )
    else:
        # continuing a dirty indexing process, keep the cache alive
        cache.touch("solr_index_dirty_status", timeout=7200)
        status = cache.get("solr_index_dirty_status")
        if status == "cancel":
            # allow for an explicit cancellation of the process
            logger.info("[SOLR INDEX] dirty run cancelled")
            return

    documents = _dirty_prepare_documents()
    tasks = _batch_by_size(None, documents)
    chord(
        tasks,
        signature(
            "documentcloud.documents.tasks.solr_index_dirty_continue",
            args=[timestamp],
            immutable=True,
        ),
    ).delay()
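# _batch_by_size is referenced above but not defined in this file. A minimal
# sketch of what it plausibly does: group documents into chord-header
# signatures so each batch task indexes a bounded amount of data. The
# solr_index_batch task name, the (pk, page_count) tuple shape, and the
# max_pages heuristic are all assumptions.
def _batch_by_size(collection_name, documents, max_pages=1000):
    """Group documents into indexing signatures of bounded total size"""
    tasks, batch, batch_pages = [], [], 0
    for doc_pk, page_count in documents:
        if batch and batch_pages + page_count > max_pages:
            tasks.append(signature(
                "documentcloud.documents.tasks.solr_index_batch",
                args=[collection_name, batch],
            ))
            batch, batch_pages = [], 0
        batch.append(doc_pk)
        batch_pages += page_count
    if batch:
        tasks.append(signature(
            "documentcloud.documents.tasks.solr_index_batch",
            args=[collection_name, batch],
        ))
    return tasks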
def qb_requests(request_list=None, app='quickbooks'):
    """
    Always send a list of requests so we aren't opening and closing the
    company file more than necessary

    ex: qb_requests.delay([
        (item_key, model_name,
         ('ItemReceiptAddRq', receipt_instance.quickbooks_request_tuple)),
        (item_key, model_name,
         ('ItemReceiptAddRq', receipt_instance.quickbooks_request_tuple)),
    ])
    """
    qb = QuickBooks(**QB_LOOKUP)
    try:
        qb.begin_session()
        # process the request list if it exists
        if request_list:
            for entry in request_list:
                try:
                    surrogate_key, model_name, request_body = entry
                    request_type, request_dict = request_body
                    response = qb.call(
                        request_type, request_dictionary=request_dict
                    )
                    if surrogate_key and request_dict:
                        celery_app.send_task(
                            'quickbooks.tasks.process_response',
                            queue='quickbooks',
                            args=[surrogate_key, model_name, response, app],
                            expires=1800,
                        )
                except Exception as e:
                    logger.error(e)
        # always refresh preferences after handling the requests
        celery_app.send_task(
            'quickbooks.tasks.process_preferences',
            queue='quickbooks',
            args=[qb.get_preferences()],
            expires=1800,
        )
    finally:
        # making sure to end the session and close the file
        del qb
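# The del-in-finally pattern above leans on QuickBooks.__del__ to end the
# session and close the company file. A sketch of a more explicit shape,
# assuming the wrapper gained an end_session method (hypothetical name):
from contextlib import contextmanager

@contextmanager
def quickbooks_session(**lookup):
    """Open a QuickBooks session and guarantee cleanup on exit"""
    qb = QuickBooks(**lookup)
    qb.begin_session()
    try:
        yield qb
    finally:
        qb.end_session()  # hypothetical explicit cleanup instead of del qb

# usage: with quickbooks_session(**QB_LOOKUP) as qb: qb.call(...)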
def reindex_continue(collection_name, after_timestamp, delete_timestamp):
    """Check how many documents are remaining and decide if we should
    continue re-indexing
    """
    # check how many documents we have left to re-index
    documents_left = (
        Document.objects.exclude(status=Status.deleted)
        .filter(updated_at__gt=after_timestamp)
        .count()
    )
    if (
        documents_left > settings.SOLR_INDEX_LIMIT
        and (timezone.now() - parse(after_timestamp)).total_seconds()
        > settings.SOLR_INDEX_CATCHUP_SECONDS
    ):
        # if there are many documents left and we are not too close to the
        # current time, continue re-indexing
        logger.info(
            "[SOLR REINDEX] continuing with %d documents left", documents_left
        )
        celery_app.send_task(
            "documentcloud.documents.tasks.solr_reindex_all",
            args=[collection_name, after_timestamp, delete_timestamp],
        )
    else:
        logger.info(
            "[SOLR REINDEX] done, re-index all documents updated after %s, "
            "delete all documents after %s",
            after_timestamp,
            delete_timestamp,
        )
        # reset the commit values for normal usage before updating the alias
        reset_commit(collection_name)
        update_alias(collection_name)
        # mark all remaining documents as dirty and begin re-indexing them
        Document.objects.exclude(status=Status.deleted).filter(
            updated_at__gt=after_timestamp
        ).update(solr_dirty=True)
        celery_app.send_task("documentcloud.documents.tasks.solr_index_dirty")
        # delete all documents from the new solr collection that were deleted
        # since we started re-indexing
        for document in DeletedDocument.objects.filter(
            created_at__gte=delete_timestamp
        ):
            celery_app.send_task(
                "documentcloud.documents.tasks.solr_delete", args=[document.pk]
            )
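# update_alias and reset_commit are referenced above but not defined here. A
# sketch of what update_alias plausibly does via the standard Solr Collections
# API CREATEALIAS action; the alias name and Solr URL defaults are assumptions:
import requests

def update_alias(collection_name, alias="documents",
                 solr_url="http://localhost:8983"):
    """Repoint the serving alias at the freshly re-indexed collection"""
    response = requests.get(
        f"{solr_url}/solr/admin/collections",
        params={
            "action": "CREATEALIAS",  # re-creating an alias repoints it atomically
            "name": alias,
            "collections": collection_name,
        },
    )
    response.raise_for_status()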
import sys
from time import time

from config import celery_app

# millisecond timestamp helper for message payloads
t = lambda: str(round(time() * 1000))

# Get queue switch
with open("q.txt", "r") as f:
    q = int(f.read())

# argv[0] is the script name, so check for at least two entries
if len(sys.argv) > 1 and sys.argv[1] == 'd':
    task = {'msg': 'd_' + t(), 'priority': 1}
elif len(sys.argv) > 1 and sys.argv[1] == 'i':
    task = {'msg': 'i_' + t(), 'priority': 5}
else:
    raise Exception('No arguments')

if q == 0:
    # Priority
    celery_app.send_task('priority',
                         queue='priority',
                         kwargs=dict(msg=task.get('msg')),
                         priority=task.get('priority'))
elif q == 1:
    # FIFO
    celery_app.send_task('fifo',
                         queue='fifo',
                         kwargs=dict(msg=task.get('msg')))
else:
    raise Exception('No queue selected')

# Set queue switch for the next run
with open("q.txt", "w") as f:
    q = (q + 1) % 2
    f.write(str(q))
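# The 'priority' and 'fifo' names sent above must be registered on the worker
# side; a minimal sketch of those consumers, assuming they only log the message:
from config import celery_app

@celery_app.task(name='priority')
def priority(msg):
    print('priority message: %s' % msg)

@celery_app.task(name='fifo')
def fifo(msg):
    print('fifo message: %s' % msg)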
import sys
from time import time

from config import celery_app

# millisecond timestamp helper for message payloads
t = lambda: str(round(time() * 1000))

# argv[0] is the script name, so check for at least two entries
if len(sys.argv) > 1 and sys.argv[1] == 'd':
    task = {'msg': t(), 'queue': 'default', 'task_name': 'default', 'priority': 1}
elif len(sys.argv) > 1 and sys.argv[1] == 'i':
    task = {
        'msg': t(),
        'queue': 'important',
        'task_name': 'important',
        'priority': 5,
    }
else:
    raise Exception('No arguments')

celery_app.send_task(task.get('task_name'),
                     queue=task.get('queue'),
                     kwargs=dict(msg=task.get('msg')),
                     priority=task.get('priority'))
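# The priority argument only has an effect if the broker queues support
# message priorities. Assuming a RabbitMQ broker, a sketch of declaring the
# two queues above with an x-max-priority argument:
from kombu import Queue

from config import celery_app

celery_app.conf.task_queues = [
    Queue('default', queue_arguments={'x-max-priority': 10}),
    Queue('important', queue_arguments={'x-max-priority': 10}),
]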
def task_now(self) -> AsyncResult:
    """Dispatch the stored task immediately"""
    # send_task returns an AsyncResult handle, not a str
    return celery_app.send_task(
        self.task.task, kwargs=json.loads(self.task.kwargs)
    )
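# task_now dispatches a stored task name with a JSON-encoded kwargs string;
# that matches the shape of django-celery-beat's PeriodicTask model (task is
# the registered task name, kwargs a JSON string), which is an assumption
# here. A usage sketch under that assumption:
import json

from django_celery_beat.models import PeriodicTask

def run_periodic_task_now(name):
    """Fire a stored periodic task immediately, outside its schedule"""
    periodic = PeriodicTask.objects.get(name=name)
    return celery_app.send_task(periodic.task, kwargs=json.loads(periodic.kwargs))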