Esempio n. 1
0
def index_dirty_continue(timestamp):
    """Decide whether another dirty-index pass is required.

    Counts the non-deleted documents that are still flagged ``solr_dirty``.
    When some remain, the ``solr_index_dirty`` task is queued again with the
    same ``timestamp``; otherwise completion is logged and the
    ``solr_index_dirty_status`` cache key is removed.
    """
    remaining = (
        Document.objects
        .exclude(status=Status.deleted)
        .filter(solr_dirty=True)
        .count()
    )

    if remaining <= 0:
        # nothing left to index: log it and drop the status key
        logger.info("[SOLR INDEX] done with dirty indexing")
        cache.delete("solr_index_dirty_status")
        return

    # still work to do, so hand off to another indexing pass
    logger.info(
        "[SOLR INDEX] continuing with %d dirty documents left", remaining
    )
    celery_app.send_task(
        "documentcloud.documents.tasks.solr_index_dirty",
        args=[timestamp],
    )
Esempio n. 2
0
def quickbooks_query(query_type, query_params):
    """Run a QuickBooks query and queue its results for processing.

    Args:
        query_type: Query type string; known types include
            ``purchase_order``, ``item`` and ``check``.
        query_params: Dict of parameters passed through to the query.

    The query results are materialised into a list and sent to the
    ``quickbooks.tasks.process_quickbooks_entities`` task on the
    ``quickbooks`` queue, expiring after 1800 seconds.
    """
    # Build the connection before entering the ``try``: if the constructor
    # fails there is nothing to clean up, and the ``finally`` clause must not
    # replace the real error with an UnboundLocalError from ``del qb``.
    qb = QuickBooks(**QB_LOOKUP)
    try:
        qb.begin_session()
        results = qb.quickbooks_query(query_type, query_params)
        celery_app.send_task(
            'quickbooks.tasks.process_quickbooks_entities',
            queue='quickbooks',
            args=[query_type, list(results)],
            expires=1800,
        )
    finally:
        # Drop our reference to the connection.
        # NOTE(review): presumably QuickBooks ends the session on
        # destruction -- confirm against the QuickBooks class.
        del qb
Esempio n. 3
0
def index_dirty(timestamp=None):
    """Index documents flagged as dirty.

    Called without ``timestamp`` to start a new run: the
    ``solr_index_dirty_status`` cache key is claimed with a fresh timestamp
    (the run is skipped if a different value is already stored) and a
    ``solr_delete`` task is queued for every document whose status is
    deleted. Called with ``timestamp`` to continue a run: the cache key's
    timeout is refreshed.

    In both cases the run stops if the cache key holds ``"cancel"``;
    otherwise a chord of indexing batches is dispatched, with
    ``solr_index_dirty_continue`` as its callback.
    """
    status_key = "solr_index_dirty_status"
    status_ttl = 7200

    if timestamp is not None:
        # continuing a dirty indexing process, keep the cache alive
        cache.touch(status_key, timeout=status_ttl)
    else:
        # starting a new dirty index process
        timestamp = timezone.now().isoformat()
        holder = cache.get_or_set(status_key, timestamp, timeout=status_ttl)
        if holder != timestamp:
            # a previous run is still running, do not run two in parallel
            logger.info(
                "[SOLR INDEX] dirty run not starting, already in progress: %s",
                holder,
            )
            return

        # on initial invocation, kick off dirty deletion
        deleted_pks = Document.objects.filter(
            status=Status.deleted
        ).values_list("pk", flat=True)
        logger.info(
            "[SOLR DELETE] %d documents to delete", deleted_pks.count()
        )
        for pk in deleted_pks:
            celery_app.send_task(
                "documentcloud.documents.tasks.solr_delete", args=[pk]
            )

    # allow for an explicit cancellation of the process
    if cache.get(status_key) == "cancel":
        logger.info("[SOLR INDEX] dirty run cancelled")
        return

    batches = _batch_by_size(None, _dirty_prepare_documents())
    on_complete = signature(
        "documentcloud.documents.tasks.solr_index_dirty_continue",
        args=[timestamp],
        immutable=True,
    )
    chord(batches, on_complete).delay()
Esempio n. 4
0
def qb_requests(request_list=None, app='quickbooks'):
    """Send a batch of requests to QuickBooks within a single session.

    Always send a list of requests so we aren't opening and closing the file
    more than necessary, e.g.::

        qb_requests.delay([
            (item_key, model_name,
             ('ItemReceiptAddRq', receipt_instance.quickbooks_request_tuple)),
            (item_key, model_name,
             ('ItemReceiptAddRq', receipt_instance.quickbooks_request_tuple)),
        ])

    Args:
        request_list: Optional iterable of
            ``(surrogate_key, model_name, (request_type, request_dict))``
            entries. When empty or ``None`` no requests are sent.
        app: Forwarded as the last argument of the
            ``quickbooks.tasks.process_response`` task.

    A failure while handling one entry is logged and does not stop the
    remaining entries. After the entries are handled, the QuickBooks
    preferences are fetched and queued for
    ``quickbooks.tasks.process_preferences``.
    """
    # Build the connection before entering the ``try``: if the constructor
    # fails there is nothing to clean up, and the ``finally`` clause must not
    # replace the real error with an UnboundLocalError from ``del qb``.
    qb = QuickBooks(**QB_LOOKUP)
    try:
        qb.begin_session()

        # process the request list if one was supplied
        for entry in request_list or ():
            try:
                surrogate_key, model_name, request_body = entry
                request_type, request_dict = request_body
                response = qb.call(
                    request_type, request_dictionary=request_dict
                )
                # only responses that can be tied back to a record are queued
                if surrogate_key and request_dict:
                    celery_app.send_task(
                        'quickbooks.tasks.process_response',
                        queue='quickbooks',
                        args=[surrogate_key, model_name, response, app],
                        expires=1800,
                    )
            except Exception as e:
                # best effort per entry: keep going, but keep the traceback
                logger.exception(e)

        # the preferences are always refreshed, with or without requests
        celery_app.send_task(
            'quickbooks.tasks.process_preferences',
            queue='quickbooks',
            args=[qb.get_preferences()],
            expires=1800,
        )
    finally:
        # making sure to end session and close file
        del qb
Esempio n. 5
0
def reindex_continue(collection_name, after_timestamp, delete_timestamp):
    """Check how many documents remain and decide if we should continue
    reindexing.

    If more than ``settings.SOLR_INDEX_LIMIT`` non-deleted documents were
    updated after ``after_timestamp`` and that timestamp is more than
    ``settings.SOLR_INDEX_CATCHUP_SECONDS`` in the past, another
    ``solr_reindex_all`` pass is queued. Otherwise the re-index is finished:
    commit values are reset, the alias is updated, the remaining documents
    are marked dirty and handed to ``solr_index_dirty``, and a
    ``solr_delete`` task is queued for every ``DeletedDocument`` created at
    or after ``delete_timestamp``.
    """

    def _updated_since_checkpoint():
        # non-deleted documents modified after the re-index checkpoint
        return Document.objects.exclude(status=Status.deleted).filter(
            updated_at__gt=after_timestamp
        )

    documents_left = _updated_since_checkpoint().count()

    # Short-circuit on purpose: the timestamp is only parsed when the backlog
    # is large enough to matter.
    keep_going = (
        documents_left > settings.SOLR_INDEX_LIMIT
        and (timezone.now() - parse(after_timestamp)).total_seconds()
        > settings.SOLR_INDEX_CATCHUP_SECONDS
    )

    if keep_going:
        # many documents left and we are not too close to the current time
        logger.info(
            "[SOLR REINDEX] continuing with %d documents left", documents_left
        )
        celery_app.send_task(
            "documentcloud.documents.tasks.solr_reindex_all",
            args=[collection_name, after_timestamp, delete_timestamp],
        )
        return

    logger.info(
        "[SOLR REINDEX] done, re-index all documents updated after %s, "
        "delete all documents after %s",
        after_timestamp,
        delete_timestamp,
    )

    # reset the commit values for normal usage before updating the alias
    reset_commit(collection_name)
    update_alias(collection_name)

    # mark all remaining documents as dirty and begin re-indexing them
    _updated_since_checkpoint().update(solr_dirty=True)
    celery_app.send_task("documentcloud.documents.tasks.solr_index_dirty")

    # delete all documents from the new solr collection that were deleted
    # since we started re-indexing
    removed = DeletedDocument.objects.filter(created_at__gte=delete_timestamp)
    for document in removed:
        celery_app.send_task(
            "documentcloud.documents.tasks.solr_delete", args=[document.pk]
        )
Esempio n. 6
0
# Alternate between the 'priority' and 'fifo' queues on successive runs.
# The current choice is persisted in q.txt as 0 (priority) or 1 (fifo).

# Get queue switch
# (the context manager closes the file even if parsing the number fails)
with open("q.txt", "r") as f:
    q = int(f.read())

# sys.argv[0] is the script name, so the mode flag exists only when the list
# has more than one element. The previous "> 0" check was always true and a
# bare invocation died with IndexError instead of the error raised below.
# NOTE: ``t`` is expected to be a callable defined earlier in the script (not
# visible here); it is rebound to the message descriptor dict.
if len(sys.argv) > 1 and sys.argv[1] == 'd':
    t = {'msg': 'd_' + t(), 'priority': 1}
elif len(sys.argv) > 1 and sys.argv[1] == 'i':
    t = {'msg': 'i_' + t(), 'priority': 5}
else:
    raise Exception('Non arguments')

if q == 0:
    # Priority
    celery_app.send_task('priority',
                         queue='priority',
                         kwargs=dict(msg=t.get('msg')),
                         priority=t.get('priority'))
elif q == 1:
    # FIFO
    celery_app.send_task('fifo', queue='fifo', kwargs=dict(msg=t.get('msg')))
else:
    raise Exception('Non queue selected')

# Set queue switch: flip 0 <-> 1 for the next run
q = (q + 1) % 2
with open("q.txt", "w") as f:
    f.write(str(q))
import sys
from time import time

from config import celery_app

def _now_ms():
    """Return the current Unix time in milliseconds, as a string."""
    return str(round(time() * 1000))


# sys.argv[0] is the script name, so the mode flag exists only when the list
# has more than one element. The previous "> 0" check was always true and a
# bare invocation died with IndexError instead of the error raised below.
mode = sys.argv[1] if len(sys.argv) > 1 else None

# 'd' sends to the default queue, 'i' to the important queue.
if mode == 'd':
    t = {
        'msg': _now_ms(),
        'queue': 'default',
        'task_name': 'default',
        'priority': 1,
    }
elif mode == 'i':
    t = {
        'msg': _now_ms(),
        'queue': 'important',
        'task_name': 'important',
        'priority': 5,
    }
else:
    raise Exception('Non arguments')

celery_app.send_task(t['task_name'],
                     queue=t['queue'],
                     kwargs=dict(msg=t['msg']),
                     priority=t['priority'])
Esempio n. 8
0
 def task_now(self) -> str:
     """Dispatch this object's task immediately through ``celery_app``.

     The task name is read from ``self.task.task`` and the keyword arguments
     are JSON-decoded from ``self.task.kwargs``. The value returned is
     whatever ``celery_app.send_task`` returns.

     NOTE(review): the annotation says ``str``, but Celery's ``send_task``
     normally returns an ``AsyncResult`` -- confirm what callers expect.
     """
     task_name = self.task.task
     task_kwargs = json.loads(self.task.kwargs)
     return celery_app.send_task(task_name, kwargs=task_kwargs)