def hdf2(hdf1):
    #taskset = TaskSet(tasks.hdf2geotiff(hdf1))
    taskset = TaskSet(tasks.hdf2geotiff.subtask((x, )) for x in hdf1)
    taskset_result = taskset.apply_async()
    results = taskset_result.join_native()
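# A minimal sketch (assumed, not part of the snippet above): TaskSet is the
# legacy Celery API; on Celery 3.x and later the same fan-out is usually
# written with `group`. `tasks.hdf2geotiff` is the task used above, while
# `hdf2_with_group` is a hypothetical name for illustration.
from celery import group

def hdf2_with_group(hdf1):
    # group(...)() dispatches all signatures at once and returns a GroupResult
    return group(tasks.hdf2geotiff.subtask((x, )) for x in hdf1)().join()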
def bulk_process(request): "Process a bulk form" if request.method == 'POST': form = BulkQuarantineProcessForm(request.POST) choices = request.session['quarantine_choices'] form.fields['message_id']._choices = choices if form.is_valid(): messages = Message.objects.values( 'id', 'from_address', 'date', 'hostname', 'to_address').filter(id__in=form.cleaned_data['message_id']) del form.cleaned_data['message_id'] formvals = [] for message in messages: message.update(form.cleaned_data) message['date'] = str(message['date']) message['message_id'] = message['id'] del message['id'] formvals.append(message) taskset = TaskSet(tasks=[ ProcessQuarantinedMsg.subtask( args=[formval], options=dict(queue=formval['hostname'])) for formval in formvals ]) task = taskset.apply_async() task.save() return HttpResponseRedirect( reverse('task-status', args=[task.taskset_id])) msg = _('System was unable to process your request') djmessages.info(request, msg) return HttpResponseRedirect( reverse('all-messages-index', args=['quarantine']))
def run(self, flavor, repo_path, data):
    backend = load_backend(flavor, repo_path, cache=False)
    out = StringIO()
    proto = ReceivableProtocol(StringIO(data).read, out.write)
    handler = _ReceivePackHandler(WebBackend(), [backend], proto,
                                  stateless_rpc=True)
    handler.handle()

    sync_tasks = []
    for oldrev, newrev, name in handler._good_refs:
        if name.startswith('refs/heads/'):
            branch = name[11:]
            sync_tasks.append(
                subtask(SyncTask,
                        args=[backend.flavor, backend.path,
                              oldrev, newrev, branch]))

    if sync_tasks:
        taskset = TaskSet(tasks=sync_tasks)
        taskset.apply_async().join()

    return out.getvalue(), handler._good_refs
def bags_migrate_s3(mongo_host='oulib_mongo'): #catalog db=MongoClient(mongo_host) #Celery Worker storage connections celery_worker_hostname = os.getenv('celery_worker_hostname', "dev-mstacy") celery_config=db.catalog.celery_worker_config.find_one({"celery_worker":celery_worker_hostname}) #get variable by celory worker norfile_bagit=celery_config['norfile']['bagit'] s3_bucket=celery_config['s3']['bucket'] subtasks=[] check_catalog=[] s3 = boto3.client('s3') for itm in db.catalog.bagit_inventory.find({"s3.exists":False}): #double check to make sure not already in s3 s3_key = s3.list_objects(Bucket=s3_bucket, Prefix=itm['bag'] ,MaxKeys=1) if not 'Contents' in s3_key: subtasks.append(upload_bag_s3.subtask(args=(itm['bag'],norfile_bagit))) else: check_catalog.append(itm['bag']) if subtasks: job = TaskSet(tasks=subtasks) result_set = job.apply_async() check=",".join(check_catalog) return "{0} subtasks('upload_bag_s3') submitted. Check Catalog: {1}".format(len(subtasks),check)
def update_user(user, requester): """ Fetch new weeks, or possibly those that failed before.""" # TODO: fail here if couldn't contact last.fm # Have to fetch the chart list from last.fm because their timestamps are awkward, especially # those on the first few charts released. chart_list = fetch_chart_list(user.username, requester) successful_requests = Update.objects.weeks_fetched(user) # create taskset and run it. update_tasks = [] updates = [] with transaction.commit_on_success(): for start, end in chart_list: idx = ldates.index_of_timestamp(end) # Skip if this week is before the user signed up if not idx < user.first_sunday_with_data: # skip if data has already been successfully fetched if (idx, Update.ARTIST) not in successful_requests: update = Update(user=user, week_idx=idx, type=Update.ARTIST) updates.append(update) update_tasks.append(fetch_week_data.subtask((user, requester, start, end, Update.ARTIST))) # if (idx, Update.TRACK) not in successful_requests: # Update.objects.create(user=user, week_idx=idx, type=Update.TRACK) # update_tasks.append(fetch_week_data.subtask((user, requester, start, end, Update.TRACK))) Update.objects.bulk_create(updates) ts = TaskSet(update_tasks) ts.apply_async() user.last_updated = date.today() user.save() return len(update_tasks) > 0
def test_interface__compat(self):
    warnings.resetwarnings()
    with catch_warnings(record=True) as log:
        ts = TaskSet(MockTask, [[(2, 2)], [(4, 4)], [(8, 8)]])
        self.assertTrue(log)
        self.assertIn("Using this invocation of TaskSet is deprecated",
                      log[0].message.args[0])
        self.assertListEqual(ts.tasks,
                             [MockTask.subtask((i, i)) for i in (2, 4, 8)])
        return ts

    # TaskSet.task (deprecated)
    with catch_warnings(record=True) as log:
        ts = TaskSet(MockTask, [[(2, 2)], [(4, 4)], [(8, 8)]])
        self.assertEqual(ts.task.name, MockTask.name)
        self.assertTrue(log)
        self.assertIn("TaskSet.task is deprecated", log[0].message.args[0])

    # TaskSet.task_name (deprecated)
    with catch_warnings(record=True) as log:
        ts = TaskSet(MockTask, [[(2, 2)], [(4, 4)], [(8, 8)]])
        self.assertEqual(ts.task_name, MockTask.name)
        self.assertTrue(log)
        self.assertIn("TaskSet.task_name is deprecated",
                      log[0].message.args[0])
def bulk_process(request): "Process a bulk form" if request.method == 'POST': form = BulkQuarantineProcessForm(request.POST) choices = request.session['quarantine_choices'] form.fields['message_id']._choices = choices if form.is_valid(): messages = Message.objects.values('id', 'from_address', 'date', 'hostname', 'to_address').filter(id__in=form.cleaned_data['message_id']) del form.cleaned_data['message_id'] formvals = [] for message in messages: message.update(form.cleaned_data) message['date'] = str(message['date']) message['message_id'] = message['id'] del message['id'] formvals.append(message) taskset = TaskSet(tasks=[ProcessQuarantinedMsg.subtask( args=[formval], options=dict(queue=formval['hostname'])) for formval in formvals]) task = taskset.apply_async() task.save() return HttpResponseRedirect(reverse('task-status', args=[task.taskset_id])) msg = _('System was unable to process your request') djmessages.info(request, msg) return HttpResponseRedirect(reverse('all-messages-index', args=['quarantine']))
def update_documents(self, documents, count): sys.stdout.write('Graph size is {0:d} nodes.\n'.format(count)) sys.stdout.flush() processed_count = 0 subtasks = [] timings = [] average_per_s = 0 if self.index == 'concurrently': index_during_subtask = True else: index_during_subtask = False for doc in documents: processed_count += 1 subtasks.append( update_document.subtask((doc, index_during_subtask))) if processed_count % 1000 == 1: t1 = time.time() if processed_count % 1000 == 0: t2 = time.time() timings.append(t2 - t1) average_per_s = 1000 / (sum(timings) / float(len(timings))) sys.stdout.write( "\rProcessing items in Celery queue: {:.0%} ({}/{}, {:.1f}/s, Last id: {})" .format( processed_count * 1.0 / count, processed_count, count, average_per_s, doc.pk, )) sys.stdout.flush() last_document = (count == processed_count) if (processed_count % 50 == 0) or last_document: # Every 50 documents, we send the subtasks off for processing # Poll to see when they're done. job = TaskSet(tasks=subtasks) result = job.apply_async() while not result.ready(): time.sleep(1) # The jobs finished - clean things up for the next round subtasks = [] if self.index == 'all_at_end': call_command( 'cl_update_index', '--type', 'opinions', '--solr-url', settings.SOLR_OPINION_URL, '--noinput', '--update', '--everything', '--do-commit', ) elif self.index == 'False': sys.stdout.write("Solr index not updated after running citation " "finder. You may want to do so manually.")
def map_reduce(task, task_args, agg, acc):
    """
    Given a task and an iterable of positional arguments, apply the task
    function to the arguments in parallel and return an aggregate result
    depending on the initial value of the accumulator and on the
    aggregation function. To save memory, the order is not preserved and
    there is no list of intermediate results: the accumulator is updated
    as soon as a task result comes in.

    NB: if the environment variable OQ_NO_DISTRIBUTE is set, the tasks are
    run sequentially in the current process and then
    map_reduce(task, task_args, agg, acc) is the same as
    reduce(agg, itertools.starmap(task, task_args), acc).

    Users of map_reduce should be aware that when thousands of tasks are
    spawned and large arguments are passed or large results are returned,
    they may run into memory issues: this is why the calculators limit the
    queue with the `concurrent_task` concept.

    :param task: a `celery` task callable
    :param task_args: an iterable over positional arguments
    :param agg: the aggregation function, (acc, val) -> new acc
    :param acc: the initial value of the accumulator
    :returns: the final value of the accumulator
    """
    if no_distribute():
        for the_args in task_args:
            result, exctype = safely_call(task.task_func, the_args)
            if exctype:
                raise RuntimeError(result)
            acc = agg(acc, result)
    else:
        backend = current_app().backend
        unpik = 0
        job_id = task_args[0][0]
        taskname = task.__name__
        mon = LightMonitor("unpickling %s" % taskname, job_id, task)
        to_send = 0
        pickled_args = []
        for args in task_args:
            piks = pickle_sequence(args)
            pickled_args.append(piks)
            to_send += sum(len(p) for p in piks)
        logs.LOG.info("Sending %dM", to_send / ONE_MB)
        taskset = TaskSet(tasks=map(task.subtask, pickled_args))
        for task_id, result_dict in taskset.apply_async().iter_native():
            check_mem_usage()  # log a warning if too much memory is used
            result_pik = result_dict["result"]
            with mon:
                result, exctype = result_pik.unpickle()
            if exctype:
                raise RuntimeError(result)
            unpik += len(result_pik)
            acc = agg(acc, result)
            del backend._cache[task_id]  # work around a celery bug
        logs.LOG.info("Unpickled %dM of received data in %s seconds",
                      unpik / ONE_MB, mon.duration)
    return acc
def update_documents(self, documents, count): sys.stdout.write('Graph size is {0:d} nodes.\n'.format(count)) sys.stdout.flush() processed_count = 0 subtasks = [] timings = [] average_per_s = 0 if self.index == 'concurrently': index_during_subtask = True else: index_during_subtask = False for doc in documents: processed_count += 1 if processed_count % 10000 == 0: # Send the commit every 10000 times. self.si.commit() subtasks.append(update_document.subtask((doc, index_during_subtask))) if processed_count % 1000 == 1: t1 = time.time() if processed_count % 1000 == 0: t2 = time.time() timings.append(t2 - t1) average_per_s = 1000 / (sum(timings) / float(len(timings))) sys.stdout.write("\rProcessing items in Celery queue: {:.0%} ({}/{}, {:.1f}/s, Last id: {})".format( processed_count * 1.0 / count, processed_count, count, average_per_s, doc.pk, )) sys.stdout.flush() last_document = (count == processed_count) if (processed_count % 50 == 0) or last_document: # Every 5000 documents, we send the subtasks off for processing # Poll to see when they're done. job = TaskSet(tasks=subtasks) result = job.apply_async() while not result.ready(): time.sleep(1) # The jobs finished - clean things up for the next round subtasks = [] if self.index == 'all_at_end': call_command( 'cl_update_index', '--type', 'opinions', '--solr-url', settings.SOLR_OPINION_URL, '--noinput', '--update', '--everything', '--do-commit', ) elif self.index == 'false': sys.stdout.write("Solr index not updated after running citation " "finder. You may want to do so manually.")
def dispatch_image(**params):
    the_subtasks = []
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    job = TaskSet(tasks=the_subtasks)
    result = job.apply_async()
    return result
def SiteMaintenance():
    session = DBSession()
    categories = session.query(Category)
    subtasks = [UpdateFromFeed.subtask((cat.feedurl, cat.idcategory))
                for cat in categories]
    subtasks.append(UpdateDlpStats.subtask())
    maintenanceJobs = TaskSet(tasks=subtasks)
    maintenanceJobs.apply_async().join()
    UpdateIndex.delay()
def get_multiple_clients_daily_rate(client_ids):
    tasks = []
    for client_id in client_ids:
        y = get_clients_daily_rate.subtask((client_id, ))
        tasks.append(y)
    job = TaskSet(tasks=tasks)
    result = job.apply_async()
    result = result.join()
    return result
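# A minimal usage sketch (assumed, not from the original source): fan out one
# rate lookup per client and collect the joined results; the client ids here
# are made-up illustration values.
daily_rates = get_multiple_clients_daily_rate([101, 102, 103])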
def fetch_feeds(callback=None):
    logging.warn("updating all feeds")
    feeds = Feed.objects.all()
    task_list = []
    for feed in feeds:
        s = fetch_feed.subtask([feed.id])
        task_list.append(s)
    fetch_all_tasks = TaskSet(tasks=task_list)
    fetch_all_tasks.apply_async()
    return None
def _exec_callbacks(callback):
    """ Exec the callback or list of callbacks. Return asynchronous results
    as a TaskSetResult object. """
    async_result = None
    if callback:
        if not isinstance(callback, (list, tuple)):  # not iterable
            callback = [callback, ]
        taskset = TaskSet(tasks=callback)
        async_result = taskset.apply_async()
    return async_result
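# A minimal usage sketch (assumed, not from the original source):
# _exec_callbacks accepts either a single subtask or a list of them.
# `notify_done` and `job_id` are hypothetical names used only for illustration.
single = _exec_callbacks(notify_done.subtask((job_id, )))
several = _exec_callbacks([notify_done.subtask((job_id, )),
                           notify_done.subtask((job_id + 1, ))])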
def test_apply(self):
    applied = [0]

    class mocksubtask(subtask):

        def apply(self, *args, **kwargs):
            applied[0] += 1

    ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
    ts.apply()
    self.assertEqual(applied[0], 3)
def make_pi_tasks():
    taskset = TaskSet(tasks.make_pi.subtask((x, )) for x in NUM_CALCS)
    print "Dispatching tasks"
    taskset_result = taskset.apply_async()
    print "Waiting for results"
    results = taskset_result.join_native()
    print "Results:"
    for i in results:
        print i
def _run(fn_name, xs):
    fn = getattr(tasks, fn_name)
    job = TaskSet(tasks=[apply(fn.subtask, (x,)) for x in xs])
    result = job.apply_async()
    while not result.ready():
        time.sleep(5)
    out = []
    for x in result.join():
        if x:
            out.extend(x)
    return out
def test_apply(self):
    applied = [0]

    class mocksubtask(Signature):

        def apply(self, *args, **kwargs):
            applied[0] += 1

    ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
    ts.apply()
    self.assertEqual(applied[0], 3)
def run(self, set, body, interval=1, max_retries=None, **kwargs):
    if not isinstance(set, TaskSet):
        set = TaskSet(set)
    r = []
    setid = gen_unique_id()
    for task in set.tasks:
        uuid = gen_unique_id()
        task.options.update(task_id=uuid, chord=body)
        r.append(current_app.AsyncResult(uuid))
    current_app.TaskSetResult(setid, r).save()
    self.backend.on_chord_apply(setid, body, interval, max_retries)
    return set.apply_async(taskset_id=setid)
def update_documents(self, documents, count): sys.stdout.write('Graph size is {0:d} nodes.\n'.format(count)) sys.stdout.flush() processed_count = 0 subtasks = [] timings = [] average_per_s = 0 if self.index == 'concurrently': index_during_subtask = True else: index_during_subtask = False for doc in documents: processed_count += 1 if processed_count % 10000 == 0: # Send the commit every 10000 times. self.si.commit() subtasks.append(update_document.subtask((doc, index_during_subtask))) if processed_count % 1000 == 1: t1 = time.time() if processed_count % 1000 == 0: t2 = time.time() timings.append(t2 - t1) average_per_s = 1000 / (sum(timings) / float(len(timings))) sys.stdout.write("\rProcessing items in Celery queue: {:.0%} ({}/{}, {:.1f}/s)".format( processed_count * 1.0 / count, processed_count, count, average_per_s )) sys.stdout.flush() last_document = (count == processed_count) if (processed_count % 500 == 0) or last_document: # Every 500 documents, we send the subtasks off for processing # Poll to see when they're done. job = TaskSet(tasks=subtasks) result = job.apply_async() while not result.ready(): time.sleep(0.5) # The jobs finished - clean things up for the next round subtasks = [] if self.index == 'all_at_end': call_command( 'cl_update_index', update_mode=True, everything=True, solr_url='http://127.0.0.1:8983/solr/collection1' ) elif self.index == 'false': sys.stdout.write("Solr index not updated after running citation " "finder. You may want to do so manually.")
def _exec_callbacks(callback):
    """ Exec the callback or list of callbacks. Return asynchronous results
    as a TaskSetResult object. """
    async_result = None
    if callback:
        if not isinstance(callback, (list, tuple)):  # not iterable
            callback = [
                callback,
            ]
        taskset = TaskSet(tasks=callback)
        async_result = taskset.apply_async()
    return async_result
def proccesing_pictrures(shop_id, **kwargs):
    """ Task for processing offer pictures """
    picture_urls = {}
    tasks_image = []
    for pict_url in db.Offers.find(spec={'shopId': shop_id},
                                   fields=['id', 'picture'], slave_ok=True):
        picture_urls[pict_url['id']] = pict_url['picture']
        tasks_image.append(download_image.subtask(url=pict_url['picture'],
                                                  id=pict_url['id']))
    job = TaskSet(tasks=tasks_image)
    result = job.apply_async()  # connection, connect_timeout, publisher, taskset_id
    result.wait()
    result.join()
def _chunk_queryset_into_tasks(self, items, count, chunksize=5000, bundle_size=250): """Chunks the queryset passed in, and dispatches it to Celery for adding to the index. Potential performance improvements: - Postgres is quiescent when Solr is popping tasks from Celery, instead, it should be fetching the next 1,000 - The wait loop (while not result.ready()) polls for the results, at a 1s interval. Could this be reduced or somehow eliminated while keeping Celery's tasks list from running away? """ processed_count = 0 subtasks = [] item_bundle = [] for item in items: last_item = (count == processed_count + 1) if self.verbosity >= 2: self.stdout.write('Indexing item %s' % item.pk) item_bundle.append(item) if (processed_count % bundle_size == 0) or last_item: # Every bundle_size documents we create a subtask subtasks.append( add_or_update_items.subtask((item_bundle, self.solr_url))) item_bundle = [] processed_count += 1 if (processed_count % chunksize == 0) or last_item: # Every chunksize items, we send the subtasks for processing job = TaskSet(tasks=subtasks) result = job.apply_async() while not result.ready(): time.sleep(1) subtasks = [] if (processed_count % 50000 == 0) or last_item: # Do a commit every 50000 items, for good measure. self.stdout.write("...running commit command...") self.si.commit() sys.stdout.write("\rProcessed {}/{} ({:.0%})".format( processed_count, count, processed_count * 1.0 / count, )) self.stdout.flush() self.stdout.write('\n')
def run(self, content_producer_pk):
    content = ContentProducer.objects.get(pk=content_producer_pk)
    self.text = content.text_body
    conditions = content.conditions.all()
    end_execution = datetime.now() + timedelta(seconds=15)
    users = SimpleUser.objects.all()
    # Build the query with the given conditions
    for condition in conditions:
        filters = Q(**{condition.field: condition.value})
        users = users.filter(filters)
    jobs = TaskSet(tasks=[send_sms.subtask((content.text_body, user,),
                                           expires=end_execution)
                          for user in users])
    jobs.apply_async()
def _refinery_file_import(analysis_uuid): """ Check on the status of the files being imported into Refinery. Fail the task appropriately if we cannot retrieve the status. """ analysis = _get_analysis(analysis_uuid) analysis_status = _get_analysis_status(analysis_uuid) if not analysis_status.refinery_import_task_group_id: logger.info("Starting analysis '%s'", analysis) analysis.set_status(Analysis.RUNNING_STATUS) logger.info("Starting input file import tasks for analysis '%s'", analysis) refinery_import_taskset = TaskSet( tasks=analysis.get_refinery_import_task_signatures()).apply_async( ) refinery_import_taskset.save() analysis_status.refinery_import_task_group_id = \ refinery_import_taskset.taskset_id analysis_status.save() run_analysis.retry(countdown=RETRY_INTERVAL) # check if all files were successfully imported into Refinery refinery_import_taskset = get_taskset_result( analysis_status.refinery_import_task_group_id) if not refinery_import_taskset.ready(): logger.debug("Input file import pending for analysis '%s'", analysis) run_analysis.retry(countdown=RETRY_INTERVAL) elif not refinery_import_taskset.successful(): error_msg = "Analysis '{}' failed during file import".format(analysis) logger.error(error_msg) analysis.set_status(Analysis.FAILURE_STATUS, error_msg) analysis.send_email() refinery_import_taskset.delete()
def test_apply_async(self): applied = [0] class mocksubtask(Signature): def apply_async(self, *args, **kwargs): applied[0] += 1 ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)]) ts.apply_async() self.assertEqual(applied[0], 3) class Publisher(object): def send(self, *args, **kwargs): pass ts.apply_async(publisher=Publisher()) # setting current_task @current_app.task def xyz(): pass from celery._state import _task_stack xyz.push_request() _task_stack.push(xyz) try: ts.apply_async(publisher=Publisher()) finally: _task_stack.pop() xyz.pop_request()
def startDownload(request):
    """ Ajax call to start the download """
    if request.user.is_authenticated():
        if request.method == 'POST':
            profile = request.user.profile
            if profile.stage > 0:
                if profile.stage < 3:
                    result = TaskSetResult.restore(profile.task_id)
                    response_data = {
                        "error": "download already started",
                        "stage": profile.stage,
                        "completed": result.completed_count(),
                        "total": result.total,
                    }
                else:
                    response_data = {
                        "error": "download already finished",
                        "stage": profile.stage,
                        "state": "completed",
                    }
            else:
                graphapi = facebook.GraphAPI(profile.fblogin.access_token)
                me = graphapi.get_object('me')
                friends = [(f['id'], f['name'])
                           for f in graphapi.get_connections('me', 'friends')['data']]
                friends.append((me['id'], me['name']))
                subtasks = [tasks.dlUser.subtask((profile.id, graphapi, fbid, name))
                            for (fbid, name) in friends]
                result = TaskSet(tasks=subtasks).apply_async()
                result.save()
                profile.stage = 1
                profile.task_id = result.taskset_id
                profile.save()
                r = tasks.checkTaskSet.delay(result, profile.id)
                response_data = {
                    "stage": 1,
                    "completed": result.completed_count(),
                    "total": result.total,
                }
        else:
            response_data = {"error": "must be a post request"}
    else:
        response_data = {"error": "user must be logged in"}
    return HttpResponse(json.dumps(response_data), mimetype="application/json")
def _run(fn_name, xs):
    fn = getattr(tasks, fn_name)
    job = TaskSet(tasks=[apply(fn.subtask, (x,)) for x in xs])
    result = job.apply_async()
    out = []
    if wait:
        with _close_taskset(result):
            while not result.ready():
                time.sleep(5)
            if result.failed():
                raise ValueError("Failed distributed task; cleaning up")
            for x in result.join():
                if x:
                    out.extend(x)
    return out
def checkTaskSet(result, profile_id):
    if result.ready():
        profile = Profile.objects.get(id=profile_id)
        pages = profile.getActivePages()
        numpeople = profile.getActivePeople().count()
        subtasks = [calcPMIs.subtask((profile_id, page.id, numpeople))
                    for page in pages]
        r = TaskSet(tasks=subtasks).apply_async()
        r.save()
        r2 = checkPMISet.delay(r, profile.id)
        profile.stage = 2
        profile.task_id = r.taskset_id
        profile.save()
    else:
        checkTaskSet.retry(countdown=15, max_retries=100)
def testDl(profile_id):
    profile = Profile.objects.get(id=profile_id)
    graphapi = facebook.GraphAPI(profile.fblogin.access_token)
    me = graphapi.get_object('me')
    friends = [(f['id'], f['name'])
               for f in graphapi.get_connections('me', 'friends')['data']]
    friends.append((me['id'], me['name']))
    subtasks = [dlUser.subtask((profile.id, graphapi, fbid, name))
                for (fbid, name) in friends]
    result = TaskSet(tasks=subtasks).apply_async()
    result.save()
    profile.stage = 1
    profile.task_id = result.taskset_id
    profile.save()
    r = checkTaskSet.delay(result, profile.id)
    return result
def _chunk_queryset_into_tasks(self, items, count, chunksize=5000, bundle_size=250): """Chunks the queryset passed in, and dispatches it to Celery for adding to the index. Potential performance improvements: - Postgres is quiescent when Solr is popping tasks from Celery, instead, it should be fetching the next 1,000 - The wait loop (while not result.ready()) polls for the results, at a 1s interval. Could this be reduced or somehow eliminated while keeping Celery's tasks list from running away? """ processed_count = 0 subtasks = [] item_bundle = [] for item in items: last_item = (count == processed_count + 1) if self.verbosity >= 2: self.stdout.write('Indexing item %s' % item.pk) item_bundle.append(item) if (processed_count % bundle_size == 0) or last_item: # Every bundle_size documents we create a subtask subtasks.append(add_or_update_items.subtask( (item_bundle, self.solr_url))) item_bundle = [] processed_count += 1 if (processed_count % chunksize == 0) or last_item: # Every chunksize items, we send the subtasks for processing job = TaskSet(tasks=subtasks) result = job.apply_async() while not result.ready(): time.sleep(1) subtasks = [] if (processed_count % 50000 == 0) or last_item: # Do a commit every 50000 items, for good measure. self.stdout.write("...running commit command...") self.si.commit() sys.stdout.write("\rProcessed {}/{} ({:.0%})".format( processed_count, count, processed_count * 1.0 / count, )) self.stdout.flush() self.stdout.write('\n')
def index_all_profiles():
    # Get an es object, delete index and re-create it
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    mappings = {
        'mappings': {
            UserProfileMappingType.get_mapping_type_name():
                UserProfileMappingType.get_mapping()
        }
    }

    def _recreate_index(index):
        es.indices.delete(index=index, ignore=[400, 404])
        es.indices.create(index, body=mappings)

    _recreate_index(settings.ES_INDEXES['default'])
    _recreate_index(settings.ES_INDEXES['public'])

    # mozillians index
    ids = UserProfile.objects.complete().values_list('id', flat=True)
    ts = [
        index_objects.subtask(args=[UserProfileMappingType, chunk, 150, False])
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    # public index
    ts += [
        index_objects.subtask(args=[UserProfileMappingType, chunk, 150, True])
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    TaskSet(ts).apply_async()
def index_all_profiles():
    # Get an es object, delete index and re-create it
    index = settings.ES_INDEXES['default']
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    try:
        es.delete_index_if_exists(index)
    except pyes.exceptions.IndexMissingException:
        pass
    mappings = {
        'mappings': {
            UserProfile._meta.db_table: UserProfile.get_mapping()
        }
    }
    es.create_index(index, settings=mappings)
    ids = (UserProfile.objects.exclude(full_name='')
                              .values_list('id', flat=True))
    ts = [
        tasks.index_objects.subtask(args=[UserProfile, chunk])
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    TaskSet(ts).apply_async()
def addon_total_contributions():
    addons = Addon.objects.values_list('id', flat=True)
    ts = [
        tasks.addon_total_contributions.subtask(args=chunk)
        for chunk in chunked(addons, 100)
    ]
    TaskSet(ts).apply_async()
def unsubscribe_from_basket(modeladmin, request, queryset):
    """Unsubscribe from Basket."""
    ts = [(unsubscribe_from_basket_task
           .subtask(args=[userprofile.user.email, [newsletter]]))
          for userprofile in queryset]
    TaskSet(ts).apply_async()
    messages.success(request, 'Basket update started.')
def index_all_profiles():
    ids = (UserProfile.objects.values_list('id', flat=True))
    ts = [
        tasks.index_objects.subtask(args=[UserProfile, chunk])
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    TaskSet(ts).apply_async()
def unsubscribe_from_basket(modeladmin, request, queryset):
    """Unsubscribe from Basket."""
    ts = [(mozillians.users.tasks.remove_from_basket_task
           .subtask(args=[userprofile.user.email, userprofile.basket_token]))
          for userprofile in queryset]
    TaskSet(ts).apply_async()
    messages.success(request, 'Basket update started.')
def reindex_users(index=None):
    from . import tasks
    ids = UserProfile.objects.values_list('id', flat=True)
    taskset = [tasks.index_users.subtask(args=[chunk],
                                         kwargs=dict(index=index))
               for chunk in chunked(sorted(list(ids)), 150)]
    TaskSet(taskset).apply_async()
def subscribe_to_basket(modeladmin, request, queryset):
    """Subscribe to Basket or update details of already subscribed."""
    ts = [(mozillians.users.tasks.update_basket_task
           .subtask(args=[userprofile.id]))
          for userprofile in queryset]
    TaskSet(ts).apply_async()
    messages.success(request, 'Basket update started.')
def index_all_profiles(): # Get an es object, delete index and re-create it es = get_es(timeout=settings.ES_INDEXING_TIMEOUT) mappings = {'mappings': {UserProfile._meta.db_table: UserProfile.get_mapping()}} def _recreate_index(index): try: es.delete_index_if_exists(index) except pyes.exceptions.IndexMissingException: pass es.create_index(index, settings=mappings) _recreate_index(settings.ES_INDEXES['default']) _recreate_index(settings.ES_INDEXES['public']) # mozillians index ids = UserProfile.objects.complete().values_list('id', flat=True) ts = [index_objects.subtask(args=[UserProfile, chunk, False]) for chunk in chunked(sorted(list(ids)), 150)] # public index ids = (UserProfile.objects.complete().public_indexable() .privacy_level(PUBLIC).values_list('id', flat=True)) ts += [index_objects.subtask(args=[UserProfile, chunk, True]) for chunk in chunked(sorted(list(ids)), 150)] TaskSet(ts).apply_async()
def update_user_ratings():
    """Update add-on author's ratings."""
    cursor = connections[multidb.get_slave()].cursor()
    # We build this query ahead of time because the cursor complains about
    # data truncation if it does the parameters.  Also, this query is
    # surprisingly quick, <1sec for 6100 rows returned
    q = """ SELECT
                addons_users.user_id as user_id,
                AVG(rating) as avg_rating
            FROM reviews
                INNER JOIN versions
                INNER JOIN addons_users
                INNER JOIN addons
            ON reviews.version_id = versions.id
                AND addons.id = versions.addon_id
                AND addons_users.addon_id = addons.id
            WHERE reviews.reply_to IS NULL
                AND reviews.rating > 0
                AND addons.status IN (%s)
            GROUP BY addons_users.user_id
            """ % (",".join(map(str, VALID_STATUSES)))
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [update_user_ratings_task.subtask(args=[chunk])
          for chunk in chunked(d, 1000)]
    TaskSet(ts).apply_async()
def get_clients_with_prepaid_accounts(): sql = text(""" SELECT DISTINCT(leads_id) FROM subcontractors WHERE prepaid = 'yes' """) conn = engine.connect() r = conn.execute(sql).fetchall() client_ids = [] for x in r: client_ids.append(x[0]) #retrieve leads_id of timesheet records for the past 90 days #this is to prevent subcontractors record recently terminated and does not show up on adjustment now = get_ph_time() prev_date = now - timedelta(days=90) now_string = now.strftime('%F %T') prev_date_string = prev_date.strftime('%F %T') sql = text(""" SELECT DISTINCT(t.leads_id) FROM timesheet AS t JOIN subcontractors AS s ON t.leads_id = s.leads_id WHERE s.prepaid = 'yes' AND month_year BETWEEN :prev_date_string AND :now_string """) r = conn.execute(sql, prev_date_string=prev_date_string, now_string=now_string) for x in r: client_id = x[0] if client_id not in client_ids: client_ids.append(client_id) tasks = [] for client_id in client_ids: y = get_client_details.subtask((client_id, )) tasks.append(y) job = TaskSet(tasks=tasks) result = job.apply_async() data = result.join() conn.close() return data
def chord(set, body, interval=1, max_retries=None, propagate=False, **kwargs):
    if not isinstance(set, TaskSet):
        set = TaskSet(set)
    r = []
    setid = uuid()
    for task in set.tasks:
        tid = uuid()
        task.options.update(task_id=tid, chord=body)
        r.append(app.AsyncResult(tid))
    app.backend.on_chord_apply(setid, body,
                               interval=interval,
                               max_retries=max_retries,
                               propagate=propagate,
                               result=r)
    set.apply_async(taskset_id=setid)
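# A minimal sketch (assumed, not from the original source): the helper above
# mirrors what newer Celery ships as the `chord` primitive, where a header
# group is followed by a body callback. `header_tasks` and `body_task` are
# placeholder signatures used only for illustration.
from celery import chord as celery_chord

result = celery_chord(header_tasks)(body_task)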
def apply_async(self):
    tasks = []
    for node in self.children:
        func = node.func
        args = node.args
        kwargs = node.kwargs
        callback = kwargs.pop('callback', [])
        if not isinstance(callback, (list, tuple)):
            callback = [callback]
        subtasks = node._get_child_tasks()
        callback += subtasks
        kwargs = dict(callback=callback, **kwargs)
        task = func.subtask(args=args, kwargs=kwargs)
        tasks.append(task)
    taskset = TaskSet(tasks)
    result = taskset.apply_async()
    return result
def invalidate_users(**kw):
    """Invalidate all users to reflect latest Marketplace access whitelist."""
    from amo.utils import chunked
    from users.models import UserProfile
    log.info('Invalidating users for access whitelist.')
    d = UserProfile.objects.values_list('id', flat=True)
    ts = [_invalidate_users.subtask(args=[chunk]) for chunk in chunked(d, 100)]
    TaskSet(ts).apply_async()
def reindex_collections(index=None):
    from . import tasks
    ids = (Collection.objects.exclude(type=amo.COLLECTION_SYNCHRONIZED)
           .values_list('id', flat=True))
    taskset = [tasks.index_collections.subtask(args=[chunk],
                                               kwargs=dict(index=index))
               for chunk in chunked(sorted(list(ids)), 150)]
    TaskSet(taskset).apply_async()
def apply_async(self):
    tasks = []
    for node in self.children:
        func = node.func
        args = node.args
        kwargs = node.kwargs
        callback = kwargs.pop('callback', [])
        if not isinstance(callback, (list, tuple)):
            callback = [callback]
        subtasks = node._get_child_tasks()
        callback += subtasks
        kwargs = dict(callback=callback, **kwargs)
        _task = func.subtask(args=args, kwargs=kwargs)
        tasks.append(_task)
    taskset = TaskSet(tasks)
    result = taskset.apply_async()
    return result
def reindex_addons(index=None, addon_type=None):
    from . import tasks
    ids = Addon.unfiltered.values_list('id', flat=True)
    if addon_type:
        ids = ids.filter(type=addon_type)
    ts = [tasks.index_addons.subtask(args=[chunk], kwargs=dict(index=index))
          for chunk in chunked(sorted(list(ids)), 150)]
    TaskSet(ts).apply_async()
def test_apply_async(self):
    applied = [0]

    class mocksubtask(subtask):

        def apply_async(self, *args, **kwargs):
            applied[0] += 1

    ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
    ts.apply_async()
    self.assertEqual(applied[0], 3)

    class Publisher(object):

        def send(self, *args, **kwargs):
            pass

    ts.apply_async(publisher=Publisher())
def test_apply_async(self): applied = [0] class mocksubtask(Signature): def apply_async(self, *args, **kwargs): applied[0] += 1 ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)]) ts.apply_async() self.assertEqual(applied[0], 3) class Publisher(object): def send(self, *args, **kwargs): pass ts.apply_async(publisher=Publisher()) # setting current_task @current_app.task def xyz(): pass from celery.app.state import _tls _tls.current_task = xyz try: ts.apply_async(publisher=Publisher()) finally: _tls.current_task = None xyz.request.clear()
def run(self, flavor, repo_path, data):
    backend = load_backend(flavor, repo_path, cache=False)
    out = StringIO()
    proto = ReceivableProtocol(StringIO(data).read, out.write)
    handler = _ReceivePackHandler(WebBackend(), [backend], proto,
                                  stateless_rpc=True)
    handler.handle()

    sync_tasks = []
    for oldrev, newrev, name in handler._good_refs:
        if name.startswith('refs/heads/'):
            branch = name[11:]
            sync_tasks.append(subtask(SyncTask,
                                      args=[backend.flavor, backend.path,
                                            oldrev, newrev, branch]))

    if sync_tasks:
        taskset = TaskSet(tasks=sync_tasks)
        taskset.apply_async().join()

    return out.getvalue(), handler._good_refs
def run_jobs_update(category_parser=CategoryParser, jobs_parser=JobsParser):
    # allow mocking of parsers
    Province(search_id=1, name='All').save()
    Province(search_id=2, name='Gauteng').save()
    Province(search_id=5, name='Western Cape').save()
    Province(search_id=6, name='KZN').save()
    Province(search_id=-1, name='Limpopo').save()
    Province(search_id=-2, name='Mpumalanga').save()
    Province(search_id=-3, name='Free State').save()
    Province(search_id=-4, name='Northern Cape').save()
    Province(search_id=-5, name='Eastern Cape').save()
    Province(search_id=-6, name='North West').save()
    now = datetime.now()
    taskset = TaskSet(
        queue_categories.subtask(
            (province.search_id, category_parser, jobs_parser, ),
            options={'eta': now + timedelta(seconds=10 * i)})
        for i, province in enumerate(Province.objects.all())
        if province.search_id > 0)
    return taskset.apply_async()
def post_save_handler( sender, instance=False, **kwargs): try: logging.debug("Prestoprime POST save handler %s %s" % (instance, instance.file)) # Record Additional Usage if instance.name is not None and not instance.initial_usage_recorded and instance.name.endswith(".mxf"): logging.debug("Prestoprime POST save handler %s id:'%s' " % (instance,instance.id)) mxfframecounttask = mxfframecount.subtask([[instance.id],[]]) #count = result["frames"] #logging.info("Recording ingest usage %s " % (count)) #usage_store.record(instance.id,pp_mxfframe_ingested_metric,count) job = Job(name="Prestoprime Tasks",mfile=instance) job.save() output = JobOutput(name="Job Output",job=job,mimetype="text/plain") output.save() ## Do Post Processing mfiled10check = MFileD10Check(mfile=instance) temp_handle = StringIO() temp_handle.seek(0) suf = SimpleUploadedFile("checkfile", temp_handle.read()) mfiled10check.checkfile.save(suf.name+'.txt', suf, save=False) mfiled10check.save() d10mxfchecksumtask = d10mxfchecksum.subtask([[instance.id],[output]]) ts = TaskSet(tasks=[d10mxfchecksumtask,mxfframecounttask]) tsr = ts.apply_async() tsr.save() job.taskset_id=tsr.taskset_id job.save() logging.debug("Prestoprime task %s " % task ) except Exception as e: logging.error("Prestoprime POST save handler failed %s " % e)
def _chunk_queryset_into_tasks(self, items, count, chunksize=5000, bundle_size=250): """Chunks the queryset passed in, and dispatches it to Celery for adding to the index. Potential performance improvements: - Postgres is quiescent when Solr is popping tasks from Celery, instead, it should be fetching the next 1,000 """ processed_count = 0 subtasks = [] item_bundle = [] for item in items: last_item = (count == processed_count + 1) if self.verbosity >= 2: self.stdout.write('Indexing item %s' % item.pk) item_bundle.append(item) if (len(item_bundle) >= bundle_size) or last_item: # Every bundle_size documents we create a subtask subtasks.append( add_or_update_items.subtask((item_bundle, self.solr_url)) ) item_bundle = [] processed_count += 1 if (len(subtasks) >= chunksize) or last_item: # Every chunksize items, we send the subtasks for processing job = TaskSet(tasks=subtasks) job.apply_async().join() subtasks = [] if (processed_count % 50000 == 0) or last_item: # Do a commit every 50000 items, for good measure. self.stdout.write("...running commit command...") self.si.commit() sys.stdout.write("\rProcessed {}/{} ({:.0%})".format( processed_count, count, processed_count * 1.0 / count, )) self.stdout.flush() self.stdout.write('\n')