Example #1
def hdf2(hdf1):

    #taskset = TaskSet(tasks.hdf2geotiff(hdf1))
    taskset = TaskSet(tasks.hdf2geotiff.subtask((x, )) for x in hdf1)
    taskset_result = taskset.apply_async()

    results = taskset_result.join_native()
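
Note: the TaskSet class used throughout these examples was deprecated and later removed from Celery in favour of group (the deprecation warnings are exercised in Example #6 below). A minimal sketch of the same fan-out pattern with the newer group API, assuming the same tasks.hdf2geotiff task as above:

from celery import group

def hdf2_with_group(hdf1):
    # One signature per input file, dispatched together as a group.
    job = group(tasks.hdf2geotiff.s(x) for x in hdf1)
    result = job.apply_async()
    # join() blocks until every subtask has finished and returns their results.
    return result.join()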
Example #2
File: views.py Project: haugvald/baruwa
def bulk_process(request):
    "Process a bulk form"
    if request.method == 'POST':
        form = BulkQuarantineProcessForm(request.POST)
        choices = request.session['quarantine_choices']
        form.fields['message_id']._choices = choices
        if form.is_valid():
            messages = Message.objects.values(
                'id', 'from_address', 'date', 'hostname',
                'to_address').filter(id__in=form.cleaned_data['message_id'])
            del form.cleaned_data['message_id']
            formvals = []
            for message in messages:
                message.update(form.cleaned_data)
                message['date'] = str(message['date'])
                message['message_id'] = message['id']
                del message['id']
                formvals.append(message)
            taskset = TaskSet(tasks=[
                ProcessQuarantinedMsg.subtask(
                    args=[formval], options=dict(queue=formval['hostname']))
                for formval in formvals
            ])
            task = taskset.apply_async()
            task.save()
            return HttpResponseRedirect(
                reverse('task-status', args=[task.taskset_id]))

    msg = _('System was unable to process your request')
    djmessages.info(request, msg)
    return HttpResponseRedirect(
        reverse('all-messages-index', args=['quarantine']))
Example #3
    def run(self, flavor, repo_path, data):
        backend = load_backend(flavor, repo_path, cache=False)
        out = StringIO()
        proto = ReceivableProtocol(StringIO(data).read, out.write)
        handler = _ReceivePackHandler(WebBackend(), [backend],
                                      proto,
                                      stateless_rpc=True)
        handler.handle()

        sync_tasks = []
        for oldrev, newrev, name in handler._good_refs:
            if name.startswith('refs/heads/'):
                branch = name[11:]
                sync_tasks.append(
                    subtask(SyncTask,
                            args=[
                                backend.flavor, backend.path, oldrev, newrev,
                                branch
                            ]))

        if sync_tasks:
            taskset = TaskSet(tasks=sync_tasks)
            taskset.apply_async().join()

        return out.getvalue(), handler._good_refs
Example #4
File: tasks.py Project: mbstacy/oulibq
def bags_migrate_s3(mongo_host='oulib_mongo'):
    #catalog
    db=MongoClient(mongo_host)
    #Celery Worker storage connections
    celery_worker_hostname = os.getenv('celery_worker_hostname', "dev-mstacy")
    celery_config=db.catalog.celery_worker_config.find_one({"celery_worker":celery_worker_hostname})
    # get variables for this celery worker
    norfile_bagit=celery_config['norfile']['bagit']
    s3_bucket=celery_config['s3']['bucket']
    subtasks=[]
    check_catalog=[]
    s3 = boto3.client('s3')
    for itm in db.catalog.bagit_inventory.find({"s3.exists":False}):
        #double check to make sure not already in s3
        s3_key = s3.list_objects(Bucket=s3_bucket, Prefix=itm['bag'] ,MaxKeys=1)
        if 'Contents' not in s3_key:
            subtasks.append(upload_bag_s3.subtask(args=(itm['bag'],norfile_bagit)))
        else:
            check_catalog.append(itm['bag'])
    if subtasks:
        job = TaskSet(tasks=subtasks)
        result_set = job.apply_async()

    check=",".join(check_catalog)
    return "{0} subtasks('upload_bag_s3') submitted. Check Catalog: {1}".format(len(subtasks),check)
Example #5
def update_user(user, requester):
    """ Fetch new weeks, or possibly those that failed before."""
    # TODO: fail here if couldn't contact last.fm
    # Have to fetch the chart list from last.fm because their timestamps are awkward, especially
    # those on the first few charts released.
    chart_list = fetch_chart_list(user.username, requester)
    successful_requests = Update.objects.weeks_fetched(user)

    # create taskset and run it.
    update_tasks = []
    updates = []
    with transaction.commit_on_success():
        for start, end in chart_list:
            idx = ldates.index_of_timestamp(end)
            # Skip if this week is before the user signed up
            if not idx < user.first_sunday_with_data:
                # skip if data has already been successfully fetched
                if (idx, Update.ARTIST) not in successful_requests:
                    update = Update(user=user, week_idx=idx, type=Update.ARTIST)
                    updates.append(update)
                    update_tasks.append(fetch_week_data.subtask((user, requester, start, end, Update.ARTIST)))
#                if (idx, Update.TRACK) not in successful_requests:
#                    Update.objects.create(user=user, week_idx=idx, type=Update.TRACK)
#                    update_tasks.append(fetch_week_data.subtask((user, requester, start, end, Update.TRACK)))

    Update.objects.bulk_create(updates)
    ts = TaskSet(update_tasks)
    ts.apply_async()

    user.last_updated = date.today()
    user.save()

    return len(update_tasks) > 0
Example #6
    def test_interface__compat(self):
        warnings.resetwarnings()

        with catch_warnings(record=True) as log:
            ts = TaskSet(MockTask, [[(2, 2)], [(4, 4)], [(8, 8)]])
            self.assertTrue(log)
            self.assertIn("Using this invocation of TaskSet is deprecated",
                          log[0].message.args[0])
            self.assertListEqual(ts.tasks,
                                 [MockTask.subtask((i, i)) for i in (2, 4, 8)])
            return ts

        # TaskSet.task (deprecated)
        with catch_warnings(record=True) as log:
            ts = TaskSet(MockTask, [[(2, 2)], [(4, 4)], [(8, 8)]])
            self.assertEqual(ts.task.name, MockTask.name)
            self.assertTrue(log)
            self.assertIn("TaskSet.task is deprecated", log[0].message.args[0])

        # TaskSet.task_name (deprecated)
        with catch_warnings(record=True) as log:
            ts = TaskSet(MockTask, [[(2, 2)], [(4, 4)], [(8, 8)]])
            self.assertEqual(ts.task_name, MockTask.name)
            self.assertTrue(log)
            self.assertIn("TaskSet.task_name is deprecated",
                          log[0].message.args[0])
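
The deprecated invocation exercised above passes the task class plus a list of argument lists directly to TaskSet. The non-deprecated form, used by most of the other examples on this page, passes ready-made subtasks instead:

ts = TaskSet(tasks=[MockTask.subtask((i, i)) for i in (2, 4, 8)])
ts.apply_async()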
Example #7
File: views.py Project: heartshare/baruwa
def bulk_process(request):
    "Process a bulk form"
    if request.method == 'POST':
        form = BulkQuarantineProcessForm(request.POST)
        choices = request.session['quarantine_choices']
        form.fields['message_id']._choices = choices
        if form.is_valid():
            messages = Message.objects.values(
                'id', 'from_address', 'date', 'hostname',
                'to_address').filter(id__in=form.cleaned_data['message_id'])
            del form.cleaned_data['message_id']
            formvals = []
            for message in messages:
                message.update(form.cleaned_data)
                message['date'] = str(message['date'])
                message['message_id'] = message['id']
                del message['id']
                formvals.append(message)
            taskset = TaskSet(tasks=[
                ProcessQuarantinedMsg.subtask(
                    args=[formval], options=dict(queue=formval['hostname']))
                for formval in formvals
            ])
            task = taskset.apply_async()
            task.save()
            return HttpResponseRedirect(
                reverse('task-status', args=[task.taskset_id]))

    msg = _('System was unable to process your request')
    djmessages.info(request, msg)
    return HttpResponseRedirect(
        reverse('all-messages-index', args=['quarantine']))
Example #8
    def update_documents(self, documents, count):
        sys.stdout.write('Graph size is {0:d} nodes.\n'.format(count))
        sys.stdout.flush()
        processed_count = 0
        subtasks = []
        timings = []
        average_per_s = 0
        if self.index == 'concurrently':
            index_during_subtask = True
        else:
            index_during_subtask = False
        for doc in documents:
            processed_count += 1
            subtasks.append(
                update_document.subtask((doc, index_during_subtask)))
            if processed_count % 1000 == 1:
                t1 = time.time()
            if processed_count % 1000 == 0:
                t2 = time.time()
                timings.append(t2 - t1)
                average_per_s = 1000 / (sum(timings) / float(len(timings)))
            sys.stdout.write(
                "\rProcessing items in Celery queue: {:.0%} ({}/{}, {:.1f}/s, Last id: {})"
                .format(
                    processed_count * 1.0 / count,
                    processed_count,
                    count,
                    average_per_s,
                    doc.pk,
                ))
            sys.stdout.flush()
            last_document = (count == processed_count)
            if (processed_count % 50 == 0) or last_document:
                # Every 50 documents, we send the subtasks off for processing
                # Poll to see when they're done.
                job = TaskSet(tasks=subtasks)
                result = job.apply_async()
                while not result.ready():
                    time.sleep(1)

                # The jobs finished - clean things up for the next round
                subtasks = []

        if self.index == 'all_at_end':
            call_command(
                'cl_update_index',
                '--type',
                'opinions',
                '--solr-url',
                settings.SOLR_OPINION_URL,
                '--noinput',
                '--update',
                '--everything',
                '--do-commit',
            )
        elif self.index == 'False':
            sys.stdout.write("Solr index not updated after running citation "
                             "finder. You may want to do so manually.")
Example #9
File: tasks.py Project: vup1120/oq-engine
def map_reduce(task, task_args, agg, acc):
    """
    Given a task and an iterable of positional arguments, apply the
    task function to the arguments in parallel and return an aggregate
    result depending on the initial value of the accumulator
    and on the aggregation function. To save memory, the order is
    not preserved and there is no list of intermediate results:
    the accumulator is updated as soon as a task result arrives.

    NB: if the environment variable OQ_NO_DISTRIBUTE is set the
    tasks are run sequentially in the current process and then
    map_reduce(task, task_args, agg, acc) is the same as
    reduce(agg, itertools.starmap(task, task_args), acc).
    Users of map_reduce should be aware that when thousands of
    tasks are spawned and large arguments are passed or large
    results are returned, they may run into memory issues: this
    is why the calculators limit the queue with the
    `concurrent_task` concept.

    :param task: a `celery` task callable.
    :param task_args: an iterable over positional arguments
    :param agg: the aggregation function, (acc, val) -> new acc
    :param acc: the initial value of the accumulator
    :returns: the final value of the accumulator
    """
    if no_distribute():
        for the_args in task_args:
            result, exctype = safely_call(task.task_func, the_args)
            if exctype:
                raise RuntimeError(result)
            acc = agg(acc, result)
    else:
        backend = current_app().backend
        unpik = 0
        job_id = task_args[0][0]
        taskname = task.__name__
        mon = LightMonitor("unpickling %s" % taskname, job_id, task)
        to_send = 0
        pickled_args = []
        for args in task_args:
            piks = pickle_sequence(args)
            pickled_args.append(piks)
            to_send += sum(len(p) for p in piks)
        logs.LOG.info("Sending %dM", to_send / ONE_MB)
        taskset = TaskSet(tasks=map(task.subtask, pickled_args))
        for task_id, result_dict in taskset.apply_async().iter_native():
            check_mem_usage()  # log a warning if too much memory is used
            result_pik = result_dict["result"]
            with mon:
                result, exctype = result_pik.unpickle()
            if exctype:
                raise RuntimeError(result)
            unpik += len(result_pik)
            acc = agg(acc, result)
            del backend._cache[task_id]  # work around a celery bug
        logs.LOG.info("Unpickled %dM of received data in %s seconds", unpik / ONE_MB, mon.duration)
    return acc
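
As the docstring above notes, with OQ_NO_DISTRIBUTE set the whole computation collapses to a plain reduce over starmapped results. A small self-contained sketch of that sequential semantics, using an ordinary function in place of a Celery task:

import itertools
from functools import reduce

def add(x, y):                        # stand-in for a task callable
    return x + y

task_args = [(1, 2), (3, 4), (5, 6)]  # iterable of positional-argument tuples
agg = lambda acc, val: acc + val      # aggregation function: (acc, val) -> new acc
acc = 0                               # initial accumulator value

# Equivalent to map_reduce(add, task_args, agg, acc) in the no-distribute case.
result = reduce(agg, itertools.starmap(add, task_args), acc)
print(result)  # 21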
Example #10
    def update_documents(self, documents, count):
        sys.stdout.write('Graph size is {0:d} nodes.\n'.format(count))
        sys.stdout.flush()
        processed_count = 0
        subtasks = []
        timings = []
        average_per_s = 0
        if self.index == 'concurrently':
            index_during_subtask = True
        else:
            index_during_subtask = False
        for doc in documents:
            processed_count += 1
            if processed_count % 10000 == 0:
                # Send the commit every 10000 times.
                self.si.commit()
            subtasks.append(update_document.subtask((doc, index_during_subtask)))
            if processed_count % 1000 == 1:
                t1 = time.time()
            if processed_count % 1000 == 0:
                t2 = time.time()
                timings.append(t2 - t1)
                average_per_s = 1000 / (sum(timings) / float(len(timings)))
            sys.stdout.write("\rProcessing items in Celery queue: {:.0%} ({}/{}, {:.1f}/s, Last id: {})".format(
                processed_count * 1.0 / count,
                processed_count,
                count,
                average_per_s,
                doc.pk,
            ))
            sys.stdout.flush()
            last_document = (count == processed_count)
            if (processed_count % 50 == 0) or last_document:
                # Every 50 documents, we send the subtasks off for processing
                # Poll to see when they're done.
                job = TaskSet(tasks=subtasks)
                result = job.apply_async()
                while not result.ready():
                    time.sleep(1)

                # The jobs finished - clean things up for the next round
                subtasks = []

        if self.index == 'all_at_end':
            call_command(
                'cl_update_index',
                '--type', 'opinions',
                '--solr-url', settings.SOLR_OPINION_URL,
                '--noinput',
                '--update',
                '--everything',
                '--do-commit',
            )
        elif self.index == 'false':
            sys.stdout.write("Solr index not updated after running citation "
                             "finder. You may want to do so manually.")
Example #11
def dispatch_image(**params):
    the_subtasks = []
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    the_subtasks.append(McAllisterAnaglyphTask.subtask(**params))
    job = TaskSet(tasks=the_subtasks)
    result = job.apply_async()
    return result
Example #12
def SiteMaintenance():
    session = DBSession()
    categories = session.query(Category)

    subtasks = [UpdateFromFeed.subtask((cat.feedurl, cat.idcategory)) for cat in categories]
    subtasks.append(UpdateDlpStats.subtask())

    maintenanceJobs = TaskSet(tasks=subtasks)
    maintenanceJobs.apply_async().join()
    UpdateIndex.delay()
Example #13
def get_multiple_clients_daily_rate(client_ids):
    tasks = []
    for client_id in client_ids:
        y = get_clients_daily_rate.subtask((client_id, ))
        tasks.append(y)

    job = TaskSet(tasks=tasks)
    result = job.apply_async()
    result = result.join()
    return result
Example #14
def fetch_feeds(callback=None):
    logging.warn("updating all feeds")
    feeds = Feed.objects.all()
    task_list = []
    for feed in feeds:
        s = fetch_feed.subtask([feed.id])
        task_list.append(s)
    fetch_all_tasks = TaskSet(tasks=task_list)
    fetch_all_tasks.apply_async()
    return None
Example #15
def fetch_feeds(callback=None):
    logging.warn("updating all feeds")
    feeds = Feed.objects.all()
    task_list = []
    for feed in feeds:
        s = fetch_feed.subtask([feed.id])
        task_list.append(s)
    fetch_all_tasks = TaskSet(tasks=task_list)
    fetch_all_tasks.apply_async()
    return None
Example #16
def _exec_callbacks(callback):
    """ Exec the callback or list of callbacks. Return asynchronous results as
    the TaskSetResult object.
    """
    async_result = None
    if callback:
        if not isinstance(callback, (list, tuple)): # not iterable
            callback = [callback,]
        taskset = TaskSet(tasks=callback)
        async_result = taskset.apply_async()
    return async_result
Example #17
    def test_apply(self):

        applied = [0]

        class mocksubtask(subtask):
            def apply(self, *args, **kwargs):
                applied[0] += 1

        ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
        ts.apply()
        self.assertEqual(applied[0], 3)
Example #18
def make_pi_tasks():

    taskset = TaskSet(tasks.make_pi.subtask((x, )) for x in NUM_CALCS)
    print "Dispatching tasks"
    taskset_result = taskset.apply_async()

    print "Waiting for results"
    results = taskset_result.join_native()
    print "Results:"
    for i in results:
        print i
Example #19
def make_pi_tasks():

    taskset = TaskSet(tasks.make_pi.subtask((x, )) for x in NUM_CALCS)
    print "Dispatching tasks"
    taskset_result = taskset.apply_async()

    print "Waiting for results"
    results = taskset_result.join_native()
    print "Results:"
    for i in results:
        print i
Example #20
File: messaging.py Project: edajeda/bcbb
 def _run(fn_name, xs):
     fn = getattr(tasks, fn_name)
     job = TaskSet(tasks=[apply(fn.subtask, (x,)) for x in xs])
     result = job.apply_async()
     while not result.ready():
         time.sleep(5)
     out = []
     for x in result.join():
         if x:
             out.extend(x)
     return out
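
The apply(fn.subtask, (x,)) construction above relies on Python 2's apply builtin; apply(f, args) is simply f(*args), so the same taskset could be built without it:

# apply(fn.subtask, (x,)) is equivalent to fn.subtask(x)
job = TaskSet(tasks=[fn.subtask(x) for x in xs])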
Example #21
    def test_apply(self):

        applied = [0]

        class mocksubtask(Signature):
            def apply(self, *args, **kwargs):
                applied[0] += 1

        ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
        ts.apply()
        self.assertEqual(applied[0], 3)
Example #22
File: chord.py Project: eldondev/celery
 def run(self, set, body, interval=1, max_retries=None, **kwargs):
     if not isinstance(set, TaskSet):
         set = TaskSet(set)
     r = []
     setid = gen_unique_id()
     for task in set.tasks:
         uuid = gen_unique_id()
         task.options.update(task_id=uuid, chord=body)
         r.append(current_app.AsyncResult(uuid))
     current_app.TaskSetResult(setid, r).save()
     self.backend.on_chord_apply(setid, body, interval, max_retries)
     return set.apply_async(taskset_id=setid)
Example #23
 def run(self, set, body, interval=1, max_retries=None, **kwargs):
     if not isinstance(set, TaskSet):
         set = TaskSet(set)
     r = []
     setid = gen_unique_id()
     for task in set.tasks:
         uuid = gen_unique_id()
         task.options.update(task_id=uuid, chord=body)
         r.append(current_app.AsyncResult(uuid))
     current_app.TaskSetResult(setid, r).save()
     self.backend.on_chord_apply(setid, body, interval, max_retries)
     return set.apply_async(taskset_id=setid)
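
Examples #22 and #23 show the task that implements Celery's chord primitive: the header TaskSet is applied and the body is registered as a callback that fires once every member has completed. From user code the same machinery is normally reached through the chord API; a minimal usage sketch, with add and tsum standing in for two registered tasks (both hypothetical here):

from celery import chord

header = [add.subtask((i, i)) for i in range(10)]  # runs in parallel
callback = tsum.subtask()                          # receives the list of header results
result = chord(header)(callback)
print(result.get())  # e.g. 90, if add(i, i) returns i + i and tsum sums its input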
Example #24
    def update_documents(self, documents, count):
        sys.stdout.write('Graph size is {0:d} nodes.\n'.format(count))
        sys.stdout.flush()
        processed_count = 0
        subtasks = []
        timings = []
        average_per_s = 0
        if self.index == 'concurrently':
            index_during_subtask = True
        else:
            index_during_subtask = False
        for doc in documents:
            processed_count += 1
            if processed_count % 10000 == 0:
                # Send the commit every 10000 times.
                self.si.commit()
            subtasks.append(update_document.subtask((doc, index_during_subtask)))
            if processed_count % 1000 == 1:
                t1 = time.time()
            if processed_count % 1000 == 0:
                t2 = time.time()
                timings.append(t2 - t1)
                average_per_s = 1000 / (sum(timings) / float(len(timings)))
            sys.stdout.write("\rProcessing items in Celery queue: {:.0%} ({}/{}, {:.1f}/s)".format(
                processed_count * 1.0 / count,
                processed_count,
                count,
                average_per_s
            ))
            sys.stdout.flush()
            last_document = (count == processed_count)
            if (processed_count % 500 == 0) or last_document:
                # Every 500 documents, we send the subtasks off for processing
                # Poll to see when they're done.
                job = TaskSet(tasks=subtasks)
                result = job.apply_async()
                while not result.ready():
                    time.sleep(0.5)

                # The jobs finished - clean things up for the next round
                subtasks = []

        if self.index == 'all_at_end':
            call_command(
                'cl_update_index',
                update_mode=True,
                everything=True,
                solr_url='http://127.0.0.1:8983/solr/collection1'
            )
        elif self.index == 'false':
            sys.stdout.write("Solr index not updated after running citation "
                             "finder. You may want to do so manually.")
Example #25
def _exec_callbacks(callback):
    """ Exec the callback or list of callbacks. Return asyncronous results as
    the TaskSetResult object.
    """
    async_result = None
    if callback:
        if not isinstance(callback, (list, tuple)):  # not iterable
            callback = [
                callback,
            ]
        taskset = TaskSet(tasks=callback)
        async_result = taskset.apply_async()
    return async_result
Example #26
def proccesing_pictrures(shop_id, **kwargs):
    """
    Task for processing image pictures
    """
    picture_urls = {}
    tasks_image = []
    for pict_url in db.Offers.find(spec={'shopId':shop_id}, fields=['id', 'picture'], slave_ok=True):
        picture_urls[pict_url['id']] = pict_url['picture']
        tasks_image.append(download_image.subtask(url=pict_url['picture'], id=pict_url['id']))
    job = TaskSet(tasks=tasks_image)
    result = job.apply_async()#connection, connect_timeout, publisher, taskset_id)
    result.wait()
    result.join()
Example #27
    def _chunk_queryset_into_tasks(self,
                                   items,
                                   count,
                                   chunksize=5000,
                                   bundle_size=250):
        """Chunks the queryset passed in, and dispatches it to Celery for
        adding to the index.

        Potential performance improvements:
         - Postgres is quiescent when Solr is popping tasks from Celery,
           instead, it should be fetching the next 1,000
         - The wait loop (while not result.ready()) polls for the results, at
           a 1s interval. Could this be reduced or somehow eliminated while
           keeping Celery's tasks list from running away?
        """
        processed_count = 0
        subtasks = []
        item_bundle = []
        for item in items:
            last_item = (count == processed_count + 1)
            if self.verbosity >= 2:
                self.stdout.write('Indexing item %s' % item.pk)

            item_bundle.append(item)
            if (processed_count % bundle_size == 0) or last_item:
                # Every bundle_size documents we create a subtask
                subtasks.append(
                    add_or_update_items.subtask((item_bundle, self.solr_url)))
                item_bundle = []
            processed_count += 1

            if (processed_count % chunksize == 0) or last_item:
                # Every chunksize items, we send the subtasks for processing
                job = TaskSet(tasks=subtasks)
                result = job.apply_async()
                while not result.ready():
                    time.sleep(1)
                subtasks = []

            if (processed_count % 50000 == 0) or last_item:
                # Do a commit every 50000 items, for good measure.
                self.stdout.write("...running commit command...")
                self.si.commit()

            sys.stdout.write("\rProcessed {}/{} ({:.0%})".format(
                processed_count,
                count,
                processed_count * 1.0 / count,
            ))
            self.stdout.flush()
        self.stdout.write('\n')
Example #28
    def run(self, content_producer_pk):
        content = ContentProducer.objects.get(pk=content_producer_pk)
        self.text = content.text_body
        conditions = content.conditions.all()
        end_execution = datetime.now() + timedelta(seconds=15)

        users = SimpleUser.objects.all()
        # Build the query with the given conditions
        for condition in conditions:
            filters = Q(**{condition.field: condition.value})
            users = users.filter(filters)

        jobs = TaskSet(tasks=[send_sms.subtask((content.text_body, user,), expires=end_execution) for user in users])
        jobs.apply_async()
Example #29
def _refinery_file_import(analysis_uuid):
    """
    Check on the status of the files being imported into Refinery.
    Fail the task appropriately if we cannot retrieve the status.
    """
    analysis = _get_analysis(analysis_uuid)
    analysis_status = _get_analysis_status(analysis_uuid)

    if not analysis_status.refinery_import_task_group_id:
        logger.info("Starting analysis '%s'", analysis)
        analysis.set_status(Analysis.RUNNING_STATUS)
        logger.info("Starting input file import tasks for analysis '%s'",
                    analysis)
        refinery_import_taskset = TaskSet(
            tasks=analysis.get_refinery_import_task_signatures()).apply_async(
            )
        refinery_import_taskset.save()
        analysis_status.refinery_import_task_group_id = \
            refinery_import_taskset.taskset_id
        analysis_status.save()
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if all files were successfully imported into Refinery
    refinery_import_taskset = get_taskset_result(
        analysis_status.refinery_import_task_group_id)
    if not refinery_import_taskset.ready():
        logger.debug("Input file import pending for analysis '%s'", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)

    elif not refinery_import_taskset.successful():
        error_msg = "Analysis '{}' failed during file import".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.send_email()
        refinery_import_taskset.delete()
Example #30
    def test_apply_async(self):

        applied = [0]

        class mocksubtask(Signature):
            def apply_async(self, *args, **kwargs):
                applied[0] += 1

        ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
        ts.apply_async()
        self.assertEqual(applied[0], 3)

        class Publisher(object):
            def send(self, *args, **kwargs):
                pass

        ts.apply_async(publisher=Publisher())

        # setting current_task

        @current_app.task
        def xyz():
            pass

        from celery._state import _task_stack
        xyz.push_request()
        _task_stack.push(xyz)
        try:
            ts.apply_async(publisher=Publisher())
        finally:
            _task_stack.pop()
            xyz.pop_request()
Example #31
def startDownload(request):
    """
    Ajax call to start the download
    """
    if request.user.is_authenticated():
        if request.method == 'POST':
            profile = request.user.profile
            if profile.stage > 0:
                if profile.stage < 3:
                    result = TaskSetResult.restore(profile.task_id)
                    response_data = {
                        "error": "download already started",
                        "stage" : profile.stage,
                        "completed" : result.completed_count(),
                        "total" : result.total,
                        }
                else:
                    response_data = {
                        "error": "download already finished",
                        "stage" : profile.stage,
                        "state" : "completed",
                        }
            else:
                graphapi = facebook.GraphAPI(profile.fblogin.access_token)
                me = graphapi.get_object('me')
                friends = [(f['id'],f['name']) for f in graphapi.get_connections('me','friends')['data']]
                friends.append((me['id'],me['name']))

                subtasks = [tasks.dlUser.subtask((profile.id,graphapi,fbid,name)) for (fbid,name) in friends]
                result = TaskSet(tasks=subtasks).apply_async()
                result.save()
                profile.stage = 1
                profile.task_id = result.taskset_id
                profile.save()
                r = tasks.checkTaskSet.delay(result,profile.id)
                response_data = {
                    "stage":1,
                    "completed": result.completed_count(),
                    "total": result.total,
                    }
        else:
            response_data = {
                "error": "must be a post request"
                }
    else:
        response_data = {
            "error": "user must be logged in"
            }
    return HttpResponse(json.dumps(response_data), mimetype="application/json")
Example #32
 def _run(fn_name, xs):
     fn = getattr(tasks, fn_name)
     job = TaskSet(tasks=[apply(fn.subtask, (x,)) for x in xs])
     result = job.apply_async()
     out = []
     if wait:
         with _close_taskset(result):
             while not result.ready():
                 time.sleep(5)
                 if result.failed():
                     raise ValueError("Failed distributed task; cleaning up")
             for x in result.join():
                 if x:
                     out.extend(x)
     return out
Example #33
def checkTaskSet(result,profile_id):
    if result.ready():
        profile = Profile.objects.get(id=profile_id)
        pages = profile.getActivePages()
        numpeople = profile.getActivePeople().count()

        subtasks = [calcPMIs.subtask((profile_id,page.id,numpeople)) for page in pages]
        r = TaskSet(tasks=subtasks).apply_async()
        r.save()
        r2 = checkPMISet.delay(r,profile.id)
        profile.stage = 2
        profile.task_id = r.taskset_id
        profile.save()
    else:
        checkTaskSet.retry(countdown=15, max_retries=100)
Example #34
def testDl(profile_id):
    profile = Profile.objects.get(id=profile_id)
    graphapi = facebook.GraphAPI(profile.fblogin.access_token)
    me = graphapi.get_object('me')
    friends = [(f['id'],f['name']) for f in graphapi.get_connections('me','friends')['data']]
    friends.append((me['id'],me['name']))

    subtasks = [dlUser.subtask((profile.id,graphapi,fbid,name)) for (fbid,name) in friends]
    result = TaskSet(tasks=subtasks).apply_async()
    result.save()
    profile.stage = 1
    profile.task_id = result.taskset_id
    profile.save()
    r = checkTaskSet.delay(result,profile.id)
    return result
Example #35
 def _run(fn_name, xs):
     fn = getattr(tasks, fn_name)
     job = TaskSet(tasks=[apply(fn.subtask, (x,)) for x in xs])
     result = job.apply_async()
     out = []
     if wait:
         with _close_taskset(result):
             while not result.ready():
                 time.sleep(5)
                 if result.failed():
                     raise ValueError("Failed distributed task; cleaning up")
             for x in result.join():
                 if x:
                     out.extend(x)
     return out
Example #36
    def _chunk_queryset_into_tasks(self, items, count, chunksize=5000,
                                   bundle_size=250):
        """Chunks the queryset passed in, and dispatches it to Celery for
        adding to the index.

        Potential performance improvements:
         - Postgres is quiescent when Solr is popping tasks from Celery,
           instead, it should be fetching the next 1,000
         - The wait loop (while not result.ready()) polls for the results, at
           a 1s interval. Could this be reduced or somehow eliminated while
           keeping Celery's tasks list from running away?
        """
        processed_count = 0
        subtasks = []
        item_bundle = []
        for item in items:
            last_item = (count == processed_count + 1)
            if self.verbosity >= 2:
                self.stdout.write('Indexing item %s' % item.pk)

            item_bundle.append(item)
            if (processed_count % bundle_size == 0) or last_item:
                # Every bundle_size documents we create a subtask
                subtasks.append(add_or_update_items.subtask(
                    (item_bundle, self.solr_url)))
                item_bundle = []
            processed_count += 1

            if (processed_count % chunksize == 0) or last_item:
                # Every chunksize items, we send the subtasks for processing
                job = TaskSet(tasks=subtasks)
                result = job.apply_async()
                while not result.ready():
                    time.sleep(1)
                subtasks = []

            if (processed_count % 50000 == 0) or last_item:
                # Do a commit every 50000 items, for good measure.
                self.stdout.write("...running commit command...")
                self.si.commit()

            sys.stdout.write("\rProcessed {}/{} ({:.0%})".format(
                processed_count,
                count,
                processed_count * 1.0 / count,
            ))
            self.stdout.flush()
        self.stdout.write('\n')
Example #37
def index_all_profiles():
    # Get an es object, delete index and re-create it
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    mappings = {
        'mappings': {
            UserProfileMappingType.get_mapping_type_name():
            UserProfileMappingType.get_mapping()
        }
    }

    def _recreate_index(index):
        es.indices.delete(index=index, ignore=[400, 404])
        es.indices.create(index, body=mappings)

    _recreate_index(settings.ES_INDEXES['default'])
    _recreate_index(settings.ES_INDEXES['public'])

    # mozillians index
    ids = UserProfile.objects.complete().values_list('id', flat=True)
    ts = [
        index_objects.subtask(args=[UserProfileMappingType, chunk, 150, False])
        for chunk in chunked(sorted(list(ids)), 150)
    ]

    # public index
    ts += [
        index_objects.subtask(args=[UserProfileMappingType, chunk, 150, True])
        for chunk in chunked(sorted(list(ids)), 150)
    ]

    TaskSet(ts).apply_async()
Example #38
def index_all_profiles():
    # Get an es object, delete index and re-create it

    index = settings.ES_INDEXES['default']
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    try:
        es.delete_index_if_exists(index)
    except pyes.exceptions.IndexMissingException:
        pass

    mappings = {
        'mappings': {
            UserProfile._meta.db_table: UserProfile.get_mapping()
        }
    }

    es.create_index(index, settings=mappings)

    ids = (UserProfile.objects.exclude(full_name='').values_list('id',
                                                                 flat=True))
    ts = [
        tasks.index_objects.subtask(args=[UserProfile, chunk])
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    TaskSet(ts).apply_async()
Example #39
def addon_total_contributions():
    addons = Addon.objects.values_list('id', flat=True)
    ts = [
        tasks.addon_total_contributions.subtask(args=chunk)
        for chunk in chunked(addons, 100)
    ]
    TaskSet(ts).apply_async()
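
Many of the examples in this batch split a list of ids into fixed-size batches with a chunked() helper and create one subtask per batch. The helper itself never appears in these snippets; a hypothetical minimal sketch of what it typically does:

def chunked(seq, n):
    """Yield successive lists of at most n items from seq (hypothetical stand-in)."""
    seq = list(seq)
    for i in range(0, len(seq), n):
        yield seq[i:i + n]

# list(chunked(range(7), 3)) -> [[0, 1, 2], [3, 4, 5], [6]]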
Example #40
 def unsubscribe_from_basket(modeladmin, request, queryset):
     """Unsubscribe from Basket."""
     ts = [(unsubscribe_from_basket_task.subtask(args=[userprofile.user.email,
                                                       [newsletter]]))
           for userprofile in queryset]
     TaskSet(ts).apply_async()
     messages.success(request, 'Basket update started.')
Example #41
File: cron.py Project: sumlaj/mozillians
def index_all_profiles():
    ids = (UserProfile.objects.values_list('id', flat=True))
    ts = [
        tasks.index_objects.subtask(args=[UserProfile, chunk])
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    TaskSet(ts).apply_async()
Example #42
 def unsubscribe_from_basket(modeladmin, request, queryset):
     """Unsubscribe from Basket."""
     ts = [(mozillians.users.tasks.remove_from_basket_task
            .subtask(args=[userprofile.user.email, userprofile.basket_token]))
           for userprofile in queryset]
     TaskSet(ts).apply_async()
     messages.success(request, 'Basket update started.')
Example #43
def reindex_users(index=None):
    from . import tasks
    ids = UserProfile.objects.values_list('id', flat=True)
    taskset = [tasks.index_users.subtask(args=[chunk],
                                         kwargs=dict(index=index))
               for chunk in chunked(sorted(list(ids)), 150)]
    TaskSet(taskset).apply_async()
Example #44
 def subscribe_to_basket(modeladmin, request, queryset):
     """Subscribe to Basket or update details of already subscribed."""
     ts = [(mozillians.users.tasks.update_basket_task
            .subtask(args=[userprofile.id]))
           for userprofile in queryset]
     TaskSet(ts).apply_async()
     messages.success(request, 'Basket update started.')
Example #45
def index_all_profiles():
    # Get an es object, delete index and re-create it
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    mappings = {'mappings':
                {UserProfile._meta.db_table: UserProfile.get_mapping()}}

    def _recreate_index(index):
        try:
            es.delete_index_if_exists(index)
        except pyes.exceptions.IndexMissingException:
            pass
        es.create_index(index, settings=mappings)
    _recreate_index(settings.ES_INDEXES['default'])
    _recreate_index(settings.ES_INDEXES['public'])

    # mozillians index
    ids = UserProfile.objects.complete().values_list('id', flat=True)
    ts = [index_objects.subtask(args=[UserProfile, chunk, False])
          for chunk in chunked(sorted(list(ids)), 150)]

    # public index
    ids = (UserProfile.objects.complete().public_indexable()
           .privacy_level(PUBLIC).values_list('id', flat=True))
    ts += [index_objects.subtask(args=[UserProfile, chunk, True])
           for chunk in chunked(sorted(list(ids)), 150)]

    TaskSet(ts).apply_async()
Example #46
def update_user_ratings():
    """Update add-on author's ratings."""

    cursor = connections[multidb.get_slave()].cursor()
    # We build this query ahead of time because the cursor complains about data
    # truncation if we let it substitute the parameters itself.  Also, this query
    # is surprisingly quick, <1 sec for the 6100 rows returned.
    q = """   SELECT
                addons_users.user_id as user_id,
                AVG(rating) as avg_rating
              FROM reviews
                INNER JOIN versions
                INNER JOIN addons_users
                INNER JOIN addons
              ON reviews.version_id = versions.id
                AND addons.id = versions.addon_id
                AND addons_users.addon_id = addons.id
              WHERE reviews.reply_to IS NULL
                AND reviews.rating > 0
                AND addons.status IN (%s)
              GROUP BY addons_users.user_id
              """ % (",".join(map(str, VALID_STATUSES)))

    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [update_user_ratings_task.subtask(args=[chunk])
          for chunk in chunked(d, 1000)]
    TaskSet(ts).apply_async()
Example #47
def get_clients_with_prepaid_accounts():
    sql = text("""
        SELECT DISTINCT(leads_id) FROM subcontractors
        WHERE prepaid = 'yes'
        """)
    conn = engine.connect()
    r = conn.execute(sql).fetchall()

    client_ids = []
    for x in r:
        client_ids.append(x[0])

    #retrieve leads_id of timesheet records for the past 90 days
    #this is to prevent subcontractors record recently terminated and does not show up on adjustment
    now = get_ph_time()
    prev_date = now - timedelta(days=90)

    now_string = now.strftime('%F %T')
    prev_date_string = prev_date.strftime('%F %T')

    sql = text("""
        SELECT DISTINCT(t.leads_id) 
        FROM timesheet AS t
        JOIN subcontractors AS s
        ON t.leads_id = s.leads_id
        WHERE s.prepaid = 'yes'
        AND month_year BETWEEN :prev_date_string AND :now_string
        """)
    r = conn.execute(sql,
                     prev_date_string=prev_date_string,
                     now_string=now_string)

    for x in r:
        client_id = x[0]
        if client_id not in client_ids:
            client_ids.append(client_id)

    tasks = []
    for client_id in client_ids:
        y = get_client_details.subtask((client_id, ))
        tasks.append(y)

    job = TaskSet(tasks=tasks)
    result = job.apply_async()
    data = result.join()
    conn.close()
    return data
Example #48
File: builtins.py Project: axiak/celery
    def chord(set, body, interval=1, max_retries=None,
            propagate=False, **kwargs):

        if not isinstance(set, TaskSet):
            set = TaskSet(set)
        r = []
        setid = uuid()
        for task in set.tasks:
            tid = uuid()
            task.options.update(task_id=tid, chord=body)
            r.append(app.AsyncResult(tid))
        app.backend.on_chord_apply(setid, body,
                                   interval=interval,
                                   max_retries=max_retries,
                                   propagate=propagate,
                                   result=r)
        set.apply_async(taskset_id=setid)
Example #49
 def apply_async(self):
     tasks = []
     for node in self.children:
         func = node.func
         args = node.args
         kwargs = node.kwargs
         callback = kwargs.pop('callback', [])
         if not isinstance(callback, (list, tuple)):
             callback = [callback]
         subtasks = node._get_child_tasks()
         callback += subtasks
         kwargs = dict(callback=callback, **kwargs)
         task = func.subtask(args=args, kwargs=kwargs)
         tasks.append(task)
     taskset = TaskSet(tasks)
     result = taskset.apply_async()
     return result
Example #50
def invalidate_users(**kw):
    """Invalidate all users to reflect latest Marketplace access whitelist."""
    from amo.utils import chunked
    from users.models import UserProfile
    log.info('Invalidating users for access whitelist.')
    d = UserProfile.objects.values_list('id', flat=True)
    ts = [_invalidate_users.subtask(args=[chunk]) for chunk in chunked(d, 100)]
    TaskSet(ts).apply_async()
Example #51
def reindex_collections(index=None):
    from . import tasks
    ids = (Collection.objects.exclude(type=amo.COLLECTION_SYNCHRONIZED)
           .values_list('id', flat=True))
    taskset = [tasks.index_collections.subtask(args=[chunk],
                                               kwargs=dict(index=index))
               for chunk in chunked(sorted(list(ids)), 150)]
    TaskSet(taskset).apply_async()
Example #52
 def apply_async(self):
     tasks = []
     for node in self.children:
         func = node.func
         args = node.args
         kwargs = node.kwargs
         callback = kwargs.pop('callback', [])
         if not isinstance(callback, (list, tuple)):
             callback = [callback]
         subtasks = node._get_child_tasks()
         callback += subtasks
         kwargs = dict(callback=callback, **kwargs)
         _task = func.subtask(args=args, kwargs=kwargs)
         tasks.append(_task)
     taskset = TaskSet(tasks)
     result = taskset.apply_async()
     return result
Example #53
File: cron.py Project: mak77/addons-server
def reindex_addons(index=None, addon_type=None):
    from . import tasks
    ids = Addon.unfiltered.values_list('id', flat=True)
    if addon_type:
        ids = ids.filter(type=addon_type)
    ts = [tasks.index_addons.subtask(args=[chunk], kwargs=dict(index=index))
          for chunk in chunked(sorted(list(ids)), 150)]
    TaskSet(ts).apply_async()
Example #54
    def test_apply_async(self):

        applied = [0]

        class mocksubtask(subtask):
            def apply_async(self, *args, **kwargs):
                applied[0] += 1

        ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
        ts.apply_async()
        self.assertEqual(applied[0], 3)

        class Publisher(object):
            def send(self, *args, **kwargs):
                pass

        ts.apply_async(publisher=Publisher())
Example #55
File: test_sets.py Project: clvrobj/celery
    def test_apply_async(self):

        applied = [0]

        class mocksubtask(Signature):

            def apply_async(self, *args, **kwargs):
                applied[0] += 1

        ts = TaskSet([mocksubtask(MockTask, (i, i))
                        for i in (2, 4, 8)])
        ts.apply_async()
        self.assertEqual(applied[0], 3)

        class Publisher(object):

            def send(self, *args, **kwargs):
                pass

        ts.apply_async(publisher=Publisher())

        # setting current_task

        @current_app.task
        def xyz():
            pass
        from celery.app.state import _tls
        _tls.current_task = xyz
        try:
            ts.apply_async(publisher=Publisher())
        finally:
            _tls.current_task = None
            xyz.request.clear()
Example #56
    def test_apply_async(self):

        applied = [0]

        class mocksubtask(Signature):

            def apply_async(self, *args, **kwargs):
                applied[0] += 1

        ts = TaskSet([mocksubtask(MockTask, (i, i))
                        for i in (2, 4, 8)])
        ts.apply_async()
        self.assertEqual(applied[0], 3)

        class Publisher(object):

            def send(self, *args, **kwargs):
                pass

        ts.apply_async(publisher=Publisher())

        # setting current_task

        @current_app.task
        def xyz():
            pass
        from celery._state import _task_stack
        xyz.push_request()
        _task_stack.push(xyz)
        try:
            ts.apply_async(publisher=Publisher())
        finally:
            _task_stack.pop()
            xyz.pop_request()
Example #57
File: tasks.py Project: emulbreh/vacuous
 def run(self, flavor, repo_path, data):
     backend = load_backend(flavor, repo_path, cache=False)
     out = StringIO()
     proto = ReceivableProtocol(StringIO(data).read, out.write)
     handler = _ReceivePackHandler(WebBackend(), [backend], proto, stateless_rpc=True)
     handler.handle()
     
     sync_tasks = []
     for oldrev, newrev, name in handler._good_refs:
         if name.startswith('refs/heads/'):
             branch = name[11:]
             sync_tasks.append(subtask(SyncTask, args=[backend.flavor, backend.path, oldrev, newrev, branch]))
             
     if sync_tasks:
         taskset = TaskSet(tasks=sync_tasks)
         taskset.apply_async().join()
     
     return out.getvalue(), handler._good_refs
Example #58
File: tasks.py Project: pombredanne/ummeli
def run_jobs_update(category_parser = CategoryParser,  jobs_parser = JobsParser):    # allow mocking of parsers
    Province(search_id = 1,  name = 'All').save()
    Province(search_id = 2,  name = 'Gauteng').save()
    Province(search_id = 5,  name = 'Western Cape').save()
    Province(search_id = 6,  name = 'KZN').save()
    Province(search_id = -1,  name = 'Limpopo').save()
    Province(search_id = -2,  name = 'Mpumalanga').save()
    Province(search_id = -3,  name = 'Free State').save()
    Province(search_id = -4,  name = 'Northern Cape').save()
    Province(search_id = -5,  name = 'Eastern Cape').save()
    Province(search_id = -6,  name = 'North West').save()
    
    now = datetime.now()
    taskset = TaskSet(queue_categories.subtask((province.search_id, category_parser,  jobs_parser, ), 
                                options = {'eta':now + timedelta(seconds=10 * i)}) 
                                for i,  province in enumerate(Province.objects.all()) if province.search_id > 0)
    
    return taskset.apply_async()
Example #59
def post_save_handler( sender, instance=False, **kwargs):
    try:
        logging.debug("Prestoprime POST save handler %s %s" % (instance, instance.file))

        # Record Additional Usage
        if instance.name is not None and not instance.initial_usage_recorded and instance.name.endswith(".mxf"):
            logging.debug("Prestoprime POST save handler %s id:'%s' " % (instance,instance.id))

            mxfframecounttask = mxfframecount.subtask([[instance.id],[]])

            #count = result["frames"]
            #logging.info("Recording ingest usage %s " % (count))
            #usage_store.record(instance.id,pp_mxfframe_ingested_metric,count)

            job = Job(name="Prestoprime Tasks",mfile=instance)
            job.save()

            output = JobOutput(name="Job Output",job=job,mimetype="text/plain")
            output.save()

            ## Do Post Processing
            mfiled10check = MFileD10Check(mfile=instance)
            
            temp_handle = StringIO()
            temp_handle.seek(0)

            suf = SimpleUploadedFile("checkfile", temp_handle.read())
            mfiled10check.checkfile.save(suf.name+'.txt', suf, save=False)
            mfiled10check.save()

            d10mxfchecksumtask = d10mxfchecksum.subtask([[instance.id],[output]])

            ts = TaskSet(tasks=[d10mxfchecksumtask,mxfframecounttask])
            tsr = ts.apply_async()
            tsr.save()

            
            job.taskset_id=tsr.taskset_id
            job.save()
            
            logging.debug("Prestoprime task %s  " % task  )

    except Exception as e:
        logging.error("Prestoprime POST save handler failed %s " % e)
Example #60
    def _chunk_queryset_into_tasks(self, items, count, chunksize=5000,
                                   bundle_size=250):
        """Chunks the queryset passed in, and dispatches it to Celery for
        adding to the index.

        Potential performance improvements:
         - Postgres is quiescent when Solr is popping tasks from Celery,
           instead, it should be fetching the next 1,000
        """
        processed_count = 0
        subtasks = []
        item_bundle = []
        for item in items:
            last_item = (count == processed_count + 1)
            if self.verbosity >= 2:
                self.stdout.write('Indexing item %s' % item.pk)

            item_bundle.append(item)
            if (len(item_bundle) >= bundle_size) or last_item:
                # Every bundle_size documents we create a subtask
                subtasks.append(
                    add_or_update_items.subtask((item_bundle, self.solr_url))
                )
                item_bundle = []
            processed_count += 1

            if (len(subtasks) >= chunksize) or last_item:
                # Every chunksize items, we send the subtasks for processing
                job = TaskSet(tasks=subtasks)
                job.apply_async().join()
                subtasks = []

            if (processed_count % 50000 == 0) or last_item:
                # Do a commit every 50000 items, for good measure.
                self.stdout.write("...running commit command...")
                self.si.commit()

            sys.stdout.write("\rProcessed {}/{} ({:.0%})".format(
                processed_count,
                count,
                processed_count * 1.0 / count,
            ))
            self.stdout.flush()
        self.stdout.write('\n')