Example #1
    def test_apply(self):

        applied = [0]

        class mocksubtask(subtask):
            def apply(self, *args, **kwargs):
                applied[0] += 1

        ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
        ts.apply()
        self.assertEqual(applied[0], 3)
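Examples #1, #26, and #28 are Celery's own unit tests for the old TaskSet API. On Celery 4.0 and later, where TaskSet was removed, the closest equivalent is group; a minimal sketch of the same fan-out under that assumption, with a hypothetical add task:

from celery import Celery, group

app = Celery('sketch', broker='memory://', backend='cache+memory://')

@app.task
def add(x, y):
    return x + y

# group() takes an iterable of signatures; apply() runs them eagerly in-process,
# apply_async() dispatches them to the broker, just like the TaskSet tests above.
result = group(add.s(i, i) for i in (2, 4, 8)).apply()
print(result.join())  # [4, 8, 16]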
Example #2
def update_collections_total():
    """Update collections downloads totals."""

    d = (CollectionCount.objects.values('collection_id').annotate(
        sum=Sum('count')))

    ts = [
        tasks.update_collections_total.subtask(args=[chunk])
        for chunk in chunked(d, 50)
    ]
    TaskSet(ts).apply_async()
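Most of the cron-style examples below follow the recipe example #2 uses: run a query, split the resulting rows into fixed-size chunks, wrap each chunk in a subtask, and fire the whole batch with TaskSet(...).apply_async(). A minimal, self-contained sketch of that recipe, assuming a pre-4.0 Celery where TaskSet is still importable and a broker is configured; process_chunk and the local chunked helper are hypothetical stand-ins for the project-specific task and the chunked utility (amo.utils.chunked in the zamboni/olympia snippets):

from celery import Celery
from celery.task.sets import TaskSet  # removed in Celery 4.0; use celery.group there

app = Celery('sketch', broker='memory://')

@app.task
def process_chunk(rows):
    # Placeholder for the per-chunk work (updating totals, indexing, etc.).
    return len(rows)

def chunked(seq, n):
    # Yield successive n-sized slices of seq.
    for i in range(0, len(seq), n):
        yield seq[i:i + n]

rows = list(range(1000))
ts = [process_chunk.subtask(args=[chunk]) for chunk in chunked(rows, 50)]
TaskSet(ts).apply_async()  # 20 tasks of 50 rows each, dispatched in parallel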
Example #3
File: cron.py Project: Dreadchild/zamboni
def update_monolith_stats(date=None):
    """Update monolith statistics."""
    if date:
        date = datetime.datetime.strptime(date, '%Y-%m-%d').date()
    today = date or datetime.date.today()
    jobs = [{
        'metric': metric,
        'date': today
    } for metric in tasks._get_monolith_jobs(date)]

    ts = [tasks.update_monolith_stats.subtask(kwargs=kw) for kw in jobs]
    TaskSet(ts).apply_async()
Example #4
def handle(self, *args, **options):
    if len(args) != 1:
        regions = ', '.join(dict(REGIONS_CHOICES_SLUG[1:]).keys())
        raise CommandError(('You must enter a single region slug. '
                            'Available choices: %s' % regions))
    region_slug = args[0]
    ids = (Webapp.objects.filter(premium_type__in=amo.ADDON_HAS_PAYMENTS)
                         .exclude(status__in=amo.WEBAPPS_EXCLUDED_STATUSES)
                         .values_list('id', flat=True))
    ts = [new_payments_region_email.subtask(args=[chunk, region_slug])
          for chunk in chunked(ids, 100)]
    TaskSet(ts).apply_async()
Example #5
def update_collections_subscribers():
    """Update collections subscribers totals."""

    d = (CollectionWatcher.objects.values('collection_id').annotate(
        count=Count('collection')).extra(where=['DATE(created)=%s'],
                                         params=[date.today()]))

    ts = [
        _update_collections_subscribers.subtask(args=[chunk])
        for chunk in chunked(d, 1000)
    ]
    TaskSet(ts).apply_async()
Example #6
def update_daily_theme_user_counts():
    """Store the day's theme popularity counts into ThemeUserCount."""
    raise_if_reindex_in_progress('amo')
    d = Persona.objects.values_list('addon', 'popularity').order_by('id')

    date = datetime.now().strftime('%m-%d-%y')
    ts = [
        _update_daily_theme_user_counts.subtask(args=[chunk],
                                                kwargs={'date': date})
        for chunk in chunked(d, 250)
    ]
    TaskSet(ts).apply_async()
Example #7
def run(self, set, body, interval=1, max_retries=None, **kwargs):
    if not isinstance(set, TaskSet):
        set = TaskSet(set)
    r = []
    setid = gen_unique_id()
    for task in set.tasks:
        uuid = gen_unique_id()
        task.options.update(task_id=uuid, chord=body)
        r.append(current_app.AsyncResult(uuid))
    current_app.TaskSetResult(setid, r).save()
    self.backend.on_chord_apply(setid, body, interval, max_retries)
    return set.apply_async(taskset_id=setid)
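Example #7 is the old chord machinery itself: every task in the set is stamped with a shared taskset_id and the chord body, a TaskSetResult is saved so the backend can detect completion, and the set is then dispatched. On current Celery the same fan-out-then-callback effect is a one-liner; a small sketch, assuming a configured broker and result backend and hypothetical add/tsum tasks:

from celery import Celery, chord

app = Celery('sketch', broker='memory://', backend='cache+memory://')

@app.task
def add(x, y):
    return x + y

@app.task
def tsum(numbers):
    return sum(numbers)

# The header group runs in parallel; the body receives the list of its results.
result = chord(add.s(i, i) for i in (2, 4, 8))(tsum.s())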
Example #8
def update_weekly_downloads():
    """Update the weekly "downloads" from the users_install table."""
    interval = datetime.today() - timedelta(days=7)
    counts = (Installed.objects.values('addon').filter(
        created__gte=interval,
        addon__type=amo.ADDON_WEBAPP).annotate(count=Count('addon')))

    ts = [
        webapp_update_weekly_downloads.subtask(args=[chunk])
        for chunk in chunked(counts, 1000)
    ]
    TaskSet(ts).apply_async()
Example #9
def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    raise_if_reindex_in_progress('amo')

    d = (AddonCollectionCount.objects.values(
        'addon', 'collection').annotate(sum=Sum('count')))

    ts = [
        tasks.update_addons_collections_downloads.subtask(args=[chunk])
        for chunk in chunked(d, 100)
    ]
    TaskSet(ts).apply_async()
Example #10
    def update_documents(self, documents, count):
        sys.stdout.write('Graph size is {0:d} nodes.\n'.format(count))
        sys.stdout.flush()
        processed_count = 0
        subtasks = []
        timings = []
        average_per_s = 0
        if self.index == 'concurrently':
            index_during_subtask = True
        else:
            index_during_subtask = False
        for doc in documents:
            processed_count += 1
            if processed_count % 10000 == 0:
                # Send the commit every 10000 times.
                self.si.commit()
            subtasks.append(update_document.subtask((doc, index_during_subtask)))
            if processed_count % 1000 == 1:
                t1 = time.time()
            if processed_count % 1000 == 0:
                t2 = time.time()
                timings.append(t2 - t1)
                average_per_s = 1000 / (sum(timings) / float(len(timings)))
            sys.stdout.write("\rProcessing items in Celery queue: {:.0%} ({}/{}, {:.1f}/s)".format(
                processed_count * 1.0 / count,
                processed_count,
                count,
                average_per_s
            ))
            sys.stdout.flush()
            last_document = (count == processed_count)
            if (processed_count % 500 == 0) or last_document:
                # Every 500 documents, we send the subtasks off for processing
                # Poll to see when they're done.
                job = TaskSet(tasks=subtasks)
                result = job.apply_async()
                while not result.ready():
                    time.sleep(0.5)

                # The jobs finished - clean things up for the next round
                subtasks = []

        if self.index == 'all_at_end':
            call_command(
                'cl_update_index',
                update_mode=True,
                everything=True,
                solr_url='http://127.0.0.1:8983/solr/collection1'
            )
        elif self.index == 'false':
            sys.stdout.write("Solr index not updated after running citation "
                             "finder. You may want to do so manually.")
Example #11
File: cron.py Project: writefaruq/zamboni
def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    if settings.IGNORE_NON_CRITICAL_CRONS:
        return

    d = (AddonCollectionCount.objects.values(
        'addon', 'collection').annotate(sum=Sum('count')))

    ts = [
        tasks.update_addons_collections_downloads.subtask(args=[chunk])
        for chunk in chunked(d, 600)
    ]
    TaskSet(ts).apply_async()
Example #12
File: cron.py Project: vdt/zamboni
def reindex_addons():
    from . import tasks
    # Make sure our mapping is up to date.
    search.setup_mapping()
    ids = (Addon.objects.values_list('id', flat=True).filter(
        _current_version__isnull=False,
        status__in=amo.VALID_STATUSES,
        disabled_by_user=False))
    ts = [
        tasks.index_addons.subtask(args=[chunk])
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    TaskSet(ts).apply_async()
Example #13
def update_collections_votes():
    """Update collection's votes."""

    up = (CollectionVote.objects.values('collection_id').annotate(
        count=Count('collection')).filter(vote=1).extra(
            where=['DATE(created)=%s'], params=[date.today()]))

    down = (CollectionVote.objects.values('collection_id').annotate(
        count=Count('collection')).filter(vote=-1).extra(
            where=['DATE(created)=%s'], params=[date.today()]))

    ts = [
        _update_collections_votes.subtask(args=[chunk, 'new_votes_up'])
        for chunk in chunked(up, 1000)
    ]
    TaskSet(ts).apply_async()

    ts = [
        _update_collections_votes.subtask(args=[chunk, 'new_votes_down'])
        for chunk in chunked(down, 1000)
    ]
    TaskSet(ts).apply_async()
Example #14
File: cron.py Project: vdt/zamboni
def reindex_apps():
    """Apps do get indexed by `reindex_addons`, but run this for apps only."""
    from . import tasks
    search.setup_mapping()
    ids = (Addon.objects.values_list('id', flat=True).filter(
        type=amo.ADDON_WEBAPP,
        status__in=amo.VALID_STATUSES,
        disabled_by_user=False))
    ts = [
        tasks.index_addons.subtask(args=[chunk])
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    TaskSet(ts).apply_async()
Example #15
def _exec_callbacks(callback):
    """ Exec the callback or list of callbacks. Return asynchronous results as
    the TaskSetResult object.
    """
    async_result = None
    if callback:
        if not isinstance(callback, (list, tuple)):  # not iterable
            callback = [
                callback,
            ]
        taskset = TaskSet(tasks=callback)
        async_result = taskset.apply_async()
    return async_result
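Example #15 normalizes a "callback or list of callbacks" argument before wrapping it in a TaskSet, so callers can pass either shape. A short usage sketch under that assumption; notify and log_done are hypothetical tasks:

# A single subtask and a list of subtasks are both accepted:
_exec_callbacks(notify.subtask(('done',)))
_exec_callbacks([notify.subtask(('done',)), log_done.subtask()])
_exec_callbacks(None)  # falsy callback: nothing is dispatched, returns None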
Example #16
    def _chunk_queryset_into_tasks(self,
                                   items,
                                   count,
                                   chunksize=5000,
                                   bundle_size=250):
        """Chunks the queryset passed in, and dispatches it to Celery for
        adding to the index.

        Potential performance improvements:
         - Postgres is quiescent when Solr is popping tasks from Celery,
           instead, it should be fetching the next 1,000
         - The wait loop (while not result.ready()) polls for the results, at
           a 1s interval. Could this be reduced or somehow eliminated while
           keeping Celery's tasks list from running away?
        """
        processed_count = 0
        subtasks = []
        item_bundle = []
        for item in items:
            last_item = (count == processed_count + 1)
            if self.verbosity >= 2:
                self.stdout.write('Indexing item %s' % item.pk)

            item_bundle.append(item)
            if (processed_count % bundle_size == 0) or last_item:
                # Every bundle_size documents we create a subtask
                subtasks.append(
                    add_or_update_items.subtask((item_bundle, self.solr_url)))
                item_bundle = []
            processed_count += 1

            if (processed_count % chunksize == 0) or last_item:
                # Every chunksize items, we send the subtasks for processing
                job = TaskSet(tasks=subtasks)
                result = job.apply_async()
                while not result.ready():
                    time.sleep(1)
                subtasks = []

            if (processed_count % 50000 == 0) or last_item:
                # Do a commit every 50000 items, for good measure.
                self.stdout.write("...running commit command...")
                self.si.commit()

            sys.stdout.write("\rProcessed {}/{} ({:.0%})".format(
                processed_count,
                count,
                processed_count * 1.0 / count,
            ))
            self.stdout.flush()
        self.stdout.write('\n')
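Examples #10 and #16 refine the basic recipe in two ways: they accumulate subtasks and dispatch them only every few hundred items, and after each dispatch they poll TaskSetResult.ready() so the Celery queue cannot run away from the producer. A stripped-down sketch of that dispatch-and-wait loop, assuming a pre-4.0 Celery, a running worker, and a hypothetical index_doc task:

import time

from celery import Celery
from celery.task.sets import TaskSet  # pre-4.0 import path

app = Celery('sketch', broker='memory://')

@app.task
def index_doc(doc):
    return doc  # placeholder for the real per-document work

def dispatch_in_batches(docs, batch_size=500):
    """Queue docs in batches, waiting for each batch before sending the next."""
    docs = list(docs)
    subtasks = []
    for i, doc in enumerate(docs, start=1):
        subtasks.append(index_doc.subtask((doc,)))
        if i % batch_size == 0 or i == len(docs):
            result = TaskSet(tasks=subtasks).apply_async()
            while not result.ready():  # poll until the whole batch has finished
                time.sleep(0.5)
            subtasks = []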
Example #17
File: cron.py Project: tmp0230/zamboni
def update_global_totals(date=None):
    """Update global statistics totals."""

    today = date or datetime.date.today()
    today_jobs = [dict(job=job, date=today) for job in
                  tasks._get_daily_jobs(date)]

    max_update = date or UpdateCount.objects.aggregate(max=Max('date'))['max']
    metrics_jobs = [dict(job=job, date=max_update) for job in
                    tasks._get_metrics_jobs(date)]

    ts = [tasks.update_global_totals.subtask(kwargs=kw)
          for kw in today_jobs + metrics_jobs]
    TaskSet(ts).apply_async()
Example #18
File: cron.py Project: zuzelvp/olympia
def update_addons_current_version():
    """Update the current_version field of the addons."""

    # Candidate for deletion - Bug 750510
    if not waffle.switch_is_active('current_version_crons'):
        return

    d = (Addon.objects.filter(disabled_by_user=False,
                              status__in=amo.VALID_STATUSES)
         .exclude(type=amo.ADDON_PERSONA).values_list('id'))

    ts = [_update_addons_current_version.subtask(args=[chunk])
          for chunk in chunked(d, 100)]
    TaskSet(ts).apply_async()
Example #19
def reindex_addons(index=None, addon_type=None):
    from . import tasks
    ids = (Addon.objects.values_list('id', flat=True).filter(
        _current_version__isnull=False,
        status__in=amo.VALID_STATUSES,
        is_listed=True,
        disabled_by_user=False))
    if addon_type:
        ids = ids.filter(type=addon_type)
    ts = [
        tasks.index_addons.subtask(args=[chunk], kwargs=dict(index=index))
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    TaskSet(ts).apply_async()
Example #20
File: cron.py Project: viczsaurav/olympia
def update_addon_appsupport():
    # Find all the add-ons that need their app support details updated.
    newish = (Q(last_updated__gte=F('appsupport__created')) |
              Q(appsupport__created__isnull=True))
    # Search providers don't list supported apps.
    has_app = Q(versions__apps__isnull=False) | Q(type=amo.ADDON_SEARCH)
    has_file = Q(versions__files__status__in=amo.VALID_STATUSES)
    good = Q(has_app, has_file) | Q(type=amo.ADDON_PERSONA)
    ids = (Addon.objects.valid().distinct()
           .filter(newish, good).values_list('id', flat=True))

    ts = [_update_appsupport.subtask(args=[chunk])
          for chunk in chunked(ids, 20)]
    TaskSet(ts).apply_async()
Example #21
def _run_galaxy_workflow(analysis_uuid):
    """
    Import files into Galaxy and execute Galaxy Workflow
    """
    analysis = _get_analysis(analysis_uuid)
    analysis_status = _get_analysis_status(analysis_uuid)

    if not analysis_status.galaxy_import_task_group_id:
        logger.debug("Starting analysis execution in Galaxy")
        try:
            analysis.prepare_galaxy()
        except (requests.exceptions.ConnectionError,
                galaxy.client.ConnectionError):
            error_msg = "Analysis '{}' failed during preparation in " \
                        "Galaxy".format(analysis)
            logger.error(error_msg)
            analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
            analysis.send_email()
            get_taskset_result(
                analysis_status.refinery_import_task_group_id).delete()
            return
        galaxy_import_tasks = [
            _start_galaxy_analysis.subtask((analysis.uuid, )),
        ]
        galaxy_import_taskset = TaskSet(
            tasks=galaxy_import_tasks).apply_async()
        galaxy_import_taskset.save()
        analysis_status.galaxy_import_task_group_id = \
            galaxy_import_taskset.taskset_id
        analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS)
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if data files were successfully imported into Galaxy
    galaxy_import_taskset = get_taskset_result(
        analysis_status.galaxy_import_task_group_id)
    if not galaxy_import_taskset.ready():
        logger.debug("Analysis '%s' pending in Galaxy", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    elif not galaxy_import_taskset.successful():
        error_msg = "Analysis '{}' failed in Galaxy".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR)
        analysis.send_email()
        get_taskset_result(
            analysis_status.refinery_import_task_group_id).delete()
        galaxy_import_taskset.delete()
        analysis.galaxy_cleanup()
        return
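Examples #21 and #30 persist the TaskSetResult (save()) and stash its taskset_id on a status record so that a later retry of the orchestrating task can look the same result up again and check ready()/successful() without re-dispatching anything; get_taskset_result is the project's wrapper around that lookup. The Celery primitive underneath is TaskSetResult.save()/restore(); a minimal sketch, assuming a pre-4.0 Celery with a result backend and a hypothetical import_file task:

from celery import Celery
from celery.result import TaskSetResult
from celery.task.sets import TaskSet  # pre-4.0 import path

app = Celery('sketch', broker='memory://', backend='cache+memory://')

@app.task
def import_file(path):
    return path  # placeholder for the real import work

result = TaskSet(tasks=[import_file.subtask(('a.txt',))]).apply_async()
result.save()                  # persist the set of result ids in the backend
group_id = result.taskset_id   # store this id somewhere durable

# ...later, e.g. on a retry of the orchestrating task...
restored = TaskSetResult.restore(group_id)
if restored.ready() and restored.successful():
    pass  # safe to move on to the next stage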
Example #22
def _run(fn_name, xs):
    fn = getattr(tasks, fn_name)
    job = TaskSet(tasks=[apply(fn.subtask, (x,)) for x in xs])
    result = job.apply_async()
    out = []
    if wait:
        with _close_taskset(result):
            while not result.ready():
                time.sleep(5)
                if result.failed():
                    raise ValueError("Failed distributed task; cleaning up")
            for x in result.join():
                if x:
                    out.extend(x)
    return out
Example #23
File: cron.py Project: zuzelvp/olympia
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    raise_if_reindex_in_progress('amo')
    cursor = connections[multidb.get_slave()].cursor()
    q = """SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 7 DAY)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_average_daily_users.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    TaskSet(ts).apply_async()
Example #24
def _attach_workflow_outputs(analysis_uuid):
    """
    Attach the resulting files from the Galaxy workflow execution to
    our Analysis
    """
    analysis = _get_analysis(analysis_uuid)
    if analysis.workflow.type == Workflow.ANALYSIS_TYPE:
        tasks = analysis.attach_derived_nodes_to_dataset()
        logger.info("Starting auxiliary file creation for analysis '%s'",
                    analysis)
        TaskSet(tasks=tasks).apply_async()
    elif analysis.workflow.type == Workflow.DOWNLOAD_TYPE:
        analysis.attach_outputs_downloads()
    else:
        logger.warning("Unknown workflow type '%s' in analysis '%s'",
                       analysis.workflow.type, analysis.name)
Example #25
def get_clients_with_prepaid_accounts():
    sql = text("""
        SELECT DISTINCT(leads_id) FROM subcontractors
        WHERE prepaid = 'yes'
        """)
    conn = engine.connect()
    r = conn.execute(sql).fetchall()

    client_ids = []
    for x in r:
        client_ids.append(x[0])

    # Retrieve the leads_id of timesheet records from the past 90 days. This
    # keeps subcontractors that were terminated recently from being dropped
    # from the adjustment.
    now = get_ph_time()
    prev_date = now - timedelta(days=90)

    now_string = now.strftime('%F %T')
    prev_date_string = prev_date.strftime('%F %T')

    sql = text("""
        SELECT DISTINCT(t.leads_id) 
        FROM timesheet AS t
        JOIN subcontractors AS s
        ON t.leads_id = s.leads_id
        WHERE s.prepaid = 'yes'
        AND month_year BETWEEN :prev_date_string AND :now_string
        """)
    r = conn.execute(sql,
                     prev_date_string=prev_date_string,
                     now_string=now_string)

    for x in r:
        client_id = x[0]
        if client_id not in client_ids:
            client_ids.append(client_id)

    tasks = []
    for client_id in client_ids:
        y = get_client_details.subtask((client_id, ))
        tasks.append(y)

    job = TaskSet(tasks=tasks)
    result = job.apply_async()
    data = result.join()
    conn.close()
    return data
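Examples #22 and #25 use TaskSetResult.join() to pull every subtask's return value back into the caller, effectively turning the TaskSet into a parallel map. join() blocks until all results have arrived and re-raises the first task exception it meets, so it requires a result backend. A minimal sketch, assuming a pre-4.0 Celery and a hypothetical fetch_client task:

from celery import Celery
from celery.task.sets import TaskSet  # pre-4.0 import path

app = Celery('sketch', broker='memory://', backend='cache+memory://')

@app.task
def fetch_client(client_id):
    return {'id': client_id}  # placeholder for the real lookup

job = TaskSet(tasks=[fetch_client.subtask((cid,)) for cid in (1, 2, 3)])
result = job.apply_async()
data = result.join()  # blocks; returns the results in task order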
Example #26
    def test_apply_async(self):

        applied = [0]

        class mocksubtask(subtask):
            def apply_async(self, *args, **kwargs):
                applied[0] += 1

        ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
        ts.apply_async()
        self.assertEqual(applied[0], 3)

        class Publisher(object):
            def send(self, *args, **kwargs):
                pass

        ts.apply_async(publisher=Publisher())
Example #27
    def submit_and_retry_if_celery_fails(self, submissions):

        submission_task = TaskSet(
            self.send_parallel_submissions_task.subtask(
                (self, submission),
                retry=True,
                retry_policy={
                    'max_retries':
                    settings.CELERY_NUMBER_OF_RETRIES_IN_CASE_OF_FAILURE,
                    'interval_start':
                    settings.CELERY_TIME_TO_WAIT_BEFORE_RETRYING_SUBMISSION
                }) for submission in submissions)

        submission_job = submission_task.apply_async()
        wait_until_its_done = submission_job.get()

        return submission_job
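Example #27 attaches retry=True and a retry_policy to each signature; these are publish-side options (retrying the send to the broker if the connection fails), separate from task-level retries inside the worker. A small sketch of the option shape, assuming a configured broker and a hypothetical send_submission task; the intervals are in seconds:

from celery import Celery

app = Celery('sketch', broker='memory://')

@app.task
def send_submission(payload):
    return payload  # placeholder for the real submission call

send_submission.apply_async(
    args=({'id': 1},),
    retry=True,
    retry_policy={
        'max_retries': 3,       # give up after three publish attempts
        'interval_start': 0.2,  # wait 0.2s before the first retry
        'interval_step': 0.5,   # add 0.5s to the wait after each retry
        'interval_max': 2.0,    # but never wait more than 2s between retries
    },
)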
Example #28
    def test_interface__compat(self):
        with catch_warnings(record=True) as log:
            ts = TaskSet(MockTask, [[(2, 2)], [(4, 4)], [(8, 8)]])
            self.assertListEqual(ts.tasks,
                                 [MockTask.subtask((i, i)) for i in (2, 4, 8)])
            self.assertIn("Using this invocation of TaskSet is deprecated",
                          log[0].message.args[0])
            log[:] = []
            self.assertEqual(ts.task, registry.tasks[MockTask.name])
            self.assertTrue(log)
            self.assertIn("TaskSet.task is deprecated", log[0].message.args[0])

            log[:] = []
            self.assertEqual(ts.task_name, MockTask.name)
            self.assertTrue(log)
            self.assertIn("TaskSet.task_name is deprecated",
                          log[0].message.args[0])
Example #29
def apply_async(self):
    tasks = []
    for node in self.children:
        func = node.func
        args = node.args
        kwargs = node.kwargs
        callback = kwargs.pop('callback', [])
        if not isinstance(callback, (list, tuple)):
            callback = [callback]
        subtasks = node._get_child_tasks()
        callback += subtasks
        kwargs = dict(callback=callback, **kwargs)
        _task = func.subtask(args=args, kwargs=kwargs)
        tasks.append(_task)
    taskset = TaskSet(tasks)
    result = taskset.apply_async()
    return result
Example #30
def _run_galaxy_workflow(analysis_uuid):
    """
    Create DataSetCollection objects in galaxy, and invoke the workflow
    belonging to our tool.
    """
    analysis = _get_analysis(analysis_uuid)
    analysis_status = _get_analysis_status(analysis_uuid)
    tool = _get_workflow_tool(analysis_uuid)

    if not analysis_status.galaxy_workflow_task_group_id:
        logger.debug("Starting workflow execution in Galaxy")

        tool.update_file_relationships_with_galaxy_history_data()

        galaxy_workflow_tasks = [
            _invoke_galaxy_workflow.subtask((analysis_uuid, ))
        ]

        galaxy_workflow_taskset = TaskSet(
            tasks=galaxy_workflow_tasks).apply_async()

        galaxy_workflow_taskset.save()

        analysis_status.set_galaxy_workflow_task_group_id(
            galaxy_workflow_taskset.taskset_id)
        analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS)
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # Check on the status of the running galaxy workflow
    galaxy_workflow_taskset = get_taskset_result(
        analysis_status.galaxy_workflow_task_group_id)
    if not galaxy_workflow_taskset.ready():
        logger.debug("Analysis '%s' pending in Galaxy", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)

    elif not galaxy_workflow_taskset.successful():
        error_msg = "Analysis '{}' failed in Galaxy".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR)
        analysis.send_email()
        get_taskset_result(
            analysis_status.refinery_import_task_group_id).delete()
        galaxy_workflow_taskset.delete()
        analysis.galaxy_cleanup()
        return