Example #1
def launch_routing(job_id):
    """
    Launch routing and create_gds subtasks for job_id Job.

    Create a database session. Get the first Job object filtered by
    job_id. Get the count, range 2-tuples and create routing subtasks.
    Create a celery chord of the list of routing tasks and a create_gds
    subtask. If there is an error retry twice. Commit and close the
    database session.
    """
    db, job = dbutil.get_db_job(job_id)
    print("Launching routing for {0}_{1}_{2}".format(
        job.measurement_file.design_number,
        job.measurement_file.design_rev,
        job.measurement_file.panel_id))
    try:
        riemann.send({"host": config.HOST,
                      "service": "contasks.launch_routing",
                      "state": "start"})
        count = int(redisutil.measurement_file_count(job.measurement_file))
        workranges = onyxutil.workranges_for_units(count)
        routing = [nodetasks.route.subtask([job.id, workrange])
                   for workrange in workranges]
        create_gds = nodetasks.create_gds.subtask([job.id, len(workranges)])
        print("Launched {0} routing jobs".format(len(routing)))
        chord(routing)(create_gds)
    except TypeError as exc:
        riemann.send({"host": config.HOST,
                      "service": "contasks.launch_routing",
                      "state": "failed",
                      "description": exc.message})
        raise launch_routing.retry(exc=exc)
    db.commit()
    db.close()
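
The chord body here receives the list of header results prepended to the arguments already bound in the subtask, so create_gds is effectively called with (routing_results, job.id, len(workranges)). Below is a minimal sketch of that calling convention using hypothetical route_chunk and collect tasks; the task names, broker/backend URLs and the presence of a running worker are all assumptions, not part of the project above.

from celery import Celery, chord

app = Celery('sketch', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/1')  # assumed broker/backend

@app.task
def route_chunk(workrange):
    # stand-in for a routing task: return something per chunk
    return workrange[1] - workrange[0]

@app.task
def collect(chunk_results, job_id, n_chunks):
    # chunk_results (the header return values) is prepended to the bound args,
    # just as create_gds receives the routing results above
    return {'job_id': job_id, 'chunks': n_chunks, 'total': sum(chunk_results)}

workranges = [(0, 10), (10, 20)]
result = chord([route_chunk.s(wr) for wr in workranges])(
    collect.s(42, len(workranges)))  # 42 stands in for job.id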
Example #2
def lp_unapproved_commsupt():
    event.Event('event', {
        'task': 'overwatch_tasks',
        'info': {
            'message': 'started lp_unapproved_commsupt'
        }
    })
    unapproved_service_ids = report_tasks.get_unapproved_pilot_whales()
    if unapproved_service_ids:
        pending_service_ids = database_tasks.check_arbitration_status(unapproved_service_ids)
        ids_for_arbitration = [x for x in unapproved_service_ids if x not in pending_service_ids]
        batch_count = 0
        header = []
        for unapproved_service_id in ids_for_arbitration:
            if batch_count >= NOTE_ARBITRATION_BATCH_SIZE:
                chord(header)(adjust_notes.s())
                header = []
                batch_count = 0
            else:
                header.append(arbitrate_note.s(unapproved_service_id))
                batch_count += 1
        if len(header) > 0:
            chord(header)(adjust_notes.s())
    event.Event('event', {
        'task': 'overwatch_tasks',
        'info': {
            'message': 'finished lp_unapproved_commsupt'
        }
    })
Example #3
File: db.py Project: h1ds/h1ds
    def add_shot(self, shot, update=False):
        """Add shot to summary database.

        Arguments:
            shot (h1ds.models.Shot or shot number)

        Keyword arguments:
            update - if False: if the shot already exists then this will do nothing.
                   - if True: overwrite any existing entries for the shot

        """
        if not isinstance(shot, Shot):
            shot = Shot(number=shot, device=self.device)
        new_shot = not self.shot_exists(shot)
        if not update and not new_shot:
            return

        table_attributes = self.get_attributes_from_table(filter_initial_attributes=True)

        if new_shot or not update:
            task_name = insert_table_attributes
        else:
            task_name = update_table_attributes

        # Hack workaround - see notes at top of file
        shot_manager = get_backend_shot_manager_for_device(self.device)
        shot_timestamp = shot_manager().get_timestamp_for_shot(shot.number)

        chord(
            (get_summary_attribute_data.s(self.device.slug, shot.number, a) for a in table_attributes),
            task_name.s(table_name=self.table_name, shot_number=shot.number, shot_timestamp=str(shot_timestamp))
        ).apply_async()
Example #4
    def test_create_chord_exclude_body(self):
        """If the body task of a chord is not a UserTask, it should be cleanly omitted from the status."""
        chord([
            sample_task.s(self.user.id, '1', user_task_name='Chord: 1 & 2'),
            sample_task.s(self.user.id, '2', user_task_name='I should be ignored')
        ])(normal_task.s('3'))
        assert UserTaskStatus.objects.count() == 4
        chord_status = UserTaskStatus.objects.get(task_class='celery.chord')
        assert chord_status.task_id
        assert chord_status.parent is None
        assert chord_status.is_container
        assert chord_status.name == 'Chord: 1 & 2'
        assert chord_status.total_steps == 2
        verify_state(chord_status, False)

        group_status = UserTaskStatus.objects.get(task_class='celery.group')
        assert group_status.task_id
        assert group_status.parent_id == chord_status.id
        assert group_status.is_container
        assert group_status.name == 'Chord: 1 & 2'
        assert group_status.total_steps == 2
        verify_state(group_status, False)

        header_tasks = UserTaskStatus.objects.filter(parent=group_status)
        assert len(header_tasks) == 2
        for status in header_tasks:
            assert status.task_id
            assert status.parent_id == group_status.id
            assert not status.is_container
            assert status.name in ['SampleTask: 1', 'SampleTask: 2']
            assert status.total_steps == 1
            verify_state(status, False)
Example #5
def run(name, input_file, master_format='marc', reader_info={}, **kwargs):
    """Entry point to run any of the modes of the uploader.

    :param name: Upload mode, see `~.config.UPLOADER_WORKFLOWS` for more info.
    :type name: str
    :param input_file: Input blob in the master format, typically the content of an XML file.
    :type input_file: str
    :param master_format: Input file format, for example `marc`
    :type master_format: str
    :param reader_info: Any kind of information relevant to the reader, for
        example char encoding or special characters.
    :type reader_info: dict
    :param kwargs:
        * force:
        * pretend:
        * sync: False by default, if set to True the whole process will be
          treated synchronously
        * filename: original blob filename if it contains relative paths
    """
    signals.uploader_started.send(mode=name,
                                  blob=input_file,
                                  master_format=master_format,
                                  **kwargs)
    for chunk in split_blob(input_file, master_format,
                            cfg['UPLOADER_NUMBER_RECORD_PER_WORKER'],
                            **reader_info):
        chord(translate.starmap(
            [(blob, master_format, reader_info) for blob in chunk])
        )(run_workflow.s(name=name, **kwargs))
Example #6
def create_task():
    if not request.json or not 'id' in request.json:
        abort(400)
    task = {
        'id': request.json['id'],
        'text': request.json['text'],
    }
    clean_test_descripciones = []
    app.logger.info('petition_classification: ' + task['text'])
    features = review_words(task['text'])
    clean_test_descripciones.append(u" ".join(
        KaggleWord2VecUtility.review_to_wordlist(features, True)))

    # Uses chord to run two jobs and a callback after processing ends
    # 1) A text classifier
    # 2) A profanity filter
    # 3) A callback to put all together in a JSON
    callback = update_remote_petition.subtask()
    chord([
        evaluate_petition.s(task['id'], clean_test_descripciones),
        catch_bad_words_in_text.s(task['text'])
    ])(callback)

    return jsonify({'id': request.json['id'],
                    'text': request.json['text']}), 201
Example #7
def _delete_organization_buildings(org_pk, chunk_size=100, *args, **kwargs):
    """Deletes all BuildingSnapshot instances within an organization

    :param org_pk: int, str, the organization pk
    """
    qs = BuildingSnapshot.objects.filter(super_organization=org_pk)
    ids = qs.values_list('id', flat=True)
    deleting_cache_key = get_prog_key(
        'delete_organization_buildings',
        org_pk
    )
    if not ids:
        set_cache(deleting_cache_key, 'success', 100)
        return

    # delete the canonical buildings
    can_ids = CanonicalBuilding.objects.filter(
        canonical_snapshot__super_organization=org_pk
    ).values_list('id', flat=True)
    _delete_canonical_buildings.delay(can_ids)

    step = float(chunk_size) / len(ids)
    set_cache(deleting_cache_key, 'success', 0)
    tasks = []
    for del_ids in batch(ids, chunk_size):
        # we could also use .s instead of .subtask and not wrap the *args
        tasks.append(
            _delete_organization_buildings_chunk.subtask(
                (del_ids, deleting_cache_key, step, org_pk)
            )
        )
    chord(tasks, interval=15)(finish_delete.subtask([org_pk]))
Example #8
def task4(task, *args, **kwargs):
    print ('task4')
    time.sleep(2)

    header = [task5.s() for i in range(10)]

    chord(header)(task6.s())
Example #9
    def test_chord_in_chords_with_chains(self, manager):
        try:
            manager.app.backend.ensure_chords_allowed()
        except NotImplementedError as e:
            raise pytest.skip(e.args[0])

        c = chord(
            group([
                chain(
                    add.si(1, 2),
                    chord(
                        group([add.si(1, 2), add.si(1, 2)]),
                        add.si(1, 2),
                    ),
                ),
                chain(
                    add.si(1, 2),
                    chord(
                        group([add.si(1, 2), add.si(1, 2)]),
                        add.si(1, 2),
                    ),
                ),
            ]),
            add.si(2, 2)
        )

        r = c.delay()

        assert r.get(timeout=TIMEOUT) == 4
Example #10
def compute_user_post_aggregates(run_id):
    try:
        allids = "allids_{}".format(run_id)
        sql = """
            select
                {allids}.efid as efid_user,
                {posts}.post_id,
                max({posts}.efid) as efid_wall,
                max({posts}.post_from) as efid_poster,
                max({posts}.post_type) as post_type,
                bool_or({tagged}.tagged_efid is not null) as user_tagged,
                bool_or({likes}.liker_efid is not null) as user_likes,
                bool_or({comments}.commenter_id is not null) as user_commented,
                count(distinct {comments}.comment_id) as num_comments,
                bool_or({locales}.tagged_efid is not null) as user_placed
            from
                (
                    select distinct tagged_efid as efid, post_id from v2_post_tags
                    union
                    select distinct liker_efid as efid, post_id from v2_post_likes
                    union
                    select distinct commenter_id as efid, post_id from v2_post_comments
                    union
                    select distinct tagged_efid as efid, post_id from v2_user_locales
                ) {allids}
                join {posts} on ({allids}.post_id = {posts}.post_id)
                left join {likes} on ({allids}.efid = {likes}.liker_efid and {likes}.post_id = {posts}.post_id)
                left join {comments} on ({allids}.efid = {comments}.commenter_id and {comments}.post_id = {posts}.post_id)
                left join {tagged} on ({allids}.efid = {tagged}.tagged_efid and {tagged}.post_id = {posts}.post_id)
                left join {locales} on ({allids}.efid = {locales}.tagged_efid and {locales}.post_id = {posts}.post_id)
            where {posts}.efid in {affected_efid_subquery}
            group by 1, 2
        """
        bindings = {
            'posts': fbsync.POSTS_TABLE,
            'likes': fbsync.POST_LIKES_TABLE,
            'comments': fbsync.POST_COMMENTS_TABLE,
            'tagged': fbsync.POST_TAGS_TABLE,
            'locales': fbsync.USER_LOCALES_TABLE,
            'allids': allids,
        }

        fbsync.upsert(
            run_id,
            fbsync.USER_POST_AGGREGATES_TABLE,
            'efid_wall',
            sql,
            bindings,
            logger=logger
        )

        callback = compute_user_aggregates.s(run_id)
        celery.chord([
            compute_user_timeline_aggregates.s(run_id),
            compute_poster_aggregates.s(run_id),
            compute_edges.s(run_id),
        ])(callback)
    except Exception as exc:
        logger.exception("user_post aggregation for run_id %s failed due to \"%s\"", run_id, exc)
        raise
Example #11
    def run(self, pk):
        logger.info("MainSearchTask: starting request %s. pk: %s" %
                (self.request.id,
                 pk))
        try:
            # -----------------------------------------------------------------
            #   Execute subtasks to get results, and a final
            #   subtask to persist all the results.
            #
            #   Using a chord allows MainSearchTask to return immediately,
            #   execute subtasks in parallel, and then commit results in a
            #   final task.
            #
            #   Perform an initial get of the Search object to confirm it
            #   exists before launching subtasks.
            # -----------------------------------------------------------------
            search = Search.objects.get(pk = pk)
            subtasks = [subtask_class.subtask((search.pk, )) for subtask_class in self.subtask_classes]
            callback = CommitSearchResultsTask.subtask((search.pk, ))
            chord(subtasks)(callback)
            # -----------------------------------------------------------------

        except:
            logger.exception("MainSearchTask_%s: unhandled exception." % self.request.id)
            raise
Example #12
def prepare_custom_crons():
    envs = conf.platforms['poola-custom']
    for k in envs:
        host = envs[k][0]
        sites = fabfile.get_data_release_sites(host)
        #group(call_custom_cron.s(host, item) for item in sites).delay()
        chord(call_custom_cron.s(host, item) for item in sites)(check_cron_results.s(k))
Example #13
    def encode(self, profiles=[]):
        """
        Encode media with the specified ``EncodeProfile``s using
        asynchronous Celery tasks. The media is encoded into a
        temporary directory and then uploaded to the configured media
        server.

        If ``profiles`` is specified, encode with that list of
        ``EncodeProfile`` primary keys; otherwise, encode with all
        associated ``EncodeProfile``s.
        """
        from .tasks import encode_media, upload_media, encode_media_complete

        self.encoding = True
        self.save()

        if not profiles:
            profiles = list(self.profiles.values_list('pk', flat=True))

        tmpdir = tempfile.mkdtemp()
        group = []
        for profile_id in profiles:
            group.append(chain(encode_media.s(self.model_name, self.id,
                                              profile_id, tmpdir),
                               upload_media.s(self.model_name, self.id)))
        chord((group), encode_media_complete.si(self.model_name, self.id,
                                                tmpdir)).apply_async(countdown=5)
Example #14
def delete_organization(org_pk, deleting_cache_key, chunk_size=100, *args,
                        **kwargs):
    result = {
        'status': 'success',
        'progress': 0,
        'progress_key': deleting_cache_key
    }

    set_cache(deleting_cache_key, result['status'], result)

    if CanonicalBuilding.objects.filter(
            canonical_snapshot__super_organization=org_pk).exists():
        _delete_canonical_buildings.delay(org_pk)

    if BuildingSnapshot.objects.filter(super_organization=org_pk).exists():
        ids = list(
            BuildingSnapshot.objects.filter(
                super_organization=org_pk).values_list('id', flat=True)
        )

        step = float(chunk_size) / len(ids)
        tasks = []
        for del_ids in batch(ids, chunk_size):
            # we could also use .s instead of .subtask and not wrap the *args
            tasks.append(
                _delete_organization_buildings_chunk.subtask(
                    (del_ids, deleting_cache_key, step, org_pk)
                )
            )
        chord(tasks, interval=15)(_delete_organization_related_data.subtask(
            [org_pk, deleting_cache_key]))
    else:
        _delete_organization_related_data(None, org_pk, deleting_cache_key)
Example #15
    def test_nested_chord(self, manager):
        try:
            manager.app.backend.ensure_chords_allowed()
        except NotImplementedError as e:
            raise pytest.skip(e.args[0])

        c1 = chord([
            chord([add.s(1, 2), add.s(3, 4)], add.s([5])),
            chord([add.s(6, 7)], add.s([10]))
        ], add_to_all.s(['A']))
        res1 = c1()
        assert res1.get(timeout=TIMEOUT) == [[3, 7, 5, 'A'], [13, 10, 'A']]

        c2 = group([
            group([add.s(1, 2), add.s(3, 4)]) | add.s([5]),
            group([add.s(6, 7)]) | add.s([10]),
        ]) | add_to_all.s(['A'])
        res2 = c2()
        assert res2.get(timeout=TIMEOUT) == [[3, 7, 5, 'A'], [13, 10, 'A']]

        c = group([
            group([
                group([
                    group([
                        add.s(1, 2)
                    ]) | add.s([3])
                ]) | add.s([4])
            ]) | add.s([5])
        ]) | add.s([6])

        res = c()
        assert [[[[3, 3], 4], 5], 6] == res.get(timeout=TIMEOUT)
Example #16
def post_update_social(post_id):

    logger.debug("start: social scoring")

    if getattr(settings, 'FEEDS_POST_UPDATE_SOCIAL', False):
        try:
            p = Post.objects.get(pk=post_id)
        except Post.DoesNotExist:
            logger.debug("Post %s does not exist." % (post_id))
            return 0

        header = []

        f = (post_update_twitter.subtask((p.id, )))
        header.append(f)
        f = (post_update_facebook.subtask((p.id, )))
        header.append(f)
        f = (post_update_linkedin.subtask((p.id, )))
        header.append(f)

        callback = tsum.s(p)
        chord(header)(callback)

        logger.debug("stop: social scoring. got %s" % p.score)
        return p.score
    else:
        """
        Disabled, return 0.
        """
        return 0
Example #17
def start_calculation(self, test_suite_id):
    logger.info('Running test_suite %s', test_suite_id)
    with transaction.atomic():
        TestSuite.objects.filter(pk=test_suite_id).update(status=INPROGRESS)
        tests = TestRun.objects.select_related('dataset').filter(testsuite=test_suite_id, status=NEW)
        chord(test_func.s(d) for t in tests for d in json.loads(t.dataset.data))(update_db.s([t.id for t in tests]))
        tests.update(status=INPROGRESS)
Example #18
def ingestImage(self, imageId):
    image = Image().load(imageId, force=True)

    if image['ingested']:
        logger.warning('Attempting to ingest an already ingested image.')
        return

    try:
        imageFile = Image().originalFile(image)
        originalFileStreamResponse = self.session.get(
            'file/%s/download' % imageFile['_id'])
        originalFileStreamResponse.raise_for_status()
        originalFileStreamResponse = io.BytesIO(originalFileStreamResponse.content)

        # Scikit-Image is ~70ms faster at decoding image data
        imageData = ScikitSegmentationHelper.loadImage(originalFileStreamResponse)
        image['meta']['acquisition']['pixelsY'] = imageData.shape[0]
        image['meta']['acquisition']['pixelsX'] = imageData.shape[1]
    except Exception:
        logger.exception('Failed to validate image')
        image['readable'] = False
        image['ingested'] = True
        Image().save(image)
        return

    image['readable'] = True
    image['ingestionState'] = {
        'largeImage': None,
        'superpixelMask': None
    }
    Image().save(image)

    callback = markImageIngested.s(image['_id'])
    chord([generateSuperpixels.s(image['_id']),
           generateLargeImage.s(image['_id'])])(callback)
Example #19
def scanAppAnnieTrackId():
    releaseAllAccounts()
    c = Ec2()
    c.launchSpotInstance(3,'single_worker',True)
    appList = IosApp.objects.filter(trackId__lt=0)
    #for item in appList:
    #    getBasicDataFromAppAnnie(item)
    chord( [getBasicDataFromAppAnnie.delay(item) for item in appList ])(c.shutdown.delay()).get()
Example #20
def scanAppAnnieStartDate():
    releaseAllAccounts()
    c = Ec2()
    c.launchSpotInstance(7,'single_worker',True)
    appList = IosApp.objects.filter(ratingCount__gt=0, minDate="")
    #for item in appList:
    #    getMinDateForAppAnnie(item)
    chord( [getMinDateForAppAnnie.delay(item) for item in appList ])(shutdown.delay(c)).get()
Example #21
def create_pickles():
    lists = pickle_tasks.build_credible_lists()
    report_list = lists['report_list']
    mailing_list = lists['mailing_list']
    build_header = []
    for report_dict in report_list:
        build_header.append(build_credible_report.s(report_dict))
    chord(build_header)(build_master_workbook.s(mailing_list=mailing_list))
Example #22
def updateTrafficWeekely():
    c = Ec2()
    c.launchSpotInstance(7,'two_workers')
    fetcher = WebDataFetcher()
    companyList = Company.objects.filter(analysed=True)
    #for item in companyList:
    #    fetcher.fetcheAlexaDataAuto(item)
    chord( [ fetcher.fetcheAlexaDataAuto.delay(item)  for item in companyList ])(c.shutdown.delay())
Example #23
 def test_forward_options(self):
     body = xsum.s()
     x = chord([add.s(i, i) for i in range(10)], body=body)
     x.apply_async(group_id='some_group_id')
     self.assertEqual(body.options['group_id'], 'some_group_id')
     x2 = chord([add.s(i, i) for i in range(10)], body=body)
     x2.apply_async(chord='some_chord_id')
     self.assertEqual(body.options['chord'], 'some_chord_id')
Example #24
def build_sitemaps():
    """
    Build and save sitemap files for every MDN language and as a
    callback save the sitemap index file as well.
    """
    tasks = [build_locale_sitemap.si(locale) for locale in settings.MDN_LANGUAGES]
    post_task = build_index_sitemap.s()
    chord(header=tasks, body=post_task).apply_async()
Example #25
def webDataProcessor():
    c = Ec2()
    c.launchSpotInstance(10,'two_workers')
    companyList = Company.objects.filter(analysed=True)
    #for company in companyList:
    #    print company.id
    #    webBundleTask(company)
    chord( [ webBundleTask.delay(item)  for item in companyList ])(c.shutdown.delay())
Example #26
def update_all_markets(*args, **kwargs):
    market_updates = []
    for market in Market.objects.filter(
                    Q(cached_until__lte=datetime.now(tz=UTC)) | Q(cached_until=None)):
        market_updates.extend(update_market(market.locationID))
        market.updated()
        log.info('Updating "{0}" market'.format(get_location_name(market.locationID)))
    chord(market_updates, write_static_prices.s()).apply_async()
Example #27
def calculate_similarities(service_id):
    header = []
    fields = report_tasks.get_commsupt_fields(service_id)
    service_id = fields['clientvisit_id']
    del fields['clientvisit_id']
    for field_name in fields:
        field_value = fields[field_name]
        header.append(calculate_field_similarities.s(field_name, field_value, service_id))
    chord(header)(store_results.s())
Example #28
File: tasks.py Project: Elchi3/kuma
def build_sitemaps():
    """
    Build and save sitemap files for every MDN language and as a
    callback save the sitemap index file as well.
    """
    tasks = [build_locale_sitemap.si(lang[0]) for lang in settings.LANGUAGES]
    post_task = build_index_sitemap.s()
    # we retry the chord unlock 300 times, so 5 mins with an interval of 1s
    chord(header=tasks, body=post_task).apply_async(max_retries=300, interval=1)
Example #29
def commit_import_event(import_type, import_event_id):
    ie = _get_import_event(import_type, import_event_id)

    commit_tasks = [_commit_rows.s(import_type, import_event_id, i)
                    for i in xrange(0, ie.row_count, BLOCK_SIZE)]

    finalize_task = _finalize_commit.si(import_type, import_event_id)

    chord(commit_tasks, finalize_task).delay()
Example #30
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV or XLSX file and save the raw data into the DB BuildingSnapshot table."""

    result = {'status': 'success', 'progress': 100}
    prog_key = get_prog_key('save_raw_data', file_pk)
    try:
        import_file = ImportFile.objects.get(pk=file_pk)
        if import_file.raw_save_done:
            result['status'] = 'warning'
            result['message'] = 'Raw data already saved'
            set_cache(prog_key, result['status'], result)
            return result

        if import_file.source_type == "Green Button Raw":
            return _save_raw_green_button_data(file_pk, *args, **kwargs)

        parser = reader.MCMParser(import_file.local_file)
        cache_first_rows(import_file, parser)
        rows = parser.next()
        import_file.num_rows = 0
        import_file.num_columns = parser.num_columns()

        # Why are we setting the num_rows to the number of chunks?
        tasks = []
        for chunk in batch(rows, 100):
            import_file.num_rows += len(chunk)
            tasks.append(_save_raw_data_chunk.s(chunk, file_pk, prog_key))

        import_file.save()

        # need to rework how the progress keys are implemented here
        tasks = add_cache_increment_parameter(tasks)
        if tasks:
            chord(tasks, interval=15)(finish_raw_save.s(file_pk))
        else:
            finish_raw_save.s(file_pk)

    except StopIteration:
        result['status'] = 'error'
        result['message'] = 'StopIteration Exception'
        result['stacktrace'] = traceback.format_exc()
    except Error as e:
        result['status'] = 'error'
        result['message'] = 'File Content Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    except KeyError as e:
        result['status'] = 'error'
        result['message'] = 'Invalid Column Name: "' + e.message + '"'
        result['stacktrace'] = traceback.format_exc()
    except Exception as e:
        result['status'] = 'error'
        result['message'] = 'Unhandled Error: ' + str(e.message)
        result['stacktrace'] = traceback.format_exc()

    set_cache(prog_key, result['status'], result)
    return result
Example #31
    def check_upload(self, file_upload, listed=True):
        """Check that the given new file upload is validated properly."""
        # Run validator.
        utils.Validator(file_upload, listed=listed)

        channel = amo.RELEASE_CHANNEL_LISTED if listed else amo.RELEASE_CHANNEL_UNLISTED

        # Make sure we setup the correct validation task.
        self.mock_chain.assert_called_once_with(
            tasks.create_initial_validation_results.si(),
            repack_fileupload.s(file_upload.pk),
            tasks.validate_upload.s(file_upload.pk, channel),
            tasks.check_for_api_keys_in_file.s(file_upload.pk),
            chord(
                [tasks.forward_linter_results.s(file_upload.pk)],
                call_mad_api.s(file_upload.pk),
            ),
            tasks.handle_upload_validation_result.s(file_upload.pk, channel,
                                                    False),
        )
Example #32
    def execute(self, xs, ys):
        mul_sigs = [
            signature("multiper.worker.work",
                      kwargs={
                          'x': x,
                          'y': y
                      },
                      app=self.app,
                      queue="multiper_queue") for x, y in zip(xs, ys)
        ]
        mulgrp = group(*mul_sigs)
        sum_sig = signature("adder.worker.work",
                            app=self.app,
                            queue="adder_queue")
        softmax_sig = signature("softmaxer.worker.work",
                                app=self.app,
                                queue="softmaxer_queue")

        flow = chord(mulgrp)(chain(sum_sig, softmax_sig))
        return flow
Example #33
    def test_does_not_add_run_wat_when_disabled(self, mock_chain):
        self.create_switch('enable-wat', active=False)
        file_upload = self.get_upload('webextension.xpi',
                                      with_validation=False)
        channel = amo.RELEASE_CHANNEL_LISTED

        utils.Validator(file_upload, listed=True)

        mock_chain.assert_called_once_with(
            tasks.create_initial_validation_results.si(),
            repack_fileupload.s(file_upload.pk),
            tasks.validate_upload.s(file_upload.pk, channel),
            tasks.check_for_api_keys_in_file.s(file_upload.pk),
            chord(
                [tasks.forward_linter_results.s(file_upload.pk)],
                call_mad_api.s(file_upload.pk),
            ),
            tasks.handle_upload_validation_result.s(file_upload.pk, channel,
                                                    False),
        )
Example #34
def batch_do(doctype_query_or_list,
             function,
             task,
             field,
             force=False,
             bulksize=50,
             *args,
             **kwargs):
    """
    Applies a function:task combination to all documents given, but saves them in batches to avoid overloading the database.
    """
    documents = _doctype_query_or_list(doctype_query_or_list,
                                       force=force,
                                       field=field,
                                       task=task)
    batchjobs = []
    target_func = taskmaster.tasks[identify_task(function, task)]
    if not LOCAL_ONLY:
        for batch in _batcher(documents, batchsize=bulksize):
            if not batch: continue  #ignore empty batches
            batch_tasks = [
                target_func.s(document=doc,
                              field=field,
                              force=force,
                              *args,
                              **kwargs) for doc in batch
            ]
            batch_chord = chord(batch_tasks)
            batch_result = batch_chord(
                taskmaster.tasks['core.database.bulk_upsert'].s())
            batchjobs.append(batch_result)
        return group(batch for batch in batchjobs)
    else:
        for num, batch in enumerate(_batcher(documents, batchsize=bulksize)):
            core.database.bulk_upsert().run(documents=[
                target_func.run(
                    document=doc, field=field, force=force, *args, **kwargs)
                for doc in batch
            ])
            now = datetime.datetime.now()
            logger.info("processed batch {num} {now}".format(**locals()))
Example #35
    def handle(self, *args, **options):
        task = tasks.get(options.get('task'))
        if not task:
            raise CommandError('Unknown task provided. Options are: %s'
                               % ', '.join(tasks.keys()))
        if options.get('with_deleted'):
            addon_manager = Addon.unfiltered
        else:
            addon_manager = Addon.objects
        if options.get('ids'):
            ids_list = options.get('ids').split(',')
            addon_manager = addon_manager.filter(id__in=ids_list)
        pks = (addon_manager.filter(*task['qs'])
                            .values_list('pk', flat=True)
                            .order_by('id'))
        if options.get('limit'):
            pks = pks[:options.get('limit')]
        if 'pre' in task:
            # This is run in process to ensure it's run before the tasks.
            pks = task['pre'](pks)
        if pks:
            kwargs = task.get('kwargs', {})
            if task.get('allowed_kwargs'):
                kwargs.update({
                    arg: options.get(arg, None)
                    for arg in task['allowed_kwargs']})
            # All the remaining tasks go in one group.
            grouping = []
            for chunk in chunked(pks, 100):
                grouping.append(
                    task['method'].subtask(args=[chunk], kwargs=kwargs))

            # Add the post task on to the end.
            post = None
            if 'post' in task:
                post = task['post'].subtask(
                    args=[], kwargs=kwargs, immutable=True)
                ts = chord(grouping, post)
            else:
                ts = group(grouping)
            ts.apply_async()
Example #36
    def test_redis_subscribed_channels_leak(self, manager):
        if not manager.app.conf.result_backend.startswith('redis'):
            raise pytest.skip('Requires redis result backend.')

        manager.app.backend.result_consumer.on_after_fork()
        initial_channels = get_active_redis_channels()
        initial_channels_count = len(initial_channels)

        total_chords = 10
        async_results = [
            chord([add.s(5, 6), add.s(6, 7)])(delayed_sum.s())
            for _ in range(total_chords)
        ]

        manager.assert_result_tasks_in_progress_or_completed(async_results)

        channels_before = get_active_redis_channels()
        channels_before_count = len(channels_before)

        assert set(channels_before) != set(initial_channels)
        assert channels_before_count > initial_channels_count

        # The total number of active Redis channels at this point
        # is the number of chord header tasks multiplied by the
        # total chord tasks, plus the initial channels
        # (existing from previous tests).
        chord_header_task_count = 2
        assert channels_before_count <= \
            chord_header_task_count * total_chords + initial_channels_count

        result_values = [
            result.get(timeout=TIMEOUT)
            for result in async_results
        ]
        assert result_values == [24] * total_chords

        channels_after = get_active_redis_channels()
        channels_after_count = len(channels_after)

        assert channels_after_count == initial_channels_count
        assert set(channels_after) == set(initial_channels)
Example #37
def update_real_time_data(today=True):
    countries_ids = Country.objects.values_list('id', flat=True)

    if today:
        date = timezone.now().date()
        aggregate_task = aggregate_country_data
    else:
        date = timezone.now().date() - timedelta(days=1)
        aggregate_task = finalize_daily_data

    chain(
        load_data_from_unicef_db.s(),
        load_brasil_daily_statistics.s(),
        chord(
            group([
                aggregate_task.s(country_id, date)
                for country_id in countries_ids
            ]),
            finalize_task.si(),
        ),
    ).delay()
Example #38
def run_mr_chunk(data):
    """ Map-reduce task where:
    1. Map tasks are running in parallel
    2. Celery aggregate the mappers results
    3. Single reduce task receives whole set of mappers results
    4. Client receives reduced result from the reducer
    """

    # Cretate map tasks
    maps = (tasks.map_chunk.map(x) for x in data)
    
    # Cretate reduce tasks
    mapreducer = celery.chord(maps)(tasks.reduce.s())

    mapper = mapreducer.parent
    reducer = mapreducer
    
    # Required for celery.result.GroupResult.restore
    mapper.save()
    
    return (mapper.id, reducer.id)
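
Since mapper.save() stores the GroupResult under its id, a client that only holds the returned (mapper.id, reducer.id) pair can later reattach to the run. A rough sketch of that retrieval side, assuming Celery 4+ and that the same app instance is available; the helper itself is not part of the original module.

from celery.result import AsyncResult, GroupResult

def fetch_mr_status(app, mapper_id, reducer_id, timeout=None):
    # Reattach to the saved group of map tasks; this is what mapper.save()
    # above makes possible via GroupResult.restore
    mapper = GroupResult.restore(mapper_id, app=app)
    reducer = AsyncResult(reducer_id, app=app)
    return {
        'maps_completed': mapper.completed_count() if mapper else None,
        'reduced': reducer.get(timeout=timeout) if reducer.ready() else None,
    }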
Example #39
def dump_user_installs_cron():
    """
    Sets up tasks to do user install dumps.
    """
    chunk_size = 100
    # Get valid users to dump.
    user_ids = set(Installed.objects.filter(addon__type=amo.ADDON_WEBAPP)
                   .values_list('user', flat=True))

    # Remove old dump data before running.
    user_dir = os.path.join(settings.DUMPED_USERS_PATH, 'users')
    if os.path.exists(user_dir):
        shutil.rmtree(user_dir)

    grouping = []
    for chunk in chunked(user_ids, chunk_size):
        grouping.append(dump_user_installs.subtask(args=[chunk]))

    post = zip_users.subtask(immutable=True)
    ts = chord(grouping, post)
    ts.apply_async()
Example #40
def run_partial_mr(data):
    """ Map-reduce task with local reduce step where:
    1. Map tasks are running in parallel
    2. Local reducers are running in parallel 
    3. Local reducers are receving part of mappers results
    4. Celery aggregate the local reducers results
    5. Single reduce task receives partly reduced (by local reducers) data
    6. Client receives reduced result from the global reducer
    """
    # Cretate map tasks chaned with partial reduce tasks
    maps = (celery.chain(tasks.map_bulk.s(x), tasks.partial_reduce.s()) for x in data)
    # Create global reduce task
    mapreducer = celery.chord(maps)(tasks.partial_reduce.s())

    mapper = mapreducer.parent
    reducer = mapreducer
    
    # required for celery.result.GroupResult.restore
    mapper.save()
    
    return (mapper.id, reducer.id)
Example #41
    def test_appends_final_task_for_file_uploads(self, mock_chain):
        final_task = mock.Mock()
        file_upload = self.get_upload('webextension.xpi',
                                      with_validation=False)
        channel = amo.RELEASE_CHANNEL_LISTED

        utils.Validator(file_upload, listed=True, final_task=final_task)

        mock_chain.assert_called_once_with(
            tasks.create_initial_validation_results.si(),
            repack_fileupload.s(file_upload.pk),
            tasks.validate_upload.s(file_upload.pk, channel),
            tasks.check_for_api_keys_in_file.s(file_upload.pk),
            chord(
                [tasks.forward_linter_results.s(file_upload.pk)],
                call_mad_api.s(file_upload.pk),
            ),
            tasks.handle_upload_validation_result.s(file_upload.pk, channel,
                                                    False),
            final_task,
        )
Example #42
    def __create_canvas(self, operator=None, is_retry=False):
        deployment_id = self.id
        tasks = self._meta.task_set
        fort_batch = self.get_fort_batch()
        target_canvases = [tgt.create_bake_canvas(operator)
                           for tgt in fort_batch.targets.all()]
        ts = [
            tasks.start_baking.si(tasks, deployment_id, operator),
            chord(target_canvases, tasks.finish_rolling_batch.si(
                tasks, deployment_id, fort_batch.id, operator)),
            tasks.finish_baking.si(tasks, deployment_id, operator)
        ]

        if is_retry:
            restart_rolling_batch = tasks.start_rolling_batch.subtask(
                args=(tasks, deployment_id, fort_batch.id, operator),
                immutable=True)
            ts.insert(1, restart_rolling_batch)

        canvas = chain(*ts)
        return canvas
Example #43
def run_db_mr(data):
    """ Map-reduce task with db-backed reduce step where:
    1. Map tasks are running in parallel
    2. Db-backed reduce steps are running in parallel 
    3. Db-backed reduce are aggregating mappers results in DB
    4. Celery aggregates nothing
    5. Client receives the DB-stored reduced result
    """
    
    # Cretate map tasks chaned with DB-backed reduce tasks
    maps = (celery.chain(tasks.map_bulk.s(x), tasks.db_reduce.s()) for x in data)
    # Create global reduce task
    mapreducer = celery.chord(maps)(tasks.get_db_result.s())

    mapper = mapreducer.parent
    reducer = mapreducer
    
    # required for celery.result.GroupResult.restore
    mapper.save()
    
    return (mapper.id, reducer.id)
Example #44
def index():
    simulations = 100000
    per_worker = 1000
    n = int(simulations / per_worker)

    S0 = 100
    K = 120
    T = 0.5
    r = 0.01
    sig = 0.1

    logger.info(f'Create chord, n={n}')

    task = chord([ArithAsian.s(
        S0=S0,
        K=K,
        T=T,
        r=r,
        sig=sig,
        n_simulation=per_worker) for i in range(0, n)], mean.s())()
    return jsonify({'id': str(task.id), 'status': task.status}), 201
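
The mean.s() body receives the list of per-worker estimates once all ArithAsian header tasks have finished. Its implementation is not shown here, but a plausible minimal version (the app task decorator is an assumption) is simply:

@app.task
def mean(estimates):
    # `estimates` is the list of values returned by the chord header tasks
    return sum(estimates) / len(estimates)

A client can then poll the id returned in the JSON response to fetch the averaged price once the chord completes.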
Example #45
def get_cad_risk_score(user_id):
    """ Given an API user id, perform the grs risk score calculations.
    This is the high level pipeline invocation method used to submit all
    subsequent and dependent steps. """
    with record('tasks.cad.get_cad_risk_score', user_id):
        step_1 = get_ancestry.s(user_id)
        steps_2_and_3 = [
            _get_cad_haplotypes.s(user_id, chunk[0]) | _impute_and_get_cad_risk_per_chunk.s(user_id, chunk)
            for chunk in steps.get_chunks()
        ]
        step_4 = _get_total_cad_risk.s(user_id)
        notify_user = (
            _send_cad_notification.si(user_id) | send_post_cad_survey_to_users.si(user_id)
        )

        workflow = chord(
            header=group([step_1, *steps_2_and_3]),
            body=step_4
        ) | _store_results.s(user_id) | notify_user

        workflow.delay()
Example #46
    def test_adds_yara_and_customs(self, mock_chain):
        self.create_switch('enable-customs', active=True)
        self.create_switch('enable-yara', active=True)
        file_upload = self.get_upload('webextension.xpi',
                                      with_validation=False)
        channel = amo.RELEASE_CHANNEL_LISTED

        utils.Validator(file_upload, listed=True)

        mock_chain.assert_called_once_with(
            tasks.create_initial_validation_results.si(),
            repack_fileupload.s(file_upload.pk),
            tasks.validate_upload.s(file_upload.pk, channel),
            chord([
                tasks.forward_linter_results.s(file_upload.pk),
                run_yara.s(file_upload.pk),
                run_customs.s(file_upload.pk),
            ],
                  tasks.handle_upload_validation_result.s(
                      file_upload.pk, channel, False)),
        )
Example #47
def init_site_category(request, site_id):
    data = {}
    if request.method == 'GET' and request.is_ajax():
        try:
            site = Website.objects.get(pk=site_id)
        except ObjectDoesNotExist:
            site = None
        data = {}
        if site is None:
            data.update({
                'state': 'failed',
                'msg': u'站点不存在,请刷新当前页面'
            })
        else:
            if not site.spider_class:
                data.update({
                    'state': 'failed',
                    'msg': u'初始化站点失败,请设置爬虫'
                })
            elif site.status == 'pending':
                data.update({
                    'state': 'failed',
                    'msg': u'正在初始化站点,请等待'
                })
            elif site.status == 'failed':
                data.update({
                    'state': 'failed',
                    'msg': u'初始化站点失败,请重试'
                })
            else:
                site.status = 'pending'
                site.save()
                task = chord([chain(initial_categories.s(site.pk), group_categories.s(initial_category_children.s(), site_id))], finish_init_category.s(site.pk))
                task.delay()
                data.update({
                    'state': 'success',
                })
    else:
        data.update({'state': 'failed', 'msg': 'Method not allowed.'})
    return JsonResponse(data)
Example #48
    def handle(self, *args, **options):
        tasks = self.get_tasks()
        task_info = tasks.get(options.get('task_info'))
        if not task_info:
            raise CommandError('Unknown task provided. Options are: %s' %
                               ', '.join(tasks.keys()))
        base_qs = self.get_base_queryset(options)
        pks = self.get_pks(
            base_qs,
            task_info['queryset_filters'],
            distinct=task_info.get('distinct'),
        )
        if options.get('limit'):
            pks = pks[:options.get('limit')]
        if 'pre' in task_info:
            # This is run in process to ensure it's run before the tasks.
            pks = task_info['pre'](pks)
        if pks:
            kwargs = task_info.get('kwargs', {})
            if task_info.get('allowed_kwargs'):
                kwargs.update({
                    arg: options.get(arg, None)
                    for arg in task_info['allowed_kwargs']
                })
            # All the remaining tasks go in one group.
            grouping = []
            for chunk in chunked(pks, options.get('batch_size')):
                grouping.append(task_info['task'].subtask(args=[chunk],
                                                          kwargs=kwargs))

            # Add the post task on to the end.
            post = None
            if 'post' in task_info:
                post = task_info['post'].subtask(args=[],
                                                 kwargs=kwargs,
                                                 immutable=True)
                ts = chord(grouping, post)
            else:
                ts = group(grouping)
            ts.apply_async()
Example #49
 def run(self, inventory):
     if self.task == "rollout":
         chord(
             [rollout.s(data=host) for host in inventory["hosts"]],
             complete.s(deployment_id=1),
         ).delay()
     elif self.task == "rollback":
         chord(
             [rollback.s(data=host) for host in inventory["hosts"]],
             complete.s(deployment_id=1),
         ).delay()
     elif self.task == "server_update":
         chord(
             [server_update.s(data=host) for host in inventory["hosts"]],
             complete.s(deployment_id=1),
         ).delay()
Example #50
    def create_job(tasks, *args, **kvargs):
        """Create celery signature with chord, group and chain

        :param tasks: list of celery tasks. Task can be a celery task or a dict like
            {'task':<celery task>, 'args':..}
        :return: celery signature
        """
        tasks.reverse()
        process = tasks.pop().signature(
            args, immutable=True, queue=task_manager.conf.TASK_DEFAULT_QUEUE)
        last_task = None
        for task in tasks:
            if not isinstance(task, list):
                if isinstance(task, dict):
                    internal_args = list(args)
                    internal_args.extend(task.get('args'))
                    item = task.get('task').signature(internal_args, immutable=True,
                                                       queue=task_manager.conf.TASK_DEFAULT_QUEUE)
                else:
                    item = task.signature(
                        args, immutable=True, queue=task_manager.conf.TASK_DEFAULT_QUEUE)
                if last_task is not None:
                    item.link(last_task)
            elif isinstance(task, list) and len(task) > 0:
                subitems = []
                for subtask in task:
                    if isinstance(subtask, dict):
                        internal_args = list(args)
                        internal_args.extend(subtask.get('args'))
                        subitem = subtask.get('task').signature(internal_args, immutable=True,
                                                                 queue=task_manager.conf.TASK_DEFAULT_QUEUE)
                    else:
                        subitem = subtask.signature(args, immutable=True,
                                                    queue=task_manager.conf.TASK_DEFAULT_QUEUE)
                    subitems.append(subitem)
                item = chord(subitems, last_task)
            last_task = item
        process.link(last_task)
        return process
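
Per the docstring, a list entry in tasks becomes a parallel chord sandwiched between the surrounding sequential steps. A hypothetical call is sketched below; the task names and extra argument are illustrative only and assume create_job is exposed as a staticmethod over registered Celery tasks.

# prepare runs first; work_a and work_b run in parallel as a chord whose
# body is the finalize step built from the dict entry
sig = create_job(
    [prepare, [work_a, work_b], {'task': finalize, 'args': ['cleanup']}],
    'resource-uuid',
)
sig.apply_async()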
Example #51
 def get(self, request):
     chord_key = get_random_string(6, string.ascii_lowercase)
     all_tasks = celery.chord(
         task_id="chord-%s" % chord_key,
         header=celery.group(
             tasks.process_chunk.subtask(args=(x, ),
                                         task_id="chord-%s-chunk-%s-%s" %
                                         (chord_key, i, x))
             for i, x in enumerate(range(10, 15))),
         # immutable = ignore results from parent
         body=celery.chain(
             tasks.post_step_1.subtask(args=(20, ),
                                       task_id="chord-%s-post-1" %
                                       chord_key,
                                       immutable=True),
             tasks.post_step_2.subtask(args=(20, ),
                                       task_id="chord-%s-post-1" %
                                       chord_key,
                                       immutable=True),
         ))
     result = all_tasks.apply_async()
     return Response(data=dict(chord_key=chord_key, result=repr(result)))
Example #52
    def run(self, submission_id):
        """Call :py:class:`SplitSubmissionHelper` to split
        :py:class:`uid.models.Submission` data.
        Call :py:class:`SubmitTask` for each
        batch of data and then call :py:class:`SubmissionCompleteTask` after
        all data were submitted"""

        logger.info("Starting %s for submission %s" %
                    (self.name, submission_id))

        uid_submission = self.get_uid_submission(submission_id)

        # call a helper class to create database objects
        submission_data_helper = SplitSubmissionHelper(uid_submission)

        # iterate over animal and samples
        submission_data_helper.process_data()

        # prepare to launch chord tasks
        submissioncomplete = SubmissionCompleteTask()

        # assign kwargs to chord
        callback = submissioncomplete.s(uid_submission_id=submission_id)

        submit = SubmitTask()
        header = [submit.s(pk) for pk in submission_data_helper.submission_ids]

        logger.debug("Preparing chord for %s tasks" % len(header))

        # call the chord task. The callback will be called only after all header tasks complete
        res = chord(header)(callback)

        logger.info("Start submission chord process for %s with task %s" %
                    (uid_submission, res.task_id))

        logger.info("%s completed" % self.name)

        # return a status
        return "success"
Example #53
def run_web_scanners(scan_information):
    # Deep copy just in case
    web_information = copy.deepcopy(scan_information)

    if web_information['type'] == 'domain':
        web_information['scan_type'] = 'target'
        web_information['target'] = web_information['domain']
        subdomains_http = mongo.get_responsive_http_resources(
            web_information['domain'])
        only_urls = list()
        for subdomain in subdomains_http:
            only_urls.append(subdomain['url'])
        web_information['target'] = only_urls

    # Chord is defined
    # We flag the scanned resources as 'scanned'
    execution_chord = chord(
        [
            # Fast_scans
            header_scan_task.s(web_information).set(queue='fast_queue'),
            http_method_scan_task.s(web_information).set(queue='fast_queue'),
            libraries_scan_task.s(web_information).set(queue='fast_queue'),
            ffuf_task.s(web_information).set(queue='fast_queue'),
            iis_shortname_scan_task.s(web_information).set(queue='fast_queue'),
            bucket_finder_task.s(web_information).set(queue='fast_queue'),
            token_scan_task.s(web_information).set(queue='fast_queue'),
            css_scan_task.s(web_information).set(queue='fast_queue'),
            firebase_scan_task.s(web_information).set(queue='fast_queue'),
            host_header_attack_scan.s(web_information).set(queue='fast_queue'),
            # Slow_scans
            cors_scan_task.s(web_information).set(queue='slow_queue'),
            ssl_tls_scan_task.s(web_information).set(queue='slow_queue'),
            acunetix_scan_task.s(web_information).set(queue='acunetix_queue'),
            burp_scan_task.s(web_information).set(queue='burp_queue')
        ],
        body=web_security_scan_finished.s().set(queue='fast_queue'),
        immutable=True)
    execution_chord.apply_async(queue='fast_queue', interval=60)
    return
Example #54
    def run(self, execution_id):
        execution = self._get_execution(execution_id)
        if execution.status == Execution.ABORTED:
            return
        execution.celery_task_id = self.request.id
        execution.save_start()

        ExecutionLiveLog.add(execution_id,
                             'execution_started',
                             status=execution.status,
                             time_start=execution.time_start)

        chord_chain = []
        for command in execution.commands.all():
            tasks = [
                CommandTask().si(execution_command_server_id=server.id)
                for server in command.servers.all()
            ]
            if len(tasks):
                chord_chain.append(chord(tasks, _dummy_callback.s()))
        chord_chain.append(ExecutionTaskFinish().si(execution_id))
        chain(chord_chain)()
Example #55
 def test_wait_chain(self):
     chain_tasks = [
         {"fixture_name": "0-A-2"},
         {"fixture_name": "0-A-3"},
         {"fixture_name": "0-B-1"}
     ]
     _c = []
     for task in chain_tasks:
         _c.append(wait.s(**task))
     logger.info(_c)
     _chains = []
     _chains.append(chain(_c))
     # t = chain(
     #         wait.s({"priority":0, "fixture_name": "0-A"}),
     #         chord(
     #             _chains,
     #             wait.s({"priority":0, "fixture_name": "0-B"})
     #         ),
     #         wait.s({"priority":0, "fixture_name": "0-C"}),
     #     )
     temp = chord(
                 _chains,
                 wait.s({"priority":0, "fixture_name": "0-B"})
             )
     result = temp.apply_async(priority=0)
     logger.info(result)
     complete = False
     while not complete:
         complete = True
         if result.state != "SUCCESS":
             complete = False
         else:
             logger.info(result.result)
     self.assertEqual(
         success,
         ["0-C", "1-C", "3-C", "2-C"],
         "Numeric Priority not completed in expected order"
     )
Example #56
    def test_simple_chord_with_a_delay_in_group_save(self, manager,
                                                     monkeypatch):
        try:
            manager.app.backend.ensure_chords_allowed()
        except NotImplementedError as e:
            raise pytest.skip(e.args[0])

        if not isinstance(manager.app.backend, BaseKeyValueStoreBackend):
            raise pytest.skip("The delay may only occur in the cache backend")

        x = BaseKeyValueStoreBackend._apply_chord_incr

        def apply_chord_incr_with_sleep(self, *args, **kwargs):
            sleep(1)
            x(self, *args, **kwargs)

        monkeypatch.setattr(BaseKeyValueStoreBackend, '_apply_chord_incr',
                            apply_chord_incr_with_sleep)

        c = chord(header=[add.si(1, 1), add.si(1, 1)], body=tsum.s())

        result = c()
        assert result.get(timeout=TIMEOUT) == 4
Example #57
def run_async_flow(data):

    run_id = data['run_id']
    ws_id = data['ws_id']

    header = [
        ((party_a.s(data) | update_ws.s()) | (model1.s() | update_ws.s())),
        ((party_b.s(data) | update_ws.s()) | (model1.s() | update_ws.s())),
        ((party_c.s(data) | update_ws.s()) | (model1.s() | update_ws.s()))
    ]

    callback = (agg_submodels.s(run_id=run_id, ws_id=ws_id) | meta_model.s()
                | update_ws.s())

    res = chord(header)(callback)

    # res = chord([
    #         party_a.subtask(kwargs={'ws_id':ws_id}, options={'link':model1.subtask(kwargs={'ws_id':ws_id})}),
    #         party_b.subtask(kwargs={'ws_id':ws_id}, options={'link':model1.subtask(kwargs={'ws_id':ws_id})}),
    #         party_c.subtask(kwargs={'ws_id':ws_id}, options={'link':model1.subtask(kwargs={'ws_id':ws_id})})
    #       ])(agg_submodels.subtask(options={'link':meta_model.subtask(kwargs={'ws_id':ws_id})}))

    return 'running get_third_parties'
Example #58
def to_download(domain: str,
                callback: Signature,
                days: int = 1) -> str:
    """Query urls to download
    """
    logs = []  # type: List[Dict[str, str]]

    tmpdir = tempfile.mkdtemp('.logs', '-'.join([str(days), 'scripture.']))
    yesterday = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
    request = LogRequest.DescribeCdnDomainLogsRequest()
    request.set_DomainName(domain)
    request.set_LogDay(yesterday)
    request.set_PageSize(30)
    response = client.do_action_with_exception(request)  # type: str
    resp = json.loads(response)  # type: Dict
    domain_log_details = resp['DomainLogModel']['DomainLogDetails']
    for log_detail in domain_log_details['DomainLogDetail']:
        log_name = os.path.join(tmpdir, log_detail['LogName'])
        logs.append({'log_path': log_detail['LogPath'], 'log_name': log_name})

    callback = signature(callback).clone(link=cleanup.s(tmpdir=tmpdir))
    jobs = chord(download.s(log['log_path'], log['log_name']) for log in logs)
    async_result = jobs(callback)
    return async_result.id
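
Because the callback is passed in as a Signature and cloned with a cleanup link, the caller decides what happens to the downloaded logs. A hypothetical invocation follows; parse_logs and app are assumptions, not part of the original module.

@app.task
def parse_logs(download_results):
    # chord body: receives the list of return values from the
    # download.s(log_path, log_name) header tasks built above
    return len(download_results)

chord_id = to_download('cdn.example.com', parse_logs.s(), days=1)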
Example #59
def queue_merlin_study(study, adapter):
    """
    Launch a chain of tasks based off of a MerlinStudy.
    """
    samples = study.samples
    sample_labels = study.sample_labels
    egraph = study.dag
    LOG.info("Calculating task groupings from DAG.")
    groups_of_chains = egraph.group_tasks("_source")

    # magic to turn graph into celery tasks
    LOG.info("Converting graph to tasks.")
    celery_dag = chain(
        chord(
            group(
                [
                    expand_tasks_with_samples.s(
                        egraph,
                        gchain,
                        samples,
                        sample_labels,
                        merlin_step,
                        adapter,
                        study.level_max_dirs,
                    ).set(queue=egraph.step(chain_group[0][0]).get_task_queue())
                    for gchain in chain_group
                ]
            ),
            chordfinisher.s().set(
                queue=egraph.step(chain_group[0][0]).get_task_queue()
            ),
        )
        for chain_group in groups_of_chains[1:]
    )
    LOG.info("Launching tasks.")
    return celery_dag.delay(None)
Example #60
def ingest_volumes(full_sync):
    """
        Start a celery task to ingest each valid volume folder, returning when all tasks are complete.
    """
    # Fetch lists of VolumeMetadata entries from the database.
    already_ingested_db_volumes = set(
        VolumeMetadata.objects.filter(ingest_status='ingested').values_list(
            'barcode', flat=True))
    not_ingested_db_volumes = set(
        VolumeMetadata.objects.filter(
            ingest_status__in=['to_ingest', 'error']).values_list('barcode',
                                                                  flat=True))
    all_db_volumes = already_ingested_db_volumes | not_ingested_db_volumes

    # Add the first occurrence of each volume to filtered_volume_folders.
    filtered_volume_folders = []
    all_s3_barcodes = set()
    for volume_folder, barcode in get_unique_volumes_from_queue():
        all_s3_barcodes.add(barcode)

        # skip already ingested volumes if not full_sync
        if not full_sync and barcode in already_ingested_db_volumes:
            clear_redis_volume(volume_folder)
            continue

        filtered_volume_folders.append(volume_folder)

    # Mark volumes that are in DB but not in S3.
    missing_from_s3 = all_db_volumes - all_s3_barcodes
    for barcode in missing_from_s3:
        VolumeMetadata.objects.filter(barcode=barcode).update(
            ingest_status='error', ingest_errors={"missing_from_s3": barcode})

    # process each unique volume
    return chord(
        (ingest_volume_from_redis.s(i) for i in filtered_volume_folders))