Example #1
    def handle(self, *args, **options):
        filter_qs = {}

        # Don't include disabled projects by default
        if not options['disabled_projects']:
            filter_qs['entity__resource__project__disabled'] = False

        # Don't include obsolete by default
        if not options['obsolete_entities']:
            filter_qs['entity__obsolete'] = False

        translations_pks = (
            Translation.objects
            .filter(
                entity__resource__format__in=DB_FORMATS,
                **filter_qs
            )
            .values_list('pk', flat=True)
        )

        # Split translations into even batches and send them to Celery workers
        batch_size = int(options['batch_size'])
        group(
            signature(
                check_translations,
                args=(translations_pks[i:i + batch_size],)
            )
            for i in range(0, len(translations_pks), batch_size)
        ).apply_async()
Example #2
def lp_clinical_team():
    event.Event('event', {
        'task': 'overwatch_tasks',
        'info': {
            'message': 'started lp_clinical_team'
        }
    })
    update_data = report_tasks.get_profile_changes()
    count = 0
    header = []
    batch = {}
    for client_id in update_data:
        if count >= UPDATE_CLIENT_BATCH_SIZE:
            # Flush the full batch, then keep collecting into a fresh one;
            # the current client is still added below rather than skipped.
            header.append(update_client_profile_batch.s(batch))
            batch = {}
            count = 0
        update_package = update_data[client_id]
        batch[client_id] = update_package
        count += 1
    if len(batch) > 0:
        header.append(update_client_profile_batch.s(batch))
    group(header)()
    event.Event('event', {
        'task': 'overwatch_tasks',
        'info': {
            'message': 'finished lp_clinical_team'
        }
    })
Example #3
    def test_group_chord_group_chain(self, manager):
        from celery.five import bytes_if_py2

        if not manager.app.conf.result_backend.startswith('redis'):
            raise pytest.skip('Requires redis result backend.')
        redis_connection = StrictRedis()
        redis_connection.delete('redis-echo')
        before = group(redis_echo.si('before {}'.format(i)) for i in range(3))
        connect = redis_echo.si('connect')
        after = group(redis_echo.si('after {}'.format(i)) for i in range(2))

        result = (before | connect | after).delay()
        result.get(timeout=TIMEOUT)
        redis_messages = list(map(
            bytes_if_py2,
            redis_connection.lrange('redis-echo', 0, -1)
        ))
        before_items = \
            set(map(bytes_if_py2, (b'before 0', b'before 1', b'before 2')))
        after_items = set(map(bytes_if_py2, (b'after 0', b'after 1')))

        assert set(redis_messages[:3]) == before_items
        assert redis_messages[3] == b'connect'
        assert set(redis_messages[4:]) == after_items
        redis_connection.delete('redis-echo')
Example #4
def alert_change_domain(domain_id, **kwargs):
    from app.models.models import Domain, Host, hosts, User

    utc_now = datetime.datetime.utcnow()
    experation_date = kwargs.pop('experation_date')
    domain_name = kwargs.pop('domain_name')

    query = db_session.session.query(User.username)
    query = query.join(hosts,  hosts.c.user_id == User.id)
    query = query.join(Host,  hosts.c.host_id == Host.id)
    query = query.join(Domain,  Host.domain_id == Domain.id)
    query = query.filter(Domain.id==domain_id)
    query = query.filter(or_(User.date_out >=utc_now, User.date_out.is_(None)))
    query_set = query.all()

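    # Email body (Russian): "The lease of domain <b>%s</b> has been extended until %s"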
    message = render_jinja2('email.html', text=u'Продлена аренда домена <br> <b>%s</b> <br> до <br> %s' % (domain_name, experation_date.strftime('"%d" %B %Y')))

    tasks_list = list()
    for user_inst in query_set:
        user_email = user_inst[0]
        logger.info('send mail %s. domain %s', user_email, domain_id)

        tasks_list.append(send_email.s(subject='ChicagoOnline. %s' % domain_name,
                                       message=message.encode('utf-8'),
                                       recipients=[user_email],
                                       html=True))

    if tasks_list:
        group(*tasks_list).delay()
Example #5
    def post(self, request, *args, **kwargs):
        """Import a manifest at a remote_url."""

        remote_url = request.data.get("remote_url")

        if not remote_url:
            return Response(
                {'error': 'Did not provide remote_url.'},
                status=status.HTTP_400_BAD_REQUEST)

        shared_id = str(uuid.uuid4())
        imp = ManifestPreImporter(remote_url)
        lst = imp.get_all_urls()

        # If there are manifests to import, create a celery group for the task.
        if lst:
            if len(lst) == 1:
                g = group([import_single_manifest.s(imp.text, lst[0])])
            else:
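                # skew() staggers each task's countdown (0s, 0.3s, 0.6s, ...) so the imports don't all fire at once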
                g = group([import_single_manifest.s(None, url) for url in lst]).skew(start=0, step=0.3)
            task = g.apply_async(task_id=shared_id)
            task.save()
        else:
            if imp.errors:
                return Response({'errors': imp.errors}, status=status.HTTP_400_BAD_REQUEST)
            return Response({'errors': ['Failed to find recognisable IIIF manifest data.']}, status=status.HTTP_400_BAD_REQUEST)

        # Return a URL where the status of the import can be polled.
        status_url = reverse('status', request=request, args=[shared_id])
        return Response({'status': status_url}, status.HTTP_202_ACCEPTED)
Example #6
def import_replications(start, end):
    """Recieves a start and a end number and import each replication file in
    this interval.
    """
    Import(start=start, end=end).save()
    urls = [format_url(n) for n in range(start, end + 1)]
    group(get_filter_changeset_file.s(url) for url in urls)()
Example #7
def update_all_apidata(*args, **kwargs):
    character_keys = APIKey.objects.filter(expired=False).exclude(type='Corporation')
    corpkeys = APIKey.objects.filter(expired=False, type='Corporation')
    tasks = queue_character_tasks(character_keys) + queue_corporation_tasks(corpkeys)

    if len(tasks) > 0:
        group(tasks).apply_async(queue='transient')
Example #8
def task_group():
    # 100 random num(group) -> filter -> sum (chain)
    # execute several tasks in parallel.
    g = group(rand.s(x) for x in range(100))
    # chain a group together with another task will auto upgrade to be a chord
    gchain = chain(g, filter.s(), xsum.s())
    gchain.apply_async(countdown=1)
    # logger.warning(datetime.now() + timedelta(seconds=5))
    # gchain.apply_async(eta=datetime.now() + timedelta(seconds=5))
    # gchain.apply_async(countdown=5)

    job = group([
        add.s(1, 2),
        add.s(2, 3),
        add.s(3, 4),
        add.s(4, 5),
    ])
    chain(job, xsum.s()).apply_async()
    # chord(job, xsum.s()).apply_async()
    # sync call
    if settings.CELERY_ALWAYS_EAGER:
        res = job.delay()
        cnt = 0
        while not res.ready():
            print('wait job ready%s' % ('.' * cnt))
            cnt += 1
        # an async (non-eager) call would get stuck here
        print('job result: %s' % sum(res.get(timeout=2)))
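
The comments in this example note that chaining a group into a single task is upgraded to a chord. A minimal sketch of that equivalence, assuming the same add and xsum tasks used above:

from celery import chain, chord, group

# add and xsum are assumed to be the same tasks as in the example above
job = group(add.s(1, 2), add.s(2, 3), add.s(3, 4), add.s(4, 5))
# chaining a group into one task is upgraded to a chord at apply time,
# so these two dispatches are expected to produce equivalent workflows
chain(job, xsum.s()).apply_async()
chord(job, xsum.s()).apply_async()
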
Example #9
def fetch_transactions_status(transaction_ids=None):
    eligible_transactions = Transaction.objects.filter(state=Transaction.States.Pending)

    if transaction_ids:
        eligible_transactions = eligible_transactions.filter(pk__in=transaction_ids)

    group(fetch_transaction_status.s(transaction.id) for transaction in eligible_transactions)()
Example #10
def process_queue(*args, **kwargs):
    keys = kwargs.get('keys')
    queue = kwargs.get('queue')

    if queue == Person.not_added_key:
        # process not added persons queue
        result = group(add_instance.subtask(
            (key, Person), options={'queue': 'persons'}) for key in keys)
        result.apply_async(queue='persons')
    elif queue == Movie.not_added_key:
        # process not added movies queue
        result = group(add_instance.subtask(
            (key, Movie), options={'queue': 'movies'}) for key in keys)
        result.apply_async(queue='movies')
    elif queue == Person.not_saved_relations_key:
        # process not saved persons relations queue,
        # save person -> movie relations
        result = group(save_relations.subtask(
            (key, Person), options={'queue': 'relations'}) for key in keys)
        result.apply_async(queue='relations')
    elif queue == Movie.not_saved_relations_key:
        # process not saved movies relations queue,
        # save movie -> movie relations
        result = group(save_relations.subtask(
            (key, Movie), options={'queue': 'relations'}) for key in keys)
        result.apply_async(queue='relations')
Example #11
    def _call_task_errbacks(self, request, exc, traceback):
        old_signature = []
        for errback in request.errbacks:
            errback = self.app.signature(errback)
            if (
                    # Celery tasks created with the @task decorator have
                    # the __header__ property, but Celery tasks created from
                    # the Task class do not have this property.
                    # That's why we have to check that this property exists
                    # before checking whether it is a partial function.
                    hasattr(errback.type, '__header__') and

                    # workaround to support tasks with bind=True executed as
                    # link errors. Otherwise retries can't be used
                    not isinstance(errback.type.__header__, partial) and
                    arity_greater(errback.type.__header__, 1)
            ):
                errback(request, exc, traceback)
            else:
                old_signature.append(errback)
        if old_signature:
            # Previously errback was called as a task so we still
            # need to do so if the errback only takes a single task_id arg.
            task_id = request.id
            root_id = request.root_id or task_id
            group(old_signature, app=self.app).apply_async(
                (task_id,), parent_id=task_id, root_id=root_id
            )
Example #12
def generate_pdfs():
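    # N.B. this is presumably itertools.chain (lazy queryset concatenation), not celery.chain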
    dirty_documents = chain(Invoice.objects.filter(pdf__dirty__gt=0),
                            Proforma.objects.filter(pdf__dirty__gt=0))

    # Generate PDFs in parallel
    group(generate_pdf.s(document.id, document.kind)
          for document in dirty_documents)()
Example #13
def scan_launch(scan_id):
    with session_transaction() as session:
        scan = None
        try:
            log.debug("scan: %s launching", scan_id)
            # Part for common action for whole scan
            scan = Scan.load_from_ext_id(scan_id, session)
            scan_request = scan_ctrl._create_scan_request(
                    scan.files_ext,
                    scan.get_probelist(),
                    scan.mimetype_filtering)
            scan_request = scan_ctrl._add_empty_results(
                    scan.files_ext,
                    scan_request,
                    scan, session)
            # Nothing to do
            if scan_request.nb_files == 0:
                scan.set_status(IrmaScanStatus.finished)
                log.warning("scan %s: finished nothing to do", scan_id)
                return

            # Part for action file_ext by file_ext
            file_ext_id_list = [file.external_id for file in scan.files_ext]

            celery.group(scan_launch_file_ext.si(file_ext_id)
                         for file_ext_id in file_ext_id_list)()
            scan.set_status(IrmaScanStatus.launched)
            session.commit()
            log.info("scan %s: launched", scan_id)
            return
        except Exception as e:
            log.exception(e)
            if scan is not None:
                scan.set_status(IrmaScanStatus.error)
Example #14
    def test_on_run(self):
        args = [
            ('twseid', 'INFO', './log/twseid.log', True, False),
            ('otcid', 'INFO', './log/otcid.log', True, False)
        ]
        tasks = group([
            run_scrapy_service.subtask(args[0]),
            run_scrapy_service.subtask(args[1])
        ])
        results = tasks.apply_async()
        results.join()
        self.assertTrue(results.ready())
        self.assertTrue(results.successful())

        args = [
            ('twsehistrader', 'INFO', './log/twsehistrader.log', True, True),
            ('twsehisstock', 'INFO', './log/twsehisstock.log', True, True),
            ('otchistrader', 'INFO', './log/otchistrader.log', True, True),
            ('otchisstock', 'INFO', './log/otchisstock.log', True, True)
        ]
        tasks = group([
            run_scrapy_service.subtask(args[0]),
            run_scrapy_service.subtask(args[1]),
            run_scrapy_service.subtask(args[2]),
            run_scrapy_service.subtask(args[3])
        ])
        t = timeit.Timer()
        results = tasks.apply_async()
        results.join()
        print "scrapy all bin.tasks used %.4f(s)" % (t.timeit())
        self.assertTrue(results.ready())
        self.assertTrue(results.successful())
Example #15
    def handle(self, *args, **options):
        pks = (ActivityLog.objects.review_queue().values_list('pk', flat=True)
                                  .order_by('id'))

        ts = [add_versionlog.subtask(args=[chunk])
              for chunk in chunked(pks, 100)]
        group(ts).apply_async()
Example #16
def generate_eod_tasks():
    '''
    Task responsible for generating work items used to obtain end of day
    data for stocks using get_eod_data() task
    '''
    db = MongoDBUtil()
    symbol_sets = set()

    #Gets all symbols
    sp500 = finsymbols.get_sp500_symbols()
    amex = finsymbols.get_amex_symbols()
    nyse = finsymbols.get_nyse_symbols()
    nasdaq = finsymbols.get_nasdaq_symbols()

    #Adds all symbols to set which removes duplicates
    symbol_sets.update(_get_symbol_set(sp500))
    symbol_sets.update(_get_symbol_set(amex))
    symbol_sets.update(_get_symbol_set(nyse))
    symbol_sets.update(_get_symbol_set(nasdaq))

    now = datetime.datetime.now()
    end_date = '-'.join([str(now.year),str(now.month),str(now.day)])

    his_symbols = db.has_historical_data(symbol_sets)
    if(len(his_symbols) >= 1):
        start_date = '1980-01-01'
        hist_job = group(get_eod_data.s(symbol,start_date,end_date) for symbol in symbol_sets)
        hist_job.apply_async()

    # Obtain data for current date
    job = group(get_eod_data.s(symbol,end_date,end_date) for symbol in symbol_sets)
    job.apply_async()
Example #17
def broadcast(type, task, args, kwargs=None, callback=None):  # pylint: disable=redefined-builtin
    """
    Run a broadcast across our servers.

    Returns a task group that can be checked for results.

    `callback` should be a task signature that will be run once,
    after all of the broadcast tasks have finished running.
    """
    if type not in ['web', 'app', 'build']:
        raise ValueError('allowed values of `type` are web, app and build.')
    if kwargs is None:
        kwargs = {}

    if type in ['web', 'app']:
        servers = settings.MULTIPLE_APP_SERVERS
    elif type in ['build']:
        servers = settings.MULTIPLE_BUILD_SERVERS

    tasks = []
    for server in servers:
        task_sig = task.s(*args, **kwargs).set(queue=server)
        tasks.append(task_sig)
    if callback:
        task_promise = chord(tasks, callback).apply_async()
    else:
        # Celery's Group class does some special handling when an iterable with
        # len() == 1 is passed in. This will be hit if there is only one server
        # defined in the above queue lists
        if len(tasks) > 1:
            task_promise = group(*tasks).apply_async()
        else:
            task_promise = group(tasks).apply_async()
    return task_promise
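
A short usage sketch for the helper above; update_search_index and on_done are hypothetical task signatures, not part of this project:

# fan a hypothetical task out to every web server and run a callback once all finish
result = broadcast('web', update_search_index, args=[], callback=on_done.s())
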
Example #18
File: cron.py Project: diox/olympia
def update_user_ratings():
    """Update add-on author's ratings."""

    cursor = connections[multidb.get_replica()].cursor()
    # We build this query ahead of time because the cursor complains about data
    # truncation if we let it substitute the parameters.  Also, this query is
    # surprisingly quick, <1sec for 6100 rows returned
    q = """   SELECT
                addons_users.user_id as user_id,
                AVG(rating) as avg_rating
              FROM reviews
                INNER JOIN versions
                INNER JOIN addons_users
                INNER JOIN addons
              ON reviews.version_id = versions.id
                AND addons.id = versions.addon_id
                AND addons_users.addon_id = addons.id
              WHERE reviews.reply_to IS NULL
                AND reviews.rating > 0
                AND addons.status IN (%s)
              GROUP BY addons_users.user_id
              """ % (",".join(map(str, VALID_ADDON_STATUSES)))

    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [update_user_ratings_task.subtask(args=[chunk])
          for chunk in chunked(d, 1000)]

    group(ts).apply_async()
Example #19
 def inform(self):
     while True:
         self.container.put(obj.async_jump.get())
         obj.async_jump = AsyncResult()
         if self.container.qsize() > 10:
             group(parse.s(item) for item in self.task_args())()
         obj.async_in.set()
Example #20
    def test_chord_in_chords_with_chains(self, manager):
        try:
            manager.app.backend.ensure_chords_allowed()
        except NotImplementedError as e:
            raise pytest.skip(e.args[0])

        c = chord(
            group([
                chain(
                    add.si(1, 2),
                    chord(
                        group([add.si(1, 2), add.si(1, 2)]),
                        add.si(1, 2),
                    ),
                ),
                chain(
                    add.si(1, 2),
                    chord(
                        group([add.si(1, 2), add.si(1, 2)]),
                        add.si(1, 2),
                    ),
                ),
            ]),
            add.si(2, 2)
        )

        r = c.delay()

        assert r.get(timeout=TIMEOUT) == 4
Example #21
File: db.py Project: h1ds/h1ds
    def populate_attribute(self, summary_attribute, update=True):
        """Populate all instances of summary_attribute in table.

        Arguments:
            summary_attribute - either an instance of h1ds.models.SummaryAttribute or its slug

        """
        try:
            attr_slug = summary_attribute.slug
        except AttributeError:
            attr_slug = summary_attribute

        shot_queryset = Shot.objects.filter(device=self.device, number__lte=self.device.latest_shot.number)

        if update:
            task_name = update_single_table_attribute
        else:
            task_name = insert_single_table_attribute

        shot_manager = get_backend_shot_manager_for_device(self.device)
        shot_timestamp = shot_manager().get_timestamp_for_shot

        group(
            (task_name.s(self.device.slug, shot.number, shot_timestamp(shot.number), attr_slug) for shot in shot_queryset),
        ).apply_async()
Example #22
    def test_group_to_chord(self):
        c = (
            self.add.s(5) |
            group([self.add.s(i, i) for i in range(5)], app=self.app) |
            self.add.s(10) |
            self.add.s(20) |
            self.add.s(30)
        )
        c._use_link = True
        tasks, results = c.prepare_steps((), c.tasks)

        self.assertEqual(tasks[-1].args[0], 5)
        self.assertIsInstance(tasks[-2], chord)
        self.assertEqual(len(tasks[-2].tasks), 5)
        self.assertEqual(tasks[-2].parent_id, tasks[-1].id)
        self.assertEqual(tasks[-2].root_id, tasks[-1].id)
        self.assertEqual(tasks[-2].body.args[0], 10)
        self.assertEqual(tasks[-2].body.parent_id, tasks[-2].id)

        self.assertEqual(tasks[-3].args[0], 20)
        self.assertEqual(tasks[-3].root_id, tasks[-1].id)
        self.assertEqual(tasks[-3].parent_id, tasks[-2].body.id)

        self.assertEqual(tasks[-4].args[0], 30)
        self.assertEqual(tasks[-4].parent_id, tasks[-3].id)
        self.assertEqual(tasks[-4].root_id, tasks[-1].id)

        self.assertTrue(tasks[-2].body.options['link'])
        self.assertTrue(tasks[-2].body.options['link'][0].options['link'])

        c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10))
        c2._use_link = True
        tasks2, _ = c2.prepare_steps((), c2.tasks)
        self.assertIsInstance(tasks2[0], group)
Example #23
def execute_transactions(transaction_ids=None):
    executable_transactions = Transaction.objects.filter(state=Transaction.States.Initial)

    if transaction_ids:
        executable_transactions = executable_transactions.filter(pk__in=transaction_ids)

    group(execute_transaction.s(transaction.id) for transaction in executable_transactions)()
Example #24
 def _evil_groupmember(self, evil_t, *eargs, **opts):
     g1 = group(add.s(2, 2).set(**opts), evil_t.s(*eargs).set(**opts),
                add.s(4, 4).set(**opts), add.s(8, 8).set(**opts))
     g2 = group(add.s(3, 3).set(**opts), add.s(5, 5).set(**opts),
                evil_t.s(*eargs).set(**opts), add.s(7, 7).set(**opts))
     self.join(g1(), timeout=10)
     self.join(g2(), timeout=10)
Example #25
 def on_error(request, exc, uuid, state=FAILURE, call_errbacks=True):
     if propagate:
         raise
     I = Info(state, exc)
     R = I.handle_error_state(task, eager=eager)
     if call_errbacks:
         group([signature(errback, app=app) for errback in request.errbacks or []], app=app).apply_async((uuid,))
     return I, R, I.state, I.retval
Example #26
def fire():
    myName = getHostname()
    log("{0} fires.".format(myName))
    setOutputActive()

    # kick downstream neurons
    connections = getConnections()
    group([kick.subtask((myName,), routing_key=connection) for connection in connections]).apply_async()
Example #27
def reindex_collections(index=None):
    from . import tasks
    ids = (Collection.objects.exclude(type=amo.COLLECTION_SYNCHRONIZED)
           .values_list('id', flat=True))
    taskset = [tasks.index_collections.subtask(args=[chunk],
                                               kwargs=dict(index=index))
               for chunk in chunked(sorted(list(ids)), 150)]
    group(taskset).apply_async()
Example #28
def check_virtual_connection_consistency():
    with db_session() as db:
        group(convert_virtual_connections.s(_) for _, in (
            db.query(User.id).filter(
                func.lower(User.username)
                .in_(db.query(func.lower(VirtualUserConnection.to_username).distinct()))
            )
        )).delay()
Example #29
def reindex_addons(index=None, addon_type=None):
    from . import tasks
    ids = Addon.unfiltered.values_list('id', flat=True)
    if addon_type:
        ids = ids.filter(type=addon_type)
    ts = [tasks.index_addons.subtask(args=[chunk], kwargs=dict(index=index))
          for chunk in chunked(sorted(list(ids)), 150)]
    group(ts).apply_async()
Example #30
def update_collections_total():
    """Update collections downloads totals."""

    d = (CollectionCount.objects.values('collection_id')
                                .annotate(sum=Sum('count')))

    ts = [tasks.update_collections_total.subtask(args=[chunk])
          for chunk in chunked(d, 50)]
    group(ts).apply_async()
Example #31
def build_sweep(sweep_id: str):
    from sweep_builder.init_tokens import init_tokens
    from sweep_builder.pipeline import iter_pipeline
    from sweep_builder.reality_inferrer.reality import iter_reality_base

    try:
        _measurement_name_base = __name__ + '.' + build_sweep.__name__ + '.'
        _measurement_tags = {'sweep_id': sweep_id}

        # In the jobs persister we purposefully avoid persisting
        # anything besides the Job ID. This means that things like tokens
        # and other data on *Claim is lost.
        # As long as we are doing that, we need to leave tokens somewhere
        # for workers to pick up.
        logger.info(f"#{sweep_id} Prepositioning platform tokens")
        init_tokens(sweep_id)

        logger.info(f"#{sweep_id} Starting sweep building")

        # task_group = TaskGroup()
        delayed_tasks = []

        cnt = 0
        with Measure.counter(_measurement_name_base + 'outer_loop',
                             tags=_measurement_tags) as cntr:

            for reality_claim in iter_reality_base():
                # what we get here are Scope and AdAccount objects.
                # Children of AdAccount reality claims are to be processed
                # in separate Celery tasks. But we still have jobs
                # associated with Scopes objects, so
                # need to rate and store the jobs before chipping off
                # a separate task for each of AdAccounts.
                if reality_claim.entity_type == Entity.AdAccount:

                    # child_task_id = task_group.generate_task_id()
                    # task_group.report_task_active(child_task_id)

                    delayed_tasks.append(
                        # we are using Celery chord to process AdAccounts in parallel
                        # for very very large (hundreds of thousands) numbers of AdAccounts,
                        # chord management will be super memory expensive,
                        # as chord timer/controller will be looking at entire list on
                        # each tick.
                        # In that case, probably better to switch to
                        # a callback per handler + mutex/counter somewhere
                        build_sweep_slice_per_ad_account_task.si(
                            sweep_id,
                            reality_claim,
                            # task_id=child_task_id
                        ))
                elif reality_claim.entity_type == Entity.Page:
                    delayed_tasks.append(
                        build_sweep_slice_per_page.si(sweep_id, reality_claim))
                else:
                    cnt = 1
                    _step = 1000
                    for _ in iter_pipeline(sweep_id, [reality_claim]):
                        cnt += 1
                        if cnt % _step == 0:
                            cntr += _step
                            logger.info(
                                f'#{sweep_id}-root: Queueing up #{cnt}')

                    # because above counter communicates only increments of _step,
                    # we need to report remainder --- amount under _step
                    cntr += cnt % _step

        logger.info(f"#{sweep_id}-root: Queued up a total of {cnt} tasks")

        # # here we fan out actual work to celery workers
        # # and wait for all tasks to finish before returning
        group_result = group(delayed_tasks).delay()

        # In case the workers crash, go-away (scaling) or are otherwise
        # non-responsive, the following would wait indefinitely.
        # Since that's not desirable and the total sweep build time is minutes at
        # maximum, we add a reasonable timeout
        # Because we are not joining on the results, but actually periodically
        # looking for "you done yet?", we can exit if this threshold is busted, and
        # let the next run recover from the situation
        # You will nee
        should_be_done_by = time.time() + (60 * 20)

        Measure.gauge(f'{_measurement_name_base}per_account_sweep.total',
                      tags=_measurement_tags)(len(group_result.results))

        # Monitor the progress. Although this obviously can be achieved with
        # group_result.join(), we need to "see" into the task group progress
        with Measure.gauge(f'{_measurement_name_base}per_account_sweep.done',
                           tags=_measurement_tags) as measure_done:
            while True:
                done_counter = 0
                for result in group_result.results:
                    logger.debug(f'{result}: {result.state}')
                    if result.ready():
                        done_counter += 1

                logger.debug(
                    f"TOTAL: {done_counter}/{len(group_result.results)}")
                logger.debug("=" * 20)

                logger.debug("Checking group result")

                measure_done(done_counter)
                if group_result.ready():
                    logger.debug(f"#{sweep_id}-root: Sweep build complete")
                    break

                # Important. If we don't sleep, the native join in celery context
                # switches all the time and we end up with 100% cpu, eventually somehow
                # deadlocking the process. 5 seconds is kind of an arbitrary number, but
                # does what we need and the impact of a (potential) delay is absolutely
                # minimal
                time.sleep(5)

                # The last line of defense. Workers did not finish in time we
                # expected, no point waiting, kill it.
                if time.time() > should_be_done_by:
                    Measure.gauge(
                        f'{_measurement_name_base}per_account_sweep.early_exits',
                        tags=_measurement_tags)(1)
                    logger.warning(
                        "Exiting incomplete sweep build, it's taking too long")
                    return

        logger.info("Waiting on results join")
        if group_result.supports_native_join:
            group_result.join_native()
        else:
            # Eager mode does not support native join.
            group_result.join()

        # # alternative to Celery's native group_result.join()
        # # our manual task tracking code + join()
        # task_group.join()
        logger.info("Join complete, sweep build ended")
    except Exception as ex:
        ErrorInspector.inspect(ex, None, {'sweep_id': sweep_id})
Example #32
    def run(self):
        db_session.add(self)
        log = self.get_log(job_run=self)
        db_session.add(log)

        try:
            self.status = JobRunStatus.running
            self.run_at = now()
            print self.run_at

            log.add_log("started", "Job Started at %s" % (self.run_at))
            db_session.commit()

            checks_to_run = []
            # Let the rules populate the checks they wish to run.
            # Don't forget to first open connections on all sources so they can be queried.
            [
                source.open_connection()
                for source in self.job_template.data_sources
            ]

            map(lambda r: r.run(self, checks_to_run), self.job_template.rules)
            [
                source.close_connection()
                for source in self.job_template.data_sources
            ]
            # Dedupe checks_to_run against checks already seen. Expects tuples of the format (DataSource, table_name_string, Check)
            seen = set()
            seen_add = seen.add
            checks_to_run = [
                c for c in checks_to_run
                if not ((c[0].id, c[1], c[2].id) in seen or seen_add(
                    (c[0].id, c[1], c[2].id)))
            ]
            if len(checks_to_run) > 0:
                # Bucketize checks based on parallelization chosen. Each bucket runs sequentially.
                checks_by_parallelization = self.get_checks_by_parallelization(
                    checks_to_run)

                # Run each bucket of checks in a separate celery worker, by turning each subarray into an array of celery run check
                # job signatures, and then splatting each array of run check signatures into a chain(requiring them to be done one
                # at a time in each chain), then you group all chains together so they run in parallel. Each chain is a worker.
                # Then finally you call register finished when all done.
                separate_queues = [
                    map(
                        lambda c: celery_jobs.job_runs.run_check.si(
                            c[0].id, c[1], c[2].id, self.id), chks)
                    for chks in checks_by_parallelization
                ]
                sep_chains = [chain(*queue) for queue in separate_queues]
                print sep_chains
                group_of_chains = (group(*sep_chains)
                                   | celery_jobs.job_runs.register_finished.s(
                                       self.id)).apply_async()
            else:
                self.set_finished()

        except Exception:
            self.set_failed()

        db_session.add(log)
        db_session.commit()
Example #33
    def trace_task(uuid, args, kwargs, request=None):
        # R      - is the possibly prepared return value.
        # I      - is the Info object.
        # T      - runtime
        # Rstr   - textual representation of return value
        # retval - is the always unmodified return value.
        # state  - is the resulting task state.

        # This function is very long because we've unrolled all the calls
        # for performance reasons, and because the function is so long
        # we want the main variables (I, and R) to stand out visually from
        # the rest of the variables, so breaking PEP8 is worth it ;)
        R = I = T = Rstr = retval = state = None
        task_request = None
        time_start = monotonic()
        try:
            try:
                kwargs.items
            except AttributeError:
                raise InvalidTaskError(
                    "Task keyword arguments is not a mapping")
            push_task(task)
            task_request = Context(request or {},
                                   args=args,
                                   called_directly=False,
                                   kwargs=kwargs)
            root_id = task_request.root_id or uuid
            task_priority = (task_request.delivery_info.get("priority")
                             if inherit_parent_priority else None)
            push_request(task_request)
            try:
                # -*- PRE -*-
                if prerun_receivers:
                    send_prerun(sender=task,
                                task_id=uuid,
                                task=task,
                                args=args,
                                kwargs=kwargs)
                loader_task_init(uuid, task)
                if track_started:
                    store_result(
                        uuid,
                        {
                            "pid": pid,
                            "hostname": hostname
                        },
                        STARTED,
                        request=task_request,
                    )

                # -*- TRACE -*-
                try:
                    R = retval = fun(*args, **kwargs)
                    state = SUCCESS
                except Reject as exc:
                    I, R = Info(REJECTED, exc), ExceptionInfo(internal=True)
                    state, retval = I.state, I.retval
                    I.handle_reject(task, task_request)
                    traceback_clear(exc)
                except Ignore as exc:
                    I, R = Info(IGNORED, exc), ExceptionInfo(internal=True)
                    state, retval = I.state, I.retval
                    I.handle_ignore(task, task_request)
                    traceback_clear(exc)
                except Retry as exc:
                    I, R, state, retval = on_error(task_request,
                                                   exc,
                                                   uuid,
                                                   RETRY,
                                                   call_errbacks=False)
                    traceback_clear(exc)
                except Exception as exc:
                    I, R, state, retval = on_error(task_request, exc, uuid)
                    traceback_clear(exc)
                except BaseException:
                    raise
                else:
                    try:
                        # callback tasks must be applied before the result is
                        # stored, so that result.children is populated.

                        # groups are called inline and will store trail
                        # separately, so need to call them separately
                        # so that the trail's not added multiple times :(
                        # (Issue #1936)
                        callbacks = task.request.callbacks
                        if callbacks:
                            if len(task.request.callbacks) > 1:
                                sigs, groups = [], []
                                for sig in callbacks:
                                    sig = signature(sig, app=app)
                                    if isinstance(sig, group):
                                        groups.append(sig)
                                    else:
                                        sigs.append(sig)
                                for group_ in groups:
                                    group_.apply_async(
                                        (retval, ),
                                        parent_id=uuid,
                                        root_id=root_id,
                                        priority=task_priority,
                                    )
                                if sigs:
                                    group(sigs, app=app).apply_async(
                                        (retval, ),
                                        parent_id=uuid,
                                        root_id=root_id,
                                        priority=task_priority,
                                    )
                            else:
                                signature(callbacks[0], app=app).apply_async(
                                    (retval, ),
                                    parent_id=uuid,
                                    root_id=root_id,
                                    priority=task_priority,
                                )

                        # execute first task in chain
                        chain = task_request.chain
                        if chain:
                            _chsig = signature(chain.pop(), app=app)
                            _chsig.apply_async(
                                (retval, ),
                                chain=chain,
                                parent_id=uuid,
                                root_id=root_id,
                                priority=task_priority,
                            )
                        mark_as_done(
                            uuid,
                            retval,
                            task_request,
                            publish_result,
                        )
                    except EncodeError as exc:
                        I, R, state, retval = on_error(task_request, exc, uuid)
                    else:
                        Rstr = saferepr(R, resultrepr_maxsize)
                        T = monotonic() - time_start
                        if task_on_success:
                            task_on_success(retval, uuid, args, kwargs)
                        if success_receivers:
                            send_success(sender=task, result=retval)
                        if _does_info:
                            info(
                                LOG_SUCCESS,
                                {
                                    "id": uuid,
                                    "name": get_task_name(task_request, name),
                                    "return_value": Rstr,
                                    "runtime": T,
                                },
                            )

                # -* POST *-
                if state not in IGNORE_STATES:
                    if task_after_return:
                        task_after_return(
                            state,
                            retval,
                            uuid,
                            args,
                            kwargs,
                            None,
                        )
            finally:
                try:
                    if postrun_receivers:
                        send_postrun(
                            sender=task,
                            task_id=uuid,
                            task=task,
                            args=args,
                            kwargs=kwargs,
                            retval=retval,
                            state=state,
                        )
                finally:
                    pop_task()
                    pop_request()
                    if not eager:
                        try:
                            backend_cleanup()
                            loader_cleanup()
                        except (KeyboardInterrupt, SystemExit, MemoryError):
                            raise
                        except Exception as exc:
                            logger.error("Process cleanup failed: %r",
                                         exc,
                                         exc_info=True)
        except MemoryError:
            raise
        except Exception as exc:
            _signal_internal_error(task, uuid, args, kwargs, request, exc)
            if eager:
                raise
            R = report_internal_error(task, exc)
            if task_request is not None:
                I, _, _, _ = on_error(task_request, exc, uuid)
        return trace_ok_t(R, I, T, Rstr)
Example #34
def handle_grb_lvalert(alert):
    """Parse an LVAlert message related to superevents/GRB external triggers
    and dispatch it to other tasks.

    Notes
    -----
    This LVAlert message handler is triggered by creating a new superevent or
    GRB external trigger event, or a label associated with completeness of sky
    maps:

    * Any new event triggers a coincidence search with
      :meth:`gwcelery.tasks.raven.coincidence_search`.
    * When both a GW and GRB sky map are available during a coincidence,
      indicated by the labels ``SKYMAP_READY`` and ``EXT_SKYMAP_READY``
      respectively, the space-time coincidence FAR is calculated. If
      an alert is triggered with these same conditions, indicated by the
      ``RAVEN_ALERT`` label, a combined GW-GRB sky map is created using
      :meth:`gwcelery.tasks.external_skymaps.create_combined_skymap`.

    """
    # Determine GraceDB ID
    graceid = alert['uid']

    # launch searches
    if alert['alert_type'] == 'new':
        if alert['object'].get('group') == 'External':
            # Create and upload Swift sky map for the joint targeted
            # sub-threshold search as agreed on in the MOU
            if alert['object']['search'] == 'SubGRBTargeted' and \
                    alert['object']['pipeline'] == 'Swift':
                external_skymaps.create_upload_external_skymap(
                    alert['object'], None, alert['object']['created'])

            # launch standard Burst-GRB search
            raven.coincidence_search(graceid, alert['object'], group='Burst')

            if alert['object']['search'] in ['SubGRB', 'SubGRBTargeted']:
                # if sub-threshold GRB, launch search with that pipeline
                raven.coincidence_search(
                    graceid, alert['object'], group='CBC',
                    searches=['SubGRB', 'SubGRBTargeted'],
                    pipelines=[alert['object']['pipeline']])
            else:
                # if threshold GRB, launch standard CBC-GRB search
                raven.coincidence_search(graceid, alert['object'],
                                         group='CBC', searches=['GRB'])
        elif 'S' in graceid:
            # launch standard GRB search based on group
            preferred_event_id = alert['object']['preferred_event']
            gw_group = gracedb.get_group(preferred_event_id)
            raven.coincidence_search(graceid, alert['object'],
                                     group=gw_group, searches=['GRB'])
            if gw_group == 'CBC':
                # launch subthreshold searches if CBC
                # for Fermi and Swift separately to use different time windows
                for pipeline in ['Fermi', 'Swift']:
                    raven.coincidence_search(
                        graceid, alert['object'], group='CBC',
                        searches=['SubGRB', 'SubGRBTargeted'],
                        pipelines=[pipeline])

    # rerun raven pipeline or created combined sky map when sky maps are
    # available
    elif alert['alert_type'] == 'label_added' and \
            alert['object'].get('group') == 'External':
        if _skymaps_are_ready(alert['object'], alert['data']['name'],
                              'compare'):
            # if both sky maps present and a coincidence, compare sky maps
            se_id, ext_ids = _get_superevent_ext_ids(graceid, alert['object'],
                                                     'compare')
            superevent = gracedb.get_superevent(se_id)
            preferred_event_id = superevent['preferred_event']
            gw_group = gracedb.get_group(preferred_event_id)
            tl, th = raven._time_window(graceid, gw_group,
                                        [alert['object']['pipeline']],
                                        [alert['object']['search']])
            raven.raven_pipeline([alert['object']], se_id, superevent,
                                 tl, th, gw_group)
        if _skymaps_are_ready(alert['object'], alert['data']['name'],
                              'combine'):
            # if both sky maps present and a raven alert, create combined
            # skymap
            se_id, ext_id = _get_superevent_ext_ids(graceid, alert['object'],
                                                    'combine')
            external_skymaps.create_combined_skymap(se_id, ext_id)
        elif 'EM_COINC' in alert['object']['labels']:
            # if not complete, check if GW sky map; apply label to external
            # event if GW sky map
            se_labels = gracedb.get_labels(alert['object']['superevent'])
            if 'SKYMAP_READY' in se_labels:
                gracedb.create_label.si('SKYMAP_READY', graceid).delay()
    elif alert['alert_type'] == 'label_added' and 'S' in graceid and \
            'SKYMAP_READY' in alert['object']['labels']:
        # if sky map in superevent, apply label to all external events
        # at the time
        group(
            gracedb.create_label.si('SKYMAP_READY', ext_id)
            for ext_id in alert['object']['em_events']
        ).delay()
Example #35
 def test_chain_with_chord_raises_error(self):
     with pytest.raises(NotImplementedError):
         (self.add.s(2, 2) | group(self.add.s(2, 2), self.add.s(5, 6))
          | self.add.s()).delay()
Example #36
def sync_all():
    job = group([sync.s(account.id) for account in StripeAccount.query.all()])
    job.apply_async()
Example #37
def run_collectors_by_type(*args):
    for slug in args:
        collector_type = CollectorType.objects\
            .prefetch_related('collectors').get(slug=slug)
        slugs = [c.slug for c in collector_type.collectors.all()]
        group(run_collector.s(slug) for slug in slugs)()
Example #38
#!/usr/bin/env python
import bipolar
from celery import group
from time import sleep
from elasticsearch import helpers, Elasticsearch
import sys
import json

ip_net = sys.argv[1]

ips = bipolar.net_explode(ip_net)
my_group = group([bipolar.scan_heartbleed.s(ip) for ip in ips])
group_results = my_group.apply_async(queue='scan')
for child in group_results.children:
    print(child.as_tuple()[0][0])

#group_results = my_group.apply_async()
#while not group_results.ready():
#    print('waiting for jobs to complete')
#    sleep(10)
#group_results = group_results.get()
#
#scan_data = {}
#for results in group_results:
#    ip = results['scan'].keys()[0]
#    scan_data[ip] = results['scan'][ip]
#
#output = []
#for ip in scan_data.keys():
#    open_ports = []
#    if 'tcp' in scan_data[ip].keys():
Example #39
import bipolar
from celery import group
from time import sleep
from elasticsearch import helpers, Elasticsearch
import sys
import json

sqli_file = sys.argv[1]

urls = []
with open(sqli_file) as f:
    url_data = f.readlines()
    for data in url_data:
        urls.append(data.strip())

my_group = group([bipolar.sqli_check.s(url) for url in urls])
group_results = my_group.apply_async()
print(group_results)
while not group_results.ready():
    print('waiting for jobs to complete')
    sleep(10)
group_results = group_results.get()

output = []
for results in group_results:
    if results is not None:
        for i in results:
            output.append(json.dumps(i))

print(output)
Example #40
    def train_networks(self, networks, initial=False):
        """
        train each networks on cluster server
        :param networks: network lists
        :return: networks
        """
        try:
            tasks = []
            # if this is the first version, run it alone first
            first_network = {}
            if (initial == True and len(networks) > 0
                    and len(list(first_network.keys())) == 0):
                network = networks[0]
                network['flag'] = True
                key = '_'.join(
                    [network['nn_id'],
                     str(network['nn_wf_ver_id'])])

                if (self.celery_flag):
                    result = train.delay(
                        network.get('nn_id'),
                        str(network.get('nn_wf_ver_id'))).get()
                    network['acc'] = result[key].get('accuracy')
                else:
                    result = train(network.get('nn_id'),
                                   str(network.get('nn_wf_ver_id')))
                    network['acc'] = result[key].get('accuracy')

                first_network = networks[0].copy()
                del networks[0]

            if (self.celery_flag):
                # You can use cluster servers for faster hyper parameter searching
                # using cluster server with celery for genetic algorithm
                for network in networks:
                    if (network['flag'] == True):
                        continue
                    tasks.append(
                        train.subtask((network.get('nn_id'),
                                       str(network.get('nn_wf_ver_id')))))
                results = group(tasks).apply_async()
                results = results.join()
                for result in results:
                    for network in networks:
                        key = '_'.join(
                            [network['nn_id'],
                             str(network['nn_wf_ver_id'])])
                        if (key in list(result.keys())
                                and result[key] is not None
                                and result[key].get('accuracy') is not None):
                            network['acc'] = result[key].get('accuracy')
                            network['flag'] = True
            else:
                # for debug you can run all tasks on django process
                for network in networks:
                    if (network['flag'] == True):
                        continue
                    result = train(network.get('nn_id'),
                                   str(network.get('nn_wf_ver_id')))
                    key = '_'.join(
                        [network['nn_id'],
                         str(network['nn_wf_ver_id'])])
                    network['acc'] = result[key].get('accuracy')
                    network['flag'] = True
            if len(list(first_network.keys())) > 0:
                networks.append(first_network)
        except Exception as e:
            logging.error("Error on training : {0} ".format(e))
        finally:
            return networks
Example #41
def add_chord_to_chord(self, nums, val):
    subtasks = [add.s(num, val) for num in nums]
    self.add_to_chord(group(subtasks) | tsum.s())
    return 0
Example #42
 def alwaysexits(self):
     g = group(exiting.s() for _ in range(10))
     self.join(g(), timeout=10)
Example #43
 def alwayskilled(self):
     g = group(kill.s() for _ in range(10))
     self.join(g(), timeout=10)
Example #44
def adds(self):
    group(add.s(i, i) for i in xrange(10)).delay()
Example #45
def orthorectify(initWorkingSetName,
                 stepName,
                 requestInfo,
                 jobId,
                 outputFolder,
                 imageFiles,
                 dsmFile,
                 dtmFile,
                 rpcFiles,
                 occlusionThreshold=None,
                 denoiseRadius=None):
    """
    Run Girder Worker jobs to orthorectify source images.

    Requirements:
    - Danesfield Docker image is available on host

    :param initWorkingSetName: The name of the top-level working set.
    :type initWorkingSetName: str
    :param stepName: The name of the step.
    :type stepName: str (DanesfieldStep)
    :param requestInfo: HTTP request and authorization info.
    :type requestInfo: RequestInfo
    :param jobId: Job ID.
    :type jobId: str
    :param outputFolder: Output folder document.
    :type outputFolder: dict
    :param imageFiles: List of image files.
    :type imageFiles: list[dict]
    :param dsmFile: DSM file document.
    :type dsmFile: dict
    :param dtmFile: DTM file document.
    :type dtmFile: dict
    :param rpcFiles: List of RPC files.
    :type rpcFiles: list[dict]
    :param occlusionThreshold:
    :type occlusionThreshold: float
    :param denoiseRadius:
    :type denoiseRadius: float
    :returns: None
    """
    gc = createGirderClient(requestInfo)

    def createOrthorectifyTask(imageFile, rpcFile):
        # Set output file name based on input file name
        orthoName = os.path.splitext(imageFile['name'])[0] + '_ortho.tif'
        outputVolumePath = VolumePath(orthoName)

        # Docker container arguments
        containerArgs = [
            'danesfield/tools/orthorectify.py',
            # Source image
            GirderFileIdToVolume(imageFile['_id'], gc=gc),
            # DSM
            GirderFileIdToVolume(dsmFile['_id'], gc=gc),
            # Destination image
            outputVolumePath,
            '--dtm',
            GirderFileIdToVolume(dtmFile['_id'], gc=gc),
            '--raytheon-rpc',
            GirderFileIdToVolume(rpcFile['_id'], gc=gc),
        ]
        if occlusionThreshold is not None:
            containerArgs.extend(
                ['--occlusion-thresh',
                 str(occlusionThreshold)])
        if denoiseRadius is not None:
            containerArgs.extend(['--denoise-radius', str(denoiseRadius)])

        # Result hooks
        # - Upload output files to output folder
        # - Provide upload metadata
        upload_kwargs = createUploadMetadata(jobId, stepName)
        resultHooks = [
            GirderUploadVolumePathToFolder(outputVolumePath,
                                           outputFolder['_id'],
                                           upload_kwargs=upload_kwargs,
                                           gc=gc)
        ]

        return docker_run.s(
            **createDockerRunArguments(image=DockerImage.DANESFIELD,
                                       containerArgs=containerArgs,
                                       jobTitle=('[%s] Orthorectify: %s' %
                                                 (initWorkingSetName,
                                                  imageFile['name'])),
                                       jobType=stepName,
                                       user=requestInfo.user,
                                       resultHooks=resultHooks))

    # Find RPC file corresponding to each image, or None
    correspondingRpcFiles = [
        next((rpcFile for rpcFile in rpcFiles
              if rpcFileMatchesImageFile(rpcFile, imageFile)), None)
        for imageFile in imageFiles
    ]
    # For some images, it seems that we're not getting RPC files from
    # the P3D step.  Deciding to simply skip those images and log a
    # warning instead of raising an exception for now.
    imagesMissingRpcFiles = [
        imageFile['name']
        for imageFile, rpcFile in zip(imageFiles, correspondingRpcFiles)
        if not rpcFile
    ]
    if imagesMissingRpcFiles:
        logprint.info(
            'Step: {} -- Warning: Missing RPC files for images: {}'.format(
                stepName, imagesMissingRpcFiles))
        # raise DanesfieldWorkflowException(
        #     'Missing RPC files for images: {}'.format(imagesMissingRpcFiles),
        #     step=stepName)

    # Run tasks in parallel using a group; skip if we have no rpcFile
    # for the given image
    tasks = [
        createOrthorectifyTask(imageFile, rpcFile)
        for imageFile, rpcFile in zip(imageFiles, correspondingRpcFiles)
        if rpcFile is not None
    ]
    groupResult = group(tasks).delay()

    DanesfieldWorkflowManager.instance().setGroupResult(
        jobId, stepName, groupResult)

    # Add info for job event listeners
    for result in groupResult.results:
        addJobInfo(result.job, jobId=jobId, stepName=stepName)
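Separated from the Girder and docker_run specifics, the dispatch pattern above is: build one task signature per valid input, run them as a single group, and keep the GroupResult so each child result can be tracked. A minimal sketch of that pattern, assuming a hypothetical orthorectify_image task and a local Redis broker/backend (none of these names come from the example above):

from celery import Celery, group

app = Celery('sketch', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/1')

@app.task
def orthorectify_image(image_name, rpc_name):
    # Stand-in for the docker_run orthorectification job.
    return '%s orthorectified with %s' % (image_name, rpc_name)

def run_step(image_rpc_pairs):
    # Skip images without a matching RPC file, mirroring the warn-and-skip logic above.
    tasks = [orthorectify_image.s(image, rpc)
             for image, rpc in image_rpc_pairs if rpc is not None]
    group_result = group(tasks).delay()
    # Each child result can be inspected individually, e.g. to attach job metadata.
    for child in group_result.results:
        print(child.id)
    return group_result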
Example #46
0
 def chaincomplex(self):
     c = (add.s(2, 2) | (add.s(4) | add.s(8) | add.s(16))
          | group(add.s(i) for i in range(4)))
     res = c()
     assert_equal(res.get(), [32, 33, 34, 35])
Example #47
0
def add_to_all(self, nums, val):
    """Add the given value to all supplied numbers."""
    subtasks = [add.s(num, val) for num in nums]
    raise self.replace(group(*subtasks))
Example #48
0
def get_content(page_num):
    """并行调用任务,group一次创建多个任务"""
    # start = time.time()
    for i in range(1, page_num + 1):
        group(C.s(base_url.format(i)))()
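Note that the loop above builds a new single-task group per page, so pages are dispatched one group at a time rather than all at once. A minimal sketch of the single-group variant, reusing the C task and base_url from the example (everything else is assumed):

def get_content_parallel(page_num):
    # One group containing one C task per page; the whole batch is dispatched together.
    return group(C.s(base_url.format(i)) for i in range(1, page_num + 1))()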
Example #49
0
 def test_unicode_task(self, manager):
     manager.join(
         group(print_unicode.s() for _ in range(5))(),
         timeout=TIMEOUT,
         propagate=True,
     )
Example #50
0
    def _parallel_get_market_trade_metrics(self, tca_request_list, dummy_market):
        logger = LoggerManager.getLogger(__name__)

        market_holder_list = DataFrameHolder()
        trade_order_holder_list = DataFrameHolder()

        # For each currency pair select collect the trades and market data, then calculate benchmarks and slippage
        result = []

        keep_looping = True

        # If we have also asked for trades/order
        if tca_request_list[0].trade_order_mapping is not None:
            point_in_time_executions_only = \
                self._util_func.dict_key_list(tca_request_list[0].trade_order_mapping) == ['trade_df']
        else:
            point_in_time_executions_only = True

        parallel_library = tca_request_list[0].multithreading_params['parallel_library']

        if parallel_library == 'single':
            # from tcapy.analysis.tcatickerloaderimpl import TCATickerLoaderImpl
            tca_ticker_loader = Mediator.get_tca_ticker_loader(version=self._version)

        start_date = tca_request_list[0].start_date
        finish_date = tca_request_list[0].finish_date

        # Parameters for the loop
        i = 0; no_of_tries = 5

        # Error trapping for Celery, if have failed event retry it
        while i < no_of_tries and keep_looping:

            try:
                # For each TCA request kick off a thread
                for tca_request_single_ticker in tca_request_list:

                    # Split up the request by date (monthly/weekly chunks)
                    tca_request_date_split = self._split_tca_request_by_date(
                        tca_request_single_ticker, tca_request_single_ticker.ticker,
                        period=tca_request_single_ticker.multithreading_params['cache_period'])

                    if not(constants.multithreading_params['splice_request_by_dates']) \
                                or tca_request_list[0].tca_type == 'detailed' \
                                or tca_request_list[0].tca_type == 'compliance' \
                                or tca_request_list[0].summary_display == 'candlestick'\
                                or not(point_in_time_executions_only):

                        if 'celery' in parallel_library:
                            # Load all the data for this ticker and THEN calculate the metrics on it
                            result.append(chord((get_market_trade_holder_via_celery.s(tca_request_data)
                                                 for tca_request_data in tca_request_date_split),
                                                calculate_metrics_single_ticker_via_celery.s(tca_request_single_ticker,
                                                                                             dummy_market)).apply_async())
                        elif parallel_library == 'single':
                            # This is not actually parallel, but is mainly for debugging purposes
                            for tca_request_s in tca_request_date_split:

                                # print(tca_request_s.start_date)
                                market_df, trade_order_df_dict = tca_ticker_loader.get_market_trade_order_holder(
                                    tca_request_s, return_cache_handles=False)

                                market_df, trade_order_df_list, ticker, trade_order_keys = \
                                    tca_ticker_loader.calculate_metrics_single_ticker((market_df, trade_order_df_dict),
                                                                                        tca_request_s, dummy_market)

                                market_holder_list.add_dataframe(market_df, ticker)

                                trade_order_holder_list.add_dataframe_dict(
                                    dict(zip(trade_order_keys, trade_order_df_list)))


                    else:
                        # Otherwise work on parallel chunks by date
                        # doesn't currently work with orders which straddle day/week/month boundaries
                        # but should work with points in time
                        #
                        # In practice, it's not really much faster than the above code
                        if 'celery' == parallel_library:

                            # For each ticker/date combination load data and process chunk (so can do fully in parallel)
                            result.append(group(get_market_trade_holder_and_calculate_metrics_single_ticker_via_celery.s(
                                         tca_request_data,
                                         dummy_market) for tca_request_data in tca_request_date_split).apply_async())

                # Now combine the results from the parallel operations, if using celery
                if 'celery' in parallel_library:

                    # Careful, when the output is empty!
                    output = [p.get(timeout=constants.celery_timeout_seconds) for p in result if p is not None]

                    # If pipelined/splice_request_by_dates will have two lists so flatten it into one
                    output = self._util_func.flatten_list_of_lists(output)

                    for market_df, trade_order_df_list, ticker, trade_order_keys in output:
                        market_holder_list.add_dataframe(market_df, ticker)
                        # market_df_dict[ticker] = market_df

                        trade_order_holder_list.add_dataframe_dict(dict(zip(trade_order_keys, trade_order_df_list)))

                    del result
                    del output

                keep_looping = False

            except DateException as e:
                raise e

                keep_looping = False

            except TradeMarketNonOverlapException as e:
                raise e

                keep_looping = False

            except DataMissingException as e:
                raise e

                keep_looping = False

            except ErrorWritingOverlapDataException as e:
                raise e

                keep_looping = False

            # Exception likely related to Celery and possibly lack of communication with Redis message broker
            # or Memcached results backend
            # except Exception as e:
            except Exception as e:
                if i == no_of_tries - 1:
                    err_msg = "Failed with " + parallel_library + " after multiple attempts: " + str(e) + ", " + str(traceback.format_exc())

                    raise Exception(err_msg)

                i = i + 1

                logger.warn("Failed with " + parallel_library + ", trying again for " + str(i) + " time: " + str(e) + ", " + str(traceback.format_exc()))

        logger.debug("Finished parallel computation")

        # Expand out the DataFrame holders into dictionaries of DataFrames
        market_df_dict = market_holder_list.get_combined_dataframe_dict()
        trade_order_results_df_dict = trade_order_holder_list.get_combined_dataframe_dict(start_date=start_date, finish_date=finish_date)

        # TODO add candlestick drawing here for cases when using split threading by date
        trade_order_results_df_dict = self._util_func.remove_keymatch_dict(trade_order_results_df_dict, 'market_df_downsampled')

        return market_df_dict, trade_order_results_df_dict
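The Celery core of the example above is a chord: a group of per-date loader tasks whose combined results feed a single metric-calculation callback, with the callback signature's own arguments appended after the list of header results. A minimal sketch of that shape, assuming hypothetical load_chunk and calculate_metrics tasks and a local Redis broker/backend:

from celery import Celery, chord

app = Celery('sketch', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/1')

@app.task
def load_chunk(start_date, finish_date):
    # Stand-in for loading one date slice of market/trade data.
    return {'start': start_date, 'finish': finish_date}

@app.task
def calculate_metrics(chunks, ticker):
    # Receives the list of all chunk results first, then the extra args from .s().
    return {'ticker': ticker, 'chunks_loaded': len(chunks)}

def run_ticker(ticker, date_splits):
    result = chord(
        (load_chunk.s(start, finish) for start, finish in date_splits),
        calculate_metrics.s(ticker),
    ).apply_async()
    return result.get(timeout=60)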
Example #51
0
    def run_task(self, job_uid=None, user=None):
        run_uid = ''
        logger.debug('Running Job with id: {0}'.format(job_uid))
        job = Job.objects.get(uid=job_uid)
        job_name = self.normalize_job_name(job.name)
        formats = [format.slug for format in job.formats.all()]
        export_tasks = []
        # build a list of celery tasks based on the export formats..
        for format in formats:
            try:
                # see settings.EXPORT_TASKS for configuration
                task_fq_name = self.export_task_registry[format]
                # instantiate the required class.
                parts = task_fq_name.split('.')
                module_path, class_name = '.'.join(parts[:-1]), parts[-1]
                module = importlib.import_module(module_path)
                CeleryExportTask = getattr(module, class_name)
                export_task = CeleryExportTask()
                export_tasks.append(export_task)
            except KeyError as e:
                logger.debug(e)
            except ImportError as e:
                msg = 'Error importing export task: {0}'.format(e)
                logger.debug(msg)

        # run the tasks
        if len(export_tasks) > 0:
            # start the run
            run = None
            try:
                # enforce max runs
                max_runs = settings.EXPORT_MAX_RUNS
                run_count = job.runs.count()
                if run_count > 0:
                    while run_count > max_runs - 1:
                        job.runs.earliest(field_name='started_at').delete(
                        )  # delete earliest
                        run_count -= 1
                # add the new run
                if not user:
                    user = job.user
                run = ExportRun.objects.create(
                    job=job, user=user, status='SUBMITTED')  # persist the run
                run.save()
                run_uid = str(run.uid)
                logger.debug('Saved run with id: {0}'.format(run_uid))
            except DatabaseError as e:
                logger.error('Error saving export run: {0}'.format(e))
                raise e

            # setup the staging directory
            stage_dir = settings.EXPORT_STAGING_ROOT + str(run_uid) + '/'
            os.makedirs(stage_dir, 6600)

            # pull out the tags to create the conf file
            categories = job.categorised_tags  # dict of points/lines/polygons
            bbox = job.overpass_extents  # extents of job in order required by overpass

            # setup the initial tasks
            conf = OSMConfTask()
            query = OverpassQueryTask()
            pbfconvert = OSMToPBFConvertTask()
            prep_schema = OSMPrepSchemaTask()

            # check for transform and/or translate configurations
            """
            Not implemented for now.

            transform = job.configs.filter(config_type='TRANSFORM')
            translate = job.configs.filter(config_type='TRANSLATION')
            """

            # save initial tasks to the db with 'PENDING' state..
            for initial_task in [conf, query, pbfconvert, prep_schema]:
                try:
                    ExportTask.objects.create(run=run,
                                              status='PENDING',
                                              name=initial_task.name)
                    logger.debug('Saved task: {0}'.format(initial_task.name))
                except DatabaseError as e:
                    logger.error('Saving task {0} threw: {1}'.format(
                        initial_task.name, e))
                    raise e
            # save the rest of the ExportFormat tasks.
            for export_task in export_tasks:
                """
                    Set the region name on the Garmin Export task.
                    The region gets written to the exported '.img' file.
                    Could set additional params here in future if required.
                """
                if export_task.name == 'Garmin Export':
                    export_task.region = job.region.name
                try:
                    ExportTask.objects.create(run=run,
                                              status='PENDING',
                                              name=export_task.name)
                    logger.debug('Saved task: {0}'.format(export_task.name))
                except DatabaseError as e:
                    logger.error('Saving task {0} threw: {1}'.format(
                        export_task.name, e))
                    raise e
            # check if we need to generate a preset file from Job feature selections
            if job.feature_save or job.feature_pub:
                # run GeneratePresetTask
                preset_task = GeneratePresetTask()
                ExportTask.objects.create(run=run,
                                          status='PENDING',
                                          name=preset_task.name)
                logger.debug('Saved task: {0}'.format(preset_task.name))
                # add to export tasks
                export_tasks.append(preset_task)
            """
                Create a celery chain which runs the initial conf and query tasks (initial_tasks),
                followed by a chain of pbfconvert and prep_schema (schema_tasks).
                The export format tasks (format_tasks) are then run in parallel, followed
                by the finalize_task at the end to clean up staging dirs, update run status, email user etc..
            """

            initial_tasks = chain(
                conf.si(categories=categories,
                        stage_dir=stage_dir,
                        run_uid=run_uid,
                        job_name=job_name) | query.si(stage_dir=stage_dir,
                                                      job_name=job_name,
                                                      bbox=bbox,
                                                      run_uid=run_uid,
                                                      filters=job.filters))

            schema_tasks = chain(
                pbfconvert.si(
                    stage_dir=stage_dir, job_name=job_name, run_uid=run_uid)
                | prep_schema.si(
                    stage_dir=stage_dir, job_name=job_name, run_uid=run_uid))

            format_tasks = group(
                task.si(
                    run_uid=run_uid, stage_dir=stage_dir, job_name=job_name)
                for task in export_tasks)

            finalize_task = FinalizeRunTask()
            """
                If header tasks fail, errors will not propagate to the finalize_task.
                This means that the finalize_task will always be called, and will update the
                overall run status.
            """
            chain(
                chain(initial_tasks, schema_tasks),
                chord(header=format_tasks,
                      body=finalize_task.si(
                          stage_dir=stage_dir, run_uid=run_uid)).set(
                              link_error=finalize_task.si())).apply_async(
                                  expires=datetime.now() + timedelta(
                                      days=1))  # tasks expire after one day.

            return run

        else:
            return False
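Stripped of the export-specific machinery, the workflow built at the end of this example is a chain of preparatory tasks feeding a chord whose body also serves as the error callback, so the finalize step runs either way. A minimal sketch under that reading, with hypothetical prepare, convert, export_format and finalize tasks:

from datetime import datetime, timedelta

from celery import Celery, chain, chord, group

app = Celery('sketch', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/1')

@app.task
def prepare(run_uid):
    return run_uid

@app.task
def convert(run_uid):
    return run_uid

@app.task
def export_format(fmt, run_uid):
    return (fmt, run_uid)

@app.task
def finalize(results=None, run_uid=None):
    # Chord body on success; also wired up via link_error so the run status
    # gets updated even when one of the preceding tasks fails.
    return run_uid

def launch(run_uid, formats):
    format_tasks = group(export_format.si(fmt, run_uid) for fmt in formats)
    return chain(
        prepare.si(run_uid),
        convert.si(run_uid),
        chord(header=format_tasks,
              body=finalize.si(run_uid=run_uid)).set(
                  link_error=finalize.si(run_uid=run_uid)),
    ).apply_async(expires=datetime.now() + timedelta(days=1))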
Example #52
0
import cv2
import base64
import json
import time
import os
from celery import group
from celery_proj.celery_app import predict_task

img_ls = []
img_loc = "Data"
for filename in os.listdir(img_loc):
    if filename.endswith(".jpg"):
        img_ls.append(filename)


def json_encode(img):
    full_p = os.path.join(img_loc, img)
    img = cv2.imread(full_p)
    _, im_arr = cv2.imencode('.jpg', img)
    im_bytes = im_arr.tobytes()
    base_img = base64.b64encode(im_bytes).decode('utf-8')
    return base_img


predict_result = group(
    predict_task.s(json_encode(i), (180, 180)) for i in img_ls)()
res = predict_result.get()

for i in res:
    print(i)
Example #53
0
def grid_search_controller(config_path):
    # start = time.time()
    
    # Dynamic importing config file from config_path
    config = load(config_path)

    # Dynamic loading lambda name
    LAMBDA_NAME = getattr(config.Cross_Validation, "LAMBDA_NAME")
    
    # Clean the log of specified lambda function
    clean_logs('/aws/lambda/' + LAMBDA_NAME)

    # Dynamic load parameters 
    PARAMETERS = []
    CV_SETTINGS = []
    for key in dir(config.Hyperparameter):
        if key.isupper():
            PARAMETERS.append(key)
    for key in dir(config.Cross_Validation):
        if key.isupper():
            CV_SETTINGS.append(key)

    # Tune forecast horizon of the chosen model
    payload_list = create_event(config, PARAMETERS, CV_SETTINGS)
    
    min_metric = float('inf')
    chosen_model_event = None
    metrics = []
    
    # from src.lambda_func.prophet.prophet import grid_search_worker
    # for payload in payload_list:
    #     map_item = grid_search_worker(payload)
        
    #     metrics.append(map_item['average_metric'])
    #     if map_item['average_metric'] < min_metric:
    #         print ("======Update chosen model event==========")
    #         chosen_model_event = map_item['event']
    #         min_metric = map_item['average_metric']
    
    # print ("=======Metric=======")
    # print (min_metric)
    # print ("======Event=======")
    # print (chosen_model_event)
    # print ("======Metrics=======")
    # print (metrics)
    # print ("====Execution time====")
    # print (time.time() - start)
    
    start = time.time()
    print ("=====Time Stamp======")
    print (start)
    job = group(invoke_lambda.s(
                    function_name = LAMBDA_NAME,
                    sync = True,
                    payload = payload
                    ) for payload in payload_list)
    print("===Async Tasks start===")
    result = job.apply_async()
    result.save()
    from celery.result import GroupResult
    saved_result = GroupResult.restore(result.id)

    while not saved_result.ready():
        time.sleep(0.1)
    model_list = saved_result.get(timeout=None)

    print("===Async Tasks end===")
    print (time.time() - start)

    for item in model_list:
        payload = item['Payload']
        if payload['average_metric'] < min_metric:
            chosen_model_event = payload['event']
            min_metric = payload['average_metric']
    
    from src.celery_lambda import measurement
    measurement.parse_log("/aws/lambda/prophet_worker")

    # Non-zero forecast period makes lambda upload graphs to s3
    chosen_model_event['forecast'] = getattr(config.Cross_Validation, "FORECAST")
    
    # Invoke Lambda with forecast

    response = invoke_lambda(function_name = LAMBDA_NAME,
                             sync=True,
                             payload=chosen_model_event)
    print ("=======The Execution Time===========")
    print (time.time() - start)
    print (response)
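The save()/restore() pair in the example only works with a result backend that supports persisting group metadata (such as Redis or a database). A minimal sketch of that pattern, assuming a hypothetical invoke task and a local Redis backend:

from celery import Celery, group
from celery.result import GroupResult

app = Celery('sketch', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/1')

@app.task
def invoke(payload):
    # Stand-in for invoke_lambda in the example above.
    return {'payload': payload}

def dispatch_and_restore(payloads):
    result = group(invoke.s(p) for p in payloads).apply_async()
    result.save()            # persist the set of child task ids in the backend
    group_id = result.id

    # Possibly in another process: rebuild the handle from its id alone.
    restored = GroupResult.restore(group_id)
    return restored.get(timeout=300)   # blocks until every child task has finished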
Example #54
0
def move_ucr_data_into_aggregation_tables(date=None, intervals=2):
    date = date or datetime.utcnow().date()
    monthly_dates = []

    # probably this should be run one time, for now I leave this in aggregations script (not a big cost)
    # but remove issues when someone add new table to mapping, also we don't need to add new rows manually
    # on production servers
    _update_ucr_table_mapping()

    first_day_of_month = date.replace(day=1)
    for interval in range(intervals - 1, 0, -1):
        # calculate the last day of the previous months to send to the aggregation script
        first_day_next_month = first_day_of_month - relativedelta(
            months=interval - 1)
        monthly_dates.append(first_day_next_month - relativedelta(days=1))

    monthly_dates.append(date)

    db_alias = get_icds_ucr_db_alias()
    if db_alias:
        with connections[db_alias].cursor() as cursor:
            _create_aggregate_functions(cursor)
            _update_aggregate_locations_tables(cursor)

        state_ids = (SQLLocation.objects.filter(
            domain=DASHBOARD_DOMAIN,
            location_type__name='state').values_list('location_id', flat=True))

        for monthly_date in monthly_dates:
            calculation_date = monthly_date.strftime('%Y-%m-%d')
            stage_1_tasks = [
                icds_state_aggregation_task.si(state_id=state_id,
                                               date=monthly_date,
                                               func=_aggregate_gm_forms)
                for state_id in state_ids
            ]
            stage_1_tasks.extend([
                icds_state_aggregation_task.si(state_id=state_id,
                                               date=monthly_date,
                                               func=_aggregate_df_forms)
                for state_id in state_ids
            ])
            stage_1_tasks.extend([
                icds_state_aggregation_task.si(state_id=state_id,
                                               date=monthly_date,
                                               func=_aggregate_cf_forms)
                for state_id in state_ids
            ])
            stage_1_tasks.extend([
                icds_state_aggregation_task.si(
                    state_id=state_id,
                    date=monthly_date,
                    func=_aggregate_child_health_thr_forms)
                for state_id in state_ids
            ])
            stage_1_tasks.extend([
                icds_state_aggregation_task.si(
                    state_id=state_id,
                    date=monthly_date,
                    func=_aggregate_ccs_record_thr_forms)
                for state_id in state_ids
            ])
            stage_1_tasks.extend([
                icds_state_aggregation_task.si(
                    state_id=state_id,
                    date=monthly_date,
                    func=_aggregate_child_health_pnc_forms)
                for state_id in state_ids
            ])
            stage_1_tasks.extend([
                icds_state_aggregation_task.si(
                    state_id=state_id,
                    date=monthly_date,
                    func=_aggregate_ccs_record_pnc_forms)
                for state_id in state_ids
            ])
            # stage_1_tasks.extend([
            #     icds_state_aggregation_task.si(
            #         state_id=state_id, date=monthly_date, func=_aggregate_delivery_forms
            #     ) for state_id in state_ids
            # ])
            stage_1_tasks.extend([
                icds_state_aggregation_task.si(state_id=state_id,
                                               date=monthly_date,
                                               func=_aggregate_bp_forms)
                for state_id in state_ids
            ])
            stage_1_tasks.extend([
                icds_state_aggregation_task.si(state_id=state_id,
                                               date=monthly_date,
                                               func=_aggregate_awc_infra_forms)
                for state_id in state_ids
            ])
            stage_1_tasks.append(
                icds_aggregation_task.si(date=calculation_date,
                                         func=_update_months_table))
            res = group(*stage_1_tasks).apply_async()
            res_daily = icds_aggregation_task.delay(
                date=calculation_date, func=_daily_attendance_table)
            res.get()

            res_child = chain(
                icds_aggregation_task.si(date=calculation_date,
                                         func=_child_health_monthly_table),
                icds_aggregation_task.si(date=calculation_date,
                                         func=_agg_child_health_table),
            ).apply_async()
            res_ccs = chain(
                icds_aggregation_task.si(date=calculation_date,
                                         func=_ccs_record_monthly_table),
                icds_aggregation_task.si(date=calculation_date,
                                         func=_agg_ccs_record_table),
            ).apply_async()
            res_daily.get()
            res_ccs.get()
            res_child.get()

            res_awc = icds_aggregation_task.delay(date=calculation_date,
                                                  func=_agg_awc_table)
            res_awc.get()

        chain(
            icds_aggregation_task.si(date=date.strftime('%Y-%m-%d'),
                                     func=aggregate_awc_daily),
            email_dashboad_team.si(
                aggregation_date=date.strftime('%Y-%m-%d'))).delay()
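Recent Celery versions discourage waiting on subtask results from inside another task, as the res.get() calls above do, because it can deadlock the worker pool; by default they raise a RuntimeError unless the wait is made explicit. A minimal sketch of the allow_join_result escape hatch, assuming a hypothetical orchestrate task and a local Redis broker/backend:

from celery import Celery, group
from celery.result import allow_join_result

app = Celery('sketch', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/1')

@app.task
def step(n):
    return n * 2

@app.task
def orchestrate(values):
    res = group(step.s(v) for v in values).apply_async()
    # Waiting on subtasks inside a task risks deadlock if the pool is exhausted;
    # allow_join_result() makes the synchronous wait explicit instead of raising.
    with allow_join_result():
        return res.get(timeout=60)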
Example #55
0
def GetDevicesInfo(addrlist):
    job = group([GetASICInfo.s(ip) for ip in addrlist if ip != ''])
    run = job.apply_async()
    result = run.get()

    return result
Example #56
0
def process_one(filename=None):
    """Enqueues a mail file for processing"""

    res = chain(parse.s(filename), group(deploy_db.s(), deploy_es.s()))()

    print("Enqueued mail file for processing: {} ({})".format(filename, res))
Example #57
0
 def unicodetask(self):
     self.join(group(print_unicode.s() for _ in range(5))(),
               timeout=1,
               propagate=True)
Example #58
0
def dispatch_tasks(task_id, end_time_hour, end_time_minute):
    username, camera_id = task_id.split('##')
    camera = Camera.objects.filter(
        user__username=username,
        camera_id=camera_id).first()  # get the camera model to extract frame

    ai_skill_settings = camera.ai_skill_settings.all(
    )  # get all ai_skills setted
    for ai_skill_setting in ai_skill_settings:
        ai_skill = ai_skill_setting.ai_skill
        coordinates = ai_skill_setting.coordinates
        face_relevence = ai_skill_setting.face_relevance

        skill_url = ai_skill.ai_skill_url
        camera_url = camera.camera_url

        skill_test = None
        try:
            skill_test = requests.get(skill_url).status_code
            if skill_test != 200:
                ai_skill.state = 0
                ai_skill.save()
            else:
                ai_skill.state = 1
                ai_skill.save()
        except requests.exceptions.ConnectionError:
            ai_skill.state = 0
            ai_skill.save()

        camera_test = is_opened(camera_url=camera_url)
        if not camera_test:
            camera.state = 10  # connection failure
            camera.save()

        if skill_test == 200 and camera_test:
            info = {
                'user': camera.user.id,
                'camera': camera.id,
                'ai_skill': ai_skill.id
            }

            all_faces = None
            if face_relevence:
                similarity = face_relevence.similarity
                quality = face_relevence.quality

                face_groups = face_relevence.face_group.all()
                face_images = []
                for face_group in face_groups:
                    faces = face_group.face_set.all()
                    if faces:
                        for face in faces:
                            face_image = face.face_image
                            face_images.append(
                                base64.b64encode(face_image.read()))

                all_faces = {
                    'similarity': similarity,
                    'quality': quality,
                    'faces': face_images
                }

            with RedisTaskState(task_id=task_id) as task_state:
                task_state.set_state('running')

            group(
                put_image.s(camera_url=camera_url,
                            coordinates=coordinates,
                            task_id=task_id,
                            end_time_hour=end_time_hour,
                            end_time_minute=end_time_minute),
                detect_image.s(skill_id=ai_skill.id,
                               task_id=task_id,
                               end_time_hour=end_time_hour,
                               end_time_minute=end_time_minute,
                               interval=camera.extraction_settings.frequency,
                               faces=all_faces,
                               **info)).apply_async()
        else:
            with RedisTaskState(task_id=task_id) as task_state:
                task_state.set_state('error')
            clear_queue(task_id)
Example #59
0
 def manyshort(self):
     self.join(group(add.s(i, i) for i in range(1000))(),
               timeout=10,
               propagate=True)
Example #60
0
 def always_timeout(self):
     self.join(
         group(sleeping.s(1).set(time_limit=0.1) for _ in range(100))(),
         timeout=10,
         propagate=True,
     )