Example #1
def process_region(input_seqs, trop_dict, basename, database=None, mrbayes_args={}, extra_fields={}):

    try:
        if os.path.exists(basename + '.tree'):
            return
        handle = open(basename + '.tree', 'w')
        logging.info('Making Tree ' + str(extra_fields))
        contree, treeset = make_mrbayes_trees(input_seqs, **mrbayes_args)
        contree.write_to_stream(handle, 'nexus')
        treeset.write_to_path(basename + '.treeset', 'nexus')
    except IOError:
        return
    except OSError:
        return

    bats_write_subtask = subtask('TreeingTools.write_results_to_mongo', (), {
        'result_type': 'BATS',
        'extra_fields': extra_fields,
        'database': database
    })

    logging.info('Starting BATS ' + str(extra_fields))
    run_bats.apply_async(args=(basename + '.treeset', trop_dict),
                         kwargs = {'nreps': 50}, link = bats_write_subtask)

    benj_write_subtask = subtask('TreeingTools.write_results_to_mongo', (), {
        'result_type': 'Benj',
        'extra_fields': extra_fields,
        'database': database
    })

    dmat = get_pairwise_distances(contree)
    logging.info('Starting Dist Pvals ' + str(extra_fields))
    check_distance_pvals.apply_async(args=(dmat, trop_dict),
                                     kwargs = {'nreps': 500}, link = benj_write_subtask)
Example #2
def _process_customer(requester, customer, mailboxes, folders, users):
    if customer.id is None or (customer.emails is None
                               and customer.fullname is None):
        # can't use customer with no data
        logger.debug("Customer '%s' for user '%s' cannot be used - no data",
                     (customer.id or customer.fullname), requester.username)
        return
    db_customer, created = Document.objects.get_or_create(
        helpscout_customer_id=customer.id,
        requester=requester,
        user_id=requester.id)
    db_customer.helpscout_name = customer.fullname
    logger.debug("Processing Helpscout customer '%s' for user '%s'",
                 customer.fullname, requester.username)
    new_updated = customer.modifiedat
    new_updated_ts = parse_dt(new_updated).timestamp()
    if not created and db_customer.last_updated_ts:
        new_updated_ts = db_customer.last_updated_ts \
            if db_customer.last_updated_ts > new_updated_ts else new_updated_ts
    db_customer.last_updated = datetime.utcfromtimestamp(
        new_updated_ts).isoformat() + 'Z'
    db_customer.last_updated_ts = new_updated_ts
    db_customer.helpscout_title = 'User: {}'.format(customer.fullname)
    db_customer.webview_link = 'https://secure.helpscout.net/customer/{}/0/'.format(
        customer.id)
    db_customer.primary_keywords = HELPSCOUT_KEYWORDS['primary']
    db_customer.secondary_keywords = HELPSCOUT_KEYWORDS['secondary']
    db_customer.helpscout_company = customer.organization
    db_customer.helpscout_emails = ', '.join(
        e.get('value') for e in customer.emails
        if 'value' in e) if customer.emails else None
    db_customer.save()
    algolia_engine.sync(db_customer, add=created)
    subtask(process_customer).delay(requester, db_customer, mailboxes, folders,
                                    users)
Example #3
def get_friends_for_user(self, fb_id, callback, next_uri=None):
    """
    Get the facebook friends for the user with fb_id.

    1. Needs a valid access_token in the cache
    2. Needs 'user_friends' permission
    3. Needs a callback function that can store the friends somewhere

    If 1. is not present, the task is delayed
    If 2. is not the case, you're out of luck
    """
    access_token = get_cached_access_token(fb_id)
    if access_token is None:
        raise self.retry(exc=ValueError("Failed to fetch facebook data for %s. "
                                        "No access_token found in cache" % fb_id))

    graph = facebook.GraphAPI(access_token)

    try:
        if next_uri:
            data = graph.bare_request(next_uri)
        else:
            data = graph.get_connections('me', 'friends', limit=500)
    except facebook.GraphAPIError as exc:
        raise self.retry(exc=exc)

    subtask(callback).delay(data['data'])
    if data['paging'].get('next'):
        self.delay(fb_id, callback, next_uri=data['paging']['next'])
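The callback is only ever invoked with data['data'], i.e. the list of friend dicts returned by the Graph API. A minimal sketch of a compatible callback task (the task and model names are hypothetical):

from celery import shared_task

@shared_task
def store_friends(friends):
    # each entry typically carries at least 'id' and 'name'
    for friend in friends:
        Friend.objects.update_or_create(fb_id=friend['id'],
                                        defaults={'name': friend.get('name', '')})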
Example #4
def on_app_ready(sender=None, headers=None, body=None, **kwargs):
    if cache.get("CELERY_APP_READY", 0) == 1:
        return
    cache.set("CELERY_APP_READY", 1, 10)
    tasks = get_after_app_ready_tasks()
    logger.debug("Start need start task: [{}]".format(", ".join(tasks)))
    for task in tasks:
        subtask(task).delay()
Example #5
def on_app_ready(sender=None, headers=None, body=None, **kwargs):
    if cache.get("CELERY_APP_READY", 0) == 1:
        return
    cache.set("CELERY_APP_READY", 1, 10)
    logger.debug("App ready signal recv")
    logger.debug("Start need start task: [{}]".format(
        ", ".join(__AFTER_APP_READY_RUN_TASKS)))
    for task in __AFTER_APP_READY_RUN_TASKS:
        subtask(task).delay()
Example #6
def unlock_graph(result, callback, interval=1, propagate=False,
        max_retries=None):
    if result.ready():
        second_level_res = result.get()
        if second_level_res.ready():
            subtask(callback).delay(list(joinall(
                second_level_res, propagate=propagate)))
    else:
        unlock_graph.retry(countdown=interval, max_retries=max_retries)
Example #7
def on_app_ready(sender=None, headers=None, **kwargs):
    if cache.get("CELERY_APP_READY", 0) == 1:
        return
    cache.set("CELERY_APP_READY", 1, 10)
    tasks = get_after_app_ready_tasks()
    logger.debug("Work ready signal recv")
    logger.debug("Start need start task: [{}]".format(", ".join(tasks)))
    for task in tasks:
        subtask(task).delay()
Example #8
def on_app_ready(sender=None, headers=None, body=None, **kwargs):
    if cache.get("CELERY_APP_READY", 0) == 1:
        return
    cache.set("CELERY_APP_READY", 1, 10)
    logger.debug("App ready signal recv")
    logger.debug("Start need start task: [{}]".format(
        ", ".join(__AFTER_APP_READY_RUN_TASKS))
    )
    for task in __AFTER_APP_READY_RUN_TASKS:
        subtask(task).delay()
Example #9
 def on_chord_part_return(self, task, propagate=False):
     from celery import subtask
     from celery.result import TaskSetResult
     setid = task.request.taskset
     if not setid:
         return
     key = self.get_key_for_chord(setid)
     deps = TaskSetResult.restore(setid, backend=task.backend)
     if self.client.incr(key) >= deps.total:
         subtask(task.request.chord).delay(deps.join(propagate=propagate))
         deps.delete()
         self.client.delete(key)
Example #10
	def _launch_all_scanning_subtasks_after_task_enum(self, jobs):
		err_handler = ScanRunErrorHandlerTask()
		result_handler = ScanRunResultHandlerTask()
		# filter out none jobs that we don't have to launch
		logr.debug("Enumerating scanning subtasks based on '{0}' jobs.".format(len(jobs)))
		filt_jobs = filter(lambda jobdb: jobdb[0] is not None, jobs)

		r = [job.apply_async(timeout=self.scan_timeout * 0.9,
		                 link_error=subtask(err_handler, queue=err_handler.queue),
		                 link=subtask(result_handler, args=(db_entry.task_id,), queue=result_handler.queue),
		                 ) for job, db_entry in filt_jobs]
		return r
Example #11
def collect_files(requester, repo_id, repo_name, repo_url, default_branch,
                  enrichment_delay):
    """
    List all files in a repo. Should be called once, after the first sync of a repo; subsequent
    syncing is handled via the collect_commits() function.

    Note that this uses Github's API call for retrieval of recursive trees:
      https://developer.github.com/v3/git/trees/#get-a-tree-recursively
    This API call returns a flat list of all files and saves us many API calls that would otherwise
    be needed to recursively fetch the files of each repo directory. It may not work well for very
    big repos (> 5k files), because the GitHub API limits the number of elements returned per call.
    """
    github_client = init_github_client(requester)
    repo = github_client.get_repo(full_name_or_id=repo_name)
    new_files = []
    for f in repo.get_git_tree(sha=repo.default_branch, recursive=True).tree:
        db_file, created = Document.objects.get_or_create(
            github_file_id=_compute_sha('{}{}'.format(repo_id, f.path)),
            github_repo_id=repo_id,
            requester=requester,
            user_id=requester.id)
        if created:
            new_files.append({
                'sha': f.sha,
                'filename': f.path,
                'action': 'modified',
                'type': f.type
            })
            db_file.primary_keywords = GITHUB_PRIMARY_KEYWORDS
            db_file.secondary_keywords = GITHUB_SECONDARY_KEYWORDS['file']
            # set the timestamp to 0 (epoch) to signal that we don't know the update timestamp
            db_file.last_updated_ts = 0
            db_file.last_updated = datetime.utcfromtimestamp(
                0).isoformat() + 'Z'
            db_file.github_title = '{}: {}'.format(
                'Dir' if f.type == 'tree' else 'File',
                f.path.split('/')[-1])
            db_file.github_file_path = f.path
            db_file.github_repo_full_name = repo_name
            db_file.webview_link = '{}/blob/{}/{}'.format(
                repo_url, default_branch, f.path)
            algolia_engine.sync(db_file, add=created)
        db_file.last_synced = get_utc_timestamp()
        db_file.download_status = Document.PENDING
        db_file.save()
    # run enrich_files() for all new_files in chunks of 50 items
    i = 0
    for ff in [new_files[x:x + 50] for x in range(0, len(new_files), 50)]:
        i = i + 1
        subtask(enrich_files).apply_async(
            args=[requester, ff, repo.id, repo_name, repo_url, default_branch],
            countdown=enrichment_delay + (240 * i))
Example #12
def update_synchronization():
    """
    Check for new/updated files in external systems for all users. Should be called periodically after initial syncing.
    Gdrive-only at the moment.
    """
    logger.debug("Update synchronizations started")
    for sa in SocialAttributes.objects.filter(start_page_token__isnull=False):
        if should_sync(sa.user, 'google-oauth2', 'tasks.gdrive'):
            if sa.user.social_auth.filter(provider='google-oauth2').first():
                access_token, refresh_token = get_google_tokens(sa.user)
                subtask(sync_gdrive_changes).delay(sa.user, access_token, refresh_token, sa.start_page_token)
        else:
            logger.info("Gdrive oauth token for user '%s' already in use, skipping sync ...", sa.user.username)
Example #13
def run_ansible_task(tid, callback=None, **kwargs):
    """
    :param tid: is the tasks serialized data
    :param callback: callback function name
    :return:
    """
    task = get_object_or_none(Task, id=tid)
    if task:
        result = task.run()
        if callback is not None:
            subtask(callback).delay(result, task_name=task.name)
        return result
    else:
        logger.error("No task found")
Example #14
    def on_chord_part_return(self, task, propagate=None):
        if not self.implements_incr:
            return
        from celery import subtask
        from celery.result import GroupResult
        app = self.app
        if propagate is None:
            propagate = self.app.conf.CELERY_CHORD_PROPAGATES
        gid = task.request.group
        if not gid:
            return
        key = self.get_key_for_chord(gid)
        deps = GroupResult.restore(gid, backend=task.backend)
        if deps is None:
            callback = subtask(task.request.chord)
            return app._tasks[callback.task].backend.fail_from_current_stack(
                callback.id,
                exc=ChordError('GroupResult {0} no longer exists'.format(gid))
            )
        val = self.incr(key)
        if val >= len(deps):
            callback = subtask(task.request.chord)
            j = deps.join_native if deps.supports_native_join else deps.join
            try:
                ret = j(propagate=propagate)
            except Exception as exc:
                try:
                    culprit = next(deps._failed_join_report())
                    reason = 'Dependency {0.id} raised {1!r}'.format(
                        culprit, exc,
                    )
                except StopIteration:
                    reason = repr(exc)

                app._tasks[callback.task].backend.fail_from_current_stack(
                    callback.id, exc=ChordError(reason),
                )
            else:
                try:
                    callback.delay(ret)
                except Exception as exc:
                    app._tasks[callback.task].backend.fail_from_current_stack(
                        callback.id,
                        exc=ChordError('Callback error: {0!r}'.format(exc)),
                    )
            finally:
                deps.delete()
                self.client.delete(key)
        else:
            self.expire(key, 86400)
Example #15
def run_ansible_task(tid, callback=None, **kwargs):
    """
    :param tid: is the tasks serialized data
    :param callback: callback function name
    :return:
    """
    task = get_object_or_none(Task, id=tid)
    if task:
        result = task.run()
        if callback is not None:
            subtask(callback).delay(result, task_name=task.name)
        return result
    else:
        logger.error("No task found")
Example #16
def rollout(self, data=None, callback=None):
    logger.info(f"Starting Rollout for {data['hostname']}")
    payload = {
        "deployment_id": data["deployment_id"],
        "versionlock": data["versionlock"],
    }
    http = get_http()
    r = http.post(
        f"{data['protocol']}://{data['hostname']}:{data['port']}/api/{data['version']}/rollout",
        json=payload)
    result = r.json()
    if callback is not None:
        subtask(callback).delay(result)
    return result
Example #17
def dmap(it, callback):
    """ Distributed Map function.
    Given an iterable of data and a task method, map the method over the
    given data.
    """
    callback = subtask(callback)
    return group(callback.clone([arg,]) for arg in it)()
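dmap is typically registered as a task itself so it can be chained after a task that returns a list; a usage sketch under that assumption (fetch_ids and process_id are hypothetical tasks):

from celery import chain

# fetch_ids returns a list; dmap then fans each element out to process_id in parallel
chain(fetch_ids.s(), dmap.s(process_id.s())).apply_async()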
Example #18
def updateAllPincodes():
    '''Fetch new data for all pincodes currently in the database.
    Returns a group so that this update can be executed in parallel'''
    callback = subtask(updatePincode.s())
    return group(
        callback.clone((pincode.code, )) for pincode in Pincode.query.filter(
            Pincode.subscriptions.any()).all())()
Example #19
 def on_chord_part_return(self, task, propagate=True):
     if not self.implements_incr:
         return
     from celery import subtask
     from celery.result import GroupResult
     gid = task.request.group
     if not gid:
         return
     key = self.get_key_for_chord(gid)
     deps = GroupResult.restore(gid, backend=task.backend)
     val = self.incr(key)
     if val >= len(deps):
         j = deps.join_native if deps.supports_native_join else deps.join
         callback = subtask(task.request.chord)
         try:
             ret = j(propagate=propagate)
         except Exception as exc:
             culprit = next(deps._failed_join_report())
             self.app._tasks[callback.task].backend.fail_from_current_stack(
                 callback.id, exc=ChordError('Dependency %s raised %r' % (
                     culprit.id, exc))
             )
         else:
             callback.delay(ret)
         finally:
             deps.delete()
             self.client.delete(key)
     else:
         self.expire(key, 86400)
Example #20
def updateAllSubscribers():
    '''Notify new data for all subscriptions currently in the database.
    Returns a group so that this update can be executed in parallel'''
    callback = subtask(updateSubscriber.s())
    return group(
        callback.clone((subscription.id, )) for subscription in
        Subscription.query.filter(Subscription.pincodes.any()).all())()
Example #21
def get_tweets():
    """Get some tweets from the twitter api and store them to the db."""
    if not Tweet.objects.all():
        # If the db is empty, don't get max_id.
        tweets = api.search(
            q='#python',
            count=100
        )
    else:
        # If the db is not empty, get max_id.
        # queue the clean_tweetdb task; a bare subtask() only builds the signature
        subtask(clean_tweetdb).delay()
        max_id = min([tweet.tweet_id for tweet in Tweet.objects.all()])
        tweets = api.search(
            q='#python',
            max_id=max_id,
            count=100
        )

    # Store the tweet data in lists.
    tweets_id = [tweet.id for tweet in tweets]
    tweets_date = [tweet.created_at for tweet in tweets]
    tweets_source = [tweet.source for tweet in tweets]
    tweets_favorite_cnt = [tweet.favorite_count for tweet in tweets]
    tweets_retweet_cnt = [tweet.retweet_count for tweet in tweets]
    tweets_text = [tweet.text for tweet in tweets]

    # Iterate over these lists and add data to db.
    for i, j, k, l, m, n in zip(
            tweets_id,
            tweets_date,
            tweets_source,
            tweets_favorite_cnt,
            tweets_retweet_cnt,
            tweets_text,
    ):
        try:
            # Check that they are valid.
            Tweet.objects.create(
                tweet_id=i,
                tweet_date=j,
                tweet_source=k,
                tweet_favorite_cnt=l,
                tweet_retweet_cnt=m,
                tweet_text=n,
            )
        except IntegrityError:
            pass
Example #22
def callback_map(it, callback):
    logger.info(sys._getframe().f_code.co_name + " start")
    # ref:http://stackoverflow.com/questions/13271056/how-to-chain-a-celery-task-that-returns-a-list-into-a-group
    # Map a callback over an iterator and return as a group

    # print('it: ' + str(it))
    callback = subtask(callback)
    return group(callback.clone([arg, ]) for arg in it)()
Example #23
 def on_chord_part_return(self, task, propagate=False):
     if not self.implements_incr:
         return
     from celery import subtask
     from celery.result import GroupResult
     gid = task.request.group
     if not gid:
         return
     key = self.get_key_for_chord(gid)
     deps = GroupResult.restore(gid, backend=task.backend)
     val = self.incr(key)
     if val >= len(deps):
         subtask(task.request.chord).delay(deps.join(propagate=propagate))
         deps.delete()
         self.client.delete(key)
     else:
         self.expire(key, 86400)
Example #24
 def on_chord_part_return(self, task, propagate=False):
     if not self.implements_incr:
         return
     from celery import subtask
     from celery.result import GroupResult
     gid = task.request.group
     if not gid:
         return
     key = self.get_key_for_chord(gid)
     deps = GroupResult.restore(gid, backend=task.backend)
     val = self.incr(key)
     if val >= len(deps):
         subtask(task.request.chord).delay(deps.join(propagate=propagate))
         deps.delete()
         self.client.delete(key)
     else:
         self.expire(key, 86400)
Example #25
    def _launch_all_scanning_subtasks_after_task_enum(self, jobs):
        err_handler = ScanRunErrorHandlerTask()
        result_handler = ScanRunResultHandlerTask()
        # filter out none jobs that we don't have to launch
        logr.debug("Enumerating scanning subtasks based on '{0}' jobs.".format(
            len(jobs)))
        filt_jobs = filter(lambda jobdb: jobdb[0] is not None, jobs)

        r = [
            job.apply_async(
                timeout=self.scan_timeout * 0.9,
                link_error=subtask(err_handler, queue=err_handler.queue),
                link=subtask(result_handler,
                             args=(db_entry.task_id, ),
                             queue=result_handler.queue),
            ) for job, db_entry in filt_jobs
        ]
        return r
Example #26
def dmap(it, callback):
    """ Distributed Map function.
    Given an iterable of data and a task method, map the method over the
    given data.
    """
    callback = subtask(callback)
    return group(callback.clone([
        arg,
    ]) for arg in it)()
Example #27
    def execute_task(self,
                     task_name,
                     task_queue=None,
                     kwargs=None,
                     node_context=None,
                     send_task_events=DEFAULT_SEND_TASK_EVENTS,
                     total_retries=None,
                     retry_interval=None):
        """
        Execute a task

        :param task_name: the task name
        :param task_queue: the task queue, if None runs the task locally
        :param kwargs: optional kwargs to be passed to the task
        :param node_context: Used internally by node.execute_operation
        """
        kwargs = kwargs or {}
        task_id = str(uuid.uuid4())
        cloudify_context = self._build_cloudify_context(
            task_id,
            task_queue,
            task_name,
            node_context)
        kwargs['__cloudify_context'] = cloudify_context

        if task_queue is None:
            # Local task
            values = task_name.split('.')
            module_name = '.'.join(values[:-1])
            method_name = values[-1]
            module = importlib.import_module(module_name)
            task = getattr(module, method_name)
            return self.local_task(local_task=task,
                                   info=task_name,
                                   name=task_name,
                                   kwargs=kwargs,
                                   task_id=task_id,
                                   send_task_events=send_task_events,
                                   total_retries=total_retries,
                                   retry_interval=retry_interval)
        else:
            # Remote task
            # Import here because this only applies to remote tasks execution
            # environment
            import celery

            task = celery.subtask(task_name,
                                  kwargs=kwargs,
                                  queue=task_queue,
                                  immutable=True)
            return self.remote_task(task=task,
                                    cloudify_context=cloudify_context,
                                    task_id=task_id,
                                    send_task_events=send_task_events,
                                    total_retries=total_retries,
                                    retry_interval=retry_interval)
Example #28
def group_tasks(it, callback):
    """
    Combine chain and group: clone the callback signature for every item in ``it``
    and run the clones as a single group.
    :param it: iterable of argument tuples
    :param callback: task signature to clone per item
    :return: the applied group result
    """
    callback = subtask(callback)
    result = group(callback.clone((args, )) for args in it)()
    return result
Example #29
def dmap(self, it, callback):
    """
    Map a callback over an iterator and return as a group
    """
    callback = subtask(callback)
    tasks = group([callback.clone([arg,]) for arg in it])

    _tasks = tasks()
    # collect the ids of the child results so they can be recorded on the parent task
    ids = [result.id for result in _tasks]
    app.backend.mark_as_started(self.request.id, **{'__subtasks': ids})
    return _tasks
Example #30
def dmap(it, callback):
    '''
    Map a callback over an iterator and return as a group
    args:
        it: list/tuple/gen: input iterator
        callback: celery.Task: function to apply for each item in it
    return:
        celery.group: ...
    '''
    callback = subtask(callback)
    return group(callback.clone([arg,]) for arg in it)()
Example #31
def dmap(args_iter, celery_task):
    """
    Takes an iterator of argument tuples and queues them up for celery to run with the function.
    """
    callback = subtask(celery_task)
    if isinstance(args_iter, list):
        run_in_parallel = group(
            clone_signature(callback, args=(args, )) for args in args_iter)
    elif isinstance(args_iter, dict):
        run_in_parallel = group(clone_signature(callback, kwargs=args_iter))
    return run_in_parallel.delay()
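clone_signature is not defined in this snippet; one plausible sketch, assuming it simply copies the signature with per-item arguments (an assumption, not the original helper):

def clone_signature(sig, args=(), kwargs=None):
    # copy the signature so each group member gets its own args/kwargs
    # without mutating the shared 'callback' signature
    return sig.clone(args=args, kwargs=kwargs or {})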
Example #32
    def execute_task(self,
                     task_name,
                     task_queue=None,
                     kwargs=None,
                     node_context=None,
                     send_task_events=DEFAULT_SEND_TASK_EVENTS,
                     total_retries=None,
                     retry_interval=None):
        """
        Execute a task

        :param task_name: the task name
        :param task_queue: the task queue, if None runs the task locally
        :param kwargs: optional kwargs to be passed to the task
        :param node_context: Used internally by node.execute_operation
        """
        kwargs = kwargs or {}
        task_id = str(uuid.uuid4())
        cloudify_context = self._build_cloudify_context(
            task_id, task_queue, task_name, node_context)
        kwargs['__cloudify_context'] = cloudify_context

        if task_queue is None:
            # Local task
            values = task_name.split('.')
            module_name = '.'.join(values[:-1])
            method_name = values[-1]
            module = importlib.import_module(module_name)
            task = getattr(module, method_name)
            return self.local_task(local_task=task,
                                   info=task_name,
                                   name=task_name,
                                   kwargs=kwargs,
                                   task_id=task_id,
                                   send_task_events=send_task_events,
                                   total_retries=total_retries,
                                   retry_interval=retry_interval)
        else:
            # Remote task
            # Import here because this only applies to remote tasks execution
            # environment
            import celery

            task = celery.subtask(task_name,
                                  kwargs=kwargs,
                                  queue=task_queue,
                                  immutable=True)
            return self.remote_task(task=task,
                                    cloudify_context=cloudify_context,
                                    task_id=task_id,
                                    send_task_events=send_task_events,
                                    total_retries=total_retries,
                                    retry_interval=retry_interval)
Example #33
def get_tweets():
    """Get some tweets from the twitter api and store them to the db."""
    if not Tweet.objects.all():
        # If the db is empty, don't get max_id.
        tweets = api.search(q='#python', count=100)
    else:
        # If the db is not empty, get max_id.
        # queue the clean_tweetdb task; a bare subtask() only builds the signature
        subtask(clean_tweetdb).delay()
        max_id = min([tweet.tweet_id for tweet in Tweet.objects.all()])
        tweets = api.search(q='#python', max_id=max_id, count=100)

    # Store the tweet data in lists.
    tweets_id = [tweet.id for tweet in tweets]
    tweets_date = [tweet.created_at for tweet in tweets]
    tweets_source = [tweet.source for tweet in tweets]
    tweets_favorite_cnt = [tweet.favorite_count for tweet in tweets]
    tweets_retweet_cnt = [tweet.retweet_count for tweet in tweets]
    tweets_text = [tweet.text for tweet in tweets]

    # Iterate over these lists and add data to db.
    for i, j, k, l, m, n in zip(
            tweets_id,
            tweets_date,
            tweets_source,
            tweets_favorite_cnt,
            tweets_retweet_cnt,
            tweets_text,
    ):
        try:
            # Check that they are valid.
            Tweet.objects.create(
                tweet_id=i,
                tweet_date=j,
                tweet_source=k,
                tweet_favorite_cnt=l,
                tweet_retweet_cnt=m,
                tweet_text=n,
            )
        except IntegrityError:
            pass
Example #34
    def on_chord_part_return(self, task, propagate=None):
        if not self.implements_incr:
            return
        from celery import subtask
        from celery.result import GroupResult
        app = self.app
        if propagate is None:
            propagate = self.app.conf.CELERY_CHORD_PROPAGATES
        gid = task.request.group
        if not gid:
            return
        key = self.get_key_for_chord(gid)
        deps = GroupResult.restore(gid, backend=task.backend)
        val = self.incr(key)
        if val >= len(deps):
            j = deps.join_native if deps.supports_native_join else deps.join
            callback = subtask(task.request.chord)
            try:
                ret = j(propagate=propagate)
            except Exception as exc:
                try:
                    culprit = next(deps._failed_join_report())
                    reason = 'Dependency {0.id} raised {1!r}'.format(
                        culprit,
                        exc,
                    )
                except StopIteration:
                    reason = repr(exc)

                app._tasks[callback.task].backend.fail_from_current_stack(
                    callback.id,
                    exc=ChordError(reason),
                )
            else:
                try:
                    callback.delay(ret)
                except Exception as exc:
                    app._tasks[callback.task].backend.fail_from_current_stack(
                        callback.id,
                        exc=ChordError('Callback error: {0!r}'.format(exc)),
                    )
            finally:
                deps.delete()
                self.client.delete(key)
        else:
            self.expire(key, 86400)
Example #35
    def add_job(self, job, job_id=None):
        """ Add a job (and its tasks) to the queue and update the monitoring counters """
        if not job_id:
            job_id = job["id"]

        self.add_tasks(job["tasks"], job_id)

        self.jqueuer_job_added_count += 1
        monitoring.add_job(self.experiment_id, self.service_name, job_id)

        job_queue_id = ("j_" + self.service_name + "_" +
                        str(int(round(time.time() * 1000))) + "_" +
                        str(random.randrange(100, 999)))

        chain = subtask("job_operations.add",
                        queue=JOB_QUEUE_PREFIX + self.service_name)
        chain.delay(self.experiment_id, job_queue_id, job)
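The subtask is addressed purely by the registered name "job_operations.add" plus a per-service queue; a minimal sketch of a receiving task (hypothetical, assuming a Celery app instance named app and a module-level logger; the real job_operations module is not shown):

@app.task(name="job_operations.add")
def add(experiment_id, job_queue_id, job):
    # arguments arrive in the order they were passed to chain.delay() above
    logger.info("Queued job %s for experiment %s", job_queue_id, experiment_id)
    return job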
Example #36
    def add_job(self, job, job_id=None):
        if (not job_id):
            job_id = job["id"]

        self.add_tasks(job['tasks'], job_id)

        self.jqueuer_job_added_count += 1
        monitoring.add_job(self.experiment_id, self.service_name, job_id)

        job_queue_id = "j_" + self.service_name + "_" + str(
            int(round(time.time() * 1000))) + "_" + str(
                random.randrange(100, 999))

        chain = subtask('job_operations.add',
                        queue=JOB_QUEUE_PREFIX + self.service_name)
        chain.delay(self.experiment_id, job_queue_id, job)

        self.add_log(
            "The job_id {} ,job_queue_id: {} - ,JOB_QUEUE_PREFIX: {}has just been added"
            .format(str(job_id), str(job_queue_id), str(JOB_QUEUE_PREFIX)))
Example #37
    def get_task(self, workflow_task, queue=None, target=None):

        runtime_props = []

        def _derive(property_name):
            executor = workflow_task.cloudify_context['executor']
            host_id = workflow_task.cloudify_context['host_id']
            if executor == 'host_agent':
                if len(runtime_props) == 0:
                    host_node_instance = get_node_instance(host_id)
                    cloudify_agent = host_node_instance.runtime_properties.get(
                        'cloudify_agent')
                    if not cloudify_agent:
                        raise exceptions.NonRecoverableError(
                            'Missing cloudify_agent runtime information. '
                            'This most likely means that the Compute node '
                            'never started successfully')
                    runtime_props.append(cloudify_agent)
                return runtime_props[0][property_name]
            return self.workflow_ctx.deployment.id

        if queue is None:
            queue = _derive('queue')

        if target is None:
            target = _derive('name')

        kwargs = workflow_task.kwargs
        # augment cloudify context with target and queue
        kwargs['__cloudify_context']['task_queue'] = queue
        kwargs['__cloudify_context']['task_target'] = target

        # Remote task
        # Import here because this only applies to remote tasks execution
        # environment
        import celery

        return celery.subtask(workflow_task.name,
                              kwargs=kwargs,
                              queue=queue,
                              immutable=True), queue, target
Example #38
    def test_on_chord_part_return(self, restore):
        b = self.MockBackend(app=self.app)
        deps = Mock()
        deps.__len__ = Mock()
        deps.__len__.return_value = 10
        restore.return_value = deps
        b.client.incr.return_value = 1
        task = Mock()
        task.name = 'foobarbaz'
        self.app.tasks['foobarbaz'] = task
        task.request.chord = subtask(task)
        task.request.group = 'group_id'

        b.on_chord_part_return(task)
        self.assertTrue(b.client.incr.call_count)

        b.client.incr.return_value = len(deps)
        b.on_chord_part_return(task)
        deps.join_native.assert_called_with(propagate=True)
        deps.delete.assert_called_with()

        self.assertTrue(b.client.expire.call_count)
Example #39
    def test_on_chord_part_return(self, restore):
        tb = CacheBackend(backend='memory://', app=self.app)

        deps = Mock()
        deps.__len__ = Mock()
        deps.__len__.return_value = 2
        restore.return_value = deps
        task = Mock()
        task.name = 'foobarbaz'
        self.app.tasks['foobarbaz'] = task
        task.request.chord = subtask(task)

        gid, res = uuid(), [self.app.AsyncResult(uuid()) for _ in range(3)]
        task.request.group = gid
        tb.on_chord_apply(gid, {}, result=res)

        self.assertFalse(deps.join_native.called)
        tb.on_chord_part_return(task)
        self.assertFalse(deps.join_native.called)

        tb.on_chord_part_return(task)
        deps.join_native.assert_called_with(propagate=True)
        deps.delete.assert_called_with()
Example #40
    def on_chord_part_return(self, task, propagate=None):
        if not self.implements_incr:
            return
        from celery import subtask
        from celery.result import GroupResult
        app = self.app
        if propagate is None:
            propagate = self.app.conf.CELERY_CHORD_PROPAGATES
        gid = task.request.group
        if not gid:
            return
        key = self.get_key_for_chord(gid)
        deps = GroupResult.restore(gid, backend=task.backend)
        val = self.incr(key)
        if val >= len(deps):
            j = deps.join_native if deps.supports_native_join else deps.join
            callback = subtask(task.request.chord)
            try:
                ret = j(propagate=propagate)
            except Exception as exc:
                try:
                    culprit = next(deps._failed_join_report())
                    reason = 'Dependency %s raised %r' % (culprit.id, exc)
                except StopIteration:
                    reason = repr(exc)
                app._tasks[callback.task].backend.fail_from_current_stack(
                    callback.id, exc=ChordError(reason),
                )
            else:
                try:
                    callback.delay(ret)
                except Exception as exc:
                    app._tasks[callback.task].backend.fail_from_current_stack(
                        callback.id,
                        exc=ChordError('Callback error: %r' % (exc, )),
                    )
Example #41
def dmap(it, callback):
    # http://stackoverflow.com/questions/13271056/how-to-chain-a-celery-task-that-returns-a-list-into-a-group
    # Map a callback over an iterator and return as a group
    callback = subtask(callback)
    return group(callback.clone((arg,)) for arg in it)()
Example #42
def dmap(it, callback):
    # Map a callback over an iterator and return as a group
    callback = subtask(callback)
    return group(callback.clone((arg, )) for arg in it)()
Example #43
def video_map(videos_list, processing_callback, link):
    callback = subtask(processing_callback)
    return group(callback.clone([arg,], link=link) for arg in videos_list if arg)()
Example #44
def hello(name, callback=None):
    print("Hello {}".format(name))
    if callback is not None:
        subtask(callback).delay("Guahongwei")
Example #45
from celery import subtask
from celery import group, chain, chord, chunks
from proj import tasks
# Get subtask
## method 1
sub1 = subtask(tasks.add, args=(2, 2), countdown=1)
res = sub1.apply_async()
#print res.get()

## method 2
sub = tasks.add.subtask((2, 2), countdown=1)
res = sub.apply_async()
#print res.get()

## method 3
#print tasks.add.s(2, 2).set(countdown=1).apply_async().get()


# Partials
partial = tasks.add.s(2)
#print partial.delay(2).get()


# Immutability
#sub = tasks.add.apply_async((2, 2), link=tasks.add_callback_noarg.subtask(immutable=True))
sub = tasks.add.apply_async((2, 2), link=tasks.add_callback_noarg.si())
#sub.get()


# Callbacks
sub = tasks.add.apply_async((2, 2), link=tasks.minus.s(3))
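The script imports group, chain and chord but only exercises subtask; a short continuation in the same style (tasks.xsum, a task that sums a list, is assumed to exist in proj.tasks):

# Groups, chains and chords
res = group(tasks.add.s(i, i) for i in range(10)).apply_async()
#print res.get()

res = chain(tasks.add.s(2, 2), tasks.add.s(4), tasks.add.s(8)).apply_async()
#print res.get()

res = chord(tasks.add.s(i, i) for i in range(10))(tasks.xsum.s())
#print res.get()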
Example #46
def process_gdrive_docs(requester, access_token, refresh_token, files_fn, json_key):
    service = connect_to_gdrive(access_token, refresh_token)
    folders = {}

    page_token = None
    new_start_page_token = None
    while True:
        files = files_fn(service, page_token)
        new_start_page_token = files.get('newStartPageToken', new_start_page_token)
        items = files.get(json_key, [])
        if not folders and len(items) > 0:
            # retrieve all folders to be able to get file path more easily in the file listing(s)
            logger.debug("Getting folders for %s/%s", requester.id, requester.username)
            folders = get_gdrive_folders(service)
            # check if any folder was marked as hidden and we already have it synced ...
            # if we do, then remove it (plus all children) from our indexing
            for folder_id, folder in folders.items():
                if folder.get('hidden') is True:
                    desync_folder(folder.get('id'), folders, requester, service)

        for item in items:
            if 'file' in item:
                item = item['file']
            # check for ignored mime types
            if any(x.match(item.get('mimeType', '')) for x in IGNORED_MIMES):
                continue
            parents = item.get('parents', [])
            hidden = is_hidden(item.get('description')) or any(is_hidden_in_folder(f, folders) for f in parents)
            if item.get('trashed') or hidden:
                # file was removed or hidden
                Document.objects.filter(
                    document_id=item['id'],
                    requester=requester,
                    user_id=requester.id
                ).delete()
                continue

            # handle file path within gdrive
            parent = parents[0] if parents else None
            path = get_gdrive_path(parent, folders)

            doc, created = get_or_create(
                model=Document,
                document_id=item['id'],
                requester=requester,
                user_id=requester.id
            )
            doc.mime_type = item.get('mimeType').lower()
            doc.title = item.get('name')
            doc.webview_link = item.get('webViewLink')
            doc.icon_link = item.get('iconLink')
            doc.thumbnail_link = item.get('thumbnailLink')
            doc.last_updated = item.get('modifiedTime')
            doc.path = path
            last_modified_on_server = parse_date(doc.last_updated)
            doc.last_updated_ts = last_modified_on_server.timestamp()
            doc.modifier_display_name = item.get('lastModifyingUser', {}).get('displayName')
            doc.modifier_photo_link = item.get('lastModifyingUser', {}).get('photoLink')
            doc.owner_display_name = item['owners'][0]['displayName']
            doc.owner_photo_link = item.get('owners', [{}])[0].get('photoLink')
            doc.primary_keywords = GDRIVE_KEYWORDS['primary']
            doc.secondary_keywords = GDRIVE_KEYWORDS['secondary'][doc.mime_type] \
                if doc.mime_type in GDRIVE_KEYWORDS['secondary'] else None
            can_download = item.get('capabilities', {}).get('canDownload', True)
            if can_download:
                # check also the mime type as we only support some of them
                if not any(x for x in EXPORTABLE_MIMES if doc.mime_type.startswith(x)):
                    can_download = False
            if can_download:
                if not created:
                    if doc.download_status is Document.READY and can_download and \
                            (doc.last_synced is None or last_modified_on_server > doc.last_synced):
                        doc.download_status = Document.PENDING
                        subtask(download_gdrive_document).delay(doc, access_token, refresh_token)
                else:
                    algolia_engine.sync(doc, add=created)
                    subtask(download_gdrive_document).delay(doc, access_token, refresh_token)
            else:
                doc.download_status = Document.READY
                doc.last_synced = get_utc_timestamp()
                doc.save()
                algolia_engine.sync(doc, add=False)

            doc.save()

        page_token = files.get('nextPageToken')
        if not page_token:
            break
    return new_start_page_token
Example #47
def map_signature_chain(args_list, *signatures):
    return group([
        chain(
            subtask(signatures[0]).clone((args, )),
            *(subtask(sig) for sig in signatures[1:])) for args in args_list
    ])
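A usage sketch (the step names are hypothetical): each args tuple is fed to the first signature, and every per-item result then flows through the remaining signatures:

map_signature_chain([(1,), (2,), (3,)],
                    load_item.s(), transform_item.s(), save_item.s()).apply_async()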
Example #48
def callback_list_map(lists, callback):
    logger.info(sys._getframe().f_code.co_name + " start")

    callback = subtask(callback)

    return group(callback.clone([arg, ]) for arg in lists.get())()
Example #49
def dmap(it, callback):
    # http://stackoverflow.com/questions/13271056/how-to-chain-a-celery-task-that-returns-a-list-into-a-group
    # Map a callback over an iterator and return as a group
    callback = subtask(callback)
    return group(callback.clone((arg, )) for arg in it)()
Example #50
def dmap(it, callback):
    callback = subtask(callback)
    return group(callback.clone([arg,]) for arg in it)()
Example #51
def map_single_task(args_list, *signatures):
    return group(
        [subtask(signatures[0]).clone((args, )) for args in args_list])
Example #52
def add_cb(x, y, callback=None):
    result = x + y
    if callback:
        return subtask(callback).apply_async((result, ))
    return result
Example #53
def hello(name, callback=None):
    print("Hello {}".format(name))
    if callback is not None:
        subtask(callback).delay("Guahongwei")
Example #54
def collect_repos(requester):
    github_client = init_github_client(requester)
    # simple check if we are approaching api rate limits
    if github_client.rate_limiting[0] < 500:
        logger.debug(
            "Skipping github repos sync for user '%s' due to rate limits",
            requester.username)
        return

    i = 0
    for repo in github_client.get_user().get_repos():
        if not (repo.id or repo.full_name):
            logger.debug("Skipping github repo '%s' for user '%s'",
                         repo.full_name, requester.username)
            # seems like broken data, skip it
            continue
        if repo.fork:
            # don't process forked repos
            logger.debug("Skipping forked github repo '%s' for user '%s'",
                         repo.full_name, requester.username)
            continue

        db_repo, created = Document.objects.get_or_create(
            github_repo_id=repo.id,
            github_commit_id__isnull=True,
            github_file_id__isnull=True,
            github_issue_id__isnull=True,
            requester=requester,
            user_id=requester.id)
        db_repo.primary_keywords = GITHUB_PRIMARY_KEYWORDS
        db_repo.secondary_keywords = GITHUB_SECONDARY_KEYWORDS['repo']
        db_repo.github_title = 'Repo: {}'.format(repo.name)
        db_repo.github_repo_owner = repo.owner.login
        db_repo.github_repo_description = repo.description
        logger.debug("Processing github repo '%s' for user '%s'",
                     repo.full_name, requester.username)
        commit_count = 0
        contributors = []
        try:
            # fetch contributors
            for cnt in repo.get_contributors():
                commit_count = commit_count + cnt.contributions
                if len(contributors) <= 10:
                    contributors.append({
                        'name': cnt.name,
                        'url': cnt.html_url,
                        'avatar': cnt.avatar_url
                    })
        except UnknownObjectException:
            # most probably, this repo is disabled
            if created:
                logger.debug("Removing github repo '%s' for user '%s'",
                             repo.full_name, requester.username)
                db_repo.delete()
            continue
        db_repo.github_repo_commit_count = commit_count
        db_repo.github_repo_contributors = contributors
        db_repo.github_repo_full_name = repo.full_name
        new_timestamp = max(repo.updated_at, repo.pushed_at)
        if created or new_timestamp.timestamp() > (db_repo.last_updated_ts
                                                   or 0):
            i = i + 1
            db_repo.last_updated_ts = new_timestamp.timestamp()
            db_repo.last_updated = new_timestamp.isoformat() + 'Z'
            db_repo.webview_link = repo.html_url
            # fetch readme file
            try:
                readme = repo.get_readme()
                readme_content = cut_utf_string(readme.decoded_content.decode(
                    'UTF-8', errors='replace'),
                                                9000,
                                                step=100)
                md = github_client.render_markdown(text=readme_content).decode(
                    'UTF-8', errors='replace')
                # also replace <em> tags, because they are used by Algolia highlighting
                db_repo.github_repo_content = md.replace('<em>',
                                                         '<b>').replace(
                                                             '</em>', '</b>')
                db_repo.github_repo_readme = readme.name
            except UnknownObjectException:
                # readme does not exist
                db_repo.github_repo_content = None
            algolia_engine.sync(db_repo, add=created)
            if created:
                # sync files
                subtask(collect_files).delay(requester,
                                             repo.id,
                                             repo.full_name,
                                             repo.html_url,
                                             repo.default_branch,
                                             enrichment_delay=i * 300)
        # sync commits
        subtask(collect_commits).apply_async(args=[
            requester, repo.id, repo.full_name, repo.html_url,
            repo.default_branch, commit_count
        ],
                                             countdown=240 *
                                             i if created else 1)
        # sync issues
        subtask(collect_issues).apply_async(
            args=[requester, repo.id, repo.full_name, created],
            countdown=180 * i if created else 1)

        db_repo.last_synced = get_utc_timestamp()
        db_repo.download_status = Document.READY
        db_repo.save()
Example #55
def collect_boards(requester):
    trello_client = init_trello_client(requester)
    orgs = dict()

    for board in trello_client.list_boards(board_filter='open,closed'):
        db_board, created = Document.objects.get_or_create(
            trello_board_id=board.id,
            trello_card_id__isnull=True,
            requester=requester,
            user_id=requester.id
        )
        board_last_activity = board.raw.get('dateLastActivity')
        if not board_last_activity:
            # this nasty hack is needed, because some Trello boards don't have a 'dateLastActivity' timestamp
            # -> looks like it's those boards that have been inactive for some time
            if not created:
                board_last_activity = db_board.last_updated.isoformat()
            else:
                # Trello was established in 2011, so we use 01.01.2011 as epoch
                actions = board.fetch_actions(action_filter='all', action_limit=1, since='2011-01-01T00:00:00.000Z')
                if actions:
                    board_last_activity = actions[0].get('date')

        last_activity = parse_dt(board_last_activity).isoformat()
        last_activity_ts = int(parse_dt(board_last_activity).timestamp())
        if not created and db_board.download_status == Document.READY and \
                (db_board.last_updated_ts and db_board.last_updated_ts >= last_activity_ts):
            logger.debug("Trello board '%s' for user '%s' hasn't changed", board.name[:50], requester.username)
            continue
        logger.debug("Processing board '%s' for user '%s'", board.name[:50], requester.username)
        db_board.primary_keywords = TRELLO_PRIMARY_KEYWORDS
        db_board.secondary_keywords = TRELLO_SECONDARY_KEYWORDS['board']
        db_board.last_updated = last_activity
        db_board.last_updated_ts = last_activity_ts
        db_board.trello_title = 'Board: {}'.format(board.name)
        db_board.webview_link = board.url
        db_board._trello_description = board.description
        db_board.trello_board_status = 'Closed' if board.closed else 'Open'

        orgId = board.raw.get('idOrganization')
        if orgId and orgId not in orgs:
            try:
                org = trello_client.get_organization(orgId).raw
                orgs[orgId] = {
                    'name': org.get('displayName'),
                    'logo': 'https://trello-logos.s3.amazonaws.com/{}/30.png'.format(orgId),
                    'url': org.get('url')
                }
            except ResourceUnavailable:
                # defunct/deleted organization, assume that board is personal
                orgId = None
        db_board.trello_board_org = orgs[orgId] if orgId else None

        build_list = lambda l: {
            'id': l.id,
            'name': l.name,
            'closed': l.closed,
            'pos': l.pos
        }
        all_lists = {l.id: build_list(l) for l in board.all_lists()}
        db_board.trello_content = {
            'description': _to_html(board.description),
            'lists': sorted(
                filter(lambda x: not x.get('closed'), all_lists.values()),
                key=itemgetter('pos')
            )
        }

        build_member = lambda m: {
            'name': m.full_name,
            'url': m.url,
            'avatar': 'https://trello-avatars.s3.amazonaws.com/{}/30.png'.format(m.avatar_hash)
        }
        all_members = {m.id: build_member(m) for m in board.all_members()}
        db_board.trello_board_members = list(all_members.values())

        db_board.last_synced = get_utc_timestamp()
        db_board.download_status = Document.READY
        db_board.save()
        algolia_engine.sync(db_board, add=created)
        subtask(collect_cards).delay(requester, db_board, board.name, all_members, all_lists)
        # add sleep of 30s to avoid breaking api limits
        time.sleep(30)
Example #56
def dmap(it, callback):
    # Map a callback over an iterator and return as a group
    callback = subtask(callback)
    return group(callback.clone([arg,]) for arg in it)()