Example #1
def reset_endpoints():
    """
    Handler which creates reset endpoint tasks for selected endpoints/orgs.
    """
    endpoint_indexes = request.form.getlist('endpoint_index')
    org_uid = request.form.get('org_uid')

    if not endpoint_indexes:
        flash("At least one endpoint is required")
        return render_template('select_endpoints.html',
                               endpoints=ENDPOINTS,
                               org_uid=org_uid), 200

    if org_uid:
        taskqueue.add(target='admin',
                      url='/admin/reset_endpoints_task/{}'.format(org_uid),
                      params={'endpoint_index': endpoint_indexes})

        flash("Kicked off reset of {} endpoints for {}".format(
            len(endpoint_indexes), org_uid))

        return redirect(prefix('/'))
    else:
        count = query_to_tasks(
            query=Org.query(Org.status == CONNECTED),
            queue=Queue('admin'),
            task_generator=lambda key: Task(
                url='/admin/reset_endpoints_task/{}'.format(key.string_id()),
                params={'endpoint_index': endpoint_indexes}))

        flash("Kicked off reset of {} endpoints for {} orgs".format(
            len(endpoint_indexes), count))

        return redirect(prefix('/commands'))
Example #2
    def post(self):
        groupname = self.request.get('group')
        if groupname == 'ctrp-taskcheck':

            # Grab the default queue and keep checking for whether or not
            # all of the tasks have finished.
            default_queue = Queue()
            stats = default_queue.fetch_statistics()
            while stats.tasks > 0:
                logging.info("task check: waiting for %d tasks to finish" %
                             stats.tasks)
                time.sleep(5)
                stats = default_queue.fetch_statistics()

            self.finishBuilding()

        else:
            importer = wowapi.Importer()

            q = Group.query(Group.name == groupname)
            groups = q.fetch()
            # sanity check, though this shouldn't be possible
            if len(groups) == 0:
                logging.info('Builder failed to find group %s' % groupname)
                return

            logging.info('Builder task for %s started' % groupname)
            self.processGroup(groups[0], importer, True)
            logging.info('Builder task for %s completed' % groupname)
Example #3
def post_all_feeds():
    """Post all new items for feeds for a specific interval"""
    if request.headers.get('X-Appengine-Cron') != 'true':
        raise ndb.Return(jsonify_error(message='Not a cron call'))

    logger.info('Starting a post job')
    futures = []
    for feed_type, feed_class in FEED_TYPE_TO_CLASS.iteritems():
        feeds = feed_class.query(feed_class.is_dirty == True)
        logger.info("Got some feeds_count: %s feeds_type: %s", feeds.count(), feed_type)
        success = 0
        more = True
        cursor = None
        while more:
            feeds_to_fetch, cursor, more = yield feeds.fetch_page_async(BATCH_SIZE, start_cursor=cursor)
            keys = ','.join([x.key.urlsafe() for x in feeds_to_fetch])
            if not keys:
                continue
            futures.append(Queue().add_async(Task(url=url_for('tq_feed_post-canonical'), method='POST', params={'keys': keys})))
            success += len(feeds_to_fetch)
        logger.info('queued post for %d feeds feed_type:%s', success, feed_type)

    for future in futures:
        yield future

    logger.info('Finished Post Job')
    yield write_epoch_to_stat(Stat, 'post_job')
    raise ndb.Return(jsonify(status='ok'))
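
Note that post_all_feeds yields futures and finishes with raise ndb.Return(...), which is the calling convention of an ndb tasklet. The wiring below is an assumption (not part of the source) showing how such a handler is typically run synchronously:

# A minimal sketch (assumed, not from the source): ndb.synctasklet drives the
# generator to completion and returns the value passed to ndb.Return.
from flask import Flask, jsonify
from google.appengine.ext import ndb

app = Flask(__name__)

@app.route('/cron/example_job')
@ndb.synctasklet
def example_job():
    yield ndb.get_context().flush()  # stand-in for the add_async futures above
    raise ndb.Return(jsonify(status='ok'))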
Example #4
def update_all_feeds(interval_id):
    """Update all feeds for a specific interval"""
    if request.headers.get('X-Appengine-Cron') != 'true':
        raise ndb.Return(jsonify_error(message='Not a cron call'))

    # collect futures and the feed count across all feed types so that the
    # waiting and logging below cover every batch that was queued
    futures = []
    success = 0
    for feed_type, feed_class in FEED_TYPE_TO_CLASS.iteritems():
        feeds = Feed.for_interval(interval_id)
        more = True
        cursor = None
        while more:
            feeds_to_fetch, cursor, more = yield feeds.fetch_page_async(BATCH_SIZE, start_cursor=cursor)
            feeds_to_fetch = filter(lambda x: getattr(x, 'external_polling_bucket', DEFAULT_POLLING_BUCKET) == DEFAULT_POLLING_BUCKET, feeds_to_fetch)
            keys = ','.join([x.key.urlsafe() for x in feeds_to_fetch])
            if not keys:
                continue

            futures.append(Queue('poll').add_async(Task(url=url_for('tq_feed_poll-canonical'), method='POST', params={'keys': keys})))
            success += len(feeds_to_fetch)

    for future in futures:
        yield future

    logger.info('queued poll for %d feeds at interval_id=%s', success, interval_id)

    raise ndb.Return(jsonify(status='ok'))
Example #5
def instagram_push_update():
    data = request.stream.read()
    instagram_client_secret = Configuration.value_for_name('instagram_client_secret')

    server_signature = request.headers.get('X-Hub-Signature', None)
    signature = hmac.new(str(instagram_client_secret), data, digestmod=hashlib.sha1).hexdigest()

    if server_signature != signature:
        logger.warn('Got PuSH subscribe POST from instagram w/o valid signature: sent=%s != expected=%s',
                    server_signature, signature)

        raise ndb.Return('')

    logger.info('Got PuSH body: %s', data)
    logger.info('Got PuSH headers: %s', request.headers)

    parsed_feed = json.loads(data)
    user_ids = [int(x.get('object_id')) for x in parsed_feed]
    feeds = InstagramFeed.query(InstagramFeed.user_id.IN(user_ids))

    cursor = None
    more = True
    keys = []
    while more:
        feed_keys, cursor, more = feeds.fetch_page(BATCH_SIZE, keys_only=True, start_cursor=cursor)
        keys += feed_keys

    keys = ','.join([x.urlsafe() for x in keys])
    if keys:
        yield Queue('poll').add_async(Task(url=url_for('tq_feed_poll-canonical'), method='POST', params={'keys': keys}))

    raise ndb.Return('ok')
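
The handler above compares HMAC digests with a plain !=; where the runtime provides it (Python 2.7.7+), hmac.compare_digest is the constant-time alternative. A small hedged sketch of that hardening, not part of the original example:

import hashlib
import hmac

def push_signature_is_valid(client_secret, body, server_signature):
    """Constant-time check of an X-Hub-Signature style SHA1 HMAC (illustrative helper)."""
    expected = hmac.new(str(client_secret), body, digestmod=hashlib.sha1).hexdigest()
    # compare_digest avoids leaking how many leading characters of the signature match
    return hmac.compare_digest(str(server_signature or ''), expected)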
Example #6
    def __init__(self, tag, queue_name, size, duration=60, auto_delete=True):
        """The generator will yield json deserialized payloads from tasks with
        the corresponding tag.

        :param tag: :class: `str` Pull queue tag to query against
        :param queue_name: :class: `str` Name of PULL queue holding tasks to
                           lease.
        :param size: :class: `int` The number of items to pull at once
        :param duration: :class: `int` After this time, the tasks may be leased
                         again. Tracked in seconds
        :param auto_delete: :class: `bool` Delete tasks when iteration is
                            complete.

        :return: :class: `iterator` of json deserialized payloads
        """
        from google.appengine.api.taskqueue import Queue

        self.queue_name = queue_name
        self.queue = Queue(name=self.queue_name)

        self.tag = tag
        self.size = size
        self.duration = duration
        self.auto_delete = auto_delete

        self._messages = []
        self._processed_messages = []
        self._fetched = False
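
The iteration half of this class is not shown in the example; assuming the lease-and-delete behaviour the docstring describes, __iter__ could look roughly like the sketch below (lease_tasks_by_tag and delete_tasks are the standard pull-queue calls):

    def __iter__(self):
        import json

        # Lease up to `size` tasks carrying our tag for `duration` seconds.
        self._messages = self.queue.lease_tasks_by_tag(
            self.duration, self.size, tag=self.tag)
        self._fetched = True

        for task in self._messages:
            yield json.loads(task.payload)
            self._processed_messages.append(task)

        # Optionally delete everything that was handed out so it is not re-leased.
        if self.auto_delete and self._processed_messages:
            self.queue.delete_tasks(self._processed_messages)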
Example #7
    def insert(self):
        """Insert the pull task into the requested queue, 'default' if non
        given.
        """
        from google.appengine.api.taskqueue import Queue

        task = self.to_task()

        Queue(name=self.get_queue()).add(task)
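
to_task() and get_queue() are not included in this example; for a pull task, to_task() would presumably build something along these lines (hypothetical, using only the standard Task constructor; the attribute names are assumptions):

    def to_task(self):
        """Hypothetical sketch: serialise this work item as a PULL task."""
        import json
        from google.appengine.api.taskqueue import Task

        return Task(
            method='PULL',                      # pull tasks carry a payload, not a URL
            payload=json.dumps(self._options),  # `self._options` is assumed
            tag=getattr(self, 'tag', None))     # optional tag, also assumed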
Example #8
    def get(self):
        queue = Queue()
        stats = queue.fetch_statistics()

        template_values = {
            'tasks': stats.tasks,
            'in_flight': stats.in_flight,
        }
        template = JINJA_ENVIRONMENT.get_template('templates/ranker.html')
        self.response.write(template.render(template_values))
Example #9
  def _AddUserRecallTasks(self, user_recall_tasks):
    """Helper to enqueue list of user recall tasks in batches.

    Args:
      user_recall_tasks: Task or list of Tasks; one for each user.

    Raises:
      Re-raises any task queue errors.
    """
    Queue('user-recall-queue').add(task=user_recall_tasks)
Example #10
 def schedule(cls, queue_name, tasks):
     queue = Queue(queue_name)
     batch_size = 100
     task_count = 0
     for task_batch in cls.split_every(batch_size, tasks):
         cls._add_single_batch(queue, task_batch)
         task_count += len(task_batch)
     if task_count > 0:
         logging.info("Scheduled %d tasks in max %d batches", task_count,
                      batch_size)
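
split_every and _add_single_batch are referenced but not shown; one plausible implementation (sketched here as plain functions, though in the example they are methods on the same class) batches to stay under the 100-tasks-per-Queue.add limit:

import itertools

def split_every(n, iterable):
    """Yield successive lists of at most n items from iterable."""
    it = iter(iterable)
    chunk = list(itertools.islice(it, n))
    while chunk:
        yield chunk
        chunk = list(itertools.islice(it, n))

def _add_single_batch(queue, task_batch):
    # Queue.add accepts at most 100 tasks per call, hence the batching above.
    queue.add(task_batch)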
Example #11
 def addTasks(self):
     q = Comic.all()
     queue = Queue(name='update-queue')
     tasks = []
     for comic in q:
         # flush a full batch before appending; Queue.add takes at most 100 tasks
         if len(tasks) >= 100:
             queue.add(tasks)
             tasks = []
         task = Task(url='/tasks/update/' + str(comic.id))
         tasks.append(task)
     # add whatever is left over after the loop
     if tasks:
         queue.add(tasks)
     self.response.out.write("OK")
Example #12
    def post(self):

        # refuse to start the tasks if there are some already running
        queue = Queue()
        stats = queue.fetch_statistics()
        if stats.tasks == 0:

            # queue up all of the groups into individual tasks.  the configuration
            # in queue.yaml only allows 10 tasks to run at once.  the builder only
            # allows 10 URL requests at a time, which should hopefully keep the
            # Blizzard API queries under control.
            q = Group.query()
            groups = q.fetch()
            for g in groups:
                taskqueue.add(url='/builder', params={'group': g.name})

            checker = Task(url='/builder', params={'group': 'ctrp-taskcheck'})
            taskcheck = Queue(name='taskcheck')
            taskcheck.add(checker)

        self.redirect('/rank')
Example #13
 def schedule(cls, queue_name, tasks):
     if not isinstance(tasks, list):
         tasks = [tasks]
     queue = Queue(queue_name)
     page_size = 100
     task_count = 0
     for task_batch in paginated(page_size, tasks):
         cls.__add_single_batch(queue, task_batch)
         task_count += len(task_batch)
     if task_count > 0:
         logging.info("Scheduled %d tasks in max %d batches", task_count,
                      page_size)
Example #14
    def _queue_tasks(self):
        scanners = QuestionsScanner.all()

        tasks = []
        for scanner in scanners:
            domain = scanner.key().name()
            task = Task(url='/tasks/scan_new_questions/%s' % (domain, ))
            tasks.append(task)

        if len(tasks) > 0:
            queue = Queue(name="scannewquestions")
            queue.add(tasks)
Example #15
    def test_items_to_tasks(self):
        """
        Verifies that multiple pages of tasks get queued up properly.
        """
        count = task_utils.items_to_tasks(
            items=[1, 2, 3],
            queue=Queue('adapter-update'),
            task_generator=lambda item: Task(url='/something/{}'.format(item)))

        self.assertEqual(count, 3)
        task_count = len(self.taskqueue.get_filtered_tasks())
        self.assertEqual(task_count, 3)
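
task_utils.items_to_tasks itself is only exercised, never defined, in these examples; consistent with how the test and the other callers use it, a minimal sketch might be:

def items_to_tasks(items, queue, task_generator):
    """Hypothetical sketch: turn each item into a Task and enqueue them in batches."""
    count = 0
    batch = []
    for item in items:
        batch.append(task_generator(item))
        count += 1
        if len(batch) == 100:  # Queue.add takes at most 100 tasks per call
            queue.add(batch)
            batch = []
    if batch:
        queue.add(batch)
    return count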
Example #16
def init_all_updates():
    """
    Initialises update cycle for each connected org by putting a task onto the update queue (which ends up calling
    init_new_changeset(org_uid)).
    """
    count = query_to_tasks(
        query=Org.query(
            Org.status == CONNECTED, Org.last_update_cycle_completed_at <
            datetime.utcnow() - SYNC_INTERVAL,
            Org.provider.IN(API_PROVIDERS)).order(
                -Org.last_update_cycle_completed_at,
                Org.key),  # Queries involving IN need to be ordered by key
        queue=Queue('adapter-update'),
        task_generator=lambda key: Task(url='/adapter/{}/init_update'.format(
            key.string_id())))

    logging.info("queued {} tasks for a sync update".format(count))
Example #17
    def test_query_to_tasks(self):
        """
        Verifies that multiple pages of tasks get queued up properly.
        """
        Org(id='test1', status=CONNECTED).put()
        Org(id='test2', status=CONNECTED).put()
        Org(id='test3', status=DISCONNECTED).put()

        count = task_utils.query_to_tasks(
            query=Org.query(Org.status == CONNECTED),
            queue=Queue('adapter-update'),
            task_generator=lambda key: Task(url='/something/{}'.format(
                key.string_id())))

        self.assertEqual(count, 2)
        task_count = len(self.taskqueue.get_filtered_tasks())
        self.assertEqual(task_count, 2)
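
Similarly, query_to_tasks is not defined anywhere in these examples; it presumably pages through the query's keys and enqueues one task per key, for example:

def query_to_tasks(query, queue, task_generator):
    """Hypothetical sketch: enqueue one Task per key returned by an ndb query."""
    count = 0
    batch = []
    for key in query.iter(keys_only=True):
        batch.append(task_generator(key))
        count += 1
        if len(batch) == 100:
            queue.add(batch)
            batch = []
    if batch:
        queue.add(batch)
    return count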
Example #18
    def post(self):
        # refuse to start the tasks if there are some already running
        queue = Queue()
        stats = queue.fetch_statistics()
        if stats.tasks == 0:
            print 'nop'
            taskqueue.add(url='/builder', params={'start': 'A', 'end': 'B'})
            taskqueue.add(url='/builder', params={'start': 'C', 'end': 'E'})
            taskqueue.add(url='/builder', params={'start': 'F', 'end': 'G'})
            taskqueue.add(url='/builder', params={'start': 'H', 'end': 'H'})
            taskqueue.add(url='/builder', params={'start': 'I', 'end': 'M'})
            taskqueue.add(url='/builder', params={'start': 'N', 'end': 'O'})
            taskqueue.add(url='/builder', params={'start': 'P', 'end': 'R'})
            taskqueue.add(url='/builder', params={'start': 'S', 'end': 'S'})
            taskqueue.add(url='/builder', params={'start': 'T', 'end': 'T'})
            taskqueue.add(url='/builder', params={'start': 'U', 'end': 'Z'})

        self.redirect('/rank')
Example #19
  def _AddUserRetrievalTask(self, task):
    """Helper to transactionally add the tasks.

    Do not set transactional=True in the Task because Push Queues have a
    5 Task per add limit when transactional=True per:
    http://developers.google.com/appengine/docs/python/taskqueue/overview-push

    Args:
      task: Task or list of Tasks to retrieve domain users.

    Raises:
      Re-raises any taskqueue errors.
    """
    try:
      Queue('retrieve-users-queue').add(task=task)
    except TaskQueueError:
      view_utils.FailRecallTask(
          task_key_id=self._task_key_id,
          reason_string='Failed to enqueue retrieve users tasks.')
      raise
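
For contrast with the limit mentioned in the docstring, a transactional enqueue (capped at five tasks per transaction) would look roughly like this; the function and its arguments are illustrative only:

from google.appengine.api import taskqueue
from google.appengine.ext import ndb

@ndb.transactional
def _enqueue_transactionally(tasks):
    # Illustrative only: transactional adds are limited to 5 tasks per
    # transaction, which is why _AddUserRetrievalTask avoids transactional=True.
    assert len(tasks) <= 5
    taskqueue.Queue('retrieve-users-queue').add(tasks, transactional=True)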
Example #20
 def set_up(self):
     self._rpc = taskqueue.create_rpc()
     q = Queue(self._queue_name)
     q.add_async(self._task, rpc=self._rpc)
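
The matching wait step is not shown; with an explicit RPC the caller would normally block on it later, e.g.:

 def wait(self):
     # Blocks until the asynchronous add completes, returning the added task
     # (or raising whatever error the add produced).
     return self._rpc.get_result()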
Example #21
 def get(self):
     num_pages_to_fetch = 50
     q = Queue('scrape')
     for i in range(0, num_pages_to_fetch):
         q.add(Task(url='/tasks/scrape?page=%d' % i, method='GET'))
     self.response.out.write("done")
Example #22
    def setUp(self):
        TestGae.setUp(self, PROJECT_DIR)
        from google.appengine.api.taskqueue import Queue, Task

        self.queue = Queue('default')
        self.queue.add(Task('xxx', url='/'))
Example #23
def start_publish():
    """
    Kicks off a dataflow template to publish normalised data. The jobs are created via a task queue task, passing the
    IDs of the OrgChangesets which need to be published.

    This endpoint is invoked by a regular cron job or by a request from the admin UI, and takes an additional parameter
    which allows each org to be published by a separate dataflow job (useful for isolating an org which causes the
    whole publish job to fail).

    Returns:
        (str, int): http response
    """
    logging.info("about to kick off a publish dataflow job")

    per_org = request.form.get('per_org') == '1'
    if per_org:
        logging.info("publish job per org requested")

    # we want to publish changesets which:
    # - have newly been ingested (publish not running and not finished)
    # - OR have been attempted to be published but failed
    #   - due to the whole job failing
    #   - OR publish of the individual changeset failing
    org_changesets_query = OrgChangeset.query(
        ndb.OR(
            ndb.AND(OrgChangeset.publish_job_running == False,
                    OrgChangeset.publish_job_finished == False),
            ndb.AND(
                OrgChangeset.publish_job_running == False,
                OrgChangeset.publish_job_finished == True,
                ndb.OR(OrgChangeset.publish_job_failed == True,
                       OrgChangeset.publish_changeset_failed == True)))).order(
                           OrgChangeset.key)

    org_changesets = list(emit_items(org_changesets_query))

    # Query any currently running org changesets
    running_org_changesets_query = OrgChangeset.query(
        OrgChangeset.publish_job_running == True)
    running_org_changesets = list(emit_items(running_org_changesets_query))

    running_orgs = list(
        set([
            running_org_changeset.org_uid
            for running_org_changeset in running_org_changesets
        ]))

    # Filter any org changesets that already have a running changeset for that org
    gated_org_changesets = filter(lambda oc: oc.org_uid not in running_orgs,
                                  org_changesets)

    if len(gated_org_changesets) != len(org_changesets):
        filtered_ocs = filter(lambda oc: oc.org_uid in running_orgs,
                              org_changesets)
        filtered_oc_tuples = [(oc.org_uid, oc.changeset)
                              for oc in filtered_ocs]

        logging.info(
            "stopped these changesets from being published as job already running for the org: {}"
            .format(filtered_oc_tuples))

    if not gated_org_changesets:
        logging.info("nothing to publish")
        return '', 204

    # remove changesets for blacklisted orgs
    blacklisted_orgs = {}
    org_changesets_to_publish = []
    for org_changeset in gated_org_changesets:
        org = blacklisted_orgs.get(org_changeset.org_uid,
                                   Org.get_by_id(org_changeset.org_uid))
        if org and org.publish_disabled:
            blacklisted_orgs[org.key.string_id()] = org
        else:
            org_changesets_to_publish.append(org_changeset)

    to_publish = []

    if per_org:
        org_changesets_sorted = sorted(org_changesets_to_publish,
                                       key=attrgetter('org_uid'))
        for org_uid, changesets in groupby(org_changesets_sorted,
                                           key=attrgetter('org_uid')):
            to_publish.append({
                'org_uid':
                org_uid,
                'org_changeset_ids':
                [changeset.key.id() for changeset in changesets]
            })
    else:
        to_publish.append({
            'org_changeset_ids':
            [changeset.key.id() for changeset in org_changesets_to_publish]
        })

    logging.info("have {} publish tasks to create".format(len(to_publish)))

    items_to_tasks(items=to_publish,
                   queue=Queue('create-publish-job'),
                   task_generator=lambda item: Task(
                       url='/orchestrator/create_publish_job_task',
                       payload=dumps({'job_params': item})))

    return '', 204
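
emit_items is used above but not defined in these examples; it presumably wraps paged iteration over an ndb query, along the lines of:

def emit_items(query, page_size=500):
    """Hypothetical sketch: yield every entity matched by an ndb query, page by page."""
    cursor = None
    more = True
    while more:
        items, cursor, more = query.fetch_page(page_size, start_cursor=cursor)
        for item in items:
            yield item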