def reset_endpoints():
    """
    Handler which creates reset endpoint tasks for selected endpoints/orgs.
    """
    endpoint_indexes = request.form.getlist('endpoint_index')
    org_uid = request.form.get('org_uid')

    if not endpoint_indexes:
        flash("At least one endpoint is required")
        return render_template('select_endpoints.html', endpoints=ENDPOINTS, org_uid=org_uid), 200

    if org_uid:
        taskqueue.add(target='admin',
                      url='/admin/reset_endpoints_task/{}'.format(org_uid),
                      params={'endpoint_index': endpoint_indexes})

        flash("Kicked off reset of {} endpoints for {}".format(len(endpoint_indexes), org_uid))
        return redirect(prefix('/'))
    else:
        count = query_to_tasks(
            query=Org.query(Org.status == CONNECTED),
            queue=Queue('admin'),
            task_generator=lambda key: Task(
                url='/admin/reset_endpoints_task/{}'.format(key.string_id()),
                params={'endpoint_index': endpoint_indexes}))

        flash("Kicked off reset of {} endpoints for {} orgs".format(len(endpoint_indexes), count))
        return redirect(prefix('/commands'))
def post(self):
    groupname = self.request.get('group')
    if groupname == 'ctrp-taskcheck':
        # Grab the default queue and keep checking for whether or not
        # all of the tasks have finished.
        default_queue = Queue()
        stats = default_queue.fetch_statistics()
        while stats.tasks > 0:
            logging.info("task check: waiting for %d tasks to finish" % stats.tasks)
            time.sleep(5)
            stats = default_queue.fetch_statistics()

        self.finishBuilding()
    else:
        importer = wowapi.Importer()

        q = Group.query(Group.name == groupname)
        groups = q.fetch()

        # sanity check, though this shouldn't be possible
        if len(groups) == 0:
            logging.info('Builder failed to find group %s' % groupname)
            return

        logging.info('Builder task for %s started' % groupname)
        self.processGroup(groups[0], importer, True)
        logging.info('Builder task for %s completed' % groupname)
def post_all_feeds():
    """Post all new items for feeds for a specific interval"""
    if request.headers.get('X-Appengine-Cron') != 'true':
        raise ndb.Return(jsonify_error(message='Not a cron call'))

    logger.info('Starting a post job')
    futures = []
    for feed_type, feed_class in FEED_TYPE_TO_CLASS.iteritems():
        feeds = feed_class.query(feed_class.is_dirty == True)
        logger.info("Got some feeds_count: %s feeds_type: %s", feeds.count(), feed_type)
        success = 0
        more = True
        cursor = None
        while more:
            feeds_to_fetch, cursor, more = yield feeds.fetch_page_async(BATCH_SIZE, start_cursor=cursor)
            keys = ','.join([x.key.urlsafe() for x in feeds_to_fetch])
            if not keys:
                continue

            futures.append(Queue().add_async(
                Task(url=url_for('tq_feed_post-canonical'), method='POST', params={'keys': keys})))
            success += len(feeds_to_fetch)

        logger.info('queued post for %d feeds feed_type:%s', success, feed_type)

    for future in futures:
        yield future

    logger.info('Finished Post Job')
    yield write_epoch_to_stat(Stat, 'post_job')
    raise ndb.Return(jsonify(status='ok'))
def update_all_feeds(interval_id):
    """Update all feeds for a specific interval"""
    if request.headers.get('X-Appengine-Cron') != 'true':
        raise ndb.Return(jsonify_error(message='Not a cron call'))

    for feed_type, feed_class in FEED_TYPE_TO_CLASS.iteritems():
        feeds = Feed.for_interval(interval_id)
        success = 0
        more = True
        cursor = None
        futures = []
        while more:
            feeds_to_fetch, cursor, more = yield feeds.fetch_page_async(BATCH_SIZE, start_cursor=cursor)
            feeds_to_fetch = filter(
                lambda x: getattr(x, 'external_polling_bucket', DEFAULT_POLLING_BUCKET) == DEFAULT_POLLING_BUCKET,
                feeds_to_fetch)
            keys = ','.join([x.key.urlsafe() for x in feeds_to_fetch])
            if not keys:
                continue

            futures.append(Queue('poll').add_async(
                Task(url=url_for('tq_feed_poll-canonical'), method='POST', params={'keys': keys})))
            success += len(feeds_to_fetch)

        for future in futures:
            yield future

        logger.info('queued poll for %d feeds at interval_id=%s', success, interval_id)

    raise ndb.Return(jsonify(status='ok'))
def instagram_push_update():
    data = request.stream.read()
    instagram_client_secret = Configuration.value_for_name('instagram_client_secret')

    server_signature = request.headers.get('X-Hub-Signature', None)
    signature = hmac.new(str(instagram_client_secret), data, digestmod=hashlib.sha1).hexdigest()
    if server_signature != signature:
        logger.warn('Got PuSH subscribe POST from instagram w/o valid signature: sent=%s != expected=%s',
                    server_signature, signature)
        raise ndb.Return('')

    logger.info('Got PuSH body: %s', data)
    logger.info('Got PuSH headers: %s', request.headers)

    parsed_feed = json.loads(data)
    user_ids = [int(x.get('object_id')) for x in parsed_feed]
    feeds = InstagramFeed.query(InstagramFeed.user_id.IN(user_ids))

    cursor = None
    more = True
    keys = []
    while more:
        feed_keys, cursor, more = feeds.fetch_page(BATCH_SIZE, keys_only=True, start_cursor=cursor)
        keys += feed_keys

    keys = ','.join([x.urlsafe() for x in keys])
    if keys:
        yield Queue('poll').add_async(
            Task(url=url_for('tq_feed_poll-canonical'), method='POST', params={'keys': keys}))

    raise ndb.Return('ok')
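# Note: the plain "!=" comparison above leaks timing information. A minimal
# hardening sketch (assumptions: Python 2.7.7+, where hmac.compare_digest is
# available; the helper name signature_is_valid is hypothetical):
import hashlib
import hmac

def signature_is_valid(secret, body, server_signature):
    # Compute the expected HMAC-SHA1 hex digest and compare in constant time.
    expected = hmac.new(str(secret), body, digestmod=hashlib.sha1).hexdigest()
    return hmac.compare_digest(expected, server_signature or '')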
def __init__(self, tag, queue_name, size, duration=60, auto_delete=True):
    """The generator will yield json deserialized payloads from tasks with the corresponding tag.

    :param tag: :class:`str` Pull queue tag to query against
    :param queue_name: :class:`str` Name of PULL queue holding tasks to lease.
    :param size: :class:`int` The number of items to pull at once
    :param duration: :class:`int` After this time, the tasks may be leased again. Tracked in seconds
    :param auto_delete: :class:`bool` Delete tasks when iteration is complete.
    :return: :class:`iterator` of json deserialized payloads
    """
    from google.appengine.api.taskqueue import Queue

    self.queue_name = queue_name
    self.queue = Queue(name=self.queue_name)
    self.tag = tag
    self.size = size
    self.duration = duration
    self.auto_delete = auto_delete

    self._messages = []
    self._processed_messages = []
    self._fetched = False
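# Hedged sketch of the iteration the constructor above sets up. The class
# name and the __iter__ body are assumptions; Queue.lease_tasks_by_tag and
# Queue.delete_tasks are the real taskqueue calls for tagged pull queues.
import json

class TaggedPayloadIterator(object):  # hypothetical name for the class above

    def __iter__(self):
        if not self._fetched:
            # Lease up to `size` tasks carrying `tag` for `duration` seconds.
            self._messages = self.queue.lease_tasks_by_tag(
                self.duration, self.size, tag=self.tag)
            self._fetched = True
        for task in self._messages:
            yield json.loads(task.payload)
            self._processed_messages.append(task)
        if self.auto_delete and self._processed_messages:
            # Remove consumed tasks so they are not leased again.
            self.queue.delete_tasks(self._processed_messages)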
def insert(self):
    """Insert the pull task into the requested queue, 'default' if none given."""
    from google.appengine.api.taskqueue import Queue

    task = self.to_task()
    Queue(name=self.get_queue()).add(task)
def get(self):
    queue = Queue()
    stats = queue.fetch_statistics()

    template_values = {
        'tasks': stats.tasks,
        'in_flight': stats.in_flight,
    }

    template = JINJA_ENVIRONMENT.get_template('templates/ranker.html')
    self.response.write(template.render(template_values))
def _AddUserRecallTasks(self, user_recall_tasks):
    """Helper to enqueue list of user recall tasks in batches.

    Args:
        user_recall_tasks: Task or list of Tasks; one for each user.

    Raises:
        re-raises any errors with task queue.
    """
    Queue('user-recall-queue').add(task=user_recall_tasks)
def schedule(cls, queue_name, tasks):
    queue = Queue(queue_name)
    batch_size = 100

    task_count = 0
    for task_batch in cls.split_every(batch_size, tasks):
        cls._add_single_batch(queue, task_batch)
        task_count += len(task_batch)

    if task_count > 0:
        logging.info("Scheduled %d tasks in batches of at most %d", task_count, batch_size)
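# split_every is not shown in this section; a plausible implementation,
# assuming it yields lists of at most `n` tasks (Queue.add accepts at most
# 100 tasks per call, hence the batching). The `paginated` helper used by
# the later variant of schedule() presumably behaves the same way.
from itertools import islice

def split_every(n, iterable):
    it = iter(iterable)
    batch = list(islice(it, n))
    while batch:
        yield batch
        batch = list(islice(it, n))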
def addTasks(self):
    q = Comic.all()
    queue = Queue(name='update-queue')
    tasks = []
    for comic in q:
        task = Task(url='/tasks/update/' + str(comic.id))
        tasks.append(task)
        # Queue.add accepts at most 100 tasks per call, so flush in batches.
        if len(tasks) >= 100:
            queue.add(tasks)
            tasks = []
    # Add any remaining tasks from the final partial batch.
    if tasks:
        queue.add(tasks)
    self.response.out.write("OK")
def post(self):
    # refuse to start the tasks if there are some already running
    queue = Queue()
    stats = queue.fetch_statistics()
    if stats.tasks == 0:
        # queue up all of the groups into individual tasks. the configuration
        # in queue.yaml only allows 10 tasks to run at once. the builder only
        # allows 10 URL requests at a time, which should hopefully keep the
        # Blizzard API queries under control.
        q = Group.query()
        groups = q.fetch()
        for g in groups:
            taskqueue.add(url='/builder', params={'group': g.name})

        checker = Task(url='/builder', params={'group': 'ctrp-taskcheck'})
        taskcheck = Queue(name='taskcheck')
        taskcheck.add(checker)

    self.redirect('/rank')
def schedule(cls, queue_name, tasks):
    if not isinstance(tasks, list):
        tasks = [tasks]

    queue = Queue(queue_name)
    page_size = 100

    task_count = 0
    for task_batch in paginated(page_size, tasks):
        cls.__add_single_batch(queue, task_batch)
        task_count += len(task_batch)

    if task_count > 0:
        logging.info("Scheduled %d tasks in batches of at most %d", task_count, page_size)
def _queue_tasks(self):
    scanners = QuestionsScanner.all()
    tasks = []
    for scanner in scanners:
        domain = scanner.key().name()
        task = Task(url='/tasks/scan_new_questions/%s' % (domain,))
        tasks.append(task)

    if len(tasks) > 0:
        queue = Queue(name="scannewquestions")
        queue.add(tasks)
def test_items_to_tasks(self):
    """
    Verifies that multiple pages of tasks get queued up properly.
    """
    count = task_utils.items_to_tasks(
        items=[1, 2, 3],
        queue=Queue('adapter-update'),
        task_generator=lambda item: Task(url='/something/{}'.format(item)))

    self.assertEqual(count, 3)

    task_count = len(self.taskqueue.get_filtered_tasks())
    self.assertEqual(task_count, 3)
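# items_to_tasks itself is not shown in this section; a minimal sketch
# consistent with what the test expects (one task per item, total count
# returned), batching adds to respect the 100-task-per-call limit:
def items_to_tasks(items, queue, task_generator):
    tasks = [task_generator(item) for item in items]
    for start in range(0, len(tasks), 100):
        queue.add(tasks[start:start + 100])
    return len(tasks)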
def init_all_updates():
    """
    Initialises update cycle for each connected org by putting a task onto the update queue
    (which ends up calling init_new_changeset(org_uid)).
    """
    count = query_to_tasks(
        query=Org.query(
            Org.status == CONNECTED,
            Org.last_update_cycle_completed_at < datetime.utcnow() - SYNC_INTERVAL,
            Org.provider.IN(API_PROVIDERS)).order(
                -Org.last_update_cycle_completed_at,
                Org.key),  # Queries involving IN need to be ordered by key
        queue=Queue('adapter-update'),
        task_generator=lambda key: Task(url='/adapter/{}/init_update'.format(key.string_id())))

    logging.info("queued {} tasks for a sync update".format(count))
def test_query_to_tasks(self):
    """
    Verifies that multiple pages of tasks get queued up properly.
    """
    Org(id='test1', status=CONNECTED).put()
    Org(id='test2', status=CONNECTED).put()
    Org(id='test3', status=DISCONNECTED).put()

    count = task_utils.query_to_tasks(
        query=Org.query(Org.status == CONNECTED),
        queue=Queue('adapter-update'),
        task_generator=lambda key: Task(url='/something/{}'.format(key.string_id())))

    self.assertEqual(count, 2)

    task_count = len(self.taskqueue.get_filtered_tasks())
    self.assertEqual(task_count, 2)
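# query_to_tasks is likewise not shown; a plausible implementation that
# matches the tests and callers above (keys-only paging, one task per key):
def query_to_tasks(query, queue, task_generator):
    count = 0
    cursor = None
    more = True
    while more:
        keys, cursor, more = query.fetch_page(100, keys_only=True, start_cursor=cursor)
        if keys:
            queue.add([task_generator(key) for key in keys])
            count += len(keys)
    return count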
def post(self):
    # refuse to start the tasks if there are some already running
    queue = Queue()
    stats = queue.fetch_statistics()
    if stats.tasks == 0:
        print 'nop'
        taskqueue.add(url='/builder', params={'start': 'A', 'end': 'B'})
        taskqueue.add(url='/builder', params={'start': 'C', 'end': 'E'})
        taskqueue.add(url='/builder', params={'start': 'F', 'end': 'G'})
        taskqueue.add(url='/builder', params={'start': 'H', 'end': 'H'})
        taskqueue.add(url='/builder', params={'start': 'I', 'end': 'M'})
        taskqueue.add(url='/builder', params={'start': 'N', 'end': 'O'})
        taskqueue.add(url='/builder', params={'start': 'P', 'end': 'R'})
        taskqueue.add(url='/builder', params={'start': 'S', 'end': 'S'})
        taskqueue.add(url='/builder', params={'start': 'T', 'end': 'T'})
        taskqueue.add(url='/builder', params={'start': 'U', 'end': 'Z'})

    self.redirect('/rank')
def _AddUserRetrievalTask(self, task):
    """Helper to transactionally add the tasks.

    Do not set transactional=True in the Task because Push Queues have a
    5 Task per add limit when transactional=True per:
    http://developers.google.com/appengine/docs/python/taskqueue/overview-push

    Args:
        task: Task or list of Tasks to retrieve domain users.

    Raises:
        re-raises any taskqueue errors raised.
    """
    try:
        Queue('retrieve-users-queue').add(task=task)
    except TaskQueueError:
        view_utils.FailRecallTask(
            task_key_id=self._task_key_id,
            reason_string='Failed to enqueue retrieve users tasks.')
        raise
def set_up(self):
    self._rpc = taskqueue.create_rpc()
    q = Queue(self._queue_name)
    q.add_async(self._task, rpc=self._rpc)
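# A hypothetical companion to set_up(): UserRPC.get_result() blocks until
# the asynchronous add completes, re-raising any taskqueue error and
# returning the added Task.
def wait(self):
    return self._rpc.get_result()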
def get(self):
    num_pages_to_fetch = 50
    q = Queue('scrape')
    for i in range(0, num_pages_to_fetch):
        q.add(Task(url='/tasks/scrape?page=%d' % i, method='GET'))
    self.response.out.write("done")
def setUp(self):
    TestGae.setUp(self, PROJECT_DIR)

    from google.appengine.api.taskqueue import Queue, Task
    self.queue = Queue('default')
    self.queue.add(Task('xxx', url='/'))
def start_publish():
    """
    Kicks off a dataflow template to publish normalised data. The jobs are created via a task
    queue task, passing the IDs of the OrgChangesets which need to be published.

    This endpoint is invoked by a regular cron job or by a request from the admin UI, and takes
    an additional parameter which allows each org to be published by a separate dataflow job
    (useful for isolating an org which causes the whole publish job to fail).

    Returns:
        (str, int): http response
    """
    logging.info("about to kick off a publish dataflow job")

    per_org = request.form.get('per_org') == '1'
    if per_org:
        logging.info("publish job per org requested")

    # we want to publish changesets which:
    # - have newly been ingested (publish not running and not finished)
    # - OR have been attempted to be published but failed
    #   - due to the whole job failing
    #   - OR publish of the individual changeset failing
    org_changesets_query = OrgChangeset.query(
        ndb.OR(
            ndb.AND(OrgChangeset.publish_job_running == False,
                    OrgChangeset.publish_job_finished == False),
            ndb.AND(
                OrgChangeset.publish_job_running == False,
                OrgChangeset.publish_job_finished == True,
                ndb.OR(OrgChangeset.publish_job_failed == True,
                       OrgChangeset.publish_changeset_failed == True)))).order(OrgChangeset.key)

    org_changesets = list(emit_items(org_changesets_query))

    # Query any currently running org changesets
    running_org_changesets_query = OrgChangeset.query(OrgChangeset.publish_job_running == True)
    running_org_changesets = list(emit_items(running_org_changesets_query))
    running_orgs = list(set([oc.org_uid for oc in running_org_changesets]))

    # Filter out any org changesets whose org already has a running changeset
    gated_org_changesets = filter(lambda oc: oc.org_uid not in running_orgs, org_changesets)

    if len(gated_org_changesets) != len(org_changesets):
        filtered_ocs = filter(lambda oc: oc.org_uid in running_orgs, org_changesets)
        filtered_oc_tuples = [(oc.org_uid, oc.changeset) for oc in filtered_ocs]
        logging.info(
            "stopped these changesets from being published as job already running for the org: {}"
            .format(filtered_oc_tuples))

    if not gated_org_changesets:
        logging.info("nothing to publish")
        return '', 204

    # remove changesets for blacklisted orgs
    blacklisted_orgs = {}
    org_changesets_to_publish = []
    for org_changeset in gated_org_changesets:
        org = blacklisted_orgs.get(org_changeset.org_uid, Org.get_by_id(org_changeset.org_uid))
        if org and org.publish_disabled:
            blacklisted_orgs[org.key.string_id()] = org
        else:
            org_changesets_to_publish.append(org_changeset)

    to_publish = []

    if per_org:
        org_changesets_sorted = sorted(org_changesets_to_publish, key=attrgetter('org_uid'))
        for org_uid, changesets in groupby(org_changesets_sorted, key=attrgetter('org_uid')):
            to_publish.append({
                'org_uid': org_uid,
                'org_changeset_ids': [changeset.key.id() for changeset in changesets]
            })
    else:
        to_publish.append({
            'org_changeset_ids': [changeset.key.id() for changeset in org_changesets_to_publish]
        })

    logging.info("have {} publish tasks to create".format(len(to_publish)))

    items_to_tasks(
        items=to_publish,
        queue=Queue('create-publish-job'),
        task_generator=lambda item: Task(
            url='/orchestrator/create_publish_job_task',
            payload=dumps({'job_params': item})))

    return '', 204
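# emit_items is not shown in this section; a plausible implementation that
# pages through an ndb query and yields entities one at a time:
def emit_items(query, page_size=100):
    cursor = None
    more = True
    while more:
        items, cursor, more = query.fetch_page(page_size, start_cursor=cursor)
        for item in items:
            yield item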