class TaskMaster(object):
    """Creates and enqueues tasks.

    The handler URLs default to the values that used to be hard-coded, so
    ``TaskMaster()`` behaves exactly as before; pass other URLs to the
    constructor to point the tasks at different handlers.
    """

    # Class-level defaults so that subclasses which skip ``__init__`` still
    # find a usable URL on the instance.
    feed_update_url = '/task/update_feeds'
    torrent_url = '/task/torrent'

    def __init__(self, feed_update_url=None, torrent_url=None):
        """Create the queue and optionally override the handler URLs.

        :param feed_update_url: URL of the feed update handler, or None to
            keep the default.
        :param torrent_url: URL of the torrent handler, or None to keep the
            default.
        """
        self.queue = Queue()
        if feed_update_url is not None:
            self.feed_update_url = feed_update_url
        if torrent_url is not None:
            self.torrent_url = torrent_url

    def add_feed_update_task(self):
        """Enqueue task updating feeds"""
        self.queue.add(Task(url=self.feed_update_url))

    def add_torrent_task(self, torrent_entry):
        """Enqueue task for torrent entry represented by dict"""
        self.queue.add(Task(url=self.torrent_url, params=torrent_entry))

    def add_new_torrents(self, scraper):
        """Enqueues tasks for all new torrents

        Failures of ``scraper.get_new_torrents()`` that are expected from
        time to time are still tolerated (nothing is enqueued), but they are
        now logged instead of being dropped silently.
        """
        import logging

        try:
            new_entries = scraper.get_new_torrents()
        except webclient.NotLoggedIn:
            # Session expired
            logging.warning('Not logged in; no torrent tasks enqueued')
        except webclient.RequestError:
            # Tracker is down, happens sometimes
            logging.warning('Tracker request failed; no torrent tasks enqueued')
        else:
            for entry in new_entries:
                self.add_torrent_task(entry)
def post(self):
    """Run the builder task for the group named in the ``group`` parameter.

    The special name ``ctrp-taskcheck`` builds nothing: it polls the
    statistics of ``Queue()`` every five seconds until no tasks are left and
    then calls ``finishBuilding``. Any other name is looked up as a ``Group``
    and passed to ``processGroup``.
    """
    group_name = self.request.get('group')

    if group_name != 'ctrp-taskcheck':
        importer = wowapi.Importer()
        matches = Group.query(Group.name == group_name).fetch()

        # sanity check, tho this shouldn't be possible
        if not matches:
            logging.info('Builder failed to find group %s' % group_name)
            return

        logging.info('Builder task for %s started' % group_name)
        self.processGroup(matches[0], importer, True)
        logging.info('Builder task for %s completed' % group_name)
        return

    # Grab the default queue and keep checking for whether or not
    # all of the tasks have finished.
    watched_queue = Queue()
    while True:
        pending = watched_queue.fetch_statistics().tasks
        if not pending > 0:
            break
        logging.info("task check: waiting for %d tasks to finish" % pending)
        time.sleep(5)

    self.finishBuilding()
def __init__(self, tag, queue_name, size, duration=60, auto_delete=True):
    """Store the lease settings and bind to the named pull queue.

    Iterating the object yields json deserialized payloads from tasks with
    the corresponding tag.

    :param tag: :class: `str` Pull queue tag to query against
    :param queue_name: :class: `str` Name of PULL queue holding tasks to
                       lease.
    :param size: :class: `int` The number of items to pull at once
    :param duration: :class: `int` After this time, the tasks may be leased
                     again. Tracked in seconds
    :param auto_delete: :class: `bool` Delete tasks when iteration is
                        complete.
    """
    from google.appengine.api.taskqueue import Queue

    # Lease settings supplied by the caller.
    self.tag = tag
    self.size = size
    self.duration = duration
    self.auto_delete = auto_delete

    # Queue the tasks are leased from.
    self.queue_name = queue_name
    self.queue = Queue(name=queue_name)

    # Iteration bookkeeping; nothing has been fetched yet.
    self._fetched = False
    self._messages = []
    self._processed_messages = []
def schedule_tasks(tasks, queue_name=deferred._DEFAULT_QUEUE):
    # type: (list[Task], str) -> list[Task]
    """Add ``tasks`` to the named queue in chunks of ``MAX_TASKS_PER_ADD``.

    Returns one flat list holding everything the individual ``queue.add``
    calls returned, in call order.
    """
    target_queue = Queue(queue_name)
    return [
        added
        for batch in chunks(tasks, MAX_TASKS_PER_ADD)
        for added in target_queue.add(batch)
    ]
def get(self):
    """Render ``templates/ranker.html`` with the current queue statistics."""
    queue_stats = Queue().fetch_statistics()
    context = dict(tasks=queue_stats.tasks, in_flight=queue_stats.in_flight)
    page = JINJA_ENVIRONMENT.get_template('templates/ranker.html')
    self.response.write(page.render(context))
def addTasks(self):
    """Enqueue one update task per comic on ``update-queue``.

    Tasks are sent in batches of at most 100. Every comic gets a task and
    the final, partially filled batch is flushed after the loop; previously
    the comic that hit the batch limit was skipped and the remainder was
    never enqueued at all.
    """
    batch_limit = 100  # largest batch handed to a single queue.add() call
    queue = Queue(name='update-queue')
    batch = []
    for comic in Comic.all():
        batch.append(Task(url='/tasks/update/' + str(comic.id)))
        if len(batch) >= batch_limit:
            queue.add(batch)
            batch = []
    if batch:
        # Flush whatever is left over from the last, partial batch.
        queue.add(batch)
    self.response.out.write("OK")
def _queue_tasks(self):
    """Enqueue one scan task per ``QuestionsScanner`` on ``scannewquestions``.

    The scanner's key name is used as the domain in the task URL. Tasks are
    added in slices of at most 100 so that a large number of scanners cannot
    exceed the per-call limit of ``Queue.add``.
    """
    batch_limit = 100  # task queue API limit for a single add() call
    tasks = [
        Task(url='/tasks/scan_new_questions/%s' % (scanner.key().name(), ))
        for scanner in QuestionsScanner.all()
    ]
    if not tasks:
        return
    queue = Queue(name="scannewquestions")
    for start in range(0, len(tasks), batch_limit):
        queue.add(tasks[start:start + batch_limit])
def check_users_sweeper(request):
    """Schedule a ``check_user_login`` task for every active, very expired user.

    Users are selected with ``is_very_expired`` on their NIH assertion
    expiration and must also be active. Tasks are added to the fallback
    queue 25 at a time. Always returns an empty ``HttpResponse``.
    """
    logger.info('check users sweeper runnning...')

    expired_users = []
    for candidate in NIH_User.objects.all():
        if is_very_expired(candidate.NIH_assertion_expiration) and candidate.active:
            expired_users.append(candidate)

    if expired_users:
        batch_size = 25
        fallback_queue = Queue(name=FALLBACK_QUEUE_NAME)
        for offset in xrange(0, len(expired_users), batch_size):
            batch = expired_users[offset:offset + batch_size]
            logger.info('scheduling check_user_login tasks for the following users: ')
            usernames = [str(member.NIH_username) for member in batch]
            logger.info(str(usernames))
            fallback_queue.add([
                Task(url='/tasks/check_user_login',
                     params={'user_id': member.user_id},
                     countdown=0)
                for member in batch
            ])

    return HttpResponse('')
def add(self, params):
    """Create an event, invite the listed people and set up its set list.

    Reads ``name``, ``file``, ``file150``, ``filetype``, ``start_date``,
    ``description``, ``creator``, ``people_invited`` and ``setlist`` from
    ``params``. For every invited address an ``InvitedUser`` is stored and a
    mail task is added to ``mail-queue``; for every song key a
    ``SetListVotes`` entity with zero votes is stored.

    Returns the stored ``Event``.
    """
    event = Event(
        name = params["name"],
        file = params["file"],
        file150 = params["file150"],
        filetype = params["filetype"].split('.')[1],
        start_date = params["start_date"],
        # end_date = params["end_date"],
        description = params["description"],
        creator = params["creator"],
        # people_invited = params["people_invited"],
        # type = params["type"],
    )
    event.put()

    # The queue and the message text are the same for every invitee, so they
    # are built once instead of once per loop iteration.
    queue = Queue('mail-queue')
    subject = "You have been invited to the event " + event.name + " in Rockolin'"
    body = """ Hi!, You have been invited to the event """ + event.name + """ This event would be on: """ + str(event.start_date) + """ If you want to decide the music justo go to the following link: http://rockolinapp.appspot.com/event/""" +str(event.key())

    for email in params["people_invited"]:
        invited_user = InvitedUser(
            email = email,
            event = event
        )
        queue.add(Task(url='/task/mail', params = {
            'to' : email,
            'subject' : subject,
            'body' : body
        }))
        invited_user.put()
    # NOTE: the queue used to be purged right after the tasks were added,
    # which deletes the invitation mails that were just enqueued (and any
    # other pending mail). The purge has been removed so the mails are sent.

    for song_key in params["setlist"]:
        set_list_votes = SetListVotes(
            event = event,
            song = Song.get(song_key),
            votes = 0
        )
        set_list_votes.put()

    return event
def __init__(self, tag, queue_name, size, duration=60, deadline=10,
             auto_delete=True):
    """Store the lease settings and bind to the named pull queue.

    Iterating the object yields json deserialized payloads from tasks with
    the corresponding tag.

    :param tag: :class: `str` Pull queue tag to query against
    :param queue_name: :class: `str` Name of PULL queue holding tasks to
                       lease.
    :param size: :class: `int` The number of items to pull at once
    :param duration: :class: `int` After this time, the tasks may be leased
                     again. Tracked in seconds
    :param deadline: :class: `int` The time in seconds to wait for the rpc.
    :param auto_delete: :class: `bool` Delete tasks when iteration is
                        complete.
    """
    from google.appengine.api.taskqueue import Queue

    # Lease settings supplied by the caller.
    self.tag = tag
    self.size = size
    self.duration = duration
    self.deadline = deadline
    self.auto_delete = auto_delete

    # Queue the tasks are leased from.
    self.queue_name = queue_name
    self.queue = Queue(name=queue_name)

    # Iteration bookkeeping; nothing has been fetched yet.
    self._fetched = False
    self._messages = []
    self._processed_messages = []
def post_all_feeds():
    """Queue a post task for every page of dirty feeds of every feed type.

    Only requests carrying the ``X-Appengine-Cron: true`` header are served;
    anything else gets the ``jsonify_error`` response. Written as a
    generator that yields futures and finishes through ``ndb.Return``.
    """
    if request.headers.get('X-Appengine-Cron') != 'true':
        raise ndb.Return(jsonify_error(message='Not a cron call'))

    logger.info('Starting a post job')
    pending_adds = []

    for feed_type, feed_class in FEED_TYPE_TO_CLASS.iteritems():
        # `== True` is what builds the query filter here; keep the operator.
        dirty_feeds = feed_class.query(feed_class.is_dirty == True)
        logger.info("Got some feeds_count: %s feeds_type: %s", dirty_feeds.count(), feed_type)

        queued = 0
        cursor, more = None, True
        while more:
            page, cursor, more = yield dirty_feeds.fetch_page_async(BATCH_SIZE, start_cursor=cursor)
            keys = ','.join(feed.key.urlsafe() for feed in page)
            if keys:
                post_task = Task(url=url_for('tq_feed_post-canonical'), method='POST', params={'keys': keys})
                pending_adds.append(Queue().add_async(post_task))
                queued += len(page)

        logger.info('queued post for %d feeds feed_type:%s', queued, feed_type)

    # Wait for every enqueue RPC before reporting the job as finished.
    for pending in pending_adds:
        yield pending

    logger.info('Finished Post Job')
    yield write_epoch_to_stat(Stat, 'post_job')
    raise ndb.Return(jsonify(status='ok'))
def update_all_feeds(interval_id):
    """Update all feeds for a specific interval

    Only requests carrying the ``X-Appengine-Cron: true`` header are served.
    Feeds whose ``external_polling_bucket`` differs from
    ``DEFAULT_POLLING_BUCKET`` are skipped; the remaining ones are handed,
    one page at a time, to poll tasks on the ``poll`` queue. Written as a
    generator that yields futures and finishes through ``ndb.Return``.

    :param interval_id: interval passed to ``Feed.for_interval``.
    """
    if request.headers.get('X-Appengine-Cron') != 'true':
        raise ndb.Return(jsonify_error(message='Not a cron call'))

    for feed_type, feed_class in FEED_TYPE_TO_CLASS.iteritems():
        # NOTE(review): the query ignores feed_class, so the same feeds are
        # queried once per registered feed type. This looks like it should
        # be feed_class.for_interval(interval_id) -- confirm before changing.
        feeds = Feed.for_interval(interval_id)
        success = 0
        more = True
        cursor = None
        futures = []
        while more:
            feeds_to_fetch, cursor, more = yield feeds.fetch_page_async(BATCH_SIZE, start_cursor=cursor)
            feeds_to_fetch = [
                feed for feed in feeds_to_fetch
                if getattr(feed, 'external_polling_bucket', DEFAULT_POLLING_BUCKET) == DEFAULT_POLLING_BUCKET
            ]
            keys = ','.join([feed.key.urlsafe() for feed in feeds_to_fetch])
            if not keys:
                continue

            futures.append(Queue('poll').add_async(Task(url=url_for('tq_feed_poll-canonical'), method='POST', params={'keys': keys})))
            # Count feeds, not batches: the log line below reports feeds.
            success += len(feeds_to_fetch)

        # Wait for this feed type's enqueue RPCs before moving on.
        for future in futures:
            yield future

        logger.info('queued poll for %d feeds at interval_id=%s', success, interval_id)

    raise ndb.Return(jsonify(status='ok'))
def instagram_push_update():
    """Handle a PuSH update POST from Instagram.

    The request body is authenticated with an HMAC-SHA1 of the body keyed
    with the configured ``instagram_client_secret`` and compared against the
    ``X-Hub-Signature`` header. For a valid request the ``InstagramFeed``
    keys matching the posted ``object_id`` values are collected and one poll
    task carrying all of them is added to the ``poll`` queue.

    Finishes through ``ndb.Return`` with ``''`` for a bad signature and
    ``'ok'`` otherwise.
    """
    data = request.stream.read()
    instagram_client_secret = Configuration.value_for_name('instagram_client_secret')

    server_signature = request.headers.get('X-Hub-Signature', None)
    signature = hmac.new(str(instagram_client_secret), data, digestmod=hashlib.sha1).hexdigest()

    # Compare in constant time so the response time does not reveal how many
    # leading characters of a forged signature were right. compare_digest is
    # missing on Python older than 2.7.7, hence the plain fallback.
    compare = getattr(hmac, 'compare_digest', None)
    try:
        if server_signature is None:
            signature_ok = False
        elif compare is None:
            signature_ok = server_signature == signature
        else:
            signature_ok = compare(str(server_signature), signature)
    except (TypeError, UnicodeError):
        # A header that cannot be turned into a plain string never matches.
        signature_ok = False

    if not signature_ok:
        logger.warn('Got PuSH subscribe POST from instagram w/o valid signature: sent=%s != expected=%s',
                    server_signature, signature)
        raise ndb.Return('')

    logger.info('Got PuSH body: %s', data)
    logger.info('Got PuSH headers: %s', request.headers)

    parsed_feed = json.loads(data)
    user_ids = [int(x.get('object_id')) for x in parsed_feed]
    feeds = InstagramFeed.query(InstagramFeed.user_id.IN(user_ids))

    # Page through the matching feeds, collecting keys only.
    cursor = None
    more = True
    keys = []
    while more:
        feed_keys, cursor, more = feeds.fetch_page(BATCH_SIZE, keys_only=True, start_cursor=cursor)
        keys += feed_keys

    keys = ','.join([x.urlsafe() for x in keys])
    if keys:
        yield Queue('poll').add_async(Task(url=url_for('tq_feed_poll-canonical'), method='POST', params={'keys': keys}))

    raise ndb.Return('ok')
def reset_endpoints():
    """
    Create reset-endpoint tasks for the endpoints chosen in the form.

    Without any ``endpoint_index`` value the selection page is rendered
    again with a flash message. With an ``org_uid`` a single task is added
    for that org; without one a task is created for every connected org.
    """
    org_uid = request.form.get('org_uid')
    endpoint_indexes = request.form.getlist('endpoint_index')

    if not endpoint_indexes:
        flash("At least one endpoint is required")
        page = render_template('select_endpoints.html', endpoints=ENDPOINTS, org_uid=org_uid)
        return page, 200

    if not org_uid:
        # No org given: fan out over all connected orgs.
        org_count = query_to_tasks(
            query=Org.query(Org.status == CONNECTED),
            queue=Queue('admin'),
            task_generator=lambda key: Task(
                url='/admin/reset_endpoints_task/{}'.format(key.string_id()),
                params={'endpoint_index': endpoint_indexes}))
        flash("Kicked off reset of {} endpoints for {} orgs".format(
            len(endpoint_indexes), org_count))
        return redirect(prefix('/commands'))

    taskqueue.add(target='admin',
                  url='/admin/reset_endpoints_task/{}'.format(org_uid),
                  params={'endpoint_index': endpoint_indexes})
    flash("Kicked off reset of {} endpoints for {}".format(
        len(endpoint_indexes), org_uid))
    return redirect(prefix('/'))
def update(self, remote_ip, uptime):
    """Record a check-in of this job and re-arm its watchdog task.

    Updates ``last_seen``, logs an IP change, detects a reboot (reported
    uptime lower than the stored one) and runs the reboot actions, flips an
    offline job back to online and runs the back-online actions, then
    replaces the pending ``/task`` task with a new one due in
    ``interval + 2`` minutes.

    :param remote_ip: IP address the check-in came from.
    :param uptime: reported uptime in seconds, or None when not reported.
    """
    self.last_seen = datetime.utcnow()

    if self.last_ip != remote_ip:
        LogEntry.log_event(self.key(), 'Info', 'IP changed - new IP: ' + remote_ip)
    self.last_ip = remote_ip

    if uptime is not None:
        # The stored uptime must exist before it can be compared. The old
        # test checked ``self.update`` (this method, never None) by mistake.
        if self.uptime is not None and self.uptime > uptime:
            LogEntry.log_event(self.key(), 'Reboot',
                               'Reboot - Previous uptime: ' + str(timedelta(seconds=self.uptime)))
            for action_key in self.reboot_actions:
                try:
                    db.get(action_key).perform_action()
                except Exception as exp:
                    # One failing action must not stop the remaining ones.
                    logging.error('Error executing reboot action: ' + str(exp))
        self.uptime = uptime
    self.put()

    # job got back online
    if self.status == 'offline':
        self.status = 'online'
        LogEntry.log_event(self.key(), 'Info', 'Job back online - IP: ' + remote_ip)

        # perform all back_online actions
        for action_key in self.backonline_actions:
            try:
                db.get(action_key).perform_action()
            except Exception as exp:
                logging.error('Error executing backonline action: ' + str(exp))

    # delete previous (waiting) task
    if self.task_name is not None:
        logging.debug('old task: ' + self.task_name)
        Queue().delete_tasks(Task(name=self.task_name))

    task_name = self.name + '_' + datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S-%f')

    # create the task that runs if update is not called again within the
    # interval (plus two minutes of slack)
    taskqueue.add(name=task_name, url='/task', params={'key': self.key()},
                  countdown=(self.interval + 2) * 60)

    self.task_name = task_name
    self.put()
def send_voting_email(voting_user):
    """
    Queue the voting email for a user on ``mail-queue``.

    The mail contains the user's personal voting link built from the user's
    token. After the task is added, ``last_time_mail_queued`` is set to the
    current time and the user is stored.

    Input:
        voting_user: VotingUser, the user to be sent. Its key's parent must
            be an Election.

    Raises:
        ValueError: when the parent entity is not an Election.
    """
    # Make email content with token-link
    election = voting_user.key.parent().get()
    if not isinstance(election, Election):
        raise ValueError('voting_user should have election as ndb ancestor')

    from_email = "*****@*****.**"
    voting_link = "http://ntuvb-allstar.appspot.com/vote/" + voting_user.token

    html_template = (
        u"<h3>您好 {student_id}:</h3>"
        u"<p>感謝您參與{election_title} <br>"
        u"<h4><a href='{voting_link}'> 投票請由此進入 </a></h4> <br>"
        u"<p><b style=\"color: red\">此為您個人的投票連結,請勿轉寄或外流</b><br>"
        u"若您未參與本次投票,請直接刪除本封信件 <br>"
        u"任何疑問請來信至: {help_mail} <br></p>"
    )
    email_body = html_template.format(
        student_id=voting_user.student_id,
        election_title=election.title,
        voting_link=voting_link,
        help_mail=from_email)
    text_body = u"投票請進: %s" % voting_link
    email_subject = election.title+u"投票認證信"
    to_email = voting_user.student_id+"@ntu.edu.tw"

    mail_params = {
        'subject': email_subject,
        'body': email_body,
        'text_body': text_body,
        'to': to_email,
        'from': from_email,
    }
    Queue('mail-queue').add(Task(url='/queue/mail', params=mail_params))

    voting_user.last_time_mail_queued = datetime.now()
    stored_key = voting_user.put()
    stored_key.get()  # for strong consistency
def insert(self):
    """Build this object's task and add it to the queue it names.

    The task comes from ``to_task()`` and the queue name from
    ``get_queue()``.
    """
    from google.appengine.api.taskqueue import Queue

    pull_task = self.to_task()
    target_queue = Queue(name=self.get_queue())
    target_queue.add(pull_task)
def post(self):
    """Start the builder tasks, one per letter range, then go to ``/rank``.

    Nothing is started while ``Queue()`` still reports tasks.
    """
    # refuse to start the tasks if there are some already running
    if Queue().fetch_statistics().tasks == 0:
        print('nop')
        letter_ranges = (
            ('A', 'B'), ('C', 'E'), ('F', 'G'), ('H', 'H'), ('I', 'M'),
            ('N', 'O'), ('P', 'R'), ('S', 'S'), ('T', 'T'), ('U', 'Z'),
        )
        for first, last in letter_ranges:
            taskqueue.add(url='/builder', params={'start': first, 'end': last})

    self.redirect('/rank')
def schedule(cls, queue_name, tasks):
    """Add ``tasks`` to the named queue in batches of up to 100.

    Batches come from ``cls.split_every`` and are added through
    ``cls._add_single_batch``. A summary is logged when at least one task
    was scheduled.
    """
    batch_size = 100
    target_queue = Queue(queue_name)

    scheduled = 0
    for batch in cls.split_every(batch_size, tasks):
        cls._add_single_batch(target_queue, batch)
        scheduled = scheduled + len(batch)

    if not scheduled > 0:
        return
    logging.info("Scheduled %d tasks in max %d batches", scheduled, batch_size)
def _AddUserRecallTasks(self, user_recall_tasks):
    """Add the user recall task(s) to 'user-recall-queue'.

    Args:
      user_recall_tasks: Task or list of Tasks; one for each user.

    Raises:
      Whatever the task queue raises; nothing is caught here.
    """
    recall_queue = Queue('user-recall-queue')
    recall_queue.add(task=user_recall_tasks)
def test_items_to_tasks(self):
    """
    Three items must produce a returned count of three and three queued tasks.
    """
    returned_count = task_utils.items_to_tasks(
        items=[1, 2, 3],
        queue=Queue('adapter-update'),
        task_generator=lambda item: Task(url='/something/{}'.format(item)))
    self.assertEqual(returned_count, 3)

    queued_tasks = self.taskqueue.get_filtered_tasks()
    self.assertEqual(len(queued_tasks), 3)
def schedule(cls, queue_name, tasks):
    """Add one task or a list of tasks to the named queue, 100 per page.

    Anything that is not a list is wrapped in one first. Pages come from
    ``paginated`` and a summary is logged when at least one task was
    scheduled.
    """
    task_list = tasks if isinstance(tasks, list) else [tasks]

    page_size = 100
    target_queue = Queue(queue_name)

    scheduled = 0
    for page in paginated(page_size, task_list):
        cls.__add_single_batch(target_queue, page)
        scheduled = scheduled + len(page)

    if not scheduled > 0:
        return
    logging.info("Scheduled %d tasks in max %d batches", scheduled, page_size)
def test_query_to_tasks(self):
    """
    Only the two connected orgs (of three stored) must get a task.
    """
    for org_id, status in (('test1', CONNECTED),
                           ('test2', CONNECTED),
                           ('test3', DISCONNECTED)):
        Org(id=org_id, status=status).put()

    returned_count = task_utils.query_to_tasks(
        query=Org.query(Org.status == CONNECTED),
        queue=Queue('adapter-update'),
        task_generator=lambda key: Task(
            url='/something/{}'.format(key.string_id())))
    self.assertEqual(returned_count, 2)

    queued_tasks = self.taskqueue.get_filtered_tasks()
    self.assertEqual(len(queued_tasks), 2)
def init_all_updates():
    """
    Put an ``init_update`` task on the ``adapter-update`` queue for every
    connected org of an API provider whose last update cycle completed more
    than ``SYNC_INTERVAL`` ago, and log how many tasks were queued.
    """
    cutoff = datetime.utcnow() - SYNC_INTERVAL
    due_orgs = Org.query(
        Org.status == CONNECTED,
        Org.last_update_cycle_completed_at < cutoff,
        Org.provider.IN(API_PROVIDERS))
    # Queries involving IN need to be ordered by key
    due_orgs = due_orgs.order(-Org.last_update_cycle_completed_at, Org.key)

    def make_task(key):
        return Task(url='/adapter/{}/init_update'.format(key.string_id()))

    queued = query_to_tasks(
        query=due_orgs,
        queue=Queue('adapter-update'),
        task_generator=make_task)

    logging.info("queued {} tasks for a sync update".format(queued))
def _AddUserRetrievalTask(self, task):
  """Add the retrieval task(s) to 'retrieve-users-queue'.

  Do not set transactional=True in the Task because Push Queues have a
  5 Task per add limit when transactional=True per:
  http://developers.google.com/appengine/docs/python/taskqueue/overview-push

  On a TaskQueueError the recall task is marked as failed through
  view_utils.FailRecallTask before the error is raised again.

  Args:
    task: Task or list of Tasks to retrieve domain users.

  Raises:
    re-raises any taskqueue errors raised.
  """
  retrieval_queue = Queue('retrieve-users-queue')
  try:
    retrieval_queue.add(task=task)
  except TaskQueueError:
    view_utils.FailRecallTask(
        task_key_id=self._task_key_id,
        reason_string='Failed to enqueue retrieve users tasks.')
    raise
def post(self):
    """Queue a builder task per group plus one task-check task, then redirect.

    Nothing is queued while ``Queue()`` still reports tasks. The request
    always ends with a redirect to ``/rank``.
    """
    # refuse to start the tasks if there are some already running
    if Queue().fetch_statistics().tasks == 0:
        # queue up all of the groups into individual tasks. the configuration
        # in queue.yaml only allows 10 tasks to run at once. the builder only
        # allows 10 URL requests at a time, which should hopefully keep the
        # Blizzard API queries under control.
        for group in Group.query().fetch():
            taskqueue.add(url='/builder', params={'group': group.name})

        # The checker goes on its own queue.
        checker = Task(url='/builder', params={'group': 'ctrp-taskcheck'})
        Queue(name='taskcheck').add(checker)

    self.redirect('/rank')
class TestTaskQueue(TestGae):
    """Tests for the task queue helpers of the testbed base class."""

    def setUp(self):
        """Put a single task on the 'default' queue before every test."""
        TestGae.setUp(self, PROJECT_DIR)
        from google.appengine.api.taskqueue import Queue, Task
        default_queue = Queue('default')
        self.queue = default_queue
        default_queue.add(Task('xxx', url='/'))

    def test_queue(self):
        """Reading the queue keeps its tasks unless flushing is requested."""
        self.assert_tasks_num(1)

        kept = self.gae_tasks(queue_name='default', flush_queue=False)
        assert len(kept) == 1
        self.assert_tasks_num(1)

        flushed = self.gae_tasks(queue_name='default', flush_queue=True)
        assert len(flushed) == 1
        self.assert_tasks_num(0)

    def test_flask_execute(self):
        """Run queued tasks against a Flask test client."""
        from google.appengine.api.taskqueue import Task
        from flask import Flask

        app = Flask(__name__)
        app.config['TESTING'] = True

        @app.route('/', methods=['POST'])
        def root_page():
            """ flask view """
            return 'OK'

        client = app.test_client()

        # Executing without deletion leaves the task in the queue.
        data = self.gae_tasks_dict()
        assert len(data) == 1
        task = data[data.keys()[0]]
        self.gae_task_flask_execute(task, client, is_delete=False,
                                    is_debug_print=True)
        data = self.gae_tasks_dict()
        assert len(data) == 1

        # Executing with the default settings leaves the queue empty.
        self.gae_task_flask_execute(task, client, is_debug_print=True)
        data = self.gae_tasks_dict()
        assert not data

        # Executing the whole queue leaves it empty as well.
        self.queue.add(Task('xxx', url='/'))
        self.gae_queue_flask_execute(client)
        data = self.gae_tasks_dict()
        assert not data

    def test_dict(self):
        """The queue content as a dict holds the single task as 'task1'."""
        content = self.gae_tasks_dict()
        assert len(content) == 1
        assert 'task1' in content

    def test_dump(self):
        """Dump the queue, first with default fields and then with two."""
        self.gae_queue_dump()
        self.gae_queue_dump(fields=['name', 'url'])
def set_up(self):
    """Create an RPC and start adding ``self._task`` to the queue with it."""
    rpc = taskqueue.create_rpc()
    self._rpc = rpc
    target_queue = Queue(self._queue_name)
    target_queue.add_async(self._task, rpc=rpc)
def get(self):
    """Queue a GET scrape task for pages 0-49 on 'scrape' and answer "done"."""
    page_count = 50
    scrape_queue = Queue('scrape')
    page = 0
    while page < page_count:
        scrape_queue.add(Task(url='/tasks/scrape?page=%d' % page, method='GET'))
        page += 1
    self.response.out.write("done")
def get(self):
    """Add one GET scrape task per page (0 to 49) to the 'scrape' queue.

    Writes "done" to the response once all tasks are added.
    """
    total_pages = 50
    scrape_queue = Queue('scrape')
    for page_number in range(total_pages):
        scrape_task = Task(url='/tasks/scrape?page=%d' % page_number,
                           method='GET')
        scrape_queue.add(scrape_task)
    self.response.out.write("done")
def __init__(self):
    """Keep a ``Queue()`` built without arguments on ``self.queue``."""
    unnamed_queue = Queue()
    self.queue = unnamed_queue
def start_publish():
    """
    Kicks off a dataflow template to publish normalised data. The jobs are
    created via a task queue task, passing the ID of the OrgChangesets which
    need to be published.

    This endpoint is invoked by a regular cron job or by a request from the
    admin UI, and takes an additional parameter which allows for each org to
    be published by a separate dataflow job (this is useful for isolation of
    an org which causes the whole publish job to fail).

    Changesets of orgs that already have a publish job running, and of orgs
    with ``publish_disabled`` set, are left out.

    Returns:
        (str, int): http response
    """
    logging.info("about to kick off a publish dataflow job")

    per_org = request.form.get('per_org') == '1'
    if per_org:
        logging.info("publish job per org requested")

    # we want to publish changesets which:
    # - have newly been ingested (publish not running and not finished)
    # - OR have been attempted to be published but failed
    #   - due to the whole job failing
    #   - OR publish of the individual changeset failing
    org_changesets_query = OrgChangeset.query(
        ndb.OR(
            ndb.AND(OrgChangeset.publish_job_running == False,
                    OrgChangeset.publish_job_finished == False),
            ndb.AND(
                OrgChangeset.publish_job_running == False,
                OrgChangeset.publish_job_finished == True,
                ndb.OR(OrgChangeset.publish_job_failed == True,
                       OrgChangeset.publish_changeset_failed == True)))).order(
                           OrgChangeset.key)
    org_changesets = list(emit_items(org_changesets_query))

    # Query any currently running org changesets
    running_org_changesets_query = OrgChangeset.query(
        OrgChangeset.publish_job_running == True)
    # A set keeps the membership tests below cheap.
    running_orgs = set(
        running_org_changeset.org_uid
        for running_org_changeset in emit_items(running_org_changesets_query))

    # Filter any org changesets that already have a running changeset for that org
    gated_org_changesets = [
        oc for oc in org_changesets if oc.org_uid not in running_orgs
    ]
    if len(gated_org_changesets) != len(org_changesets):
        filtered_oc_tuples = [(oc.org_uid, oc.changeset)
                              for oc in org_changesets
                              if oc.org_uid in running_orgs]
        logging.info(
            "stopped these changesets from being published as job already running for the org: {}"
            .format(filtered_oc_tuples))

    if not gated_org_changesets:
        logging.info("nothing to publish")
        return '', 204

    # remove changesets for blacklisted orgs
    # Each org is looked up once. The previous cache passed the lookup as the
    # default of dict.get, which is evaluated on every call, so it never
    # saved a datastore read.
    publish_disabled_by_org = {}
    org_changesets_to_publish = []
    for org_changeset in gated_org_changesets:
        org_uid = org_changeset.org_uid
        if org_uid not in publish_disabled_by_org:
            org = Org.get_by_id(org_uid)
            publish_disabled_by_org[org_uid] = bool(org and org.publish_disabled)
        if not publish_disabled_by_org[org_uid]:
            org_changesets_to_publish.append(org_changeset)

    to_publish = []
    if per_org:
        # groupby only groups neighbours, so sort by the same key first.
        org_changesets_sorted = sorted(org_changesets_to_publish,
                                       key=attrgetter('org_uid'))
        for org_uid, changesets in groupby(org_changesets_sorted,
                                           key=attrgetter('org_uid')):
            to_publish.append({
                'org_uid': org_uid,
                'org_changeset_ids':
                    [changeset.key.id() for changeset in changesets]
            })
    else:
        to_publish.append({
            'org_changeset_ids':
                [changeset.key.id() for changeset in org_changesets_to_publish]
        })

    logging.info("have {} publish tasks to create".format(len(to_publish)))

    items_to_tasks(items=to_publish,
                   queue=Queue('create-publish-job'),
                   task_generator=lambda item: Task(
                       url='/orchestrator/create_publish_job_task',
                       payload=dumps({'job_params': item})))

    return '', 204
def get(self):
    """Show the message page and, when due, queue a tweet fetch for the user.

    A session without ``authorized_tokens`` is redirected to ``/connect``.
    Otherwise a ``UserStatistic`` is created for the user if there is none
    yet, and a ``/fetch`` task is added to the ``fetch-tweets`` queue when
    the user has no statistics or they were updated more than an hour ago.
    The page reports whether an update was queued or an error occurred.
    """
    session = get_current_session()
    authorized_tokens = session.get('authorized_tokens', None)
    if authorized_tokens is None:
        # Stop here: everything below reads the tokens and would fail on
        # None after the redirect had already been issued.
        self.redirect('/connect')
        return

    # The Twython client that used to be built here was never used.
    twitter_id = authorized_tokens['user_id']
    username = authorized_tokens['screen_name']

    statistic = UserStatistic.get_by_key_name(twitter_id)
    if statistic is None:
        statistic = UserStatistic(
            key_name=twitter_id,
            twitter_id=long(twitter_id),
        )
        statistic.put()

    updated = False
    error = False
    if statistic.statistics is None \
            or (statistic.updated + timedelta(hours=1)) < datetime.now():
        try:
            task = Task(
                url = '/fetch',
                params = {'twitter_id': twitter_id}
            )
            queue = Queue(name='fetch-tweets')
            queue.add(task)

            statistic.updated = datetime.now()
            statistic.put()
            updated = True
        except Exception:
            # need to send to error page if error is happened
            logging.exception('something bad happened')
            error = True

    template_values = {
        'username': username,
        'updated': updated,
        'error': error,
    }
    path = os.path.join(TEMPLATE, 'message.html')
    self.response.out.write(template.render(path, template_values))
def post(self):
    """Count tweets per author in the user's friends timeline and store them.

    Pages backwards through the timeline, 200 tweets per request, until a
    page comes back empty or more than ``MAX_TWEETS`` tweets were seen.
    Authors with more than four tweets are stored on the user's
    ``UserStatistic`` as JSON, ordered by tweet count (highest first),
    together with the covered time range and the total. Finally a
    ``/notify`` task is added to the ``notify-user`` queue.

    Returns without doing anything when ``twitter_id`` is missing or when
    the user or the statistic entity does not exist.
    """
    twitter_id = self.request.get('twitter_id', None)
    if twitter_id is None:
        return

    user = User.get_by_key_name(twitter_id)
    statistic = UserStatistic.get_by_key_name(twitter_id)
    if user is None or statistic is None:
        # Nothing to work with; failing later on None would only make the
        # task queue retry a request that cannot succeed.
        logging.warning('no user or statistic for twitter_id %s', twitter_id)
        return

    username = user.username
    twitter = Twython(
        twitter_token = CONSUMER_KEY,
        twitter_secret = CONSUMER_SECRET,
        oauth_token = user.oauth_token,
        oauth_token_secret = user.oauth_token_secret,
    )

    created_at_format = "%a %b %d %H:%M:%S +0000 %Y"
    stat = dict()
    total = 0
    max_id = None
    start_time = None
    end_time = None
    while True:
        query = dict(count=200, include_entities=1)
        if max_id is not None:
            query['max_id'] = max_id
        tweets = twitter.getFriendsTimeline(**query)

        if len(tweets) == 0:
            break

        total = total + len(tweets)

        # Tweets arrive newest first: the very first one marks the end of
        # the covered range, the last one of each page its current start.
        if end_time is None:
            end_time = datetime.strptime(tweets[0]['created_at'],
                                         created_at_format)
        last_tweet = tweets[-1]
        start_time = datetime.strptime(last_tweet['created_at'],
                                       created_at_format)

        for tweet in tweets:
            author = tweet['user']['screen_name']
            stat[author] = stat.get(author, 0) + 1

        # max_id is inclusive, so ask for tweets strictly older than the
        # last one seen; otherwise every page starts with a tweet that was
        # already counted and an empty page is never reached.
        max_id = last_tweet['id'] - 1

        if total > MAX_TWEETS:
            break

    sorted_stat = sorted(stat, key=stat.get)
    sorted_stat.reverse()
    sorted_dict = [
        dict(user=author, count=stat[author])
        for author in sorted_stat
        if stat[author] > 4
    ]

    statistic.start_time = start_time
    statistic.end_time = end_time
    statistic.total = total
    statistic.statistics = simplejson.dumps(sorted_dict)
    statistic.count += 1
    statistic.put()

    # notify user
    try:
        task = Task(
            url = '/notify',
            params = {'username': username}
        )
        queue = Queue(name='notify-user')
        queue.add(task)
    except Exception:
        # need to send to error page if error is happened
        logging.exception('something bad happened')
def setUp(self):
    """Run the base set-up, then put a single task on the 'default' queue."""
    TestGae.setUp(self, PROJECT_DIR)
    from google.appengine.api.taskqueue import Queue, Task
    default_queue = Queue('default')
    self.queue = default_queue
    default_queue.add(Task('xxx', url='/'))
class MessageIterator(object):
    """This iterator will return a batch of messages for a given group.

    This iterator should be directly used when trying to avoid the lease
    operation inside a transaction, or when other flows are needed.
    """

    def __init__(self, tag, queue_name, size, duration=60, deadline=10,
                 auto_delete=True):
        """The generator will yield json deserialized payloads from tasks with
        the corresponding tag.

        :param tag: :class: `str` Pull queue tag to query against
        :param queue_name: :class: `str` Name of PULL queue holding tasks to
                           lease.
        :param size: :class: `int` The number of items to pull at once
        :param duration: :class: `int` After this time, the tasks may be leased
                         again. Tracked in seconds
        :param deadline: :class: `int` The time in seconds to wait for the rpc.
        :param auto_delete: :class: `bool` Delete tasks when iteration is
                            complete.

        :return: :class: `iterator` of json deserialized payloads
        """
        from google.appengine.api.taskqueue import Queue

        self.queue_name = queue_name
        self.queue = Queue(name=self.queue_name)

        self.tag = tag
        self.size = size

        self.duration = duration
        self.auto_delete = auto_delete
        self.deadline = deadline

        self._messages = []
        self._processed_messages = []
        self._fetched = False

    def fetch_messages(self):
        """Fetch messages from the specified pull-queue.

        This should only be called a single time by a given MessageIterator
        object.  If the MessageIterator is iterated over again, it should
        return the originally leased messages.

        :raises DeadlineExceededError: when nothing was leased and the call
            took (almost) the whole deadline.
        """
        if self._fetched:
            return

        start = time.time()

        loaded_messages = self.queue.lease_tasks_by_tag(
            self.duration, self.size, tag=self.tag, deadline=self.deadline)

        # If we are within 0.1 sec of our deadline and no messages were
        # returned, then we are hitting queue contention issues and this
        # should be a DeadlineExceederError.
        # TODO: investigate other ways around this, perhaps async leases, etc.
        if (not loaded_messages and
                round(time.time() - start, 1) >= self.deadline - 0.1):
            raise DeadlineExceededError()

        self._messages.extend(loaded_messages)

        self._fetched = True

        logging.debug("Calling fetch messages with %s:%s:%s:%s:%s:%s" % (
            len(self._messages), len(loaded_messages),
            len(self._processed_messages), self.duration, self.size, self.tag))

    def __iter__(self):
        """Initialize this MessageIterator for iteration.

        If messages have not been fetched, fetch them. If messages have been
        fetched, reset self._messages and self._processed_messages for
        re-iteration. The reset is done to prevent deleting messages that were
        never applied.
        """
        if self._processed_messages:
            # If the iterator is used within a transaction, and there is a
            # retry we need to re-process the original messages, not new
            # messages.
            # Processed messages go back to the front, each message once, so
            # that a retry sees them in the order they were first handed out
            # (a plain set union would shuffle them).
            restored = []
            seen = set()
            for message in self._processed_messages + self._messages:
                if message not in seen:
                    seen.add(message)
                    restored.append(message)
            self._messages = restored
            self._processed_messages = []

        if not self._messages:
            self.fetch_messages()

        return self

    def next(self):
        """Get the next batch of messages from the previously fetched messages.

        If there's no more messages, check if we should auto-delete the
        messages and raise StopIteration.
        """
        if not self._messages:
            if self.auto_delete:
                self.delete_messages()
            raise StopIteration

        message = self._messages.pop(0)
        self._processed_messages.append(message)
        return json.loads(message.payload)

    # Python 3 spelling of the iterator protocol method.
    __next__ = next

    def delete_messages(self, only_processed=True):
        """Delete the messages previously leased.

        Unless otherwise directed, only the messages iterated over will be
        deleted.

        :param only_processed: :class: `bool` When False, messages that were
                               leased but not yet iterated over are deleted
                               as well.
        """
        # Work on a copy: extending the list in place would silently mark
        # the unprocessed messages as processed on this object.
        messages = list(self._processed_messages)
        if not only_processed:
            messages += self._messages

        if messages:
            try:
                self.queue.delete_tasks(messages)
            except Exception:
                logging.exception("Error deleting messages")
                raise
class MessageIterator(object):
    """This iterator will return a batch of messages for a given group.

    This iterator should be directly used when trying to avoid the lease
    operation inside a transaction, or when other flows are needed.
    """

    def __init__(self, tag, queue_name, size, duration=60, deadline=10,
                 auto_delete=True):
        """The generator will yield json deserialized payloads from tasks with
        the corresponding tag.

        :param tag: :class: `str` Pull queue tag to query against
        :param queue_name: :class: `str` Name of PULL queue holding tasks to
                           lease.
        :param size: :class: `int` The number of items to pull at once
        :param duration: :class: `int` After this time, the tasks may be leased
                         again. Tracked in seconds
        :param deadline: :class: `int` The time in seconds to wait for the rpc.
        :param auto_delete: :class: `bool` Delete tasks when iteration is
                            complete.

        :return: :class: `iterator` of json deserialized payloads
        """
        from google.appengine.api.taskqueue import Queue

        self.queue_name = queue_name
        self.queue = Queue(name=self.queue_name)

        self.tag = tag
        self.size = size

        self.duration = duration
        self.auto_delete = auto_delete
        self.deadline = deadline

        self._messages = []
        self._processed_messages = []
        self._fetched = False

    def fetch_messages(self):
        """Fetch messages from the specified pull-queue.

        This should only be called a single time by a given MessageIterator
        object.  If the MessageIterator is iterated over again, it should
        return the originally leased messages.

        :raises DeadlineExceededError: when nothing was leased and the call
            took (almost) the whole deadline.
        """
        if self._fetched:
            return

        start = time.time()

        loaded_messages = self.queue.lease_tasks_by_tag(self.duration,
                                                        self.size,
                                                        tag=self.tag,
                                                        deadline=self.deadline)

        # If we are within 0.1 sec of our deadline and no messages were
        # returned, then we are hitting queue contention issues and this
        # should be a DeadlineExceederError.
        # TODO: investigate other ways around this, perhaps async leases, etc.
        if (not loaded_messages and
                round(time.time() - start, 1) >= self.deadline - 0.1):
            raise DeadlineExceededError()

        self._messages.extend(loaded_messages)

        self._fetched = True

        logging.debug("Calling fetch messages with %s:%s:%s:%s:%s:%s" %
                      (len(self._messages), len(loaded_messages),
                       len(self._processed_messages), self.duration,
                       self.size, self.tag))

    def __iter__(self):
        """Initialize this MessageIterator for iteration.

        If messages have not been fetched, fetch them. If messages have been
        fetched, reset self._messages and self._processed_messages for
        re-iteration. The reset is done to prevent deleting messages that were
        never applied.
        """
        if self._processed_messages:
            # If the iterator is used within a transaction, and there is a
            # retry we need to re-process the original messages, not new
            # messages.
            # Processed messages go back to the front, each message once, so
            # that a retry sees them in the order they were first handed out
            # (a plain set union would shuffle them).
            restored = []
            seen = set()
            for message in self._processed_messages + self._messages:
                if message not in seen:
                    seen.add(message)
                    restored.append(message)
            self._messages = restored
            self._processed_messages = []

        if not self._messages:
            self.fetch_messages()

        return self

    def next(self):
        """Get the next batch of messages from the previously fetched messages.

        If there's no more messages, check if we should auto-delete the
        messages and raise StopIteration.
        """
        if not self._messages:
            if self.auto_delete:
                self.delete_messages()
            raise StopIteration

        message = self._messages.pop(0)
        self._processed_messages.append(message)
        return json.loads(message.payload)

    # Python 3 spelling of the iterator protocol method.
    __next__ = next

    def delete_messages(self, only_processed=True):
        """Delete the messages previously leased.

        Unless otherwise directed, only the messages iterated over will be
        deleted.

        :param only_processed: :class: `bool` When False, messages that were
                               leased but not yet iterated over are deleted
                               as well.
        """
        # Work on a copy: extending the list in place would silently mark
        # the unprocessed messages as processed on this object.
        messages = list(self._processed_messages)
        if not only_processed:
            messages += self._messages

        if messages:
            try:
                self.queue.delete_tasks(messages)
            except Exception:
                logging.exception("Error deleting messages")
                raise