Example #1
class TaskMaster(object):
    """Creates and enqueues tasks"""
    def __init__(self):
        self.queue = Queue()

    def add_feed_update_task(self):
        """Enqueue task updating feeds"""
        self.queue.add(Task(url='/task/update_feeds'))  # TODO make url a parameter

    def add_torrent_task(self, torrent_entry):
        """"Enqueue task for torrent entry represented by dict"""
        task = Task(url='/task/torrent', params=torrent_entry)  # TODO make url a parameter
        self.queue.add(task)

    def add_new_torrents(self, scraper):
        """Enqueues tasks for all new torrents"""
        try:
            new_entries = scraper.get_new_torrents()

        except webclient.NotLoggedIn:   # Session expired
            pass
        except webclient.RequestError:  # Tracker is down, happens sometimes
            pass
        else:
            for e in new_entries:
                self.add_torrent_task(e)
Example #2
    def post(self):
        groupname = self.request.get('group')
        if groupname == 'ctrp-taskcheck':

            # Grab the default queue and keep checking for whether or not
            # all of the tasks have finished.
            default_queue = Queue()
            stats = default_queue.fetch_statistics()
            while stats.tasks > 0:
                logging.info("task check: waiting for %d tasks to finish" %
                             stats.tasks)
                time.sleep(5)
                stats = default_queue.fetch_statistics()

            self.finishBuilding()

        else:
            importer = wowapi.Importer()

            q = Group.query(Group.name == groupname)
            groups = q.fetch()
            # sanity check, tho this shouldn't be possible
            if len(groups) == 0:
                logging.info('Builder failed to find group %s' % groupname)
                return

            logging.info('Builder task for %s started' % groupname)
            self.processGroup(groups[0], importer, True)
            logging.info('Builder task for %s completed' % groupname)
Example #3
    def __init__(self, tag, queue_name, size, duration=60, auto_delete=True):
        """The generator will yield json deserialized payloads from tasks with
        the corresponding tag.

        :param tag: :class: `str` Pull queue tag to query against
        :param queue_name: :class: `str` Name of PULL queue holding tasks to
                           lease.
        :param size: :class: `int` The number of items to pull at once
        :param duration: :class: `int` After this time, the tasks may be leased
                         again. Tracked in seconds
        :param auto_delete: :class: `bool` Delete tasks when iteration is
                            complete.

        :return: :class: `iterator` of json deserialized payloads
        """
        from google.appengine.api.taskqueue import Queue

        self.queue_name = queue_name
        self.queue = Queue(name=self.queue_name)

        self.tag = tag
        self.size = size
        self.duration = duration
        self.auto_delete = auto_delete

        self._messages = []
        self._processed_messages = []
        self._fetched = False
Example #4
def schedule_tasks(tasks, queue_name=deferred._DEFAULT_QUEUE):
    # type: (list[Task], str) -> list[Task]
    queue = Queue(queue_name)
    results = []
    for chunk in chunks(tasks, MAX_TASKS_PER_ADD):
        results.extend(queue.add(chunk))
    return results
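Example #4 depends on a chunks helper and a MAX_TASKS_PER_ADD constant that are not part of the excerpt; a single Queue.add() call only accepts a limited number of tasks (100 in the classic Task Queue API), so the list has to be split. A minimal sketch of what that helper could look like, under the assumption that it simply yields fixed-size slices:

MAX_TASKS_PER_ADD = 100  # assumed value; the classic Task Queue API caps tasks per add() call


def chunks(items, size):
    """Yield successive slices of at most `size` items from `items`."""
    for start in range(0, len(items), size):
        yield items[start:start + size]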
Example #5
    def get(self):
        queue = Queue()
        stats = queue.fetch_statistics()

        template_values = {
            'tasks': stats.tasks,
            'in_flight': stats.in_flight,
        }
        template = JINJA_ENVIRONMENT.get_template('templates/ranker.html')
        self.response.write(template.render(template_values))
Example #6
 def addTasks(self):
     q = Comic.all()
     queue = Queue(name='update-queue')
     tasks = []
     for comic in q:
         tasks.append(Task(url='/tasks/update/' + str(comic.id)))
         if len(tasks) >= 100:
             queue.add(tasks)
             tasks = []
     if tasks:
         queue.add(tasks)  # flush the final partial batch
     self.response.out.write("OK")
Example #8
    def _queue_tasks(self):
        scanners = QuestionsScanner.all()

        tasks = []
        for scanner in scanners:
            domain = scanner.key().name()
            task = Task(url='/tasks/scan_new_questions/%s' % (domain, ))
            tasks.append(task)

        if len(tasks) > 0:
            queue = Queue(name="scannewquestions")
            queue.add(tasks)
Example #9
def check_users_sweeper(request):
    logger.info('check users sweeper running...')
    expired_users = [user for user in NIH_User.objects.all()
                     if is_very_expired(user.NIH_assertion_expiration) and user.active]

    if expired_users:
        fallback_queue = Queue(name=FALLBACK_QUEUE_NAME)
        batch_size = 25
        for i in xrange(0, len(expired_users), batch_size):
            users = expired_users[i:i+batch_size]
            logger.info('scheduling check_user_login tasks for the following users: ')
            logger.info(str([str(user.NIH_username) for user in users]))
            tasks = [Task(url='/tasks/check_user_login', params={'user_id': user.user_id}, countdown=0) for user in users]
            fallback_queue.add(tasks)
    return HttpResponse('')
Example #10
    def add(self, params):
        event = Event(
            name=params["name"],
            file=params["file"],
            file150=params["file150"],
            filetype=params["filetype"].split('.')[1],
            start_date=params["start_date"],
#            end_date=params["end_date"],
            description=params["description"],
            creator=params["creator"],
#            people_invited=params["people_invited"],
#            type=params["type"],
        )
        event.put()

        invited = params["people_invited"]
        queue = Queue('mail-queue')
        for email in invited:
            iu = InvitedUser(email=email, event=event)
            subject = "You have been invited to the event " + event.name + " in Rockolin'"
            body = """     Hi! You have been invited to the event """ + event.name + """
                    This event will be on: """ + str(event.start_date) + """
                    If you want to decide the music, just go to the following
                    link: http://rockolinapp.appspot.com/event/""" + str(event.key())

            queue.add(Task(url='/task/mail', params={'to': email, 'subject': subject, 'body': body}))
            iu.put()
        # note: purge() removes every task in the queue, including the mail tasks just added
        queue.purge()

        for s in params["setlist"]:
            song = Song.get(s)
            slv = SetListVotes(event=event, song=song, votes=0)
            slv.put()

        return event
Example #11
    def __init__(self, tag, queue_name, size, duration=60, deadline=10,
                 auto_delete=True):
        """The generator will yield json deserialized payloads from tasks with
        the corresponding tag.

        :param tag: :class: `str` Pull queue tag to query against
        :param queue_name: :class: `str` Name of PULL queue holding tasks to
                           lease.
        :param size: :class: `int` The number of items to pull at once
        :param duration: :class: `int` After this time, the tasks may be leased
                         again. Tracked in seconds
        :param deadline: :class: `int` The time in seconds to wait for the rpc.
        :param auto_delete: :class: `bool` Delete tasks when iteration is
                            complete.

        :return: :class: `iterator` of json deserialized payloads
        """
        from google.appengine.api.taskqueue import Queue

        self.queue_name = queue_name
        self.queue = Queue(name=self.queue_name)

        self.tag = tag
        self.size = size
        self.duration = duration
        self.auto_delete = auto_delete
        self.deadline = deadline

        self._messages = []
        self._processed_messages = []
        self._fetched = False
Example #12
def post_all_feeds():
    """Post all new items for feeds for a specific interval"""
    if request.headers.get('X-Appengine-Cron') != 'true':
        raise ndb.Return(jsonify_error(message='Not a cron call'))

    logger.info('Starting a post job')
    futures = []
    for feed_type, feed_class in FEED_TYPE_TO_CLASS.iteritems():
        feeds = feed_class.query(feed_class.is_dirty == True)
        logger.info("Got some feeds_count: %s feeds_type: %s", feeds.count(), feed_type)
        success = 0
        more = True
        cursor = None
        while more:
            feeds_to_fetch, cursor, more = yield feeds.fetch_page_async(BATCH_SIZE, start_cursor=cursor)
            keys = ','.join([x.key.urlsafe() for x in feeds_to_fetch])
            if not keys:
                continue
            futures.append(Queue().add_async(Task(url=url_for('tq_feed_post-canonical'), method='POST', params={'keys': keys})))
            success += len(feeds_to_fetch)
        logger.info('queued post for %d feeds feed_type:%s', success, feed_type)

    for future in futures:
        yield future

    logger.info('Finished Post Job')
    yield write_epoch_to_stat(Stat, 'post_job')
    raise ndb.Return(jsonify(status='ok'))
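Examples #12 through #14 use yield together with raise ndb.Return(...), which only works inside an ndb tasklet, so each handler is presumably wrapped with @ndb.tasklet or @ndb.synctasklet elsewhere in the module. A minimal sketch of that assumed pattern, with the queue name and URL as placeholders:

from google.appengine.ext import ndb
from google.appengine.api.taskqueue import Queue, Task


@ndb.synctasklet
def enqueue_batch(keys):
    """Sketch: fire an async taskqueue add from inside a tasklet and wait on it."""
    rpc = Queue('poll').add_async(
        Task(url='/tq/feed_poll', method='POST', params={'keys': keys}))
    yield rpc  # the tasklet resumes once the add RPC has completed
    raise ndb.Return('ok')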
Example #13
def update_all_feeds(interval_id):
    """Update all feeds for a specific interval"""
    if request.headers.get('X-Appengine-Cron') != 'true':
        raise ndb.Return(jsonify_error(message='Not a cron call'))

    futures = []
    success = 0
    for feed_type, feed_class in FEED_TYPE_TO_CLASS.iteritems():
        feeds = Feed.for_interval(interval_id)
        more = True
        cursor = None
        while more:
            feeds_to_fetch, cursor, more = yield feeds.fetch_page_async(BATCH_SIZE, start_cursor=cursor)
            feeds_to_fetch = filter(lambda x: getattr(x, 'external_polling_bucket', DEFAULT_POLLING_BUCKET) == DEFAULT_POLLING_BUCKET, feeds_to_fetch)
            keys = ','.join([x.key.urlsafe() for x in feeds_to_fetch])
            if not keys:
                continue

            futures.append(Queue('poll').add_async(Task(url=url_for('tq_feed_poll-canonical'), method='POST', params={'keys': keys})))
            success += len(feeds_to_fetch)

    for future in futures:
        yield future

    logger.info('queued poll for %d feeds at interval_id=%s', success, interval_id)

    raise ndb.Return(jsonify(status='ok'))
Example #14
def instagram_push_update():
    data = request.stream.read()
    instagram_client_secret = Configuration.value_for_name('instagram_client_secret')

    server_signature = request.headers.get('X-Hub-Signature', None)
    signature = hmac.new(str(instagram_client_secret), data, digestmod=hashlib.sha1).hexdigest()

    if server_signature != signature:
        logger.warn('Got PuSH subscribe POST from instagram w/o valid signature: sent=%s != expected=%s',
                    server_signature, signature)

        raise ndb.Return('')

    logger.info('Got PuSH body: %s', data)
    logger.info('Got PuSH headers: %s', request.headers)

    parsed_feed = json.loads(data)
    user_ids = [int(x.get('object_id')) for x in parsed_feed]
    feeds = InstagramFeed.query(InstagramFeed.user_id.IN(user_ids))

    cursor = None
    more = True
    keys = []
    while more:
        feed_keys, cursor, more = feeds.fetch_page(BATCH_SIZE, keys_only=True, start_cursor=cursor)
        keys += feed_keys

    keys = ','.join([x.urlsafe() for x in keys])
    if keys:
        yield Queue('poll').add_async(Task(url=url_for('tq_feed_poll-canonical'), method='POST', params={'keys': keys}))

    raise ndb.Return('ok')
Example #15
def reset_endpoints():
    """
    Handler which creates reset endpoint tasks for selected endpoints/orgs.
    """
    endpoint_indexes = request.form.getlist('endpoint_index')
    org_uid = request.form.get('org_uid')

    if not endpoint_indexes:
        flash("At least one endpoint is required")
        return render_template('select_endpoints.html',
                               endpoints=ENDPOINTS,
                               org_uid=org_uid), 200

    if org_uid:
        taskqueue.add(target='admin',
                      url='/admin/reset_endpoints_task/{}'.format(org_uid),
                      params={'endpoint_index': endpoint_indexes})

        flash("Kicked off reset of {} endpoints for {}".format(
            len(endpoint_indexes), org_uid))

        return redirect(prefix('/'))
    else:
        count = query_to_tasks(
            query=Org.query(Org.status == CONNECTED),
            queue=Queue('admin'),
            task_generator=lambda key: Task(
                url='/admin/reset_endpoints_task/{}'.format(key.string_id()),
                params={'endpoint_index': endpoint_indexes}))

        flash("Kicked off reset of {} endpoints for {} orgs".format(
            len(endpoint_indexes), count))

        return redirect(prefix('/commands'))
Example #16
    def update(self, remote_ip, uptime):
        self.last_seen = datetime.utcnow()

        if self.last_ip != remote_ip:
            LogEntry.log_event(self.key(), 'Info', 'IP changed - new IP: ' + remote_ip)

        self.last_ip = remote_ip

        if uptime is not None:
            if self.uptime is not None and self.uptime > uptime:
                LogEntry.log_event(self.key(), 'Reboot',
                                   'Reboot - Previous uptime: ' + str(timedelta(seconds=self.uptime)))
                for action_key in self.reboot_actions:
                    try:
                        db.get(action_key).perform_action()
                    except Exception as exp:
                        logging.error('Error executing reboot action: ' + str(exp))

        self.uptime = uptime
        self.put()

        # job got back online
        if self.status == 'offline':
            self.status = 'online'
            LogEntry.log_event(self.key(), 'Info', 'Job back online - IP: ' + remote_ip)

            # perform all back_online actions
            for action_key in self.backonline_actions:
                try:
                    db.get(action_key).perform_action()
                except Exception as exp:
                    logging.error('Error executing backonline action: ' + str(exp))

        # delete previous (waiting) task
        if self.task_name is not None:
            logging.debug('old task: ' + self.task_name)
            Queue().delete_tasks(Task(name=self.task_name))

        task_name = self.name + '_' + datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S-%f')

        # create a task that fires if update() is not called again within the interval (plus a 2-minute grace period)
        taskqueue.add(name=task_name, url='/task', params={'key': self.key()}, countdown=(self.interval + 2) * 60)

        self.task_name = task_name
        self.put()
Example #17
def send_voting_email(voting_user):
    """ Create voting user and add voting email to mail-queue
    Input:
        voting_user: VotingUser, the user to be sent.
    """
    # Make email content with token-link
    election = voting_user.key.parent().get()
    if not isinstance(election, Election):
        msg = 'voting_user should have election as ndb ancestor'
        raise ValueError(msg)

    voting_link = "http://ntuvb-allstar.appspot.com/vote/"\
                  + voting_user.token
    from_email = "*****@*****.**"
    email_body = (
        u"<h3>您好 {student_id}:</h3>"
        u"<p>感謝您參與{election_title} <br>"
        u"<h4><a href='{voting_link}'> 投票請由此進入 </a></h4> <br>"
        u"<p><b style=\"color: red\">此為您個人的投票連結,請勿轉寄或外流</b><br>"
        u"若您未參與本次投票,請直接刪除本封信件 <br>"
        u"任何疑問請來信至: {help_mail} <br></p>"
    ).format(
        student_id=voting_user.student_id,
        election_title=election.title,
        voting_link=voting_link,
        help_mail=from_email)
    text_body = u"投票請進: %s" % voting_link
    email_subject = election.title+u"投票認證信"
    to_email = voting_user.student_id+"@ntu.edu.tw"

    queue = Queue('mail-queue')
    queue.add(Task(
        url='/queue/mail',
        params={
            'subject': email_subject,
            'body': email_body,
            'text_body': text_body,
            'to': to_email,
            'from': from_email,
        }
    ))

    voting_user.last_time_mail_queued = datetime.now()
    key = voting_user.put()
    key.get()  # for strong consistency
Example #18
    def insert(self):
        """Insert the pull task into the requested queue, 'default' if non
        given.
        """
        from google.appengine.api.taskqueue import Queue

        task = self.to_task()

        Queue(name=self.get_queue()).add(task)
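insert() relies on to_task() and get_queue(), which are not shown in this excerpt. A plausible sketch of the surrounding class, assuming the payload is JSON-serialized into a PULL task tagged for later leasing (all attribute names here are assumptions):

import json

from google.appengine.api.taskqueue import Task


class PullMessage(object):
    """Hypothetical wrapper, consistent with the insert() method shown above."""

    def __init__(self, payload, tag, queue_name='default'):
        self.payload = payload
        self.tag = tag
        self.queue_name = queue_name

    def get_queue(self):
        return self.queue_name

    def to_task(self):
        # PULL tasks carry a payload and a tag instead of a URL; they are later
        # leased with lease_tasks_by_tag(), as in the MessageIterator examples.
        return Task(method='PULL', payload=json.dumps(self.payload), tag=self.tag)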
Example #19
    def post(self):
        # refuse to start the tasks if there are some already running
        queue = Queue()
        stats = queue.fetch_statistics()
        if stats.tasks == 0:
            print 'nop'
            taskqueue.add(url='/builder', params={'start': 'A', 'end': 'B'})
            taskqueue.add(url='/builder', params={'start': 'C', 'end': 'E'})
            taskqueue.add(url='/builder', params={'start': 'F', 'end': 'G'})
            taskqueue.add(url='/builder', params={'start': 'H', 'end': 'H'})
            taskqueue.add(url='/builder', params={'start': 'I', 'end': 'M'})
            taskqueue.add(url='/builder', params={'start': 'N', 'end': 'O'})
            taskqueue.add(url='/builder', params={'start': 'P', 'end': 'R'})
            taskqueue.add(url='/builder', params={'start': 'S', 'end': 'S'})
            taskqueue.add(url='/builder', params={'start': 'T', 'end': 'T'})
            taskqueue.add(url='/builder', params={'start': 'U', 'end': 'Z'})

        self.redirect('/rank')
Example #20
 def schedule(cls, queue_name, tasks):
     queue = Queue(queue_name)
     batch_size = 100
     task_count = 0
     for task_batch in cls.split_every(batch_size, tasks):
         cls._add_single_batch(queue, task_batch)
         task_count += len(task_batch)
     if task_count > 0:
         logging.info("Scheduled %d tasks in max %d batches", task_count,
                      batch_size)
Example #21
  def _AddUserRecallTasks(self, user_recall_tasks):
    """Helper to enqueue list of user recall tasks in batches.

    Args:
      user_recall_tasks: Task or list of Tasks; one for each user.

    Raises:
      re-raises any errors with task queue.
    """
    Queue('user-recall-queue').add(task=user_recall_tasks)
Example #22
    def test_items_to_tasks(self):
        """
        Verifies that multiple pages of tasks get queued up properly.
        """
        count = task_utils.items_to_tasks(
            items=[1, 2, 3],
            queue=Queue('adapter-update'),
            task_generator=lambda item: Task(url='/something/{}'.format(item)))

        self.assertEqual(count, 3)
        task_count = len(self.taskqueue.get_filtered_tasks())
        self.assertEqual(task_count, 3)
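The task_utils.items_to_tasks helper under test is not included in this listing. A sketch consistent with the test, assuming it maps each item through task_generator, enqueues the tasks in batches (a single Queue.add() call is capped at 100 tasks), and returns the number of items queued:

MAX_TASKS_PER_ADD = 100  # classic Task Queue API limit per add() call


def items_to_tasks(items, queue, task_generator):
    """Sketch: turn each item into a Task and enqueue the tasks in batches."""
    tasks = [task_generator(item) for item in items]
    for start in range(0, len(tasks), MAX_TASKS_PER_ADD):
        queue.add(tasks[start:start + MAX_TASKS_PER_ADD])
    return len(tasks)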
Example #23
 def schedule(cls, queue_name, tasks):
     if not isinstance(tasks, list):
         tasks = [tasks]
     queue = Queue(queue_name)
     page_size = 100
     task_count = 0
     for task_batch in paginated(page_size, tasks):
         cls.__add_single_batch(queue, task_batch)
         task_count += len(task_batch)
     if task_count > 0:
         logging.info("Scheduled %d tasks in max %d batches", task_count,
                      page_size)
Example #24
    def test_query_to_tasks(self):
        """
        Verifies that multiple pages of tasks get queued up properly.
        """
        Org(id='test1', status=CONNECTED).put()
        Org(id='test2', status=CONNECTED).put()
        Org(id='test3', status=DISCONNECTED).put()

        count = task_utils.query_to_tasks(
            query=Org.query(Org.status == CONNECTED),
            queue=Queue('adapter-update'),
            task_generator=lambda key: Task(url='/something/{}'.format(
                key.string_id())))

        self.assertEqual(count, 2)
        task_count = len(self.taskqueue.get_filtered_tasks())
        self.assertEqual(task_count, 2)
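task_utils.query_to_tasks is likewise not shown; judging from this test and from init_all_updates in Example #25, it pages through the query with keys only, builds one task per key, and returns the number of keys processed. A sketch under those assumptions (the page size is a guess):

def query_to_tasks(query, queue, task_generator, page_size=100):
    """Sketch: enqueue one task per key returned by an ndb query."""
    count = 0
    cursor = None
    more = True
    while more:
        keys, cursor, more = query.fetch_page(page_size, keys_only=True,
                                              start_cursor=cursor)
        if keys:
            queue.add([task_generator(key) for key in keys])
            count += len(keys)
    return count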
Example #25
def init_all_updates():
    """
    Initialises update cycle for each connected org by putting a task onto the update queue (which ends up calling
    init_new_changeset(org_uid)).
    """
    count = query_to_tasks(
        query=Org.query(
            Org.status == CONNECTED, Org.last_update_cycle_completed_at <
            datetime.utcnow() - SYNC_INTERVAL,
            Org.provider.IN(API_PROVIDERS)).order(
                -Org.last_update_cycle_completed_at,
                Org.key),  # Queries involving IN need to be ordered by key
        queue=Queue('adapter-update'),
        task_generator=lambda key: Task(url='/adapter/{}/init_update'.format(
            key.string_id())))

    logging.info("queued {} tasks for a sync update".format(count))
Example #26
  def _AddUserRetrievalTask(self, task):
    """Helper to transactionally add the tasks.

    Do not set transactional=True in the Task because Push Queues have a
    5 Task per add limit when transactional=True per:
    http://developers.google.com/appengine/docs/python/taskqueue/overview-push

    Args:
      task: Task or list of Tasks to retrieve domain users.

    Raises:
      re-raises any taskqueue errors raised.
    """
    try:
      Queue('retrieve-users-queue').add(task=task)
    except TaskQueueError:
      view_utils.FailRecallTask(
          task_key_id=self._task_key_id,
          reason_string='Failed to enqueue retrieve users tasks.')
      raise
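The docstring above refers to the limit of five transactional tasks per transaction on push queues, which is why the helper adds its tasks non-transactionally. If a caller did want transactional enqueueing, a guard along these lines could be used; the helper name and fallback behaviour are assumptions, not part of the class shown:

from google.appengine.api.taskqueue import Queue

MAX_TASKS_PER_TRANSACTION = 5  # push queues accept at most five tasks per transaction


def add_maybe_transactional(queue_name, tasks):
    """Sketch: only request transactional=True when the push-queue limit allows it.

    Assumes it is called from inside a datastore transaction when the
    transactional path is taken.
    """
    if not isinstance(tasks, list):
        tasks = [tasks]
    transactional = len(tasks) <= MAX_TASKS_PER_TRANSACTION
    Queue(queue_name).add(tasks, transactional=transactional)
    return transactional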
Example #27
    def post(self):

        # refuse to start the tasks if there are some already running
        queue = Queue()
        stats = queue.fetch_statistics()
        if stats.tasks == 0:

            # queue up all of the groups into individual tasks.  the configuration
            # in queue.yaml only allows 10 tasks to run at once.  the builder only
            # allows 10 URL requests at a time, which should hopefully keep the
            # Blizzard API queries under control.
            q = Group.query()
            groups = q.fetch()
            for g in groups:
                taskqueue.add(url='/builder', params={'group': g.name})

            checker = Task(url='/builder', params={'group': 'ctrp-taskcheck'})
            taskcheck = Queue(name='taskcheck')
            taskcheck.add(checker)

        self.redirect('/rank')
Example #28
class TestTaskQueue(TestGae):
    """
    Test testbed task queue
    """
    def setUp(self):
        TestGae.setUp(self, PROJECT_DIR)
        from google.appengine.api.taskqueue import Queue, Task

        self.queue = Queue('default')
        self.queue.add(Task('xxx', url='/'))

    def test_queue(self):
        """
        check for number of tasks in queue
        """
        self.assert_tasks_num(1)
        tasks = self.gae_tasks(queue_name='default', flush_queue=False)

        assert len(tasks) == 1
        self.assert_tasks_num(1)

        tasks = self.gae_tasks(queue_name='default', flush_queue=True)
        assert len(tasks) == 1
        self.assert_tasks_num(0)

    def test_flask_execute(self):
        """
        execute queue in a Flask app context
        """
        from google.appengine.api.taskqueue import Task
        from flask import Flask
        app = Flask(__name__)
        app.config['TESTING'] = True

        @app.route('/', methods=['POST'])
        def root_page():
            """
            flask view
            """
            return 'OK'

        client = app.test_client()

        data = self.gae_tasks_dict()
        assert len(data) == 1
        task = data[data.keys()[0]]

        self.gae_task_flask_execute(task,
                                    client,
                                    is_delete=False,
                                    is_debug_print=True)
        data = self.gae_tasks_dict()
        assert len(data) == 1

        self.gae_task_flask_execute(task, client, is_debug_print=True)
        data = self.gae_tasks_dict()
        assert not data

        self.queue.add(Task('xxx', url='/'))

        self.gae_queue_flask_execute(client)
        data = self.gae_tasks_dict()
        assert not data

    def test_dict(self):
        """
        get queue content as dict
        """
        data = self.gae_tasks_dict()
        assert len(data) == 1
        assert 'task1' in data

    def test_dump(self):
        """
        dump queue content
        """
        self.gae_queue_dump()
        self.gae_queue_dump(fields=['name', 'url'])
Example #29
 def set_up(self):
     self._rpc = taskqueue.create_rpc()
     q = Queue(self._queue_name)
     q.add_async(self._task, rpc=self._rpc)
Example #30
 def get(self):
     num_pages_to_fetch=50
     q = Queue('scrape')
     for i in range(0,num_pages_to_fetch):
         q.add(Task(url='/tasks/scrape?page=%d' % i, method='GET'))
     self.response.out.write("done")
Example #32
 def __init__(self):
     self.queue = Queue()
Example #33
def start_publish():
    """
    Kicks off a dataflow template to publish normalised data. The jobs are created via a task queue task, passing the
    ID of the OrgChangesets which need to be published.

    This endpoint is invoked by a regular cron job or by a request from the admin UI, and takes an additional parameter
    which allows for each org to be published by a separate dataflow job (this is useful for isolation of an org which
    causes the whole publish job to fail).

    Returns:
        (str, int): http response
    """
    logging.info("about to kick off a publish dataflow job")

    per_org = request.form.get('per_org') == '1'
    if per_org:
        logging.info("publish job per org requested")

    # we want to publish changesets which:
    # - have newly been ingested (publish not running and not finished)
    # - OR have been attempted to be published but failed
    #   - due to the whole job failing
    #   - OR publish of the individual changeset failing
    org_changesets_query = OrgChangeset.query(
        ndb.OR(
            ndb.AND(OrgChangeset.publish_job_running == False,
                    OrgChangeset.publish_job_finished == False),
            ndb.AND(
                OrgChangeset.publish_job_running == False,
                OrgChangeset.publish_job_finished == True,
                ndb.OR(OrgChangeset.publish_job_failed == True,
                       OrgChangeset.publish_changeset_failed == True)))).order(
                           OrgChangeset.key)

    org_changesets = list(emit_items(org_changesets_query))

    # Query any currently running org changesets
    running_org_changesets_query = OrgChangeset.query(
        OrgChangeset.publish_job_running == True)
    running_org_changesets = list(emit_items(running_org_changesets_query))

    running_orgs = list(
        set([
            running_org_changeset.org_uid
            for running_org_changeset in running_org_changesets
        ]))

    # Filter any org changesets that already have a running changeset for that org
    gated_org_changesets = filter(lambda oc: oc.org_uid not in running_orgs,
                                  org_changesets)

    if len(gated_org_changesets) != len(org_changesets):
        filtered_ocs = filter(lambda oc: oc.org_uid in running_orgs,
                              org_changesets)
        filtered_oc_tuples = [(oc.org_uid, oc.changeset)
                              for oc in filtered_ocs]

        logging.info(
            "stopped these changesets from being published as job already running for the org: {}"
            .format(filtered_oc_tuples))

    if not gated_org_changesets:
        logging.info("nothing to publish")
        return '', 204

    # remove changesets for blacklisted orgs
    blacklisted_orgs = {}
    org_changesets_to_publish = []
    for org_changeset in gated_org_changesets:
        org = blacklisted_orgs.get(org_changeset.org_uid,
                                   Org.get_by_id(org_changeset.org_uid))
        if org and org.publish_disabled:
            blacklisted_orgs[org.key.string_id()] = org
        else:
            org_changesets_to_publish.append(org_changeset)

    to_publish = []

    if per_org:
        org_changesets_sorted = sorted(org_changesets_to_publish,
                                       key=attrgetter('org_uid'))
        for org_uid, changesets in groupby(org_changesets_sorted,
                                           key=attrgetter('org_uid')):
            to_publish.append({
                'org_uid':
                org_uid,
                'org_changeset_ids':
                [changeset.key.id() for changeset in changesets]
            })
    else:
        to_publish.append({
            'org_changeset_ids':
            [changeset.key.id() for changeset in org_changesets_to_publish]
        })

    logging.info("have {} publish tasks to create".format(len(to_publish)))

    items_to_tasks(items=to_publish,
                   queue=Queue('create-publish-job'),
                   task_generator=lambda item: Task(
                       url='/orchestrator/create_publish_job_task',
                       payload=dumps({'job_params': item})))

    return '', 204
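emit_items is not defined in this excerpt; it appears to be a small generator that pages through an ndb query and yields every matching entity. A sketch under that assumption (the page size is arbitrary):

def emit_items(query, page_size=500):
    """Sketch: yield every entity matched by an ndb query, one page at a time."""
    cursor = None
    more = True
    while more:
        items, cursor, more = query.fetch_page(page_size, start_cursor=cursor)
        for item in items:
            yield item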
Example #34
    def get(self):
        session = get_current_session()
        authorized_tokens = session.get('authorized_tokens', None)
        if authorized_tokens is None:
            self.redirect('/connect')
            return

        twitter = Twython(
            twitter_token = CONSUMER_KEY,
            twitter_secret = CONSUMER_SECRET,
            oauth_token = authorized_tokens['oauth_token'],
            oauth_token_secret = authorized_tokens['oauth_token_secret']
        )

        twitter_id = authorized_tokens['user_id']
        username = authorized_tokens['screen_name']

        statistic = UserStatistic.get_by_key_name(twitter_id)
        if statistic is None:
            statistic = UserStatistic(
                                      key_name=twitter_id,
                                      twitter_id=long(twitter_id),
                                     )
            statistic.put()

        if statistic.statistics == None \
            or (statistic.updated + timedelta(hours=1)) < datetime.now():

            try:
                task = Task(
                            url = '/fetch',
                            params = {'twitter_id': twitter_id}
                           )

                queue = Queue(name='fetch-tweets')
                queue.add(task)

                statistic.updated = datetime.now()
                statistic.put()

                template_values = {
                    'username': username,
                    'updated': True,
                    'error': False,
                }

            except:
                # TODO: send the user to an error page if an error happens
                logging.exception('something bad happened')

                template_values = {
                    'username': username,
                    'updated': False,
                    'error': True,
                }

        else:
            template_values = {
                'username': username,
                'updated': False,
                'error': False,
            }

        path = os.path.join(TEMPLATE, 'message.html')
        self.response.out.write(template.render(path, template_values))
Example #35
    def post(self):
        twitter_id = self.request.get('twitter_id', None)
        if twitter_id is None:
            return

        user = User.get_by_key_name(twitter_id)
        username = user.username

        twitter = Twython(
            twitter_token = CONSUMER_KEY,
            twitter_secret = CONSUMER_SECRET,
            oauth_token = user.oauth_token,
            oauth_token_secret = user.oauth_token_secret,
        )

        statistic = UserStatistic.get_by_key_name(twitter_id)

        stat = dict()
        total = 0
        page = 0
        max_id = None
        start_time = None
        end_time = None
        while True:
            if max_id is None:
                tweets = twitter.getFriendsTimeline(
                                                 count=200,
                                                 include_entities=1,
                                                )
            else:
                tweets = twitter.getFriendsTimeline(
                                                 count=200,
                                                 include_entities=1,
                                                 max_id=max_id,
                                                 #page=page,
                                                )

            if len(tweets) == 0:
                break
            else:
                page = page + 1
                total = total + len(tweets)

                if end_time is None:
                    end_time = datetime \
                                     .strptime(tweets[0]['created_at'],
                                         "%a %b %d %H:%M:%S +0000 %Y")


                last_tweet = tweets[len(tweets) - 1]
                start_time = datetime \
                               .strptime(last_tweet['created_at'],
                                     "%a %b %d %H:%M:%S +0000 %Y")

            for tweet in tweets:
                user = tweet['user']['screen_name']
                if not stat.has_key(user):
                    stat[user] = 0

                stat[user] = stat[user] + 1

            max_id = tweets[len(tweets) - 1]['id']

            if total > MAX_TWEETS:
                break

        sorted_stat = sorted(stat, key=stat.get)
        sorted_stat.reverse()

        sorted_dict = []
        for item in sorted_stat:
            if stat[item] > 4:
                sorted_dict.append(dict(
                    user = item,
                    count = stat[item],
                ))


        statistic.start_time = start_time
        statistic.end_time = end_time 
        statistic.total = total
        statistic.statistics = simplejson.dumps(sorted_dict)
        statistic.count += 1
        statistic.put()

        # notify user
        try:
            task = Task(
                        url = '/notify',
                        params = {'username': username}
                       )

            queue = Queue(name='notify-user')
            queue.add(task)

        except:
            # TODO: send the user to an error page if an error happens
            logging.exception('something bad happened')
Example #36
    def setUp(self):
        TestGae.setUp(self, PROJECT_DIR)
        from google.appengine.api.taskqueue import Queue, Task

        self.queue = Queue('default')
        self.queue.add(Task('xxx', url='/'))
Example #37
class MessageIterator(object):
    """This iterator will return a batch of messages for a given group.

    This iterator should be directly used when trying to avoid the lease
    operation inside a transaction, or when other flows are needed.
    """

    def __init__(self, tag, queue_name, size, duration=60, deadline=10,
                 auto_delete=True):
        """The generator will yield json deserialized payloads from tasks with
        the corresponding tag.

        :param tag: :class: `str` Pull queue tag to query against
        :param queue_name: :class: `str` Name of PULL queue holding tasks to
                           lease.
        :param size: :class: `int` The number of items to pull at once
        :param duration: :class: `int` After this time, the tasks may be leased
                         again. Tracked in seconds
        :param deadline: :class: `int` The time in seconds to wait for the rpc.
        :param auto_delete: :class: `bool` Delete tasks when iteration is
                            complete.

        :return: :class: `iterator` of json deserialized payloads
        """
        from google.appengine.api.taskqueue import Queue

        self.queue_name = queue_name
        self.queue = Queue(name=self.queue_name)

        self.tag = tag
        self.size = size
        self.duration = duration
        self.auto_delete = auto_delete
        self.deadline = deadline

        self._messages = []
        self._processed_messages = []
        self._fetched = False

    def fetch_messages(self):
        """Fetch messages from the specified pull-queue.

        This should only be called a single time by a given MessageIterator
        object.  If the MessageIterator is iterated over again, it should
        return the originally leased messages.
        """
        if self._fetched:
            return

        start = time.time()

        loaded_messages = self.queue.lease_tasks_by_tag(
            self.duration, self.size, tag=self.tag, deadline=self.deadline)

        # If we are within 0.1 sec of our deadline and no messages were
        # returned, then we are hitting queue contention issues and this
        # should be a DeadlineExceededError.
        # TODO: investigate other ways around this, perhaps async leases, etc.
        if (not loaded_messages and
                round(time.time() - start, 1) >= self.deadline - 0.1):
            raise DeadlineExceededError()

        self._messages.extend(loaded_messages)

        self._fetched = True

        logging.debug("Calling fetch messages with %s:%s:%s:%s:%s:%s" % (
            len(self._messages), len(loaded_messages),
            len(self._processed_messages), self.duration, self.size, self.tag))

    def __iter__(self):
        """Initialize this MessageIterator for iteration.

        If messages have not been fetched, fetch them.  If messages have been
        fetched, reset self._messages and self._processed_messages for
        re-iteration.  The reset is done to prevent deleting messages that were
        never applied.
        """
        if self._processed_messages:
            # If the iterator is used within a transaction, and there is a
            # retry we need to re-process the original messages, not new
            # messages.
            self._messages = list(
                set(self._messages) | set(self._processed_messages))
            self._processed_messages = []

        if not self._messages:
            self.fetch_messages()

        return self

    def next(self):
        """Get the next batch of messages from the previously fetched messages.

        If there are no more messages, check whether we should auto-delete the
        messages and raise StopIteration.
        """
        if not self._messages:
            if self.auto_delete:
                self.delete_messages()
            raise StopIteration

        message = self._messages.pop(0)
        self._processed_messages.append(message)
        return json.loads(message.payload)

    def delete_messages(self, only_processed=True):
        """Delete the messages previously leased.

        Unless otherwise directed, only the messages iterated over will be
        deleted.
        """
        messages = self._processed_messages
        if not only_processed:
            messages += self._messages

        if messages:
            try:
                self.queue.delete_tasks(messages)
            except Exception:
                logging.exception("Error deleting messages")
                raise
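As the class docstring notes, the typical use is to lease the messages outside a transaction and then consume them (and delete them) around one. A minimal usage sketch under that assumption; the tag, queue name, and handle() function are placeholders:

from google.appengine.ext import ndb

messages = MessageIterator(tag='group-123', queue_name='pull-updates', size=20,
                           auto_delete=False)
messages.fetch_messages()          # lease the tasks outside the transaction


@ndb.transactional
def apply_messages():
    for payload in messages:       # payloads come back json-deserialized
        handle(payload)            # placeholder for the real per-message work


apply_messages()
messages.delete_messages()         # delete only the messages actually iterated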
Example #39
class MessageIterator(object):
    """This iterator will return a batch of messages for a given group.

    This iterator should be directly used when trying to avoid the lease
    operation inside a transaction, or when other flows are needed.
    """
    def __init__(self,
                 tag,
                 queue_name,
                 size,
                 duration=60,
                 deadline=10,
                 auto_delete=True):
        """The generator will yield json deserialized payloads from tasks with
        the corresponding tag.

        :param tag: :class: `str` Pull queue tag to query against
        :param queue_name: :class: `str` Name of PULL queue holding tasks to
                           lease.
        :param size: :class: `int` The number of items to pull at once
        :param duration: :class: `int` After this time, the tasks may be leased
                         again. Tracked in seconds
        :param deadline: :class: `int` The time in seconds to wait for the rpc.
        :param auto_delete: :class: `bool` Delete tasks when iteration is
                            complete.

        :return: :class: `iterator` of json deserialized payloads
        """
        from google.appengine.api.taskqueue import Queue

        self.queue_name = queue_name
        self.queue = Queue(name=self.queue_name)

        self.tag = tag
        self.size = size
        self.duration = duration
        self.auto_delete = auto_delete
        self.deadline = deadline

        self._messages = []
        self._processed_messages = []
        self._fetched = False

    def fetch_messages(self):
        """Fetch messages from the specified pull-queue.

        This should only be called a single time by a given MessageIterator
        object.  If the MessageIterator is iterated over again, it should
        return the originally leased messages.
        """
        if self._fetched:
            return

        start = time.time()

        loaded_messages = self.queue.lease_tasks_by_tag(self.duration,
                                                        self.size,
                                                        tag=self.tag,
                                                        deadline=self.deadline)

        # If we are within 0.1 sec of our deadline and no messages were
        # returned, then we are hitting queue contention issues and this
        # should be a DeadlineExceededError.
        # TODO: investigate other ways around this, perhaps async leases, etc.
        if (not loaded_messages
                and round(time.time() - start, 1) >= self.deadline - 0.1):
            raise DeadlineExceededError()

        self._messages.extend(loaded_messages)

        self._fetched = True

        logging.debug("Calling fetch messages with %s:%s:%s:%s:%s:%s" % (len(
            self._messages), len(loaded_messages), len(
                self._processed_messages), self.duration, self.size, self.tag))

    def __iter__(self):
        """Initialize this MessageIterator for iteration.

        If messages have not been fetched, fetch them.  If messages have been
        fetched, reset self._messages and self._processed_messages for
        re-iteration.  The reset is done to prevent deleting messages that were
        never applied.
        """
        if self._processed_messages:
            # If the iterator is used within a transaction, and there is a
            # retry we need to re-process the original messages, not new
            # messages.
            self._messages = list(
                set(self._messages) | set(self._processed_messages))
            self._processed_messages = []

        if not self._messages:
            self.fetch_messages()

        return self

    def next(self):
        """Get the next batch of messages from the previously fetched messages.

        If there are no more messages, check whether we should auto-delete the
        messages and raise StopIteration.
        """
        if not self._messages:
            if self.auto_delete:
                self.delete_messages()
            raise StopIteration

        message = self._messages.pop(0)
        self._processed_messages.append(message)
        return json.loads(message.payload)

    def delete_messages(self, only_processed=True):
        """Delete the messages previously leased.

        Unless otherwise directed, only the messages iterated over will be
        deleted.
        """
        messages = self._processed_messages
        if not only_processed:
            messages += self._messages

        if messages:
            try:
                self.queue.delete_tasks(messages)
            except Exception:
                logging.exception("Error deleting messages")
                raise