def ProcessAlertGroups():
    logging.info('Fetching alert groups.')
    groups = alert_group.AlertGroup.GetAll()
    logging.info('Found %s alert groups.', len(groups))
    for group in groups:
        deferred.defer(
            _ProcessAlertGroup,
            group.key,
            _retry_options=taskqueue.TaskRetryOptions(task_retry_limit=0))
    deferred.defer(
        _ProcessUngroupedAlerts,
        _retry_options=taskqueue.TaskRetryOptions(task_retry_limit=0))
def get(self):
    logging.info('Queueing task for deferred processing.')
    # Do not retry failed tasks.
    deferred.defer(
        ProcessAlertGroups,
        _retry_options=taskqueue.TaskRetryOptions(task_retry_limit=0))
    self.response.write('OK')
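# The get() handler above is the kind of endpoint normally driven by App
# Engine cron. A minimal cron.yaml entry might look like the following; the
# URL and schedule are illustrative assumptions, not taken from this code:
#
#     cron:
#     - description: periodically process alert groups
#       url: /alert_groups_update
#       schedule: every 20 minutes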
def do_scale(self):
    """Calculate the target size and actually scale the cluster.

    :return: A (message, HTTP status code) tuple.
    """
    logging.debug('Starting do_scale %s', self.current_nodes)
    self.calc_how_many()
    self.total = min(self.total, self.max_instances)
    logging.info('Scaling to %s workers', self.total)
    if self.total == self.current_nodes:
        logging.debug('Not Modified')
        return 'Not Modified', 200
    # Make sure that we keep the correct ratio between the two types of
    # workers.
    new_workers, new_preemptible = self.preserve_ratio()
    # Do the scaling.
    retry_options = taskqueue.TaskRetryOptions(task_retry_limit=0)
    task = taskqueue.add(
        queue_name='shamash',
        url='/patch',
        method='GET',
        retry_options=retry_options,
        params={
            'cluster_name': self.cluster_name,
            'new_workers': new_workers,
            'new_preemptible': new_preemptible
        })
    logging.debug('Task %s enqueued, ETA %s, cluster %s',
                  task.name, task.eta, self.cluster_name)
    return 'ok', 204
def post(self):
    qs = ''
    try:
        body = json.loads(self.request.body)
        qs = body.get('reported')
    except ValueError:
        logging.exception('report')
        self.abort(500)
    recaptcha_response = urllib.urlencode({
        'secret': RECAPTCHA_PRIVKEY,
        'response': body.get('recaptcha')
    })
    res = json.loads(
        urllib2.urlopen(
            url='https://www.google.com/recaptcha/api/siteverify',
            data=recaptcha_response).read())
    if not res.get('success'):
        self.abort(403)
    if qs:
        url = '%s/?%s' % (self.request.host_url, qs)
        logging.debug(url)
        taskqueue.add(
            url='/submit',
            target='uxssbot',
            params={'url': url, 'service': 'b2p'},
            retry_options=taskqueue.TaskRetryOptions(task_retry_limit=1))
    self.response.write('ok')
def update_content_requests():
    tags = {'approved', 'default'}
    q = models.ContentRequestPublic.query()
    for tag in tags:
        q = q.filter(models.ContentRequestPublic.tags == tag)
    q = q.order(-models.ContentRequestPublic.sort_index)
    futures = []
    delay = 0
    for request in q:
        # TODO: Improve the condition to avoid checking depleted requests.
        # TODO: Add a way to manually update content request entries when the
        # request is closed.
        if not request.wallet or request.closed:
            continue
        task = taskqueue.Task(
            countdown=delay,
            url='/_ah/jobs/update_content_request_entries',
            params={
                'request_id': str(request.key.id()),
                'wallet_id': str(request.wallet.id()),
                'wallet_owner_id': str(request.wallet_owner.id()),
            },
            retry_options=taskqueue.TaskRetryOptions(task_retry_limit=0))
        futures.append(_add_task_async(task, queue_name=config.INTERNAL_QUEUE))
        delay += 5
    _wait_all(futures)
    logging.debug('Scheduled %d job(s) to update content request entries',
                  len(futures))
    return ''
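# _add_task_async and _wait_all are helpers assumed by the snippet above (and
# by update_youtube_stats further down); their definitions are not shown in
# this section. A minimal sketch, assuming they are thin wrappers over
# Queue.add_async and the UserRPC objects it returns:
def _add_task_async(task, queue_name='default'):
    # Queue.add_async returns an RPC that resolves once the task is enqueued.
    return taskqueue.Queue(queue_name).add_async(task)


def _wait_all(futures):
    # Block until every pending enqueue RPC has completed.
    for future in futures:
        future.get_result()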
def _():
    archive_entity = archive_key.get()
    if not archive_entity:
        archive_entity = models.Archive(
            key=archive_key, initial_request=swarming_request)
        archive_entity.put()
    # Insert all of the tasks as entities under our parent.
    for json_task_name, json_task in swarming_request['tasks'].iteritems():
        # The id for this key is auto-generated (thus why we specify 0).
        task_key = ndb.Key(models.Task, 0, parent=archive_key)
        task = models.Task(
            key=task_key,
            task_id=json_task['task_id'],
            task_name=json_task_name)
        task.put()
    # Only poll the archive if this commit succeeds.
    deferred.defer(
        PollArchive,
        archive_key=archive_key,
        _transactional=True,
        _retry_options=taskqueue.TaskRetryOptions(
            max_backoff_seconds=6.0,
            min_backoff_seconds=3.0,
            task_age_limit=60 * 60,  # 1 hour.
        ))
    return archive_entity
def AuthorizedPost(self):
    datastore_hooks.SetPrivilegedRequest()

    with timing.WallTimeLogger('decompress'):
        try:
            data_str = self.request.body
            # Decompressing here only verifies that the body really is
            # zlib-compressed; the compressed bytes are what get stored.
            zlib.decompress(data_str)
            logging.info('Received compressed data.')
        except zlib.error:
            data_str = self.request.get('data')
            if not data_str:
                raise api_request_handler.BadRequestError(
                    'Missing "data" parameter')
            data_str = zlib.compress(data_str)
            logging.info('Received uncompressed data.')

    if not data_str:
        raise api_request_handler.BadRequestError('Missing "data" parameter')

    filename = uuid.uuid4()
    params = {'gcs_file_path': '/add-histograms-cache/%s' % filename}

    gcs_file = cloudstorage.open(
        params['gcs_file_path'],
        'w',
        content_type='application/octet-stream',
        retry_params=_RETRY_PARAMS)
    gcs_file.write(data_str)
    gcs_file.close()

    retry_options = taskqueue.TaskRetryOptions(
        task_retry_limit=_TASK_RETRY_LIMIT)
    queue = taskqueue.Queue('default')
    queue.add(
        taskqueue.Task(
            url='/add_histograms/process',
            payload=json.dumps(params),
            retry_options=retry_options))
def get(self):
    """Handle a cron job."""
    gs_bucket_name = local_config.Config(
        local_config.PROJECT_PATH).get('backup.bucket')
    if not gs_bucket_name:
        logs.log('No backup bucket is set, skipping.')
        return

    kinds = [
        kind.kind_name for kind in metadata.Kind.all()
        if (not kind.kind_name.startswith('_') and
            kind.kind_name not in EXCLUDED_MODELS)
    ]

    timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%d-%H:%M:%S')
    taskqueue.add(
        url='/_ah/datastore_admin/backup.create',
        method='GET',
        target='ah-builtin-python-bundle',
        retry_options=taskqueue.TaskRetryOptions(
            task_retry_limit=3,
            min_backoff_seconds=5 * 60),  # 5 minutes of backoff.
        params={
            'filesystem': 'gs',
            'gs_bucket_name': '%s/%s' % (gs_bucket_name, timestamp),
            'kind': kinds
        })

    self.response.headers['Content-Type'] = 'text/plain'
    self.response.out.write('OK')
    self.response.set_status(200)
def get(self):
    captcha = self.request.get('g-recaptcha-response')
    if not captcha:
        return self.response.set_status(403)
    captcha_api.request(
        'POST', 'https://www.google.com/recaptcha/api/siteverify',
        urllib.urlencode({
            'secret': CAPTCHA_SECRET,
            'response': captcha,
            'remoteip': self.request.remote_addr
        }))
    captcha_res = json.loads(captcha_api.getresponse().read())
    if not captcha_res['success']:
        return self.response.set_status(403)
    url = self.request.host_url + '/#' + urllib.quote(
        self.request.get('html'))
    taskqueue.add(
        url='/submit',
        target='uxssbot',
        params={'url': url, 'service': 'sanitizer'},
        retry_options=taskqueue.TaskRetryOptions(task_retry_limit=1))
    return self.response.write(
        'Thanks, your solution is now submitted, we\'ll take a look soon')
def post(self):
    recaptcha_response = urllib.urlencode({
        'secret': RECAPTCHA_PRIVATE_KEY,
        'response': self.request.get('g-recaptcha-response')
    })
    res = json.loads(
        urllib2.urlopen(
            url='https://www.google.com/recaptcha/api/siteverify',
            data=recaptcha_response).read())
    if not res.get('success'):
        self.abort(403)
    url = self.request.get('page', '')
    logging.debug(url)
    taskqueue.add(
        url='/submit',
        target='uxssbot',
        params={'url': url, 'service': 'pastetastic'},
        retry_options=taskqueue.TaskRetryOptions(task_retry_limit=1))
    self.response.write(
        REPORT_SUCCESS_TEMPLATE.render({'nonce': self.nonce}))
def Post(self):
    if utils.IsDevAppserver():
        # Don't require developers to zip the body.
        # In prod, the data will be written to cloud storage and processed on
        # the taskqueue, so the caller will not see any errors. In
        # dev_appserver, process the data immediately so the caller will see
        # errors. Also always create an upload completion token for such
        # requests.
        token, token_info = self._CreateUploadCompletionToken()
        ProcessHistogramSet(
            _LoadHistogramList(StringIO.StringIO(self.request.body)), token)
        token.UpdateStateAsync(
            upload_completion_token.State.COMPLETED).wait()
        return token_info

    with timing.WallTimeLogger('decompress'):
        try:
            data_str = self.request.body
            # Try to decompress at most 100 bytes from the data, only to
            # determine whether we've been given a compressed payload.
            zlib.decompressobj().decompress(data_str, 100)
            logging.info('Received compressed data.')
        except zlib.error:
            data_str = self.request.get('data')
            if not data_str:
                raise api_request_handler.BadRequestError(
                    'Missing or uncompressed data.')
            data_str = zlib.compress(data_str)
            logging.info('Received uncompressed data.')

    if not data_str:
        raise api_request_handler.BadRequestError('Missing "data" parameter')

    filename = uuid.uuid4()
    params = {'gcs_file_path': '/add-histograms-cache/%s' % filename}

    gcs_file = cloudstorage.open(
        params['gcs_file_path'],
        'w',
        content_type='application/octet-stream',
        retry_params=_RETRY_PARAMS)
    gcs_file.write(data_str)
    gcs_file.close()

    token_info = None
    if utils.ShouldTurnOnUploadCompletionTokenExperiment():
        _, token_info = self._CreateUploadCompletionToken(
            params['gcs_file_path'])
        params['upload_completion_token'] = token_info['token']

    retry_options = taskqueue.TaskRetryOptions(
        task_retry_limit=_TASK_RETRY_LIMIT)
    queue = taskqueue.Queue('default')
    queue.add(
        taskqueue.Task(
            url='/add_histograms/process',
            payload=json.dumps(params),
            retry_options=retry_options))
    return token_info
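# The 100-byte probe above works because zlib raises zlib.error on input
# that lacks a valid zlib header, while a truncated prefix of a genuine
# stream decompresses cleanly. A self-contained illustration (added here,
# not part of the original handler):
import zlib

compressed = zlib.compress('x' * 1000)
zlib.decompressobj().decompress(compressed, 100)  # Fine: valid zlib prefix.
try:
    zlib.decompressobj().decompress('plain text payload', 100)
except zlib.error:
    pass  # Raised: the payload was not zlib-compressed.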
def voters_task(self, election, data, method):
    queue_data = {
        'election_key': str(election.key()),
        'method': method,
        'voters': data['voters']
    }
    retry_options = taskqueue.TaskRetryOptions(task_retry_limit=0)
    taskqueue.add(
        url=TASK_URL,
        params={'data': json.dumps(queue_data)},
        retry_options=retry_options)
def enqueue_task_async(queue, url, payload,
                       task_age_limit_sec):  # pragma: no cover
    task = taskqueue.Task(
        url=url,
        payload=payload,
        retry_options=taskqueue.TaskRetryOptions(
            task_age_limit=task_age_limit_sec))
    return taskqueue.Queue(queue).add_async(task, transactional=True)
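# enqueue_task_async adds its task with transactional=True, so it only works
# inside a datastore transaction: the task is enqueued if and only if the
# transaction commits. A minimal usage sketch, assuming an ndb entity and a
# hypothetical /internal/notify handler (neither comes from this code):
import json

from google.appengine.ext import ndb


@ndb.transactional
def _save_and_notify(entity):
    entity.put()
    rpc = enqueue_task_async(
        'default', '/internal/notify',
        json.dumps({'key': entity.key.urlsafe()}),
        task_age_limit_sec=6 * 60 * 60)
    rpc.get_result()  # Resolve the enqueue before the transaction commits.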
def pull_from_legacy_site():
    """A webhook, triggered by the old site, that pulls from that site."""
    # Defer the pull to prevent the prod site from deadlocking.
    deferred.defer(
        pull_from_old_marketplace,
        permalink=request.args.get("permalink", ""),
        _retry_options=taskqueue.TaskRetryOptions(task_retry_limit=5))
    return "ok"
def post(self):
    # Get args.
    self.start_cursor = self.request.get('cursor')
    self.filtering_event_key = self.request.get('event')
    self.filename = self.request.get('filename')
    self.csv_header = self.request.get('csv_header')
    self.worker_url = self.request.get('worker_url')
    self.event = (Event.get(self.filtering_event_key)
                  if self.filtering_event_key else None)

    # Get the (base) query, skip the query ahead to the cursor, then filter
    # the fetched sites.
    query = self.get_base_query()
    if self.start_cursor:
        query.with_cursor(self.start_cursor)
    fetched_sites = query.fetch(limit=self.sites_per_task)
    sites = self.filter_sites(fetched_sites)

    # Write this part of the CSV file to GCS.
    csv_part_gcs_fd = cloudstorage.open(
        BUCKET_NAME + '/' + self.filename + '.part.' + self.start_cursor,
        'w',
        content_type='text/csv')
    self._write_csv_rows(csv_part_gcs_fd, sites)
    csv_part_gcs_fd.close()

    # Decide what to do next.
    self.end_cursor = query.cursor()
    if self.end_cursor and self.start_cursor != self.end_cursor:
        # Chain to the next task.
        taskqueue.add(
            url=self.worker_url,
            params=self.get_continuation_param_dict(),
            retry_options=taskqueue.TaskRetryOptions(task_retry_limit=3),
        )
    else:
        # Finish the file: combine the parts and deduplicate the lines.
        logging.info(u"Deduplicating to create %s ..." % self.filename)
        sio = StringIO()
        path_prefix = BUCKET_NAME + '/' + self.filename + '.part'
        for gcs_file_stat in cloudstorage.listbucket(path_prefix):
            csv_part_gcs_fd = cloudstorage.open(gcs_file_stat.filename)
            for line in csv_part_gcs_fd:
                sio.write(line)
            csv_part_gcs_fd.close()
        sio.seek(0)
        deduplicated_lines = set(line for line in sio)

        # Write the CSV header and deduplicated lines to a new file.
        csv_complete_gcs_fd = cloudstorage.open(
            BUCKET_NAME + '/' + self.filename, 'w', content_type='text/csv')
        csv_complete_gcs_fd.write(self.csv_header.encode('utf-8'))
        for line in deduplicated_lines:
            csv_complete_gcs_fd.write(line)
        csv_complete_gcs_fd.close()
def compute_results(election):
    # Assert validity.
    if not election:
        logging.error('Election not found.')
        return
    if election.end > datetime.now():
        logging.error('Election is still open.')
        return
    if election.result_computed:
        logging.error('Election results already computed.')
        return
    logging.info('Computing results for election: %s, organization: %s.',
                 election.name, election.organization.name)

    total_ballot_count = 0
    for election_position in election.election_positions:
        total_ballot_count += election_position.ballots.count()
    large_election = total_ballot_count > 2500

    all_computed = True
    for election_position in election.election_positions:
        if not election_position.result_computed:
            all_computed = False
            if large_election:
                logging.info('Found Large Election. Enqueueing Position.')
                # Enqueue a task for computing results.
                task_name = ('compute-result-' +
                             str(election_position.key()) + 'banana')
                retry_options = taskqueue.TaskRetryOptions(task_retry_limit=0)
                taskqueue.add(
                    name=task_name,
                    url='/tasks/position-results',
                    params={
                        'election_position_key': str(election_position.key())
                    },
                    retry_options=retry_options,
                    queue_name='election-results',
                    target='task-manager')
            else:
                election_position.compute_winners()

    if all_computed:
        election.result_computed = True
        election.put()
        logging.info('Computed results for election: %s, organization: %s.',
                     election.name, election.organization.name)
        if not large_election:
            admin_emails = ['*****@*****.**']
            for org_admin in election.organization.organization_admins:
                admin_emails.append(org_admin.admin.email)
            new_results.email_election_results(admin_emails, election)
            election.result_emailed = True
def _ProcessPostRequest(self):
    """Triggers backup mapper jobs and returns their ids."""
    backup = self.request.get('backup_name').strip()
    if not backup:
        return [('error', 'Unspecified backup name.')]
    if BackupInformation.name_exists(backup):
        return [('error', 'Backup "%s" already exists.' % backup)]
    kinds = self.request.get('kind', allow_multiple=True)
    queue = self.request.get('queue')
    job_name = 'datastore_backup_%s_%%(kind)s' % re.sub(
        r'[^\w]', '_', backup)
    job_operation = None
    try:
        job_operation = utils.StartOperation('Backup: %s' % backup)
        backup_info = BackupInformation(parent=job_operation)
        backup_info.name = backup
        backup_info.kinds = kinds
        backup_info.put(config=datastore_rpc.Configuration(
            force_writes=True))
        mapreduce_params = {
            'done_callback_handler': self.BACKUP_COMPLETE_HANDLER,
            'backup_info_pk': str(backup_info.key()),
            'force_ops_writes': True
        }
        if len(kinds) <= 10:
            return [('job', job) for job in _run_map_jobs(
                job_operation.key(), backup_info.key(), kinds, job_name,
                self.BACKUP_HANDLER, self.INPUT_READER, self.OUTPUT_WRITER,
                self._GetBasicMapperParams(), mapreduce_params, queue)]
        else:
            retry_options = taskqueue.TaskRetryOptions(task_retry_limit=1)
            return [('task', deferred.defer(
                _run_map_jobs, job_operation.key(), backup_info.key(),
                kinds, job_name, self.BACKUP_HANDLER, self.INPUT_READER,
                self.OUTPUT_WRITER, self._GetBasicMapperParams(),
                mapreduce_params, queue,
                _queue=queue,
                _url=utils.ConfigDefaults.DEFERRED_PATH,
                _retry_options=retry_options).name)]
    except Exception:
        logging.exception(
            'Failed to start datastore backup job(s) for "%s".', job_name)
        if job_operation:
            job_operation.status = utils.DatastoreAdminOperation.STATUS_FAILED
            job_operation.put(config=datastore_rpc.Configuration(
                force_writes=True))
        raise
def add_in_tx(self, queue):
    retry_options = taskqueue.TaskRetryOptions(task_retry_limit=5)
    task = taskqueue.Task(
        url='/python/taskqueue/update_callback', retry_options=retry_options)
    queue.add(task, transactional=True)
    entity = datastore.Entity('TaskName', name=task.name)
    entity['status'] = 'started'
    datastore.Put(entity)
    return task.name
def enqueue_for_deletion(self, export_task):
    # Enqueue the task to be deleted in 15 minutes.
    timestamp = datetime.datetime.now() + datetime.timedelta(minutes=15)
    retry_options = taskqueue.TaskRetryOptions(task_retry_limit=0)
    queue_task = taskqueue.Task(
        url='/export/delete',
        eta=timestamp,
        params={"task": export_task.key.urlsafe()},
        retry_options=retry_options)
    queue_task.add()
def _():
    archive_entity = archive_key.get()
    if archive_entity.polling_finished:
        return
    archive_entity.polling_finished = True
    archive_entity.put()
    deferred.defer(
        UploadToGCS,
        archive_key=archive_key,
        _transactional=True,
        _retry_options=taskqueue.TaskRetryOptions(
            max_backoff_seconds=60.0, min_backoff_seconds=30.0))
def _get_retry_options(cls):
    # Retry up to once every hour with exponential backoff; limit tasks to
    # three hours; cron will re-enqueue them for days. This is because the
    # purpose of the queue is retrying in case of transient errors (datastore
    # or send_mail burbles), and the purpose of the cron is retrying in case
    # of longer errors (quota exhaustion).
    return taskqueue.TaskRetryOptions(
        min_backoff_seconds=1,
        max_backoff_seconds=_SECONDS_PER_HOUR,
        max_doublings=12,  # Overflow task age limit; don't want underflow.
        task_age_limit=cls._get_task_age_limit_seconds(),
    )
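# Sanity check on the numbers above (added commentary, not original code):
# with min_backoff_seconds=1 and max_doublings=12, the backoff doubles
# 1, 2, 4, ..., 2**12 = 4096 seconds, and 4096 already exceeds the
# 3600-second max_backoff_seconds cap, so the doubling schedule overshoots
# rather than undershoots the cap. Once capped, the task retries roughly
# once per hour until task_age_limit expires.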
def cached(self, *arg, **kw):
    use_cached_val = use_cached
    if self.request.get('use_cached', None) is not None:
        use_cached_val = self.request.get('use_cached') == 'true'

    # Key the cache on the request's method, URL and body.
    key = hashlib.sha1(self.request.method)
    key.update(self.request.url)
    key.update(self.request.body)
    key = key.hexdigest()

    if use_cached_val:
        cached = memcache.get(key=key, namespace="cached_view")
        if cached is not None:
            age = time.time() - cached['time']
            if max_age is None or age < max_age:
                if (eventually_consistent is not None and
                        age > eventually_consistent):
                    # The cached value is stale but still usable: serve it,
                    # and refresh it in the background by re-requesting the
                    # same URL with use_cached=false via the task queue.
                    local_url = '/' + self.request.url.split(
                        "://")[1].split("/", 1)[1]
                    if 'use_cached=true' not in local_url:
                        sep = "&" if '?' in local_url else "?"
                        local_url = local_url + sep + 'use_cached=false'
                    else:
                        local_url = local_url.replace(
                            'use_cached=true', 'use_cached=false')
                    retry_options = taskqueue.TaskRetryOptions(
                        task_retry_limit=1)
                    que = taskqueue.Queue(
                        "memcaching-eventually-consistency")
                    que.add_async(
                        taskqueue.Task(url=local_url,
                                       method=self.request.method,
                                       payload=self.request.body,
                                       headers=self.request.headers,
                                       retry_options=retry_options))
                response = webob.Response.from_file(
                    StringIO.StringIO(cached['value']))
                for name in response.__dict__:
                    setattr(self.response, name, getattr(response, name))
                self.cache_hit = True
                return

    fn(self, *arg, **kw)
    memcache.set(key=key,
                 value={'value': str(self.response), 'time': time.time()},
                 namespace="cached_view")
def process_queue():
    if process_site_scan() or process_table_scan():
        deferred.defer(
            process_queue,
            _queue='scrape',
            _countdown=1,
            _retry_options=taskqueue.TaskRetryOptions(max_backoff_seconds=30))
    else:
        logging.info("Scrape finished")
        deferred.defer(
            update_category_counts,
            store_id=_store.id,
            _queue='scrape',
            _countdown=5)
def update_youtube_stats():
    futures = []
    delay = 0
    for row in bigquery_client.query(QUERY_REACTED_CONTENT).rows():
        task = taskqueue.Task(
            countdown=delay,
            url='/_ah/jobs/update_youtube_views_batched',
            params={'original_id': row.content_id},
            retry_options=taskqueue.TaskRetryOptions(task_retry_limit=0))
        futures.append(_add_task_async(task, queue_name=config.INTERNAL_QUEUE))
        delay += 2
    _wait_all(futures)
    return ''
def queue_new_persist_task():
    """Queue up a new persist task on the task queue via the deferred library.

    These tasks should fire off immediately. If they're being backed off by
    GAE due to errors, they shouldn't retry less frequently than once every
    60 seconds.
    """
    try:
        deferred.defer(
            persist_task,
            _queue=config.QUEUE_NAME,
            _retry_options=taskqueue.TaskRetryOptions(max_backoff_seconds=60))
    except (taskqueue.TaskAlreadyExistsError, taskqueue.TombstonedTaskError):
        logging.info("Task for gae/bingo persist already exists.")
def LocalImport():
    retry_options = taskqueue.TaskRetryOptions(task_retry_limit=0)
    queue_name = flask.request.args.get('queue_name') or 'default'
    taskqueue.add(
        url='/local-import/worker',
        queue_name=queue_name,
        method='GET',
        retry_options=retry_options)
    return 'Done!'
def start_export(self, org, event, worker_url, filtering_event_key=None,
                 filename=None):
    self.worker_url = worker_url

    # Create a filename if one is not supplied.
    if filename is None:
        filename = "%s-%s-%s.csv" % (
            event.filename_friendly_name,
            re.sub(r'\W+', '-', org.name.lower()),
            timestamp_now(),
        )
    self.filename = filename

    # Decide the header/title row.
    header_sio = StringIO()
    writer = csv.writer(header_sio)
    writer.writerow([
        "%s Work Orders. Created %s UTC%s" % (
            event.name,
            str(datetime.datetime.utcnow()).split('.')[0],
            ' by %s' % org.name if org else ''
        )
    ])
    writer.writerow(get_csv_fields_list(event.short_name))
    self.csv_header = header_sio.getvalue()
    header_sio.close()

    # Select the event filter based on the parameter or the org user.
    if filtering_event_key:
        self.filtering_event_key = filtering_event_key
    elif org.is_global_admin:
        self.filtering_event_key = ''
    else:
        self.filtering_event_key = event.key()

    # Start the first task.
    taskqueue.add(
        url=self.worker_url,
        params=self.get_continuation_param_dict(),
        retry_options=taskqueue.TaskRetryOptions(task_retry_limit=3),
    )

    # Write the filename out as JSON.
    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(json.dumps({'filename': filename}))
def post(self):
    payload = self.request.POST.get('payload')
    victim_url = (self.request.host_url + '/victim?' +
                  urllib.urlencode({'payload': payload}))
    print 'Visiting', victim_url
    task = taskqueue.add(
        url='/submit',
        target='uxssbot',
        params={'url': victim_url, 'service': 'blindxss'},
        retry_options=taskqueue.TaskRetryOptions(task_retry_limit=1))
    template = JINJA_ENVIRONMENT.get_template('index.html')
    self.response.write(template.render({'payload': payload}))
def post(self):
    task = ExportTask()
    task.put()
    retry_options = taskqueue.TaskRetryOptions(task_retry_limit=0)
    queue_task = taskqueue.Task(
        url='/export/run',
        params={"task": task.key.urlsafe()},
        retry_options=retry_options)
    queue_task.add()
    result = {
        "message": "Waiting for task to start..",
        "id": task.key.urlsafe()
    }
    self.response.headers['Content-Type'] = "application/json"
    self.response.write(json.dumps(result))
def post(self):
    job = models.ProcessingJob(
        name=self.request.get('ready_name'),
        description=self.request.get('ready_description'),
        status='running')
    job.put()
    retry_options = taskqueue.TaskRetryOptions(task_retry_limit=0)
    taskqueue.add(
        name=job.name,
        url='/intern/jobs-taskqueue',
        params={'job_key': str(job.key())},
        retry_options=retry_options)
    self.response.write(json.dumps(job.to_json()))