def _DispatchTracesAndWaitForResult(self, job, traces, num_instances):
    """Fans the traces out to mapper tasks and schedules a result check.

    The job's mapper source is written to GCS, `traces` is cut into
    `num_instances` contiguous buckets, and one task per bucket is added to
    the 'mapper-queue' queue. The ids of the queued tasks are stored on
    `job.running_tasks`, and a follow-up 'check' task is queued with a
    ten-minute timeout stamp.

    Args:
        job: Job entity; its key id, mapper, mapper_function and revision
            are read, and running_tasks is written.
        traces: Sequence of trace locations to distribute.
        num_instances: Number of buckets (and therefore tasks) to create.
    """

    def _partition(items, bucket_count=2):
        # Yields `bucket_count` contiguous slices of `items`; bucket i has
        # the same length as the strided slice items[i::bucket_count].
        begin = 0
        for index in xrange(bucket_count):
            end = begin + len(items[index::bucket_count])
            yield items[begin:end]
            begin = end

    # TODO(simonhatch): In the future it might be possible to only specify a
    # reducer and no mapper. Revisit this.
    bucket_path = cloud_config.Get().control_bucket_path + "/jobs/"
    mapper_url = '%s%s.mapper' % (bucket_path, job.key.id())
    cloud_helper.WriteGCS(mapper_url, job.mapper.encode('ascii', 'ignore'))

    version = self._GetVersion()
    tasks = {}

    # One task per bucket of traces.
    for bucket in _partition(traces, num_instances):
        task_id = str(uuid.uuid4())
        taskqueue.add(
            queue_name='mapper-queue',
            url='/cloud_worker/task',
            target=version,
            name=task_id,
            params={
                'revision': job.revision,
                'traces': json.dumps(bucket),
                'result': '%s%s.result' % (bucket_path, task_id),
                'mapper': mapper_url,
                'mapper_function': job.mapper_function,
            })
        tasks[task_id] = {'status': 'IN_PROGRESS'}

    job.running_tasks = list(tasks)
    job.put()

    # On production servers, we could just sit and wait for the results, but
    # dev_server is single threaded and won't run any other tasks until the
    # current one is finished. Queue a task that checks for the result
    # instead.
    deadline = datetime.datetime.now() + datetime.timedelta(minutes=10)
    taskqueue.add(
        queue_name='default',
        url='/cloud_mapper/task',
        target=version,
        countdown=1,
        params={
            'jobid': job.key.id(),
            'type': 'check',
            'tasks': json.dumps(tasks),
            'timeout': deadline.strftime('%Y-%m-%d %H:%M:%S'),
        })
def get(self):
    """Writes the job form to the response, pre-filled with defaults."""
    defaults = {
        'mapper': _DEFAULT_MAPPER,
        'mapper_function': _DEFAULT_FUNCTION,
        'reducer': _DEFAULT_REDUCER,
        'reducer_function': _DEFAULT_REDUCER_FUNCTION,
        'query': 'MAX_TRACE_HANDLES=10',
        'corpus': cloud_config.Get().default_corpus,
    }
    self.response.out.write(_FORM_HTML.format(**defaults))
def _QueryForTraces(self, corpus, query):
    """Asks the corpus endpoint for traces matching `query`.

    Args:
        corpus: Base URL of the corpus; '/query' is appended to it.
        query: Query string, sent URL-encoded as the 'q' parameter.

    Returns:
        The response body decoded from JSON.
    """
    encoded_query = urllib.urlencode({'q': query})
    response = urlfetch.fetch(
        url='%s/query?%s' % (corpus, encoded_query),
        payload=encoded_query,
        method=urlfetch.GET,
        headers={
            'X-URLFetch-Service-Id': cloud_config.Get().urlfetch_service_id,
        },
        follow_redirects=False,
        deadline=10)
    return json.loads(response.content)
def get(self):
    """Responds with a JSON list of GCS URLs for traces matching a query.

    The 'q' request parameter is parsed as a corpus query; when it is absent
    or empty, 'MAX_TRACE_HANDLES=100' is used. Each matching TraceInfo entity
    is reported as 'gs://<trace upload bucket>/<key string id>.gz'. The
    response is served as text/plain.
    """
    self.response.headers['Content-Type'] = 'text/plain'

    raw_query = self.request.get('q') or 'MAX_TRACE_HANDLES=100'
    query = corpus_query.CorpusQuery.FromString(raw_query)
    (gql, args) = query.AsGQLWhereClause()
    reports = trace_info.TraceInfo.gql(gql, *args)

    # The bucket is the same for every report, so look the config up once
    # instead of once per result.
    bucket = cloud_config.Get().trace_upload_bucket
    reports_json = json.dumps(
        ['gs://%s/%s.gz' % (bucket, i.key.string_id()) for i in reports])

    self.response.out.write(reports_json)
def _delete_traces(self):
    """Removes one batch of expired traces from GCS and the datastore.

    TraceInfo entities dated more than MAX_DAYS days ago are selected, at
    most BATCH_SIZE per call. For each one the matching '.gz' object in the
    trace upload bucket is deleted (an already-missing object is ignored)
    and then the entity key itself is deleted.

    Returns:
        The number of entity keys deleted by this call.
    """
    trace_bucket = cloud_config.Get().trace_upload_bucket
    cutoff = datetime.datetime.now() - datetime.timedelta(days=MAX_DAYS)
    expired_keys = TraceInfo.query(TraceInfo.date < cutoff).fetch(
        BATCH_SIZE, keys_only=True)

    deleted_traces = 0
    for deleted_traces, key in enumerate(expired_keys, 1):
        try:
            gcs.delete('/%s/%s.gz' % (trace_bucket, key.id()),
                       retry_params=default_retry_params)
        except gcs.NotFoundError:
            # The object is already gone; still drop the datastore record.
            pass
        key.delete()
    return deleted_traces
def post(self):
    """Stores an uploaded trace in GCS and records its metadata.

    The 'trace' request field is written to
    '/<trace upload bucket>/<uuid>.gz', then a TraceInfo entity with the same
    uuid as its id is populated from the request and saved. The uuid is
    written to the response.

    Metadata is best-effort: once the trace body has been stored, a bad
    property value, a malformed 'config' document or an invalid 'tags' string
    is logged and skipped rather than failing the upload.
    """
    trace_uuid = str(uuid.uuid4())

    gcs_path = '/%s/%s.gz' % (
        cloud_config.Get().trace_upload_bucket, trace_uuid)
    gcs_file = gcs.open(gcs_path,
                        'w',
                        content_type='application/octet-stream',
                        options={},
                        retry_params=default_retry_params)
    gcs_file.write(self.request.get('trace'))
    gcs_file.close()

    trace_object = trace_info.TraceInfo(id=trace_uuid)
    trace_object.remote_addr = os.environ["REMOTE_ADDR"]

    # Copy every request argument whose normalized name matches a property
    # of the entity.
    for arg in self.request.arguments():
        arg_key = arg.replace('-', '_').lower()
        if arg_key in trace_object._properties:
            try:
                setattr(trace_object, arg_key, self.request.get(arg))
            except datastore_errors.BadValueError:
                logging.warning(
                    'Ignoring invalid value for property %s', arg_key)

    scenario_config = self.request.get('config')
    if scenario_config:
        # The trace is already in GCS at this point, so a bad config must not
        # abort the request and leave the object without a datastore record.
        try:
            config_json = json.loads(scenario_config)
        except ValueError:
            logging.warning(
                'The provided config is not valid JSON and will be ignored')
            config_json = None
        if isinstance(config_json, dict) and 'scenario_name' in config_json:
            try:
                trace_object.scenario_name = config_json['scenario_name']
            except datastore_errors.BadValueError:
                logging.warning(
                    'Ignoring invalid value for property scenario_name')

    tags_string = self.request.get('tags')
    if tags_string:
        # Tags are comma separated and should only include alphanumeric + '-'.
        if re.match('^[a-zA-Z0-9-,]+$', tags_string):
            trace_object.tags = tags_string.split(',')
        else:
            logging.warning(
                'The provided tags string includes one or more invalid'
                ' characters and will be ignored')

    trace_object.ver = self.request.get('product-version')
    trace_object.put()

    self.response.write(trace_uuid)
def _CheckOnReduceResults(self, job):
    """Polls GCS for the reducer output and finalizes or re-queues the job.

    Does nothing unless the job is 'IN_PROGRESS'. Each task listed in the
    'tasks' request parameter is looked up at
    '<control bucket path>/jobs/<task id>.result'. If a result exists, the
    job is marked 'COMPLETE' with that path as its results. Otherwise the
    job is marked 'ERROR' (and its tasks cancelled) once the 'timeout'
    request parameter has passed, or another check is queued.

    Args:
        job: Job entity whose status and results are updated.
    """
    if job.status != 'IN_PROGRESS':
        return

    tasks = json.loads(self.request.get('tasks'))

    # TODO: There's really only one reducer job at the moment
    results = None
    for task_id, task_state in tasks.iteritems():
        candidate = '%s/jobs/%s.result' % (
            cloud_config.Get().control_bucket_path, task_id)
        if cloud_helper.StatGCS(candidate) is None:
            continue
        task_state['status'] = 'DONE'
        results = candidate

    logging.info("Reduce results: %s" % str(tasks))

    if results:
        logging.info("Finished all tasks.")
        job.status = 'COMPLETE'
        job.results = results
        job.put()
        return

    raw_timeout = self.request.get('timeout')
    deadline = datetime.datetime.strptime(raw_timeout, '%Y-%m-%d %H:%M:%S')
    if datetime.datetime.now() > deadline:
        self._CancelTasks(tasks)
        job.status = 'ERROR'
        job.put()
        logging.error('Task timed out waiting for results.')
        return

    # Nothing yet and still within the deadline: look again shortly.
    taskqueue.add(
        url='/cloud_mapper/task',
        target=self._GetVersion(),
        countdown=1,
        params={
            'jobid': job.key.id(),
            'type': 'check_reduce_results',
            'tasks': json.dumps(tasks),
            'timeout': raw_timeout,
        })
def _CheckOnMapResults(self, job):
    """Polls GCS for mapper outputs and starts the reduce step when ready.

    Does nothing unless the job is 'IN_PROGRESS'. Every task in the 'tasks'
    request parameter that is not yet 'DONE' is looked up at
    '<control bucket path>/jobs/<task id>.result' and marked 'DONE' if the
    result exists. While results are missing and the 'timeout' request
    parameter has not passed, another check is queued. Otherwise the tasks
    are cancelled, a single reduce task is queued over the finished map
    results, and a 'check_reduce_results' task is queued to watch it.

    Args:
        job: Job entity; its reducer_function, revision and function_timeout
            are read, and running_tasks is written.
    """
    if job.status != 'IN_PROGRESS':
        return

    request = self.request
    tasks = json.loads(request.get('tasks'))
    reducer_url = request.get('reducer')
    reducer_function = job.reducer_function
    revision = job.revision
    deadline = datetime.datetime.strptime(request.get('timeout'),
                                          '%Y-%m-%d %H:%M:%S')

    # TODO: There's no reducer yet, so we can't actually collapse multiple
    # results into one results file.
    pending = False
    for task_id, task_state in tasks.iteritems():
        if task_state['status'] == 'DONE':
            continue
        result_path = '%s/jobs/%s.result' % (
            cloud_config.Get().control_bucket_path, task_id)
        stat_result = cloud_helper.StatGCS(result_path)
        if stat_result is None:
            pending = True
            continue
        logging.info(str(stat_result))
        task_state['status'] = 'DONE'

    logging.info("Tasks: %s" % str(tasks))

    if pending and datetime.datetime.now() < deadline:
        taskqueue.add(
            url='/cloud_mapper/task',
            target=self._GetVersion(),
            countdown=1,
            params={
                'jobid': job.key.id(),
                'type': 'check_map_results',
                'reducer': reducer_url,
                'tasks': json.dumps(tasks),
                'timeout': request.get('timeout'),
            })
        return

    # Clear out any leftover tasks in case we just hit the timeout.
    self._CancelTasks(tasks)

    # Only mappers that actually produced a result feed the reducer.
    map_results = [
        '%s/jobs/%s.result' % (cloud_config.Get().control_bucket_path,
                               task_id)
        for task_id, task_state in tasks.iteritems()
        if task_state['status'] == 'DONE'
    ]

    # We'll only do 1 reduce job for now, maybe shard it better later
    logging.info("Kicking off reduce.")
    task_id = str(uuid.uuid4())
    taskqueue.add(
        queue_name='mapper-queue',
        url='/cloud_worker/task',
        target=self._GetVersion(),
        name=task_id,
        params={
            'revision': revision,
            'traces': json.dumps(map_results),
            'result': '%s/jobs/%s.result' % (
                cloud_config.Get().control_bucket_path, task_id),
            'reducer': reducer_url,
            'reducer_function': reducer_function,
            'timeout': job.function_timeout,
        })

    job.running_tasks = [task_id]
    job.put()

    reduce_tasks = {task_id: {'status': 'IN_PROGRESS'}}

    # On production servers, we could just sit and wait for the results, but
    # dev_server is single threaded and won't run any other tasks until the
    # current one is finished. Queue a task that checks for the result
    # instead.
    reduce_deadline = datetime.datetime.now() + datetime.timedelta(
        seconds=int(job.function_timeout))
    taskqueue.add(
        queue_name='default',
        url='/cloud_mapper/task',
        target=self._GetVersion(),
        countdown=1,
        params={
            'jobid': job.key.id(),
            'type': 'check_reduce_results',
            'tasks': json.dumps(reduce_tasks),
            'timeout': reduce_deadline.strftime('%Y-%m-%d %H:%M:%S'),
        })
def post(self):
    """Runs the map_traces job over a batch of traces and uploads the output.

    Request parameters read: 'traces' (JSON list handed to _DownloadTraces),
    'mapper' (GCS location of the mapper source), 'mapper_function',
    'revision' (checked out with git when not on the dev server) and
    'result' (GCS location the job output is written to).

    Any failure is logged with its traceback and not propagated. The
    downloaded trace directory and the temporary mapper/output files are
    removed on every exit path, including failures that occur before the job
    is launched.
    """
    os.putenv('PI_CLOUD_WORKER', '1')

    # Resources acquired so far; released in the finally clause below.
    temp_directory = None
    temp_files = []

    try:
        traces = json.loads(self.request.get('traces'))
        mapper = self.request.get('mapper')
        map_function = self.request.get('mapper_function')
        revision = self.request.get('revision')
        result_path = self.request.get('result')

        config = cloud_config.Get()

        if not _is_devserver():
            subprocess.call(['git', 'pull'], cwd=config.catapult_path)
            subprocess.call(['git', 'checkout', revision],
                            cwd=config.catapult_path)
            job_path = os.path.join(config.catapult_path, 'perf_insights',
                                    'bin', 'map_traces')
            cwd = config.catapult_path
        else:
            job_path = os.path.join('perf_insights', 'bin', 'map_traces')
            cwd = os.path.abspath(
                os.path.join(os.path.dirname(__file__), '../../../..'))

        # Download all the traces
        temp_directory = _DownloadTraces(traces)

        # Download the mapper
        map_file_handle, map_file_name = tempfile.mkstemp()
        temp_files.append(map_file_name)
        with os.fdopen(map_file_handle, 'w') as f:
            f.write(cloud_helper.ReadGCS(mapper))

        # Output goes here. Only the path is needed: the job and the reader
        # below both open it by name, so release the descriptor right away.
        output_handle, output_name = tempfile.mkstemp()
        temp_files.append(output_name)
        os.close(output_handle)

        map_handle = '%s:%s' % (map_file_name, map_function)
        args = [
            job_path, '--jobs=-1', '--corpus=local-directory', map_handle,
            '--trace_directory', temp_directory, '--output-file', output_name
        ]
        logging.info("Executing map job: %s" % args)

        map_job = subprocess.Popen(args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   cwd=cwd)
        stdout, stderr = map_job.communicate()

        logging.info('stdout:\n' + stdout)
        logging.info('stderr:\n' + stderr)

        with open(output_name, 'r') as f:
            cloud_helper.WriteGCS(result_path, f.read())
    except Exception:
        # Top-level boundary of the worker: record the failure at error
        # severity, traceback included.
        logging.exception('Map job failed.')
    finally:
        for path in temp_files:
            try:
                os.unlink(path)
            except OSError:
                logging.warning('Could not remove temp file: %s', path)
        if temp_directory is not None:
            shutil.rmtree(temp_directory, ignore_errors=True)
def post(self):
    """Runs gce_instance_map_job (map and/or reduce) and uploads the output.

    Request parameters read: 'traces' (JSON list handed to _DownloadTraces),
    'mapper' / 'mapper_function' and 'reducer' / 'reducer_function' (each
    optional; the source is fetched from GCS when given), 'revision'
    (checked out with git when not on the dev server), 'result' (GCS
    location the job output is written to) and 'timeout' (seconds; when
    absent the job is waited on without a deadline).

    The job runs in its own session. If it is still running when the
    timeout expires, its process group is sent SIGTERM; whatever is in the
    output file is still uploaded. Any failure is logged with its traceback
    and not propagated. All temporary files and the trace directory are
    removed on every exit path.
    """
    os.putenv('PI_CLOUD_WORKER', '1')

    # Resources acquired so far; released in the finally clause below.
    temp_directory = None
    temp_files = []

    def _fetch_to_temp_file(gcs_url):
        # Copies a GCS object into a new temp file registered for cleanup.
        handle, path = tempfile.mkstemp()
        temp_files.append(path)
        with os.fdopen(handle, 'w') as f:
            f.write(cloud_helper.ReadGCS(gcs_url))
        return path

    try:
        traces = json.loads(self.request.get('traces'))
        mapper = self.request.get('mapper')
        map_function = self.request.get('mapper_function')
        reducer = self.request.get('reducer')
        reducer_function = self.request.get('reducer_function')
        revision = self.request.get('revision')
        result_path = self.request.get('result')
        timeout = self.request.get('timeout')
        timeout = int(timeout) if timeout else None

        config = cloud_config.Get()

        if not _is_devserver():
            logging.info("Updating catapult checkout to: %s" % revision)
            subprocess.call(['git', 'checkout', revision],
                            cwd=config.catapult_path)
            job_path = os.path.join(config.catapult_path, 'perf_insights',
                                    'bin', 'gce_instance_map_job')
            cwd = config.catapult_path
        else:
            logging.info("DevServer: Ignoring update step.")
            job_path = os.path.join('perf_insights', 'bin',
                                    'gce_instance_map_job')
            cwd = os.path.abspath(
                os.path.join(os.path.dirname(__file__), '../../../..'))

        # Download all the traces
        temp_directory = _DownloadTraces(traces)

        # Output goes here. Only the path is needed: the job and the reader
        # below both open it by name, so release the descriptor right away.
        output_handle, output_name = tempfile.mkstemp()
        temp_files.append(output_name)
        os.close(output_handle)

        args = [
            job_path, '--corpus=local-directory', '--trace_directory',
            temp_directory, '--output-file', output_name
        ]
        if mapper:
            # Download the mapper
            map_handle = '%s:%s' % (_fetch_to_temp_file(mapper), map_function)
            args.extend(['--map_function_handle', map_handle])
        if reducer:
            # Download the reducer
            reducer_handle = '%s:%s' % (_fetch_to_temp_file(reducer),
                                        reducer_function)
            args.extend(['--reduce_function_handle', reducer_handle])

        logging.info("Executing map job: %s" % ' '.join(args))

        # The job's output goes to temp files rather than pipes: the process
        # is polled instead of drained, and a full pipe buffer would stall
        # it until the timeout.
        with tempfile.TemporaryFile() as stdout_file:
            with tempfile.TemporaryFile() as stderr_file:
                map_job = subprocess.Popen(args,
                                           stdout=stdout_file,
                                           stderr=stderr_file,
                                           cwd=cwd,
                                           preexec_fn=os.setsid)

                deadline = None
                if timeout is not None:
                    deadline = (datetime.datetime.now() +
                                datetime.timedelta(seconds=timeout))

                # poll() returns None while the job runs and the exit code
                # afterwards; an exit code of 0 also means "finished".
                while map_job.poll() is None:
                    if (deadline is not None and
                            datetime.datetime.now() >= deadline):
                        logging.warning('Job timed out, terminating.')
                        # TODO: Kill child processes.
                        os.killpg(os.getpgid(map_job.pid), signal.SIGTERM)
                        map_job.wait()
                        break
                    time.sleep(1)

                stdout_file.seek(0)
                stderr_file.seek(0)
                logging.info('stdout:\n%s', stdout_file.read())
                logging.info('stderr:\n%s', stderr_file.read())

        with open(output_name, 'r') as f:
            logging.info('Writing result to: %s' % result_path)
            cloud_helper.WriteGCS(result_path, f.read())
    except Exception:
        # Top-level boundary of the worker: record the failure at error
        # severity, traceback included.
        logging.exception('Map/reduce job failed.')
    finally:
        for path in temp_files:
            try:
                os.unlink(path)
            except OSError:
                logging.warning('Could not remove temp file: %s', path)
        if temp_directory is not None:
            shutil.rmtree(temp_directory, ignore_errors=True)