def run(self, job_id, job_class_str, output):
    """Extracts the results of a MR job and registers its completion.

    Args:
        job_id: str. The ID of the job to run.
        job_class_str: str. Should uniquely identify each type of job.
        output: str. The output produced by the job.
    """
    job_class = mapreduce_util.for_name(job_class_str)
    try:
        reader = input_readers.GoogleCloudStorageInputReader(output, 0)
        # Each element of the reader yields a file-like iterator of
        # JSON-encoded result lines; flatten and decode them all.
        decoded_results = [
            json.loads(line)
            for file_reader in reader
            for line in file_reader
        ]
        job_class.register_completion(job_id, decoded_results)
    except Exception as e:
        logging.exception(
            'Job %s failed at %s' % (
                job_id, utils.get_current_time_in_millisecs()
            )
        )
        job_class.register_failure(
            job_id,
            '%s\n%s' % (python_utils.UNICODE(e), traceback.format_exc()))
def run(self, job_name, sequence_num, namespace, output, complete_fn, kwargs):
    """Collects map/reduce output and records the job's final state.

    Reads the reducer output files, optionally invokes a completion
    callback, and transactionally marks the job as complete. Any
    exception along the way marks the job as failed instead, with the
    partial results gathered so far.

    Args:
        job_name: str. Name under which the durable job entity is stored.
        sequence_num: int. Sequence number of this run of the job.
        namespace: str. Datastore namespace in which to record the result.
        output: str. Location of the map/reduce output files.
        complete_fn: str or None. Dotted name of an optional callback to
            invoke with (kwargs, results) before completion is recorded.
        kwargs: dict. Arguments forwarded to complete_fn.
    """
    results = []
    try:
        iterator = input_readers.GoogleCloudStorageInputReader(output, 0)
        for file_reader in iterator:
            for item in file_reader:
                # Map/reduce puts reducer output into blobstore files as a
                # string obtained via "str(result)". Use AST as a safe
                # alternative to eval() to get the Python object back.
                results.append(ast.literal_eval(item))
        if complete_fn:
            util.for_name(complete_fn)(kwargs, results)
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._complete_job, job_name, sequence_num,
                MapReduceJob.build_output(self.root_pipeline_id, results))
    # Don't know what exceptions are currently, or will be in future,
    # thrown from Map/Reduce or Pipeline libraries; these are under
    # active development.
    #
    # pylint: disable=broad-except
    except Exception as ex:
        logging.critical('Failed running map/reduce job %s: %s',
                         job_name, str(ex))
        common_utils.log_exception_origin()
        # NOTE: this variant has no start time available, so (unlike the
        # timed pipeline) no duration is recorded on failure. The
        # previously computed-but-unused time.time() value was removed.
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._fail_job, job_name, sequence_num,
                MapReduceJob.build_output(
                    self.root_pipeline_id, results, str(ex)))
def run(self, job_name, sequence_num, time_started, namespace, output):
    """Collects map/reduce output and records completion with a duration.

    On success, transactionally marks the job complete; on any
    exception, marks it failed with whatever partial results were
    gathered. Either way the elapsed wall-clock time is recorded.
    """
    # TODO(mgainer): Notice errors earlier in pipeline, and mark job
    # as failed in that case as well.
    results = []
    try:
        for file_reader in input_readers.GoogleCloudStorageInputReader(
                output, 0):
            # Map/reduce puts reducer output into blobstore files as a
            # string obtained via "str(result)". Use AST as a safe
            # alternative to eval() to get the Python object back.
            results.extend(
                ast.literal_eval(item) for item in file_reader)
        duration = long(time.time() - time_started)
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._complete_job, job_name, sequence_num,
                MapReduceJob.build_output(self.root_pipeline_id, results),
                duration)
    # Don't know what exceptions are currently, or will be in future,
    # thrown from Map/Reduce or Pipeline libraries; these are under
    # active development.
    #
    # pylint: disable=broad-except
    except Exception as ex:
        duration = long(time.time() - time_started)
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._fail_job, job_name, sequence_num,
                MapReduceJob.build_output(
                    self.root_pipeline_id, results, str(ex)),
                duration)
def run(self, job_id, job_class_str, output):
    """Extracts the results of a MR job and registers its completion.

    Args:
        job_id: str. The ID of the job to run.
        job_class_str: str. Should uniquely identify each type of job.
        output: str. The output produced by the job.
    """
    job_class = mapreduce_util.for_name(job_class_str)
    try:
        iterator = input_readers.GoogleCloudStorageInputReader(output, 0)
        results_list = []
        for item_reader in iterator:
            for item in item_reader:
                results_list.append(json.loads(item))
        job_class.register_completion(job_id, results_list)
    except Exception as e:
        logging.error(traceback.format_exc())
        # Use lazy %-style arguments so the message is only formatted
        # if this log level is actually emitted.
        logging.error(
            'Job %s failed at %s', job_id,
            utils.get_current_time_in_millisecs())
        job_class.register_failure(
            job_id,
            '%s\n%s' % (unicode(e), traceback.format_exc()))