def run(self, job_id, job_class_str, output):
    """Extracts the results of a MR job and registers its completion.

    Args:
        job_id: str. The ID of the job to run.
        job_class_str: str. Should uniquely identify each type of job.
        output: str. The output produced by the job.
    """
    job_class = mapreduce_util.for_name(job_class_str)
    try:
        reader = input_readers.GoogleCloudStorageInputReader(output, 0)
        # Each element of the reader yields a file-like iterator of
        # JSON-encoded result lines; flatten and decode them all.
        decoded_results = [
            json.loads(line)
            for file_reader in reader
            for line in file_reader
        ]
        job_class.register_completion(job_id, decoded_results)
    except Exception as e:
        logging.exception(
            'Job %s failed at %s' % (
                job_id, utils.get_current_time_in_millisecs()
            )
        )
        job_class.register_failure(
            job_id,
            '%s\n%s' % (python_utils.UNICODE(e), traceback.format_exc()))
def run(self, job_name, sequence_num, namespace, output, complete_fn, kwargs):
    """Collects map/reduce output and records the job's final state.

    Reads the reducer output files, optionally invokes a completion
    callback, and transactionally marks the job as complete. Any
    exception along the way marks the job as failed instead, with the
    partial results gathered so far.

    Args:
        job_name: str. Name under which the durable job entity is stored.
        sequence_num: int. Sequence number of this run of the job.
        namespace: str. Datastore namespace in which to record the result.
        output: str. Location of the map/reduce output files.
        complete_fn: str or None. Dotted name of an optional callback to
            invoke with (kwargs, results) before completion is recorded.
        kwargs: dict. Arguments forwarded to complete_fn.
    """
    results = []
    try:
        iterator = input_readers.GoogleCloudStorageInputReader(output, 0)
        for file_reader in iterator:
            for item in file_reader:
                # Map/reduce puts reducer output into blobstore files as a
                # string obtained via "str(result)". Use AST as a safe
                # alternative to eval() to get the Python object back.
                results.append(ast.literal_eval(item))
        if complete_fn:
            util.for_name(complete_fn)(kwargs, results)
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._complete_job, job_name, sequence_num,
                MapReduceJob.build_output(self.root_pipeline_id, results))
    # Don't know what exceptions are currently, or will be in future,
    # thrown from Map/Reduce or Pipeline libraries; these are under
    # active development.
    #
    # pylint: disable=broad-except
    except Exception as ex:
        logging.critical('Failed running map/reduce job %s: %s',
                         job_name, str(ex))
        common_utils.log_exception_origin()
        # NOTE: this variant has no start time available, so (unlike the
        # timed pipeline) no duration is recorded on failure. The
        # previously computed-but-unused time.time() value was removed.
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._fail_job, job_name, sequence_num,
                MapReduceJob.build_output(
                    self.root_pipeline_id, results, str(ex)))
def run(self, job_name, sequence_num, time_started, namespace, output):
    """Collects map/reduce output and records completion with a duration.

    On success, transactionally marks the job complete; on any
    exception, marks it failed with whatever partial results were
    gathered. Either way the elapsed wall-clock time is recorded.
    """
    # TODO(mgainer): Notice errors earlier in pipeline, and mark job
    # as failed in that case as well.
    results = []
    try:
        for file_reader in input_readers.GoogleCloudStorageInputReader(
                output, 0):
            # Map/reduce puts reducer output into blobstore files as a
            # string obtained via "str(result)". Use AST as a safe
            # alternative to eval() to get the Python object back.
            results.extend(
                ast.literal_eval(item) for item in file_reader)
        duration = long(time.time() - time_started)
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._complete_job, job_name, sequence_num,
                MapReduceJob.build_output(self.root_pipeline_id, results),
                duration)
    # Don't know what exceptions are currently, or will be in future,
    # thrown from Map/Reduce or Pipeline libraries; these are under
    # active development.
    #
    # pylint: disable=broad-except
    except Exception as ex:
        duration = long(time.time() - time_started)
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._fail_job, job_name, sequence_num,
                MapReduceJob.build_output(
                    self.root_pipeline_id, results, str(ex)),
                duration)
def run(self, job_id, job_class_str, output):
    """Extracts the results of a MR job and registers its completion.

    Args:
        job_id: str. The ID of the job to run.
        job_class_str: str. Should uniquely identify each type of job.
        output: str. The output produced by the job.
    """
    job_class = mapreduce_util.for_name(job_class_str)
    try:
        iterator = input_readers.GoogleCloudStorageInputReader(output, 0)
        results_list = []
        for item_reader in iterator:
            for item in item_reader:
                results_list.append(json.loads(item))
        job_class.register_completion(job_id, results_list)
    except Exception as e:
        logging.error(traceback.format_exc())
        # Use lazy %-style arguments so the message is only formatted
        # if this log level is actually emitted.
        logging.error(
            'Job %s failed at %s', job_id,
            utils.get_current_time_in_millisecs())
        job_class.register_failure(
            job_id,
            '%s\n%s' % (unicode(e), traceback.format_exc()))