Esempio n. 1
0
    def start(self, *args, **kwargs):
        """Start the map job for this class through ``map_entities``.

        The class must define ``map`` directly in its own ``__dict__``; an
        inherited ``map`` does not satisfy the check. ``finish`` is optional:
        when it is defined directly on the class (and is truthy) its dotted
        path is passed as ``finalize_func``, otherwise ``None`` is passed.

        Args:
            *args: Extra positional arguments forwarded to ``map_entities``.
            **kwargs: Extra keyword arguments forwarded to ``map_entities``.
                ``queue_name`` is consumed here and takes precedence over
                ``self.queue_name``; ``db`` is always overwritten with
                ``self.db``.

        Returns:
            Whatever ``map_entities`` returns.

        Raises:
            TypeError: If the class does not define ``map`` itself.
        """
        cls = self.__class__
        if 'map' not in cls.__dict__:
            # The placeholder is a named field, so the class has to be passed
            # by keyword; passing it positionally raises KeyError('cls') and
            # hides the intended TypeError.
            raise TypeError(
                'No static map method defined on class {cls}'.format(cls=cls))

        finish = cls.finish if 'finish' in cls.__dict__ else None

        kwargs["db"] = self.db
        # Remove queue_name before the call so it is never forwarded a second
        # time through **kwargs, without relying on argument evaluation order.
        queue_name = kwargs.pop('queue_name', self.queue_name)

        # Callables are handed over as dotted paths built from the class path.
        class_path = qualname(cls)

        return map_entities(
            self.model._meta.db_table if self.model else self.kind,
            settings.DATABASES.get(self.db, {}).get('NAMESPACE', ''),
            ".".join([class_path, "run_map"]),
            finalize_func=".".join([class_path, "finish"]) if finish else None,
            _output_writer=self.output_writer_spec,
            _shards=self.shard_count,
            _job_name=self.job_name,
            _queue_name=queue_name,
            *args,
            **kwargs)
Esempio n. 2
0
 def test_mapreduce_django_input(self):
     """
         Run a map/reduce pipeline that reads its input through the
         DjangoInputReader. Every MRTestNode counter is asserted to be 1
         before the pipeline starts and 2 after the task queues have been
         processed.
     """
     for existing in MRTestNode.objects.all():
         self.assertEqual(existing.counter, 1)

     mapper_settings = {
         'count': 10,
         'input_reader': {'model': 'mapreduce.MRTestNode'},
     }
     reducer_settings = {
         "mime_type": "text/plain",
         'output_writer': {'bucket_name': 'test'},
     }
     pipeline = MapreducePipeline(
         "word_count",
         qualname(model_counter_increment),
         qualname(word_count_reduce),
         "djangae.contrib.processing.mapreduce.input_readers.DjangoInputReader",
         "mapreduce.output_writers.GoogleCloudStorageOutputWriter",
         mapper_params=mapper_settings,
         reducer_params=reducer_settings,
         shards=5)
     pipeline.start()
     process_task_queues()

     for updated in MRTestNode.objects.all():
         self.assertEqual(updated.counter, 2)
Esempio n. 3
0
    def start(self, *args, **kwargs):
        """Start the map job for this class through ``map_queryset``.

        The queryset mapped over is ``self.model.objects.using(self.db).all()``.
        The class must define ``map`` directly in its own ``__dict__``; an
        inherited ``map`` does not satisfy the check. ``finish`` is optional:
        when it is defined directly on the class (and is truthy) its dotted
        path is passed as ``finalize_func``, otherwise ``None`` is passed.

        Args:
            *args: Extra positional arguments forwarded to ``map_queryset``.
            **kwargs: Extra keyword arguments forwarded to ``map_queryset``.
                ``queue_name`` is consumed here and takes precedence over
                ``self.queue_name``.

        Returns:
            Whatever ``map_queryset`` returns.

        Raises:
            TypeError: If the class does not define ``map`` itself.
        """
        cls = self.__class__
        if 'map' not in cls.__dict__:
            # The placeholder is a named field, so the class has to be passed
            # by keyword; passing it positionally raises KeyError('cls') and
            # hides the intended TypeError.
            raise TypeError(
                'No static map method defined on class {cls}'.format(cls=cls))

        finish = self.finish if 'finish' in cls.__dict__ else None

        # Remove queue_name before the call so it is never forwarded a second
        # time through **kwargs, without relying on argument evaluation order.
        queue_name = kwargs.pop('queue_name', self.queue_name)

        # We have to pass dotted paths to functions here because staticmethods don't have
        # any concept of self, or the class they are defined in.
        class_path = qualname(cls)

        return map_queryset(
            self.model.objects.using(self.db).all(),
            ".".join([class_path, "run_map"]),
            finalize_func=".".join([class_path, "finish"]) if finish else None,
            _shards=self.shard_count,
            _output_writer=self.output_writer_spec,
            _output_writer_kwargs=None,
            _job_name=self.job_name,
            _queue_name=queue_name,
            *args,
            **kwargs)
Esempio n. 4
0
 def test_mapreduce_basic(self):
     """
         Run a map/reduce pipeline fed by the RandomStringInputReader and
         process the resulting task queues. The test makes no explicit
         assertions; it only requires that the run completes.
     """
     mapper_settings = {'count': 10}
     reducer_settings = {
         "mime_type": "text/plain",
         'output_writer': {'bucket_name': 'test'},
     }
     pipeline = MapreducePipeline(
         "word_count",
         qualname(letter_count_map),
         qualname(word_count_reduce),
         "mapreduce.input_readers.RandomStringInputReader",
         "mapreduce.output_writers.GoogleCloudStorageOutputWriter",
         mapper_params=mapper_settings,
         reducer_params=reducer_settings,
         shards=1)
     pipeline.start()
     process_task_queues()
Esempio n. 5
0
    def start(self, *args, **kwargs):
        """Start the map job for this class through ``map_entities``.

        The class must define ``map`` directly in its own ``__dict__``; an
        inherited ``map`` does not satisfy the check. ``finish`` is optional:
        when it is defined directly on the class (and is truthy) its dotted
        path is passed as ``finalize_func``, otherwise ``None`` is passed.

        Args:
            *args: Extra positional arguments forwarded to ``map_entities``.
            **kwargs: Extra keyword arguments forwarded to ``map_entities``.
                ``queue_name`` is consumed here and takes precedence over
                ``self.queue_name``; ``db`` is always overwritten with
                ``self.db``.

        Returns:
            Whatever ``map_entities`` returns.

        Raises:
            TypeError: If the class does not define ``map`` itself.
        """
        cls = self.__class__
        if 'map' not in cls.__dict__:
            # The placeholder is a named field, so the class has to be passed
            # by keyword; passing it positionally raises KeyError('cls') and
            # hides the intended TypeError.
            raise TypeError('No static map method defined on class {cls}'.format(cls=cls))

        finish = cls.finish if 'finish' in cls.__dict__ else None

        kwargs["db"] = self.db
        # Remove queue_name before the call so it is never forwarded a second
        # time through **kwargs, without relying on argument evaluation order.
        queue_name = kwargs.pop('queue_name', self.queue_name)

        # Callables are handed over as dotted paths built from the class path.
        class_path = qualname(cls)

        return map_entities(
            self.model._meta.db_table if self.model else self.kind,
            settings.DATABASES.get(self.db, {}).get('NAMESPACE', ''),
            ".".join([class_path, "run_map"]),
            finalize_func=".".join([class_path, "finish"]) if finish else None,
            _output_writer=self.output_writer_spec,
            _shards=self.shard_count,
            _job_name=self.job_name,
            _queue_name=queue_name,
            *args,
            **kwargs
        )
Esempio n. 6
0
    def start(self, *args, **kwargs):
        """Start the map job for this class through ``map_queryset``.

        The queryset mapped over is ``self.model.objects.using(self.db).all()``.
        The class must define ``map`` directly in its own ``__dict__``; an
        inherited ``map`` does not satisfy the check. ``finish`` is optional:
        when it is defined directly on the class (and is truthy) its dotted
        path is passed as ``finalize_func``, otherwise ``None`` is passed.

        Args:
            *args: Extra positional arguments forwarded to ``map_queryset``.
            **kwargs: Extra keyword arguments forwarded to ``map_queryset``.
                ``queue_name`` is consumed here and takes precedence over
                ``self.queue_name``.

        Returns:
            Whatever ``map_queryset`` returns.

        Raises:
            TypeError: If the class does not define ``map`` itself.
        """
        cls = self.__class__
        if 'map' not in cls.__dict__:
            # The placeholder is a named field, so the class has to be passed
            # by keyword; passing it positionally raises KeyError('cls') and
            # hides the intended TypeError.
            raise TypeError('No static map method defined on class {cls}'.format(cls=cls))

        finish = self.finish if 'finish' in cls.__dict__ else None

        # Remove queue_name before the call so it is never forwarded a second
        # time through **kwargs, without relying on argument evaluation order.
        queue_name = kwargs.pop('queue_name', self.queue_name)

        # We have to pass dotted paths to functions here because staticmethods don't have
        # any concept of self, or the class they are defined in.
        class_path = qualname(cls)

        return map_queryset(
            self.model.objects.using(self.db).all(),
            ".".join([class_path, "run_map"]),
            finalize_func=".".join([class_path, "finish"]) if finish else None,
            _shards=self.shard_count,
            _output_writer=self.output_writer_spec,
            _output_writer_kwargs=None,
            _job_name=self.job_name,
            _queue_name=queue_name,
            *args,
            **kwargs
        )