コード例 #1
0
    def test_portal_worker(self):
        self._prepare_test()
        map_task = dp_pb.MapTask()
        map_task.output_base_dir = self._partition_output_dir
        map_task.output_partition_num = self._output_partition_num
        map_task.partition_id = 0
        map_task.task_name = 'map_part_{}'.format(map_task.partition_id)
        map_task.part_field = 'example_id'
        map_task.data_portal_type = dp_pb.DataPortalType.Streaming
        for partition_id in range(self._input_partition_num):
            map_task.fpaths.append(self._get_input_fpath(partition_id))

        # partitioner
        task = dp_pb.NewTaskResponse()
        task.map_task.CopyFrom(map_task)
        self._portal_worker._run_map_task(task.map_task)

        self._check_partitioner(task.map_task)

        # merge
        total_cnt = 0
        for partition_id in range(self._output_partition_num):
            reduce_task = dp_pb.ReduceTask()
            reduce_task.map_base_dir = self._partition_output_dir
            reduce_task.reduce_base_dir = self._merge_output_dir
            reduce_task.partition_id = partition_id
            reduce_task.task_name = 'reduce_part_{}'.format(partition_id)
            self._portal_worker._run_reduce_task(reduce_task)
            total_cnt += self._check_merge(reduce_task)

        self.assertEqual(total_cnt,
                         self._partition_item_num * self._input_partition_num)
        self._clean_up()
コード例 #2
0
    def test_portal_worker(self):
        self._prepare_test()
        map_task = dp_pb.MapTask()
        map_task.output_base_dir = self._partition_output_dir
        map_task.output_partition_num = self._output_partition_num
        map_task.partition_id = 0
        for partition_id in range(self._input_partition_num):
            map_task.fpaths.append(self._get_input_fpath(partition_id))

        # partitioner
        task = dp_pb.NewTaskResponse()
        task.map_task.CopyFrom(map_task)
        self._portal_worker._run_map_task(task.map_task)

        self._check_partitioner(task.map_task)

        # merge
        total_cnt = 0
        for partition_id in range(self._output_partition_num):
            reduce_task = dp_pb.ReduceTask()
            reduce_task.map_base_dir = self._partition_output_dir
            reduce_task.reduce_base_dir = self._merge_output_dir
            reduce_task.partition_id = partition_id
            self._portal_worker._run_reduce_task(reduce_task)
            total_cnt += self._check_merge(reduce_task)

        self.assertEqual(total_cnt, self._partition_item_num * self._input_partition_num)
        self._clean_up()
コード例 #3
0
 def RequestNewTask(self, request, context):
     response = dp_pb.NewTaskResponse()
     finished, task = \
         self._data_portal_job_manager.alloc_task(request.rank_id)
     if task is not None:
         if isinstance(task, dp_pb.MapTask):
             response.map_task.MergeFrom(task)
         else:
             assert isinstance(task, dp_pb.ReduceTask)
             response.reduce_task.MergeFrom(task)
     elif not finished:
         response.pending.MergeFrom(empty_pb2.Empty())
     else:
         response.finished.MergeFrom(empty_pb2.Empty())
     return response
コード例 #4
0
    def _run_map_task(self):
        map_task = dp_pb.MapTask()
        map_task.output_base_dir = self._partition_output_dir
        map_task.output_partition_num = self._output_partition_num
        map_task.partition_id = 0
        map_task.task_name = 'map_part_{}'.format(map_task.partition_id)
        map_task.part_field = 'example_id'
        map_task.data_portal_type = dp_pb.DataPortalType.Streaming
        for partition_id in range(self._input_partition_num):
            map_task.fpaths.append(self._get_input_fpath(partition_id))

        # partitioner
        task = dp_pb.NewTaskResponse()
        task.map_task.CopyFrom(map_task)
        self._portal_worker._run_map_task(task.map_task)
        return task