Ejemplo n.º 1
0
    def test_compare_lazy_var_and_simple_var(self):
        # Contrasts two ways for a nested function to reach a dict:
        #   * through a lazy_var handle: the cloudpickle dump of the function
        #     must be identical before and after the handle's get() is called;
        #   * by referencing a plain dict directly: the dump must differ once
        #     the dict has been filled.

        def _build_mapping():
            return dict((str(i), str(i)) for i in xrange(100))

        lazy_mapping = lazy_var.declare(_build_mapping)

        def _join_lazy_values():
            return '\t'.join(lazy_mapping.get().values())

        pickled_before_get = cloudpickle.dumps(_join_lazy_values)
        lazy_mapping.get()
        pickled_after_get = cloudpickle.dumps(_join_lazy_values)
        # Calling get() is expected to leave the serialized form untouched.
        self.assertEqual(pickled_before_get, pickled_after_get)

        plain_mapping = {}

        def _fill_plain_mapping():
            plain_mapping.update((str(i), str(i)) for i in xrange(100))
            return plain_mapping

        def _join_plain_values():
            return '\t'.join(plain_mapping.values())

        pickled_before_fill = cloudpickle.dumps(_join_plain_values)
        _fill_plain_mapping()
        pickled_after_fill = cloudpickle.dumps(_join_plain_values)
        # Once the referenced dict has content the dump is expected to differ.
        self.assertNotEqual(pickled_before_fill, pickled_after_fill)
Ejemplo n.º 2
0
 def __init__(self, user_input_base):
     """Set up the file base around a user-supplied input base.

     The parent initializer is called with 'user_define_format'. The input
     base is wrapped in _LoaderImpl and pickled to form the loader entity
     stored in ``input_format``; its serde becomes ``objector``.

     Args:
         user_input_base: user object that provides ``get_serde()``.
     """
     super(_UserDefineFileBase, self).__init__('user_define_format')
     pickled_loader = cloudpickle.dumps(_LoaderImpl(user_input_base))
     self.input_format = entity.Entity.of(entity.Entity.loader, pickled_loader)
     serde = user_input_base.get_serde()
     self.objector = serde
     self._user_input_base = user_input_base
Ejemplo n.º 3
0
 def to_proto_message(self):
     """Build the PbEntity message describing this functor.

     The entity name is the value stored under 'PythonImplFunctor' in
     entity_names; the config is the cloudpickle dump of a dict holding
     the wrapped function ('fn') and the 'expect_iterable' flag.

     Returns:
         The populated entity_pb2.PbEntity message.
     """
     from flume.proto import entity_pb2
     from bigflow.core import entity_names
     settings = {
         'fn': self.__fn,
         'expect_iterable': self.__expect_iterable,
     }
     message = entity_pb2.PbEntity()
     message.name = entity_names.__dict__['PythonImplFunctor']
     message.config = cloudpickle.dumps(settings)
     return message
Ejemplo n.º 4
0
 def to_proto_message(self):
     """Return a PbEntity whose config carries this functor's pickled state.

     Returns:
         An entity_pb2.PbEntity named after entity_names' 'PythonImplFunctor'
         entry, with ``config`` set to the cloudpickle dump of
         ``{'fn': ..., 'expect_iterable': ...}``.
     """
     from flume.proto import entity_pb2
     from bigflow.core import entity_names
     entity_message = entity_pb2.PbEntity()
     entity_message.name = entity_names.__dict__['PythonImplFunctor']
     entity_message.config = cloudpickle.dumps(
         dict([('fn', self.__fn),
               ('expect_iterable', self.__expect_iterable)]))
     return entity_message
Ejemplo n.º 5
0
 def get_entity_config(self):
     """Serialize this processor into a PbPythonProcessorConfig byte string.

     The message receives the pickled config (only when one is set), one
     functor message per registered function, and one type tag per side
     input (PCOLLECTION_TYPE for PCollection instances, POBJECT_TYPE
     otherwise).

     Returns:
         The result of ``SerializeToString()`` on the populated message.
     """
     from bigflow_python.proto import processor_pb2
     message = processor_pb2.PbPythonProcessorConfig()
     if self.__config is not None:
         message.config = cloudpickle.dumps(self.__config)
     for raw_fn in self.__fns:
         functor = Functor.of(raw_fn)
         message.functor.add().CopyFrom(functor.to_proto_message())
     for side_input in self.__side_inputs:
         if isinstance(side_input, pcollection.PCollection):
             type_tag = processor_pb2.PCOLLECTION_TYPE
         else:
             type_tag = processor_pb2.POBJECT_TYPE
         message.side_input_type.append(type_tag)
     return message.SerializeToString()
Ejemplo n.º 6
0
    def _generate_resource_message(self):
        """Build the resource proto message and store it on the instance.

        Every entry of entity.ENTITY_FOLDER (when that folder exists) is
        registered through ``self.add_file`` with a target path under
        entity.FLUME_WORKER_ENTITY_FOLDER. A deep copy of ``self._resource``
        then receives the pickled init and fini hooks, each list ordered by
        sorted dict key, as ".init_hooks" and ".fini_hooks". The copy's proto
        message is saved in ``self._resource_message``.
        """
        entity_dir = entity.ENTITY_FOLDER
        if os.path.exists(entity_dir):
            for entry in os.listdir(entity_dir):
                self.add_file(
                    os.path.join(entity_dir, entry),
                    os.path.join(entity.FLUME_WORKER_ENTITY_FOLDER, entry))

        def _hooks_in_key_order(hooks_by_key):
            return [hooks_by_key[key] for key in sorted(hooks_by_key.keys())]

        import copy
        from bigflow.core.serde import cloudpickle
        resource_copy = copy.deepcopy(self._resource)
        hook_files = (
            (self._init_hooks, ".init_hooks"),
            (self._fini_hooks, ".fini_hooks"),
        )
        for hooks, hook_file_name in hook_files:
            resource_copy.add_file_from_bytes(
                cloudpickle.dumps(_hooks_in_key_order(hooks)), hook_file_name)
        self._resource_message = resource_copy.to_proto_message()
Ejemplo n.º 7
0
 def get_entity_config(self):
     """Return the serialized PbPythonProcessorConfig for this processor.

     Fills in, in order: the cloudpickle dump of the config (skipped when
     the config is None), a functor message for each function, and a side
     input type for each side input.

     Returns:
         The serialized message, as produced by ``SerializeToString()``.
     """
     from bigflow_python.proto import processor_pb2
     config_message = processor_pb2.PbPythonProcessorConfig()
     if self.__config is not None:
         config_message.config = cloudpickle.dumps(self.__config)
     for user_fn in self.__fns:
         wrapped = Functor.of(user_fn)
         functor_slot = config_message.functor.add()
         functor_slot.CopyFrom(wrapped.to_proto_message())
     for side_input in self.__side_inputs:
         is_pcollection = isinstance(side_input, pcollection.PCollection)
         config_message.side_input_type.append(
             processor_pb2.PCOLLECTION_TYPE if is_pcollection
             else processor_pb2.POBJECT_TYPE)
     return config_message.SerializeToString()
Ejemplo n.º 8
0
    def test_compare_lazy_var_and_simple_var(self):
        # Part 1: a function that reads its dict through a lazy_var handle
        # must pickle to the same bytes whether or not get() has been called.
        # Part 2: a function that references a plain dict must pickle to
        # different bytes once that dict has been populated.

        def _make_dict():
            return {str(n): str(n) for n in xrange(100)}

        lazy_dict = lazy_var.declare(_make_dict)

        def _lazy_values_joined():
            loaded = lazy_dict.get()
            return '\t'.join(loaded.values())

        before_get = cloudpickle.dumps(_lazy_values_joined)
        lazy_dict.get()
        after_get = cloudpickle.dumps(_lazy_values_joined)
        # The dump is expected to be unaffected by the get() call.
        self.assertEqual(before_get, after_get)

        simple_dict = {}

        def _populate_simple_dict():
            for n in xrange(100):
                key = str(n)
                simple_dict[key] = str(n)
            return simple_dict

        def _simple_values_joined():
            return '\t'.join(simple_dict.values())

        before_populate = cloudpickle.dumps(_simple_values_joined)
        _populate_simple_dict()
        after_populate = cloudpickle.dumps(_simple_values_joined)
        # The dump is expected to change after the dict is populated.
        self.assertNotEqual(before_populate, after_populate)
Ejemplo n.º 9
0
    def _generate_resource_message(self):
        """Produce ``self._resource_message`` from the current resource state.

        Steps, in order:
          1. If entity.ENTITY_FOLDER exists, add each of its entries with
             ``self.add_file``, targeting entity.FLUME_WORKER_ENTITY_FOLDER.
          2. Deep-copy ``self._resource``.
          3. Attach the cloudpickle dumps of the init and fini hook lists
             (values taken in sorted-key order) to the copy as ".init_hooks"
             and ".fini_hooks".
          4. Store the copy's proto message in ``self._resource_message``.
        """
        source_dir = entity.ENTITY_FOLDER
        if os.path.exists(source_dir):
            for base_name in os.listdir(source_dir):
                local_path = os.path.join(source_dir, base_name)
                worker_path = os.path.join(
                    entity.FLUME_WORKER_ENTITY_FOLDER, base_name)
                self.add_file(local_path, worker_path)

        def _values_by_sorted_key(hooks_dict):
            return [hooks_dict[key] for key in sorted(hooks_dict.keys())]

        import copy
        from bigflow.core.serde import cloudpickle
        working_copy = copy.deepcopy(self._resource)

        init_payload = cloudpickle.dumps(_values_by_sorted_key(self._init_hooks))
        working_copy.add_file_from_bytes(init_payload, ".init_hooks")

        fini_payload = cloudpickle.dumps(_values_by_sorted_key(self._fini_hooks))
        working_copy.add_file_from_bytes(fini_payload, ".fini_hooks")

        self._resource_message = working_copy.to_proto_message()
Ejemplo n.º 10
0
    def __init__(self, name="", operator=None, message=None):
        """Create an entity from a name/operator pair or from a proto message.

        Args:
            name: entity name; must be non-empty when ``message`` is None.
            operator: must not be None when ``message`` is None. An
                EntitiedBySelf supplies its own name and config; a str is
                used as the config verbatim; any other object is pickled
                with cloudpickle to form the config.
            message: when not None, the entity is loaded through
                ``from_proto_message`` and ``name``/``operator`` are not used.

        Raises:
            error.InvalidLogicalPlanException: if ``message`` is None and
                ``name`` is empty or ``operator`` is None.
        """
        if message is not None:
            self.from_proto_message(message)
            return

        if len(name) == 0:
            raise error.InvalidLogicalPlanException("Invalid name for entity.")
        if operator is None:
            raise error.InvalidLogicalPlanException("Invalid operator(None) for entity.")

        if isinstance(operator, EntitiedBySelf):
            # The operator describes itself; the ``name`` argument is ignored.
            self.__name = operator.get_entity_name()
            self.__config = operator.get_entity_config()
            return

        self.__name = name
        if isinstance(operator, str):
            self.__config = operator
        else:
            self.__config = cloudpickle.dumps(operator)
Ejemplo n.º 11
0
    def __init__(self, name="", operator=None, message=None):
        """Initialize the entity's name and config.

        With a ``message`` the state comes from ``from_proto_message``.
        Without one, ``name`` and ``operator`` are validated and the
        name/config pair is derived from the operator's type:
        EntitiedBySelf -> its own entity name and config; str -> ``name``
        and the string itself; anything else -> ``name`` and the
        cloudpickle dump of the operator.

        Raises:
            error.InvalidLogicalPlanException: when no message is given and
                ``name`` has length zero or ``operator`` is None.
        """
        if message is not None:
            self.from_proto_message(message)
            return

        name_is_empty = len(name) == 0
        if name_is_empty:
            raise error.InvalidLogicalPlanException(
                "Invalid name for entity.")
        if operator is None:
            raise error.InvalidLogicalPlanException(
                "Invalid operator(None) for entity.")

        if isinstance(operator, EntitiedBySelf):
            entity_name = operator.get_entity_name()
            entity_config = operator.get_entity_config()
        elif isinstance(operator, str):
            entity_name, entity_config = name, operator
        else:
            entity_name, entity_config = name, cloudpickle.dumps(operator)

        self.__name = entity_name
        self.__config = entity_config
Ejemplo n.º 12
0
 def get_entity_config(self):
     """
     Return the cloudpickle dump of this object as its entity config.
     """
     pickled_self = cloudpickle.dumps(self)
     return pickled_self
Ejemplo n.º 13
0
 def get_entity_config(self):
     """ Internal function: return this object pickled with cloudpickle. """
     entity_config = cloudpickle.dumps(self)
     return entity_config
Ejemplo n.º 14
0
 def get_entity_config(self):
     """Return the entity config: this object serialized with cloudpickle."""
     serialized = cloudpickle.dumps(self)
     return serialized
Ejemplo n.º 15
0
 def config(self):
     """Return the pickled serializers to hand over to the cpp runtime."""
     pickled_serializers = cloudpickle.dumps(self.serializers)
     return pickled_serializers
Ejemplo n.º 16
0
 def config(self):
     """Config: pickle the arguments that are passed to the cpp runtime.

     Returns:
         The cloudpickle dump of the tuple
         ``(is_serialize, objector, apply_index)``.
     """
     arguments = (self._is_serialize, self._objector, self._apply_index)
     return cloudpickle.dumps(arguments)
Ejemplo n.º 17
0
 def config(self):
     """Config: pickle sep, fields_type and the ignore flags for the cpp runtime.

     Returns:
         The cloudpickle dump of the tuple
         ``(sep, fields_type, ignore_overflow, ignore_illegal_line)``.
     """
     arguments = (
         self._sep,
         self._fields_type,
         self._ignore_overflow,
         self._ignore_illegal_line,
     )
     return cloudpickle.dumps(arguments)
Ejemplo n.º 18
0
 def config(self):
     """Serialize ``self.serializers`` with cloudpickle for the cpp runtime."""
     serializers = self.serializers
     return cloudpickle.dumps(serializers)
Ejemplo n.º 19
0
 def config(self):
     """Config: return the pickled argument tuple for the cpp runtime.

     The tuple is ``(is_serialize, objector, apply_index)``, in that order.
     """
     payload = (self._is_serialize, self._objector, self._apply_index)
     pickled_payload = cloudpickle.dumps(payload)
     return pickled_payload
Ejemplo n.º 20
0
 def config(self):
     """Config: return the pickled parsing options for the cpp runtime.

     The pickled tuple is
     ``(sep, fields_type, ignore_overflow, ignore_illegal_line)``.
     """
     options = (self._sep, self._fields_type)
     options += (self._ignore_overflow, self._ignore_illegal_line)
     return cloudpickle.dumps(options)