コード例 #1
0
ファイル: roll_pair.py プロジェクト: mask-pp/eggroll
    def put(self, k, v, options: dict = None):
        """Store one key/value pair in the partition selected for the key.

        The key and value are serialized with the serdes named by the store
        locator, ``self.partitioner`` maps the serialized key to a partition
        id, and a PUT task is sent synchronously to the egg returned by
        ``self.ctx.route_to_egg`` for that partition.

        Args:
            k: key to store; serialized before it is partitioned.
            v: value to store; serialized before it is sent.
            options: accepted for interface compatibility; this method does
                not read it.

        Returns:
            The ``_value`` field of the ``ErPair`` response, exactly as
            received (it is not deserialized here).
        """
        store_locator = self.__store._store_locator
        # Build the serdes once and reuse it for both the key and the value.
        serdes = create_serdes(store_locator._serdes)
        k, v = serdes.serialize(k), serdes.serialize(v)
        er_pair = ErPair(key=k, value=v)

        partition_id = self.partitioner(k)
        egg = self.ctx.route_to_egg(self.__store._partitions[partition_id])
        task_inputs = [ErPartition(id=partition_id, store_locator=store_locator)]
        # NOTE(review): the output partition id is fixed at 0 instead of
        # partition_id; kept unchanged here -- confirm this is intended.
        task_outputs = [ErPartition(id=0, store_locator=store_locator)]

        job_id = generate_job_id(self.__session_id, RollPair.PUT)
        # The pair travels to the egg as the pickled body of the only functor;
        # the job itself declares no output stores.
        job = ErJob(id=job_id,
                    name=RollPair.PUT,
                    inputs=[self.__store],
                    outputs=[],
                    functors=[ErFunctor(name=RollPair.PUT, body=cloudpickle.dumps(er_pair))])

        task = ErTask(id=generate_task_id(job_id, partition_id),
                      name=RollPair.PUT,
                      inputs=task_inputs,
                      outputs=task_outputs,
                      job=job)
        job_resp = self.__command_client.simple_sync_send(
                input=task,
                output_type=ErPair,
                endpoint=egg._command_endpoint,
                command_uri=CommandURI(f'{RollPair.EGG_PAIR_URI_PREFIX}/{RollPair.RUN_TASK}'),
                serdes_type=self.__command_serdes
        )
        return job_resp._value
コード例 #2
0
ファイル: roll_pair.py プロジェクト: mask-pp/eggroll
    def get(self, k, options: dict = None):
        """Fetch the value stored under ``k``.

        The key is serialized with the store's serdes, mapped to a partition
        by ``self.partitioner``, and a GET task is sent synchronously to the
        egg routed for that partition.

        Args:
            k: key to look up.
            options: optional settings dict; not read by this method.

        Returns:
            The response value deserialized with ``self.value_serdes``, or
            ``None`` when the response value equals ``b''``.
        """
        if options is None:
            options = {}
        locator = self.__store._store_locator
        serialized_key = create_serdes(locator._serdes).serialize(k)
        # Only the key is populated; it is shipped as the functor body.
        request_pair = ErPair(key=serialized_key, value=None)

        target_partition = self.partitioner(serialized_key)
        target_egg = self.ctx.route_to_egg(self.__store._partitions[target_partition])
        task_inputs = [ErPartition(id=target_partition, store_locator=locator)]
        task_outputs = [ErPartition(id=target_partition, store_locator=locator)]

        job_id = generate_job_id(self.__session_id, RollPair.GET)
        get_functor = ErFunctor(name=RollPair.GET, body=cloudpickle.dumps(request_pair))
        job = ErJob(id=job_id,
                    name=RollPair.GET,
                    inputs=[self.__store],
                    outputs=[self.__store],
                    functors=[get_functor])

        task = ErTask(id=generate_task_id(job_id, target_partition),
                      name=RollPair.GET,
                      inputs=task_inputs,
                      outputs=task_outputs,
                      job=job)
        response = self.__command_client.simple_sync_send(
                input=task,
                output_type=ErPair,
                endpoint=target_egg._command_endpoint,
                command_uri=self.RUN_TASK_URI,
                serdes_type=self.__command_serdes
        )

        # Empty bytes in the response are reported to the caller as None.
        if response._value != b'':
            return self.value_serdes.deserialize(response._value)
        return None
コード例 #3
0
    def get(self, k, options: dict = None):
        """Fetch the value stored under ``k``.

        The key is serialized with the store's serdes, mapped to a partition
        by ``self.partitioner``, and a GET task is sent synchronously to the
        egg routed for that partition.

        Args:
            k: key to look up.
            options: accepted for interface compatibility; this method does
                not read it.

        Returns:
            The response value deserialized with ``self.value_serdes``, or
            ``None`` when the response value equals ``b''``.
        """
        L.debug(f"get k: {k}")
        store_locator = self.__store._store_locator
        k = create_serdes(store_locator._serdes).serialize(k)
        # Only the key is populated; it is shipped as the functor body.
        er_pair = ErPair(key=k, value=None)

        partition_id = self.partitioner(k)
        egg = self.ctx.route_to_egg(self.__store._partitions[partition_id])
        L.info(
            f"partitions count: {self.__store._store_locator._total_partitions}, target partition: {partition_id}, endpoint: {egg._command_endpoint}"
        )
        task_inputs = [
            ErPartition(id=partition_id, store_locator=store_locator)
        ]
        task_outputs = [
            ErPartition(id=partition_id, store_locator=store_locator)
        ]

        job_id = generate_job_id(self.__session_id, RollPair.GET)
        # The job declares no output stores; only the task carries an output
        # partition.
        job = ErJob(id=job_id,
                    name=RollPair.GET,
                    inputs=[self.__store],
                    outputs=[],
                    functors=[ErFunctor(body=cloudpickle.dumps(er_pair))])

        task = ErTask(id=generate_task_id(job_id, partition_id),
                      name=RollPair.GET,
                      inputs=task_inputs,
                      outputs=task_outputs,
                      job=job)
        job_resp = self.__command_client.simple_sync_send(
            input=task,
            output_type=ErPair,
            endpoint=egg._command_endpoint,
            command_uri=CommandURI(
                f'{RollPair.EGG_PAIR_URI_PREFIX}/{RollPair.RUN_TASK}'),
            serdes_type=self.__command_serdes)

        # Empty bytes in the response are reported to the caller as None.
        return self.value_serdes.deserialize(
            job_resp._value) if job_resp._value != b'' else None
コード例 #4
0
 def populate_processor(self, store: ErStore):
     """Return a new ``ErStore`` whose partitions each carry a processor.

     Every partition of ``store`` is rebuilt with the same id and store
     locator, plus the processor returned by ``self.route_to_egg`` for it.
     The store locator and options are taken from ``store`` unchanged.
     """
     routed_partitions = [
         ErPartition(id=part._id,
                     store_locator=part._store_locator,
                     processor=self.route_to_egg(part))
         for part in store._partitions
     ]
     return ErStore(store_locator=store._store_locator,
                    partitions=routed_partitions,
                    options=store._options)
コード例 #5
0
 def populate_processor(self, store: ErStore):
     """Return a new ``ErStore`` with processors re-resolved by rank.

     For each partition of ``store`` the server node id is read from its
     current processor, ``self.get_rank_in_node`` gives the rank for the
     partition id on that node, and ``self.route_to_egg_by_rank`` supplies
     the processor for the rebuilt partition. The store locator and options
     are taken from ``store`` unchanged.
     """
     def _repopulate(part):
         # Rebuild one partition with its processor and rank_in_node set.
         node_id = part._processor._server_node_id
         rank = self.get_rank_in_node(part._id, node_id)
         return ErPartition(id=part._id,
                            store_locator=part._store_locator,
                            processor=self.route_to_egg_by_rank(node_id, rank),
                            rank_in_node=rank)

     repopulated = [_repopulate(part) for part in store._partitions]
     return ErStore(store_locator=store._store_locator,
                    partitions=repopulated,
                    options=store._options)
コード例 #6
0
    def delete(self, k, options: dict = None):
        """Send a DELETE task for key ``k`` to the partition that owns it.

        The key is serialized with the store's serdes, mapped to a partition
        by ``self.partitioner``, and a DELETE task is sent synchronously to
        the egg routed for that partition.

        Args:
            k: key to delete.
            options: accepted for interface compatibility; this method does
                not read it.

        Returns:
            None. The response to the request is not inspected.
        """
        store_locator = self.__store._store_locator
        key = create_serdes(store_locator._serdes).serialize(k)
        # Only the key is populated; it is shipped as the functor body.
        er_pair = ErPair(key=key, value=None)

        partition_id = self.partitioner(key)
        egg = self.ctx.route_to_egg(self.__store._partitions[partition_id])
        L.info(egg._command_endpoint)
        L.info(f"count: {self.__store._store_locator._total_partitions}")
        task_inputs = [
            ErPartition(id=partition_id, store_locator=store_locator)
        ]
        task_outputs = [
            ErPartition(id=partition_id, store_locator=store_locator)
        ]

        job_id = generate_job_id(self.__session_id, RollPair.DELETE)
        # The job declares no output stores; only the task carries an output
        # partition.
        job = ErJob(id=job_id,
                    name=RollPair.DELETE,
                    inputs=[self.__store],
                    outputs=[],
                    functors=[ErFunctor(body=cloudpickle.dumps(er_pair))])
        task = ErTask(id=generate_task_id(job_id, partition_id),
                      name=RollPair.DELETE,
                      inputs=task_inputs,
                      outputs=task_outputs,
                      job=job)
        L.info("start send req")
        # Synchronous send; the returned ErPair is intentionally discarded.
        self.__command_client.simple_sync_send(
            input=task,
            output_type=ErPair,
            endpoint=egg._command_endpoint,
            command_uri=CommandURI(
                f'{RollPair.EGG_PAIR_URI_PREFIX}/{RollPair.RUN_TASK}'),
            serdes_type=self.__command_serdes)
コード例 #7
0
ファイル: roll_pair.py プロジェクト: mask-pp/eggroll
    def cleanup(self, name, namespace, options: dict = None):
        """Destroy the store(s) identified by ``namespace`` and ``name``.

        When ``name`` is ``'*'`` a job named ``RollPair.DESTROY`` is sent to
        the first egg of every entry in the session's ``_eggs`` mapping; once
        every returned future has completed, the cluster manager client is
        asked to delete the store. For any other name, the cluster manager
        client's ``get_store_from_namespace`` is called with a descriptor
        built from the arguments, and each store it returns is destroyed
        through a ``RollPair``.

        Args:
            name: store name, or ``'*'`` for the wildcard path.
            namespace: store namespace; must be truthy.
            options: optional dict. Keys read here: ``total_partitions``,
                ``partitioner``, ``serdes`` and, on the wildcard path,
                ``store_type``.

        Raises:
            ValueError: if ``namespace`` is falsy.
        """
        if not namespace:
            raise ValueError('namespace cannot be blank')
        L.debug(f'cleaning up namespace={namespace}, name={name}')
        if options is None:
            options = {}
        total_partitions = options.get('total_partitions', 1)
        partitioner = options.get('partitioner', PartitionerTypes.BYTESTRING_HASH)
        store_serdes = options.get('serdes', self.default_store_serdes)

        if name != '*':
            # todo:1: add combine options to pass it through
            # Session options form the base; caller-supplied options win.
            merged_options = self.__session.get_all_options()
            merged_options.update(options)
            final_options = merged_options.copy()

            query_locator = ErStoreLocator(
                    store_type=StoreTypes.ROLLPAIR_LMDB,
                    namespace=namespace,
                    name=name,
                    total_partitions=total_partitions,
                    partitioner=partitioner,
                    serdes=store_serdes)
            query_store = ErStore(store_locator=query_locator, options=final_options)
            found = self.__session._cluster_manager_client.get_store_from_namespace(query_store)
            L.trace('res={}'.format(found._stores))
            if found._stores is None:
                return
            L.trace("item count={}".format(len(found._stores)))
            for matched in found._stores:
                L.trace("item namespace={} name={}".format(matched._store_locator._namespace,
                                                           matched._store_locator._name))
                RollPair(er_store=matched, rp_ctx=self).destroy()
            return

        # Wildcard path: one DESTROY task per entry of the session's egg map.
        store_type = options.get('store_type', '*')
        L.debug(f'cleaning up whole store_type={store_type}, namespace={namespace}, name={name}')
        wildcard_locator = ErStoreLocator(namespace=namespace,
                                          name=name,
                                          store_type=store_type)
        er_store = ErStore(store_locator=wildcard_locator)
        job_id = generate_job_id(namespace, tag=RollPair.CLEANUP)
        job = ErJob(id=job_id,
                    name=RollPair.DESTROY,
                    inputs=[er_store],
                    options=options)

        cleanup_partitions = [ErPartition(id=-1, store_locator=er_store._store_locator)]
        call_args = []
        for node_eggs in self.__session._eggs.values():
            # Only the first egg of each entry receives the task.
            endpoint = node_eggs[0]._command_endpoint
            destroy_task = ErTask(id=generate_task_id(job_id, endpoint._host),
                                  name=job._name,
                                  inputs=cleanup_partitions,
                                  job=job)
            call_args.append(([destroy_task], endpoint))

        pending_calls = self.__command_client.async_call(
                args=call_args,
                output_types=[ErTask],
                command_uri=CommandURI(f'{RollPair.EGG_PAIR_URI_PREFIX}/{RollPair.RUN_TASK}'))

        # Wait for every call to finish before removing the store record.
        for pending in pending_calls:
            pending.result()

        self.get_session()._cluster_manager_client.delete_store(er_store)
コード例 #8
0
    def _decompose_job(self, job: ErJob):
        """Break ``job`` into per-partition tasks paired with endpoints.

        The partition count and processors of the first input store (and of
        the first output store, if the job has outputs) drive the split: one
        task is built for each index up to the larger of the two partition
        counts. At each index, every input store contributes a partition
        bound to the input processor for that index, and likewise for the
        output stores.

        Returns:
            A list of ``(tasks, command_endpoint)`` tuples where ``tasks`` is
            a single-element list. If the input and output processors at an
            index report different server node ids, the same task list is
            emitted once for each side that exists.
        """
        first_input = job._inputs[0]
        input_count = first_input._store_locator._total_partitions
        output_count = job._outputs[0]._store_locator._total_partitions if job._outputs else 0
        task_count = max(input_count, output_count)

        routed_inputs = self.populate_processor(first_input)._partitions
        routed_outputs = (self.populate_processor(job._outputs[0])._partitions
                          if output_count > 0 else [])

        plan = []
        for idx in range(task_count):
            # Input side: all input stores share the processor at this index.
            has_input = idx < input_count
            in_processor = routed_inputs[idx]._processor if has_input else None
            in_node_id = in_processor._server_node_id if has_input else None
            task_inputs = [
                ErPartition(id=idx,
                            store_locator=src._store_locator,
                            processor=in_processor)
                for src in job._inputs
            ] if has_input else []

            # Output side, resolved the same way.
            has_output = idx < output_count
            out_processor = routed_outputs[idx]._processor if has_output else None
            out_node_id = out_processor._server_node_id if has_output else None
            task_outputs = [
                ErPartition(id=idx,
                            store_locator=dst._store_locator,
                            processor=out_processor)
                for dst in job._outputs
            ] if has_output else []

            tasks = [ErTask(id=generate_task_id(job._id, idx),
                            name=f'{job._name}',
                            inputs=task_inputs,
                            outputs=task_outputs,
                            job=job)]

            if in_node_id == out_node_id:
                # Both sides are on the same node: send once, to the input side.
                plan.append((tasks, in_processor._command_endpoint))
                continue
            if in_node_id is not None:
                plan.append((tasks, in_processor._command_endpoint))
            if out_node_id is not None:
                plan.append((tasks, out_processor._command_endpoint))

        return plan