예제 #1
0
def moca_loads(data: Optional[bytes]) -> Any:
    """Deserialize *data* produced by the matching dump helper.

    ``None`` passes through unchanged.  A payload prefixed with the
    magic bytes ``b'moca'`` is decompressed before unpickling; any
    other payload is unpickled as-is.

    NOTE(review): this unpickles arbitrary bytes — never feed it
    untrusted input.
    """
    if data is None:
        return None
    # The 4-byte magic prefix marks a compressed payload.
    is_compressed = data[:4] == b'moca'
    if is_compressed:
        return p_loads(decompress(data[4:]))
    return p_loads(data)
예제 #2
0
def _receive(channel_info, name, tag):
    """Consume from the RabbitMQ channel until the message identified by
    ``name``/``tag`` has fully arrived, then return it.

    Plain objects (content-type ``text/plain``) are unpickled and returned
    directly.  RDD transfers (content-type ``application/json``) may span
    several messages; partial chunks are unioned in ``message_cache`` and
    the function returns only once the cached RDD's element count matches
    the ``total_size`` header.  Messages addressed to other name/tag pairs
    are cached under their own key for a later call to pick up.
    """
    partitions = -1
    party_id = channel_info._party_id
    role = channel_info._role
    wish_cache_key = _get_message_cache_key(name, tag, party_id, role)

    # Fast path: the requested message was already received by an earlier
    # call and parked in the cache.
    if wish_cache_key in message_cache:
        return message_cache[wish_cache_key]

    for method, properties, body in channel_info.consume():
        LOGGER.debug(
            f"[rabbitmq._receive] method: {method}, properties: {properties}.")
        if properties.message_id != name or properties.correlation_id != tag:
            # todo: fix this
            # Mismatched messages are only logged; they are still cached
            # below under their own key so nothing is lost.
            LOGGER.warning(
                f"[rabbitmq._receive]: require {name}.{tag}, got {properties.message_id}.{properties.correlation_id}"
            )

        cache_key = _get_message_cache_key(properties.message_id,
                                           properties.correlation_id, party_id,
                                           role)
        # object
        if properties.content_type == 'text/plain':
            # NOTE(review): unpickles network payloads — trusted peers only.
            message_cache[cache_key] = p_loads(body)
            channel_info.basic_ack(delivery_tag=method.delivery_tag)

        # rdd
        if properties.content_type == 'application/json':
            data = json.loads(body)
            # Keys/values are hex-encoded pickles inside the JSON body.
            data_iter = ((p_loads(bytes.fromhex(el['k'])),
                          p_loads(bytes.fromhex(el['v']))) for el in data)
            sc = SparkContext.getOrCreate()
            partitions = properties.headers["partitions"]
            rdd = sc.parallelize(data_iter, partitions)
            if cache_key not in message_cache:
                message_cache[cache_key] = rdd
            else:
                # Later chunk of the same RDD: merge with what we have.
                message_cache[cache_key] = message_cache[cache_key].union(
                    rdd).coalesce(partitions)

            # trigger action (count forces evaluation so persist takes effect)
            message_cache[cache_key].persist(get_storage_level())
            count = message_cache[cache_key].count()
            LOGGER.debug(f"count: {count}")
            channel_info.basic_ack(delivery_tag=method.delivery_tag)

        # object
        if properties.content_type == 'text/plain':
            if cache_key == wish_cache_key:
                channel_info.cancel()
                return message_cache[cache_key]
        # rdd
        if properties.content_type == 'application/json':
            # An RDD transfer is complete only when the cached element count
            # reaches the total_size announced in the headers.
            if cache_key == wish_cache_key and message_cache[cache_key].count(
            ) == properties.headers["total_size"]:
                channel_info.cancel()
                return message_cache[cache_key]
예제 #3
0
    def _partition_receive(self, index, kvs, name, tag, party_id, role,
                           topic_infos, mq, conf: dict):
        """Receive one data partition for ``name``/``tag`` over pulsar.

        Consumes JSON messages until the number of (key, value) pairs
        collected equals the ``partition_size`` announced in the message
        headers, then returns the accumulated list of pairs.

        Raises:
            ValueError: if a matching message is not ``application/json``.
        """
        topic_pair = topic_infos[index][1]
        channel_info = self._get_channel(mq, topic_pair, party_id, role, conf)

        message_key_cache = set()
        count = 0
        partition_size = -1
        all_data = []

        while True:
            message = channel_info.consume()
            properties = message.properties()
            # must get bytes
            body = message.data().decode()
            print(f"[pulsar._partition_receive] properties: {properties}.")
            if properties['message_id'] != name or properties[
                    'correlation_id'] != tag:
                # leave this code to handle unexpected situation
                channel_info.basic_ack(message)
                print(
                    f"[pulsar._partition_receive]: require {name}.{tag}, got {properties['message_id']}.{properties['correlation_id']}"
                )
                continue

            if properties['content_type'] == 'application/json':
                # headers here is json bytes string
                header = json.loads(properties['headers'])
                message_key = header.get('message_key')
                if message_key in message_key_cache:
                    # Redelivered message: ack and skip so pairs are not
                    # counted twice.
                    print(
                        f"[pulsar._partition_receive] message_key : {message_key} is duplicated"
                    )
                    channel_info.basic_ack(message)
                    continue

                message_key_cache.add(message_key)

                if header.get('partition_size') >= 0:
                    partition_size = header.get('partition_size')

                data = json.loads(body)
                # Keys/values arrive as hex-encoded pickles.
                data_iter = ((p_loads(bytes.fromhex(el['k'])),
                              p_loads(bytes.fromhex(el['v']))) for el in data)
                count += len(data)
                print(f"[pulsar._partition_receive] count: {count}")
                all_data.extend(data_iter)
                channel_info.basic_ack(message)

                if count == partition_size:
                    channel_info.cancel()
                    return all_data
            else:
                # BUG FIX: the ValueError was constructed but never raised,
                # silently looping forever on unexpected content types; and
                # ``properties`` is a dict, so the old attribute access in
                # the message would itself raise AttributeError.
                raise ValueError(
                    f"[pulsar._partition_receive]properties.content_type is {properties['content_type']}, but must be application/json"
                )
예제 #4
0
    def _partition_receive(self, index, kvs, name, tag, party_id, role,
                           party_mq_names, mq, connection_conf: dict):
        """Receive one data partition for ``name``/``tag`` over rabbitmq.

        JSON messages carrying hex-encoded pickled (key, value) pairs are
        consumed until ``partition_size`` (from the message headers) pairs
        have been collected, then the list of pairs is returned.

        Raises:
            ValueError: if a matching message is not ``application/json``.
        """
        queue_names = party_mq_names[index][1]
        channel_info = self._get_channel(mq, queue_names, party_id, role,
                                         connection_conf)

        message_key_cache = set()
        count = 0
        partition_size = -1
        all_data = []

        for method, properties, body in channel_info.consume():
            print(
                f"[rabbitmq._partition_receive] method: {method}, properties: {properties}."
            )
            if properties.message_id != name or properties.correlation_id != tag:
                # todo: fix this
                channel_info.basic_ack(delivery_tag=method.delivery_tag)
                print(
                    f"[rabbitmq._partition_receive]: require {name}.{tag}, got {properties.message_id}.{properties.correlation_id}"
                )
                continue

            if properties.content_type == 'application/json':
                message_key = properties.headers["message_key"]
                if message_key in message_key_cache:
                    # Redelivery: ack and skip so pairs aren't double-counted.
                    print(
                        f"[rabbitmq._partition_receive] message_key : {message_key} is duplicated"
                    )
                    channel_info.basic_ack(delivery_tag=method.delivery_tag)
                    continue

                message_key_cache.add(message_key)

                if properties.headers["partition_size"] >= 0:
                    partition_size = properties.headers["partition_size"]

                data = json.loads(body)
                # Keys/values arrive as hex-encoded pickles.
                data_iter = ((p_loads(bytes.fromhex(el['k'])),
                              p_loads(bytes.fromhex(el['v']))) for el in data)
                count += len(data)
                print(f"[rabbitmq._partition_receive] count: {count}")
                all_data.extend(data_iter)
                channel_info.basic_ack(delivery_tag=method.delivery_tag)

                if count == partition_size:
                    channel_info.cancel()
                    return all_data
            else:
                # BUG FIX: the ValueError was constructed but never raised,
                # so unexpected content types were silently ignored and the
                # loop could hang forever.
                raise ValueError(
                    f"[rabbitmq._partition_receive]properties.content_type is {properties.content_type}, but must be application/json"
                )
예제 #5
0
    def _receive_obj(self, channel_info, name, tag):
        """Block until the pickled object addressed by ``name``/``tag``
        arrives on *channel_info* and return it.

        Every ``text/plain`` message is unpickled into the message cache
        under its own key; the loop ends when the cached key matches the
        one we are waiting for.  Any other content type is an error.
        """
        party_id = channel_info._party_id
        role = channel_info._role
        target_key = self._get_message_cache_key(name, tag, party_id, role)

        # Already delivered by a previous call?
        if target_key in self._message_cache:
            return self._message_cache[target_key]

        for method, properties, body in channel_info.consume():
            LOGGER.debug(
                f"[rabbitmq._receive_obj] method: {method}, properties: {properties}."
            )
            mismatched = (properties.message_id != name
                          or properties.correlation_id != tag)
            if mismatched:
                # todo: fix this
                LOGGER.warning(
                    f"[rabbitmq._receive_obj] require {name}.{tag}, got {properties.message_id}.{properties.correlation_id}"
                )

            cache_key = self._get_message_cache_key(properties.message_id,
                                                    properties.correlation_id,
                                                    party_id, role)
            if properties.content_type != "text/plain":
                raise ValueError(
                    f"[rabbitmq._receive_obj] properties.content_type is {properties.content_type}, but must be text/plain"
                )

            # Stash the object, ack it, and stop once it is the one we want.
            self._message_cache[cache_key] = p_loads(body)
            channel_info.basic_ack(delivery_tag=method.delivery_tag)
            if cache_key == target_key:
                channel_info.cancel()
                LOGGER.debug(
                    f"[rabbitmq._receive_obj] cache_key: {cache_key}, obj: {self._message_cache[cache_key]}"
                )
                return self._message_cache[cache_key]
예제 #6
0
 def deserialize(_bytes):
     """Unpickle *_bytes* after a security pre-check.

     Returns ``None`` for falsy input.  Falls back to the eggroll pickle
     loader when the standard loader cannot decode the payload.
     """
     if _bytes:
         bytes_security_check(_bytes)
         try:
             return p_loads(_bytes)
         # BUG FIX: the bare ``except:`` also swallowed SystemExit and
         # KeyboardInterrupt; only genuine decode failures should trigger
         # the fallback loader.
         except Exception:
             return eggroll_pickle_loads(_bytes)
예제 #7
0
    def _receive_obj(self, channel_info, name, tag):
        """Block until the pickled object addressed by ``name``/``tag``
        arrives on the pulsar channel and return it.

        Mismatched messages are acked and skipped; matching ``text/plain``
        bodies are unpickled into the message cache.

        Raises:
            ValueError: if a matching message is not ``text/plain``.
        """
        party_id = channel_info._party_id
        role = channel_info._role
        wish_cache_key = self._get_message_cache_key(name, tag, party_id, role)

        if wish_cache_key in self._message_cache:
            return self._message_cache[wish_cache_key]

        while True:
            message = channel_info.consume()
            # NOTE(review): a None return is said to indicate a closed
            # client, but None is not handled here — confirm consume()'s
            # contract with its implementation.
            body = message.data()
            properties = message.properties()
            LOGGER.debug(f"[pulsar._receive_obj] properties: {properties}.")

            if properties["message_id"] != name or properties[
                    "correlation_id"] != tag:
                LOGGER.warning(
                    f"[pulsar._receive_obj] require {name}.{tag}, got {properties['message_id']}.{properties['correlation_id']}"
                )
                # just ack and continue
                channel_info.basic_ack(message.message_id())
                continue

            cache_key = self._get_message_cache_key(
                properties["message_id"], properties["correlation_id"],
                party_id, role)
            # object
            if properties["content_type"] == "text/plain":
                self._message_cache[cache_key] = p_loads(body)
                # TODO: handle ack failure
                channel_info.basic_ack(message.message_id())
                if cache_key == wish_cache_key:
                    # keep connection open for receiving object
                    # channel_info.cancel()
                    LOGGER.debug(
                        f"[pulsar._receive_obj] cache_key: {cache_key}, obj: {self._message_cache[cache_key]}"
                    )
                    return self._message_cache[cache_key]
            else:
                # BUG FIX: ``properties`` is a dict, so the old
                # ``properties.content_type`` raised AttributeError while
                # formatting this message instead of the intended ValueError.
                raise ValueError(
                    f"[pulsar._receive_obj] properties.content_type is {properties['content_type']}, but must be text/plain"
                )
예제 #8
0
 def deserialize(_bytes):
     """Unpickle *_bytes* and return the resulting object."""
     obj = p_loads(_bytes)
     return obj
예제 #9
0
def get_opml(opml_path: str) -> List[str]:
    """Fetch the object stored at *opml_path* and unpickle it."""
    raw = get_specific_obj(opml_path)
    return p_loads(raw)
예제 #10
0
 def deserialize(_bytes):
     """Run the security check on *_bytes*, then unpickle and return it."""
     bytes_security_check(_bytes)
     obj = p_loads(_bytes)
     return obj
예제 #11
0
    def _partition_receive(self, index, kvs, name, tag, party_id, role,
                           topic_infos, mq, conf: dict):
        """Receive one data partition for ``name``/``tag`` over pulsar.

        JSON messages carrying hex-encoded pickled (key, value) pairs are
        consumed until ``partition_size`` (announced in the message
        headers) pairs have arrived, then the list of pairs is returned.

        Raises:
            ValueError: if a matching message is not ``application/json``.
            Exception: if more pairs arrive than ``partition_size``, or if
                consuming fails before the partition is complete.
        """
        topic_pair = topic_infos[index][1]
        channel_info = self._get_channel(mq, topic_pair, party_id, role, conf)

        message_key_cache = set()
        count = 0
        partition_size = -1
        all_data = []
        while True:
            try:
                message = channel_info.consume()
                properties = message.properties()
                # must get bytes
                body = message.data().decode()
                LOGGER.debug(
                    f"[pulsar._partition_receive] properties: {properties}.")
                if (properties["message_id"] != name
                        or properties["correlation_id"] != tag):
                    # leave this code to handle unexpected situation
                    channel_info.basic_ack(message.message_id())
                    LOGGER.debug(
                        f"[pulsar._partition_receive]: require {name}.{tag}, got {properties['message_id']}.{properties['correlation_id']}"
                    )
                    continue

                if properties["content_type"] == "application/json":
                    # headers here is json bytes string
                    header = json.loads(properties["headers"])
                    message_key = header.get("message_key")
                    if message_key in message_key_cache:
                        # Redelivery: ack and skip so pairs aren't counted twice.
                        LOGGER.debug(
                            f"[pulsar._partition_receive] message_key : {message_key} is duplicated"
                        )
                        channel_info.basic_ack(message.message_id())
                        continue

                    message_key_cache.add(message_key)

                    if header.get("partition_size") >= 0:
                        partition_size = header.get("partition_size")

                    data = json.loads(body)
                    data_iter = ((
                        p_loads(bytes.fromhex(el["k"])),
                        p_loads(bytes.fromhex(el["v"])),
                    ) for el in data)
                    count += len(data)
                    LOGGER.debug(
                        f"[pulsar._partition_receive] count: {len(data)}")
                    LOGGER.debug(
                        f"[pulsar._partition_receive]total count: {count}")
                    all_data.extend(data_iter)
                    channel_info.basic_ack(message.message_id())
                    if partition_size != -1:
                        if count == partition_size:
                            channel_info.cancel()
                            return all_data
                        # BUG FIX: the old code raised whenever the running
                        # count differed from partition_size, aborting any
                        # partition delivered in more than one message.
                        # Keep consuming while short; only raise on overshoot.
                        if count > partition_size:
                            raise Exception(
                                f"[pulsar._partition_receive] want {partition_size} data in {name}.{tag} but got {count}"
                            )
                else:
                    # BUG FIX: ``properties`` is a dict — attribute access in
                    # the old message raised AttributeError instead of the
                    # intended ValueError.
                    raise ValueError(
                        f"[pulsar._partition_receive]properties.content_type is {properties['content_type']}, but must be application/json"
                    )
            except Exception as e:
                LOGGER.error(
                    f"[pulsar._partition_receive]catch exception {e}, while receiving {name}.{tag}"
                )
                # avoid hang on consume()
                if count == partition_size:
                    channel_info.cancel()
                    return all_data
                else:
                    raise e
예제 #12
0
    def pickle(self, line: str = ''):
        """
        Pickles a variable and copies it to the clipboard or un-pickles clipboard contents and prints or stores it.

        `%pickle` unpickle clipboard and print
        `%pickle v` pickle variable `v` and store in clipboard
        `%pickle _` pickle last line's output and store in clipboard
        `%pickle -o my_var` unpickle clipboard contents and store in `my_var`"""
        ip = self.shell
        args = magic_args.parse_argstring(self.pickle, line)
        if bool(args.output) and bool(args.var):
            msg = (
                'Incorrect usage, you can either pickle a variable, or unpickle, but not both at the same time.\n'
                f'\n`%pickle {args.var}` to pickle the contents of `{args.var}` and send them to your clipboard'
                f'\n`%pickle -o {args.output[0]}` to unpickle clipboard contents and send them to `{args.output[0]}`'
                f'\n`%pickle` to unpickle your clipboard contents and print')
            ip.write_err(msg)
            return None

        if not line or args.output:  # user wants to unpickle from clipboard
            content: str = pypaste()
            # BUG FIX: the empty-clipboard check must run before the format
            # check, which indexes into content and raised IndexError on ''.
            if not content:  # clipboard is empty
                sys.stderr.write(r'Your clipboard is empty.')
                return None
            # BUG FIX: the original combined the two checks with `and`, so
            # strings failing only one check were accepted.  A valid pickle
            # repr must start with 'b' AND have matching quote characters —
            # b'...' or b"...".  Slicing avoids IndexError on 1-char input.
            format_error = (not content.startswith('b')
                            or content[1:2] != content[-1:])
            if format_error:  # clipboard doesn't have a valid pickle string
                sys.stderr.write(
                    r'''Your clipboard doesn't have a bytes-like string (ie. b'\x80\x03N.' or 
                b"\x80\x03N.")''')
                return None

            try:
                unpickled = p_loads((literal_eval(content)))
            except (KeyError, UnpicklingError, PickleError):
                sys.stderr.write(
                    r'Your clipboard contents could not be unpickled because the data is not valid.'
                )
            else:
                if args.output:  # user wants to unpickle into a variable
                    ip.user_ns[args.output[0]] = unpickled

                else:  # user wants to unpickle and print
                    sys.stdout.write(str(unpickled))

        else:  # user wants to pickle a var
            try:
                pickled_data = str(p_dumps(ip.user_ns.get(args.var)))
            except RuntimeError:
                sys.stderr.write(
                    "Your data could not be pickled because it may be highly recursive.\n"
                    "For more information on what can be (un)pickled checkout "
                    "https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled"
                )
            except PicklingError:
                sys.stderr.write(
                    "The object you are trying to pickle is unpickable.\n"
                    "For more information on what can be (un)pickled checkout "
                    "https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled"
                )
            else:
                pycopy(pickled_data)