def _partition_snd(kvs, name, tag, total_size, partitions, mq_names, mq):
    """Stream a partition's (k, v) pairs to RabbitMQ in bounded batches.

    Each key and value is pickled and hex-encoded; batches are flushed once
    they exceed MESSAGE_MAX_SIZE entries, and a final (possibly empty) batch
    is always sent at the end.  Returns [1] so the caller's RDD action has
    one element per partition.
    """
    LOGGER.debug(
        f"[rabbitmq._partition_send]total_size:{total_size}, partitions:{partitions}, mq_names:{mq_names}, mq:{mq}."
    )
    channels = _get_channels(mq_names=mq_names, mq=mq)

    def _flush(batch):
        # Push the accumulated batch out over every channel.
        _send_kv(name=name,
                 tag=tag,
                 data=batch,
                 channel_infos=channels,
                 total_size=total_size,
                 partitions=partitions)

    batch = []
    for key, value in kvs:
        batch.append({'k': p_dumps(key).hex(), 'v': p_dumps(value).hex()})
        # NOTE(review): this compares an entry COUNT against MESSAGE_MAX_SIZE,
        # which by its name may be a byte limit — confirm the unit.
        if len(batch) > MESSAGE_MAX_SIZE:
            _flush(batch)
            batch.clear()
    # Trailing send happens unconditionally, mirroring the original flow.
    _flush(batch)
    return [1]
def _partition_send(
    self,
    index,
    kvs,
    name,
    tag,
    partitions,
    mq_names,
    mq,
    maximun_message_size,
    connection_conf: dict,
):
    """Stream one RDD partition's (k, v) pairs to RabbitMQ in size-bounded chunks.

    Keys/values are pickled and hex-encoded into a Datastream; once the rough
    size estimate reaches ``maximun_message_size`` the current chunk is sent
    with ``partition_size=-1`` (non-final marker) and a fresh chunk is started.
    The final chunk carries ``partition_size=count`` (the true element count).
    Returns [1] so the enclosing RDD action yields one element per partition.
    """
    channel_infos = self._get_channels_index(
        index=index, mq_names=mq_names, mq=mq, connection_conf=connection_conf)
    datastream = Datastream()
    base_message_key = str(index)
    message_key_idx = 0
    count = 0
    for k, v in kvs:
        count += 1
        el = {"k": p_dumps(k).hex(), "v": p_dumps(v).hex()}
        # Roughly estimate the size of the package to avoid serialization.
        if (datastream.get_size() + sys.getsizeof(el["k"])
                + sys.getsizeof(el["v"]) >= maximun_message_size):
            # FIX: use LOGGER.debug instead of print(), matching the logging
            # convention used by the rest of this module.
            LOGGER.debug(
                f"[rabbitmq._partition_send]The size of message is: {datastream.get_size()}"
            )
            message_key_idx += 1
            # FIX: build the key with _SPLIT_.join (was a literal "_"),
            # consistent with the final-chunk key below — a mismatched
            # separator would break any receiver splitting on _SPLIT_.
            message_key = _SPLIT_.join([base_message_key, str(message_key_idx)])
            self._send_kv(
                name=name,
                tag=tag,
                data=datastream.get_data(),
                channel_infos=channel_infos,
                partition_size=-1,  # -1 flags an intermediate (non-final) chunk
                partitions=partitions,
                message_key=message_key,
            )
            datastream.clear()
        datastream.append(el)
    message_key_idx += 1
    message_key = _SPLIT_.join([base_message_key, str(message_key_idx)])
    self._send_kv(
        name=name,
        tag=tag,
        data=datastream.get_data(),
        channel_infos=channel_infos,
        partition_size=count,  # final chunk carries the true partition size
        partitions=partitions,
        message_key=message_key,
    )
    return [1]
def moca_dumps(obj: Any) -> bytes:
    """Serialize *obj* with pickle; compress payloads larger than 1 KiB.

    Compressed output is tagged with a leading ``b'moca'`` marker so the
    matching loader can tell the two forms apart.
    """
    raw = p_dumps(obj)
    return b'moca' + compress(raw) if len(raw) > 1024 else raw
def remote(self, v, name: str, tag: str, parties: typing.List[Party],
           gc: GarbageCollectionABC) -> typing.NoReturn:
    """Send *v* to *parties* over RabbitMQ.

    Tables are streamed partition-by-partition through Spark; any other
    value is pickled once and pushed through the party channels.
    NOTE(review): *gc* is accepted but unused here — confirm whether a gc
    action should be registered as other federation variants do.
    """
    log_str = f"rabbitmq.remote(name={name}, tag={tag}, parties={parties})"
    queue_names = self._get_mq_names(parties)
    LOGGER.debug(f"[rabbitmq.remote]mq_names: {queue_names}")
    if isinstance(v, Table):
        # Distributed path: every RDD partition ships its own kv stream.
        total_size = v.count()
        partitions = v.partitions
        LOGGER.debug(
            f"[{log_str}]start to remote RDD, total_size={total_size}, partitions={partitions}"
        )
        partition_sender = _get_partition_send_func(
            name, tag, total_size, partitions, queue_names, mq=self._mq)
        # noinspection PyProtectedMember
        v._rdd.mapPartitions(partition_sender).count()  # count() forces execution
    else:
        # Plain-object path: pickle once, broadcast over the channels.
        LOGGER.debug(f"[{log_str}]start to remote obj")
        channels = self._get_channels(mq_names=queue_names)
        LOGGER.debug(f"[rabbitmq.remote]got channel_infos: {channels}")
        _send_obj(name=name, tag=tag, data=p_dumps(v), channel_infos=channels)
    LOGGER.debug(f"[{log_str}]finish to remote")
def _partition_send(self, index, kvs, name, tag, partitions,
                    party_topic_infos, mq, maximun_message_size, conf: dict):
    """Stream one partition's (k, v) pairs to Pulsar in size-bounded chunks.

    Keys/values are pickled and hex-encoded into a Datastream; when the rough
    size estimate reaches ``maximun_message_size`` the chunk is sent with
    ``partition_size=-1`` (non-final marker).  The last chunk carries the
    true element count.  Returns [1] for the enclosing RDD action.
    """
    channel_infos = self._get_channels_index(
        index=index, party_topic_infos=party_topic_infos, mq=mq, conf=conf)
    # Reuse datastream here in case message size has a limitation in Pulsar.
    datastream = Datastream()
    base_message_key = str(index)
    message_key_idx = 0
    count = 0
    for k, v in kvs:
        count += 1
        el = {'k': p_dumps(k).hex(), 'v': p_dumps(v).hex()}
        # Roughly estimate the size of the package to avoid serialization.
        if datastream.get_size() + sys.getsizeof(el['k']) + sys.getsizeof(
                el['v']) >= maximun_message_size:
            # FIX: log via LOGGER.debug instead of print(), matching the
            # logging convention used elsewhere in this module.
            LOGGER.debug(
                f'[pulsar._partition_send]The size of message is: {datastream.get_size()}'
            )
            message_key_idx += 1
            message_key = _SPLIT_.join(
                [base_message_key, str(message_key_idx)])
            self._send_kv(name=name,
                          tag=tag,
                          data=datastream.get_data().encode(),
                          channel_infos=channel_infos,
                          partition_size=-1,  # intermediate-chunk marker
                          partitions=partitions,
                          message_key=message_key)
            datastream.clear()
        datastream.append(el)
    message_key_idx += 1
    message_key = _SPLIT_.join([base_message_key, str(message_key_idx)])
    self._send_kv(name=name,
                  tag=tag,
                  data=datastream.get_data().encode(),
                  channel_infos=channel_infos,
                  partition_size=count,  # final chunk: true partition size
                  partitions=partitions,
                  message_key=message_key)
    return [1]
def remote( self, v, name: str, tag: str, parties: typing.List[Party], gc: GarbageCollectionABC, ) -> typing.NoReturn: log_str = f"[rabbitmq.remote](name={name}, tag={tag}, parties={parties})" # if not _remote_tag_not_duplicate(name, tag, parties): # raise ValueError(f"[{log_str}]remote to {parties} with duplicate tag") _name_dtype_keys = [ _SPLIT_.join([party.role, party.party_id, name, tag, "remote"]) for party in parties ] if _name_dtype_keys[0] not in self._name_dtype_map: mq_names = self._get_mq_names(parties, dtype=NAME_DTYPE_TAG) channel_infos = self._get_channels(mq_names=mq_names) if isinstance(v, Table): body = { "dtype": FederationDataType.TABLE, "partitions": v.partitions } else: body = {"dtype": FederationDataType.OBJECT} LOGGER.debug( f"[rabbitmq.remote] _name_dtype_keys: {_name_dtype_keys}, dtype: {body}" ) self._send_obj( name=name, tag=_SPLIT_.join([tag, NAME_DTYPE_TAG]), data=p_dumps(body), channel_infos=channel_infos, ) for k in _name_dtype_keys: if k not in self._name_dtype_map: self._name_dtype_map[k] = body if isinstance(v, Table): total_size = v.count() partitions = v.partitions LOGGER.debug( f"[{log_str}]start to remote RDD, total_size={total_size}, partitions={partitions}" ) mq_names = self._get_mq_names(parties, name, partitions=partitions) # add gc gc.add_gc_action(tag, v, "__del__", {}) send_func = self._get_partition_send_func( name, tag, partitions, mq_names, mq=self._mq, maximun_message_size=self._max_message_size, connection_conf=self._rabbit_manager.runtime_config.get( "connection", {}), ) # noinspection PyProtectedMember v._rdd.mapPartitionsWithIndex(send_func).count() else: LOGGER.debug(f"[{log_str}]start to remote obj") mq_names = self._get_mq_names(parties, name) channel_infos = self._get_channels(mq_names=mq_names) self._send_obj(name=name, tag=tag, data=p_dumps(v), channel_infos=channel_infos) LOGGER.debug(f"[{log_str}]finish to remote")
def serialize(_obj):
    """Return the pickle byte representation of *_obj*."""
    payload = p_dumps(_obj)
    return payload
def upload_opml(md5_key: str, show_list: list) -> str:
    """Pickle *show_list*, upload it as an 'opml' feed keyed by *md5_key*,
    and return the presigned URL produced by upload_feed."""
    payload = p_dumps(show_list)
    return upload_feed(md5_key, payload, 'opml')
def remote(self, v, name: str, tag: str, parties: typing.List[Party],
           gc: GarbageCollectionABC) -> typing.NoReturn:
    """Send *v* to *parties* over Pulsar.

    First announces the payload dtype (TABLE with its partition count, or
    OBJECT) once per (party, name, tag), then either streams a Table's
    partitions through Spark or pickles and sends a plain object.

    :param v: a Table (streamed per-partition) or any picklable object
    :param name: transfer variable name
    :param tag: transfer tag
    :param parties: destination parties
    :param gc: garbage-collection registry; a '__del__' action is added for Tables
    """
    log_str = f"[pulsar.remote](name={name}, tag={tag}, parties={parties})"
    # One dtype-announcement key per destination party.
    _name_dtype_keys = [
        _SPLIT_.join([party.role, party.party_id, name, tag, 'remote'])
        for party in parties
    ]
    # tell the receiver what sender is going to send.
    if _name_dtype_keys[0] not in self._name_dtype_map:
        party_topic_infos = self._get_party_topic_infos(
            parties, dtype=NAME_DTYPE_TAG)
        channel_infos = self._get_channels(
            party_topic_infos=party_topic_infos)
        if isinstance(v, Table):
            body = {
                "dtype": FederationDataType.TABLE,
                "partitions": v.partitions
            }
        else:
            body = {"dtype": FederationDataType.OBJECT}
        LOGGER.debug(
            f"[pulsar.remote] _name_dtype_keys: {_name_dtype_keys}, dtype: {body}"
        )
        self._send_obj(name=name,
                       tag=_SPLIT_.join([tag, NAME_DTYPE_TAG]),
                       data=p_dumps(body),
                       channel_infos=channel_infos)
        # Remember which keys were announced so it happens only once.
        for k in _name_dtype_keys:
            if k not in self._name_dtype_map:
                self._name_dtype_map[k] = body
    if isinstance(v, Table):
        total_size = v.count()
        partitions = v.partitions
        LOGGER.debug(
            f"[{log_str}]start to remote RDD, total_size={total_size}, partitions={partitions}"
        )
        party_topic_infos = self._get_party_topic_infos(
            parties, name, partitions=partitions)
        # add gc
        gc.add_gc_action(tag, v, '__del__', {})
        send_func = self._get_partition_send_func(
            name,
            tag,
            partitions,
            party_topic_infos,
            mq=self._mq,
            maximun_message_size=self._max_message_size,
            conf=self._pulsar_manager.runtime_config)
        # noinspection PyProtectedMember
        # count() forces the Spark job so every partition actually sends.
        v._rdd.mapPartitionsWithIndex(send_func).count()
    else:
        LOGGER.debug(f"[{log_str}]start to remote obj")
        party_topic_infos = self._get_party_topic_infos(parties, name)
        channel_infos = self._get_channels(
            party_topic_infos=party_topic_infos)
        self._send_obj(name=name,
                       tag=tag,
                       data=p_dumps(v),
                       channel_infos=channel_infos)
    LOGGER.debug(f"[{log_str}]finish to remote")
def _partition_send( self, index, kvs, name, tag, partitions, party_topic_infos, mq, maximun_message_size, conf: dict, ): channel_infos = self._get_channels_index( index=index, party_topic_infos=party_topic_infos, mq=mq, conf=conf) # reuse datastream here incase message size has limitation in pulsar datastream = Datastream() base_message_key = str(index) message_key_idx = 0 count = 0 internal_count = 0 for k, v in kvs: count += 1 internal_count += 1 el = {"k": p_dumps(k).hex(), "v": p_dumps(v).hex()} # roughly caculate the size of package to avoid serialization ;) if (datastream.get_size() + sys.getsizeof(el["k"]) + sys.getsizeof(el["v"]) >= maximun_message_size): LOGGER.debug( f"[pulsar._partition_send]The count of message is: {internal_count}" ) LOGGER.debug( f"[pulsar._partition_send]The total count of message is: {count}" ) internal_count = 0 message_key_idx += 1 message_key = _SPLIT_.join( [base_message_key, str(message_key_idx)]) self._send_kv( name=name, tag=tag, data=datastream.get_data().encode(), channel_infos=channel_infos, partition_size=-1, partitions=partitions, message_key=message_key, ) datastream.clear() datastream.append(el) message_key_idx += 1 message_key = _SPLIT_.join([base_message_key, str(message_key_idx)]) self._send_kv( name=name, tag=tag, data=datastream.get_data().encode(), channel_infos=channel_infos, partition_size=count, partitions=partitions, message_key=message_key, ) return [1]
def pickle(self, line: str = ''):
    """ Pickles a variable and copies it to the clipboard or un-pickles clipboard contents and prints or stores it.

    `%pickle` unpickle clipboard and print

    `%pickle v` pickle variable `v` and store in clipboard

    `%pickle _` pickle last line's output and store in clipboard

    `%pickle -o my_var` unpickle clipboard contents and store in `my_var`"""
    ip = self.shell
    args = magic_args.parse_argstring(self.pickle, line)
    if bool(args.output) and bool(args.var):
        msg = (
            'Incorrect usage, you can either pickle a variable, or unpickle, but not both at the same time.\n'
            f'\n`%pickle {args.var}` to pickle the contents of `{args.var}` and send them to your clipboard'
            f'\n`%pickle -o {args.output[0]}` to unpickle clipboard contents and send them to `{args.output[0]}`'
            f'\n`%pickle` to unpickle your clipboard contents and print')
        ip.write_err(msg)
        return None

    if not line or args.output:  # user wants to unpickle from clipboard
        content: str = pypaste()
        # FIX: check for an empty clipboard BEFORE indexing content[1] /
        # content[-1]; the original order raised IndexError on empty input.
        if not content:  # clipboard is empty
            sys.stderr.write(r'Your clipboard is empty.')
            return None
        # Valid forms look like b'...' or b"..." — reject when the string is
        # too short, does not start with 'b', or the quotes do not match.
        # FIX: the original used `and`, which only rejected input failing
        # BOTH checks (De Morgan error), letting malformed strings through.
        format_error = (len(content) < 3
                        or not content.startswith('b')
                        or content[1] != content[-1])
        if format_error:  # clipboard doesn't have a valid pickle string
            sys.stderr.write(
                r'''Your clipboard doesn't have a bytes-like string (ie. b'\x80\x03N.' or b"\x80\x03N.")''')
            return None
        try:
            # SECURITY NOTE: unpickling clipboard data executes arbitrary
            # code if the bytes are malicious — clipboard content is
            # untrusted input; consider warning the user.
            unpickled = p_loads((literal_eval(content)))
        except (KeyError, UnpicklingError, PickleError):
            sys.stderr.write(
                r'Your clipboard contents could not be unpickled because the data is not valid.'
            )
        else:
            if args.output:  # user wants to unpickle into a variable
                ip.user_ns[args.output[0]] = unpickled
            else:  # user wants to unpickle and print
                sys.stdout.write(str(unpickled))
    else:  # user wants to pickle a var
        try:
            pickled_data = str(p_dumps(ip.user_ns.get(args.var)))
        except RuntimeError:
            sys.stderr.write(
                "Your data could not be pickled because it may be highly recursive.\n"
                "For more information on what can be (un)pickled checkout "
                "https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled"
            )
        except PicklingError:
            sys.stderr.write(
                "The object you are trying to pickle is unpickable.\n"
                "For more information on what can be (un)pickled checkout "
                "https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled"
            )
        else:
            pycopy(pickled_data)
def save_messages_pickle(events, path):
    """Persist *events* at *path* as the pickled form of its dict view.

    *events* must expose a ``to_dict()`` method.
    """
    with open(path, 'wb') as sink:
        sink.write(p_dumps(events.to_dict()))
def save_channel_pickle(dict_channel, dirname):
    """Pickle *dict_channel* into '<dirname>pickles/channel_info.pickle'.

    NOTE(review): assumes *dirname* already ends with a path separator and
    that the 'pickles' subdirectory exists — confirm with callers.
    """
    destination = dirname + "pickles/" + "channel_info.pickle"
    with open(destination, 'wb') as sink:
        sink.write(p_dumps(dict_channel))