def start(self):
    """Initialise this manager on first use; repeated calls do nothing.

    Starts the download manager, reads the guide and download addresses
    from ``env``, creates a fresh ``Cache`` and zmq ``Context``, and keeps
    references to the download manager's shared dicts and condition.
    """
    if not self._started:
        # Flag is raised before the setup work, exactly as callers expect.
        self._started = True
        start_download_manager()

        self.guide_addr = env.get(GUIDE_ADDR)
        self.download_addr = env.get(DOWNLOAD_ADDR)
        self.cache = Cache()
        self.ctx = zmq.Context()

        # State shared with the download manager.
        manager = _download_manager
        self.shared_uuid_fn_dict = manager.shared_uuid_fn_dict
        self.shared_uuid_map_dict = manager.shared_uuid_map_dict
        self.download_cond = manager.download_cond
def __init__(self):
    """Create empty publishing state and seed the ``random`` module.

    No server thread is started here; ``server_thread`` stays ``None``
    until something else assigns it.
    """
    self.server_thread = None
    self.host = socket.gethostname()
    self.cache = Cache()
    self.published = {}

    # Seed = pid + (current milliseconds modulo 1000).
    millis = int(time.time() * 1000) % 1000
    random.seed(os.getpid() + millis)
class P2PBroadcastManager(BroadcastManager):
    """Peer-to-peer broadcast manager built on zmq REQ/REP sockets.

    The master runs a *guide* thread that maps every broadcast uuid to the
    servers holding its blocks (``{server_addr: bitmap}``).  Any process
    that registers or fetches a broadcast runs a *server* thread which
    serves the blocks stored in ``self.published``.
    """

    def __init__(self):
        # NOTE: BroadcastManager.__init__ is not called here.
        self.published = {}
        self.cache = Cache()
        self.host = socket.gethostname()
        self.server_thread = None
        random.seed(os.getpid() + int(time.time() * 1000) % 1000)

    def start(self, is_master):
        """Start the guide (master) or look up its address (non-master)."""
        if is_master:
            self.guides = {}
            self.guide_addr, self.guide_thread = self.start_guide()
            env.register('BroadcastGuideAddr', self.guide_addr)
        else:
            self.guide_addr = env.get('BroadcastGuideAddr')

        logger.debug("broadcast started: %s", self.guide_addr)

    def shutdown(self):
        """Ask the guide to stop and wait for its acknowledgement."""
        sock = env.ctx.socket(zmq.REQ)
        try:
            sock.setsockopt(zmq.LINGER, 0)
            sock.connect(self.guide_addr)
            sock.send_pyobj((GUIDE_STOP, None))
            sock.recv_pyobj()
        finally:
            # Release the socket even when the round trip fails.
            sock.close()

    def register(self, uuid, value):
        """Publish ``value`` under ``uuid`` and return its block count.

        Raises:
            RuntimeError: if ``uuid`` is already published here.
        """
        if uuid in self.published:
            raise RuntimeError('broadcast %s has already registered' % uuid)

        if not self.server_thread:
            self.server_addr, self.server_thread = self.start_server()

        blocks = self.to_blocks(uuid, value)
        self.published[uuid] = blocks
        # This server initially holds every block.
        self.guides[uuid] = {self.server_addr: [1] * len(blocks)}
        self.cache.put(uuid, value)
        return len(blocks)

    def fetch(self, uuid, block_num):
        """Return the broadcast value, from cache or by fetching blocks."""
        if not self.server_thread:
            self.server_addr, self.server_thread = self.start_server()

        value = self.cache.get(uuid)
        if value is not None:
            return value

        blocks = self.fetch_blocks(uuid, block_num)
        return self.from_blocks(uuid, blocks)

    def clear(self, uuid):
        """Drop ``uuid`` from the cache and from the published blocks."""
        self.cache.put(uuid, None)
        # A uuid served straight from the cache was never published here,
        # so its absence must not raise KeyError.
        self.published.pop(uuid, None)

    def fetch_blocks(self, uuid, block_num):
        """Collect all ``block_num`` blocks of ``uuid`` from other servers.

        Loops until every block has arrived: asks the guide for sources,
        fetches all missing blocks from servers on this host, then one
        randomly chosen missing block from each remote server.

        Raises:
            RuntimeError: if a server answers with an unknown result code.
        """
        blocks = [None] * block_num
        bitmap = [0] * block_num
        # Published immediately so the local server can hand out blocks
        # as soon as they arrive (the list is filled in place).
        self.published[uuid] = blocks

        guide_sock = env.ctx.socket(zmq.REQ)

        def _report_bad(addr):
            guide_sock.send_pyobj((GUIDE_REPORT_BAD, (uuid, addr)))
            guide_sock.recv_pyobj()

        def _fetch(addr, indices):
            sock = env.ctx.socket(zmq.REQ)
            try:
                sock.setsockopt(zmq.LINGER, 0)
                sock.connect(addr)
                for i in indices:
                    sock.send_pyobj((SERVER_FETCH, (uuid, i)))
                    avail = sock.poll(5 * 1000, zmq.POLLIN)
                    if not avail:
                        logger.debug("%s recv broadcast %d from %s timeout",
                                     self.server_addr, i, addr)
                        _report_bad(addr)
                        return
                    result, msg = sock.recv_pyobj()

                    if result == SERVER_FETCH_FAIL:
                        _report_bad(addr)
                        return
                    if result == SERVER_FETCH_OK:
                        index, block = msg
                        # A peer may not have received this block yet.
                        if i == index and block is not None:
                            blocks[index] = block
                            bitmap[index] = 1
                    else:
                        raise RuntimeError(
                            'Unknown server response: %s %s' % (result, msg))
            finally:
                sock.close()

        try:
            guide_sock.connect(self.guide_addr)
            logger.debug("connect to guide %s", self.guide_addr)

            while not all(bitmap):
                guide_sock.send_pyobj(
                    (GUIDE_SOURCES, (uuid, self.server_addr, bitmap)))
                sources = guide_sock.recv_pyobj()
                logger.debug("received SourceInfo from master: %s",
                             list(sources.keys()))

                local = []
                remote = []
                for addr, _bitmap in six.iteritems(sources):
                    if addr.startswith('tcp://%s:' % self.host):
                        local.append((addr, _bitmap))
                    else:
                        remote.append((addr, _bitmap))

                for addr, _bitmap in local:
                    indices = [
                        i for i in range(block_num)
                        if not bitmap[i] and _bitmap[i]
                    ]
                    if indices:
                        _fetch(addr, indices)

                random.shuffle(remote)
                for addr, _bitmap in remote:
                    indices = [
                        i for i in range(block_num)
                        if not bitmap[i] and _bitmap[i]
                    ]
                    if indices:
                        _fetch(addr, [random.choice(indices)])
        finally:
            # Previously leaked whenever _fetch raised.
            guide_sock.close()

        return blocks

    def start_guide(self):
        """Bind the guide socket and spawn its loop.

        Returns:
            ``(guide_addr, thread)`` where ``thread`` is what ``spawn``
            returns.
        """
        sock = env.ctx.socket(zmq.REP)
        port = sock.bind_to_random_port("tcp://0.0.0.0")
        guide_addr = "tcp://%s:%d" % (self.host, port)

        def run():
            logger.debug("guide start at %s", guide_addr)

            while True:
                msg_type, msg = sock.recv_pyobj()
                if msg_type == GUIDE_STOP:
                    sock.send_pyobj(0)
                    break
                elif msg_type == GUIDE_SOURCES:
                    uuid, addr, bitmap = msg
                    sources = self.guides[uuid]
                    sock.send_pyobj(sources)
                    # Requesters holding at least one block become sources.
                    if any(bitmap):
                        sources[addr] = bitmap
                elif msg_type == GUIDE_REPORT_BAD:
                    uuid, addr = msg
                    sock.send_pyobj(0)
                    sources = self.guides[uuid]
                    if addr in sources:
                        del sources[addr]
                else:
                    logger.error('Unknown guide message: %s %s',
                                 msg_type, msg)

            sock.close()
            logger.debug("Sending stop notification to all servers ...")
            for uuid, sources in six.iteritems(self.guides):
                for addr in sources:
                    self.stop_server(addr)

        return guide_addr, spawn(run)

    def start_server(self):
        """Bind the block server socket and spawn its loop.

        Returns:
            ``(server_addr, thread)`` where ``thread`` is what ``spawn``
            returns.
        """
        sock = env.ctx.socket(zmq.REP)
        sock.setsockopt(zmq.LINGER, 0)
        port = sock.bind_to_random_port("tcp://0.0.0.0")
        server_addr = 'tcp://%s:%d' % (self.host, port)

        def run():
            logger.debug("server started at %s", server_addr)

            while True:
                msg_type, msg = sock.recv_pyobj()
                logger.debug('server recv: %s %s', msg_type, msg)
                if msg_type == SERVER_STOP:
                    sock.send_pyobj(None)
                    break
                elif msg_type == SERVER_FETCH:
                    uuid, index = msg
                    if uuid not in self.published:
                        sock.send_pyobj((SERVER_FETCH_FAIL, None))
                    else:
                        blocks = self.published[uuid]
                        if index >= len(blocks):
                            sock.send_pyobj((SERVER_FETCH_FAIL, None))
                        else:
                            sock.send_pyobj(
                                (SERVER_FETCH_OK, (index, blocks[index])))
                else:
                    logger.error('Unknown server message: %s %s',
                                 msg_type, msg)

            sock.close()
            logger.debug("stop Broadcast server %s", server_addr)
            # Copy the keys: clear() removes entries while we iterate.
            for uuid in list(self.published.keys()):
                self.clear(uuid)

        return server_addr, spawn(run)

    def stop_server(self, addr):
        """Send SERVER_STOP to ``addr``, waiting at most 100 ms for a reply."""
        req = env.ctx.socket(zmq.REQ)
        try:
            req.setsockopt(zmq.LINGER, 0)
            req.connect(addr)
            req.send_pyobj((SERVER_STOP, None))
            avail = req.poll(1 * 100, zmq.POLLIN)
            if avail:
                req.recv_pyobj()
        finally:
            req.close()
        self.server_thread = None
class BroadcastManager(object):
    """Registers and fetches broadcast values via a download manager.

    Values are serialized, prefixed with a small header, cut into
    compressed blocks and handed to the download manager; fetched blocks
    are read back from the files listed in ``shared_uuid_fn_dict``.
    """

    # Stream header: 1-byte serialization type + 4-byte checksum field.
    header_fmt = '>BI'
    header_len = struct.calcsize(header_fmt)

    def __init__(self):
        self._started = False
        self.guide_addr = None
        self.download_addr = None
        self.cache = None
        self.shared_uuid_fn_dict = None
        self.shared_uuid_map_dict = None
        self.download_cond = None
        self.ctx = None

    def start(self):
        """Initialise on first use; later calls are no-ops."""
        if self._started:
            return

        self._started = True
        start_download_manager()
        self.guide_addr = env.get(GUIDE_ADDR)
        self.download_addr = env.get(DOWNLOAD_ADDR)
        self.cache = Cache()
        self.ctx = zmq.Context()
        self.shared_uuid_fn_dict = _download_manager.shared_uuid_fn_dict
        self.shared_uuid_map_dict = _download_manager.shared_uuid_map_dict
        self.download_cond = _download_manager.download_cond

    def register(self, uuid, value):
        """Publish ``value`` under ``uuid``.

        Returns:
            The size value produced by ``to_blocks``.

        Raises:
            RuntimeError: if ``uuid`` is already in ``shared_uuid_fn_dict``.
        """
        self.start()

        if uuid in self.shared_uuid_fn_dict:
            raise RuntimeError('broadcast %s has already registered' % uuid)

        blocks, size, block_map = self.to_blocks(uuid, value)
        _download_manager.register_blocks(uuid, blocks)
        self._update_sources(uuid, block_map)
        self.cache.put(uuid, value)
        return size

    def _update_sources(self, uuid, bitmap):
        """Tell the guide that ``download_addr`` serves ``uuid``."""
        guide_sock = self.ctx.socket(zmq.REQ)
        try:
            guide_sock.setsockopt(zmq.LINGER, 0)
            guide_sock.connect(self.guide_addr)
            guide_sock.send_pyobj((GUIDE_SET_SOURCES,
                                   (uuid, self.download_addr, bitmap)))
            guide_sock.recv_pyobj()
        finally:
            guide_sock.close()

    def clear(self, uuid):
        """Drop ``uuid`` from the cache and ask the download server to clear it."""
        assert self._started
        self.cache.put(uuid, None)
        sock = self.ctx.socket(zmq.REQ)
        try:
            sock.connect(self.download_addr)
            sock.send_pyobj((SERVER_CLEAR_ITEM, uuid))
            sock.recv_pyobj()
        finally:
            # Release the socket even when the round trip fails.
            sock.close()

    def fetch(self, uuid, compressed_size):
        """Return the value for ``uuid`` from cache, local blocks or peers."""
        start_download_manager()
        self.start()
        value = self.cache.get(uuid)
        if value is not None:
            return value

        blocks = _download_manager.get_blocks(uuid)
        if blocks is None:
            blocks = self.fetch_blocks(uuid, compressed_size)

        return self.from_blocks(uuid, blocks)

    @staticmethod
    def _get_blocks_by_filename(file_name, block_map):
        """Read ``file_name`` and slice it into blocks.

        Args:
            file_name: path of the file holding the concatenated blocks.
            block_map: iterable of ``(offset, size)`` pairs.

        Returns:
            A list with one bytes slice per ``block_map`` entry.
        """
        # ``with`` closes the handle even if read() raises.
        with open(file_name, 'rb') as fp:
            buf = fp.read()
        return [buf[offset: offset + size] for offset, size in block_map]

    def _load_shared_blocks(self, uuid):
        """Read the blocks of ``uuid`` from its shared file."""
        return self._get_blocks_by_filename(self.shared_uuid_fn_dict[uuid],
                                            self.shared_uuid_map_dict[uuid])

    def fetch_blocks(self, uuid, compressed_size):
        """Obtain the blocks of ``uuid``, requesting a download if needed.

        Raises:
            RuntimeError: if the download server answers DATA_GET_FAIL.
        """
        if uuid in self.shared_uuid_fn_dict:
            return self._load_shared_blocks(uuid)

        download_sock = self.ctx.socket(zmq.REQ)
        try:
            download_sock.connect(self.download_addr)
            download_sock.send_pyobj((DATA_GET, (uuid, compressed_size)))
            res = download_sock.recv_pyobj()
        finally:
            # The request/reply round trip is over; the socket used to be
            # leaked on every call.
            download_sock.close()

        if res == DATA_GET_OK:
            return self._load_shared_blocks(uuid)

        if res == DATA_GET_FAIL:
            raise RuntimeError('Data GET failed for uuid:%s' % uuid)

        # Any other reply: wait until the uuid shows up in the shared dict.
        while True:
            with self.download_cond:
                if uuid not in self.shared_uuid_fn_dict:
                    self.download_cond.wait()
                else:
                    break

        if uuid in self.shared_uuid_fn_dict:
            return self._load_shared_blocks(uuid)
        else:
            raise RuntimeError('get blocks failed')

    def to_blocks(self, uuid, obj):
        """Serialize ``(uuid, obj)`` and split it into compressed blocks.

        Returns:
            ``(blocks, size, block_map)``: the compressed blocks, the last
            element of ``accumulate_list(sizes)`` and the pairs built from
            that list and the per-block sizes (presumably total compressed
            size and ``(offset, size)`` pairs -- confirm against
            ``accumulate_list``).
        """
        try:
            if marshalable(obj):
                buf = marshal.dumps((uuid, obj))
                type_ = MARSHAL_TYPE
            else:
                buf = cPickle.dumps((uuid, obj), -1)
                type_ = PICKLE_TYPE
        except Exception:
            # Deliberate best-effort fallback to pickle.
            buf = cPickle.dumps((uuid, obj), -1)
            type_ = PICKLE_TYPE

        # Only the low 16 bits of the CRC are kept.
        checksum = binascii.crc32(buf) & 0xFFFF
        stream = struct.pack(self.header_fmt, type_, checksum) + buf
        blockNum = (len(stream) + (BLOCK_SIZE - 1)) >> BLOCK_SHIFT
        blocks = [compress(stream[i * BLOCK_SIZE:(i + 1) * BLOCK_SIZE])
                  for i in range(blockNum)]
        sizes = [len(block) for block in blocks]
        size_l = accumulate_list(sizes)
        block_map = list(izip(size_l[:-1], sizes))
        return blocks, size_l[-1], block_map

    def from_blocks(self, uuid, blocks):
        """Rebuild the value from compressed ``blocks``.

        Raises:
            RuntimeError: on checksum mismatch, unknown serialization type
                or when the embedded uuid differs from ``uuid``.
        """
        stream = b''.join(map(decompress, blocks))
        type_, checksum = struct.unpack(self.header_fmt,
                                        stream[:self.header_len])
        buf = stream[self.header_len:]
        _checksum = binascii.crc32(buf) & 0xFFFF
        if _checksum != checksum:
            raise RuntimeError('Wrong blocks: checksum: %s, expected: %s' % (
                _checksum, checksum))

        if type_ == MARSHAL_TYPE:
            _uuid, value = marshal.loads(buf)
        elif type_ == PICKLE_TYPE:
            # NOTE(review): unpickling executes arbitrary code; this is
            # only safe if every block source is trusted -- confirm.
            _uuid, value = cPickle.loads(buf)
        else:
            raise RuntimeError('Unknown serialization type: %s' % type_)

        if uuid != _uuid:
            raise RuntimeError('Wrong blocks: uuid: %s, expected: %s' % (
                _uuid, uuid))

        return value

    def shutdown(self):
        """Mark the manager as stopped; a later ``start()`` re-initialises."""
        if not self._started:
            return

        self._started = False
class P2PBroadcastManager(BroadcastManager):
    """Peer-to-peer broadcast manager built on zmq REQ/REP sockets.

    The master runs a *guide* thread that maps every broadcast uuid to the
    servers holding its blocks (``{server_addr: bitmap}``).  Any process
    that registers or fetches a broadcast runs a *server* thread which
    serves the blocks stored in ``self.published``.
    """

    def __init__(self):
        # NOTE: BroadcastManager.__init__ is not called here.
        self.published = {}
        self.cache = Cache()
        self.host = socket.gethostname()
        self.server_thread = None
        random.seed(os.getpid() + int(time.time() * 1000) % 1000)

    def start(self, is_master):
        """Start the guide (master) or look up its address (non-master)."""
        if is_master:
            self.guides = {}
            self.guide_addr, self.guide_thread = self.start_guide()
            env.register('BroadcastGuideAddr', self.guide_addr)
        else:
            self.guide_addr = env.get('BroadcastGuideAddr')

        logger.debug("broadcast started: %s", self.guide_addr)

    def shutdown(self):
        """Ask the guide to stop and wait for its acknowledgement."""
        sock = env.ctx.socket(zmq.REQ)
        try:
            sock.setsockopt(zmq.LINGER, 0)
            sock.connect(self.guide_addr)
            sock.send_pyobj((GUIDE_STOP, None))
            sock.recv_pyobj()
        finally:
            # Release the socket even when the round trip fails.
            sock.close()

    def register(self, uuid, value):
        """Publish ``value`` under ``uuid`` and return its block count.

        Raises:
            RuntimeError: if ``uuid`` is already published here.
        """
        if uuid in self.published:
            raise RuntimeError('broadcast %s has already registered' % uuid)

        if not self.server_thread:
            self.server_addr, self.server_thread = self.start_server()

        blocks = self.to_blocks(uuid, value)
        self.published[uuid] = blocks
        # This server initially holds every block.
        self.guides[uuid] = {self.server_addr: [1] * len(blocks)}
        self.cache.put(uuid, value)
        return len(blocks)

    def fetch(self, uuid, block_num):
        """Return the broadcast value, from cache or by fetching blocks."""
        if not self.server_thread:
            self.server_addr, self.server_thread = self.start_server()

        value = self.cache.get(uuid)
        if value is not None:
            return value

        blocks = self.fetch_blocks(uuid, block_num)
        return self.from_blocks(uuid, blocks)

    def clear(self, uuid):
        """Drop ``uuid`` from the cache and from the published blocks."""
        self.cache.put(uuid, None)
        # A uuid served straight from the cache was never published here,
        # so its absence must not raise KeyError.
        self.published.pop(uuid, None)

    def fetch_blocks(self, uuid, block_num):
        """Collect all ``block_num`` blocks of ``uuid`` from other servers.

        Loops until every block has arrived: asks the guide for sources,
        fetches all missing blocks from servers on this host, then one
        randomly chosen missing block from each remote server.

        Raises:
            RuntimeError: if a server answers with an unknown result code.
        """
        blocks = [None] * block_num
        bitmap = [0] * block_num
        # Published immediately so the local server can hand out blocks
        # as soon as they arrive (the list is filled in place).
        self.published[uuid] = blocks

        guide_sock = env.ctx.socket(zmq.REQ)

        def _report_bad(addr):
            guide_sock.send_pyobj((GUIDE_REPORT_BAD, (uuid, addr)))
            guide_sock.recv_pyobj()

        def _fetch(addr, indices):
            sock = env.ctx.socket(zmq.REQ)
            try:
                sock.setsockopt(zmq.LINGER, 0)
                sock.connect(addr)
                for i in indices:
                    sock.send_pyobj((SERVER_FETCH, (uuid, i)))
                    avail = sock.poll(5 * 1000, zmq.POLLIN)
                    if not avail:
                        logger.debug("%s recv broadcast %d from %s timeout",
                                     self.server_addr, i, addr)
                        _report_bad(addr)
                        return
                    result, msg = sock.recv_pyobj()

                    if result == SERVER_FETCH_FAIL:
                        _report_bad(addr)
                        return
                    if result == SERVER_FETCH_OK:
                        index, block = msg
                        # A peer may not have received this block yet.
                        if i == index and block is not None:
                            blocks[index] = block
                            bitmap[index] = 1
                    else:
                        raise RuntimeError(
                            'Unknown server response: %s %s' % (result, msg))
            finally:
                sock.close()

        try:
            guide_sock.connect(self.guide_addr)
            logger.debug("connect to guide %s", self.guide_addr)

            while not all(bitmap):
                guide_sock.send_pyobj(
                    (GUIDE_SOURCES, (uuid, self.server_addr, bitmap)))
                sources = guide_sock.recv_pyobj()
                logger.debug("received SourceInfo from master: %s",
                             list(sources))

                local = []
                remote = []
                # .items() and range() run on both Python 2 and 3, unlike
                # the former iteritems()/xrange().
                for addr, _bitmap in sources.items():
                    if addr.startswith('tcp://%s:' % self.host):
                        local.append((addr, _bitmap))
                    else:
                        remote.append((addr, _bitmap))

                for addr, _bitmap in local:
                    indices = [
                        i for i in range(block_num)
                        if not bitmap[i] and _bitmap[i]
                    ]
                    if indices:
                        _fetch(addr, indices)

                random.shuffle(remote)
                for addr, _bitmap in remote:
                    indices = [
                        i for i in range(block_num)
                        if not bitmap[i] and _bitmap[i]
                    ]
                    if indices:
                        _fetch(addr, [random.choice(indices)])
        finally:
            # Previously leaked whenever _fetch raised.
            guide_sock.close()

        return blocks

    def start_guide(self):
        """Bind the guide socket and spawn its loop.

        Returns:
            ``(guide_addr, thread)`` where ``thread`` is what ``spawn``
            returns.
        """
        sock = env.ctx.socket(zmq.REP)
        port = sock.bind_to_random_port("tcp://0.0.0.0")
        guide_addr = "tcp://%s:%d" % (self.host, port)

        def run():
            logger.debug("guide start at %s", guide_addr)

            while True:
                msg_type, msg = sock.recv_pyobj()
                if msg_type == GUIDE_STOP:
                    sock.send_pyobj(0)
                    break
                elif msg_type == GUIDE_SOURCES:
                    uuid, addr, bitmap = msg
                    sources = self.guides[uuid]
                    sock.send_pyobj(sources)
                    # Requesters holding at least one block become sources.
                    if any(bitmap):
                        sources[addr] = bitmap
                elif msg_type == GUIDE_REPORT_BAD:
                    uuid, addr = msg
                    sock.send_pyobj(0)
                    sources = self.guides[uuid]
                    if addr in sources:
                        del sources[addr]
                else:
                    logger.error('Unknown guide message: %s %s',
                                 msg_type, msg)

            sock.close()
            logger.debug("Sending stop notification to all servers ...")
            for sources in self.guides.values():
                for addr in sources:
                    self.stop_server(addr)

        return guide_addr, spawn(run)

    def start_server(self):
        """Bind the block server socket and spawn its loop.

        Returns:
            ``(server_addr, thread)`` where ``thread`` is what ``spawn``
            returns.
        """
        sock = env.ctx.socket(zmq.REP)
        sock.setsockopt(zmq.LINGER, 0)
        port = sock.bind_to_random_port("tcp://0.0.0.0")
        server_addr = 'tcp://%s:%d' % (self.host, port)

        def run():
            logger.debug("server started at %s", server_addr)

            while True:
                msg_type, msg = sock.recv_pyobj()
                logger.debug('server recv: %s %s', msg_type, msg)
                if msg_type == SERVER_STOP:
                    sock.send_pyobj(None)
                    break
                elif msg_type == SERVER_FETCH:
                    uuid, index = msg
                    if uuid not in self.published:
                        sock.send_pyobj((SERVER_FETCH_FAIL, None))
                    else:
                        blocks = self.published[uuid]
                        if index >= len(blocks):
                            sock.send_pyobj((SERVER_FETCH_FAIL, None))
                        else:
                            sock.send_pyobj(
                                (SERVER_FETCH_OK, (index, blocks[index])))
                else:
                    logger.error('Unknown server message: %s %s',
                                 msg_type, msg)

            sock.close()
            logger.debug("stop Broadcast server %s", server_addr)
            # Snapshot the keys: clear() removes entries, and iterating a
            # live keys() view while deleting fails on Python 3.
            for uuid in list(self.published):
                self.clear(uuid)

        return server_addr, spawn(run)

    def stop_server(self, addr):
        """Send SERVER_STOP to ``addr``, waiting at most 100 ms for a reply."""
        req = env.ctx.socket(zmq.REQ)
        try:
            req.setsockopt(zmq.LINGER, 0)
            req.connect(addr)
            req.send_pyobj((SERVER_STOP, None))
            avail = req.poll(1 * 100, zmq.POLLIN)
            if avail:
                req.recv_pyobj()
        finally:
            req.close()
class Broadcast:
    """Base class for a broadcast value identified by a uuid.

    Subclasses implement ``send`` and ``recv``.  The value is fetched
    lazily: reading ``.value`` on an instance that does not hold it yet
    goes through ``__getattr__``, which consults the class-level cache and
    then ``recv()``.  Any other missing attribute is looked up on the
    value itself.
    """

    initialized = False
    is_master = False
    cache = Cache()
    broadcastFactory = None
    BlockSize = 1024 * 1024

    def __init__(self, value, is_local):
        """Create a broadcast; cache it locally or ``send()`` it.

        Raises:
            Exception: if ``is_local`` and the cache refuses the value.
        """
        assert value is not None, 'broadcast object should not been None'
        self.uuid = str(uuid.uuid4())
        self.value = value
        self.is_local = is_local
        self.bytes = 0
        self.stopped = False
        if is_local:
            if not self.cache.put(self.uuid, value):
                # The format argument used to be passed as a second
                # exception argument, leaving '%s' in the message.
                raise Exception(
                    'object %s is too big to cache' % repr(value))
        else:
            self.send()

    def clear(self):
        """Mark the broadcast as stopped and drop the value everywhere."""
        self.stopped = True
        self.cache.put(self.uuid, None)
        if hasattr(self, 'value'):
            delattr(self, 'value')

    def __getstate__(self):
        # The value travels inline only when ``bytes`` is below 1/20 of
        # the block size; otherwise the receiver fetches it on demand.
        return (self.uuid, self.bytes,
                self.value if self.bytes < self.BlockSize / 20 else None)

    def __setstate__(self, v):
        self.stopped = False
        self.uuid, self.bytes, value = v
        if value is not None:
            self.value = value

    def __getattr__(self, name):
        """Resolve missing attributes; fetch ``value`` lazily.

        Raises:
            SystemExit: if ``value`` is requested after ``clear()``.
            AttributeError: if ``value`` is requested before
                ``initialize()`` was called.
            Exception: if ``recv()`` returns ``None``.
        """
        if name != 'value':
            return getattr(self.value, name)

        if self.stopped:
            raise SystemExit("broadcast has been cleared")

        # in the executor process, Broadcast is not initialized
        if not self.initialized:
            raise AttributeError(name)

        uuid = self.uuid
        value = self.cache.get(uuid)
        if value is not None:
            self.value = value
            return value

        oldtitle = getproctitle()
        setproctitle('dpark worker: broadcasting ' + uuid)
        try:
            value = self.recv()
            if value is None:
                raise Exception("recv broadcast failed")
            self.value = value
            self.cache.put(uuid, value)
        finally:
            # Restore the process title even when recv() fails.
            setproctitle(oldtitle)
        return value

    def send(self):
        raise NotImplementedError

    def recv(self):
        raise NotImplementedError

    def blockifyObject(self, obj):
        """Serialize ``obj`` and split it into compressed ``Block`` objects.

        Returns:
            ``(blocks, length)`` where ``length`` is the size of the
            serialized, uncompressed buffer.
        """
        try:
            buf = marshal.dumps(obj)
        except Exception:
            # Deliberate fallback for objects marshal cannot handle.
            buf = cPickle.dumps(obj, -1)

        N = self.BlockSize
        # Floor division: true division yields a float on Python 3 and
        # breaks range().  The block count is unchanged from Python 2.
        blockNum = len(buf) // N + 1
        val = [
            Block(i, compress(buf[i * N:i * N + N]))
            for i in range(blockNum)
        ]
        return val, len(buf)

    def unBlockifyObject(self, blocks):
        """Rebuild the object from blocks made by ``blockifyObject``."""
        # Bytes join, matching BroadcastManager.from_blocks; '' fails on
        # Python 3 when the parts are bytes.
        s = b''.join(decompress(b.data) for b in blocks)
        try:
            return marshal.loads(s)
        except Exception:
            return cPickle.loads(s)

    @classmethod
    def initialize(cls, is_master):
        """Record master flag and host name once per process."""
        if cls.initialized:
            return

        cls.initialized = True
        cls.is_master = is_master
        cls.host = socket.gethostname()

        logger.debug("Broadcast initialized")

    @classmethod
    def shutdown(cls):
        pass