def read_work(reader_key):
    """Fetch, decode and sort this reader's slice of shuffle partitions.

    ``reader_key`` is a mapping that supplies ``'client_id'``,
    ``'key-per-client'`` and ``'roundIdx'``. The reader handles the map ids
    ``key_per_client * client_id`` up to (but excluding)
    ``key_per_client * (client_id + 1)``, capped at ``numPartitions``.
    For every map id the buffer stored under the hashed shuffle key is
    fetched with ``pocket.get_buffer``, base64-decoded and parsed as
    ``recordType`` records. Each resulting array is sorted in place by its
    ``'key'`` field and the arrays are appended to ``inputs``.

    Relies on names provided by the surrounding scope: ``rounds``,
    ``taskId``, ``numPartitions``, ``pocket``, ``pocket_namenode``,
    ``jobid``, ``recordType``, ``inputs``, ``hashlib``, ``b64decode``
    and ``np``.

    Returns:
        None. Results are delivered by extending ``inputs``.

    Raises:
        Exception: if ``pocket.get_buffer`` reports a non-zero status.
            Any exception raised while fetching or decoding a key is
            logged with the key name and re-raised unchanged.
    """
    client_id = int(reader_key['client_id'])
    key_per_client = int(reader_key['key-per-client'])
    reduce_id = rounds * taskId + reader_key['roundIdx']

    # Size of the receive buffer handed to pocket.get_buffer for every key.
    datasize = 17000000

    first_map = key_per_client * client_id
    last_map = min(key_per_client * (client_id + 1), numPartitions)

    objs = []
    for map_id in range(first_map, last_map):
        part_suffix = "-part-" + str(map_id) + "-" + str(reduce_id)
        keyname = "shuffle" + part_suffix
        # The first 8 hex digits of the MD5 of the plain key name are
        # embedded in the stored key name (name scrambling, not security).
        digest = hashlib.md5(keyname.encode('utf-8')).hexdigest()
        randomized_keyname = "shuffle-" + digest[:8] + part_suffix
        print("The name of the key to read is: " + randomized_keyname)
        try:
            # A fresh buffer per key: it is read back below without being
            # reassigned, so get_buffer presumably fills it in place.
            textback = " " * datasize
            status = pocket.get_buffer(pocket_namenode, randomized_keyname,
                                       textback, datasize, jobid)
            # A failed read would otherwise leave the all-spaces buffer,
            # which base64-decodes to empty bytes and is silently accepted.
            if status != 0:
                raise Exception("get buffer failed: " + randomized_keyname)
            print("Successfully read")
            original_text = b64decode(textback.encode('utf-8'))
            print("last ten bytes after padding: " + textback[-10:])
            objs.append(original_text)
        except Exception:
            print("reading error key " + randomized_keyname)
            raise

    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # It returns a read-only view over the bytes, so copy to get a writable
    # array that can be sorted in place.
    data = [np.frombuffer(obj, dtype=recordType).copy() for obj in objs]
    for records in data:
        records.sort(order='key')
    inputs.extend(data)
def pocket_read_buffer(p, jobid, iter, text_back_tmp, size):
    """Read the objects ``tmp1-0`` .. ``tmp1-<iter - 1>`` through ``pocket``.

    A single buffer of ``size`` spaces is allocated up front and handed to
    every ``pocket.get_buffer`` call; nothing is returned to the caller.

    NOTE(review): a six-argument function with the same name is also
    visible in this file; if both live in the same scope, the later
    definition replaces this one.

    Args:
        p: Handle passed as the first argument of ``pocket.get_buffer``.
        jobid: Job identifier passed as the last argument of
            ``pocket.get_buffer``.
        iter: Number of consecutive objects to read (shadows the builtin;
            kept because it is part of the call signature).
        text_back_tmp: Unused by this function.
        size: Length of the receive buffer and the size passed to
            ``pocket.get_buffer``.

    Raises:
        Exception: as soon as ``pocket.get_buffer`` returns a non-zero
            status; the message names the object that failed.
    """
    scratch = " " * size
    object_names = ('tmp1-' + str(index) for index in range(iter))
    for object_name in object_names:
        status = pocket.get_buffer(p, object_name, scratch, size, jobid)
        if status == 0:
            continue
        raise Exception("get buffer failed: " + object_name)
def pocket_read_buffer(p, jobid, iter, text_back, size, id):
    """Read the objects ``/tmp<id>-0`` .. ``/tmp<id>-<iter - 1>`` through ``pocket``.

    Every read passes the caller-supplied ``text_back`` buffer to
    ``pocket.get_buffer``; nothing is returned to the caller.

    NOTE(review): a five-argument function with the same name is also
    visible earlier in this file; if both live in the same scope, this
    definition replaces it.

    Args:
        p: Handle passed as the first argument of ``pocket.get_buffer``.
        jobid: Job identifier passed as the last argument of
            ``pocket.get_buffer``.
        iter: Number of consecutive objects to read (shadows the builtin;
            kept because it is part of the call signature).
        text_back: Receive buffer handed to every ``pocket.get_buffer`` call.
        size: Size passed to ``pocket.get_buffer`` for each read.
        id: Value embedded in each object name (shadows the builtin; kept
            because it is part of the call signature).

    Raises:
        Exception: as soon as ``pocket.get_buffer`` returns a non-zero
            status; the message names the object that failed.
    """
    # ``range`` instead of ``xrange``: ``xrange`` does not exist on
    # Python 3, and ``range`` works on both major versions.
    for i in range(iter):
        dst_filename = '/tmp' + str(id) + '-' + str(i)
        r = pocket.get_buffer(p, dst_filename, text_back, size, jobid)
        if r != 0:
            raise Exception("get buffer failed: " + dst_filename)