def ht_zipf():
    # Benchmark configuration.
    address = "frog.zoo.cs.yale.edu"
    service_port = 9090
    lease_port = 9091
    num_blocks = 1
    chain_length = 1
    num_ops = 100000
    data_size = 64
    op_type_set = ["get"]
    path = "/tmp"
    backing_path = "local://tmp"

    file_name = './ht_zipf_with_cache.txt'
    data = open(file_name, 'w+')

    # Record all the configuration parameters.
    print("host: ", address, file=data)
    print("service-port: ", service_port, file=data)
    print("lease-port: ", lease_port, file=data)
    print("num-blocks: ", num_blocks, file=data)
    print("chain-length: ", chain_length, file=data)
    print("num-ops: ", num_ops, file=data)
    print("data-size: ", data_size, file=data)
    print("path: ", path, file=data)
    print("backing-path: ", backing_path, file=data)

    num_clients = 1
    loading = 0
    client = JiffyClient(address, service_port, lease_port)
    ht_clients = [None] * num_clients

    # Sweep the cache size from 5% to 100% of the total data volume, in 5% steps.
    for cache_size in range(num_ops // 20 * 64, num_ops * 64 + 1, num_ops // 20 * 64):
        ht_clients[0] = client.open_or_create_hash_table(
            path, backing_path, num_blocks, chain_length, cache_size)
        benchmark = GetBenchmark(ht_clients, data_size, num_clients, num_ops)
        benchmark.run()
        result = benchmark.wait()
        client.remove(path)

        print("===== Zipf_ht_Benchmark, Cache_Size= ", cache_size, " ======", file=data)
        print("\t", num_ops, " requests completed in ", float(num_ops) / result[0], " s", file=data)
        print("\t", num_clients, " parallel clients", file=data)
        print("\t", data_size, " payload", file=data)
        print("\tAverage put latency: ", result[1], "us", file=data)
        print("\tAverage get latency: ", result[2], "us", file=data)
        print("\tAverage total latency: ", result[1] + result[2], "us", file=data)
        print("\tThroughput: ", result[0], " requests per second", file=data)
        print("\tHit_rate: ", round(result[3], 4), "%", file=data)
        print("\n", file=data)

        loading += 1
        print("Loading -- ", round(float(loading * 100 / 20), 1), "%")

    data.close()
    return 0
def run_scale_workload(d_host='127.0.0.1', d_port=9090, l_port=9091,
                       data_path='/data/test', n_ops=100000, value_size=102400,
                       skew=0.0):
    value = bytes(value_size)  # value_size zero-bytes as the payload
    keys = zipf_keys(skew, 512, n_ops)
    client = JiffyClient(d_host, d_port, l_port)
    kv = client.open_or_create(data_path, '/tmp')
    logging.info("Generated {} keys".format(len(keys)))
    for key in keys:
        kv.put(key, value)
    for key in keys:
        kv.remove(key)
    client.remove(data_path, RemoveMode.delete)
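# `zipf_keys` is called above but not defined here. Below is a minimal sketch of
# one possible implementation, assuming it returns n_ops string keys drawn from a
# bounded Zipf distribution over key_count distinct keys (the signature is
# inferred from the call site; the body is an illustrative assumption, not the
# project's actual code).
import numpy as np

def zipf_keys(skew, key_count, n_ops):
    ranks = np.arange(1, key_count + 1)        # key ranks 1..key_count
    weights = 1.0 / np.power(ranks, skew)      # Zipf weight per rank; skew=0.0 is uniform
    probs = weights / weights.sum()
    samples = np.random.choice(key_count, size=n_ops, p=probs)
    return [str(s) for s in samples]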
def run_command(key):
    host = '172.31.12.102'
    logger.info("Connecting to the jiffy server")
    em = JiffyClient(host)
    logger.info("Jiffy connected")

    data_path = "/test"
    data_size = 30 * 1024 * 1024  # 30 MiB sample payload
    sample_data = 'a' * data_size

    test_queue = em.open_or_create_queue(data_path, "local://tmp", 10, 1)
    logger.info("queue created")
    test_queue.put(sample_data)
    logger.info("Data put")
    obj = test_queue.get()
    logger.info("Data read: " + str(len(obj)))
    em.close(data_path)
    return 0
def run_sync_kv_latency_benchmark(d_host, d_port, l_port, data_path,
                                  workload_path, workload_off=0, n_ops=100000):
    client = JiffyClient(d_host, d_port, l_port)
    kv = client.open(data_path)
    workload = make_workload(workload_path, workload_off, n_ops, kv)
    # Execute each operation synchronously and print its latency in microseconds.
    for op, args in workload:
        begin = time.time()
        op(*args)
        tot = time.time() - begin
        print("%f" % (tot * 1e6))
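# `make_workload` is defined elsewhere in the repo. A hypothetical sketch of its
# contract, assuming a plain-text trace in which each line is "put <key> <value>"
# or "get <key>": skip the first workload_off lines, read at most n_ops
# operations, and bind each one to the kv client as an (op, args) pair so the
# benchmarks can invoke workload[i][0](*workload[i][1]).
def make_workload(workload_path, workload_off, n_ops, kv):
    workload = []
    with open(workload_path) as f:
        for i, line in enumerate(f):
            if i < workload_off:
                continue
            if len(workload) >= n_ops:
                break
            parts = line.split()
            if parts[0] == 'put':
                workload.append((kv.put, (parts[1], parts[2])))
            elif parts[0] == 'get':
                workload.append((kv.get, (parts[1],)))
    return workload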
def load_and_run_workload(barrier, workload_path, workload_off, d_host, d_port,
                          l_port, data_path, n_ops):
    client = JiffyClient(d_host, d_port, l_port)
    kv = client.open(data_path)
    workload = make_workload(workload_path, workload_off, n_ops, kv)
    logging.info("[Process] Loaded data for process.")
    # Wait until every benchmark process has finished loading its workload.
    barrier.wait()
    logging.info("[Process] Starting benchmark...")
    ops = 0
    begin = time.time()
    while ops < len(workload):
        workload[ops][0](*workload[ops][1])
        ops += 1
    end = time.time()
    # Report this process's throughput in operations per second.
    print(float(ops) / (end - begin))
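# A sketch of how load_and_run_workload can be driven from multiple processes
# (paths, ports, and offsets below are placeholders): each process replays a
# disjoint slice of the trace, and the Barrier keeps the loading phase out of
# the measured region.
from multiprocessing import Barrier, Process

def run_benchmark_processes(n_procs=4):
    barrier = Barrier(n_procs)
    procs = [Process(target=load_and_run_workload,
                     args=(barrier, '/path/to/workload.txt', i * 100000,
                           '127.0.0.1', 9090, 9091, '/data/test', 100000))
             for i in range(n_procs)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()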
def run_command(key):
    pywren.wrenlogging.default_config('INFO')
    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)
    logger.info("before everything")
    partition_num = key['partition_num']
    rounds = key['rounds']
    em = JiffyClient(host=key['em'])
    reduceId = key['taskId']
    appName = key['appName']
    alg_type = key['type']
    data_ques1 = open_or_create_jiffy_queues(em, appName, partition_num, 1, 'receiver')
    logger.info("queue opened")
    names = key['names']
    dtypes = key['dtypes']

    # Build the left table of the join from synthetic string keys.
    left_table = 100000
    indices = tm.makeStringIndex(left_table).values
    left_keys = np.tile(indices[:left_table], 1)  # renamed from `key` to avoid shadowing the argument
    left = DataFrame({"key": left_keys, "value": np.random.randn(left_table)})
    t_start = time.time()

    # Keep fetching right-table chunks until all senders have finished.
    lim = 0
    fin_num = 0
    if alg_type == 'pipelined':
        leftsorter = None
        leftcount = None
        orizer = None
        intrizer = None
        while fin_num < partition_num and lim < 15:
            lim += 1
            time.sleep(0.01)
            logger.info("before get")
            obj = data_ques1[0].get()
            if sys.getsizeof(obj) > 1000:
                part_data = pd.read_table(BytesIO(obj), header=None, delimiter="|",
                                          names=['key', 'value2'])
            ds, fin_num = read_jiffy_splits(names, dtypes, reduceId, data_ques1,
                                            fin_num, batch_size=1, fin_size=partition_num)
            logger.info(ds)
            logger.info(fin_num)
            if len(ds) > 0:
                # Incrementally join the new chunk against the left table,
                # carrying the factorizer and sorter state across chunks.
                result, orizer, intrizer, leftsorter, leftcount = pipeline_merge(
                    left, ds, factorizer=orizer, intfactorizer=intrizer,
                    leftsorter=leftsorter, leftcount=leftcount, slices=8,
                    how="pipeline")
                time.sleep(0.8)
                logger.info("merged")
    elif alg_type == 'origin':
        # Baseline: accumulate all chunks first, then do a single batch join.
        ds = pd.DataFrame()
        while fin_num < partition_num and lim < 1500:
            lim += 1
            dd, fin_num = read_jiffy_splits(names, dtypes, reduceId, data_ques1,
                                            fin_num, batch_size=1, fin_size=partition_num)
            if len(dd) > 0:
                ds = ds.append(dd)
        print("this is ds:")
        print(ds)
        result = merge(left, ds, how="inner")
        print(fin_num)

    t_fin = time.time()
    # share.append([t_start, t_fin, fin_num])
    return [t_fin, t_start]
def run_command(key):
    pywren.wrenlogging.default_config('INFO')
    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)
    logger.info("before everything")
    logger.info(key)
    t_s = time.time()
    partition_num = key['partition_num']
    rounds = key['rounds']
    taskId = key['taskId']
    appName = key['appName']
    em = JiffyClient(host=key['em'])
    logger.info("before queue")
    data_ques, msg_que = open_or_create_jiffy_queues(
        em, appName, partition_num, 1, 'sender')
    logger.info("queue opened")

    for i in range(rounds):
        # dd = read_s3_table(key)  # original S3 input, replaced below by a synthetic table
        msg = create_msg(rounds, taskId, i, partition_num)

        # Generate a synthetic right table to replace the input.
        right_table = 100000
        indices = tm.makeStringIndex(right_table).values
        right_keys = np.tile(indices[:right_table], 1)  # renamed from `key` to avoid shadowing the argument
        right = DataFrame({"key": right_keys, "value": np.random.randn(right_table)})
        logger.info("Finish generating data")

        x = 1 if i == rounds - 1 else 0  # flag the final round
        encoded = right.to_csv(sep="|", header=False, index=False).encode('utf-8')
        a = np.random.randint(1, 10, 500000)
        encoded = np.asarray(a).astype('S100').tobytes()  # overrides the CSV payload above

        data_path = "/" + appName + "/" + '01'
        test_que = em.open_or_create_queue(data_path, "local://tmp", 10, 1)
        logger.info("get encoded size" + str(sys.getsizeof(encoded)))
        ta = time.time()
        test_que.put(encoded)
        tb = time.time()
        logger.info("write takes " + str(tb - ta))
        logger.info("before get")
        obj = test_que.get()
        logger.info("get obj of size " + str(sys.getsizeof(obj)))
        tc = time.time()
        logger.info("get takes " + str(tc - tb))
        # data_ques[0].put(encoded)
        # obj = data_ques[0].get()
        # res = write_jiffy_partitions(right, ['key'], 'uniform', partition_num,
        #                              data_ques, msg_que=msg_que, msg=msg, fin=x)

    t_f = time.time()
    # share.append([t_s, t_f])
    return [t_s, t_f]
def file_cp():
    # Benchmark configuration.
    address = "frog.zoo.cs.yale.edu"
    service_port = 9090
    lease_port = 9091
    num_blocks = 1
    chain_length = 1
    num_ops = 100000
    data_size = 64
    op_type_set = ["write", "read"]
    path = "/tmp"
    backing_path = "local://tmp"

    # Record all the configuration parameters.
    file_name = './file_cp.txt'
    data = open(file_name, 'w+')
    print("host: ", address, file=data)
    print("service-port: ", service_port, file=data)
    print("lease-port: ", lease_port, file=data)
    print("num-blocks: ", num_blocks, file=data)
    print("chain-length: ", chain_length, file=data)
    print("num-ops: ", num_ops, file=data)
    print("data-size: ", data_size, file=data)
    print("path: ", path, file=data)
    print("backing-path: ", backing_path, file=data)

    for op_type in op_type_set:
        count = 1
        while count <= 1:
            loading = 0
            num_clients = count
            cache_block_size = 2000
            client = JiffyClient(address, service_port, lease_port)
            ht_clients = [None] * num_clients
            # Sweep cache size and prefetch size.
            for cache_size in range(100, 2101, 200):
                for prefetch_size in range(5, 51, 5):
                    for i in range(num_clients):
                        ht_clients[i] = client.open_or_create_file(
                            path, backing_path, num_blocks, chain_length,
                            cache_size, cache_block_size, prefetch_size)
                    # Note: the second branch must be elif; with two separate
                    # ifs, the "write" case would fall into the else and abort.
                    if op_type == "write":
                        benchmark = WriteBenchmark(ht_clients, data_size,
                                                   num_clients, num_ops)
                    elif op_type == "read":
                        benchmark = ReadBenchmark(ht_clients, data_size,
                                                  num_clients, num_ops)
                    else:
                        print("Incorrect operation type for file: ", op_type, file=data)
                        return 0
                    benchmark.run()
                    result = benchmark.wait()
                    client.remove(path)

                    print("===== Cache_Size= ", cache_size, "Prefetch Size= ", prefetch_size, " ======", file=data)
                    print("\t", num_ops, " requests completed in ", float(num_ops) / result[0], " s", file=data)
                    print("\t", num_clients, " parallel clients", file=data)
                    print("\t", data_size, " payload", file=data)
                    print("\tAverage write latency: ", result[1], "us", file=data)
                    print("\tAverage read latency: ", result[2], "us", file=data)
                    print("\tAverage total latency: ", result[1] + result[2], "us", file=data)
                    print("\tThroughput: ", result[0], " bytes per second", file=data)
                    print("\tHit_rate: ", round(result[3], 4), "%", file=data)
                    print("\n", file=data)

                    loading += 1
                    print("Loading -- ", round(float(loading * 100 / 110), 1), "%")
            count *= 2
    data.close()
    return 0
def run_command(key): """ keylist.append({'taskId': i, 'job_number': job_number, 'total_input': numTasks, 'write_element_size': write_element_size, 'process_time': process_time, 'total_time': total_time, 'em': em}) """ begin_of_function = time.time() logger = logging.getLogger(__name__) logger.info("taskId = " + str(key['taskId'])) taskId = key['taskId'] jobid_int = int(key['job_number']) write_element_size = int(key['write_element_size']) process_time = int(key['process_time']) total_time = int(key['total_time']) em = JiffyClient(host=key['em']) [read_time, work_time, write_time] = [0] * 3 start_time = time.time() # a total of 10 threads number_of_clients = 1 write_pool = ThreadPool(number_of_clients) time.sleep(process_time) logger.info("Process finish here: " + str(time.time())) def write_work_client(writer_key): start_time = time.time() client_id = int(writer_key['client_id']) taskID = writer_key['taskId'] jobID = writer_key['jobid'] datasize = writer_key['write_element_size'] #datasize = 1310720 total_time = writer_key['total_time'] body = b'a' * datasize client_id = int(client_id) count = 0 data_path = "/job" + str(jobID) table = em.open_or_create_hash_table(data_path,"local://tmp", 1,1) throughput_step = 1 throughput_count = 1 throughput_total = 0 throughput_nops = 0 ret = [] while time.time() < start_time + total_time: count = count + 1 keyname = str(jobID) + "-" + str(taskID) + "-" + str(count) m = hashlib.md5() m.update(keyname.encode('utf-8')) randomized_keyname = str(jobID) + "-" + str(taskID) + '-' + m.hexdigest()[:8] + '-' + str(count) #logger.info("(" + str(taskId) + ")" + "The name of the key to write is: " + randomized_keyname) start = time.time() logger.info("[HONEYCOMB] [" + str(jobID) + "] " + str(time.time()) + " " + str(taskID) + " " + str(len(body)) + " write " + "S") table.put(randomized_keyname, body) end = time.time() logger.info("[HONEYCOMB] [" + str(jobID) + "] " + str(time.time()) + " " + str(taskID) + " " + str(len(body)) + " write " + "E") throughput_total += end - start throughput_nops += 1 if end - start_time >= throughput_count: throughput = throughput_nops / throughput_total ret.append((end, throughput)) throughput_nops = 0 throughput_count += throughput_step throughput_total = 0 logger.info("Write finish here: " + str(time.time())) return ret writer_keylist = [] number_of_clients = int(number_of_clients) for i in range(number_of_clients): writer_keylist.append({'client_id': i, 'taskId': taskId, 'jobid': jobid_int, 'write_element_size': write_element_size, 'total_time': total_time}) start_time = time.time() write_pool_handler_container = [] write_pool_handler = write_pool.map_async(write_work_client, writer_keylist) write_pool_handler_container.append(write_pool_handler) logging.info("Write task launched") if len(write_pool_handler_container) > 0: write_pool_handler = write_pool_handler_container.pop() ret = write_pool_handler.get() print(ret) twait_end = time.time() write_time = twait_end - start_time write_pool.close() write_pool.join() end_of_function = time.time() return begin_of_function, end_of_function, write_time, ret
def connect(self):
    if self.handle is None:
        raise RuntimeError("Cannot connect: server not running")
    return JiffyClient(self.host, self.service_port, self.lease_port)