Code Example #1
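# Assumed imports for this snippet (not shown in the original extract):
# JiffyClient is the client class from the jiffy Python package, and
# GetBenchmark is a helper assumed to be defined elsewhere in this script.
from jiffy import JiffyClient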
def ht_zipf():
    address = "frog.zoo.cs.yale.edu"
    service_port = 9090
    lease_port = 9091
    num_blocks = 1
    chain_length = 1
    num_ops = 100000
    data_size = 64
    op_type_set = []

    op_type_set.append("get")
    path = "/tmp"
    backing_path = "local://tmp"
    file_name = './ht_zipf_with_cache.txt'
    data = open(file_name, 'w+')

    # Output all the configuration parameters:
    print >> data, "host: ", address
    print >> data, "service-port: ", service_port
    print >> data, "lease-port: ", lease_port
    print >> data, "num-blocks: ", num_blocks
    print >> data, "chain-length: ", chain_length
    print >> data, "num-ops: ", num_ops
    print >> data, "data-size: ", data_size
    print >> data, "path: ", path
    print >> data, "backing-path: ", backing_path

    num_clients = 1
    loading = 0
    client = JiffyClient(address, service_port, lease_port)
    ht_clients = [None] * num_clients
    # Sweep the cache size from 5% to 100% of the working set
    # (num_ops * 64 bytes) in 5% increments.
    for cache_size in range(num_ops // 20 * 64, num_ops * 64 + 1,
                            num_ops // 20 * 64):
        ht_clients[0] = client.open_or_create_hash_table(
            path, backing_path, num_blocks, chain_length, cache_size)
        benchmark = GetBenchmark(ht_clients, data_size, num_clients, num_ops)
        benchmark.run()
        result = benchmark.wait()
        client.remove(path)
        print >> data, "===== ", "Zipf_ht_Benchmark, ", "Cache_Size= ", cache_size, " ======"
        print >> data, "\t", num_ops, " requests completed in ", (
            float(num_ops) / result[0]), " s"
        print >> data, "\t", num_clients, " parallel clients"
        print >> data, "\t", data_size, " payload"
        print >> data, "\tAverage put latency: ", result[1], "us"
        print >> data, "\tAverage get latency: ", result[2], "us"
        print >> data, "\tAverage total latency: ", result[1] + result[2], "us"
        print >> data, "\tThroughput: ", result[0], " requests per second"
        print >> data, "\tHit_rate: ", round(result[3], 4), "%"
        print >> data, "\n"
        loading += 1
        print("Loading -- ", round(float(loading * 100 / 20), 1), "%")

    data.close()
    return 0
Code Example #2
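# Puts and then removes n_ops values of value_size bytes through a Jiffy
# key-value store; zipf_keys is assumed to be a helper (defined elsewhere in
# this benchmark suite) that draws n_ops keys from a Zipf(skew) distribution.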
def run_scale_workload(d_host='127.0.0.1', d_port=9090, l_port=9091, data_path='/data/test', n_ops=100000,
                       value_size=102400, skew=0.0):
    value = bytes(value_size)
    keys = zipf_keys(skew, 512, n_ops)
    client = JiffyClient(d_host, d_port, l_port)
    kv = client.open_or_create(data_path, '/tmp')
    logging.info("Generated {} keys".format(len(keys)))
    for key in keys:
        kv.put(key, value)
    for key in keys:
        kv.remove(key)
    client.remove(data_path, RemoveMode.delete)
Code Example #3
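# Round-trips a single 30 MiB payload through a Jiffy queue; `logger` is
# assumed to be a module-level logging.Logger, and JiffyClient is assumed to
# be imported from the jiffy package (see Code Example #1).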
def run_command(key):
    host = '172.31.12.102'
    logger.info("Connecting to the jiffy server")
    em = JiffyClient(host)
    logger.info("Jiffy connected")
    data_path = "/test"
    data_size = 30 * 1024 * 1024
    sample_data = 'a' * data_size
    test_queue = em.open_or_create_queue(data_path, "local://tmp", 10, 1)
    logger.info("queue created")
    test_queue.put(sample_data)
    logger.info("Data put")
    obj = test_queue.get()
    logger.info("Data read: " + str(len(obj)))
    em.close(data_path)
    return 0
Code Example #4
File: kv_sync_benchmark.py Project: ysarch-lab/jiffy
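# Replays a pre-generated workload one operation at a time and prints each
# operation's latency in microseconds; make_workload is assumed to be a
# helper defined in the same file.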
def run_sync_kv_latency_benchmark(d_host,
                                  d_port,
                                  l_port,
                                  data_path,
                                  workload_path,
                                  workload_off=0,
                                  n_ops=100000):
    client = JiffyClient(d_host, d_port, l_port)
    kv = client.open(data_path)
    workload = make_workload(workload_path, workload_off, n_ops, kv)

    ops = 0
    while ops < len(workload):
        begin = time.time()
        workload[ops][0](*workload[ops][1])
        tot = time.time() - begin
        print("%f" % (tot * 1e6))
        ops += 1
Code Example #5
File: kv_sync_benchmark.py Project: ysarch-lab/jiffy
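# Multi-process counterpart of the latency benchmark above: each process
# loads its slice of the workload, waits on a shared barrier so all processes
# start together, then prints its throughput in operations per second.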
def load_and_run_workload(barrier, workload_path, workload_off, d_host, d_port,
                          l_port, data_path, n_ops):
    client = JiffyClient(d_host, d_port, l_port)
    kv = client.open(data_path)
    workload = make_workload(workload_path, workload_off, n_ops, kv)
    logging.info("[Process] Loaded data for process.")

    barrier.wait()
    logging.info("[Process] Starting benchmark...")

    ops = 0
    begin = time.time()
    while ops < len(workload):
        workload[ops][0](*workload[ops][1])
        ops += 1
    end = time.time()

    print(float(ops) / (end - begin))
Code Example #6
File: map_join_error.py Project: charles-typ/pandas
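    # Receiver side of a distributed join: builds a synthetic left table,
    # then repeatedly pulls right-table partitions from Jiffy queues and
    # merges them, either incrementally ('pipelined') or by accumulating and
    # re-joining ('origin'). Helpers such as open_or_create_jiffy_queues,
    # read_jiffy_splits, pipeline_merge and merge are assumed to be defined
    # elsewhere in the project.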
    def run_command(key):
        pywren.wrenlogging.default_config('INFO')
        logging.basicConfig(level=logging.DEBUG)
        logger = logging.getLogger(__name__)
        logger.info("before everything")
        partition_num = key['partition_num']
        rounds = key['rounds']
        em = JiffyClient(host=key['em'])
        reduceId = key['taskId']
        appName = key['appName']
        alg_type = key['type']
        data_ques1 = open_or_create_jiffy_queues(em, appName, partition_num, 1,
                                                 'receiver')
        logger.info("queue opened")
        names = key['names']
        dtypes = key['dtypes']
        ############# left table
        left_table = 100000
        indices = tm.makeStringIndex(left_table).values
        key = np.tile(indices[:left_table], 1)  # NOTE: shadows the `key` argument
        left = DataFrame({"key": key, "value": np.random.randn(left_table)})
        t_start = time.time()
        ############### initialize join functions
        # print(left)
        lim = 0
        ############## keeps fetching
        fin_num = 0

        if alg_type == 'pipelined':
            leftsorter = None
            leftcount = None
            orizer = None
            intrizer = None
            count = 0

            while fin_num < partition_num and lim < 15:
                #### read table
                lim += 1
                time.sleep(0.01)
                logger.info("before get")
                obj = data_ques1[0].get()
                if sys.getsizeof(obj) > 1000:
                    part_data = pd.read_table(BytesIO(obj),
                                              header=None,
                                              delimiter="|",
                                              names=['key', 'value2'])

                # Pull the next batch of right-table splits so that ds and
                # fin_num are defined for the merge step below.
                ds, fin_num = read_jiffy_splits(names, dtypes, reduceId,
                                                data_ques1, fin_num,
                                                batch_size=1,
                                                fin_size=partition_num)
                logger.info(ds)
                logger.info(fin_num)
                #                 print(fin_num)
                if len(ds) > 0:
                    ### join
                    #                 start = timeit.default_timer()
                    result, orizer, intrizer, leftsorter, leftcount = pipeline_merge(
                        left,
                        ds,
                        factorizer=orizer,
                        intfactorizer=intrizer,
                        leftsorter=leftsorter,
                        leftcount=leftcount,
                        slices=8,
                        how="pipeline")
                    time.sleep(0.8)
                    logger.info("merged")

    #                 end = timeit.default_timer()
    #                 count += (end - start)
    #                 logger.info(str(i) + " chunks take time " +  str(end - start) + " Accum time: " + str(count))

        elif alg_type == 'origin':
            ds = pd.DataFrame()
            while fin_num < partition_num and lim < 1500:
                lim += 1
                #### read table
                dd, fin_num = read_jiffy_splits(names,
                                                dtypes,
                                                reduceId,
                                                data_ques1,
                                                fin_num,
                                                batch_size=1,
                                                fin_size=partition_num)
                if len(dd) > 0:
                    ds = ds.append(dd)
                print("this is ds:")
                print(ds)
                result = merge(left, ds, how="inner")
                print(fin_num)
        t_fin = time.time()
        #         share.append([t_start,t_fin, fin_num])
        return ([t_fin, t_start])
Code Example #7
File: map_join_error.py Project: charles-typ/pandas
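    # Sender side of the same job: generates a synthetic right table each
    # round and measures put/get round-trip times through a Jiffy queue.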
    def run_command(key):
        pywren.wrenlogging.default_config('INFO')
        logging.basicConfig(level=logging.DEBUG)
        logger = logging.getLogger(__name__)
        logger.info("before everything")
        logger.info(key)
        t_s = time.time()
        partition_num = key['partition_num']
        rounds = key['rounds']
        #em = key['em']
        taskId = key['taskId']
        appName = key['appName']
        #         partition_num = 1
        #         rounds = 8
        #         #em = key['em']
        #         taskId = 1
        #         appName = 'test-1'
        em = JiffyClient(host=key['em'])
        logger.info("berfore queue")
        data_ques, msg_que = open_or_create_jiffy_queues(
            em, appName, partition_num, 1, 'sender')
        logger.info("queue opend")

        for i in range(rounds):
            ###        dd = read_s3_table(key)

            msg = create_msg(rounds, taskId, i, partition_num)

            #########  create a table here to replace the input
            right_table = 100000
            indices = tm.makeStringIndex(right_table).values
            key = np.tile(indices[:right_table], 1)  # NOTE: shadows the `key` argument
            right = DataFrame({
                "key": key,
                "value": np.random.randn(right_table)
            })
            logger.info("Finish generating data")
            x = 0
            if i == rounds - 1:
                x = 1
            encoded = right.to_csv(sep="|", header=False,
                                   index=False).encode('utf-8')
            # NOTE: the synthetic byte payload below overwrites the CSV
            # encoding produced above.
            a = np.random.randint(1, 10, 500000)
            encoded = np.asarray(a).astype('S100').tobytes()
            # print(sys.getsizeof(encoded))

            data_path = "/" + appName + "/" + '01'
            test_que = em.open_or_create_queue(data_path, "local://tmp", 10, 1)
            logger.info("get encoded size" + str(sys.getsizeof(encoded)))
            ta = time.time()
            test_que.put(encoded)
            tb = time.time()
            logger.info("wirte takes" + str(tb - ta))
            logger.info("before get")
            obj = test_que.get()
            logger.info("get obj of size" + str(sys.getsizeof(obj)))
            tc = time.time()
            logger.info("get takes " + str(tc - tb))
#             data_ques[0].put(encoded)
#             logger.info("write finished")
#             logger.info("before get")
#             obj = data_ques[0].get()

#res = write_jiffy_partitions(right, ['key'], 'uniform', partition_num, data_ques, msg_que = msg_que, msg = msg, fin = x)

        t_f = time.time()
        #        share.append([t_s,t_f])

        return ([t_s, t_f])
Code Example #8
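# As in Code Example #1, JiffyClient is assumed to come from the jiffy Python
# package; WriteBenchmark and ReadBenchmark are helpers assumed to be defined
# elsewhere in this script.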
def file_cp():
    address = "frog.zoo.cs.yale.edu"
    service_port = 9090
    lease_port = 9091
    num_blocks = 1
    chain_length = 1
    num_ops = 100000
    data_size = 64
    op_type_set = []
    op_type_set.append("write")
    op_type_set.append("read")
    path = "/tmp"
    backing_path = "local://tmp"

    # Output all the configuration parameters:
    file_name = './file_cp.txt'
    data = open(file_name, 'w+')
    print >> data, "host: ", address
    print >> data, "service-port: ", service_port
    print >> data, "lease-port: ", lease_port
    print >> data, "num-blocks: ", num_blocks
    print >> data, "chain-length: ", chain_length
    print >> data, "num-ops: ", num_ops
    print >> data, "data-size: ", data_size
    print >> data, "path: ", path
    print >> data, "backing-path: ", backing_path
    for op_type in op_type_set:
        count = 1
        # Scaffold for scaling the client count; with the current bound the
        # loop body runs exactly once.
        while count <= 1:
            loading = 0
            num_clients = count
            cache_block_size = 2000
            client = JiffyClient(address, service_port, lease_port)
            ht_clients = [None] * num_clients
            for cache_size in range(100, 2101, 200):
                for prefetch_size in range(5, 51, 5):
                    for i in range(num_clients):
                        ht_clients[i] = client.open_or_create_file(
                            path, backing_path, num_blocks, chain_length,
                            cache_size, cache_block_size, prefetch_size)

                    if op_type == "write":
                        benchmark = WriteBenchmark(ht_clients, data_size,
                                                   num_clients, num_ops)
                    elif op_type == "read":
                        benchmark = ReadBenchmark(ht_clients, data_size,
                                                  num_clients, num_ops)
                    else:
                        print("Incorrect operation type for file: ", op_type, file=data)
                        return 0

                    benchmark.run()
                    result = benchmark.wait()
                    client.remove(path)

                    print >> data, "===== ", "Cache_Size= ", cache_size, "Prefetch Size= ", prefetch_size, " ======"
                    print >> data, "\t", num_ops, " requests completed in ", (
                        float(num_ops) / result[0]), " s"
                    print >> data, "\t", num_clients, " parallel clients"
                    print >> data, "\t", data_size, " payload"
                    print >> data, "\tAverage write latency: ", result[1], "us"
                    print >> data, "\tAverage read latency: ", result[2], "us"
                    print >> data, "\tAverage total latency: ", result[
                        1] + result[2], "us"
                    print >> data, "\tThroughput: ", result[
                        0], " bytes per second"
                    print >> data, "\tHit_rate: ", round(result[3], 4), "%"
                    print >> data, "\n"
                    loading += 1
                    print("Loading -- ", round(float(loading * 100 / 110), 1),
                          "%")
            count *= 2

    data.close()
    return 0
Code Example #9
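    # Assumed imports for this snippet (not shown in the original extract):
    # time, logging and hashlib from the standard library, ThreadPool from
    # multiprocessing.pool, and JiffyClient from the jiffy package.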
    def run_command(key):
        """
        keylist.append({'taskId': i,
                        'job_number': job_number,
                        'total_input': numTasks,
                        'write_element_size': write_element_size,
                        'process_time': process_time,
                        'total_time': total_time,
                        'em': em})
        """
        begin_of_function = time.time()
        logger = logging.getLogger(__name__)
        logger.info("taskId = " + str(key['taskId']))
        taskId = key['taskId']
        jobid_int = int(key['job_number'])
        write_element_size = int(key['write_element_size'])
        process_time = int(key['process_time'])
        total_time = int(key['total_time'])
        em = JiffyClient(host=key['em'])

        [read_time, work_time, write_time] = [0] * 3
        start_time = time.time()

        # a total of 10 threads
        number_of_clients = 1
        write_pool = ThreadPool(number_of_clients)

        time.sleep(process_time)


        logger.info("Process finish here: " + str(time.time()))

        def write_work_client(writer_key):
            start_time = time.time()
            client_id = int(writer_key['client_id'])
            taskID = writer_key['taskId']
            jobID = writer_key['jobid']
            datasize = writer_key['write_element_size']
            # datasize = 1310720
            total_time = writer_key['total_time']
            body = b'a' * datasize
            client_id = int(client_id)
            count = 0
            data_path = "/job" + str(jobID)
            table = em.open_or_create_hash_table(data_path, "local://tmp", 1, 1)
            throughput_step = 1
            throughput_count = 1
            throughput_total = 0
            throughput_nops = 0
            ret = []
            while time.time() < start_time + total_time:
                count = count + 1
                keyname = str(jobID) + "-" + str(taskID) + "-" + str(count)
                m = hashlib.md5()
                m.update(keyname.encode('utf-8'))
                randomized_keyname = str(jobID) + "-" + str(taskID) + '-' + m.hexdigest()[:8] + '-' + str(count)
                #logger.info("(" + str(taskId) + ")" + "The name of the key to write is: " + randomized_keyname)
                start = time.time()
                logger.info("[HONEYCOMB] [" + str(jobID) + "] " + str(time.time()) + " " + str(taskID) + " " + str(len(body)) + " write " + "S")
                table.put(randomized_keyname, body)
                end = time.time()
                logger.info("[HONEYCOMB] [" + str(jobID) + "] " + str(time.time()) + " " + str(taskID) + " " + str(len(body)) + " write " + "E")
                throughput_total += end - start
                throughput_nops += 1
                if end - start_time >= throughput_count:
                    throughput = throughput_nops / throughput_total
                    ret.append((end, throughput))
                    throughput_nops = 0
                    throughput_count += throughput_step
                    throughput_total = 0

            logger.info("Write finish here: " + str(time.time()))
            return ret

        writer_keylist = []
        number_of_clients = int(number_of_clients)
        for i in range(number_of_clients):
            writer_keylist.append({'client_id': i,
                                   'taskId': taskId,
                                   'jobid': jobid_int,
                                   'write_element_size': write_element_size,
                                   'total_time': total_time})

        start_time = time.time()
        write_pool_handler_container = []
        write_pool_handler = write_pool.map_async(write_work_client, writer_keylist)
        write_pool_handler_container.append(write_pool_handler)
        logging.info("Write task launched")

        if len(write_pool_handler_container) > 0:
            write_pool_handler = write_pool_handler_container.pop()
            ret = write_pool_handler.get()
            print(ret)
            twait_end = time.time()
            write_time = twait_end - start_time
        write_pool.close()
        write_pool.join()
        end_of_function = time.time()
        return begin_of_function, end_of_function, write_time, ret
Code Example #10
File: test_client.py Project: ysarch-lab/jiffy
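    # Helper from the test harness: refuses to hand out a client unless the
    # server under test has been started (i.e., self.handle is set).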
    def connect(self):
        if self.handle is None:
            raise RuntimeError("Cannot connect: server not running")

        return JiffyClient(self.host, self.service_port, self.lease_port)