def run_write(args, data, process_id, timer):
    """Write ``data`` to Alluxio once per iteration, printing each timing.

    Args:
        args: Parsed options; reads ``host``, ``port``, ``iteration``,
            ``dst``, ``node`` and ``write_type``.
        data: Payload handed unchanged to ``write``.
        process_id: Identifier of the calling process; used in the progress
            message and in the destination path.
        timer: Passed through to ``write`` untouched.
    """
    client = alluxio.Client(args.host, args.port)
    for iteration in range(args.iteration):
        progress = 'process {}, iteration {} ... '.format(process_id, iteration)
        print(progress, end='')
        target = alluxio_path(args.dst, iteration, args.node, process_id)
        mode = alluxio.wire.WriteType(args.write_type)
        elapsed = write(client, data, target, mode, timer)
        print('{} seconds'.format(elapsed))
        # Flush explicitly so output from child processes shows up, see
        # https://stackoverflow.com/questions/2774585/child-processes-created-with-python-multiprocessing-module-wont-print
        sys.stdout.flush()
def setup_client(handler):
    """Start a local HTTP server in a daemon thread and build a client for it.

    Args:
        handler: Request handler passed to ``HTTPServer``.

    Returns:
        A ``(client, cleanup)`` tuple. ``client`` is an ``alluxio.Client``
        pointed at the server with a 60 second timeout. ``cleanup`` is a
        zero-argument callable that stops the server and closes its socket.
    """
    host = 'localhost'
    port = get_free_port()
    print(port)
    server = HTTPServer((host, port), handler)
    server_thread = Thread(target=server.serve_forever)
    # Daemon thread: a forgotten cleanup must not keep the interpreter alive.
    server_thread.daemon = True
    server_thread.start()
    client = alluxio.Client(host, port, timeout=60)

    def cleanup():
        # The previous ``lambda: server.shutdown`` only returned the bound
        # method without calling it, so the server was never stopped.
        server.shutdown()
        server.server_close()

    return client, cleanup
Exemple #3
0
def run_read(args, expected, process_id, timer):
    """Read a file from Alluxio once per iteration, printing each timing.

    Args:
        args: Parsed options; reads ``host``, ``port``, ``iteration``,
            ``src`` and ``node``.
        expected: Passed through to ``read`` untouched.
        process_id: Identifier of the calling process; used in the progress
            message and, when ``args.node`` is truthy, in the source path.
        timer: Passed through to ``read`` untouched.
    """
    client = alluxio.Client(args.host, args.port)
    for iteration in range(args.iteration):
        progress = 'process {}, iteration {} ... '.format(process_id, iteration)
        print(progress, end='')
        if args.node:
            # A per-node, per-process path is derived for each iteration.
            source = alluxio_path(args.src, iteration, args.node, process_id)
        else:
            # Without a node, every iteration reads args.src itself.
            source = args.src
        elapsed = read(client, source, expected, timer)
        print('{} seconds'.format(elapsed))
        # Flush explicitly so output from child processes shows up, see
        # https://stackoverflow.com/questions/2774585/child-processes-created-with-python-multiprocessing-module-wont-print
        sys.stdout.flush()
Exemple #4
0
def main(args):
    """Read a file from Alluxio repeatedly, verify it, and report timings.

    The local file ``args.expected`` is loaded once. On each iteration the
    path produced by ``alluxio_path(args.src, iteration, 0, 0)`` is read
    through the client, the read is timed, and the content is compared with
    the expected data. The accumulated time is passed to ``print_stats``.

    Args:
        args: Parsed options; reads ``expected``, ``host``, ``port``,
            ``iteration`` and ``src`` (and is forwarded to ``print_stats``).

    Raises:
        AssertionError: If the data read back differs from the expected data.
    """
    with open(args.expected, 'r') as f:
        expected = f.read()
    total_time = 0
    c = alluxio.Client(args.host, args.port)
    for iteration in range(args.iteration):
        src = alluxio_path(args.src, iteration, 0, 0)
        print('Iteration %d ... ' % iteration, end='')
        start_time = time.time()
        with c.open(src, 'r') as f:
            data = f.read()
        # Stop the clock before verifying so the comparison is not timed.
        elapsed_time = time.time() - start_time
        # Raise explicitly instead of using ``assert``: assert statements are
        # stripped under ``python -O``, which would silently skip the check.
        if not (expected == data):
            raise AssertionError(
                'data read from {} does not match the contents of {}'.format(
                    src, args.expected))
        print('{} seconds'.format(elapsed_time))
        total_time += elapsed_time
    print_stats(args, total_time)
Exemple #5
0
def main(args):
    """Write a local file to Alluxio repeatedly and report timings.

    The local file ``args.src`` is loaded once. On each iteration it is
    written to the path produced by ``alluxio_path(args.dst, iteration, 0, 0)``
    and the write is timed. The accumulated time is passed to ``print_stats``.

    Args:
        args: Parsed options; reads ``src``, ``host``, ``port``,
            ``iteration``, ``dst`` and ``write_type`` (and is forwarded to
            ``print_stats``).
    """
    with open(args.src, 'r') as source_file:
        payload = source_file.read()

    client = alluxio.Client(args.host, args.port)
    total_time = 0
    for iteration in range(args.iteration):
        target = alluxio_path(args.dst, iteration, 0, 0)
        mode = alluxio.wire.WriteType(args.write_type)
        print('Iteration %d ... ' % iteration, end='')
        # Only the open/write/close sequence is timed.
        began = time.time()
        with client.open(target, 'w', recursive=True, write_type=mode) as out:
            out.write(payload)
        took = time.time() - began
        print('{} seconds'.format(took))
        total_time += took
    print_stats(args, total_time)
Exemple #6
0
def format_data_for_hive(filename, out_file):
    """Copy an Alluxio file to another, rewriting ``', '`` separators as ``','``.

    Lines are read from ``filename``, split on ``', '`` and re-joined with
    ``','``. Converted lines are buffered and written to ``out_file`` in
    batches. Each line keeps whatever line ending it had in the input.

    Args:
        filename: Alluxio path of the input file.
        out_file: Alluxio path of the output file.
    """
    # A batch is flushed once it already holds more than this many lines.
    flush_threshold = 20000
    client = alluxio.Client('localhost', 39999)
    pending = []
    with client.open(filename, 'r') as f:
        with client.open(out_file, 'w') as fout:
            for line in f:
                if len(pending) > flush_threshold:
                    fout.write(''.join(pending))
                    pending = []
                # Collect pieces in a list and join once per batch instead of
                # growing one string with repeated concatenation.
                pending.append(','.join(line.split(', ')))
            # Write the remainder (possibly an empty string).
            fout.write(''.join(pending))
            print('Done')
Exemple #7
0
def format_data_for_mahout(filename, out_file):
    """Copy an Alluxio file to another as comma-separated rows.

    Each input line is split on whitespace, its third field is re-formatted
    as a float with two decimal places, and the fields are re-joined with
    ``','`` plus a trailing newline. Rows are buffered and written to
    ``out_file`` in batches.

    Args:
        filename: Alluxio path of the input file.
        out_file: Alluxio path of the output file.

    Raises:
        IndexError: If a line has fewer than three whitespace-separated fields.
        ValueError: If the third field of a line is not a valid float.
    """
    # A batch is flushed once it already holds more than this many rows.
    flush_threshold = 20000
    client = alluxio.Client('localhost', 39999)
    pending = []
    with client.open(filename, 'r') as f:
        with client.open(out_file, 'w') as fout:
            for line in f:
                parts = line.split()
                # Special case: the original numbers have a precision of only
                # two places after the decimal point.
                parts[2] = "{0:.2f}".format(float(parts[2]))
                if len(pending) > flush_threshold:
                    fout.write(''.join(pending))
                    pending = []
                # Collect rows in a list and join once per batch instead of
                # growing one string with repeated concatenation.
                pending.append(','.join(parts) + '\n')
            # Write the remainder (possibly an empty string).
            fout.write(''.join(pending))
            print('Done')
Exemple #8
0

def info(s):
    """Print ``s`` after passing it through ``green``."""
    highlighted = green(s)
    print(highlighted)


def pretty_json(obj):
    """Return ``obj`` serialized as JSON text indented by two spaces."""
    rendered = json.dumps(obj, indent=2)
    return rendered


# Alluxio paths used by this example script.
py_test_root_dir = '/py-test-dir'
py_test_nested_dir = '/py-test-dir/nested'
py_test = py_test_nested_dir + '/py-test'
py_test_renamed = py_test_root_dir + '/py-test-renamed'

# Client for the Alluxio endpoint at localhost:39999.
client = alluxio.Client('localhost', 39999)

# Create the nested directory.
# NOTE(review): recursive=True presumably also creates missing parent
# directories -- confirm against the client's option documentation.
info("creating directory %s" % py_test_nested_dir)
opt = option.CreateDirectory(recursive=True)
client.create_directory(py_test_nested_dir, opt)
info("done")

# Write a greeting line followed by this script (sys.argv[0]) to the file.
info("writing to %s" % py_test)
with client.open(py_test, 'w') as f:
    f.write('Alluxio works with Python!\n')
    with open(sys.argv[0]) as this_file:
        # The open file object itself is passed, not its contents.
        # NOTE(review): presumably the writer accepts file-like objects and
        # streams them -- confirm against the client's write() documentation.
        f.write(this_file)
info("done")

# Fetch the status of the file that was just written.
info("getting status of %s" % py_test)
stat = client.get_status(py_test)