Beispiel #1
0
def test_shape_of_variables():
    """Shape inference on a slice of a (10, 10) matrix yields (9, 8)."""
    mat = theano.tensor.matrix('x')
    sliced = mat[1:, 2:]
    inferred = shape_of_variables((mat,), (sliced,), {mat: (10, 10)})

    sliced_shape = inferred[sliced]
    assert sliced_shape == (9, 8)
    # The first shape entry must not be a numpy array.
    assert not isinstance(sliced_shape[0], np.ndarray)

    # just make sure that we can do this afterwards
    theano.FunctionGraph((mat,), (sliced,))
Beispiel #2
0
def test_shape_of_variables():
    """Shape inference on a slice of a (10, 10) matrix yields (9, 8)."""
    source = theano.tensor.matrix('x')
    window = source[1:, 2:]
    graph_inputs, graph_outputs = (source,), (window,)

    shapes = shape_of_variables(graph_inputs, graph_outputs,
                                {source: (10, 10)})
    window_shape = shapes[window]
    assert window_shape == (9, 8)
    # The first shape entry must not be a numpy array.
    assert not isinstance(window_shape[0], np.ndarray)

    # just make sure that we can do this afterwards
    theano.FunctionGraph(graph_inputs, graph_outputs)
Beispiel #3
0
Datei: mpi.py Projekt: cc13ny/ape
def _compute_time_on_machine(runfile, i, o, input_shapes, machine, niter):
    """ Computes computation time of function graph on a remote machine

    Returns average duration of the computation (time)

    Side effect: (over)writes ``_machinefile.txt`` in the current working
    directory with the name of ``machine``.

    inputs:
        runfile - The program to run on the remote machine
        i       - Inputs of the Theano graph
                  (presumably a sequence of variables - confirm with caller)
        o       - Outputs of the Theano graph
                  (presumably a sequence of variables - confirm with caller)
        input_shapes - A dict mapping input variable to array shape
        machine - A machine on which to run the graph
        niter - The number of times to run the computation in sampling

    outputs:
        A dict mapping the string form of each apply node to the matching
        entry of the literal printed by the remote program on stdout

    raises:
        ValueError if two variables of the graph share the same string form

    Illustrative example:

    >>> _compute_time_on_machine(runfile, i, o, {x: (10, 10)}, 'receiver.univ.edu', 10)
    {dot(x, Add(x, y)): 0.133, Add(x, y): .0012}

    See Also
    --------
        comptime_dict_cpu
        comptime_dict_gpu
    """

    # The mpiexec command below reads the target host from this file.
    with open('_machinefile.txt', 'w') as machinefile:
        machinefile.write(machine)

    # stringify the keys

    variables = theano.gof.graph.variables(i, o)
    if len(set(map(str, variables))) != len(variables):
        raise ValueError("Not all variables have unique names. "
                         "Look into theano.gof.utils.give_variables_names")

    known_shapes = shape_of_variables(i, o, input_shapes)

    known_shapes_str = str({str(k): v for k, v in known_shapes.items()})

    # NOTE(review): os.popen3 only exists on Python 2 and runs the command
    # string through a shell; runfile and the shape string are interpolated
    # without escaping.  stderr is piped but never read below.
    stdin, stdout, stderr = os.popen3(
        '''mpiexec -np 1 -machinefile _machinefile.txt python %s%s "%s" %d''' %
        (ape_dir, runfile, known_shapes_str, niter))

    # Send the fgraphs as strings (they will be unpacked on the other end)

    nodes = theano.gof.graph.list_of_nodes(i, o)
    fgraphs = graph_iter(nodes)
    pack_many(fgraphs, stdin)  # This writes to stdin
    stdin.close()  # send termination signal

    # Receive the output from the compute node; it is parsed as a Python
    # literal and paired positionally with the nodes sent above.
    message = stdout.read()
    times = ast.literal_eval(message)
    return dict(zip(map(str, nodes), times))
Beispiel #4
0
def _compute_time_on_machine(runfile, i, o, input_shapes, machine, niter):
    """ Computes computation time of function graph on a remote machine

    Returns average duration of the computation (time)

    Side effect: (over)writes ``_machinefile.txt`` in the current working
    directory with the name of ``machine``.

    inputs:
        runfile - The program to run on the remote machine
        i       - Inputs of the Theano graph
                  (presumably a sequence of variables - confirm with caller)
        o       - Outputs of the Theano graph
                  (presumably a sequence of variables - confirm with caller)
        input_shapes - A dict mapping input variable to array shape
        machine - A machine on which to run the graph
        niter - The number of times to run the computation in sampling

    outputs:
        A dict mapping the string form of each apply node to the matching
        entry of the literal printed by the remote program on stdout

    raises:
        ValueError if two variables of the graph share the same string form

    Illustrative example:

    >>> _compute_time_on_machine(runfile, i, o, {x: (10, 10)}, 'receiver.univ.edu', 10)
    {dot(x, Add(x, y)): 0.133, Add(x, y): .0012}

    See Also
    --------
        comptime_dict_cpu
        comptime_dict_gpu
    """

    # The mpiexec command below reads the target host from this file.
    with open('_machinefile.txt', 'w') as machinefile:
        machinefile.write(machine)

    # stringify the keys

    variables = theano.gof.graph.variables(i, o)
    if len(set(map(str, variables))) != len(variables):
        raise ValueError("Not all variables have unique names. "
                         "Look into theano.gof.utils.give_variables_names")

    known_shapes = shape_of_variables(i, o, input_shapes)

    known_shapes_str = str({str(k): v for k, v in known_shapes.items()})

    # NOTE(review): os.popen3 only exists on Python 2 and runs the command
    # string through a shell; runfile and the shape string are interpolated
    # without escaping.  stderr is piped but never read below.
    command = ('''mpiexec -np 1 -machinefile _machinefile.txt python %s%s "%s" %d'''
               % (ape_dir, runfile, known_shapes_str, niter))
    stdin, stdout, stderr = os.popen3(command)

    # Send the fgraphs as strings (they will be unpacked on the other end)

    nodes = theano.gof.graph.list_of_nodes(i, o)
    fgraphs = graph_iter(nodes)
    pack_many(fgraphs, stdin)  # This writes to stdin
    stdin.close()  # send termination signal

    # Receive the output from the compute node; it is parsed as a Python
    # literal and paired positionally with the nodes sent above.
    message = stdout.read()
    times = ast.literal_eval(message)
    return dict(zip(map(str, nodes), times))
Beispiel #5
0
def test_integration():
    """Run distribute on the basic example computation and check the result.

    Loads the stored communication/computation timing dicts, builds the
    timing functions from them, and verifies the makespan, the per-machine
    graphs, the rankfile and the schedules returned by ``distribute``.
    """
    from ape.examples.basic_computation import inputs, outputs, input_shapes
    from ape.examples.basic_computation import a, b, c, d, e
    from ape.examples.basic_network import machines, A, B
    from ape import timings
    comm_dict = load_dict("ape/test/integration_test_comm_dict.dat")
    comp_dict = load_dict("ape/test/integration_test_comp_dict.dat")

    # Create the scratch directory without shelling out to `mkdir -p`.
    rootdir = '_test/'
    if not os.path.isdir(rootdir):
        os.makedirs(rootdir)
    sanitize(inputs, outputs)

    known_shapes = shape_of_variables(inputs, outputs, input_shapes)
    comptime = timings.make_runtime_function(comp_dict)
    commtime = timings.make_commtime_function(comm_dict, known_shapes)

    # Communication time is a plain number and symmetric in the machines.
    assert isinstance(commtime(a, A, B), (int, float))
    assert commtime(a, A, B) == commtime(a, B, A)

    elemwise = e.owner
    dot = d.owner
    assert comptime(elemwise, A) == 1
    assert comptime(elemwise, B) == 100
    assert comptime(dot, A) == 100
    assert comptime(dot, B) == 1

    graphs, scheds, rankfile, make = distribute(inputs, outputs, input_shapes,
                                                machines, commtime, comptime,
                                                50)

    assert make == 18  # B-dot:1 + B-d-A:16 + A-+:1

    # graphs == "{'A': ([b, a], [e]), 'B': ([a], [d])}"
    ais, [ao] = graphs[A]
    [bi], [bo] = graphs[B]
    assert set(map(str, ais)) == set("ab")
    assert ao.name == e.name
    assert bi.name == a.name
    assert bo.name == d.name

    assert rankfile[A] != rankfile[B]
    assert str(scheds['B'][0]) == str(dot)
    # Compare concrete lists: two lazy map objects never compare equal.
    assert [str(node) for node in scheds['A']] == [str(c.owner), str(e.owner)]
Beispiel #6
0
def test_integration():
    """Run distribute on the basic example computation and check the result.

    Loads the stored communication/computation timing dicts, builds the
    timing functions from them, and verifies the makespan, the per-machine
    graphs, the rankfile and the schedules returned by ``distribute``.
    """
    from ape.examples.basic_computation import inputs, outputs, input_shapes
    from ape.examples.basic_computation import a, b, c, d, e
    from ape.examples.basic_network import machines, A, B
    from ape import timings
    comm_dict = load_dict("ape/test/integration_test_comm_dict.dat")
    comp_dict = load_dict("ape/test/integration_test_comp_dict.dat")

    # Create the scratch directory without shelling out to `mkdir -p`.
    rootdir = '_test/'
    if not os.path.isdir(rootdir):
        os.makedirs(rootdir)
    sanitize(inputs, outputs)

    known_shapes = shape_of_variables(inputs, outputs, input_shapes)
    comptime = timings.make_runtime_function(comp_dict)
    commtime = timings.make_commtime_function(comm_dict, known_shapes)

    # Communication time is a plain number and symmetric in the machines.
    assert isinstance(commtime(a, A, B), (int, float))
    assert commtime(a, A, B) == commtime(a, B, A)

    elemwise = e.owner
    dot = d.owner
    assert comptime(elemwise, A) == 1
    assert comptime(elemwise, B) == 100
    assert comptime(dot, A) == 100
    assert comptime(dot, B) == 1

    graphs, scheds, rankfile, make = distribute(inputs, outputs, input_shapes,
                                                machines, commtime, comptime,
                                                50)

    assert make == 18  # B-dot:1 + B-d-A:16 + A-+:1

    # graphs == "{'A': ([b, a], [e]), 'B': ([a], [d])}"
    ais, [ao] = graphs[A]
    [bi], [bo] = graphs[B]
    assert set(map(str, ais)) == set("ab")
    assert ao.name == e.name
    assert bi.name == a.name
    assert bo.name == d.name

    assert rankfile[A] != rankfile[B]
    assert str(scheds['B'][0]) == str(dot)
    # Compare concrete lists: two lazy map objects never compare equal.
    expected_a_sched = [str(c.owner), str(e.owner)]
    assert [str(node) for node in scheds['A']] == expected_a_sched
Beispiel #7
0
def test_comptime_run():
    """comptime_run returns one timing per function graph.

    Uses a stub timing function that always returns 1, so the expected
    result is a list of ones with the same length as the fgraph list.
    """
    x = theano.tensor.matrix('x')
    y = theano.tensor.matrix('y')
    z = theano.tensor.dot(x, y)
    inputs, outputs = (x, y), (z,)
    variables = theano.gof.graph.variables(inputs, outputs)

    nodes = theano.gof.graph.list_of_nodes(inputs, outputs)

    theano.gof.utils.give_variables_names(variables)
    # Explicit loop: clean_variable is called for its side effect, and a
    # bare map() would never run it where map is lazy.
    for variable in variables:
        clean_variable(variable)

    input_shapes = {x: (10, 10), y: (10, 10)}
    known_shapes = shape_of_variables(inputs, outputs, input_shapes)
    # comptime_run receives the shapes keyed by variable name strings.
    known_shapes = {str(k): v for k, v in known_shapes.items()}

    def time_comp_fn(ins, outs, num_ins, niter):
        # Stub timer: every computation "takes" 1.
        return 1

    fgraphs = list(graph_iter(nodes))
    niter = 3

    results = comptime_run(known_shapes, niter, fgraphs, time_comp_fn)
    assert results == [1] * len(fgraphs)
Beispiel #8
0
def distribute(inputs, outputs, input_shapes, machines, commtime, comptime, makespan=100):
    """Schedule a Theano computation onto several machines.

    The graph is converted to a dag, scheduled with ``tompkins.schedule``,
    cleaned of send/recv placeholders, merged per machine and converted back
    into one Theano graph per machine.

    inputs:
        inputs       - Input variables of the Theano graph
        outputs      - Output variables of the Theano graph
        input_shapes - Shapes of the inputs, forwarded to shape_of_variables
        machines     - Mapping of machine id to a dict with a 'type' entry
        commtime     - Called as commtime(variable, machine_a, machine_b)
        comptime     - Called as comptime(apply_node, machine)
        makespan     - Forwarded as last argument to tompkins.schedule
                       (presumably an upper bound on total time - confirm)

    outputs:
        (theano_graphs, scheds, rankfile, makespan) where theano_graphs maps
        machine -> graph built by dag_to_theano_graph, rankfile maps
        machine -> its index in sorted machine order, and makespan is the
        value returned by tompkins.schedule.
    """
    known_shapes = shape_of_variables(inputs, outputs, input_shapes)

    dag, dinputs, doutputs = dicdag.theano.theano_graph_to_dag(inputs, outputs)
    dag_vars = set.union(set(dinputs), set(doutputs), set(dag.keys()),
            {v for value in dag.values() for v in value['args']})
    # All variables must have distinct string forms; without the set() the
    # comparison would hold trivially.
    assert len(dag_vars) == len(set(map(str, dag_vars)))

    unidag = dicdag.unidag.dag_to_unidag(dag)

    # TODO: This should be an input
    # (these predicates are currently not passed to the scheduler)
    is_gpu       = lambda    m: machines[m]['type'] == 'gpu'
    can_start_on = lambda v, m: not is_gpu(m)
    can_end_on   = lambda v, m: not is_gpu(m)

    def dag_commtime(job, a, b):
        # A job is an (inputs, op, output) triple; only the output is sent.
        _, _, job_output = job
        return commtime(job_output, a, b)

    def dag_comptime(job, a):
        # Index jobs are treated as free.
        if job[1] == dicdag.index:
            return 0
        return comptime(make_apply(*job), a)

    # Compute Schedule
    dags, sched, makespan = tompkins.schedule(
            unidag, machines, dag_comptime, dag_commtime,
            lambda j: 0, lambda j, a: 1, makespan)

    cleaner_dags = fmap(replace_send_recvs, dags)

    full_dags = fmap(dicdag.unidag.unidag_to_dag, cleaner_dags)
    check_send_recv(full_dags)

    merge_dags = merge_gpu_dags(full_dags, machines)
    check_send_recv(merge_dags)

    rankfile = {machine: i for i, machine in enumerate(sorted(merge_dags))}

    theano_graphs = {machine: dag_to_theano_graph(dag,
                     make_ith_output(rankfile, tagof, known_shapes, machine))
                            for machine, dag in merge_dags.items()}

    # Check that all inputs and outputs are inputs/outputs to the computation
    # or mpi
    input_names = set(map(str, inputs))
    output_names = set(map(str, outputs))

    def valid_inp(x):
        return x.name in input_names

    def valid_out(x):
        return x.name in output_names or 'mpi_token' in str(x)

    assert all(valid_inp(x) for g in theano_graphs.values()
                            for x in g[0])
    assert all(valid_out(x) for g in theano_graphs.values()
                            for x in g[1])

    # Each tag is expected to have been requested exactly twice; dump the
    # bookkeeping otherwise.  Single-argument print(...) works on Python 2
    # and 3 alike.
    if not all(count == 2 for count in tagof.counts.values()):
        print("issue with tag counts")
        for x in tagof.counts:
            print(x)
        for x in tagof.cache:
            print(x)

    scheds = tompkins_to_theano_scheds(sched, machines)

    return theano_graphs, scheds, rankfile, makespan
Beispiel #9
0
    # sanitize
    sanitize(inputs, outputs)

    # do timings if necessary
    recompute = False
    if recompute:
        comps = timings.comptime_dict(inputs, outputs, input_shapes, 5,
                                      machines, machine_groups)
        comms = timings.commtime_dict(network)
        save_dict(rootdir+'comps.dat', comps)
        save_dict(rootdir+'comms.dat', comms)
    else:
        comps = load_dict(rootdir+'comps.dat')
        comms = load_dict(rootdir+'comms.dat')

    known_shapes = shape_of_variables(inputs, outputs, input_shapes)
    comptime = timings.make_runtime_function(comps)
    commtime = timings.make_commtime_function(comms, known_shapes)

    # Break up graph
    graphs, scheds, rankfile, make = distribute(inputs, outputs, input_shapes,
                                                machines, commtime, comptime)

    # Write to disk
    write(graphs, scheds, rankfile, rootdir, known_shapes)

    # Print out fgraphs as pdfs
    fgraphs = {m: theano.FunctionGraph(*theano.gof.graph.clone(i, o))
                            for m, (i, o) in graphs.items()}
    for m, g in fgraphs.items():
        theano.printing.pydotprint(g, outfile="%s%s.pdf"%(rootdir,m),
Beispiel #10
0
def distribute(inputs,
               outputs,
               input_shapes,
               machines,
               commtime,
               comptime,
               makespan=100):
    """Schedule a Theano computation onto several machines.

    The graph is converted to a dag, scheduled with ``tompkins.schedule``,
    cleaned of send/recv placeholders, merged per machine and converted back
    into one Theano graph per machine.

    inputs:
        inputs       - Input variables of the Theano graph
        outputs      - Output variables of the Theano graph
        input_shapes - Shapes of the inputs, forwarded to shape_of_variables
        machines     - Mapping of machine id to a dict with a 'type' entry
        commtime     - Called as commtime(variable, machine_a, machine_b)
        comptime     - Called as comptime(apply_node, machine)
        makespan     - Forwarded as last argument to tompkins.schedule
                       (presumably an upper bound on total time - confirm)

    outputs:
        (theano_graphs, scheds, rankfile, makespan) where theano_graphs maps
        machine -> graph built by dag_to_theano_graph, rankfile maps
        machine -> its index in sorted machine order, and makespan is the
        value returned by tompkins.schedule.
    """
    known_shapes = shape_of_variables(inputs, outputs, input_shapes)

    dag, dinputs, doutputs = dicdag.theano.theano_graph_to_dag(inputs, outputs)
    dag_vars = set.union(set(dinputs), set(doutputs), set(dag.keys()),
                         {v
                          for value in dag.values() for v in value['args']})
    # All variables must have distinct string forms; without the set() the
    # comparison would hold trivially.
    assert len(dag_vars) == len(set(map(str, dag_vars)))

    unidag = dicdag.unidag.dag_to_unidag(dag)

    # TODO: This should be an input
    # (these predicates are currently not passed to the scheduler)
    is_gpu = lambda m: machines[m]['type'] == 'gpu'
    can_start_on = lambda v, m: not is_gpu(m)
    can_end_on = lambda v, m: not is_gpu(m)

    def dag_commtime(job, a, b):
        # A job is an (inputs, op, output) triple; only the output is sent.
        _, _, job_output = job
        return commtime(job_output, a, b)

    def dag_comptime(job, a):
        # Index jobs are treated as free.
        if job[1] == dicdag.index:
            return 0
        return comptime(make_apply(*job), a)

    # Compute Schedule
    dags, sched, makespan = tompkins.schedule(unidag, machines, dag_comptime,
                                              dag_commtime, lambda j: 0,
                                              lambda j, a: 1, makespan)

    cleaner_dags = fmap(replace_send_recvs, dags)

    full_dags = fmap(dicdag.unidag.unidag_to_dag, cleaner_dags)
    check_send_recv(full_dags)

    merge_dags = merge_gpu_dags(full_dags, machines)
    check_send_recv(merge_dags)

    rankfile = {machine: i for i, machine in enumerate(sorted(merge_dags))}

    theano_graphs = {
        machine: dag_to_theano_graph(
            dag, make_ith_output(rankfile, tagof, known_shapes, machine))
        for machine, dag in merge_dags.items()
    }

    # Check that all inputs and outputs are inputs/outputs to the computation
    # or mpi
    input_names = set(map(str, inputs))
    output_names = set(map(str, outputs))

    def valid_inp(x):
        return x.name in input_names

    def valid_out(x):
        return x.name in output_names or 'mpi_token' in str(x)

    assert all(valid_inp(x) for g in theano_graphs.values() for x in g[0])
    assert all(valid_out(x) for g in theano_graphs.values() for x in g[1])

    # Each tag is expected to have been requested exactly twice; dump the
    # bookkeeping otherwise.  Single-argument print(...) works on Python 2
    # and 3 alike.
    if not all(count == 2 for count in tagof.counts.values()):
        print("issue with tag counts")
        for x in tagof.counts:
            print(x)
        for x in tagof.cache:
            print(x)

    scheds = tompkins_to_theano_scheds(sched, machines)

    return theano_graphs, scheds, rankfile, makespan
Beispiel #11
0
    # sanitize
    sanitize(inputs, outputs)

    # do timings if necessary
    recompute = False
    if recompute:
        comps = timings.comptime_dict(inputs, outputs, input_shapes, 5,
                                      machines, machine_groups)
        comms = timings.commtime_dict(network)
        save_dict(rootdir + 'comps.dat', comps)
        save_dict(rootdir + 'comms.dat', comms)
    else:
        comps = load_dict(rootdir + 'comps.dat')
        comms = load_dict(rootdir + 'comms.dat')

    known_shapes = shape_of_variables(inputs, outputs, input_shapes)
    comptime = timings.make_runtime_function(comps)
    commtime = timings.make_commtime_function(comms, known_shapes)

    # Break up graph
    graphs, scheds, rankfile, make = distribute(inputs, outputs, input_shapes,
                                                machines, commtime, comptime)

    # Write to disk
    write(graphs, scheds, rankfile, rootdir, known_shapes)

    # Print out fgraphs as pdfs
    fgraphs = {
        m: theano.FunctionGraph(*theano.gof.graph.clone(i, o))
        for m, (i, o) in graphs.items()
    }