Example #1
def test_plant():
    """Example of a network using a dynamic plant as the output layer."""

    eps = 1e-6  # value to use for finite differences computations
    dt = 1e-2  # size of time step
    sig_len = 40  # how many time steps to train over
    batch_size = 32  # how many updates to perform with static input
    num_batches = 20000  # how many batches to run total

    import sys

    import numpy as np

    # the network, nonlinearity, and optimizer classes come from the
    # hessianfree package; PlantArm and gen_targets are defined alongside
    # this function in the demo script (see the train_hf import in Example #2)
    from hessianfree import RNNet
    from hessianfree.nonlinearities import Linear, Tanh
    from hessianfree.optimizers import HessianFree

    # NOTE: Change to wherever you keep your arm models
    sys.path.append("../../../studywolf_control/studywolf_control/")
    from arms.two_link.arm_python import Arm
    print('Plant is: ', Arm)
    arm = Arm(dt=dt, init_q=[0.736134824578, 1.85227640003])

    num_states = arm.DOF * 2  # the states are [positions, velocities]
    targets = gen_targets(arm=arm, sig_len=sig_len)  # target joint angles
    init_state = np.zeros((len(targets), num_states))  # initial velocity = 0
    init_state[:, :arm.DOF] = arm.init_q  # set up the initial joint angles
    plant = PlantArm(arm=arm, targets=targets, init_state=init_state, eps=eps)

    # check the weights folder for previously saved weights
    import glob
    files = sorted(glob.glob('weights/rnn*'))
    if len(files) > 0:
        # if weights are found, load them and continue from the last trial
        W = np.load(files[-1])['arr_0']
        print('loading from ', files[-1])
        last_trial = int(
            files[-1].split('weights/rnn_weights-trial')[1].split('-err')[0])
        print('last_trial: ', last_trial)
    else:
        # if no weights are found, start fresh with a new random seed
        W = None
        last_trial = -1
        seed = np.random.randint(100000000)
        print('seed : ', seed)
        np.random.seed(seed)

    # specify the network structure and loss functions
    from hessianfree.loss_funcs import SquaredError
    rnn = RNNet(
        # specify the number of nodes in each layer
        shape=[num_states * 2, 32, 32, num_states, num_states],
        # specify the function of the nodes in each layer
        layers=[Linear(), Tanh(), Tanh(),
                Linear(), plant],
        # specify the layers that have recurrent connections
        rec_layers=[1, 2],
        # specify the connections between layers
        conns={
            0: [1, 2],
            1: [2],
            2: [3],
            3: [4]
        },
        # specify the loss function
        loss_type=[
            # squared error between plant output and targets
            SquaredError()
        ],
        load_weights=W,
        use_GPU=False)

    # set up masking so that the weights between the network output and the
    # plant are not modified during learning; they are fixed at identity,
    # so the network output passes straight through to the plant
    offset, W_end, b_end = rnn.offsets[(3, 4)]
    rnn.mask = np.zeros(rnn.W.shape, dtype=bool)
    rnn.mask[offset:b_end] = True  # freeze both weights and biases
    rnn.W[offset:W_end] = np.eye(num_states).flatten()  # identity weights

    for ii in range(last_trial + 1, num_batches):
        print('=============================================')
        print('training batch ', ii)
        rnn.run_epochs(plant,
                       None,
                       max_epochs=batch_size,
                       optimizer=HessianFree(CG_iter=96,
                                             init_damping=100))
        # save the weights to file, tracking the trial number and error
        err = rnn.best_error
        name = 'weights/rnn_weights-trial%04i-err%.5f' % (ii, err)
        np.savez_compressed(name, rnn.W)
        print('=============================================')
        print('network: ', name)
        print('final error: ', err)
        print('=============================================')

    return rnn.best_error
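
To run the trainer as a standalone script, a minimal entry point might look like the sketch below. It assumes test_plant lives in the same file as PlantArm and gen_targets (e.g. the train_hf script that Example #2 imports from) and that a weights/ folder exists for the checkpoints; these names come from the examples here, not from the hessianfree API itself.

# minimal sketch of an entry point, assuming this function sits in the
# train_hf script referenced by Example #2 and that ./weights/ exists
if __name__ == "__main__":
    best_error = test_plant()
    print('best error over all batches: ', best_error)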
Example #2
def gen_data_plot(folder="weights",
                  index=-1,
                  show_plot=True,
                  save_plot=None,
                  save_paths=False,
                  verbose=True):
    # imports needed to make this example self-contained
    import glob
    import sys

    import matplotlib.pyplot as plt
    import numpy as np

    files = sorted(glob.glob('%s/rnn*' % folder))

    # parse the error value out of each checkpoint filename
    vals = []
    for name in files[:index]:
        if verbose:
            print(name)
        name = name.split('err')[1]
        name = name.split('.npz')[0]
        vals.append(float(name))

    vals = np.array(vals)

    plt.figure(figsize=(10, 3))
    ax = plt.subplot2grid((1, 3), (0, 0), colspan=2)
    ax.loglog(vals)
    # dashed red line marking the minimum error reached
    ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--')
    plt.xlim([0, len(vals)])
    plt.ylim([10**-5, 10])
    plt.title('AHF training error')
    plt.xlabel('Training iterations')
    plt.ylabel('Error')

    # load in the weights and see how well they control the arm
    dt = 1e-2
    sig_len = 40

    # HACK: append system path to have access to the arm code
    # NOTE: Change this path to wherever your plant model is kept!
    sys.path.append("../../../studywolf_control/studywolf_control/")
    from arms.two_link.arm_python import Arm
    if verbose:
        print('Plant is: ', Arm)
    arm = Arm(dt=dt, init_q=[0.736134824578, 1.85227640003])

    from hessianfree import RNNet
    from hessianfree.nonlinearities import (Tanh, Linear)
    from train_hf import PlantArm, gen_targets

    rec_coeff = [1, 1]
    rec_type = "sparse"
    eps = 1e-6

    num_states = 4
    targets = gen_targets(arm, sig_len=sig_len)
    init_state = np.zeros((len(targets), num_states))
    init_state[:, 0] = 0.736134824578
    init_state[:, 1] = 1.85227640003
    plant = PlantArm(arm, targets=targets, init_state=init_state, eps=eps)

    W = np.load(files[index])['arr_0']

    # make sure this network is the same as the one you trained!
    net_size = 96
    if '32' in folder:
        net_size = 32
    rnn = RNNet(
        shape=[num_states * 2, net_size, net_size, num_states, num_states],
        layers=[Linear(), Tanh(), Tanh(),
                Linear(), plant],
        debug=False,
        rec_layers=[1, 2],
        conns={
            0: [1, 2],
            1: [2],
            2: [3],
            3: [4]
        },
        W_rec_params={
            "coeff": rec_coeff,
            "init_type": rec_type
        },
        load_weights=W,
        use_GPU=False)

    rnn.forward(plant, rnn.W)
    states = np.asarray(plant.get_vecs()[0][:, :, num_states:])
    targets = np.asarray(plant.get_vecs()[1])

    def kin(q):
        """Forward kinematics: joint angles -> hand (x, y) for the 2-link arm."""
        L = [.31, .27]  # link lengths
        q0 = q[:, 0]
        q1 = q[:, 1]
        return [
            L[0] * np.cos(q0) + L[1] * np.cos(q0 + q1),
            L[0] * np.sin(q0) + L[1] * np.sin(q0 + q1)
        ]

    ax = plt.subplot2grid((1, 3), (0, 2))
    # plot start point
    initx, inity = kin(init_state)
    ax.plot(initx, inity, 'x', mew=10)
    for jj in range(len(targets)):
        # plot target
        targetx, targety = kin(targets[jj])
        ax.plot(targetx, targety, 'rx', mew=1)
        # plot path
        pathx, pathy = kin(states[jj, :, :])
        path = np.hstack([pathx[:, None], pathy[:, None]])
        if save_paths:
            # paths are bucketed by jj // 8 in the output filename
            np.savez_compressed('end-effector position%.3i.npz' % (jj // 8),
                                array1=path)
        ax.plot(path[:, 0], path[:, 1])

    plt.tight_layout()
    plt.xlim([-.1, .1])
    plt.ylim([.25, .45])
    plt.title('Hand trajectory')
    plt.xlabel('x')
    plt.ylabel('y')

    if save_plot is not None:
        plt.savefig(save_plot)
    if show_plot:
        plt.show()
    plt.close()
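
A typical invocation, assuming a weights/ folder that has been populated by the trainer in Example #1: index selects which checkpoint to load, and all checkpoints before it feed the error curve. As a quick sanity check on kin(), at q = [0, 0] the arm is fully extended along x, so the hand sits at (0.31 + 0.27, 0) = (0.58, 0).

# hypothetical usage: plot the training curve and the reaches for the
# most recent checkpoint written by test_plant in Example #1
gen_data_plot(folder="weights", index=-1, show_plot=True)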
Example #3
def gen_data_plot(folder="weights", index=None, show_plot=True,
                  save_plot=None, save_paths=False, verbose=True):
    # imports needed to make this example self-contained
    import glob
    import sys

    import matplotlib.pyplot as plt
    import numpy as np

    files = sorted(glob.glob('%s/rnn*' % folder))
    # plot the error curve for everything up to index, but keep the full
    # list around so the weights at index can still be loaded below
    plot_files = files[:index] if index is not None else files

    # parse the error value out of each checkpoint filename
    vals = []
    for name in plot_files:
        if verbose:
            print(name)
        name = name.split('err')[1]
        name = name.split('.npz')[0]
        vals.append(float(name))

    vals = np.array(vals)

    plt.figure(figsize=(10, 3))
    ax = plt.subplot2grid((1, 3), (0, 0), colspan=2)
    ax.loglog(vals)
    # dashed red line marking the minimum error reached
    ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--')
    plt.xlim([0, len(vals)])
    plt.ylim([10**-5, 10])
    plt.title('AHF training error')
    plt.xlabel('Training iterations')
    plt.ylabel('Error')

    # load in the weights and see how well they control the arm
    dt = 1e-2
    sig_len = 40

    # HACK: append system path to have access to the arm code
    # NOTE: Change this path to wherever your plant model is kept!
    sys.path.append("../../../studywolf_control/studywolf_control/")
    # from arms.two_link.arm_python import Arm
    from arms.three_link.arm import Arm
    if verbose:
        print('Plant is: %s' % str(Arm))
    arm = Arm(dt=dt)

    from hessianfree import RNNet
    from hessianfree.nonlinearities import (Tanh, Linear)
    from train_hf_3link import PlantArm, gen_targets

    rec_coeff = [1, 1]
    rec_type = "sparse"
    eps = 1e-6

    num_states = arm.DOF * 2
    targets = gen_targets(arm, sig_len=sig_len)
    init_state = np.zeros((len(targets), num_states), dtype=np.float32)
    init_state[:, :arm.DOF] = arm.init_q  # set up the initial joint angles
    plant = PlantArm(arm, targets=targets,
                     init_state=init_state, eps=eps)

    # load the weights from the checkpoint at index (default: most recent)
    index = -1 if index is None else index
    W = np.load(files[index])['arr_0']

    # make sure this network is the same as the one you trained!
    net_size = 96
    if '32' in folder:
        net_size = 32
    rnn = RNNet(shape=[num_states * 2,
                       net_size,
                       net_size,
                       num_states,
                       num_states],
                layers=[Linear(), Tanh(), Tanh(), Linear(), plant],
                debug=False,
                rec_layers=[1, 2],
                conns={0: [1, 2], 1: [2], 2: [3], 3: [4]},
                W_rec_params={"coeff": rec_coeff, "init_type": rec_type},
                load_weights=W,
                use_GPU=False)

    rnn.forward(plant, rnn.W)
    states = np.asarray(plant.get_vecs()[0][:, :, num_states:])
    targets = np.asarray(plant.get_vecs()[1])

    def kin(q):
        """Forward kinematics: joint angles -> hand (x, y) for the n-link arm."""
        x = np.sum([arm.L[ii] * np.cos(np.sum(q[:, :ii+1], axis=1))
                    for ii in range(arm.DOF)], axis=0)
        y = np.sum([arm.L[ii] * np.sin(np.sum(q[:, :ii+1], axis=1))
                    for ii in range(arm.DOF)], axis=0)
        return x, y

    ax = plt.subplot2grid((1, 3), (0, 2))
    # plot start point
    initx, inity = kin(init_state)
    ax.plot(initx, inity, 'x', mew=10)
    for jj in range(len(targets)):
        # plot target
        targetx, targety = kin(targets[jj])
        ax.plot(targetx, targety, 'rx', mew=1)
        # plot path
        pathx, pathy = kin(states[jj, :, :])
        path = np.hstack([pathx[:, None], pathy[:, None]])
        if save_paths:
            # paths are bucketed by jj // 8 in the output filename
            np.savez_compressed('end-effector position%.3i.npz' % (jj // 8),
                                array1=path)
        ax.plot(path[:, 0], path[:, 1])

    plt.tight_layout()
    # plt.xlim([-.1, .1])
    # plt.ylim([.25, .45])
    plt.title('Hand trajectory')
    plt.xlabel('x')
    plt.ylabel('y')

    if save_plot is not None:
        plt.savefig(save_plot)
    if show_plot:
        plt.show()
    plt.close()
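
The three-link variant can be driven the same way. Below is a sketch that writes the figure and the end-effector paths to disk instead of displaying them; the output filename is an assumption, everything else follows the function's signature.

# hypothetical usage: save the trajectory figure and dump the
# end-effector paths as .npz files in the working directory
gen_data_plot(folder="weights", index=None, show_plot=False,
              save_plot="hand_trajectory.png", save_paths=True)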