Example #1
    def __init__(self,
                 topology,
                 activations,
                 session,
                 checkpoint_base_path,
                 dtype=tf.float32):
        n_input = topology[0]
        # Layers in network.
        L = len(topology) - 1
        self.session = session
        self.L = L
        self.topology = topology
        self.checkpoint_base_path = checkpoint_base_path
        self.o_n = tf.placeholder(dtype,
                                  shape=[None, n_input],
                                  name='prog_nn_input_placeholder')

        self.W = []
        self.b = []
        self.h = [self.o_n]
        params = []
        for k in range(L):
            shape = topology[k:k + 2]
            self.W.append(
                weight_variable(shape, name="weight_var_layer_" + str(k)))
            self.b.append(
                bias_variable([shape[1]], name="bias_var_layer_" + str(k)))
            self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) +
                                         self.b[k]))
            params.append(self.W[-1])
            params.append(self.b[-1])
        self.pc = ParamCollection(self.session, params)
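These examples rely on helper functions that are not shown: weight_variable and bias_variable create TensorFlow variables, and ParamCollection bundles a session with a list of parameters. A minimal sketch of plausible helpers, with illustrative initializers (the original definitions may differ):

import tensorflow as tf

def weight_variable(shape, name=None):
    # Small truncated-normal initialization; the original helper's choice is unknown.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name)

def bias_variable(shape, name=None):
    return tf.Variable(tf.constant(0.1, shape=shape), name=name)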
Example #2
class InitialColumnProgNN(object):
    """
    Descr: Initial network to train for later use in transfer learning with a
        Progressive Neural Network.
    Args:
        topology - A list of the number of units in each layer.
                   The first entry is the input dimension.
        activations - A list of activation functions to use on the transforms.
        session - A TensorFlow session.
    Returns:
        None - attaches objects to class for InitialColumnProgNN.session.run()
    """
    def __init__(self,
                 topology,
                 activations,
                 session,
                 checkpoint_base_path,
                 dtype=tf.float32):
        n_input = topology[0]
        # Layers in network.
        L = len(topology) - 1
        self.session = session
        self.L = L
        self.topology = topology
        self.checkpoint_base_path = checkpoint_base_path
        self.o_n = tf.placeholder(dtype,
                                  shape=[None, n_input],
                                  name='prog_nn_input_placeholder')

        self.W = []
        self.b = []
        self.h = [self.o_n]
        params = []
        for k in range(L):
            shape = topology[k:k + 2]
            self.W.append(
                weight_variable(shape, name="weight_var_layer_" + str(k)))
            self.b.append(
                bias_variable([shape[1]], name="bias_var_layer_" + str(k)))
            self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) +
                                         self.b[k]))
            params.append(self.W[-1])
            params.append(self.b[-1])
        self.pc = ParamCollection(self.session, params)

    def save(self, checkpoint_i):
        save_path = get_checkpoint_path(self.checkpoint_base_path, 0,
                                        checkpoint_i)
        current_params = self.pc.get_values_flat()
        np.save(save_path, current_params)

    def restore_weights(self, checkpoint_i):
        save_path = get_checkpoint_path(self.checkpoint_base_path, 0,
                                        checkpoint_i)
        saved_theta = np.load(save_path)
        self.pc.set_values_flat(saved_theta)
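A hedged usage sketch for the column above; the topology, activations, and checkpoint path are illustrative values, not from the original project:

import tensorflow as tf

session = tf.Session()
col0 = InitialColumnProgNN(topology=[128, 64, 64, 6],
                           activations=[tf.nn.relu, tf.nn.relu, tf.nn.softmax],
                           session=session,
                           checkpoint_base_path='/tmp/prog_nn')
session.run(tf.global_variables_initializer())
col0.save(checkpoint_i=0)             # writes pc.get_values_flat() via np.save
col0.restore_weights(checkpoint_i=0)  # reloads the flat parameter vector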
Example #3
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no",fixed_shape=(None,n_in))
        a_n = cgt.vector("a_n",dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0)/128.0 
        nhid = 64
        h1 = cgt.tanh(nn.Affine(128,nhid,weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(nn.Affine(nhid,n_actions,weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n*q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np/probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
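In LaTeX notation, the surrogate and KL computed above are

    \mathrm{surr} = \frac{1}{N} \sum_n \log \pi_\theta(a_n \mid o_n)\, q_n,
    \qquad
    \mathrm{kl} = \frac{1}{N} \sum_n \sum_a p^{\mathrm{old}}_{na} \log \frac{p^{\mathrm{old}}_{na}}{\pi_\theta(a \mid o_n)},

i.e. the score-function policy-gradient surrogate and the mean KL divergence from the old action distribution to the current one.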
Example #4
    def __init__(self, obs_dim, ctrl_dim):

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no",fixed_shape=(None,obs_dim))
        a_na = cgt.matrix("a_na",fixed_shape = (None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2*ctrl_dim))
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(nn.Affine(obs_dim,nhid,weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(nn.Affine(nhid,nhid,weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,ctrl_dim,weight_init=nn.IIDGaussian(std=0.01))(h2)

        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2*self.ctrl_dim]

        logp_n = ((-.5) * cgt.square( (a_na - mean_na) / std_na ).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square( (a_na - oldmean_na) / oldstd_na ).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n*adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
        # kl = cgt.log(sigafter/)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()


        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
        self._compute_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_na, adv_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])

        self.pc = ParamCollection(params)
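The kl expression above is the closed-form KL divergence between diagonal Gaussians, KL(old || new), summed over action dimensions and averaged over the batch:

    \mathrm{KL} = \frac{1}{N} \sum_n \sum_i \left[ \log \frac{\sigma_i}{\sigma^{\mathrm{old}}_i} + \frac{(\sigma^{\mathrm{old}}_i)^2 + (\mu^{\mathrm{old}}_{ni} - \mu_{ni})^2}{2 \sigma_i^2} - \frac{1}{2} \right].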
Example #5
    def __init__(self,
                 topology,
                 activations,
                 session,
                 prev_columns,
                 dtype=tf.float64):
        n_input = topology[0]
        self.topology = topology
        self.session = session
        width = len(prev_columns)
        # Layers in network. First value is n_input, so it doesn't count.
        L = len(topology) - 1
        self.L = L
        self.prev_columns = prev_columns

        # Doesn't work if the columns aren't the same height.
        assert all([self.L == x.L for x in prev_columns])

        self.o_n = tf.placeholder(dtype, shape=[None, n_input])

        self.W = [[]] * L
        self.b = [[]] * L
        self.U = []
        for k in range(L - 1):
            self.U.append([[]] * width)
        self.h = [self.o_n]
        # Collect parameters to hand off to ParamCollection.
        params = []
        for k in range(L):
            W_shape = topology[k:k + 2]
            self.W[k] = weight_variable(W_shape)
            self.b[k] = bias_variable([W_shape[1]])
            if k == 0:
                self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) +
                                             self.b[k]))
                params.append(self.W[k])
                params.append(self.b[k])
                continue
            preactivation = tf.matmul(self.h[-1], self.W[k]) + self.b[k]
            for kk in range(width):
                U_shape = [prev_columns[kk].topology[k], topology[k + 1]]
                # Remember len(self.U) == L - 1!
                self.U[k - 1][kk] = weight_variable(U_shape)
                # pprint(prev_columns[kk].h[k].get_shape().as_list())
                # pprint(self.U[k-1][kk].get_shape().as_list())
                preactivation += tf.matmul(prev_columns[kk].h[k],
                                           self.U[k - 1][kk])
            self.h.append(activations[k](preactivation))
            params.append(self.W[k])
            params.append(self.b[k])
            for kk in range(width):
                params.append(self.U[k - 1][kk])

        self.pc = ParamCollection(self.session, params)
Example #6
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(
            nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(
            nn.Affine(nhid, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Example #7
    def __init__(self, topology, activations, session, dtype=tf.float64):
        n_input = topology[0]
        # Layers in network.
        L = len(topology) - 1  # number of layers, excluding the input layer
        self.session = session
        self.L = L
        self.topology = topology  # list of layer sizes; first entry is the input dim
        self.o_n = tf.placeholder(dtype, shape=[None, n_input])  # network input placeholder

        self.W = []  # weights of each layer
        self.b = []  # biases of each layer
        self.h = [self.o_n]  # activation output of each layer
        params = []  # all Ws and bs, handed to ParamCollection and updated during training
        for k in range(L):
            shape = topology[k:k+2]
            self.W.append(weight_variable(shape))
            self.b.append(bias_variable([shape[1]]))
            self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) + self.b[k]))
            params.append(self.W[-1])
            params.append(self.b[-1])
        self.pc = ParamCollection(self.session, params)
Example #8
    def __init__(self, topology, activations, session, dtype=tf.float64):
        n_input = topology[0]
        # Layers in network.
        L = len(topology) - 1
        self.session = session
        self.L = L
        self.topology = topology
        self.o_n = tf.placeholder(dtype, shape=[None, n_input])

        self.W = []
        self.b = []
        self.h = [self.o_n]
        params = []
        for k in range(L):
            shape = topology[k:k + 2]
            self.W.append(weight_variable(shape))
            self.b.append(bias_variable([shape[1]]))
            self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) +
                                         self.b[k]))
            params.append(self.W[-1])
            params.append(self.b[-1])
        self.pc = ParamCollection(self.session, params)
Example #9
    def __init__(self,
                 n_input,
                 kernel,
                 stride,
                 activations,
                 session,
                 checkpoint_base_path,
                 prev_columns,
                 dtype=tf.float32):
        self.session = session
        self.width = len(prev_columns)
        # Number of layers in this network (hard-coded for this architecture).
        L = 5
        self.prev_columns = prev_columns
        self.checkpoint_base_path = checkpoint_base_path
        # Doesn't work if the columns aren't the same height
        #assert all([L == x.L for x in prev_columns])

        self.o_n = tf.placeholder(dtype=tf.float32, shape=[None, n_input])
        self.imageIn = tf.reshape(self.o_n, shape=[-1, 84, 84, 1])

        self.W = [[]] * L
        self.b = [[]] * L
        self.U = []
        self.V = []
        self.a = []
        for k in range(L - 1):
            self.U.append([[]] * self.width)
            self.V.append([[]] * self.width)
            self.a.append([[]] * self.width)
        self.h = [self.imageIn]  #h[0]
        # Collect parameters to hand off to ParamCollection.
        params = []
        padding = 'SAME'
        # first layer: no lateral connections from previous columns
        self.W[0] = (weight_variable(kernel[0]))
        self.b[0] = (bias_variable([kernel[0][-1]]))
        conv = tf.nn.conv2d(self.h[-1], self.W[0], stride[0],
                            padding) + self.b[0]
        self.h.append(activations(conv))  #h[1]
        params.append(self.W[0])
        params.append(self.b[0])

        #second layer
        self.W[1] = (weight_variable(kernel[1]))
        self.b[1] = (bias_variable([kernel[1][-1]]))
        preactivation = tf.nn.conv2d(self.h[-1], self.W[1], stride[1],
                                     padding) + self.b[1]
        for kk in range(self.width):
            self.a[0][kk] = adapters()
            ah = tf.multiply(self.a[0][kk], prev_columns[kk].h[1])
            maps_in = ah.get_shape().as_list()[3]
            maps_out = int(maps_in / (2.0 * self.width))
            self.V[0][kk] = weight_variable([1, 1, maps_in, maps_out])
            lateral = tf.nn.conv2d(ah, self.V[0][kk], stride[2], padding)
            lateral = activations(lateral)

            self.U[0][kk] = weight_variable(
                [kernel[1][0], kernel[1][1], maps_out, kernel[1][3]])
            preactivation1 = tf.nn.conv2d(lateral, self.U[0][kk], stride[1],
                                          padding)
            preactivation = preactivation + preactivation1
        self.h.append(activations(preactivation))
        params.append(self.W[1])
        params.append(self.b[1])
        for kk in range(self.width):
            params.append(self.U[0][kk])
            params.append(self.V[0][kk])
            params.append(self.a[0][kk])

        self.h.append(tf.layers.flatten(self.h[-1]))  #h[3]

        #fully connected layer
        self.W[2] = (weight_variable(kernel[-1]))
        self.b[2] = (bias_variable([kernel[-1][-1]]))
        fc = tf.matmul(self.h[-1], self.W[2]) + self.b[2]
        for kk in range(self.width):
            self.a[1][kk] = adapters()
            ah = tf.multiply(self.a[1][kk], prev_columns[kk].h[2])
            maps_in = ah.get_shape().as_list()[3]
            maps_out = int(maps_in / (2.0 * self.width))
            self.V[1][kk] = weight_variable([1, 1, maps_in, maps_out])
            lateral = tf.nn.conv2d(ah, self.V[1][kk], stride[2], padding)
            lateral = activations(lateral)
            #lateral = tf.reshape(lateral,[-1,kernel[-1][-1]])
            lateral = tf.layers.flatten(lateral)
            self.U[1][kk] = weight_variable(
                [lateral.get_shape().as_list()[-1], kernel[-1][-1]])
            fc += tf.matmul(lateral, self.U[1][kk])
        self.h.append(activations(fc))  #h[4]
        params.append(self.W[2])
        params.append(self.b[2])
        for kk in range(self.width):
            params.append(self.U[1][kk])
            params.append(self.V[1][kk])
            params.append(self.a[1][kk])

        #calculate value
        self.W[3] = (weight_variable([256, 1]))
        self.b[3] = (bias_variable([1]))
        self.value = tf.matmul(self.h[-1], self.W[3]) + self.b[3]
        for kk in range(self.width):
            self.a[2][kk] = adapters()
            ah = tf.multiply(self.a[2][kk], prev_columns[kk].h[4])
            maps_in = ah.get_shape().as_list()[1]
            maps_out = int(maps_in / (2.0 * self.width))
            self.V[2][kk] = weight_variable([maps_in, maps_out])
            lateral = tf.matmul(ah, self.V[2][kk])
            lateral = activations(lateral)

            self.U[2][kk] = weight_variable([maps_out, 1])
            self.value += tf.matmul(lateral, self.U[2][kk])
        params.append(self.W[3])
        params.append(self.b[3])
        for kk in range(self.width):
            params.append(self.U[2][kk])
            params.append(self.V[2][kk])
            params.append(self.a[2][kk])

        #calculate policy
        self.W[4] = (weight_variable([256, 6]))
        self.b[4] = (bias_variable([6]))
        fc = tf.matmul(self.h[-1], self.W[4]) + self.b[4]
        for kk in range(self.width):
            self.a[3][kk] = adapters()
            ah = tf.multiply(self.a[3][kk], prev_columns[kk].h[4])
            maps_in = ah.get_shape().as_list()[1]
            maps_out = int(maps_in / (2.0 * self.width))
            self.V[3][kk] = weight_variable([maps_in, maps_out])
            lateral = tf.matmul(ah, self.V[3][kk])
            lateral = activations(lateral)

            self.U[3][kk] = weight_variable([maps_out, 6])
            fc += tf.matmul(lateral, self.U[3][kk])
        self.policy = tf.nn.softmax(fc)
        params.append(self.W[4])
        params.append(self.b[4])
        for kk in range(self.width):
            params.append(self.U[3][kk])
            params.append(self.V[3][kk])
            params.append(self.a[3][kk])

        self.pc = ParamCollection(self.session, params)
Example #10
class ExtensibleColumnProgNN(object):
    """
    Descr: An extensible network column for use in transfer learning with a
        Progressive Neural Network.
    Args:
        n_input - The length of the flattened input image array.
        kernel - A list of kernel shapes, one per layer.
        activations - The activation function applied after each transform.
        session - A TensorFlow session.
        checkpoint_base_path - Save path.
        prev_columns - Previously trained columns, either Initial or Extensible,
            that the current column creates lateral connections to.
    Returns:
        None - attaches objects to class for ExtensibleColumnProgNN.session.run()
    """
    def __init__(self,
                 n_input,
                 kernel,
                 stride,
                 activations,
                 session,
                 checkpoint_base_path,
                 prev_columns,
                 dtype=tf.float32):
        self.session = session
        self.width = len(prev_columns)
        # Number of layers in this network (hard-coded for this architecture).
        L = 5
        self.prev_columns = prev_columns
        self.checkpoint_base_path = checkpoint_base_path
        # Doesn't work if the columns aren't the same height
        #assert all([L == x.L for x in prev_columns])

        self.o_n = tf.placeholder(dtype=tf.float32, shape=[None, n_input])
        self.imageIn = tf.reshape(self.o_n, shape=[-1, 84, 84, 1])

        self.W = [[]] * L
        self.b = [[]] * L
        self.U = []
        self.V = []
        self.a = []
        for k in range(L - 1):
            self.U.append([[]] * self.width)
            self.V.append([[]] * self.width)
            self.a.append([[]] * self.width)
        self.h = [self.imageIn]  #h[0]
        # Collect parameters to hand off to ParamCollection.
        params = []
        padding = 'SAME'
        # first layer: no lateral connections from previous columns
        self.W[0] = (weight_variable(kernel[0]))
        self.b[0] = (bias_variable([kernel[0][-1]]))
        conv = tf.nn.conv2d(self.h[-1], self.W[0], stride[0],
                            padding) + self.b[0]
        self.h.append(activations(conv))  #h[1]
        params.append(self.W[0])
        params.append(self.b[0])

        #second layer
        self.W[1] = (weight_variable(kernel[1]))
        self.b[1] = (bias_variable([kernel[1][-1]]))
        preactivation = tf.nn.conv2d(self.h[-1], self.W[1], stride[1],
                                     padding) + self.b[1]
        for kk in range(self.width):
            self.a[0][kk] = adapters()
            ah = tf.multiply(self.a[0][kk], prev_columns[kk].h[1])
            maps_in = ah.get_shape().as_list()[3]
            maps_out = int(maps_in / (2.0 * self.width))
            self.V[0][kk] = weight_variable([1, 1, maps_in, maps_out])
            lateral = tf.nn.conv2d(ah, self.V[0][kk], stride[2], padding)
            lateral = activations(lateral)

            self.U[0][kk] = weight_variable(
                [kernel[1][0], kernel[1][1], maps_out, kernel[1][3]])
            preactivation1 = tf.nn.conv2d(lateral, self.U[0][kk], stride[1],
                                          padding)
            preactivation = preactivation + preactivation1
        self.h.append(activations(preactivation))
        params.append(self.W[1])
        params.append(self.b[1])
        for kk in range(self.width):
            params.append(self.U[0][kk])
            params.append(self.V[0][kk])
            params.append(self.a[0][kk])

        self.h.append(tf.layers.flatten(self.h[-1]))  #h[3]

        #fully connected layer
        self.W[2] = (weight_variable(kernel[-1]))
        self.b[2] = (bias_variable([kernel[-1][-1]]))
        fc = tf.matmul(self.h[-1], self.W[2]) + self.b[2]
        for kk in range(self.width):
            self.a[1][kk] = adapters()
            ah = tf.multiply(self.a[1][kk], prev_columns[kk].h[2])
            maps_in = ah.get_shape().as_list()[3]
            maps_out = int(maps_in / (2.0 * self.width))
            self.V[1][kk] = weight_variable([1, 1, maps_in, maps_out])
            lateral = tf.nn.conv2d(ah, self.V[1][kk], stride[2], padding)
            lateral = activations(lateral)
            #lateral = tf.reshape(lateral,[-1,kernel[-1][-1]])
            lateral = tf.layers.flatten(lateral)
            self.U[1][kk] = weight_variable(
                [lateral.get_shape().as_list()[-1], kernel[-1][-1]])
            fc += tf.matmul(lateral, self.U[1][kk])
        self.h.append(activations(fc))  #h[4]
        params.append(self.W[2])
        params.append(self.b[2])
        for kk in range(self.width):
            params.append(self.U[1][kk])
            params.append(self.V[1][kk])
            params.append(self.a[1][kk])

        #calculate value
        self.W[3] = (weight_variable([256, 1]))
        self.b[3] = (bias_variable([1]))
        self.value = tf.matmul(self.h[-1], self.W[3]) + self.b[3]
        for kk in range(self.width):
            self.a[2][kk] = adapters()
            ah = tf.multiply(self.a[2][kk], prev_columns[kk].h[4])
            maps_in = ah.get_shape().as_list()[1]
            maps_out = int(maps_in / (2.0 * self.width))
            self.V[2][kk] = weight_variable([maps_in, maps_out])
            lateral = tf.matmul(ah, self.V[2][kk])
            lateral = activations(lateral)

            self.U[2][kk] = weight_variable([maps_out, 1])
            self.value += tf.matmul(lateral, self.U[2][kk])
        params.append(self.W[3])
        params.append(self.b[3])
        for kk in range(self.width):
            params.append(self.U[2][kk])
            params.append(self.V[2][kk])
            params.append(self.a[2][kk])

        #calculate policy
        self.W[4] = (weight_variable([256, 6]))
        self.b[4] = (bias_variable([6]))
        fc = tf.matmul(self.h[-1], self.W[4]) + self.b[4]
        for kk in range(self.width):
            self.a[3][kk] = adapters()
            ah = tf.multiply(self.a[3][kk], prev_columns[kk].h[4])
            maps_in = ah.get_shape().as_list()[1]
            maps_out = int(maps_in / (2.0 * self.width))
            self.V[3][kk] = weight_variable([maps_in, maps_out])
            lateral = tf.matmul(ah, self.V[3][kk])
            lateral = activations(lateral)

            self.U[3][kk] = weight_variable([maps_out, 6])
            fc += tf.matmul(lateral, self.U[3][kk])
        self.policy = tf.nn.softmax(fc)
        params.append(self.W[4])
        params.append(self.b[4])
        for kk in range(self.width):
            params.append(self.U[3][kk])
            params.append(self.V[3][kk])
            params.append(self.a[3][kk])

        self.pc = ParamCollection(self.session, params)

    def add_input_to_feed_dict(self, feed_dict, input_batch):
        for col in self.prev_columns:
            feed_dict[col.o_n] = input_batch
        feed_dict[self.o_n] = input_batch
        return feed_dict

    def save(self, checkpoint_i):
        self.save_path, file_name = get_checkpoint_path(
            self.checkpoint_base_path, self.width, checkpoint_i)
        current_params = self.pc.get_values_flat()
        np.save(file_name, current_params)

    def restore_weights(self, checkpoint_i):
        self.save_path, file_name = get_checkpoint_path(
            self.checkpoint_base_path, self.width, checkpoint_i)
        saved_theta = np.load(file_name)
        self.pc.set_values_flat(saved_theta)
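A hedged usage sketch for the extensible column above; session, col0 (a previously trained column over the same input), and input_batch are assumed to exist, and the kernel/stride values are illustrative shapes for the 84x84 input:

kernel = [[8, 8, 1, 16],    # conv1: 8x8 kernels, 1 -> 16 feature maps
          [4, 4, 16, 32],   # conv2: 4x4 kernels, 16 -> 32 feature maps
          [3872, 256]]      # fc: 11*11*32 flattened -> 256 units (depends on strides/padding)
stride = [[1, 4, 4, 1], [1, 2, 2, 1], [1, 1, 1, 1]]  # stride[2] is used by the 1x1 lateral convs
col1 = ExtensibleColumnProgNN(n_input=84 * 84, kernel=kernel, stride=stride,
                              activations=tf.nn.relu, session=session,
                              checkpoint_base_path='/tmp/prog_nn',
                              prev_columns=[col0])
feed_dict = col1.add_input_to_feed_dict({}, input_batch)  # feeds this and all previous columns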
Example #11
File: rrnn.py Project: zobot/rrnn
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int,default=64)
    parser.add_argument("--size_batch", type=int,default=64)
    parser.add_argument("--n_layers",type=int,default=2)
    parser.add_argument("--n_unroll",type=int,default=16)
    parser.add_argument("--k_in",type=int,default=3)
    parser.add_argument("--k_h",type=int,default=5)
    parser.add_argument("--step_size",type=float,default=.01)
    parser.add_argument("--decay_rate",type=float,default=0.95)
    parser.add_argument("--n_epochs",type=int,default=20)
    parser.add_argument("--arch",choices=["lstm","gru"],default="gru")
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir,args.size_batch, args.n_unroll, (.8,.1,.1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(args.arch, loader.size_vocab, 
        loader.size_vocab, args.size_mem, args.size_batch, args.n_layers, args.n_unroll, args.k_in, args.k_h)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-0.01, 0.01, size=(pc.get_total_size(),)))

    for i, param in enumerate(pc.params):
        if "is_rotation" in param.props:
            shape = pc.get_shapes()[i]
            num_vec = int(shape[0] / 2)
            size_vec = int(shape[1])
            gauss = nr.normal(size=(num_vec * size_vec))
            gauss = np.reshape(gauss, (num_vec, size_vec))
            gauss_mag = norm(gauss, axis=1, keepdims=True)
            gauss_normed = gauss / gauss_mag
            gauss_perturb = nr.normal(scale=0.01, size=(num_vec * size_vec))
            gauss_perturb = np.reshape(gauss_perturb, (num_vec, size_vec))
            second_vec = gauss_normed + gauss_perturb
            second_vec_mag = norm(second_vec, axis=1, keepdims=True)
            second_vec_normed = second_vec / second_vec_mag
            new_param_value = np.zeros(shape)
            for j in xrange(num_vec):
                new_param_value[2 * j, :] = gauss_normed[j, :]
                new_param_value[2 * j + 1, :] = second_vec_normed[j, :]
            param.op.set_value(new_param_value)
            #print new_param_value



    def initialize_hiddens(n):
        return [np.ones((n, args.size_mem), cgt.floatX) / float(args.size_mem) for _ in xrange(get_num_hiddens(args.arch, args.n_layers))]

    if args.grad_check:
    #if True:
        x,y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)
        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x,y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss
        from cgt.numeric_diff import numeric_grad
        print "Beginning grad check"
        g_num = numeric_grad(f, pc.get_value_flat(),eps=1e-10)
        print "Ending grad check"
        result = f_loss_and_grad(x,y,*prev_hiddens)
        g_anal = result[1]
        diff = g_num - g_anal
        abs_diff = np.abs(diff)
        print np.where(abs_diff > 1e-4)
        print diff[np.where(abs_diff > 1e-4)]
        embed()
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(), step_size = args.step_size, 
        decay_rate = args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch",iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x,y) in loader.train_batches_iter():
            out = f_loss_and_grad(x,y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f"%((time()-tstart)/len(losses), np.mean(losses))
        optim_state.step_size *= .98 #pylint: disable=E1101

        sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind, n_steps=300, temp=1.0, seed_text = "")

    if args.profile: profiler.print_stats()
Example #12
class AtariRAMPolicy(PPOPolicy, Serializable):
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(
            nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(
            nn.Affine(nhid, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)

    def step(self, X):
        pdist_na = self.f_probs(X)
        acts_n = cat_sample(pdist_na)
        return {"action": acts_n, "pdist": pdist_na}

    def compute_gradient(self, pdist_np, o_no, a_n, q_n):
        return self.f_gradlogp(pdist_np, o_no, a_n, q_n)

    def compute_surr_kl(self, pdist_np, o_no, a_n, q_n):
        return self.f_surr_kl(pdist_np, o_no, a_n, q_n)

    def compute_grad_lagrangian(self, lam, pdist_np, o_no, a_n, q_n):
        return self._f_grad_lagrangian(lam, pdist_np, o_no, a_n, q_n)

    def compute_entropy(self, pdist_np):
        return cat_entropy(pdist_np)

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self, th):
        return self.pc.set_value_flat(th)
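A hedged usage sketch; o_batch stands for a (batch, 128) array of Atari RAM observations:

policy = AtariRAMPolicy(n_actions=6)
out = policy.step(o_batch)             # {"action": sampled actions, "pdist": action probabilities}
theta = policy.get_parameters_flat()   # flat parameter vector from the ParamCollection
policy.set_parameters_flat(theta)      # round-trips the same vector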
Example #13
File: rrnn.py Project: zoemcc/rrnn
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int, default=64)
    parser.add_argument("--size_batch", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--n_unroll", type=int, default=16)
    parser.add_argument("--k_in", type=int, default=3)
    parser.add_argument("--k_h", type=int, default=5)
    parser.add_argument("--step_size", type=float, default=.01)
    parser.add_argument("--decay_rate", type=float, default=0.95)
    parser.add_argument("--n_epochs", type=int, default=20)
    parser.add_argument("--arch", choices=["lstm", "gru"], default="gru")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir, args.size_batch, args.n_unroll,
                    (.8, .1, .1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
        args.arch, loader.size_vocab, loader.size_vocab, args.size_mem,
        args.size_batch, args.n_layers, args.n_unroll, args.k_in, args.k_h)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-0.01, 0.01, size=(pc.get_total_size(), )))

    for i, param in enumerate(pc.params):
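        # Parameters tagged "is_rotation" are re-initialized in row pairs: row 2j
        # is a random unit vector and row 2j+1 is a slightly perturbed, re-normalized
        # copy, so each pair starts out nearly parallel and unit-norm.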
        if "is_rotation" in param.props:
            shape = pc.get_shapes()[i]
            num_vec = int(shape[0] / 2)
            size_vec = int(shape[1])
            gauss = nr.normal(size=(num_vec * size_vec))
            gauss = np.reshape(gauss, (num_vec, size_vec))
            gauss_mag = norm(gauss, axis=1, keepdims=True)
            gauss_normed = gauss / gauss_mag
            gauss_perturb = nr.normal(scale=0.01, size=(num_vec * size_vec))
            gauss_perturb = np.reshape(gauss_perturb, (num_vec, size_vec))
            second_vec = gauss_normed + gauss_perturb
            second_vec_mag = norm(second_vec, axis=1, keepdims=True)
            second_vec_normed = second_vec / second_vec_mag
            new_param_value = np.zeros(shape)
            for j in xrange(num_vec):
                new_param_value[2 * j, :] = gauss_normed[j, :]
                new_param_value[2 * j + 1, :] = second_vec_normed[j, :]
            param.op.set_value(new_param_value)
            #print new_param_value

    def initialize_hiddens(n):
        return [
            np.ones((n, args.size_mem), cgt.floatX) / float(args.size_mem)
            for _ in xrange(get_num_hiddens(args.arch, args.n_layers))
        ]

    if args.grad_check:
        #if True:
        x, y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        print "Beginning grad check"
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10)
        print "Ending grad check"
        result = f_loss_and_grad(x, y, *prev_hiddens)
        g_anal = result[1]
        diff = g_num - g_anal
        abs_diff = np.abs(diff)
        print np.where(abs_diff > 1e-4)
        print diff[np.where(abs_diff > 1e-4)]
        embed()
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch", iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x, y) in loader.train_batches_iter():
            out = f_loss_and_grad(x, y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f" % (
            (time() - tstart) / len(losses), np.mean(losses))
        optim_state.step_size *= .98  #pylint: disable=E1101

        sample(f_step,
               initialize_hiddens(1),
               char2ind=loader.char2ind,
               n_steps=300,
               temp=1.0,
               seed_text="")

    if args.profile: profiler.print_stats()
Example #14
    def __init__(self, session, n_actions, ih, iw, nin):
        """
        Method:
            __init__(self, session, n_actions, ih, iw, nin)
        Args:
            self -- standard method
            session -- a TensorFlow session.
            n_actions -- the dimension of the action space, assumed to be
                discrete because we're playing Atari.
            ih -- image height
            iw -- image width
            nin -- input channels in image, typically 3 for rgb_array.
        Returns:
            None -- defines model from images to actions for class Policy.
        """
        self.session = session
        self.n_actions = n_actions
        self.img_no = tf.placeholder(tf.float32, shape=[None, ih, iw, nin])
        self.a_n = tf.placeholder(tf.int32, shape=[None])
        self.q_n = tf.placeholder(tf.float32, shape=[None])
        self.oldpdist_np = tf.placeholder(tf.float32, shape=[None, n_actions])
        self.keep_prob = tf.placeholder(tf.float32)
        self.lam = tf.placeholder(tf.float32)
        self.n_batch = tf.shape(self.img_no)[0]

        mu, var = tf.nn.moments(self.img_no,axes=[0,1,2,3])
        normed_img = tf.nn.batch_normalization(
            self.img_no, mu, var, None, None,1e-6)

        with tf.variable_scope("conv1"):
            relu1 = conv_relu(normed_img, [5,5,nin,24],[24], stride=2)

        with tf.variable_scope("conv2"):
            relu2 = conv_relu(relu1, [5,5,24,36], [36], stride=2)

        with tf.variable_scope("conv3"):
            relu3 = conv_relu(relu2, [3,3,36,64], [64], stride=2)

        with tf.variable_scope("conv4"):
            relu4 = conv_relu(relu3, [5,5,64,64], [64], stride=2)

        with tf.variable_scope("avgpool1"):
            avgpool1 = tf.nn.avg_pool(relu4, [1, 5, 5, 1], strides=[1,1,1,1],
                padding='VALID')

        avgpool1_shape = avgpool1.get_shape().as_list()
        avgpool1_flat_n = np.prod(avgpool1_shape[1:])
        avgpool1_flat = tf.reshape(avgpool1, [self.n_batch, avgpool1_flat_n])

        with tf.variable_scope("fc1"):
            fc1 = fc_relu(avgpool1_flat, avgpool1_flat_n, 1164)
            fc1_dropout = tf.nn.dropout(fc1, self.keep_prob)
        with tf.variable_scope("fc2"):
            fc2 = fc_relu(fc1_dropout, 1164, 512)
            fc2_dropout = tf.nn.dropout(fc2, self.keep_prob)
        with tf.variable_scope("fc3"):
            fc3 = fc_relu(fc2_dropout, 512, 128)
            fc3_dropout = tf.nn.dropout(fc3, self.keep_prob)
        with tf.variable_scope("fc4"):
            fc4 = fc_relu(fc3_dropout, 128, 64)
            fc4_dropout = tf.nn.dropout(fc4, self.keep_prob)
        with tf.variable_scope("probs_na"):
            weights = tf.get_variable("weights", [64, n_actions],
                initializer=tf.random_normal_initializer())
            biases = tf.get_variable("biases", [n_actions],
                initializer=tf.constant_initializer(0.0))
            self.probs_na = tf.nn.softmax(tf.matmul(fc4_dropout, weights) \
                + biases)

        self.pred_action = tf.argmax(self.probs_na, 1)
        logprobs_na = tf.log(self.probs_na)
        idx_flattened = tf.range(0,self.n_batch) * n_actions + self.a_n

        logps_n = tf.gather(tf.reshape(logprobs_na, [-1]), idx_flattened)

        self.surr = tf.reduce_mean(tf.mul(logps_n, self.q_n))

        params = tf.trainable_variables()

        self.surr_grads = tf.gradients(self.surr, params)

        self.kl = tf.reduce_mean(
            tf.reduce_sum(
                tf.mul(self.oldpdist_np, tf.log(tf.div(self.oldpdist_np,
                    self.probs_na))), 1
            )
        )
        penobj = tf.sub(self.surr, tf.mul(self.lam, self.kl))

        self.pc = ParamCollection(self.session, params)
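A hedged usage sketch for the model above. The docstring suggests the class is named Policy; the data arrays are illustrative, and the initializer call assumes the older TensorFlow API implied by tf.mul/tf.sub:

sess = tf.Session()
policy = Policy(sess, n_actions=6, ih=84, iw=84, nin=3)
sess.run(tf.initialize_all_variables())  # tf.global_variables_initializer() on newer TF
surr_val = sess.run(policy.surr, feed_dict={
    policy.img_no: img_batch,   # (batch, ih, iw, nin) float32 frames
    policy.a_n: a_batch,        # (batch,) int32 actions taken
    policy.q_n: q_batch,        # (batch,) float32 returns
    policy.keep_prob: 1.0,      # disable dropout for evaluation
})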
Example #15
class AtariRAMPolicy(PPOPolicy, Serializable):
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no",fixed_shape=(None,n_in))
        a_n = cgt.vector("a_n",dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        nhid, nhid2 = 64, 64
        h0 = (o_no - 128.0)/128.0
        d0 = nn.dropout(h0, .2)

        h1 = nn.rectify(nn.Affine(128,nhid,weight_init=nn.IIDGaussian(std=.1))(d0))
        d1 = nn.dropout(h1, .2)
        h2 = nn.rectify(nn.Affine(nhid,nhid2,weight_init=nn.IIDGaussian(std=.1))(d1))
        d2 = nn.dropout(h2, .2)
        probs_na = nn.softmax(nn.Affine(nhid2,n_actions,weight_init=nn.IIDGaussian(std=0.01))(d2))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n*q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np/probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)

    def step(self, X):
        pdist_na = self.f_probs(X)
        acts_n = cat_sample(pdist_na)
        return {
            "action" : acts_n,
            "pdist" : pdist_na
        }

    def compute_gradient(self, pdist_np, o_no, a_n, q_n):
        return self.f_gradlogp(pdist_np, o_no, a_n, q_n)

    def compute_surr_kl(self, pdist_np, o_no, a_n, q_n):
        return self.f_surr_kl(pdist_np, o_no, a_n, q_n)

    def compute_grad_lagrangian(self, lam, pdist_np, o_no, a_n, q_n):
        return self._f_grad_lagrangian(lam, pdist_np, o_no, a_n, q_n)

    def compute_entropy(self, pdist_np):
        return cat_entropy(pdist_np)

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self,th):
        return self.pc.set_value_flat(th)
Example #16
    def __init__(self,
                 topology,
                 activations,
                 session,
                 checkpoint_base_path,
                 prev_columns,
                 dtype=tf.float32):
        n_input = topology[0]
        self.topology = topology
        self.session = session
        width = len(prev_columns)
        # Layers in network. First value is n_input, so it doesn't count.
        L = len(topology) - 1
        self.L = L
        self.prev_columns = prev_columns
        self.checkpoint_base_path = checkpoint_base_path
        self.column_number = width

        # Doesn't work if the columns aren't the same height.
        assert all([self.L == x.L for x in prev_columns])

        self.o_n = tf.placeholder(dtype,
                                  shape=[None, n_input],
                                  name='prog_nn_input_placeholder')

        self.W = [[]] * L
        self.b = [[]] * L
        self.U = []
        for k in range(L - 1):
            self.U.append([[]] * width)
        self.h = [self.o_n]
        # Collect parameters to hand off to ParamCollection.
        params = []
        for k in range(L):
            W_shape = topology[k:k + 2]
            self.W[k] = weight_variable(W_shape,
                                        name="weight_var_layer_" + str(k))
            self.b[k] = bias_variable([W_shape[1]],
                                      name="bias_var_layer_" + str(k))
            if k == 0:
                if activations[k] is None:
                    self.h.append(tf.matmul(self.h[-1], self.W[k]) + self.b[k])
                else:
                    self.h.append(
                        activations[k](tf.matmul(self.h[-1], self.W[k]) +
                                       self.b[k]))
                params.append(self.W[k])
                params.append(self.b[k])
                continue
            preactivation = tf.matmul(self.h[-1], self.W[k]) + self.b[k]
            for kk in range(width):
                U_shape = [prev_columns[kk].topology[k], topology[k + 1]]
                # Remember len(self.U) == L - 1!
                self.U[k - 1][kk] = weight_variable(
                    U_shape,
                    name="lateral_weight_var_layer_" + str(k) + "_to_column_" +
                    str(kk))
                # pprint(prev_columns[kk].h[k].get_shape().as_list())
                # pprint(self.U[k-1][kk].get_shape().as_list())
                # Lateral connections: add previous columns' layer-k activations,
                # transformed by U, to this layer's preactivation.
                preactivation += tf.matmul(prev_columns[kk].h[k],
                                           self.U[k - 1][kk])
            if activations[k] is None:
                self.h.append(preactivation)
            else:
                self.h.append(activations[k](preactivation))
            params.append(self.W[k])
            params.append(self.b[k])
            for kk in range(width):
                params.append(self.U[k - 1][kk])

        self.pc = ParamCollection(self.session, params)
Example #17
def train(args, X, Y, dbg_iter=None, dbg_epoch=None, dbg_done=None):
    dbg_out = []
    net_in, net_out = hybrid_network(args.num_inputs,
                                     args.num_outputs,
                                     args.num_units,
                                     args.num_sto,
                                     dbg_out=dbg_out)
    params, f_step, f_loss, f_grad, f_surr = \
        make_funcs(net_in, net_out, args, dbg_out=dbg_out)
    param_col = ParamCollection(params)
    init_params = nn.init_array(args.init_conf,
                                (param_col.get_total_size(), 1))
    param_col.set_value_flat(init_params.flatten())
    init_params = [
        np.array([[0., 1.]]),  # W_1
        np.array([[0., 0.]]),  # b_1
        np.array([[1.], [1.]]),  # W_3
        np.array([[0.]]),  # b_3
    ]
    param_col.set_values(init_params)
    if 'snapshot' in args:
        print "Loading params from previous snapshot"
        snapshot = pickle.load(open(args['snapshot'], 'r'))
        param_col.set_values(snapshot)
    # param_col.set_value_flat(
    #     np.random.normal(0., 1.,size=param_col.get_total_size())
    # )
    # optim_state = Table(theta=param_col.get_value_flat(),
    #                     scratch=param_col.get_value_flat(),
    #                     step_size=args.step_size
    #                     )

    optim_state = make_rmsprop_state(theta=param_col.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)
    for i_epoch in range(args.n_epochs):
        for i_iter in range(X.shape[0]):
            ind = np.random.choice(X.shape[0], args['size_batch'])
            x, y = X[ind], Y[ind]  # not sure this works for multi-dim
            info = f_surr(x, y, num_samples=args['size_sample'])
            loss, loss_surr, grad = info['loss'], info['surr_loss'], info[
                'surr_grad']
            # loss, loss_surr, grad = f_grad(x, y)
            # update
            rmsprop_update(param_col.flatten_values(grad), optim_state)
            # optim_state.scratch = param_col.flatten_values(grad)
            # optim_state.theta -= optim_state.step_size * optim_state.scratch
            param_col.set_value_flat(optim_state.theta)
            print param_col.get_value_flat()
            if dbg_iter:
                dbg_iter(i_epoch, i_iter, param_col, optim_state, info)
        if dbg_epoch: dbg_epoch(i_epoch, param_col, f_surr)
    if dbg_done: dbg_done(param_col, optim_state, f_surr)
    return optim_state
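make_rmsprop_state and rmsprop_update are project utilities that are not shown here. A minimal sketch of the standard RMSProp update they appear to implement, assuming a simple state object (illustrative, not the project's actual source):

import numpy as np

class RMSPropState(object):
    def __init__(self, theta, step_size, decay_rate):
        self.theta = theta            # flat parameter vector, updated in place
        self.step_size = step_size
        self.decay_rate = decay_rate
        self.sqgrad = np.zeros_like(theta) + 1e-6  # running average of squared gradients

def make_rmsprop_state(theta, step_size, decay_rate):
    return RMSPropState(theta, step_size, decay_rate)

def rmsprop_update(grad, state):
    # Decay the squared-gradient average, then take a normalized gradient step in place.
    state.sqgrad[:] = state.decay_rate * state.sqgrad + (1.0 - state.decay_rate) * np.square(grad)
    state.theta[:] = state.theta - state.step_size * grad / np.sqrt(state.sqgrad)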
Example #18
class InitialColumnProgNN(object):
    """
    Descr: Initial network to train for later use in transfer learning with a
        Progressive Neural Network.
    Args:
        n_input - The length of the flattened input image array.
        kernel - A list of kernel shapes, one per layer.
        activations - The activation function applied after each transform.
        session - A TensorFlow session.
        checkpoint_base_path - Save path.
    Returns:
        None - attaches objects to class for InitialColumnProgNN.session.run()
    """

    # TODO: add a name to every tensor
    def __init__(self,
                 n_input,
                 kernel,
                 stride,
                 activations,
                 session,
                 checkpoint_base_path,
                 dtype=tf.float32):
        # Layers in network.
        self.session = session
        #self.L = len(topology)
        #self.topology = topology
        self.o_n = tf.placeholder(dtype=tf.float32, shape=[None, n_input])
        self.imageIn = tf.reshape(self.o_n, shape=[-1, 84, 84, 1])
        self.checkpoint_base_path = checkpoint_base_path

        self.W = []
        self.b = []
        self.h = [self.imageIn]
        params = []

        padding = 'SAME'

        #The first two layers
        for k in range(2):
            # When training a later column, set initial=True to freeze these
            # weights; the variables are then created as non-trainable.
            self.W.append(weight_variable(kernel[k], initial=None))
            self.b.append(bias_variable([kernel[k][-1]], initial=None))
            conv = tf.nn.conv2d(self.h[-1], self.W[k], stride[k],
                                padding) + self.b[k]
            self.h.append(activations(conv))
            params.append(self.W[k])
            params.append(self.b[k])

        self.h.append(tf.layers.flatten(self.h[-1]))

        #fully connected layer
        self.W.append(weight_variable(kernel[-1], initial=None))
        self.b.append(bias_variable([kernel[-1][-1]], initial=None))
        fc = tf.matmul(self.h[-1], self.W[-1]) + self.b[-1]
        self.h.append(activations(fc))
        params.append(self.W[-1])
        params.append(self.b[-1])

        #Calculate value
        self.W.append(weight_variable([256, 1], initial=None))
        self.b.append(bias_variable([1], initial=None))
        self.value = tf.matmul(self.h[-1], self.W[-1]) + self.b[-1]
        params.append(self.W[-1])
        params.append(self.b[-1])

        #Calculate policy
        self.W.append(weight_variable([256, 6], initial=None))
        self.b.append(bias_variable([6], initial=None))
        fc = tf.matmul(self.h[-1], self.W[-1]) + self.b[-1]
        self.policy = tf.nn.softmax(fc)
        params.append(self.W[-1])
        params.append(self.b[-1])

        self.pc = ParamCollection(self.session, params)

    def add_input_to_feed_dict(self, feed_dict, input_batch):
        feed_dict[self.o_n] = input_batch
        return feed_dict

    def save(self, checkpoint_i):
        self.save_path, file_name = get_checkpoint_path(
            self.checkpoint_base_path, 0, checkpoint_i)
        current_params = self.pc.get_values_flat()
        np.save(file_name, current_params)

    def restore_weights(self, checkpoint_i):
        self.save_path, file_name = get_checkpoint_path(
            self.checkpoint_base_path, 0, checkpoint_i)
        saved_theta = np.load(file_name)
        self.pc.set_values_flat(saved_theta)
Example #19
    def __init__(self,
                 n_input,
                 kernel,
                 stride,
                 activations,
                 session,
                 checkpoint_base_path,
                 dtype=tf.float32):
        # Layers in network.
        self.session = session
        #self.L = len(topology)
        #self.topology = topology
        self.o_n = tf.placeholder(dtype=tf.float32, shape=[None, n_input])
        self.imageIn = tf.reshape(self.o_n, shape=[-1, 84, 84, 1])
        self.checkpoint_base_path = checkpoint_base_path

        self.W = []
        self.b = []
        self.h = [self.imageIn]
        params = []

        padding = 'SAME'

        #The first two layers
        for k in range(2):
            # When training a later column, set initial=True to freeze these
            # weights; the variables are then created as non-trainable.
            self.W.append(weight_variable(kernel[k], initial=None))
            self.b.append(bias_variable([kernel[k][-1]], initial=None))
            conv = tf.nn.conv2d(self.h[-1], self.W[k], stride[k],
                                padding) + self.b[k]
            self.h.append(activations(conv))
            params.append(self.W[k])
            params.append(self.b[k])

        self.h.append(tf.layers.flatten(self.h[-1]))

        #fully connected layer
        self.W.append(weight_variable(kernel[-1], initial=None))
        self.b.append(bias_variable([kernel[-1][-1]], initial=None))
        fc = tf.matmul(self.h[-1], self.W[-1]) + self.b[-1]
        self.h.append(activations(fc))
        params.append(self.W[-1])
        params.append(self.b[-1])

        #Calculate value
        self.W.append(weight_variable([256, 1], initial=None))
        self.b.append(bias_variable([1], initial=None))
        self.value = tf.matmul(self.h[-1], self.W[-1]) + self.b[-1]
        params.append(self.W[-1])
        params.append(self.b[-1])

        #Calculate policy
        self.W.append(weight_variable([256, 6], initial=None))
        self.b.append(bias_variable([6], initial=None))
        fc = tf.matmul(self.h[-1], self.W[-1]) + self.b[-1]
        self.policy = tf.nn.softmax(fc)
        params.append(self.W[-1])
        params.append(self.b[-1])

        self.pc = ParamCollection(self.session, params)
Example #20
def train(args, X, Y, dbg_iter=None, dbg_epoch=None, dbg_done=None):
    dbg_out = []
    net_in, net_out = hybrid_network(args.num_inputs, args.num_outputs,
                                     args.num_units, args.num_sto,
                                     dbg_out=dbg_out)
    params, f_step, f_loss, f_grad, f_surr = \
        make_funcs(net_in, net_out, args, dbg_out=dbg_out)
    param_col = ParamCollection(params)
    init_params = nn.init_array(args.init_conf, (param_col.get_total_size(), 1))
    param_col.set_value_flat(init_params.flatten())
    init_params = [
        np.array([[0., 1.]]),  # W_1
        np.array([[0., 0.]]),  # b_1
        np.array([[1.], [1.]]),  # W_3
        np.array([[0.]]),  # b_3
    ]
    param_col.set_values(init_params)
    if 'snapshot' in args:
        print "Loading params from previous snapshot"
        snapshot = pickle.load(open(args['snapshot'], 'r'))
        param_col.set_values(snapshot)
    # param_col.set_value_flat(
    #     np.random.normal(0., 1.,size=param_col.get_total_size())
    # )
    # optim_state = Table(theta=param_col.get_value_flat(),
    #                     scratch=param_col.get_value_flat(),
    #                     step_size=args.step_size
    #                     )

    optim_state = make_rmsprop_state(theta=param_col.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)
    for i_epoch in range(args.n_epochs):
        for i_iter in range(X.shape[0]):
            ind = np.random.choice(X.shape[0], args['size_batch'])
            x, y = X[ind], Y[ind]  # not sure this works for multi-dim
            info = f_surr(x, y, num_samples=args['size_sample'])
            loss, loss_surr, grad = info['loss'], info['surr_loss'], info['surr_grad']
            # loss, loss_surr, grad = f_grad(x, y)
            # update
            rmsprop_update(param_col.flatten_values(grad), optim_state)
            # optim_state.scratch = param_col.flatten_values(grad)
            # optim_state.theta -= optim_state.step_size * optim_state.scratch
            param_col.set_value_flat(optim_state.theta)
            print param_col.get_value_flat()
            if dbg_iter: dbg_iter(i_epoch, i_iter, param_col, optim_state, info)
        if dbg_epoch: dbg_epoch(i_epoch, param_col, f_surr)
    if dbg_done: dbg_done(param_col, optim_state, f_surr)
    return optim_state
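
make_rmsprop_state and rmsprop_update are not defined in this example. The standard RMSProp rule they presumably implement keeps an exponential moving average of squared gradients and scales each step by its root; a minimal sketch with assumed field names:

import numpy as np

class RMSPropState(object):
    def __init__(self, theta, step_size, decay_rate):
        self.theta = theta
        self.step_size = step_size
        self.decay_rate = decay_rate
        self.sq_grad = np.zeros_like(theta)

def make_rmsprop_state(theta, step_size, decay_rate):
    return RMSPropState(theta, step_size, decay_rate)

def rmsprop_update(grad, state, eps=1e-8):
    # Moving average of squared gradients, then a step scaled by its root.
    state.sq_grad = (state.decay_rate * state.sq_grad +
                     (1.0 - state.decay_rate) * grad ** 2)
    state.theta -= state.step_size * grad / np.sqrt(state.sq_grad + eps)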
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--n_batches", type=int, default=1000000)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    args = parser.parse_args()
    np.seterr("raise")

    cgt.set_precision("quad" if args.grad_check else "double")
    np.random.seed(0)

    # model parameters
    if args.grad_check:
        opt = NTMOpts(
            b=1,  # batch size
            h=1,  # number of heads
            n=2,  # number of memory sites
            m=3,  # dimension at each memory site
            k=4,  # dimension of input
            p=2,  # dimension of output
            ff_hid_sizes=[])
        seq_length = 2

    else:
        opt = NTMOpts(
            b=64,  # batch size
            h=3,  # number of heads
            n=128,  # number of memory sites
            m=20,  # dimension at each memory site
            k=3,  # dimension of input
            p=1,  # dimension of output
            ff_hid_sizes=[128, 128])

        seq_length = 10

    if args.unittest:
        seq_length = 3
        args.n_batches = 3

    tstart = time.time()
    ntm = make_ntm(opt)
    task = CopyTask(opt.b, seq_length, opt.p)
    f_loss, f_loss_and_grad, params = make_funcs(opt, ntm, task.total_time(),
                                                 task.loss_timesteps())
    print "graph construction and compilation took %g seconds" % (time.time() -
                                                                  tstart)

    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(), )))

    if args.grad_check:
        x, y = task.gen_batch()

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-8)
        _, _, g_anal = f_loss_and_grad(x, y)
        assert np.allclose(g_num, g_anal, atol=1e-8)
        print "Gradient check succeeded!"
        print "%i/%i elts of grad are nonzero" % (
            (g_anal != 0).sum(), g_anal.size)
        return

    seq_num = 0
    state = make_rmsprop_state(pc.get_value_flat(), .01, .95)
    print fmt_row(13, ["seq num", "CE (bits)", "accuracy", "|g|_inf"],
                  header=True)

    if args.profile: cgt.profiler.start()

    for i in xrange(args.n_batches):
        x, y = task.gen_batch()
        seq_num += x.shape[1]
        l, l01, g = f_loss_and_grad(x, y)
        print fmt_row(13, [seq_num, l, l01, np.abs(g).max()])
        rmsprop_update(g, state)
        pc.set_value_flat(state.theta)
        if not np.isfinite(l): break

    if args.profile: cgt.profiler.print_stats()
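
The grad-check path compares cgt's analytic gradient against cgt.numeric_diff.numeric_grad, i.e. against finite differencing of the scalar loss one coordinate at a time. A self-contained sketch of the same idea (forward differences; the exact scheme inside cgt is an assumption):

import numpy as np

def numeric_grad_sketch(f, theta, eps=1e-8):
    # Forward-difference approximation of df/dtheta_i.
    g = np.zeros_like(theta)
    f0 = f(theta)
    for i in range(theta.size):
        perturbed = theta.copy()
        perturbed[i] += eps
        g[i] = (f(perturbed) - f0) / eps
    return g

# Usage: assert np.allclose(numeric_grad_sketch(f, th0), g_analytic, atol=1e-8)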
Example #22
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int, default=64)
    parser.add_argument("--size_batch", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--n_unroll", type=int, default=16)
    parser.add_argument("--step_size", type=float, default=.01)
    parser.add_argument("--decay_rate", type=float, default=0.95)
    parser.add_argument("--n_epochs", type=int, default=20)
    parser.add_argument("--arch", choices=["lstm", "gru"], default="lstm")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir, args.size_batch, args.n_unroll,
                    (.8, .1, .1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
        args.arch, loader.size_vocab, loader.size_vocab, args.size_mem,
        args.size_batch, args.n_layers, args.n_unroll)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(), )))

    def initialize_hiddens(n):
        return [
            np.zeros((n, args.size_mem), cgt.floatX)
            for _ in xrange(get_num_hiddens(args.arch, args.n_layers))
        ]

    if args.grad_check:
        x, y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10)
        result = f_loss_and_grad(x, y, *prev_hiddens)
        g_anal = result[1]
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch", iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x, y) in loader.train_batches_iter():
            out = f_loss_and_grad(x, y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f" % (
            (time() - tstart) / len(losses), np.mean(losses))
        optim_state.step_size *= .98  #pylint: disable=E1101

        sample(f_step,
               initialize_hiddens(1),
               char2ind=loader.char2ind,
               n_steps=300,
               temp=1.0,
               seed_text="")

    if args.profile: profiler.print_stats()
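
sample() is called here with a temp argument. Temperature sampling divides the log-probabilities by temp before normalizing, so temp < 1 sharpens the distribution and temp > 1 flattens it; only the signature appears above, so the body below is an assumption:

import numpy as np

def sample_char(logits, temp=1.0):
    # Scale logits by 1/temp, renormalize, then draw one character index.
    scaled = logits / temp
    scaled -= scaled.max()  # for numerical stability
    probs = np.exp(scaled)
    probs /= probs.sum()
    return np.random.choice(len(probs), p=probs)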
Example #23
r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
r_norm = cgt.norm(r_non, axis=2, keepdims=True)
r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")
prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
inters = [prev_h_3]

for i in xrange(k_in * 2):
    inter_in = inters[-1]
    r_cur = r[:, i, :]
    r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
    r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
    ref_cur = cgt.batched_matmul(
        r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
    inter_out = inter_in - ref_cur
    inters.append(inter_out)
h = inters[-1]

r_nn = nn.Module([x], [h])

params = r_nn.get_parameters()
pc = ParamCollection(params)
pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(), )))
func = cgt.function([x, prev_h], h)

x_in = nr.uniform(-.1, .1,
                  size=(size_batch * size_x)).reshape(size_batch, size_x)
h_in = np.zeros((size_batch, size_mem))
h_in[:, 0] = np.ones(size_batch)
h = func(x_in, h_in)
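
Each pass of the loop above projects the running state onto the orthogonal complement of one unit vector: inter_out = inter_in - r (r^T inter_in). A NumPy sketch of a single projection step, with shapes mirroring one batch element (purely illustrative):

import numpy as np

size_mem = 4
r = np.random.randn(size_mem)
r /= np.linalg.norm(r)                   # a unit direction, like r_cur
inter_in = np.random.randn(size_mem, 1)  # like prev_h_3 for one example

# Subtract the component of inter_in along r.
inter_out = inter_in - r[:, None] * r[None, :].dot(inter_in)

# The result is orthogonal to r (up to floating-point error).
assert abs(r.dot(inter_out).item()) < 1e-10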
Example #24
class ExtensibleColumnProgNN(object):
    """
    Descr: An extensible network column for use in transfer learning with a
        Progressive Neural Network.
    Args:
        topology - A list of number of units in each hidden dimension.
            First entry is input dimension.
        activations - A list of activation functions to use on the transforms.
        session - A TensorFlow session.
        prev_columns - Previously trained columns (Initial or Extensible)
            that the current column will form lateral connections to.
    Returns:
        None - attaches objects to class for ExtensibleColumnProgNN.session.run()
    """
    def __init__(self,
                 topology,
                 activations,
                 session,
                 checkpoint_base_path,
                 prev_columns,
                 dtype=tf.float32):
        n_input = topology[0]
        self.topology = topology
        self.session = session
        width = len(prev_columns)
        # Layers in network. First value is n_input, so it doesn't count.
        L = len(topology) - 1
        self.L = L
        self.prev_columns = prev_columns
        self.checkpoint_base_path = checkpoint_base_path
        self.column_number = width

        # All previous columns must have the same number of layers.
        assert all([self.L == x.L for x in prev_columns])

        self.o_n = tf.placeholder(dtype,
                                  shape=[None, n_input],
                                  name='prog_nn_input_placeholder')

        self.W = [[]] * L
        self.b = [[]] * L
        self.U = []
        for k in range(L - 1):
            self.U.append([[]] * width)
        self.h = [self.o_n]
        # Collect parameters to hand off to ParamCollection.
        params = []
        for k in range(L):
            W_shape = topology[k:k + 2]
            self.W[k] = weight_variable(W_shape,
                                        name="weight_var_layer_" + str(k))
            self.b[k] = bias_variable([W_shape[1]],
                                      name="bias_var_layer_" + str(k))
            if k == 0:
                self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) +
                                             self.b[k]))
                params.append(self.W[k])
                params.append(self.b[k])
                continue
            preactivation = tf.matmul(self.h[-1], self.W[k]) + self.b[k]
            for kk in range(width):
                U_shape = [prev_columns[kk].topology[k], topology[k + 1]]
                # Remember len(self.U) == L - 1!
                self.U[k - 1][kk] = weight_variable(
                    U_shape,
                    name="lateral_weight_var_layer_" + str(k) + "_to_column_" +
                    str(kk))
                # pprint(prev_columns[kk].h[k].get_shape().as_list())
                # pprint(self.U[k-1][kk].get_shape().as_list())
                preactivation += tf.matmul(prev_columns[kk].h[k],
                                           self.U[k - 1][kk])
            self.h.append(activations[k](preactivation))
            params.append(self.W[k])
            params.append(self.b[k])
            for kk in range(width):
                params.append(self.U[k - 1][kk])

        self.pc = ParamCollection(self.session, params)

    def save(self, checkpoint_i):
        save_path = get_checkpoint_path(self.checkpoint_base_path,
                                        self.column_number, checkpoint_i)
        current_params = self.pc.get_values_flat()
        np.save(save_path, current_params)

    def restore_weights(self, checkpoint_i):
        save_path = get_checkpoint_path(self.checkpoint_base_path,
                                        self.column_number, checkpoint_i)
        saved_theta = np.load(save_path)
        self.pc.set_values_flat(saved_theta)
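
Putting the two column types together: the first task trains an InitialColumnProgNN, and each later task gets an ExtensibleColumnProgNN that reads lateral input from every earlier column's hidden layers. A hedged usage sketch (topology, activations, and paths are illustrative):

session = tf.Session()
topology = [128, 64, 32, 10]  # input dim, two hidden layers, output
activations = [tf.nn.relu, tf.nn.relu, tf.identity]

col0 = InitialColumnProgNN(topology, activations, session, "/tmp/prognn")
# ... train col0 on task 0 ...

col1 = ExtensibleColumnProgNN(topology, activations, session,
                              "/tmp/prognn", prev_columns=[col0])
# col1.h[k] now depends on col0.h[k] through the lateral U weights.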
Example #25
    def __init__(self, obs_dim, ctrl_dim):

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
        a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)),
                                               name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(
            nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(
            nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,
                            ctrl_dim,
                            weight_init=nn.IIDGaussian(std=0.01))(h2)

        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]

        logp_n = ((-.5) * cgt.square(
            (a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square(
            (a_na - oldmean_na) / oldstd_na).sum(axis=1)
                     ) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n * adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) +
              (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) -
              .5).sum(axis=1).mean()

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                             [surr, kl])
        self._compute_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_na, adv_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                   [surr, kl])

        self.pc = ParamCollection(params)
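
The kl expression above is the closed-form KL divergence between the old and new diagonal Gaussian policies, summed over action dimensions and then averaged over the batch. In LaTeX:

\mathrm{KL}\left(\mathcal{N}(\mu_{\mathrm{old}}, \sigma_{\mathrm{old}}^2) \,\big\|\, \mathcal{N}(\mu, \sigma^2)\right)
    = \sum_i \left[ \log\frac{\sigma_i}{\sigma_{\mathrm{old},i}}
    + \frac{\sigma_{\mathrm{old},i}^2 + (\mu_{\mathrm{old},i} - \mu_i)^2}{2\sigma_i^2}
    - \frac{1}{2} \right]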
Example #27
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--n_batches", type=int, default=1000000)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--task",
                        choices=["copy", "reverse_copy", "repeat_copy"],
                        default="copy")
    args = parser.parse_args()
    np.seterr("raise")

    cgt.set_precision("quad" if args.grad_check else "double")
    np.random.seed(0)

    # model parameters
    if args.grad_check:
        opt = NTMOpts(
            b=1,  # batch size
            h=1,  # number of heads
            n=2,  # number of memory sites
            m=3,  # dimension at each memory site
            k=4,  # dimension of input
            p=2,  # dimension of output
            ff_hid_sizes=[])
        seq_length = 2

    else:
        opt = NTMOpts(
            b=64,  # batch size
            h=3,  # number of heads
            n=128,  # number of memory sites
            m=20,  # dimension at each memory site
            k=3,  # dimension of input
            p=1,  # dimension of output
            ff_hid_sizes=[128, 128])

        seq_length = 10

    if args.unittest:
        seq_length = 3
        args.n_batches = 3

    tstart = time.time()
    ntm = make_ntm(opt)
    if args.task == "copy":
        task = CopyTask(opt.b, seq_length, opt.p)
    elif args.task == "reverse_copy":
        task = ReverseCopyTask(opt.b, seq_length, opt.p)
    elif args.task == "repeat_copy":
        n_copies = 4
        task = RepeatCopyTask(opt.b, seq_length, opt.p, n_copies)

    f_loss, f_loss_and_grad, params = make_funcs(opt, ntm, task.total_time(),
                                                 task.loss_timesteps())
    print "graph construction and compilation took %g seconds" % (time.time() -
                                                                  tstart)

    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(), )))

    if args.grad_check:
        x, y = task.gen_batch()

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-8)
        _, _, g_anal = f_loss_and_grad(x, y)
        assert np.allclose(g_num, g_anal, atol=1e-8)
        print "Gradient check succeeded!"
        print "%i/%i elts of grad are nonzero" % (
            (g_anal != 0).sum(), g_anal.size)
        return

    seq_num = 0
    state = make_rmsprop_state(pc.get_value_flat(), .01, .95)
    print fmt_row(13, ["seq num", "CE (bits)", "accuracy", "|g|_inf"],
                  header=True)

    if args.profile: cgt.profiler.start()

    for i in xrange(args.n_batches):
        x, y = task.gen_batch()
        seq_num += x.shape[1]
        l, l01, g = f_loss_and_grad(x, y)
        print fmt_row(13, [seq_num, l, l01, np.abs(g).max()])
        rmsprop_update(g, state)
        pc.set_value_flat(state.theta)
        if not np.isfinite(l): break

    if args.profile: cgt.profiler.print_stats()
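
CopyTask, ReverseCopyTask, and RepeatCopyTask are not defined here. The standard NTM copy task presents a random bit sequence and asks the network to reproduce it during a blank recall phase (reversed, or repeated n_copies times, in the variants). A sketch of what batch generation for plain copy might look like (the (time, batch, bits) layout is an assumption consistent with seq_num += x.shape[1] above):

import numpy as np

def gen_copy_batch(batch_size, seq_length, p):
    # Random binary payload, shaped (time, batch, bits).
    payload = (np.random.rand(seq_length, batch_size, p) > 0.5).astype('float64')
    # Input: the payload, then blank steps while the network answers.
    x = np.concatenate([payload, np.zeros_like(payload)], axis=0)
    # Target: blanks during presentation, the payload during recall.
    y = np.concatenate([np.zeros_like(payload), payload], axis=0)
    return x, y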
Example #28
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int, default=64)
    parser.add_argument("--size_batch", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--n_unroll", type=int, default=16)
    parser.add_argument("--step_size", type=float, default=.01)
    parser.add_argument("--decay_rate", type=float, default=0.95)
    parser.add_argument("--n_epochs", type=int, default=20)
    parser.add_argument("--arch", choices=["lstm", "gru"], default="lstm")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--temperature", type=float, default=1)

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir, args.size_batch, args.n_unroll, (1.0, 0, 0))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
        args.arch, loader.size_vocab, loader.size_vocab, args.size_mem,
        args.size_batch, args.n_layers, args.n_unroll)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(), )))

    def initialize_hiddens(n):
        return [
            np.zeros((n, args.size_mem), cgt.floatX)
            for _ in xrange(get_num_hiddens(args.arch, args.n_layers))
        ]

    if args.grad_check:
        x, y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10)
        result = f_loss_and_grad(x, y, *prev_hiddens)
        g_anal = result[1]
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch", iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x, y) in loader.train_batches_iter():
            out = f_loss_and_grad(x, y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f" % (
            (time() - tstart) / len(losses), np.mean(losses))
        optim_state.step_size *= .98  #pylint: disable=E1101

        sample(f_step,
               initialize_hiddens(1),
               char2ind=loader.char2ind,
               n_steps=1000,
               temperature=args.temperature,
               seed_text="")

    if args.profile: profiler.print_stats()
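
initialize_hiddens asks get_num_hiddens how many recurrent state vectors to allocate, which differs by architecture: an LSTM carries a hidden and a cell vector per layer, a GRU only a hidden vector. A plausible reconstruction of the helper (not shown in the source):

def get_num_hiddens(arch, n_layers):
    # LSTM: hidden + cell state per layer; GRU: hidden state only.
    states_per_layer = 2 if arch == "lstm" else 1
    return n_layers * states_per_layer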