def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
        N, D = X.shape
        n_batches = int(N / batch_sz)

        W0 = init_weights((D, self.M))
        self.W = theano.shared(W0, 'W_%s' % self.id)
        self.bh = theano.shared(np.zeros(self.M), 'bh_%s' % self.id)
        self.bo = theano.shared(np.zeros(D), 'bo_%s' % self.id)
        self.params = [self.W, self.bh, self.bo]
        self.forward_params = [self.W, self.bh]

        # TODO: technically these should be reset before doing backprop
        self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
        self.dbh = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
        self.dbo = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
        self.dparams = [self.dW, self.dbh, self.dbo]
        self.forward_dparams = [self.dW, self.dbh]

        X_in = T.matrix('X_%s' % self.id)
        X_hat = self.forward_output(X_in)

        # attach it to the object so it can be used later
        # must be sigmoidal because the output is also a sigmoid
        H = T.nnet.sigmoid(X_in.dot(self.W) + self.bh)
        self.hidden_op = theano.function(
            inputs=[X_in],
            outputs=H,
        )

        # cost = ((X_in - X_hat) * (X_in - X_hat)).sum() / N
        cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).sum() / (batch_sz * D)
        cost_op = theano.function(
            inputs=[X_in],
            outputs=cost,
        )

        updates = [
            (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
        ] + [
            (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
        ]
        train_op = theano.function(
            inputs=[X_in],
            updates=updates,
        )

        costs = []
        print("training autoencoder: %s" % self.id)
        for i in range(epochs):
            print("epoch:", i)
            X = shuffle(X)
            for j in range(n_batches):
                batch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                train_op(batch)
                the_cost = cost_op(X) # technically we could also get the cost for Xtest here
                print("j / n_batches:", j, "/", n_batches, "cost:", the_cost)
                costs.append(the_cost)
        if show_fig:
            plt.plot(costs)
            plt.savefig('AE_costs.jpg')
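
For reference, every snippet on this page calls an init_weights helper that is not shown. Its signature varies between examples (some pass a shape tuple, others pass the two dimensions separately, and the TensorFlow example expects a (weights, biases) pair back), so the following is only a minimal sketch of the tuple-shape variant used directly above; the exact scaling and dtype in the original repos may differ.

import numpy as np

def init_weights(shape):
    # hypothetical sketch: zero-mean Gaussian scaled by the total fan;
    # returned as float64 here, though some examples cast to float32 instead
    return np.random.randn(*shape) / np.sqrt(sum(shape))
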
    def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
        N, D = X.shape
        n_batches = N // batch_sz

        W0 = init_weights((D, self.M))
        self.W = theano.shared(W0, 'W_%s' % self.id)
        self.bh = theano.shared(np.zeros(self.M), 'bh_%s' % self.id)
        self.bo = theano.shared(np.zeros(D), 'bo_%s' % self.id)
        self.params = [self.W, self.bh, self.bo]
        self.forward_params = [self.W, self.bh]

        # TODO: technically these should be reset before doing backprop
        self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
        self.dbh = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
        self.dbo = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
        self.dparams = [self.dW, self.dbh, self.dbo]
        self.forward_dparams = [self.dW, self.dbh]

        X_in = T.matrix('X_%s' % self.id)
        X_hat = self.forward_output(X_in)

        # attach it to the object so it can be used later
        # must be sigmoidal because the output is also a sigmoid
        H = T.nnet.sigmoid(X_in.dot(self.W) + self.bh)
        self.hidden_op = theano.function(
            inputs=[X_in],
            outputs=H,
        )

        # cost = ((X_in - X_hat) * (X_in - X_hat)).sum() / N
        cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).sum() / (batch_sz * D)
        cost_op = theano.function(
            inputs=[X_in],
            outputs=cost,
        )

        updates = [
            (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
        ] + [
            (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
        ]
        train_op = theano.function(
            inputs=[X_in],
            updates=updates,
        )

        costs = []
        print "training autoencoder: %s" % self.id
        for i in xrange(epochs):
            print "epoch:", i
            X = shuffle(X)
            for j in xrange(n_batches):
                batch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                train_op(batch)
                the_cost = cost_op(X) # technically we could also get the cost for Xtest here
                print "j / n_batches:", j, "/", n_batches, "cost:", the_cost
                costs.append(the_cost)
        if show_fig:
            plt.plot(costs)
            plt.show()
    def __init__(self, hidden_layer_sizes, keep_probs):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.keep_probs = keep_probs
        #list of all parameters except first and final layer
        self.all_params = []
        m1 = self.hidden_layer_sizes[0]
        self.count = 1
        for m2 in self.hidden_layer_sizes[1:]:
            # don't add a bias term here; batch normalization's beta plays that role
            w_init, _ = init_weights(m1, m2)
            W = tf.Variable(w_init, name='W' + str(self.count))
            #batch normalization parameters
            gamma = tf.Variable(np.ones(m2, dtype=np.float32),
                                name='Gamma' + str(self.count))
            beta = tf.Variable(np.zeros(m2, dtype=np.float32),
                               name='Beta' + str(self.count))
            running_mean = tf.Variable(np.zeros(m2, dtype=np.float32),
                                       trainable=False,
                                       name='Rn_mean' + str(self.count))
            running_var = tf.Variable(np.zeros(m2, dtype=np.float32),
                                      trainable=False,
                                      name='Rn_var' + str(self.count))

            self.all_params += [{
                'W': W,
                'gamma': gamma,
                'beta': beta,
                'rn_mean': running_mean,
                'rn_var': running_var
            }]
            self.count += 1
            m1 = m2
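
To make the role of those per-layer variables concrete, here is a hedged sketch of how one batch-normalized layer might be applied in the forward pass. The helper name forward_bn_layer, the ReLU activation, the decay constant, and the TF2-style variable.assign calls are all assumptions for illustration, not part of the class above.

import tensorflow as tf

def forward_bn_layer(Z, layer, is_training, decay=0.9, eps=1e-5):
    # linear step (no bias: beta below plays that role)
    a = tf.matmul(Z, layer['W'])
    if is_training:
        batch_mean, batch_var = tf.nn.moments(a, axes=[0])
        # track exponential moving averages for use at test time
        layer['rn_mean'].assign(decay * layer['rn_mean'] + (1 - decay) * batch_mean)
        layer['rn_var'].assign(decay * layer['rn_var'] + (1 - decay) * batch_var)
        a = tf.nn.batch_normalization(a, batch_mean, batch_var,
                                      layer['beta'], layer['gamma'], eps)
    else:
        a = tf.nn.batch_normalization(a, layer['rn_mean'], layer['rn_var'],
                                      layer['beta'], layer['gamma'], eps)
    return tf.nn.relu(a)
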
    def fit_to_input(self, k, learning_rate=1.0, mu=0.99, epochs=100000):
        # This is not very flexible, as you would ideally
        # like to be able to activate any node in any hidden
        # layer, not just the last layer.
        # Exercise for students: modify this function to be able
        # to activate neurons in the middle layers.
        X0 = init_weights((1, self.D))
        X = theano.shared(X0, 'X_shared')
        dX = theano.shared(np.zeros(X0.shape), 'dX_shared')
        Y = self.forward(X)
        t = np.zeros(self.hidden_layers[-1].M)
        t[k] = 1

        cost = -(t*T.log(Y[0]) + (1 - t)*(T.log(1 - Y[0]))).sum()
        updates = [
            (X, X + mu*dX - learning_rate*T.grad(cost, X)),
            (dX, mu*dX - learning_rate*T.grad(cost, X)),
        ]
        train = theano.function(
            inputs=[],
            outputs=cost,
            updates=updates,
        )

        costs = []
        for i in range(epochs):
            if i % 1000 == 0:
                print("epoch:", i)
            the_cost = train()
            costs.append(the_cost)
        plt.plot(costs)
        plt.show()

        return X.get_value()
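
A hedged usage sketch for the routine above: assuming the network was trained on flattened 28x28 images (e.g. MNIST), the returned array can be reshaped and displayed to see which input most strongly activates output node k. The dnn variable name and the image size are illustrative assumptions.

import matplotlib.pyplot as plt

# hypothetical usage: dnn is an already-trained network with D = 28*28 inputs
best_input = dnn.fit_to_input(k=5, epochs=10000)
plt.imshow(best_input.reshape(28, 28), cmap='gray')
plt.title('input that maximizes output node 5')
plt.show()
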
 def __init__(self, M1, M2):
   self.M1 = M1
   self.M2 = M2
   W = init_weights(M1, M2)
   b = np.zeros(M2).astype(np.float32)
   self.W = theano.shared(W, 'W')
   self.b = theano.shared(b, 'b')
   self.params = [self.W, self.b]
Example #6
 def __init__(self, m1, m2): # m1: input size & m2: output size
     W = init_weights((m1, m2))
     bi = np.zeros(m2)
     bo = np.zeros(m1)
     self.W = theano.shared(W)
     self.bi = theano.shared(bi) # input bias
     self.bo = theano.shared(bo) # output bias
     self.params = [self.W, self.bi, self.bo]
 def __init__(self, m1, m2):
     W = init_weights((m1, m2))
     bi = np.zeros(m2, dtype=np.float32)
     bo = np.zeros(m1, dtype=np.float32)
     self.W = theano.shared(W)
     self.bi = theano.shared(bi)
     self.bo = theano.shared(bo)
     self.params = [self.W, self.bi, self.bo]
Example #8
 def __init__(self, m1, m2):
     W = init_weights((m1, m2))
     bi = np.zeros(m2, dtype=np.float32)
     bo = np.zeros(m1, dtype=np.float32)
     self.W = theano.shared(W)
     self.bi = theano.shared(bi)
     self.bo = theano.shared(bo)
     self.params = [self.W, self.bi, self.bo]
Example #9
    def fit_to_input(self,
                     k,
                     learning_rate=0.00001,
                     mu=0.99,
                     reg=10e-10,
                     epochs=20000):
        # This is not very flexible, as you would ideally
        # like to be able to activate any node in any hidden
        # layer, not just the last layer.
        # Exercise for students: modify this function to be able
        # to activate neurons in the middle layers.
        X0 = init_weights((1, self.D))
        X = theano.shared(X0, 'X_shared')
        dX = theano.shared(np.zeros(X0.shape), 'dX_shared')
        Y = self.forward(X)
        # t = np.zeros(self.hidden_layers[-1].M)
        # t[k] = 1

        # # choose Y[0] b/c it's shape 1xD, we want just a D-size vector, not 1xD matrix
        # cost = -(t*T.log(Y[0]) + (1 - t)*(T.log(1 - Y[0]))).sum() + reg*(X * X).sum()

        cost = -T.log(Y[0, k]) + reg * (X * X).sum()

        updates = [
            (X, X + mu * dX - learning_rate * T.grad(cost, X)),
            (dX, mu * dX - learning_rate * T.grad(cost, X)),
        ]
        train = theano.function(
            inputs=[],
            outputs=[cost, Y],
            updates=updates,
        )

        costs = []
        bestX = None
        for i in range(epochs):
            if i % 1000 == 0:
                print("epoch:", i)
            the_cost, out = train()
            if i == 0:
                print("out.shape:", out.shape)
            costs.append(the_cost)
            # if the_cost < 10:
            #     break
            # stop if the cost went up relative to the previous iteration, or blew up;
            # comparing against costs[-1] would compare the_cost with itself
            if np.isnan(the_cost) or (len(costs) > 1 and the_cost > costs[-2]):
                break

            bestX = X.get_value()
        print("len(costs):", len(costs), "max:", np.max(costs), "min:", np.min(costs))
        plt.plot(costs)
        plt.show()

        return bestX
Example #10
    def __init__(self,
            expr,
            steps,
            batchsize=32,
            constsize=0,
            rand=None,
            update_fn=lasagne.updates.adam,
            lamb=10.0,
            binary_ops=DEFAULT_BINARY_OPS,
            unary_ops=DEFAULT_UNARY_OPS
    ):
        super(ProcessorNetwork, self).__init__(expr,
                batchsize=batchsize,
                rand=rand,
                update_fn=update_fn,
                lamb=lamb
        )

        # self._one = T.constant(1.0)
        self._steps = steps
        self._constsize = constsize
        self._binary_ops = binary_ops
        self._unary_ops = unary_ops

        if constsize > 0:
            self._constants = init_weights([constsize])

        self._W_read = [init_weights([3, self.total_readables(t)]) for t in range(steps)]
        # self._W_scale = theano.shared(np.ones([steps, 3]))

        self._W_select = init_weights([steps, len(binary_ops) + len(unary_ops)])
        # self._params = self._W_read + [self._W_scale, self._W_select]
        self._params = self._maybe_constants(self._W_read + [self._W_select])

        # Regularize
        self._regularization = T.sum(
                [penalize_hedging(self._W_read[t][i]) for t in range(steps) for i in range(3)] +
                [penalize_hedging(self._W_select[t]) for t in range(steps)]
        )

        self._build()
Example #11
    def fit(self, X, lr=10e-4, mu=0.99):
        N = len(X)
        M = self.M
        D = self.D
        V = self.V

        #Initialize weights
        We = init_weights(V, D)
        Wx = init_weights(D, M)
        Wh = init_weights(M, M)
        bh = np.zeros(M).astype(np.float32)
        h0 = np.zeros(M).astype(np.float32)
        Wo = init_weights(M, V)
        bo = np.zeros(V).astype(np.float32)

        #Create all the theano variables and equations for training and prediction
        self.set(We, Wx, Wh, bh, h0, Wo, bo, np.float32(lr), np.float32(mu))

        #Stochastic Gradient Descent
        for n in range(2000):
            n_total = 0
            n_correct = 0
            tot_cost = 0
            if n % 10 == 0:
                lr *= 0.99
            for i in range(N):
                line = X[i]
                n_total += len(line)
                in_seq = [0] + line
                out_seq = line + [1]
                #print(in_seq, out_seq)
                p, c = self.train(in_seq, out_seq)
                for t in range(len(p)):
                    if p[t] == out_seq[t]:
                        n_correct += 1
                tot_cost += c
            print("iteration:", n, "Cost: ", tot_cost, "classification-rate:",
                  float(n_correct) / n_total)
        self.save()
Example #12
 def __init__(self,hidden_layer_sizes,keep_probs):
     self.hidden_layer_sizes = hidden_layer_sizes
     self.keep_probs = keep_probs
     
      # initialize parameters except the first and final layer
     self.all_params = []
     m1 = self.hidden_layer_sizes[0]
     for m2 in hidden_layer_sizes[1:]:
         w_init,b_init = init_weights(m1,m2)
         W = tf.Variable(w_init)
         b = tf.Variable(b_init)
         self.all_params += [(W,b)]
         m1=m2
Example #13
    def fit_to_input(self, k, learning_rate=1.0, mu=0.99, epochs=100000):
        # This is not very flexible, as you would ideally
        # like to be able to activate any node in any hidden
        # layer, not just the last layer.
        # Exercise for students: modify this function to be able
        # to activate neurons in the middle layers.

        # cast hyperparams
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)

        # randomly initialize an image
        X0 = init_weights((1, self.D))

        # make the image a shared so theano can update it
        X = theano.shared(X0, 'X_shared')

        # get the output of the neural network
        Y = self.forward(X)

        # t = np.zeros(self.hidden_layers[-1].M)
        # t[k] = 1

        # # choose Y[0] b/c it's shape 1xD, we want just a D-size vector, not 1xD matrix
        # cost = -(t*T.log(Y[0]) + (1 - t)*(T.log(1 - Y[0]))).sum()

        # k = which output node to look at
        # there is only 1 image, so we select the 0th row of X
        cost = -T.log(Y[0,k])

        updates = momentum_updates(cost, [X], mu, learning_rate)
        train = theano.function(
            inputs=[],
            outputs=[cost, Y],
            updates=updates,
        )

        costs = []
        for i in range(epochs):
            if i % 10000 == 0:
                print("epoch:", i)
            the_cost, out = train()
            if i == 0:
                print("out.shape:", out.shape)
            costs.append(the_cost)
        plt.plot(costs)
        plt.show()

        return X.get_value()
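
Several snippets on this page (including the one above) call a momentum_updates helper rather than writing the (parameter, velocity) update pairs inline. A minimal sketch consistent with the inline updates shown elsewhere on this page; the original helper may differ in detail:

import numpy as np
import theano
import theano.tensor as T

def momentum_updates(cost, params, mu, learning_rate):
    # one velocity per parameter, initialized to zero
    dparams = [theano.shared(np.zeros_like(p.get_value())) for p in params]
    grads = T.grad(cost, params)
    updates = []
    for p, dp, g in zip(params, dparams, grads):
        new_dp = mu * dp - learning_rate * g   # momentum step
        updates.append((dp, new_dp))
        updates.append((p, p + new_dp))
    return updates
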
    def fit_to_input(self, k, learning_rate=0.00001, mu=0.99, reg=10e-10, epochs=20000):
        # This is not very flexible, as you would ideally
        # like to be able to activate any node in any hidden
        # layer, not just the last layer.
        # Exercise for students: modify this function to be able
        # to activate neurons in the middle layers.
        X0 = init_weights((1, self.D))
        X = theano.shared(X0, 'X_shared')
        dX = theano.shared(np.zeros(X0.shape), 'dX_shared')
        Y = self.forward(X)
        # t = np.zeros(self.hidden_layers[-1].M)
        # t[k] = 1

        # # choose Y[0] b/c it's shape 1xD, we want just a D-size vector, not 1xD matrix
        # cost = -(t*T.log(Y[0]) + (1 - t)*(T.log(1 - Y[0]))).sum() + reg*(X * X).sum()

        cost = -T.log(Y[0,k]) + reg*(X * X).sum()

        updates = [
            (X, X + mu*dX - learning_rate*T.grad(cost, X)),
            (dX, mu*dX - learning_rate*T.grad(cost, X)),
        ]
        train = theano.function(
            inputs=[],
            outputs=[cost, Y],
            updates=updates,
        )

        costs = []
        bestX = None
        for i in range(epochs):
            if i % 1000 == 0:
                print("epoch:", i)
            the_cost, out = train()
            if i == 0:
                print("out.shape:", out.shape)
            costs.append(the_cost)
            # if the_cost < 10:
            #     break
            # stop if the cost went up relative to the previous iteration, or blew up;
            # comparing against costs[-1] would compare the_cost with itself
            if np.isnan(the_cost) or (len(costs) > 1 and the_cost > costs[-2]):
                break

            bestX = X.get_value()
        print("len(costs):", len(costs), "max:", np.max(costs), "min:", np.min(costs))
        plt.plot(costs)
        plt.show()

        return bestX
Example #15
    def fit(self,
            X,
            Y,
            Xtest,
            Ytest,
            pretrain=True,
            learning_rate=0.01,
            mu=0.99,
            reg=0.1,
            epochs=1,
            batch_sz=100):
        # greedy layer-wise training of autoencoders
        pretrain_epochs = 1
        if not pretrain:
            pretrain_epochs = 0

        current_input = X
        for ae in self.hidden_layers:
            ae.fit(current_input, epochs=pretrain_epochs)

            # create current_input for the next layer
            current_input = ae.hidden_op(current_input)

        # initialize logistic regression layer
        N = len(Y)
        K = len(set(Y))
        W0 = init_weights((self.hidden_layers[-1].M, K))
        self.W = theano.shared(W0, "W_logreg")
        self.b = theano.shared(np.zeros(K), "b_logreg")

        self.params = [self.W, self.b]
        for ae in self.hidden_layers:
            self.params += ae.forward_params

        # for momentum
        self.dW = theano.shared(np.zeros(W0.shape), "dW_logreg")
        self.db = theano.shared(np.zeros(K), "db_logreg")
        self.dparams = [self.dW, self.db]
        for ae in self.hidden_layers:
            self.dparams += ae.forward_dparams

        X_in = T.matrix('X_in')
        targets = T.ivector('Targets')
        pY = self.forward(X_in)

        # squared_magnitude = [(p*p).sum() for p in self.params]
        # reg_cost = T.sum(squared_magnitude)
        cost = -T.mean(T.log(pY[T.arange(pY.shape[0]),
                                targets]))  #+ reg*reg_cost
        prediction = self.predict(X_in)
        cost_predict_op = theano.function(
            inputs=[X_in, targets],
            outputs=[cost, prediction],
        )

        updates = [(p, p + mu * dp - learning_rate * T.grad(cost, p))
                   for p, dp in zip(self.params, self.dparams)
                   ] + [(dp, mu * dp - learning_rate * T.grad(cost, p))
                        for p, dp in zip(self.params, self.dparams)]
        # updates = [(p, p - learning_rate*T.grad(cost, p)) for p in self.params]
        train_op = theano.function(
            inputs=[X_in, targets],
            updates=updates,
        )

        n_batches = N // batch_sz
        costs = []
        print("supervised training...")
        for i in range(epochs):
            print("epoch:", i)
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]
                train_op(Xbatch, Ybatch)
                the_cost, the_prediction = cost_predict_op(Xtest, Ytest)
                error = error_rate(the_prediction, Ytest)
                print("j / n_batches:", j, "/", n_batches, "cost:", the_cost, "error:", error)
                costs.append(the_cost)
        plt.plot(costs)
        plt.show()
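
The supervised loop above also uses an error_rate helper that is not shown on this page; these examples appear to assume nothing more than the fraction of misclassified samples:

import numpy as np

def error_rate(predictions, targets):
    # fraction of misclassified examples
    return np.mean(predictions != targets)
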
Example #16
 def __init__(self, D, M):
     W = init_weights((D, M))
     b = np.zeros(M)
     self.W = theano.shared(W)
     self.b = theano.shared(b)
     self.params = [self.W, self.b]
Example #17
    def fit(self,
            X,
            learning_rate=0.5,
            mu=0.99,
            epochs=1,
            batch_sz=100,
            show_fig=False):
        N, D = X.shape
        n_batches = N // batch_sz

        W0 = init_weights((D, self.M))
        self.W = theano.shared(W0, 'W_%s' % self.id)
        self.bh = theano.shared(np.zeros(self.M), 'bh_%s' % self.id)
        self.bo = theano.shared(np.zeros(D), 'bo_%s' % self.id)
        self.params = [self.W, self.bh, self.bo]
        self.forward_params = [
            self.W, self.bh
        ]  # the deep neural network class will need to use these

        # TODO: technically these should be reset before doing backprop
        # defining the changes in each variable, since we're using momentum
        self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
        self.dbh = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
        self.dbo = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
        self.dparams = [self.dW, self.dbh, self.dbo]
        self.forward_dparams = [self.dW, self.dbh]

        # tensor input (matrix)
        X_in = T.matrix('X_%s' % self.id)
        X_hat = self.forward_output(X_in)  # the reconstruction

        # attach it to the object so it can be used later
        # must be sigmoidal because the output is also a sigmoid
        # defining the hidden layer operation as a theano functions since it will be used in the deep neural network class.
        H = T.nnet.sigmoid(X_in.dot(self.W) + self.bh)
        self.hidden_op = theano.function(
            inputs=[X_in],
            outputs=H,
        )

        # squared error cost function:
        # cost = ((X_in - X_hat) * (X_in - X_hat)).sum() / N
        # cross entropy cost function:
        cost = -(X_in * T.log(X_hat) +
                 (1 - X_in) * T.log(1 - X_hat)).sum() / (batch_sz * D)
        cost_op = theano.function(
            inputs=[X_in],
            outputs=cost,
        )

        # gradient descent:
        updates = [(p, p + mu * dp - learning_rate * T.grad(cost, p))
                   for p, dp in zip(self.params, self.dparams)
                   ] + [(dp, mu * dp - learning_rate * T.grad(cost, p))
                        for p, dp in zip(self.params, self.dparams)]
        train_op = theano.function(
            inputs=[X_in],
            updates=updates,
        )

        costs = []
        print("training autoencoder: %s" % self.id)
        for i in range(epochs):
            print("epoch:", i)
            X = shuffle(X)
            for j in range(n_batches):
                batch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                train_op(batch)
                the_cost = cost_op(
                    X)  # technically we could also get the cost for Xtest here
                print("j / n_batches:", j, "/", n_batches, "cost:", the_cost)
                costs.append(the_cost)
        if show_fig:
            plt.plot(costs)
            plt.show()
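
The autoencoder fit above leans on self.forward_output, which is defined elsewhere in the class. Given that self.params holds only W, bh, and bo, the reconstruction is presumably done with tied weights; a hedged sketch of what forward_hidden and forward_output likely look like (the original class may differ):

    def forward_hidden(self, X):
        return T.nnet.sigmoid(X.dot(self.W) + self.bh)

    def forward_output(self, X):
        # tied weights: the decoder reuses W transposed, so the only extra
        # parameter on the output side is the bias bo
        Z = self.forward_hidden(X)
        return T.nnet.sigmoid(Z.dot(self.W.T) + self.bo)
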
Example #18
def aup(paras):
    total_anchor = paras.total_anchor
    #train_ratio = paras.train_ratio
    load_path_a = paras.feature_A
    load_path_b = paras.feature_B
    cuda = torch.device("cuda:0")
    dim = 56  #paras.represent_dim
    lr = paras.lr
    lr_step = paras.lr_step
    lr_prob = paras.lr_prob
    N = paras.N
    stop_P = paras.stop_P
    is_classification = paras.is_classification
    represent_epoch = paras.represent_epoch
    classification_epoch = paras.classification_epoch
    a_array_load = np.load(load_path_a)
    b_array_load = np.load(load_path_b)
    a_array_tensor = torch.Tensor(a_array_load)
    b_array_tensor = torch.Tensor(b_array_load)
    len_f = a_array_load.shape[0]
    len_t = b_array_load.shape[0]
    print(len_f, len_t)
    node_f = list(range(0, len_f))
    node_t = list(range(0, len_t))
    anchor_all = list(range(0, total_anchor))
    rd.seed(80)
    left_anchor, right_anchor = data.get_train_anchor()
    #anchor_train = rd.choice(anchor_all, int(train_ratio * total_anchor))
    #anchor_test = list(set(anchor_all) - set(anchor_train))
    anchor_test = data.get_test_anchor()
    model = SiameseNetwork(dim, len_f, len_t).to(device=cuda)
    init_weights(model)
    neta = NETA(len_f, dim).to(device=cuda)
    netb = NETB(len_t, dim).to(device=cuda)
    a_array_tensor = a_array_tensor.to(device=cuda)
    b_array_tensor = b_array_tensor.to(device=cuda)
    mse = nn.MSELoss()
    cos = nn.CosineEmbeddingLoss(margin=0)
    optimizer = optim.Adadelta(model.parameters(), lr=lr, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=lr_step,
                                                gamma=lr_prob)
    triplet_neg = 1
    anchor_flag = 1
    anchor_train_len = len(left_anchor)
    anchor_train_a_list = left_anchor
    anchor_train_b_list = right_anchor
    input_a = []
    input_b = []
    classifier_target = torch.empty(0).to(device=cuda)
    np.random.seed(5)
    index = 0
    while index < anchor_train_len:  # number of training anchors
        a = anchor_train_a_list[index]
        b = anchor_train_b_list[index]
        input_a.append(a)
        input_b.append(b)
        an_target = torch.ones(anchor_flag).to(device=cuda)
        classifier_target = torch.cat((classifier_target, an_target), dim=0)
        an_negs_index = list(set(node_t) - {b})
        an_negs_index_sampled = list(
            np.random.choice(an_negs_index, triplet_neg, replace=False))
        an_as = triplet_neg * [a]
        input_a += an_as
        input_b += an_negs_index_sampled

        an_negs_index1 = list(set(node_f) - {a})
        an_negs_index_sampled1 = list(
            np.random.choice(an_negs_index1, triplet_neg, replace=False))
        an_as1 = triplet_neg * [b]
        input_b += an_as1
        input_a += an_negs_index_sampled1

        un_an_target = torch.zeros(triplet_neg * 2).to(device=cuda)
        classifier_target = torch.cat((classifier_target, un_an_target), dim=0)
        index += 1

    cosine_target = torch.unsqueeze(2 * classifier_target - 1, dim=1)
    classifier_target = torch.unsqueeze(classifier_target, dim=1)

    ina = a_array_load[input_a]
    inb = b_array_load[input_b]
    ina = torch.Tensor(ina).to(device=cuda)
    inb = torch.Tensor(inb).to(device=cuda)

    tensor_dataset = SiameseNetworkDataset(ina, inb, classifier_target,
                                           cosine_target)
    data_loader = DataLoader(tensor_dataset, batch_size=56, shuffle=False)
    hidden_a_for_c = None
    hidden_b_for_c = None
    for epoch in range(represent_epoch):
        model.train()
        scheduler.step()
        train_loss = 0
        loss_rec_a = 0
        loss_rec_b = 0
        loss_reg = 0
        loss_anchor = 0
        for data_batch in data_loader:
            in_a, in_b, c, cosine = data_batch
            cosine = torch.squeeze(cosine, dim=1)
            in_a = torch.unsqueeze(in_a, dim=1).to(device=cuda)
            in_b = torch.unsqueeze(in_b, dim=1).to(device=cuda)
            h_a, h_b, re_a, re_b = model(in_a, in_b)
            loss_rec_a_batch = 100 * mse(re_a, in_a)
            loss_rec_b_batch = 100 * mse(re_b, in_b)
            loss_anchor_batch = 1 * cos(h_a, h_b, cosine)
            loss_reg_batch = 0.001 * (h_a.norm() + h_b.norm())
            loss = loss_reg_batch + loss_rec_a_batch + loss_rec_b_batch + loss_anchor_batch

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            loss_rec_a += loss_rec_a_batch.item()
            loss_rec_b += loss_rec_b_batch.item()
            loss_reg += loss_reg_batch.item()
            loss_anchor += loss_anchor_batch.item()

        neta_dict = neta.state_dict()
        netb_dict = netb.state_dict()
        model.cpu()
        trainmodel_dict = model.state_dict()

        trainmodel_dict_a = {
            k: v
            for k, v in trainmodel_dict.items() if k in neta_dict
        }
        trainmodel_dict_b = {
            k: v
            for k, v in trainmodel_dict.items() if k in netb_dict
        }
        neta_dict.update(trainmodel_dict_a)
        netb_dict.update(trainmodel_dict_b)
        neta.load_state_dict(neta_dict)
        netb.load_state_dict(netb_dict)

        neta.eval()
        netb.eval()
        hidden_a = neta(torch.unsqueeze(a_array_tensor, dim=1))
        hidden_b = netb(torch.unsqueeze(b_array_tensor, dim=1))
        psenode = []
        for i in range(5313, 5469):  #modify with training ratio
            psenode.append(i)

        PatN_v, MatN_v, pp1, pp5, pp10, pp15, pp20, pp25, pp30 = tes_vec(
            hidden_a, hidden_b, left_anchor, right_anchor, anchor_test, N,
            node_t, psenode)
        PatN_t, MatN_t, p1, p5, p10, p15, p20, p25, p30 = tes_vec(
            hidden_a, hidden_b, anchor_test, anchor_test, right_anchor, N,
            node_t)
        print(
            'epoch:%d, loss:%.3f, rec_a:%.3f, rec_b:%.3f, anchor:%.3f, reg:%.3f, '
            'at%d, Val(P=%.3f, M=%.3f), Tes(P=%.3f, M=%.3f)\n,Test(%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f)'
            % (epoch, train_loss, loss_rec_a, loss_rec_b, loss_anchor,
               loss_reg, N, PatN_v, MatN_v, PatN_t, MatN_t, p1, p5, p10, p15,
               p20, p25, p30))

        if is_classification and PatN_t > stop_P:
            hidden_a_for_c = hidden_a.detach()
            hidden_b_for_c = hidden_b.detach()
            break

        model.to(device=cuda)

    if is_classification:
        classifier = Classifier().to(device=cuda)
        cel = nn.CrossEntropyLoss()
        hidden_a_for_c = hidden_a_for_c.cpu().numpy()
        hidden_b_for_c = hidden_b_for_c.cpu().numpy()
        ina_for_c = hidden_a_for_c[input_a]
        inb_for_c = hidden_b_for_c[input_b]
        ina_for_c = torch.Tensor(ina_for_c).to(device=cuda)
        inb_for_c = torch.Tensor(inb_for_c).to(device=cuda)

        tensor_dataset_for_c = SiameseNetworkDataset(ina_for_c, inb_for_c,
                                                     classifier_target,
                                                     cosine_target)
        data_loader_for_c = DataLoader(tensor_dataset_for_c,
                                       batch_size=dim,
                                       shuffle=False)
        optimizer_for_c = optim.Adadelta(classifier.parameters(),
                                         lr=lr,
                                         weight_decay=0.0001)
        scheduler_c = torch.optim.lr_scheduler.StepLR(optimizer_for_c,
                                                      step_size=lr_step,
                                                      gamma=lr_prob)
        # classifier
        for epoch in range(classification_epoch):
            classifier.train()
            scheduler_c.step()
            loss_c = 0
            for data_batch in data_loader_for_c:
                in_a, in_b, c, cosine = data_batch
                in_a, in_b = in_a.to(device=cuda), in_b.to(device=cuda)
                in_class = torch.cat((in_a, in_b), dim=1)
                class_out = classifier(in_class)
                c = torch.squeeze(c, dim=1)
                loss_classifier = cel(class_out, c.long())

                optimizer_for_c.zero_grad()
                loss_classifier.backward()
                optimizer_for_c.step()

                loss_c += loss_classifier.item()
            classifier.eval()
            hidden_a_for_c1 = torch.Tensor(hidden_a_for_c).to(device=cuda)
            hidden_b_for_c1 = torch.Tensor(hidden_b_for_c).to(device=cuda)
            PatN_v, MatN_v, pp1, pp5, pp10, pp15, pp20, pp25, pp30 = val_classifier(
                hidden_a_for_c1, hidden_b_for_c1, left_anchor, right_anchor,
                anchor_test, paras, node_t, classifier)
            PatN_t, MatN_t, p1, p5, p10, p15, p20, p25, p30, = val_classifier(
                hidden_a_for_c1, hidden_b_for_c1, anchor_test, anchor_test,
                right_anchor, paras, node_t, classifier)
            print(
                'epoch %d, loss %.3f, at%d, Val(P=%.3f, M=%.3f), Tes(P=%.3f, M=%.3f)\n,Test(%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f)'
                % (epoch, loss_c, N, PatN_v, MatN_v, PatN_t, MatN_t, p1, p5,
                   p10, p15, p20, p25, p30))
 def __init__(self, D, M):
     W = init_weights((D, M))
     b = np.zeros(M)
     self.W = theano.shared(W)
     self.b = theano.shared(b)
     self.params = [self.W, self.b]
Example #20
    def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
        # cast to float
        mu = np.float64(mu)
        learning_rate = np.float64(learning_rate)

        Linhas, Colunas = X.shape
        n_batches = Linhas // batch_sz

        W0 = init_weights((Colunas, self.M))
        self.W = theano.shared(W0, 'W_%s' % self.id)
        self.bh = theano.shared(np.zeros(self.M, dtype=np.float64), 'bh_%s' % self.id)
        self.bo = theano.shared(np.zeros(Colunas, dtype=np.float64), 'bo_%s' % self.id)
        self.params = [self.W, self.bh, self.bo]
        self.forward_params = [self.W, self.bh]

        # TODO: technically these should be reset before doing backprop
        self.dW = theano.shared(np.zeros(W0.shape, dtype=np.float64), 'dW_%s' % self.id)
        self.dbh = theano.shared(np.zeros(self.M, dtype=np.float64), 'dbh_%s' % self.id)
        self.dbo = theano.shared(np.zeros(Colunas, dtype=np.float64), 'dbo_%s' % self.id)
        self.dparams = [self.dW, self.dbh, self.dbo]
        self.forward_dparams = [self.dW, self.dbh]

        X_in = T.matrix('X_%s' % self.id)
        X_hat = self.forward_output(X_in)

        # attach it to the object so it can be used later
        # must be sigmoidal because the output is also a sigmoid
        H = T.nnet.sigmoid(X_in.dot(self.W) + self.bh)
        self.hidden_op = theano.function(
            inputs=[X_in],
            outputs=H,
        )

        # save this for later so we can call it to
        # create reconstructions of input
        self.predict = theano.function(
            inputs=[X_in],
            outputs=X_hat,
        )

        cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).flatten().mean()
        cost_op = theano.function(
            inputs=[X_in],
            outputs=cost,
        )

        updates = momentum_updates(cost, self.params, mu, learning_rate)
        train_op = theano.function(
            inputs=[X_in],
            updates=updates,
        )

        costs = []
        print("training autoencoder: %s" % self.id)
        print("epochs to do:", epochs)
        for i in range(epochs):
            print("epoch:", i)
            X = shuffle(X)
            for j in range(n_batches):
                batch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                train_op(batch)
                the_cost = cost_op(batch)  # technically we could also get the cost for Xtest here
                # if j % 10 == 0:
                print("j / n_batches:", j, "/", n_batches, "cost:", the_cost)
                costs.append(the_cost)
        if show_fig:
            plt.plot(costs)
            plt.show()
Example #21
    def fit(self, X, Y, lr=0.001, mu=0.99):
        M = self.M
        V = self.V
        K = len(set(Y))  #K = 2
        lr = np.float32(lr)
        mu = np.float32(mu)

        #Form train and test data set
        XTrain = X[:-50]
        YTrain = Y[:-50]
        XTest = X[-50:]
        YTest = Y[-50:]
        N = len(XTrain)
        print(Y)
        #Initial weights
        Wx = init_weights(V, M)
        Wh = init_weights(M, M)
        bh = np.zeros(M).astype(np.float32)
        h0 = np.zeros(M).astype(np.float32)
        Wo = init_weights(M, K)
        bo = np.zeros(K).astype(np.float32)

        #Theano Variables
        self.Wx = theano.shared(Wx)
        self.Wh = theano.shared(Wh)
        self.bh = theano.shared(bh)
        self.h0 = theano.shared(h0)
        self.Wo = theano.shared(Wo)
        self.bo = theano.shared(bo)

        self.params = [self.Wx, self.Wh, self.bh, self.h0, self.Wo, self.bo]
        #		self.dparams = [theano.shared(np.zeros(p.get_value().shape).astype(np.float32)) for p in self.params]

        thX = T.ivector('X')  #T size vector
        thY = T.iscalar('Y')  # output, i.e., 0 for Robert Frost, 1 for Edgar Allan Poe

        #Recurrence to loop through the input sequence
        def recurrence(x_t, h_t_prev):
            h_t = T.tanh(self.Wx[x_t] + h_t_prev.dot(self.Wh) + self.bh)
            y_t = T.nnet.softmax(h_t.dot(self.Wo) + self.bo)
            return h_t, y_t

        [h, y], _ = theano.scan(
            fn=recurrence,
            sequences=thX,
            n_steps=thX.shape[0],
            outputs_info=[self.h0, None],
        )

        #Prediction and cost calculation
        pY = y[-1, 0, :]  #y is  T x 1 x K
        pred = T.argmax(pY)

        cost = -T.mean(T.log(pY[thY]))

        updates = [(p, p - lr * T.grad(cost, p)) for p in self.params]
        # + [
        #			(d, mu*d - lr*T.grad(cost,p)) for p,d in zip(self.params, self.dparams)
        #			]

        #Training and prediction function
        train = theano.function(inputs=[thX, thY], updates=updates, outputs=pY)
        get_pred_cost = theano.function(inputs=[thX, thY],
                                        outputs=[pred, cost])

        #Stochastic gradient descent
        for i in range(500):
            XTrain, YTrain = shuffle(XTrain, YTrain)
            lr = lr * 0.9
            for n in range(N):
                x = XTrain[n]
                y = YTrain[n]

                p = train(x, y)
            #Test set
            n_correct = 0
            tot_c = 0
            for j in range(len(XTest)):
                p, c = get_pred_cost(XTest[j], YTest[j])
                if p == YTest[j]:
                    n_correct += 1
                tot_c += c
            print("Iteration: ", i, "Cost: ", tot_c, "Classification rate: ",
                  float(n_correct) / len(XTest))
  def fit(self, X, Y, lr=10e-7, mu=0.99, batch_sz=100):
    Y = Y.astype(np.int32)
    X, Y = shuffle(X,Y)
    N, c, d, d = X.shape
    print(len(Y))
    K = Y.shape[1]
    
    mu = np.float32(mu)
    lr = np.float32(lr)
    print("N:", N, "K:", K)

    #Create the convolution-pooling layers
    self.convpool_layers=[]
    mi = c
    outw = d
    outh = d
    for mo, fw, fh in self.convpool_layer_sizes:
      c = ConvPoolLayer(mi, mo, fw, fh)
      self.convpool_layers.append(c)
      outw = (outw - fw +1)/ 2
      outh = (outh - fh +1)/ 2
      mi = mo
 
    #Create the hidden layers
    self.hidden_layers = []
    m1 = int(self.convpool_layer_sizes[-1][0]*outw*outh)
    for m2 in self.hidden_layer_sizes:
      h = HiddenLayer(m1, m2)
      self.hidden_layers.append(h)
      m1 = m2 
      
    W = init_weights(m2, K)    #Logistic reg layer
    b = np.zeros([K]).astype(np.float32)
    
    #Create theano variables
    thX = T.tensor4('X', dtype='float32')
    thY = T.fmatrix('Y')
    
    self.W = theano.shared(W, 'W_log')
    self.b = theano.shared(b, 'b_log')
    
    #Create parameter array for updates
    params = [self.W, self.b]
    for c in self.convpool_layers:
      params += c.params
    for h in self.hidden_layers:
      params += h.params
    
    #Momentum parameters
    dparams = [theano.shared(np.zeros(p.get_value().shape).astype(np.float32)) for p in params]

    #Forward pass
    pY = self.forward(thX)
    P = T.argmax(pY, axis=1)
    cost = -(thY * T.log(pY)).sum()

    #Weight updates
    updates = [
        (p, p + mu*d - lr*T.grad(cost, p)) for p, d in zip(params, dparams)
    ] + [
        (d, mu*d - lr*T.grad(cost, p)) for p, d in zip(params, dparams)
    ]
    #Theano function for training and predicting and calculating cost
    train = theano.function(
        inputs=[thX, thY],
        updates=updates,
        allow_input_downcast=True
      )
      
    get_cost_prediction = theano.function(
        inputs=[thX, thY],
        outputs=[P, cost],
        allow_input_downcast=True
      )
    
    #Loop for Batch grad descent
    no_batches = int(N/batch_sz)
    for i in range(500):
      #lr *= 0.9
      for n in range(no_batches):
        Xbatch = X[n*batch_sz:(n*batch_sz+batch_sz)]
        Ybatch = Y[n*batch_sz:(n*batch_sz+batch_sz)]
        #print(Xbatch.shape, Ybatch.shape)
        train(Xbatch, Ybatch)
        if n%100==0:
          Yb = np.argmax(Ybatch, axis =1)
          P, c = get_cost_prediction(Xbatch, Ybatch)
          #print(P.shape, Ybatch.shape)
          er = error_rate(P, Yb)
          print("iteration:", i, "cost:", c, "error rate:", er)
Example #23
    def fit(self,
            X,
            activation=relu,
            lr=0.5,
            epochs=1,
            mu=0.99,
            batch_sz=20,
            print_period=100,
            show_fig=False):
        # X = X.astype(np.float32)
        mu = np.float32(mu)
        lr = np.float32(lr)

        # init hidden layers
        N, D = X.shape
        n_batches = N // batch_sz

        # HiddenLayer could do this, but I don't know what's up with the ids
        W0 = init_weights((D, self.M))
        self.W = theano.shared(W0, 'W_%s' % self.id)
        self.bh = theano.shared(np.zeros(self.M, dtype=np.float32),
                                'bh_%s' % self.id)
        self.bo = theano.shared(np.zeros(D, dtype=np.float32),
                                'bo_%s' % self.id)
        self.params = [self.W, self.bh, self.bo]
        self.forward_params = [self.W, self.bh]

        # momentum parameters
        # TODO: technically these should be reset before doing backprop
        self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
        self.dbh = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
        self.dbo = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
        self.dparams = [self.dW, self.dbh, self.dbo]
        self.forward_dparams = [self.dW, self.dbh]

        X_in = T.matrix('X_%s' % self.id)
        X_hat = self.forward_output(X_in)

        H = T.nnet.sigmoid(X_in.dot(self.W) + self.bh)
        self.hidden_op = theano.function(
            inputs=[X_in],
            outputs=H,
        )

        self.predict = theano.function(
            inputs=[X_in],
            outputs=X_hat,
        )

        # mse
        # cost = ((X_in - X_hat) * (X_in - X_hat)).sum() / N #mean or sum and mse as cost function

        # cross entropy
        cost = -(X_in * T.log(X_hat) +
                 (1 - X_in) * T.log(1 - X_hat)).flatten().mean()
        cost_op = theano.function(
            inputs=[X_in],
            outputs=cost,
        )

        # grad descent + adding momentum changes
        updates = momentum_updates(cost, self.params, mu, lr)
        train_op = theano.function(
            inputs=[X_in],
            updates=updates,
        )

        costs = []
        print("training autoencoder: %s" % self.id)
        print("epochs to do:", epochs)
        for i in range(epochs):
            print("epoch:", i)
            X = shuffle(X)
            for j in range(n_batches):
                batch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                train_op(batch)
                the_cost = cost_op(
                    batch
                )  # technically we could also get the cost for Xtest here
                if j % 10 == 0:
                    print("j / n_batches:", j, "/", n_batches, "cost:",
                          the_cost)
                costs.append(the_cost)
        if show_fig:
            plt.plot(costs)
            plt.show()
        for ae in self.hidden_layers:
            Z = ae.forward_hidden(Z)
        return Z

    def fit_to_input(self, k, learning_rate=1.0, mu=0.99, epochs=100000):
        # This is not very flexible, as you would ideally
        # like to be able to activate any node in any hidden
        # layer, not just the last layer.
        # Exercise for students: modify this function to be able
        # to activate neurons in the middle layers.

        # cast hyperparams
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)

        # randomly initialize an image
        X0 = init_weights((1, self.D))

        # make the image a shared variable so theano can update it
        X = theano.shared(X0, 'X_shared')
Example #25
    def fit(self, X, learning_rate=0.1, epochs=1, batch_sz=100, show_fig=False):
        N, D = X.shape
        n_batches = N // batch_sz

        W0 = init_weights((D, self.M))
        self.W = theano.shared(W0, 'W_%s' % self.id)
        self.c = theano.shared(np.zeros(self.M), 'c_%s' % self.id)
        self.b = theano.shared(np.zeros(D), 'b_%s' % self.id)
        self.params = [self.W, self.c, self.b]
        self.forward_params = [self.W, self.c]

        # we won't use this to fit the RBM but we will use these for backpropagation later
        # TODO: technically they should be reset before doing backprop
        self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
        self.dc = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
        self.db = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
        self.dparams = [self.dW, self.dc, self.db]
        self.forward_dparams = [self.dW, self.dc]

        X_in = T.matrix('X_%s' % self.id)

        # attach it to the object so it can be used later
        # must be sigmoidal because the output is also a sigmoid
        H = T.nnet.sigmoid(X_in.dot(self.W) + self.c)
        self.hidden_op = theano.function(
            inputs=[X_in],
            outputs=H,
        )

        # we won't use this cost to do any updates
        # but we would like to see how this cost function changes
        # as we do contrastive divergence
        X_hat = self.forward_output(X_in)
        cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).sum() / (batch_sz * D)
        cost_op = theano.function(
            inputs=[X_in],
            outputs=cost,
        )

        # do one round of Gibbs sampling to obtain X_sample
        H = self.sample_h_given_v(X_in)
        X_sample = self.sample_v_given_h(H)

        # define the objective, updates, and train function
        objective = T.mean(self.free_energy(X_in)) - T.mean(self.free_energy(X_sample))

        # need to consider X_sample constant because you can't take the gradient of random numbers in Theano
        updates = [(p, p - learning_rate*T.grad(objective, p, consider_constant=[X_sample])) for p in self.params]
        train_op = theano.function(
            inputs=[X_in],
            updates=updates,
        )

        costs = []
        print "training rbm: %s" % self.id
        for i in xrange(epochs):
            print "epoch:", i
            X = shuffle(X)
            for j in xrange(n_batches):
                batch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                train_op(batch)
                the_cost = cost_op(X)  # technically we could also get the cost for Xtest here
                print "j / n_batches:", j, "/", n_batches, "cost:", the_cost
                costs.append(the_cost)
        if show_fig:
            plt.plot(costs)
            plt.show()
Example #26
    def fit(self, X, learning_rate=0.1, epochs=1, batch_sz=100, show_fig=False):
        N, D = X.shape
        n_batches = N // batch_sz

        W0 = init_weights((D, self.M))
        self.W = theano.shared(W0, 'W_%s' % self.id)
        self.c = theano.shared(np.zeros(self.M), 'c_%s' % self.id)
        self.b = theano.shared(np.zeros(D), 'b_%s' % self.id)
        self.params = [self.W, self.c, self.b]
        self.forward_params = [self.W, self.c]

        # we won't use these to fit the RBM (momentum isn't used in this RBM), but we will use them for backpropagation later
        # TODO: technically they should be reset before doing backprop
        self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
        self.dc = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
        self.db = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
        self.dparams = [self.dW, self.dc, self.db]
        self.forward_dparams = [self.dW, self.dc]

        # define our input:
        X_in = T.matrix('X_%s' % self.id)

        # define our hidden op which is used for our layer wise pretraining
        # attach it to the object so it can be used later
        # must be sigmoidal because the output is also a sigmoid
        H = T.nnet.sigmoid(X_in.dot(self.W) + self.c)
        self.hidden_op = theano.function(
            inputs=[X_in],
            outputs=H,
        )

        # we won't use this cost to do any updates
        # but we would like to see how this cost function changes
        # as we do contrastive divergence
        X_hat = self.forward_output(X_in)
        cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).sum() / (batch_sz * D)
        cost_op = theano.function(
            inputs=[X_in],
            outputs=cost,
        )

        # do one round of Gibbs sampling to obtain X_sample
        H = self.sample_h_given_v(X_in)
        X_sample = self.sample_v_given_h(H)

        # define the objective which is free energy of visible 0 minus the free energy of visible 1, updates, and train function
        # we're taking the mean since we're doing batch training
        objective = T.mean(self.free_energy(X_in)) - T.mean(self.free_energy(X_sample))

        # need to consider X_sample constant because you can't take the gradient of random numbers in Theano
        updates = [(p, p - learning_rate*T.grad(objective, p, consider_constant=[X_sample])) for p in self.params]
        train_op = theano.function(
            inputs=[X_in],
            updates=updates,
        )

        costs = []
        print("training rbm: %s" % self.id)
        for i in range(epochs):
            print("epoch:", i)
            X = shuffle(X)
            for j in range(n_batches):
                batch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                train_op(batch)
                the_cost = cost_op(X)  # technically we could also get the cost for Xtest here
                print("j / n_batches:", j, "/", n_batches, "cost:", the_cost)
                costs.append(the_cost)
        if show_fig:
            plt.plot(costs)
            plt.show()
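
The RBM examples above call sample_h_given_v, sample_v_given_h, and free_energy without showing them. A hedged sketch of standard Bernoulli-Bernoulli RBM definitions matching the parameter names used here (W, hidden bias c, visible bias b); the self.rng random stream is an assumed attribute, and the original class may differ:

import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

class RBMSamplingSketch:
    # assumes __init__ also created: self.rng = RandomStreams()

    def sample_h_given_v(self, v):
        p_h = T.nnet.sigmoid(v.dot(self.W) + self.c)
        return self.rng.binomial(size=p_h.shape, n=1, p=p_h, dtype='float64')

    def sample_v_given_h(self, h):
        p_v = T.nnet.sigmoid(h.dot(self.W.T) + self.b)
        return self.rng.binomial(size=p_v.shape, n=1, p=p_v, dtype='float64')

    def free_energy(self, v):
        # F(v) = -b.v - sum_j log(1 + exp(c_j + v.W_j))
        return -v.dot(self.b) - T.sum(T.log(1 + T.exp(v.dot(self.W) + self.c)), axis=1)
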
    losses = np.empty(decay.size)
    test_accs = np.empty(decay.size)

    for idx, decay_rate in enumerate(decay):
        np.random.seed(7)  # seed NumPy's random number generator for reproducibility of results
        # Initialize neural network
        nn = MLPClassifier(hidden_layer_sizes=(5, 2),
                           random_state=7,
                           max_iter=1,
                           warm_start=True)
        nn.fit(X_train, y_train)

        # Initialize weights
        nn.coefs_, nn.intercepts_ = init_weights(X_train.shape[1],
                                                 list(nn.hidden_layer_sizes))
        loss_next = compute_loss(X_train, y_train, nn)

        T = T_init
        loss = []
        start = time.time()
        for i in range(num_iters):
            # Save current parameters
            coefs_prev = nn.coefs_
            intercepts_prev = nn.intercepts_
            loss_prev = loss_next

            if debug:
                print('Iteration # %d' % i)
                print('Loss = ', loss_prev)
Example #28
    def fit(self, X, Y, Xtest, Ytest,
            pretrain=True,
            train_head_only=False,
            learning_rate=0.1,
            mu=0.99,
            reg=0.0,
            epochs=1,
            batch_sz=100):

        # cast to float64
        learning_rate = np.float64(learning_rate)
        mu = np.float64(mu)
        reg = np.float64(reg)

        # greedy layer-wise training of autoencoders
        pretrain_epochs = 1
        if not pretrain:
            pretrain_epochs = 0

        current_input = X
        for ae in self.hidden_layers:
            ae.fit(current_input, epochs=pretrain_epochs)

            # create current_input for the next layer
            current_input = ae.hidden_op(current_input)

        # initialize logistic regression layer
        Linhas = len(Y)
        K = len(set(Y))
        W0 = init_weights((self.hidden_layers[-1].M, K))
        self.W = theano.shared(W0.astype(np.float64), "W_logreg")
        self.b = theano.shared(np.zeros(K, dtype=np.float64), "b_logreg")

        self.params = [self.W, self.b]
        if not train_head_only:
            for ae in self.hidden_layers:
                self.params += ae.forward_params

        X_in = T.matrix('X_in')
        targets = T.ivector('Targets')
        pY = self.forward(X_in)

        squared_magnitude = [(p * p).sum() for p in self.params]
        reg_cost = T.sum(squared_magnitude)
        cost = -T.mean(T.log(pY[T.arange(pY.shape[0]), targets])) + reg * reg_cost
        prediction = self.predict(X_in)
        cost_predict_op = theano.function(
            inputs=[X_in, targets],
            outputs=[cost, prediction],
        )

        updates = momentum_updates(cost, self.params, mu, learning_rate)
        train_op = theano.function(
            inputs=[X_in, targets],
            updates=updates,
        )

        n_batches = Linhas // batch_sz
        costs = []
        print("supervised training...")
        for i in range(epochs):
            print("epoch:", i)
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]
                train_op(Xbatch, Ybatch)
                the_cost, the_prediction = cost_predict_op(Xtest, Ytest)
                error = error_rate(the_prediction, Ytest)
                print("j / n_batches:", j, "/", n_batches, "cost:", the_cost, "error:", error)
                costs.append(the_cost)
        plt.plot(costs)
        plt.show()
    def fit(self, X, Y, Xtest, Ytest, pretrain=True, learning_rate=0.01, mu=0.99, reg=0.1, epochs=1, batch_sz=100):
        # greedy layer-wise training of autoencoders
        pretrain_epochs = 1
        if not pretrain:
            pretrain_epochs = 0

        current_input = X
        for ae in self.hidden_layers:
            ae.fit(current_input, epochs=pretrain_epochs)

            # create current_input for the next layer
            current_input = ae.hidden_op(current_input)

        # initialize logistic regression layer
        N = len(Y)
        K = len(set(Y))
        W0 = init_weights((self.hidden_layers[-1].M, K))
        self.W = theano.shared(W0, "W_logreg")
        self.b = theano.shared(np.zeros(K), "b_logreg")

        self.params = [self.W, self.b]
        for ae in self.hidden_layers:
            self.params += ae.forward_params

        # for momentum
        self.dW = theano.shared(np.zeros(W0.shape), "dW_logreg")
        self.db = theano.shared(np.zeros(K), "db_logreg")
        self.dparams = [self.dW, self.db]
        for ae in self.hidden_layers:
            self.dparams += ae.forward_dparams

        X_in = T.matrix('X_in')
        targets = T.ivector('Targets')
        pY = self.forward(X_in)

        # squared_magnitude = [(p*p).sum() for p in self.params]
        # reg_cost = T.sum(squared_magnitude)
        cost = -T.mean( T.log(pY[T.arange(pY.shape[0]), targets]) ) #+ reg*reg_cost
        prediction = self.predict(X_in)
        cost_predict_op = theano.function(
            inputs=[X_in, targets],
            outputs=[cost, prediction],
        )

        updates = [
            (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
        ] + [
            (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
        ]
        # updates = [(p, p - learning_rate*T.grad(cost, p)) for p in self.params]
        train_op = theano.function(
            inputs=[X_in, targets],
            updates=updates,
        )

        n_batches = N // batch_sz
        costs = []
        print("supervised training...")
        for i in range(epochs):
            print("epoch:", i)
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz + batch_sz)]
                train_op(Xbatch, Ybatch)
                the_cost, the_prediction = cost_predict_op(Xtest, Ytest)
                error = error_rate(the_prediction, Ytest)
                print("j / n_batches:", j, "/", n_batches, "cost:", the_cost, "error:", error)
                costs.append(the_cost)
        plt.plot(costs)
        plt.show()
Example #30
    def __init__(self, m1, m2):
        W = init_weights((m1, m2))
Example #31
File: model.py Project: NSF20/NSF
def nsf(paras):
    cuda = torch.device('cuda:' + str(paras.gpu_id))
    len_anchor = paras.total_anchor
    anchor_all = list(range(0, len_anchor))
    len_s = paras.len_s
    len_t = paras.len_t
    node_f1 = list(range(0, len_s))
    node_f2 = list(range(0, len_t))
    feature_s = paras.feature_s
    feature_t = paras.feature_t
    dim = paras.represent_dim
    ker_size = paras.ker_size
    coefficient = paras.coefficient
    n_epochs = paras.epoch
    ratio = paras.train_ratio
    margin = paras.epsilon
    lr = paras.lr
    lr_step = paras.lr_step
    lr_prob = paras.lr_prob

    a_array_load = np.load(feature_s)
    a_array_tensor = torch.Tensor(a_array_load)
    b_array_load = np.load(feature_t)
    b_array_tensor = torch.Tensor(b_array_load)

    seeds = list(np.random.randint(0, 10000, 4))
    seed1 = seeds[0]
    seed2 = seeds[1]
    torch.cuda.manual_seed_all(seeds[2])
    torch.manual_seed(seeds[3])

    rd.seed(seed1)
    anchor_train = rd.choice(anchor_all, int(ratio * len_anchor))
    anchor_test = list(set(anchor_all) - set(anchor_train))
    triplet_neg = 1
    anchor_flag = 1
    anchor_train_len = len(anchor_train)
    anchor_train_a_list = anchor_train
    anchor_train_b_list = anchor_train
    input_a = []
    input_b = []
    classifier_target = torch.empty(0).to(device=cuda)
    np.random.seed(seed2)
    index = 0
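    # for every training anchor pair (a, b): add it with label 1, then add
    # triplet_neg corrupted pairs on each side (a with a random non-matching
    # node from node_f2, and b with a random non-matching node from node_f1),
    # each labelled 0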
    while index < anchor_train_len:
        a = anchor_train_a_list[index]
        b = anchor_train_b_list[index]
        input_a.append(a)
        input_b.append(b)
        an_target = torch.ones(anchor_flag).to(device=cuda)
        classifier_target = torch.cat((classifier_target, an_target), dim=0)
        an_negs_index = list(set(node_f2) - {b})
        an_negs_index_sampled = list(
            np.random.choice(an_negs_index, triplet_neg, replace=False))
        an_as = triplet_neg * [a]
        input_a += an_as
        input_b += an_negs_index_sampled

        an_negs_index1 = list(set(node_f1) - {a})
        an_negs_index_sampled1 = list(
            np.random.choice(an_negs_index1, triplet_neg, replace=False))
        an_as1 = triplet_neg * [b]
        input_b += an_as1
        input_a += an_negs_index_sampled1

        un_an_target = torch.zeros(triplet_neg * 2).to(device=cuda)
        classifier_target = torch.cat((classifier_target, un_an_target), dim=0)
        index += 1

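    # map the {0, 1} classifier labels to {-1, +1} so they can be used as the
    # target of nn.CosineEmbeddingLoss (+1 = anchor pair, -1 = negative pair)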
    cosine_target = torch.unsqueeze(2 * classifier_target - 1, dim=1)
    classifier_target = torch.unsqueeze(classifier_target, dim=1)

    ina = a_array_load[input_a]
    inb = b_array_load[input_b]
    ina = torch.Tensor(ina).to(device=cuda)
    inb = torch.Tensor(inb).to(device=cuda)
    tensor_dataset = SiameseNetworkDataset(ina, inb, classifier_target,
                                           cosine_target)
    data_loader = DataLoader(tensor_dataset, batch_size=56, shuffle=False)

    P, M = 0, 0
    model = SiameseNetwork(dim, ker_size, len_s, len_t).to(device=cuda)
    init_weights(model)
    neta = NETA(dim, ker_size, len_s).to(device=cuda)
    netb = NETB(dim, ker_size, len_t).to(device=cuda)
    a_array_tensor = a_array_tensor.to(device=cuda)
    b_array_tensor = b_array_tensor.to(device=cuda)
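    # CosineEmbeddingLoss pulls embeddings of anchor pairs (target +1) together
    # and, with margin=0, penalizes any positive cosine similarity for negative
    # pairs (target -1)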
    cos = nn.CosineEmbeddingLoss(margin=0)
    optimizer = optim.Adadelta(model.parameters(), lr=lr, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=lr_step,
                                                gamma=lr_prob)

    for epoch in range(n_epochs):
        model.train()
        scheduler.step()
        train_loss = 0
        loss_reg = 0
        loss_anchor = 0
        for data_batch in data_loader:
            in_a, in_b, c, cosine = data_batch
            cosine = torch.squeeze(cosine, dim=1)
            in_a = torch.unsqueeze(in_a, dim=1).to(device=cuda)
            in_b = torch.unsqueeze(in_b, dim=1).to(device=cuda)
            h_a, h_b = model(in_a, in_b)
            loss_anchor_batch = 1 * cos(h_a, h_b, cosine)
            loss_reg_batch = coefficient * (h_a.norm() + h_b.norm())
            loss = loss_reg_batch + loss_anchor_batch

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            loss_reg += loss_reg_batch.item()
            loss_anchor += loss_anchor_batch.item()

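        # copy the weights of the trained two-branch Siamese model into the two
        # single-branch encoders (neta / netb) so each graph can be embedded on
        # its own for evaluation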
        neta_dict = neta.state_dict()
        netb_dict = netb.state_dict()
        model.cpu()
        trainmodel_dict = model.state_dict()

        trainmodel_dict_a = {
            k: v
            for k, v in trainmodel_dict.items() if k in neta_dict
        }
        trainmodel_dict_b = {
            k: v
            for k, v in trainmodel_dict.items() if k in netb_dict
        }
        neta_dict.update(trainmodel_dict_a)
        netb_dict.update(trainmodel_dict_b)
        neta.load_state_dict(neta_dict)
        netb.load_state_dict(netb_dict)

        neta.eval()
        netb.eval()
        hidden_a = neta(torch.unsqueeze(a_array_tensor, dim=1))
        hidden_b = netb(torch.unsqueeze(b_array_tensor, dim=1))

        if epoch >= n_epochs - 30:  # evaluate only during the last 30 epochs (averaged in the log below)
            PatN_t, MatN_t = tes_vec(hidden_a, hidden_b, anchor_train,
                                     anchor_test, node_f2)
            P += PatN_t
            M += MatN_t
        model.to(device=cuda)
    logging.info('%d %d %d %d %d %.4f %.1f %d %d %.3f %.3f' %
                 (seeds[0], seeds[1], seeds[2], seeds[3], ker_size,
                  coefficient, margin, ratio, dim, P / 30, M / 30))
    def fit(self, X, Y, learning_rate=0.01, mu=0.99, epochs=30, batch_sz=100):
        N, D = X.shape
        K = len(set(Y))

        self.hidden_layers = []
        mi = D
        for mo in self.hidden_layer_sizes:
            h = HiddenLayer(mi, mo)
            self.hidden_layers.append(h)
            mi = mo

        # initialize logistic regression layer
        W = init_weights((mo, K))
        b = np.zeros(K)
        self.W = theano.shared(W)
        self.b = theano.shared(b)

        self.params = [self.W, self.b]
        self.allWs = []
        for h in self.hidden_layers:
            self.params += h.params
            self.allWs.append(h.W)
        self.allWs.append(self.W)

        X_in = T.matrix('X_in')
        targets = T.ivector('Targets')
        pY = self.forward(X_in)

        cost = -T.mean( T.log(pY[T.arange(pY.shape[0]), targets]) )
        prediction = self.predict(X_in)
        # cost_predict_op = theano.function(
        #     inputs=[X_in, targets],
        #     outputs=[cost, prediction],
        # )

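        # shared variables holding the momentum velocities, one per parameter,
        # initialized to zero with matching shapes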
        dparams = [theano.shared(p.get_value()*0) for p in self.params]
        grads = T.grad(cost, self.params)

        updates = [
            (p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
        ] + [
            (dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
        ]
        train_op = theano.function(
            inputs=[X_in, targets],
            outputs=[cost, prediction],
            updates=updates,
        )

        n_batches = N // batch_sz
        costs = []
        lastWs = [W.get_value() for W in self.allWs]
        W_changes = []
        print "supervised training..."
        for i in xrange(epochs):
            print "epoch:", i
            X, Y = shuffle(X, Y)
            for j in xrange(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz + batch_sz)]
                c, p = train_op(Xbatch, Ybatch)
                if j % 100 == 0:
                    print "j / n_batches:", j, "/", n_batches, "cost:", c, "error:", error_rate(p, Ybatch)
                costs.append(c)

                # log changes in all Ws
                W_change = [np.abs(W.get_value() - lastW).mean() for W, lastW in zip(self.allWs, lastWs)]
                W_changes.append(W_change)
                lastWs = [W.get_value() for W in self.allWs]

        W_changes = np.array(W_changes)
        plt.subplot(2,1,1)
        for i in range(W_changes.shape[1]):
            plt.plot(W_changes[:,i], label='layer %s' % i)
        plt.legend()
        # plt.show()

        plt.subplot(2,1,2)
        plt.plot(costs)
        plt.show()
Example #34
    def fit(self,
            X,
            Y,
            Xtest,
            Ytest,
            pretrain=True,
            learning_rate=0.01,
            mu=0.99,
            reg=0.1,
            epochs=1,
            batch_sz=100):
        # greedy layer-wise training of autoencoders
        pretrain_epochs = 1
        if not pretrain:
            pretrain_epochs = 0

        current_input = X
        for ae in self.hidden_layers:  # call fit on each autoencoder successively
            ae.fit(current_input, epochs=pretrain_epochs)
            # we then calculate the output at the hidden layer, and we set that as the
            # current_input for the next layer
            # create current_input for the next layer (the next autoencoder)
            current_input = ae.hidden_op(current_input)

        # initialize logistic regression layer
        N = len(Y)
        K = len(set(Y))
        W0 = init_weights((self.hidden_layers[-1].M, K))
        self.W = theano.shared(W0, "W_logreg")
        self.b = theano.shared(np.zeros(K), "b_logreg")

        # we have to add the other parameters from the hidden layer
        self.params = [self.W, self.b]
        for ae in self.hidden_layers:
            self.params += ae.forward_params

        # do the same for momentum
        self.dW = theano.shared(np.zeros(W0.shape), "dW_logreg")
        self.db = theano.shared(np.zeros(K), "db_logreg")
        self.dparams = [self.dW, self.db]
        for ae in self.hidden_layers:
            self.dparams += ae.forward_dparams

        X_in = T.matrix('X_in')
        targets = T.ivector('Targets')
        pY = self.forward(X_in)
        """previously, we treated the targets as an indicator matrix, and the output of
        the neural network as a matrix of outputs. In this course and from here on out
        we're going to select the elements of py that would be 1, so that those are the
        elements in which targets is 1."""
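        # e.g. with pY = [[0.1, 0.9], [0.8, 0.2]] and targets = [1, 0],
        # pY[T.arange(2), targets] picks out [0.9, 0.8]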
        # squared_magnitude = [(p*p).sum() for p in self.params]
        # reg_cost = T.sum(squared_magnitude)
        cost = -T.mean(T.log(pY[T.arange(pY.shape[0]),
                                targets]))  #+ reg*reg_cost
        # in order to calculate the error rate, we need to calculate the predictions
        prediction = self.predict(X_in)
        cost_predict_op = theano.function(
            inputs=[X_in, targets],
            outputs=[cost, prediction],
        )

        updates = [(p, p + mu * dp - learning_rate * T.grad(cost, p))
                   for p, dp in zip(self.params, self.dparams)
                   ] + [(dp, mu * dp - learning_rate * T.grad(cost, p))
                        for p, dp in zip(self.params, self.dparams)]
        # updates = [(p, p - learning_rate*T.grad(cost, p)) for p in self.params]
        train_op = theano.function(
            inputs=[X_in, targets],
            updates=updates,
        )

        n_batches = N // batch_sz
        costs = []
        print("supervised training...")
        for i in range(epochs):
            print("epoch:", i)
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]
                train_op(Xbatch, Ybatch)
                the_cost, the_prediction = cost_predict_op(Xtest, Ytest)
                error = error_rate(the_prediction, Ytest)
                print("j / n_batches:", j, "/", n_batches, "cost:", the_cost,
                      "error:", error)
                costs.append(the_cost)
        plt.plot(costs)
        plt.show()
    def __init__(self, D, M):
        W = init_weights((D, M))
Example #36
import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from util import relu, error_rate, getKaggleMNIST, init_weights

class AutoEncoder(object):
    def __init__(self, M, an_id):
        self.M = M
        self.id = an_id

    def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
        N, D = X.shape
        n_batch = N // batch_sz

        W0 = init_weights((D, self.M))

        self.W = theano.shared(W0, 'W_%s' % self.id)

        self.bh = theano.shared(np.zeros(self.M), 'bh_%s' % self.id)
        self.bo = theano.shared(np.zeros(D), 'bo_%s' % self.id)

        self.params = [self.W, self.bh, self.bo]

        self.forward_params = [self.W, self.bh]

        self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
        self.dbh = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
        self.dbo = theano.shared(np.zeros(D), 'dbo_%s' % self.id)

        self.dparams = [self.dW, self.dbh, self.dbo]
Example #37
    def fit(self,
            X,
            learning_rate=0.1,
            epochs=1,
            batch_sz=100,
            show_fig=False):
        # cast to float32
        learning_rate = np.float32(learning_rate)

        N, D = X.shape
        n_batches = N // batch_sz

        W0 = init_weights((D, self.M))
        self.W = theano.shared(W0, 'W_%s' % self.id)
        self.c = theano.shared(np.zeros(self.M), 'c_%s' % self.id)
        self.b = theano.shared(np.zeros(D), 'b_%s' % self.id)
        self.params = [self.W, self.c, self.b]
        self.forward_params = [self.W, self.c]

        X_in = T.matrix('X_%s' % self.id)

        # attach it to the object so it can be used later
        # must be sigmoidal because the output is also a sigmoid
        H = T.nnet.sigmoid(X_in.dot(self.W) + self.c)
        self.hidden_op = theano.function(
            inputs=[X_in],
            outputs=H,
        )

        # we won't use this cost to do any updates
        # but we would like to see how this cost function changes
        # as we do contrastive divergence
        X_hat = self.forward_output(X_in)
        cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).mean()
        cost_op = theano.function(
            inputs=[X_in],
            outputs=cost,
        )

        # do one round of Gibbs sampling to obtain X_sample
        H = self.sample_h_given_v(X_in)
        X_sample = self.sample_v_given_h(H)

        # define the objective, updates, and train function
        objective = T.mean(self.free_energy(X_in)) - T.mean(
            self.free_energy(X_sample))
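        # minimizing this free-energy gap between the data and the one-step
        # Gibbs sample is the CD-1 approximation to the maximum-likelihood gradient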

        # need to consider X_sample constant because you can't take the gradient of random numbers in Theano
        updates = [(
            p, p -
            learning_rate * T.grad(objective, p, consider_constant=[X_sample]))
                   for p in self.params]
        train_op = theano.function(
            inputs=[X_in],
            updates=updates,
        )

        costs = []
        print("training rbm: %s" % self.id)
        for i in range(epochs):
            print("epoch:", i)
            X = shuffle(X)
            for j in range(n_batches):
                batch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                train_op(batch)
                the_cost = cost_op(
                    X)  # technically we could also get the cost for Xtest here
                print("j / n_batches:", j, "/", n_batches, "cost:", the_cost)
                costs.append(the_cost)
        if show_fig:
            plt.plot(costs)
            plt.show()
Example #38
    def fit(self, X, Y, learning_rate=0.01, mu=0.99, epochs=30, batch_sz=100):
        N, D = X.shape
        K = len(set(Y))

        self.hidden_layers = []
        mi = D
        for mo in self.hidden_layer_sizes:
            h = HiddenLayer(mi, mo)
            self.hidden_layers.append(h)
            mi = mo

        # initialize logistic regression layer
        W = init_weights((mo, K))
        b = np.zeros(K)
        self.W = theano.shared(W)
        self.b = theano.shared(b)

        self.params = [self.W, self.b]
        self.allWs = []
        for h in self.hidden_layers:
            self.params += h.params
            self.allWs.append(h.W)
        self.allWs.append(self.W)

        X_in = T.matrix('X_in')
        targets = T.ivector('Targets')
        pY = self.forward(X_in)

        cost = -T.mean(T.log(pY[T.arange(pY.shape[0]), targets]))
        prediction = self.predict(X_in)
        # cost_predict_op = theano.function(
        #     inputs=[X_in, targets],
        #     outputs=[cost, prediction],
        # )

        dparams = [theano.shared(p.get_value() * 0) for p in self.params]
        grads = T.grad(cost, self.params)

        updates = [(p, p + mu * dp - learning_rate * g)
                   for p, dp, g in zip(self.params, dparams, grads)
                   ] + [(dp, mu * dp - learning_rate * g)
                        for dp, g in zip(dparams, grads)]
        train_op = theano.function(
            inputs=[X_in, targets],
            outputs=[cost, prediction],
            updates=updates,
        )

        n_batches = N // batch_sz
        costs = []
        lastWs = [W.get_value() for W in self.allWs]
        W_changes = []
        print "supervised training..."
        for i in xrange(epochs):
            print "epoch:", i
            X, Y = shuffle(X, Y)
            for j in xrange(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]
                c, p = train_op(Xbatch, Ybatch)
                if j % 100 == 0:
                    print("j / n_batches:", j, "/", n_batches, "cost:", c,
                          "error:", error_rate(p, Ybatch))
                costs.append(c)

                # log changes in all Ws
                W_change = [
                    np.abs(W.get_value() - lastW).mean()
                    for W, lastW in zip(self.allWs, lastWs)
                ]
                W_changes.append(W_change)
                lastWs = [W.get_value() for W in self.allWs]

        W_changes = np.array(W_changes)
        plt.subplot(2, 1, 1)
        for i in range(W_changes.shape[1]):
            plt.plot(W_changes[:, i], label='layer %s' % i)
        plt.legend()
        # plt.show()

        plt.subplot(2, 1, 2)
        plt.plot(costs)
        plt.show()
Example #39
basis_L1 = init_basis_hermite(sigma_L1, bases_L1, 5)
basis_L2 = init_basis_hermite(sigma_L2, bases_L2, 3)
basis_L3 = init_basis_hermite(sigma_L3, bases_L3, 3)

alphas_L1 = init_alphas(64, 1, bases_L1)
alphas_L2 = init_alphas(64, 64, bases_L2)
alphas_L3 = init_alphas(64, 64, bases_L3)

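# each filter bank is built as a linear combination of a fixed Hermite basis:
# the alphas are the trainable mixing coefficients, so only the alphas (and the
# final dense layer w_L4) appear in `params` below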
w_L1 = T.sum(alphas_L1[:, :, :, None, None] * basis_L1[None, None, :, :, :],
             axis=2)
w_L2 = T.sum(alphas_L2[:, :, :, None, None] * basis_L2[None, None, :, :, :],
             axis=2)
w_L3 = T.sum(alphas_L3[:, :, :, None, None] * basis_L3[None, None, :, :, :],
             axis=2)
w_L4 = init_weights((3136, 10))

#-------------------------
# Set up function
#-------------------------

noise_l1, noise_l2, noise_l3, noise_py_x = model(X, w_L1, w_L2, w_L3, w_L4,
                                                 0.2, 0.7)
l1, l2, l3, py_x = model(X, w_L1, w_L2, w_L3, w_L4, 0., 0.)
y_x = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [alphas_L1, alphas_L2, alphas_L3, w_L4]
updates = adadelta(cost, params, learning_rate=lr, rho=0.95, epsilon=1e-6)

train = theano.function(inputs=[X, Y, lr],