Example #1
def train(data,
          net,
          max_epoch,
          get_lr,
          weight_decay,
          batch_size=100,
          use_cpu=False):
    print('Start initialization............')
    if use_cpu:
        print('Using CPU')
        dev = device.get_default_device()
    else:
        print('Using GPU')
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
    ty = tensor.Tensor((batch_size, ), dev, core_pb2.kInt)
    train_x, train_y, test_x, test_y = data
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    for epoch in range(max_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
            # update progress bar
            utils.update_progress(b * 1.0 / num_train_batch,
                                  'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
            % ((loss / num_train_batch), (acc / num_train_batch), get_lr(epoch))
        print(info)

        loss, acc = 0.0, 0.0
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a

        print('test loss = %f, test accuracy = %f' % ((loss / num_test_batch),
                                                      (acc / num_test_batch)))
    net.save('model', 20)  # save model params into checkpoint file
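The get_lr argument is any callable mapping an epoch index to a learning rate. As an illustration only, a hypothetical step-decay schedule that could be passed in (the base rate and decay interval are assumptions, not part of the example):

def get_lr(epoch):
    # Hypothetical schedule: start at 0.01, divide by 10 every 50 epochs.
    return 0.01 * (0.1 ** (epoch // 50))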
Example #2
def train(data_dir, net, num_epoch=20, batch_size=250):
    
    print 'Start initialization............'
    cuda = device.create_cuda_gpu()
    net.to_device(cuda)
    opt = optimizer.SGD(momentum=0.9, weight_decay=0.04)
    for (p, specs) in zip(net.param_values(), net.param_specs()):
        filler = specs.filler
        if filler.type == 'gaussian':
            initializer.gaussian(p, filler.mean, filler.std)
        else:
            p.set_value(0)
        opt.register(p, specs)
        print specs.name, filler.type, p.l1()
    print 'Loading data ..................'
    train_x, train_y = load_dataset(data_dir, 1)
    test_x, test_y = load_dataset(data_dir, 2)

    tx = tensor.Tensor((batch_size, 3), cuda)
    ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
    #ta = tensor.Tensor((batch_size,3), cuda)
    #tb = tensor.Tensor((batch_size,),cuda, core_pb2.kInt)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    test_idx = np.arange(test_x.shape[0], dtype=np.int32)
    for epoch in range(num_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print 'Epoch %d' % epoch
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
            # update progress bar
            utils.update_progress(b * 1.0 / num_train_batch,
                                  'training loss = %f, accuracy = %f' % (l, a))
        info = 'training loss = %f, training accuracy = %f' \
            % (loss / num_train_batch, acc / num_train_batch)
        print info
        
        loss, acc = 0.0, 0.0
        np.random.shuffle(test_idx)
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a
        print 'test loss = %f, test accuracy = %f' \
            % (loss / num_test_batch, acc / num_test_batch)
    net.save('model.bin')  # save model params into checkpoint file
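load_dataset is not shown in this example; given the tensors declared above it only needs to return float32 features of shape (N, 3) and integer labels of shape (N,). A minimal hypothetical loader, assuming the data sits in .npy files (the file names are made up for illustration):

import os
import numpy as np

def load_dataset(data_dir, split):
    # Hypothetical loader: split 1 = training set, split 2 = test set.
    name = 'train' if split == 1 else 'test'
    x = np.load(os.path.join(data_dir, name + '_x.npy')).astype(np.float32)
    y = np.load(os.path.join(data_dir, name + '_y.npy')).astype(np.int32)
    return x, y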
Example #3
def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
          use_cpu=False):
    print('Start initialization............')
    if use_cpu:
        print('Using CPU')
        dev = device.get_default_device()
    else:
        print('Using GPU')
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    train_x, train_y, test_x, test_y = data
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    for epoch in range(max_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
            y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
            # update progress bar
            utils.update_progress(b * 1.0 / num_train_batch,
                                  'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
            % ((loss / num_train_batch), (acc / num_train_batch), get_lr(epoch))
        print(info)

        loss, acc = 0.0, 0.0
        for b in range(num_test_batch):
            x = test_x[b * batch_size: (b + 1) * batch_size]
            y = test_y[b * batch_size: (b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a

        print('test loss = %f, test accuracy = %f' %
              ((loss / num_test_batch), (acc / num_test_batch)))
    net.save('model', 20)  # save model params into checkpoint file
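A hypothetical driver for this train function; load_cifar10 and create_net are placeholders for whatever loads the (N, 3, 32, 32) arrays and builds the net, and the schedule constants are assumptions:

# Hypothetical usage; load_cifar10() and create_net() are placeholders.
train_x, train_y, test_x, test_y = load_cifar10()
net = create_net()
train((train_x, train_y, test_x, test_y), net,
      max_epoch=140,
      get_lr=lambda epoch: 0.01 * (0.1 ** (epoch // 50)),
      weight_decay=5e-4)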
Example #4
def train(inputfolder,
          outputfolder,
          visfolder,
          trainratio,
          validationratio,
          testratio,
          dev,
          agent,
          max_epoch,
          use_cpu,
          batch_size=100):
    opt = optimizer.SGD(momentum=0.9, weight_decay=0.01)
    agent.push(MsgType.kStatus, 'Downloading data...')
    # all_feature, all_label = get_data(os.path.join(inputfolder, 'features.txt'), os.path.join(inputfolder, 'label.txt'))  # PUT THE DATA on/to dbsystem
    all_feature, all_label = get_data(
        os.path.join(inputfolder, 'features.txt'),
        os.path.join(inputfolder, 'label.txt'))  # PUT THE DATA on/to dbsystem
    agent.push(MsgType.kStatus, 'Finish downloading data')
    n_folds = 5
    print("all_label shape: ", all_label.shape)
    all_label = all_label[:, 1]
    # for i, (train_index, test_index) in enumerate(StratifiedKFold(all_label.reshape(all_label.shape[0]), n_folds=n_folds)):
    for i in range(3):
        train_index = np.arange(0, 1404)
        train_feature, train_label = all_feature[train_index], all_label[
            train_index]
        if i == 0:
            print("fold: ", i)
            break
    print("train label sum: ", train_label.sum())
    in_shape = np.array([1, 12, 375])
    trainx = tensor.Tensor(
        (batch_size, int(in_shape[0]), int(in_shape[1]), int(in_shape[2])),
        dev)
    trainy = tensor.Tensor((batch_size, ), dev, tensor.int32)
    num_train_batch = train_feature.shape[0] // batch_size
    idx = np.arange(train_feature.shape[0], dtype=np.int32)

    # height = 12
    # width = 375
    # kernel_y = 3
    # kernel_x = 80
    # stride_y = 1
    # stride_x = 20
    hyperpara = np.array([12, 375, 3, 10, 1, 3])
    height, width, kernel_y, kernel_x, stride_y, stride_x = hyperpara
    print('kernel_y: ', kernel_y)
    print('kernel_x: ', kernel_x)
    print('stride_y: ', stride_y)
    print('stride_x: ', stride_x)
    net = model.create_net(in_shape, hyperpara, use_cpu)
    net.to_device(dev)

    test_epoch = 10
    occlude_test_epoch = 100
    for epoch in range(max_epoch):
        if handle_cmd(agent):
            break
        np.random.seed(10)
        np.random.shuffle(idx)
        train_feature, train_label = train_feature[idx], train_label[idx]
        print('Epoch %d' % epoch)

        loss, acc = 0.0, 0.0
        val_loss, val_acc = 0.0, 0.0  # using the first half as validation
        for b in range(int(num_train_batch)):
            x = train_feature[b * batch_size:(b + 1) * batch_size]
            y = train_label[b * batch_size:(b + 1) * batch_size]
            x = x.reshape((batch_size, in_shape[0], in_shape[1], in_shape[2]))
            trainx.copy_from_numpy(x)
            trainy.copy_from_numpy(y)
            grads, (l, a), probs = net.train(trainx, trainy)
            loss += l
            acc += a
            if b < (int(num_train_batch / 2)):
                val_loss += l
                val_acc += a
            for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
                opt.apply_with_lr(epoch, 0.005, g, p, str(s.name))
            info = 'training loss = %f, training accuracy = %f' % (l, a)
            utils.update_progress(b * 1.0 / num_train_batch, info)
        # put training status info into a shared queue
        info = dict(phase='train',
                    step=epoch,
                    accuracy=acc / num_train_batch,
                    loss=loss / num_train_batch,
                    timestamp=time.time())
        agent.push(MsgType.kInfoMetric, info)
        info = 'training loss = %f, training accuracy = %f' \
            % (loss / num_train_batch, acc / num_train_batch)
        print(info)
        val_info = 'validation loss = %f, validation accuracy = %f' \
           % (val_loss / (int(num_train_batch / 2)), val_acc / (int(num_train_batch / 2)))
        print(val_info)
        if epoch == (max_epoch - 1):
            print('final val_loss: ', val_loss / (int(num_train_batch / 2)))
            np.savetxt(outputfolder + '/final_results.txt',
                       np.full((1), val_loss / (int(num_train_batch / 2))),
                       delimiter=",")
Example #5
def train(data,
          max_epoch,
          hidden_size=100,
          seq_length=100,
          batch_size=16,
          num_stacks=1,
          dropout=0.5,
          model_path='model'):
    # RMSProp with gradient L2-norm clipping (threshold 5)
    opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
    cuda = device.create_cuda_gpu()
    rnn = layer.LSTM(name='lstm',
                     hidden_size=hidden_size,
                     num_stacks=num_stacks,
                     dropout=dropout,
                     input_sample_shape=(data.vocab_size, ))
    rnn.to_device(cuda)
    print 'created rnn'
    rnn_w = rnn.param_values()[0]
    rnn_w.uniform(-0.08, 0.08)  # init all rnn parameters
    print 'rnn weight l1 = %f' % (rnn_w.l1())
    dense = layer.Dense('dense',
                        data.vocab_size,
                        input_sample_shape=(hidden_size, ))
    dense.to_device(cuda)
    dense_w = dense.param_values()[0]
    dense_b = dense.param_values()[1]
    print 'dense w ', dense_w.shape
    print 'dense b ', dense_b.shape
    initializer.uniform(dense_w, dense_w.shape[0], 0)
    print 'dense weight l1 = %f' % (dense_w.l1())
    dense_b.set_value(0)
    print 'dense b l1 = %f' % (dense_b.l1())

    g_dense_w = tensor.Tensor(dense_w.shape, cuda)
    g_dense_b = tensor.Tensor(dense_b.shape, cuda)

    lossfun = loss.SoftmaxCrossEntropy()
    for epoch in range(max_epoch):
        train_loss = 0
        for b in range(data.num_train_batch):
            batch = data.train_dat[b * batch_size:(b + 1) * batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, cuda)
            inputs.append(tensor.Tensor())  # empty tensor for hx
            inputs.append(tensor.Tensor())  # empty tensor for cx

            outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]
            grads = []
            batch_loss = 0
            g_dense_w.set_value(0.0)
            g_dense_b.set_value(0.0)
            for output, label in zip(outputs, labels):
                act = dense.forward(model_pb2.kTrain, output)
                lvalue = lossfun.forward(model_pb2.kTrain, act, label)
                batch_loss += lvalue.l1()
                grad = lossfun.backward()
                grad /= batch_size
                grad, gwb = dense.backward(model_pb2.kTrain, grad)
                grads.append(grad)
                g_dense_w += gwb[0]
                g_dense_b += gwb[1]
                # print output.l1(), act.l1()
            utils.update_progress(
                b * 1.0 / data.num_train_batch,
                'training loss = %f' % (batch_loss / seq_length))
            train_loss += batch_loss

            grads.append(tensor.Tensor())  # placeholder gradient for hx
            grads.append(tensor.Tensor())  # placeholder gradient for cx
            g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]
            dense_w, dense_b = dense.param_values()
            opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')
            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w,
                              'dense_w')
            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b,
                              'dense_b')
        print '\nEpoch %d, train loss is %f' % \
            (epoch, train_loss / data.num_train_batch / seq_length)

        eval_loss = 0
        for b in range(data.num_test_batch):
            batch = data.val_dat[b * batch_size:(b + 1) * batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, cuda)
            inputs.append(tensor.Tensor())
            inputs.append(tensor.Tensor())
            outputs = rnn.forward(model_pb2.kEval, inputs)[0:-2]
            for output, label in zip(outputs, labels):
                output = dense.forward(model_pb2.kEval, output)
                eval_loss += lossfun.forward(model_pb2.kEval, output,
                                             label).l1()
        print 'Epoch %d, evaluation loss is %f' % \
            (epoch, eval_loss / data.num_test_batch / seq_length)

        if (epoch + 1) % 30 == 0:
            # checkpoint the file model
            with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
                print 'saving model to %s_%d.bin' % (model_path, epoch)
                d = {}
                for name, w in zip(['rnn_w', 'dense_w', 'dense_b'],
                                   [rnn_w, dense_w, dense_b]):
                    w.to_host()
                    d[name] = tensor.to_numpy(w)
                    w.to_device(cuda)
                d['idx_to_char'] = data.idx_to_char
                d['char_to_idx'] = data.char_to_idx
                d['hidden_size'] = hidden_size
                d['num_stacks'] = num_stacks
                d['dropout'] = dropout

                pickle.dump(d, fd)
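convert is not shown; the loop above expects it to turn a batch of character-index sequences into one one-hot input tensor per time step, plus the next-character labels. A sketch under that assumption (the (batch_size, seq_length + 1) layout of batch is inferred, not given):

import numpy as np
from singa import tensor

def convert(batch, batch_size, seq_length, vocab_size, dev):
    # Hypothetical version: batch is an int array of shape
    # (batch_size, seq_length + 1); position t is the input,
    # position t + 1 is its label.
    inputs, labels = [], []
    for t in range(seq_length):
        x = np.zeros((batch_size, vocab_size), dtype=np.float32)
        x[np.arange(batch_size), batch[:, t]] = 1.0
        tx = tensor.from_numpy(x)
        tx.to_device(dev)
        inputs.append(tx)
        ty = tensor.from_numpy(batch[:, t + 1].astype(np.int32))
        ty.to_device(dev)
        labels.append(ty)
    return inputs, labels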
Example #7
    def train(self, data, max_epoch, model_path='model'):
        if self.use_cpu:
            print 'Using CPU'
            self.dev = device.get_default_device()
        else:
            print 'Using GPU'
            self.dev = device.create_cuda_gpu()

        self.net.to_device(self.dev)
        opt = optimizer.SGD(momentum=0.9, weight_decay=1e-4)
        # opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
        for (p, n) in zip(self.net.param_values(), self.net.param_names()):
            if 'var' in n:
                p.set_value(1.0)
            elif 'gamma' in n:
                p.uniform(0, 1)
            elif 'weight' in n:
                p.gaussian(0, 0.01)
            else:
                p.set_value(0.0)
            print n, p.shape, p.l1()

        tx = tensor.Tensor((self.batch_size, self.maxlen, self.vocab_size),
                           self.dev)
        ty = tensor.Tensor((self.batch_size, ), self.dev, core_pb2.kInt)
        train_x, train_y, test_x, test_y = data
        num_train_batch = train_x.shape[0] / self.batch_size
        num_test_batch = test_x.shape[0] / self.batch_size
        idx = np.arange(train_x.shape[0], dtype=np.int32)
        for epoch in range(max_epoch):
            np.random.shuffle(idx)
            loss, acc = 0.0, 0.0
            print '\nEpoch %d' % epoch
            start = time()
            for b in range(num_train_batch):
                batch_loss, batch_acc = 0.0, 0.0
                grads = []
                x = train_x[idx[b * self.batch_size:(b + 1) * self.batch_size]]
                y = train_y[idx[b * self.batch_size:(b + 1) * self.batch_size]]
                # x.shape = (batch_size, maxlen); y.shape = (batch_size,)
                # for input as (batch_size, max_len, vocab_size)
                sam_arrs = convert_samples(x, x.shape[1], self.vocab_size,
                                           self.dev)
                tx.copy_from_numpy(sam_arrs)
                ty.copy_from_numpy(np.array(y, dtype='int32'))
                grads, (batch_loss, batch_acc) = self.net.train(tx, ty)
                for (s, p, g) in zip(self.net.param_names(),
                                     self.net.param_values(), grads):
                    opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
                # update progress bar
                utils.update_progress(
                    b * 1.0 / num_train_batch,
                    'training loss = %f, accuracy = %f' %
                    (batch_loss, batch_acc))
                loss += batch_loss
                acc += batch_acc

            print "\ntraining time = ", time() - start
            info = 'training loss = %f, training accuracy = %f, lr = %f' \
                   % (loss / num_train_batch, acc / num_train_batch, get_lr(epoch))
            print info

            loss, acc = 0.0, 0.0
            start = time()
            for b in range(num_test_batch):
                batch_loss, batch_acc = 0.0, 0.0
                x = test_x[b * self.batch_size:(b + 1) * self.batch_size]
                y = test_y[b * self.batch_size:(b + 1) * self.batch_size]
                sam_arrs = convert_samples(x, x.shape[1], self.vocab_size,
                                           self.dev)
                tx.copy_from_numpy(sam_arrs)
                ty.copy_from_numpy(np.array(y, dtype='int32'))
                # evaluate (rather than train) on the held-out set
                batch_loss, batch_acc = self.net.evaluate(tx, ty)
                loss += batch_loss
                acc += batch_acc

            print "evaluation time = ", time() - start
            print 'test loss = %f, test accuracy = %f \n' \
                  % (loss / num_test_batch, acc / num_test_batch)

            if (epoch % 2) == 1 or epoch + 1 == max_epoch:
                # checkpoint the file model
                with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
                    print 'saving model to %s_%d.bin' % (model_path, epoch)
                    d = {}
                    for name, w in zip(self.net.param_names(),
                                       self.net.param_values()):
                        w.to_host()
                        d[name] = tensor.to_numpy(w)
                        w.to_device(self.dev)
                    pickle.dump(d, fd)
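convert_samples is assumed to one-hot encode a batch of token-index sequences into the (batch_size, maxlen, vocab_size) array that tx.copy_from_numpy expects; a numpy sketch (dev is accepted only to match the call sites above):

import numpy as np

def convert_samples(x, maxlen, vocab_size, dev):
    # Hypothetical encoder: x is an int array of shape (batch_size, maxlen).
    out = np.zeros((x.shape[0], maxlen, vocab_size), dtype=np.float32)
    for i in range(x.shape[0]):
        out[i, np.arange(maxlen), x[i]] = 1.0
    return out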
Example #8
    def train(self, data_path, max_epoch, model_path='model'):
        # RMSProp with gradient L2-norm clipping (threshold 5)
        opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
        #opt = optimizer.SGD(momentum=0.9, weight_decay=5e-4)

        # initialize embedding layer
        embed_w = self.embed.param_values()[0]
        embed_b = self.embed.param_values()[1]
        #initializer.uniform(embed_w, 0, embed_w.shape[1])
        embed_w.uniform(-0.08, 0.08)
        embed_b.set_value(0)
        print 'embed weight l1 = %f' % (embed_w.l1())
        print 'embed b l1 = %f' % (embed_b.l1())

        # initialize lstm layer
        lstm_w = self.lstm.param_values()[0]
        lstm_w.uniform(-0.08, 0.08)  # init all lstm parameters
        print 'lstm weight l1 = %f' % (lstm_w.l1())

        # initialize dense layer
        dense_w = self.dense.param_values()[0]
        dense_b = self.dense.param_values()[1]
        dense_w.uniform(-0.1, 0.1)
        dense_b.set_value(0)
        print 'dense w ', dense_w.shape
        print 'dense b ', dense_b.shape
        print 'dense weight l1 = %f' % (dense_w.l1())
        print 'dense b l1 = %f' % (dense_b.l1())

        start = time()
        train_dat, train_label, val_dat, val_label = load_sample() 
        #train_dat, train_label, val_dat, val_label = load_corpus(data_path)
        train_label = word2onehot(train_label, 2)
        val_label = word2onehot(val_label, 2)
        print 'loading time:', time() - start
        print "train data shape:", train_dat.shape, "train label shape:", train_label.shape
        print "val data shape:", val_dat.shape, "val label shape:", val_label.shape
        for epoch in range(max_epoch):
            train_loss = 0
            num_train_batch = train_dat.shape[0] / self.batchsize
            glb_acc = 0
            for b in range(num_train_batch):
                start = time()
                # load training data
                inputs_arr = train_dat[b * self.batchsize: (b + 1) * self.batchsize]
                labels = train_label[b * self.batchsize: (b + 1) * self.batchsize]
                lens = rm_padding(inputs_arr)
                acc = 0
                batch_loss = 0.0
                g_dense_w = tensor.Tensor(dense_w.shape, self.dev)
                g_dense_w.set_value(0)
                g_dense_b = tensor.Tensor(dense_b.shape, self.dev)
                g_dense_b.set_value(0)
                g_lstm_w = tensor.Tensor(lstm_w.shape, self.dev)
                g_lstm_w.set_value(0)
                g_embed_w = tensor.Tensor(embed_w.shape, self.dev)
                g_embed_w.set_value(0)
                for idx_sam in range(len(inputs_arr)):
                    sam_arr = inputs_arr[idx_sam]
                    sam_arr = convert_sample(sam_arr, sam_arr.shape[0], self.vocab_size, self.dev)
                    sample = tensor.from_numpy(sam_arr)
                    sample.to_device(self.dev)
                    #print sample.shape
                    embed = self.embed.forward(model_pb2.kTrain, sample)
                    #print embed.shape is (53, 128)
                    # embed.shape[0] means the sequence length of the sample
                    embeded = []
                    for idx_seq in range(self.seq_length):
                        if idx_seq >= embed.shape[0]:
                            embeded.append(tensor.Tensor())
                        else:
                            seq = tensor.Tensor((1, embed.shape[1]), self.dev)
                            tensor.copy_data_to_from(seq, embed, embed.shape[1],
                                                     0, idx_seq * embed.shape[1])
                            embeded.append(seq)
                    embeded.append(tensor.Tensor()) # hx
                    embeded.append(tensor.Tensor()) # cx
                    #print 'forward embedding time:', time() -start
                    #print tensor.to_numpy(embeded[self.seq_length-1])
                   
                    # forward lstm layer
                    hidden = self.lstm.forward(model_pb2.kTrain, embeded)
                    # outputs are [y1, ..., yn, hx, cx], only need the last output as the predicted latent vector
                    #print len(hidden), hidden[embed.shape[0]-1]
                    #print [hidden[i].l1() for i in range(len(hidden))]
                    # forward dense and loss layer
                    act = self.dense.forward(model_pb2.kTrain, hidden[lens[idx_sam]-1])
                    label = tensor.from_numpy(labels[idx_sam])
                    label.to_device(self.dev)
                    lvalue = self.loss.forward(model_pb2.kTrain, act, label)
                    #print 'forward dense time:', time() - start
                    regularized_act = self.sft.forward(model_pb2.kEval, act)
                    pred = tensor.to_numpy(regularized_act)
                    gt = labels[idx_sam][1]
                    if (gt and pred[0,1] > pred[0,0]) or (gt == 0 and pred[0,1] <= pred[0,0]):
                        acc += 1
                
                    grads = []
                    batch_loss += lvalue.l1() / self.batchsize
                    #print batch_loss
                    start = time()
                    # backward loss and dense layer
                    grad = self.loss.backward() / self.batchsize
                    grad, gwb = self.dense.backward(model_pb2.kTrain, grad)
                    g_dense_w += gwb[0]
                    g_dense_b += gwb[1]
                    #print 'dense_w l1 = %f' % (gwb[0].l1())
                    for i in range(self.seq_length):
                        if i == lens[idx_sam] - 1:
                            grads.append(grad)
                        else:
                            emp = tensor.Tensor(grad.shape, self.dev)
                            emp.set_value(0)
                            grads.append(emp)
                    grads.append(tensor.Tensor())
                    grads.append(tensor.Tensor())
                    # backward lstm layer
                    lstm_input_grad, lstm_param_grad = self.lstm.backward(model_pb2.kTrain, grads)
                    g_lstm_w += lstm_param_grad[0] 
                    #print 'lstm_input l1 = %f' % (lstm_input_grad[0].l1())
                    #print 'backward lstm'  
                    embed_grad = tensor.Tensor(embed.shape, self.dev)
                    for idx in range(len(lstm_input_grad) - 2):
                        tensor.copy_data_to_from(embed_grad, lstm_input_grad[idx],
                                                 embed.shape[1],
                                                 idx * embed.shape[1], 0)
                    _, grad_w = self.embed.backward(model_pb2.kTrain, embed_grad)
                    #print 'backward embedding time:', time() - start
                    #print 'embed weight l1 = %f' % (grad_w[0].l1())
                    g_embed_w += grad_w[0]

                train_loss += batch_loss
                glb_acc += acc

                utils.update_progress(
                    b * 1.0 / num_train_batch, 'training loss = %f, acc = %f' %
                    (batch_loss, acc * 1.0 / self.batchsize))
                opt.apply_with_lr(epoch, get_lr(epoch), g_lstm_w, lstm_w, 'lstm_w')
                opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w, 'dense_w')
                opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b, 'dense_b')
                opt.apply_with_lr(epoch, get_lr(epoch), g_embed_w, embed_w, 'embed_w')
                #opt.apply_with_lr(epoch, get_lr(epoch), grad_w[1], embed_b, 'embed_b')
            print '\nEpoch %d, train loss is %f, acc = %f' % \
                    (epoch, train_loss / num_train_batch, glb_acc * 1. / (self.batchsize * num_train_batch))

            # evaluation
            eval_loss = 0
            val_acc = 0
            num_test_batch = min(5000, val_dat.shape[0] / self.batchsize)
            for b in range(num_test_batch):
                acc = 0
                val_arr = val_dat[b * self.batchsize: (b + 1) * self.batchsize]
                labels = val_label[b * self.batchsize: (b + 1) * self.batchsize]
                lens = rm_padding(val_arr)
                val_arr = convert(val_arr, self.batchsize, self.seq_length,
                                  self.vocab_size, self.dev)
                val_arr = np.swapaxes(val_arr, 0, 1).reshape(
                    (self.batchsize * self.seq_length, self.vocab_size))
                inputs = tensor.from_numpy(val_arr)
                inputs.to_device(self.dev) # shape (128*53, 33366)
                embed = self.embed.forward(model_pb2.kEval, inputs)
                embed.reshape((self.seq_length, self.batchsize, self.embed_size))
                embeded = []
                for idx in range(self.seq_length):
                    embed_seq = tensor.Tensor((self.batchsize, self.embed_size),
                                              self.dev)
                    tensor.copy_data_to_from(embed_seq, embed,
                                             self.batchsize * self.embed_size, 0,
                                             idx * self.batchsize * self.embed_size)
                    embeded.append(embed_seq)
                embeded.append(tensor.Tensor()) # hx
                embeded.append(tensor.Tensor()) # cx

                hidden = self.lstm.forward(model_pb2.kEval, embeded)
                hidden_batch = tensor.Tensor((self.batchsize, self.hidden_size),
                                             self.dev)
                for idx in range(self.batchsize):
                    tensor.copy_data_to_from(hidden_batch, hidden[lens[idx] - 1],
                                             self.hidden_size,
                                             idx * self.hidden_size,
                                             idx * self.hidden_size)

                act = self.dense.forward(model_pb2.kEval, hidden_batch)
                labels = tensor.from_numpy(labels)
                labels.to_device(self.dev)
                eval_loss += self.loss.forward(model_pb2.kEval, act, labels).l1()
                regularized_act = self.sft.forward(model_pb2.kEval, act)
                pred = tensor.to_numpy(regularized_act)
                gt = tensor.to_numpy(labels)[:,1]
                for i in range(self.batchsize):
                    if (gt[i] and pred[i,1] > pred[i,0]) or (gt[i] == 0 and pred[i,1] <= pred[i,0]):
                        acc += 1
                #print 'acc = %f' % (acc * 1. / self.batchsize)
                val_acc += acc
  
            print 'Epoch %d, evaluation loss is %f, acc = %f' % \
                (epoch, eval_loss / num_test_batch, val_acc * 1. / (num_test_batch * self.batchsize))

            # model saving
            if (epoch + 1) % 2 == 0 or epoch + 1 == max_epoch:
                print 'dense weight l1 = %f' % (dense_w.l1())
                print 'dense bias l1 = %f' % (dense_b.l1())
                print 'lstm weight l1 = %f' % (lstm_w.l1())
                print 'embed weight l1 = %f' % (embed_w.l1())
                # checkpoint the file model
                with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
                    print 'saving model to %s_%d.bin' % (model_path, epoch)
                    d = {}
                    for name, w in zip(
                        ['embed_w','embed_b', 'lstm_w', 'dense_w', 'dense_b'],
                        [embed_w, embed_b, lstm_w, dense_w, dense_b]):
                        w.to_host()
                        d[name] = tensor.to_numpy(w)
                        w.to_device(self.dev)
                    '''d['idx_to_char'] = data.idx_to_char
                    d['char_to_idx'] = data.char_to_idx
                    d['hidden_size'] = hidden_size
                    d['num_stacks'] = num_stacks
                    d['dropout'] = dropout'''
                    pickle.dump(d, fd)
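To restore such a checkpoint, the pickle is read back and each saved array copied into the matching live parameter; a minimal sketch assuming the five names written above:

import pickle

def load_checkpoint(path, params):
    # Hypothetical restore: params maps the saved names ('embed_w',
    # 'embed_b', 'lstm_w', 'dense_w', 'dense_b') to live parameter tensors.
    with open(path, 'rb') as fd:
        d = pickle.load(fd)
    for name, p in params.items():
        p.copy_from_numpy(d[name])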