Example #1
File: nn.py Project: yujiali/nn
    def train(self):
        config = self.config

        layer_config = LayerConfig()
        layer_config.learn_rate = config.learn_rate
        layer_config.momentum = config.momentum
        layer_config.weight_decay = config.weight_decay

        nnstore = NNStore()
        nnstore.init_from_net(self)

        for epoch in range(0, config.num_epochs):
            # shuffle the data cases
            idx = np.random.permutation(self.num_total_cases)
            train_X = self.train_data.X[idx]
            train_T = self.train_data.T[idx]

            loss = 0

            for batch in range(0, self.num_minibatches):
                i_start = batch * config.minibatch_size
                if batch != self.num_minibatches - 1:
                    i_end = i_start + config.minibatch_size
                else:
                    # the last minibatch absorbs the remainder
                    i_end = self.num_total_cases

                X = train_X[i_start:i_end]
                T = train_T[i_start:i_end]
                Xbelow = X

                # forward pass
                for i in range(0, self.num_layers):
                    Xbelow = self.layer[i].forward(Xbelow)
                self.output.forward(Xbelow)

                # compute loss
                loss += self.output.loss(T)

                # backprop
                dLdXabove = self.output.backprop(layer_config)
                for i in range(self.num_layers-1, -1, -1):
                    dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)

            # statistics
            avg_loss = 1.0 * loss / self.num_total_cases

            if (epoch + 1) % config.epoch_to_display == 0:
                print 'epoch ' + str(epoch + 1) + ', loss = ' + str(avg_loss)

            if (epoch + 1) % config.epoch_to_save == 0:
                nnstore.update_from_net(self)
                nnstore.write(config.output_dir + '/m' + str(epoch + 1) + '.pdata')
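
The slicing above gives every minibatch exactly config.minibatch_size cases except the last one, which absorbs the remainder. A minimal standalone sketch of that boundary arithmetic (the values and the ceil-style num_minibatches are assumptions that mirror what the loop implies):

    # Assumed: num_minibatches = ceil(num_total_cases / minibatch_size),
    # which is what the loop above implies.
    num_total_cases = 103
    minibatch_size = 20
    num_minibatches = (num_total_cases + minibatch_size - 1) // minibatch_size

    for batch in range(num_minibatches):
        i_start = batch * minibatch_size
        if batch != num_minibatches - 1:
            i_end = i_start + minibatch_size
        else:
            i_end = num_total_cases  # last batch absorbs the remainder
        print('batch %d: [%d, %d), %d cases' % (batch, i_start, i_end, i_end - i_start))
    # six slices: 20, 20, 20, 20, 20, 3 cases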
Example #2
    def train(self):
        config = self.config

        # convert t into a matrix in 1-of-K representation if it is a vector
        t = self.train_data.T
        if not self.config.is_regression:
            T_matrix = self.output.act_type.label_vec_to_mat(t, self.train_data.K)
        else:
            T_matrix = t

        layer_config = LayerConfig()
        layer_config.learn_rate = config.learn_rate
        layer_config.momentum = config.momentum
        layer_config.weight_decay = config.weight_decay

        nnstore = NNStore()
        nnstore.init_from_net(self)

        self.display_training_info(-1, 0, 0)
        t_start = time.time()

        for epoch in range(0, config.num_epochs):
            # shuffle the dataset 
            idx = np.random.permutation(self.num_total_cases)
            train_X = self.train_data.X[idx]
            train_T = T_matrix[idx]

            loss = 0

            for batch in range(0, self.num_minibatches):
                i_start = batch * config.minibatch_size
                if batch != self.num_minibatches - 1:
                    i_end = i_start + config.minibatch_size
                else:
                    i_end = self.num_total_cases

                X = train_X[i_start:i_end]
                T = train_T[i_start:i_end]
                Xbelow = X

                # forward pass
                for i in range(0, self.num_layers):
                    Xbelow = self.layer[i].forward(Xbelow)
                self.output.forward(Xbelow)

                # compute loss
                loss += self.output.loss(T)

                # backprop
                dLdXabove = self.output.backprop(layer_config)
                for i in range(self.num_layers-1, -1, -1):
                    dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)

            # statistics
            avg_loss = 1.0 * loss / self.num_total_cases

            if (epoch + 1) % config.epoch_to_display == 0:
                self.display_training_info(epoch, avg_loss, time.time() - t_start)
                t_start = time.time()

            if (epoch + 1) % config.epoch_to_save == 0:
                nnstore.update_from_net(self)
                nnstore.write(config.output_dir + '/m' + str(epoch + 1) + '.pdata')
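
Unlike Example #1, this version converts integer class labels into a 1-of-K (one-hot) target matrix before training. label_vec_to_mat itself is not shown in these snippets; the following numpy stand-in is only a sketch of what such a helper plausibly does (name, signature, and layout are assumptions):

    import numpy as np

    def label_vec_to_mat(t, K):
        # Hypothetical stand-in: length-N label vector -> N x K matrix
        # with a 1 in column t[i] of row i (1-of-K coding).
        T = np.zeros((len(t), K))
        T[np.arange(len(t)), t] = 1
        return T

    t = np.array([2, 0, 1])
    print(label_vec_to_mat(t, 3))
    # rows are the one-hot codes for labels 2, 0 and 1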
Example #3
    def train(self):
        config = self.config

        # convert t into a matrix in 1-of-K representation if it is a vector
        t = self.train_data.T
        if not self.config.is_regression:
            T_matrix = self.output.act_type.label_vec_to_mat(
                t, self.train_data.K)
        else:
            T_matrix = t

        layer_config = LayerConfig()
        layer_config.learn_rate = config.learn_rate
        layer_config.momentum = config.momentum
        layer_config.weight_decay = config.weight_decay

        nnstore = NNStore()
        nnstore.init_from_net(self)

        self.display_training_info(-1, 0, 0)
        t_start = time.time()

        for epoch in range(0, config.num_epochs):
            # shuffle the dataset
            idx = np.random.permutation(self.num_total_cases)
            train_X = self.train_data.X[idx]
            train_T = T_matrix[idx]

            loss = 0

            for batch in range(0, self.num_minibatches):
                i_start = batch * config.minibatch_size
                if batch != self.num_minibatches - 1:
                    i_end = i_start + config.minibatch_size
                else:
                    i_end = self.num_total_cases

                X = train_X[i_start:i_end]
                T = train_T[i_start:i_end]
                Xbelow = X

                # forward pass
                for i in range(0, self.num_layers):
                    Xbelow = self.layer[i].forward(Xbelow)
                self.output.forward(Xbelow)

                # compute loss
                loss += self.output.loss(T)

                # backprop
                dLdXabove = self.output.backprop(layer_config)
                for i in range(self.num_layers - 1, -1, -1):
                    dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)

            # statistics
            avg_loss = 1.0 * loss / self.num_total_cases

            if (epoch + 1) % config.epoch_to_display == 0:
                self.display_training_info(epoch, avg_loss,
                                           time.time() - t_start)
                t_start = time.time()

            if (epoch + 1) % config.epoch_to_save == 0:
                nnstore.update_from_net(self)
                nnstore.write(config.output_dir + '/m' + str(epoch + 1) +
                              '.pdata')
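
Example #3 is the same routine as Example #2, only re-wrapped to PEP8 line lengths. One pattern all the variants share is shuffling with a single index permutation, which keeps inputs and targets aligned; a small self-contained illustration:

    import numpy as np

    np.random.seed(0)                # reproducible demo values
    X = np.arange(10).reshape(5, 2)  # 5 cases, 2 features
    T = np.arange(5)                 # matching targets

    idx = np.random.permutation(len(X))
    X_shuf, T_shuf = X[idx], T[idx]

    # Row i of X_shuf still pairs with T_shuf[i], because both arrays
    # were reordered by the same permutation.
    assert all(X_shuf[i, 0] // 2 == T_shuf[i] for i in range(5))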
Example #4
    def train(self):
        config = self.config

        # convert t into a matrix in 1-of-K representation if it is a vector
        t = self.train_data.T
        T_matrix = self.output.act_type.label_vec_to_mat(t, self.train_data.K)

        layer_config = LayerConfig()
        layer_config.learn_rate = config.learn_rate
        layer_config.momentum = config.init_momentum
        layer_config.weight_decay = config.weight_decay

        nnstore = NNStore()
        nnstore.init_from_net(self)

        best_net = NNStore()
        best_net.init_from_net(self)

        train_acc, val_acc, test_acc = self.display_training_info(
                -1, 
                self._compute_loss(
                    self.train_data.X, T_matrix, config.minibatch_size),
                0)
        acc_rec = np.zeros((config.num_epochs / config.epoch_to_display + 1, 4))
        acc_rec[0, 0] = 0
        acc_rec[0, 1] = train_acc
        if config.is_val:
            acc_rec[0, 2] = val_acc
        if config.is_test:
            acc_rec[0, 3] = test_acc

        t_start = time.time()

        best_acc = val_acc
        if self.config.is_test:
            best_test_acc = test_acc
        best_epoch = -1

        for epoch in range(0, config.num_epochs):
            gnp.free_reuse_cache()

            # decrease learning rate over time
            layer_config.learn_rate = config.learn_rate / \
                    (epoch / config.lr_drop_rate + 1)

            # TODO [dirty] special for Lnsvm: the annealing schedule below is
            # commented out and n is pinned at 0.5, which makes the clamp a
            # no-op
            if isinstance(self.output.act_type, act.LnsvmVariantOutput):
                #self.output.act_type.n = 3.0 - (3.0 - 0.5) / 50 * epoch
                self.output.act_type.n = 0.5
                if self.output.act_type.n < 0.5:
                    self.output.act_type.n = 0.5

                if (epoch + 1) % config.epoch_to_display == 0:
                    print 'n %.4f' % self.output.act_type.n,
            
            if epoch >= config.switch_epoch:
                layer_config.momentum = config.final_momentum

            # shuffle the dataset 
            idx = np.random.permutation(self.num_total_cases)
            #idx = np.arange(self.num_total_cases)
            train_X = self.train_data.X[idx]
            train_T = T_matrix[idx]

            if config.input_noise > 0:
                # dropout-style corruption: zero each input with
                # probability input_noise
                train_X = train_X * (gnp.rand(train_X.shape) > config.input_noise)
                # train_X = train_X + gnp.randn(train_X.shape) * config.input_noise

            loss = 0

            for batch in range(0, self.num_minibatches):
                i_start = batch * config.minibatch_size
                if batch != self.num_minibatches - 1:
                    i_end = i_start + config.minibatch_size
                else:
                    i_end = self.num_total_cases

                X = train_X[i_start:i_end]
                T = train_T[i_start:i_end]

                # forward pass
                self._forward(X)

                # compute loss
                loss += self.output.loss(T)

                # debug hook: report the offending batch and drop into the
                # debugger if the forward pass produced NaNs
                if self.output.Y.isnan().any():
                    print 'batch #%d <-- nan' % batch
                    import ipdb
                    ipdb.set_trace()

                # backprop
                dLdXabove = self.output.backprop(layer_config)
                for i in range(self.num_layers-1, -1, -1):
                    dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)

            # statistics
            avg_loss = 1.0 * loss / self.num_total_cases

            if (epoch + 1) % config.epoch_to_display == 0:
                train_acc, val_acc, test_acc = self.display_training_info(
                        epoch, avg_loss, time.time() - t_start)

                if val_acc is None:
                    val_acc = train_acc

                # when show_task_loss is set the metric is a loss (lower is
                # better); otherwise it is an accuracy (higher is better)
                if (config.show_task_loss and val_acc < best_acc) or \
                        (not config.show_task_loss and val_acc > best_acc):
                    best_acc = val_acc
                    best_net.update_from_net(self)
                    if config.is_test:
                        best_test_acc = test_acc
                    best_epoch = epoch
                t_start = time.time()
                rec_row = (epoch + 1) / config.epoch_to_display
                acc_rec[rec_row, 0] = epoch + 1
                acc_rec[rec_row, 1] = train_acc
                if config.is_val:
                    acc_rec[rec_row, 2] = val_acc
                if config.is_test:
                    acc_rec[rec_row, 3] = test_acc

            if (epoch + 1) % config.epoch_to_save == 0:
                nnstore.update_from_net(self)
                nnstore.write(config.output_dir + '/m' + str(epoch + 1) + '.pdata')


        print '----------------------------------------------------------------'

        if config.show_task_loss:
            s = 'loss'
        else:
            s = 'acc'
        
        if config.is_val:
            print 'Best val_%s %.4f' % (s, best_acc),
        else:
            print 'Best train_%s %.4f' % (s, best_acc),

        if config.is_test:
            print '--> test_%s %.4f' % (s, best_test_acc),
        print 'at epoch %d' % (best_epoch + 1)

        if config.is_output:
            f = open('%s/acc_rec.pdata' % config.output_dir, 'w')
            pickle.dump(acc_rec, f, -1)
            f.close()

            self.write_config('%s/cfg.txt' % config.output_dir)

            # save the best net
            fname = config.output_dir + '/best_net.pdata'
            print 'Saving the best model to ' + fname
            best_net.write(fname)

        if config.is_test:
            return (best_acc, best_test_acc)
        else:
            return best_acc
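
Example #4 decays the learning rate as learn_rate / (epoch / lr_drop_rate + 1), a staircase 1/k schedule (the division is integer division in this Python 2 code). A quick sketch of the schedule's shape; the two values below are assumptions chosen only for illustration:

    learn_rate = 0.1
    lr_drop_rate = 10  # assumed; '//' makes the integer division explicit

    for epoch in [0, 9, 10, 19, 20, 30]:
        lr = learn_rate / (epoch // lr_drop_rate + 1)
        print('epoch %2d -> learn_rate %.4f' % (epoch, lr))
    # 0.1 for epochs 0-9, 0.05 for 10-19, 0.0333 for 20-29,
    # 0.025 for 30-39, and so on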