Example #1
0
    def fit(self, data, batch_size=100, n_epochs=10,
            valdata=None, val_theta=None, use_auc=False):
        '''
        Fit the model to (x, y) data by mini-batch.

        Parameters
        ----------
        data : iterable of (x_batch, y_batch) pairs exposing a ``.dataset``
            attribute (e.g. a torch DataLoader).
        batch_size : kept for interface compatibility; batching is done by
            ``data`` itself, the actual batch size is read from each batch.
        n_epochs : number of passes over ``data``.
        valdata : optional validation data; enables metric logging, model
            checkpointing on improvement, and early stopping.
        val_theta : optional ground-truth parameters for recovering a
            heterogeneous subpopulation; when given, a cosine similarity is
            logged via ``self.evaluate_subpopulation``.
        use_auc : if True, the validation metric is AUC, otherwise accuracy.

        Returns
        -------
        list of logged training costs (one entry per logging interval).
        '''
        # Create the checkpoint directory portably. This replaces shelling
        # out to `mkdir -p`, which was non-portable and unsafe if self.name
        # contained shell metacharacters.
        savedir = os.path.dirname('nonlinear_models/%s' % self.name)
        os.makedirs(savedir, exist_ok=True)
        self.writer = SummaryWriter(log_dir=self.log_dir)

        time_start = time.time()
        losses = []
        vallosses = [1000]
        best_valloss, best_valindex = 1000, 0  # for early stopping
        n = len(data.dataset)
        cost = 0
        self.count = 0

        for epoch in range(n_epochs):

            for k, (x_batch, y_batch) in enumerate(data):

                x_batch, y_batch = to_var(x_batch).float(), to_var(y_batch).float()
                y_hat, regret = self.step(x_batch, y_batch)
                m = x_batch.size(0)
                # Incremental running average of the per-batch regret
                # (keyed on the within-epoch batch index k).
                cost += 1 / (k+1) * (regret - cost)

                if self.print_every != 0 and self.count % self.print_every == 0:

                    losses.append(cost)

                    # progress, time, avg loss, auc
                    duration = timeSince(time_start)
                    # NOTE(review): assumes timeSince formats minutes before
                    # an 'm' separator (e.g. "12m 3s") -- confirm.
                    if int(duration.split('m')[0]) >= self.max_time:
                        return losses

                    # Percentage progress is approximate: (k+1)*m uses the
                    # current batch size for all batches seen so far.
                    to_print = ('%.2f%% (%s) %.4f' % ((epoch * n + (k+1) * m) /
                                                      (n_epochs * n) * 100,
                                                      duration,
                                                      cost))

                    print(to_print)

                    if valdata is not None:
                        if use_auc:
                            acc = reportAuc(self, valdata)
                        else:
                            acc = reportAcc(self, valdata)

                        # Negate so that "lower is better" for both metrics.
                        valloss = -acc
                        vallosses.append(valloss)
                        if valloss <= best_valloss:
                            best_valloss = valloss
                            best_valindex = len(vallosses) - 1

                            # Checkpoint the best model and the loss curve.
                            torch.save(self.weightNet,
                                       'nonlinear_models/%s.pt' % self.name)
                            np.save('nonlinear_models/%s.loss' % self.name, losses)

                        # Early stopping: no improvement for
                        # n_early_stopping consecutive evaluations.
                        if len(vallosses) - best_valindex > self.n_early_stopping:
                            print('early stop at iteration', self.count)
                            return losses

                        if use_auc:
                            # note acc here is auc
                            self.writer.add_scalar('data/val_auc', acc,
                                                   self.count)
                        else:
                            self.writer.add_scalar('data/val_acc', acc,
                                                   self.count)

                        if val_theta is not None:
                            sim = self.evaluate_subpopulation(val_theta, valdata)
                            self.writer.add_scalar('data/subpopulation_cosine',
                                                   sim, self.count)

                    self.writer.add_scalar('weight/grad_norm', gradNorm(self.weightNet),
                                           self.count)
                    self.writer.add_scalar('data/train_loss', cost, self.count)

                    # Reset the running average at each logging interval.
                    cost = 0

                self.count += 1

        return losses
Example #2
0
    def fit(self, data, batch_size=100, n_epochs=10,
            valdata=None, val_theta=None):
        '''
        Fit the autoencoder to (x, y) data by mini-batch.

        Parameters
        ----------
        data : iterable of (x_batch, y_batch) pairs exposing a ``.dataset``
            attribute (e.g. a torch DataLoader).
        batch_size : kept for interface compatibility; batching is done by
            ``data`` itself, the actual batch size is read from each batch.
        n_epochs : number of passes over ``data``.
        valdata : optional validation data; enables MSE logging, model
            checkpointing on improvement, and early stopping.
        val_theta : accepted for interface parity with the other ``fit``
            variants; not used in this implementation.

        Returns
        -------
        list of logged training costs (one entry per logging interval).
        '''
        # Create the checkpoint directory portably. This replaces shelling
        # out to `mkdir -p`, which was non-portable and unsafe if self.name
        # contained shell metacharacters.
        savedir = os.path.dirname('nonlinear_models/%s' % self.name)
        os.makedirs(savedir, exist_ok=True)
        self.writer = SummaryWriter(log_dir=self.log_dir)

        time_start = time.time()
        losses = []
        vallosses = [1000]
        best_valloss, best_valindex = 1000, 0  # for early stopping
        n = len(data.dataset)
        cost = 0
        self.count = 0

        for epoch in range(n_epochs):

            for k, (x_batch, y_batch) in enumerate(data):

                x_batch, y_batch = to_var(x_batch).float(), to_var(y_batch).float()
                y_hat, regret = self.step(x_batch, y_batch)
                m = x_batch.size(0)
                # Incremental running average of the per-batch regret
                # (keyed on the within-epoch batch index k).
                cost += 1 / (k+1) * (regret - cost)

                if self.print_every != 0 and self.count % self.print_every == 0:

                    losses.append(cost)

                    # progress, time, avg loss, auc
                    duration = timeSince(time_start)
                    # NOTE(review): assumes timeSince formats minutes before
                    # an 'm' separator (e.g. "12m 3s") -- confirm.
                    if int(duration.split('m')[0]) >= self.max_time:
                        return losses

                    # Percentage progress is approximate: (k+1)*m uses the
                    # current batch size for all batches seen so far.
                    to_print = ('%.2f%% (%s) %.4f' % ((epoch * n + (k+1) * m) /
                                                      (n_epochs * n) * 100,
                                                      duration,
                                                      cost))

                    print(to_print)

                    if valdata is not None:
                        # Validation loss is the reconstruction MSE
                        # (lower is better).
                        _mse = reportMSE(self, valdata, is_autoencoder=True)
                        valloss = _mse
                        vallosses.append(valloss)
                        if valloss <= best_valloss:
                            best_valloss = valloss
                            best_valindex = len(vallosses) - 1

                            # Checkpoint the best model and the loss curve.
                            torch.save(self.autoencoder,
                                       'nonlinear_models/%s.pt' % self.name)
                            np.save('nonlinear_models/%s.loss' % self.name, losses)

                        # Early stopping: no improvement for
                        # n_early_stopping consecutive evaluations.
                        if len(vallosses) - best_valindex > self.n_early_stopping:
                            print('early stop at iteration', self.count)
                            return losses

                        self.writer.add_scalar('data/val_mse', _mse, self.count)

                    self.writer.add_scalar('model/grad_norm', gradNorm(self.autoencoder),
                                           self.count)

                    # Reset the running average at each logging interval.
                    cost = 0

                self.count += 1

        return losses