예제 #1
0
    def fitXy(self, x, y, batch_size=100, n_epochs=10, print_every=10,
              valdata=None):
        '''
        fit a model to x, y data by batch
        '''
        n, d = x.shape
        time_start = time.time()
        losses = []
        cost = 0
        
        for epoch in range(n_epochs):
            x, y = data_shuffle(x, y)

            num_batches = math.ceil(n / batch_size)

            for k in range(num_batches):
                start, end = k * batch_size, min((k + 1) * batch_size, n)
                x_batch, y_batch = prepareData(x[start:end], y[start:end])
                y_hat, regret = self.step(x_batch, y_batch)
                m = end-start
                cost += 1 / (k+1) * (regret/m - cost)
                
                if print_every != 0 and k % print_every == 0:
                    losses.append(cost)
                    print('%.2f%% (%s) %.4f' % ((epoch * n + (k+1) * (end-start)) /
                                                (n_epochs * n) * 100, # progress
                                                timeSince(time_start), # time 
                                                cost)) # cost
                    torch.save(self.model, 'models/%s.pt' % self.name)
                    np.save('models/%s.loss' % self.name, losses)
        return losses
예제 #2
0
    def fitData(self, data, batch_size=100, n_epochs=10, print_every=10,
                valdata=None):
        '''
        fit a model to x, y data by batch
        print_every is 0 if do not wish to print
        '''
        time_start = time.time()
        losses = []
        best_valloss, best_valindex = np.inf, 0
        vallosses = []
        n = len(data.dataset)
        cost = 0 
        
        for epoch in range(n_epochs):

            for k, (x_batch, y_batch) in enumerate(data):
                x_batch, y_batch = to_var(x_batch), to_var(y_batch)
                y_hat, regret = self.step(x_batch, y_batch)
                m = x_batch.size(0)                
                cost += 1 / (k+1) * (regret/m - cost)

                if print_every != 0 and k % print_every == 0:
                    
                    losses.append(cost)
                    # progress, time, avg loss, auc
                    to_print = ('%.2f%% (%s) %.4f %.4f' % ((epoch * n + (k+1) * m) /
                                                           (n_epochs * n) * 100,
                                                           timeSince(time_start),
                                                           cost,
                                                           model_auc(self.model,
                                                                     data)))
                    if valdata is not None:
                        valloss = calc_loss(self.model, valdata, self.loss)
                        vallosses.append(valloss)
                        np.save('models/%s.valloss' % self.name, vallosses)   
                        to_print += " %.4f" % model_auc(self.model, valdata)

                        if valloss <= best_valloss:
                            best_valloss = valloss
                            best_valindex = len(vallosses) - 1
                            torch.save(self.model, 'models/%s.pt' % self.name)   
                    else:
                        torch.save(self.model, 'models/%s.pt' % self.name)
                        
                    print(to_print)
                    np.save('models/%s.loss' % self.name, losses)

                    cost = 0
                    
        return losses, vallosses
예제 #3
0
    def fit(self, data, batch_size=100, n_epochs=10,
            valdata=None, val_theta=None, use_auc=False):
        '''
        fit a model to x, y data by batch
        val_theta: for recovering heterogeneous subpopulation
        '''
        savedir = os.path.dirname('nonlinear_models/%s' % self.name)
        os.system('mkdir -p %s' % savedir)
        self.writer = SummaryWriter(log_dir=self.log_dir)        
        
        time_start = time.time()
        losses = []
        vallosses = [1000]
        best_valloss, best_valindex = 1000, 0 # for early stopping
        n = len(data.dataset)
        cost = 0
        self.count = 0
        
        for epoch in range(n_epochs):

            for k, (x_batch, y_batch) in enumerate(data):

                x_batch, y_batch = to_var(x_batch).float(), to_var(y_batch).float()
                y_hat, regret = self.step(x_batch, y_batch)
                m = x_batch.size(0)                
                cost += 1 / (k+1) * (regret - cost)

                if self.print_every != 0 and self.count % self.print_every == 0:

                    losses.append(cost)
                    
                    # progress, time, avg loss, auc
                    duration = timeSince(time_start)
                    if int(duration.split('m')[0]) >= self.max_time:
                        return losses
                    
                    to_print = ('%.2f%% (%s) %.4f' % ((epoch * n + (k+1) * m) /
                                                      (n_epochs * n) * 100,
                                                      duration,
                                                      cost))
                    
                    print(to_print)
                    # if self.draw_plot:
                    #     self.plotMTL()
                    #     self.plot(x_batch, y_batch, silence=self.silence, inrange=True)

                    if valdata is not None:
                        if use_auc:
                            acc = reportAuc(self, valdata)
                        else:
                            acc = reportAcc(self,valdata)

                        valloss = -acc
                        vallosses.append(valloss)
                        if valloss <= best_valloss:
                            best_valloss = valloss
                            best_valindex = len(vallosses) - 1

                            torch.save(self.weightNet,
                                       'nonlinear_models/%s.pt' % self.name)
                            np.save('nonlinear_models/%s.loss' % self.name, losses)
                            
                        if len(vallosses) - best_valindex > self.n_early_stopping:
                            print('early stop at iteration', self.count)
                            return losses                            

                        if use_auc:
                            # note acc here is auc
                            self.writer.add_scalar('data/val_auc', acc,
                                                   self.count)
                        else:
                            self.writer.add_scalar('data/val_acc', acc,
                                                   self.count)
                            
                        if val_theta is not None:
                            sim = self.evaluate_subpopulation(val_theta, valdata)
                            self.writer.add_scalar('data/subpopulation_cosine',
                                                   sim, self.count)

                    self.writer.add_scalar('weight/grad_norm', gradNorm(self.weightNet),
                                           self.count)
                    self.writer.add_scalar('data/train_loss', cost, self.count)
                    
                    # for tag, value in self.weightNet.named_parameters():
                    #     tag = tag.replace('.', '/')
                    #     self.writer.add_histogram(tag, to_np(value), self.count)
                    #     if value.grad is not None:
                    #         self.writer.add_histogram(tag+'/grad', to_np(value.grad),
                    #                                   self.count)
                    cost = 0
                    
                self.count += 1

        # if self.draw_plot:
        #     self.plot(x_batch, y_batch, inrange=True, silence=self.silence)

        return losses
예제 #4
0
    def fit(self, data, batch_size=100, n_epochs=10,
            valdata=None, val_theta=None):
        '''
        fit a model to x, y data by batch
        val_theta: for recovering heterogeneous subpopulation
        '''
        savedir = os.path.dirname('nonlinear_models/%s' % self.name)
        os.system('mkdir -p %s' % savedir)
        self.writer = SummaryWriter(log_dir=self.log_dir)        
        
        time_start = time.time()
        losses = []
        vallosses = [1000]
        best_valloss, best_valindex = 1000, 0 # for early stopping
        n = len(data.dataset)
        cost = 0
        self.count = 0
        
        for epoch in range(n_epochs):

            for k, (x_batch, y_batch) in enumerate(data):

                x_batch, y_batch = to_var(x_batch).float(), to_var(y_batch).float()
                y_hat, regret = self.step(x_batch, y_batch)
                m = x_batch.size(0)                
                cost += 1 / (k+1) * (regret - cost)

                if self.print_every != 0 and self.count % self.print_every == 0:

                    losses.append(cost)
                    
                    # progress, time, avg loss, auc
                    duration = timeSince(time_start)
                    if int(duration.split('m')[0]) >= self.max_time:
                        return losses
                    
                    to_print = ('%.2f%% (%s) %.4f' % ((epoch * n + (k+1) * m) /
                                                      (n_epochs * n) * 100,
                                                      duration,
                                                      cost))
                    
                    print(to_print)

                    if valdata is not None:
                        _mse = reportMSE(self,valdata,is_autoencoder=True)
                        valloss = _mse
                        vallosses.append(valloss)
                        if valloss <= best_valloss:
                            best_valloss = valloss
                            best_valindex = len(vallosses) - 1

                            torch.save(self.autoencoder,
                                       'nonlinear_models/%s.pt' % self.name)
                            np.save('nonlinear_models/%s.loss' % self.name, losses)
                            
                        if len(vallosses) - best_valindex > self.n_early_stopping:
                            print('early stop at iteration', self.count)
                            return losses                            

                        self.writer.add_scalar('data/val_mse', _mse, self.count)

                        
                    self.writer.add_scalar('model/grad_norm', gradNorm(self.autoencoder),
                                           self.count)
                    # self.writer.add_scalar('data/train_loss', cost, self.count)
                    
                    cost = 0
                    
                self.count += 1

        return losses