Example #1
import os

import numpy as np
import pandas as pd

# compare_truth_pred is assumed to be a helper defined elsewhere in the project


def RetrieveFeaturePredictionNMse(model_name):
    """
    Retrieve the feature and prediction values and place them in numpy arrays
    :param model_name: the name of the model
    :return: Xtruth, Xpred, Ytruth, Ypred, Ymae, Ymse
    """
    # Build the paths of the truth/prediction CSV files for X and Y
    xtruth_file = os.path.join('data',
                               'test_Xtruth_{}.csv'.format(model_name))
    xpred_file = os.path.join('data', 'test_Xpred_{}.csv'.format(model_name))
    ytruth_file = os.path.join('data', 'test_Ytruth_{}.csv'.format(model_name))
    ypred_file = os.path.join('data', 'test_Ypred_{}.csv'.format(model_name))

    # Read the space-delimited, headerless CSV files into DataFrames
    Xtruth = pd.read_csv(xtruth_file, header=None, delimiter=' ')
    Xpred = pd.read_csv(xpred_file, header=None, delimiter=' ')
    Ytruth = pd.read_csv(ytruth_file, header=None, delimiter=' ')
    Ypred = pd.read_csv(ypred_file, header=None, delimiter=' ')

    # Compute the MAE and MSE of the Y predictions against the Y truth
    Ymae, Ymse = compare_truth_pred(ypred_file, ytruth_file)

    print(Xtruth.shape)  # Sanity check on the feature array shape
    return Xtruth.values, Xpred.values, Ytruth.values, Ypred.values, Ymae, Ymse
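
A minimal usage sketch (hypothetical: 'MDN' stands in for a real model name, and the four test_*.csv files are assumed to already exist under data/):

Xtruth, Xpred, Ytruth, Ypred, Ymae, Ymse = RetrieveFeaturePredictionNMse('MDN')
print('X truth/pred shapes:', Xtruth.shape, Xpred.shape)
print('mean Y MSE:', np.mean(Ymse))
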
Example #2
    def train(self):
        """
        The major training function. This starts the training loop using the
        configuration given in self.flags.
        :return: None
        """
        cuda = torch.cuda.is_available()
        if cuda:
            self.model.cuda()

        # Construct the optimizer after the model has been moved to the GPU
        self.optm = self.make_optimizer()
        self.lr_scheduler = self.make_lr_scheduler(self.optm)

        # Time keeping
        tk = time_keeper(
            time_keeping_file=os.path.join(self.ckpt_dir, 'training time.txt'))

        for epoch in range(self.flags.train_step):
            # Set to Training Mode
            train_loss = 0
            # boundary_loss = 0                 # Unnecessary during training since we provide geometries
            self.model.train()
            for j, (geometry, spectra) in enumerate(self.train_loader):
                if cuda:
                    geometry = geometry.cuda()  # Put data onto GPU
                    spectra = spectra.cuda()  # Put data onto GPU
                #print('spectra = ', spectra)
                #print('geometry = ', geometry)
                self.optm.zero_grad()  # Zero the gradient first
                pi, sigma, mu = self.model(spectra)  # Get the output
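                # MDN output (shapes assumed from the debug prints below):
                # pi holds the mixture weights for each sample, while sigma
                # and mu hold every component's standard deviations and means.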
                #print('spectra = {}, pi, sigma, mu = {}, {}, {}'.format(spectra.cpu().numpy(),
                #                        pi.detach().cpu().numpy()[0,:],
                #                        sigma.detach().cpu().numpy()[0,:,0],
                #                        mu.detach().cpu().numpy()[0,:,0]))
                #print('geometry shape', geometry.size())
                loss = self.make_loss(pi, sigma, mu,
                                      geometry)  # Get the loss tensor
                #loss = self.make_loss(pi, sigma, mu, geometry, warmup=epoch)               # Get the loss tensor
                #Xpred = mdn.sample(pi, sigma, mu).detach().cpu().numpy()
                #Ypred = torch.tensor(simulator(self.flags.data_set, Xpred), requires_grad=False)
                #if cuda:
                #    Ypred = Ypred.cuda()
                #simulator_loss = nn.functional.mse_loss(Ypred, spectra).detach().cpu().numpy() # Get the loss tensor
                #print('nll loss at epoch {}, batch {} is {} '.format(epoch, j, loss.detach().cpu().numpy()))
                loss.backward()  # Calculate the backward gradients
                # Clip gradients element-wise to [-1, 1] to stabilize training
                torch.nn.utils.clip_grad_value_(self.model.parameters(), 1)
                self.optm.step()  # Move the optimizer one step
                train_loss += loss.detach()  # Aggregate the loss without retaining the graph
                # boundary_loss += self.Boundary_loss                 # Aggregate the BDY loss

            # Calculate the avg loss of training
            train_avg_loss = train_loss.cpu().numpy() / (j + 1)
            # boundary_avg_loss = boundary_loss.cpu().data.numpy() / (j + 1)

            if epoch % self.flags.eval_step == 0:  # Every eval_step epochs, evaluate and log to TensorBoard
                # Record the training loss to the tensorboard
                self.log.add_scalar('Loss/train', train_avg_loss, epoch)
                #self.log.add_scalar('Loss/simulator_train', simulator_loss, epoch)
                # self.log.add_scalar('Loss/BDY_train', boundary_avg_loss, epoch)

                # Set to Evaluation Mode
                self.model.eval()
                print("Doing Evaluation on the model now")
                test_loss = 0
                for j, (geometry, spectra) in enumerate(
                        self.test_loader):  # Loop through the eval set
                    if cuda:
                        geometry = geometry.cuda()
                        spectra = spectra.cuda()
                    pi, sigma, mu = self.model(spectra)  # Get the output
                    if self.flags.data_set == 'meta_material':
                        loss = self.make_loss(pi, sigma, mu,
                                              geometry)  # Get the loss tensor
                        test_loss += loss.detach().cpu().numpy()
                    else:
                        Xpred = mdn.sample(pi, sigma, mu).detach().cpu().numpy()
                        Ypred_np = simulator(self.flags.data_set, Xpred)
                        mae, mse = compare_truth_pred(Ypred_np,
                                                      spectra.cpu().numpy(),
                                                      cut_off_outlier_thres=10,
                                                      quiet_mode=True)
                        test_loss += np.mean(mse)  # Aggregate the loss
                    break  # Evaluating only the first batch is enough here

                # Record the testing loss to the tensorboard
                test_avg_loss = test_loss / (j + 1)
                self.log.add_scalar('Loss/test', test_avg_loss, epoch)

                print("This is Epoch %d, training loss %.5f, validation loss %.5f"\
                      % (epoch, train_avg_loss, test_avg_loss ))
                #print("This is Epoch %d, training loss %.5f, validation loss %.5f, training simulator loss %.5f" \
                #      % (epoch, train_avg_loss, test_avg_loss, simulator_loss ))
                # Plotting the first spectra prediction for validation
                # f = self.compare_spectra(Ypred=logit[0,:].cpu().data.numpy(), Ytruth=spectra[0,:].cpu().data.numpy())
                # self.log.add_figure(tag='spectra compare',figure=f,global_step=epoch)

                # The model improved; save the checkpoint
                if test_avg_loss < self.best_validation_loss:
                    self.best_validation_loss = test_avg_loss
                    self.save()
                    print("Saving the model...")

                    if self.best_validation_loss < self.flags.stop_threshold:
                        print("Training finished EARLIER at epoch %d, reaching loss of %.5f" %\
                              (epoch, self.best_validation_loss))
                        return None

            # Learning rate decay upon plateau
            self.lr_scheduler.step(train_avg_loss)
        self.log.close()
        tk.record(1)  # Record the total time of the training period
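
make_loss is not shown in this listing; as a point of reference, here is a minimal sketch of the standard mixture-density-network negative log-likelihood it presumably computes (the shapes are assumptions inferred from the commented-out debug prints above: pi is (batch, K), sigma and mu are (batch, K, D)):

import torch

def mdn_nll_loss(pi, sigma, mu, target):
    """Sketch of an MDN negative log-likelihood; not the repo's make_loss.
    pi:     (batch, K) mixture weights, assumed normalized over K
    sigma:  (batch, K, D) per-component standard deviations
    mu:     (batch, K, D) per-component means
    target: (batch, D) ground-truth geometry
    """
    target = target.unsqueeze(1).expand_as(mu)  # (batch, K, D)
    # Per-component log-density, summed over the D output dimensions
    log_prob = torch.distributions.Normal(mu, sigma).log_prob(target).sum(dim=2)
    log_prob = log_prob + torch.log(pi + 1e-12)  # add the log mixture weights
    # log-sum-exp over the K components, averaged over the batch
    return -torch.logsumexp(log_prob, dim=1).mean()

Minimizing this quantity pushes the mixture to place probability mass on the true geometry, which matches how the training loop above aggregates a scalar loss per batch.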