Example No. 1
train_loader = torch.utils.data.DataLoader(DataUtils.ECGDataset(train_path,
                                                                test_path),
                                           batch_size=args.train_batch_size,
                                           drop_last=True,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(DataUtils.ECGDataset(train_path,
                                                               test_path,
                                                               test=True),
                                          batch_size=args.test_batch_size,
                                          drop_last=True,
                                          shuffle=True)
#model = LSTM(28*28, 64, 10)  # MNIST dataset
#model = LSTM(140, 64, 5)
#model = FC(28 * 28, 300, 100, 10)
#model = TTRNN([4,7,4,7], [4,2,4,4], [1,3,4,2,1], 1, 0.8, 'ttgru')
#model = RNN([2,5,2,7], [4,4,2,4], [1,2,5,3,1], 0.8, 5)
model = RNN([1, 5, 2, 1], [2, 2, 2, 2], [1, 2, 2, 2, 1], 0.8, 5)
if args.cuda:
    model.cuda()
optimizer = TorchOptim.Adam(model.parameters(), lr=args.lr)


def train(epoch):
    #model.train()
    for step, data in enumerate(train_loader):
        train = data[0]
        target = data[1].type(torch.LongTensor)
        sequence_length = data[2] // args.feature_size  # integer lengths
        if args.cuda:
            data, target = train.cuda(), target.cuda()
        else:
            data = train
        #data, target = TorchAutograd.Variable(data), TorchAutograd.Variable(target)
        output = model(data.view(args.train_batch_size, -1,
                                 args.feature_size).float(),
                       lengths=sequence_length)
        optimizer.zero_grad()
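        # Hypothetical completion of the truncated step: the loss function is
        # not shown in this excerpt, so cross-entropy over class logits is
        # assumed.
        loss = torch.nn.functional.cross_entropy(output, target)
        loss.backward()
        optimizer.step()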
Example No. 2
def main(args):
    """
    Main function
    """
    # Use CUDA
    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Fix random seed
    torch.manual_seed(args.seed)

    # Generate token-to-index and index-to-token mapping
    tok2id, id2tok = data_loader.build_or_load_vocab(args.train,
                                                     overwrite=True)

    print("*" * 5)
    print(args)

    # Create DataLoader() objects
    params = {
        "batch_size": args.batch_size,
        "collate_fn": data_loader.collate_fn,
        "shuffle": args.shuffle,
        "num_workers": args.num_workers,
    }
    train_dataset = data_loader.SNLIDataSet(args.train, tok2id)
    train_loader = torch.utils.data.DataLoader(train_dataset, **params)
    val_dataset = data_loader.SNLIDataSet(args.val, tok2id)
    val_loader = torch.utils.data.DataLoader(val_dataset, **params)

    # Initialize model
    if args.model == "rnn":  # RNN model
        model = RNN(
            vocab_size=const.MAX_VOCAB_SIZE,  # Vocabulary size
            emb_dim=const.EMB_DIM,  # Embedding dimensions
            hidden_dim=args.hidden_dim,  # Hidden dimensions
            dropout_prob=args.dropout_prob,  # Dropout probability
            padding_idx=const.PAD_IDX,  # Padding token index
            num_classes=const.NUM_CLASSES,  # Number of class labels
            id2tok=id2tok,  # Vocabulary
        ).to(device)
    elif args.model == "cnn":  # CNN model
        model = CNN(
            vocab_size=const.MAX_VOCAB_SIZE,  # Vocabulary size
            emb_dim=const.EMB_DIM,  # Embedding dimensions
            hidden_dim=args.hidden_dim,  # Hidden dimensions
            kernel_size=args.kernel_size,  # Kernel size
            dropout_prob=args.dropout_prob,  # Dropout probability
            padding_idx=const.PAD_IDX,  # Padding token index
            num_classes=const.NUM_CLASSES,  # Number of class labels
            id2tok=id2tok,  # Vocabulary
        ).to(device)
    else:
        print("Invalid model specification, exiting")
        exit()

    # Criterion
    criterion = torch.nn.CrossEntropyLoss()
    # Model parameters
    params = [p for p in model.parameters() if p.requires_grad]
    global num_params
    num_params = sum([np.prod(p.size()) for p in params])
    # Optimizer
    optimizer = torch.optim.Adam(params, lr=args.lr)

    # Logging
    global logging
    logging = {
        "train_accs": [],
        "train_loss": [],
        "val_accs": [],
        "val_loss": [],
        "num_params": int(num_params),
    }

    # Main training loop
    for epoch in range(1, args.epochs + 1):
        # Log epoch
        print("\n{} epoch: {} {}".format("=" * 20, epoch, "=" * 20))
        # Train model
        train(args, model, device, train_loader, val_loader, optimizer,
              criterion, epoch)

    print("*" * 5 + "\n")
Example No. 3
def main(args):
    """
    Evaluate SNLI model on MNLI data set
    """
    # Use CUDA
    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Fix random seed
    torch.manual_seed(args.seed)

    # Generate token-to-index and index-to-token mapping
    tok2id, id2tok = data_loader.build_or_load_vocab(args.train,
                                                     overwrite=False)

    print("*" * 5)
    print(args)

    # Create DataLoader() objects
    params = {
        "batch_size": args.batch_size,
        "collate_fn": data_loader.collate_fn,
        "shuffle": args.shuffle,
        "num_workers": args.num_workers,
    }
    # train_dataset = data_loader.SNLIDataSet(args.train, tok2id)
    # train_loader = torch.utils.data.DataLoader(train_dataset, **params)
    val_dataset = data_loader.SNLIDataSet(args.val, tok2id)
    val_loader = torch.utils.data.DataLoader(val_dataset, **params)

    # Initialize model
    if args.model == "rnn":  # RNN model
        model = RNN(
            vocab_size=const.MAX_VOCAB_SIZE,  # Vocabulary size
            emb_dim=const.EMB_DIM,  # Embedding dimensions
            hidden_dim=args.hidden_dim,  # Hidden dimensions
            dropout_prob=args.dropout_prob,  # Dropout probability
            padding_idx=const.PAD_IDX,  # Padding token index
            num_classes=const.NUM_CLASSES,  # Number of class labels
            id2tok=id2tok,  # Vocabulary
        ).to(device)
        # Load model weights from disk
        model.load_state_dict(torch.load(const.MODELS + "rnn.pt"))
        model.eval()
    elif args.model == "cnn":  # CNN model
        model = CNN(
            vocab_size=const.MAX_VOCAB_SIZE,  # Vocabulary size
            emb_dim=const.EMB_DIM,  # Embedding dimensions
            hidden_dim=args.hidden_dim,  # Hidden dimensions
            kernel_size=args.kernel_size,  # Kernel size
            dropout_prob=args.dropout_prob,  # Dropout probability
            padding_idx=const.PAD_IDX,  # Padding token index
            num_classes=const.NUM_CLASSES,  # Number of class labels
            id2tok=id2tok,  # Vocabulary
        ).to(device)
        # Load model weights from disk
        model.load_state_dict(torch.load(const.MODELS + "cnn.pt"))
        model.eval()
    else:
        print("Invalid model specification, exiting")
        exit()

    # Criterion
    criterion = torch.nn.CrossEntropyLoss()
    # Model parameters
    params = [p for p in model.parameters() if p.requires_grad]

    # Inspect correct/incorrect predictions
    if args.inspect:
        right, wrong = eval_model(val_loader,
                                  model,
                                  device,
                                  criterion,
                                  inspect=True)
        print("\nValidation premises with correct predictions:\n")
        for i, item in enumerate(right):
            text = " ".join([id2tok[idx] for idx in item if idx > 0])
            print("#{}\n {}".format(i + 1, text))
        print("\nValidation premises with incorrect predictions:\n")
        for i, item in enumerate(wrong):
            text = " ".join([id2tok[idx] for idx in item if idx > 0])
            print("#{}\n {}".format(i + 1, text))
        return

    # Validation
    val_acc, _ = eval_model(val_loader, model, device, criterion)
    print("\n Validation accuracy: {}".format(val_acc))

    print("*" * 5 + "\n")
Example No. 4
trainInput, trainTarget = torch.LongTensor(trainInput).to(
    device), torch.LongTensor(trainTarget).to(device)
trainDataSet = Data.TensorDataset(trainInput, trainTarget)
trainDataLoader = Data.DataLoader(trainDataSet, batch_size, shuffle=True)  # shuffle

# RNN
model = RNN(vocab_size,
            emb_size,
            hidden_size,
            num_classes,
            num_layers=num_layers,
            nonlinearity=nonlinearity,
            dropout=dropout,
            bidirectional=bidirectional).to(device)
criterion = nn.CrossEntropyLoss().to(device)  # loss function
optimizer = optim.Adam(model.parameters(), lr=lr)  # optimizer

# Training
train(model, epoch, trainDataLoader, criterion, optimizer)

# test set and preprocessing
test_sentences = ["i hate me", "you love me"]
test_labels = [0, 1]
testInput, testTarget = make_data(test_sentences, word2idx, test_labels)
testInput = torch.LongTensor(testInput).to(device)
testTarget = torch.LongTensor(testTarget).to(device)

# wrap into a dataset and a loader
testDataSet = Data.TensorDataset(testInput, testTarget)
testDataLoader = Data.DataLoader(testDataSet, 2, shuffle=False)  # no shuffle
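
The excerpt stops once the test DataLoader is built. A minimal evaluation sketch, assuming the model returns class logits (consistent with the CrossEntropyLoss above):

# Hypothetical test loop completing the setup above.
model.eval()
with torch.no_grad():
    correct = 0
    for x, y in testDataLoader:
        pred = model(x).argmax(dim=1)
        correct += (pred == y).sum().item()
print("test accuracy:", correct / len(testDataSet))

Example No. 5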
                        ans2_pros.append(output2.data[0][0])
                if sorted(ans1_pros, reverse=True)[0] > sorted(
                        ans2_pros, reverse=True)[0]:
                    predicty[index][0] = 1
                    predicty[index + 1][0] = 0
                else:
                    predicty[index][0] = 0
                    predicty[index + 1][0] = 1
            index += 2
    print("changed num", count)
    return predicty


# train
rnn = RNN(100, 128, len(vocab))
optimizer = optim.SGD(rnn.parameters(), lr=0.1)
loss_function = nn.BCELoss()
losses, acc = rnn_train(data.trainset, rnn, optimizer, loss_function,
                        data.testset)
# plt.xlabel("Train epoch")
# plt.ylabel("loss")
# plt.plot([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], losses)
# plt.show()
plt.xlabel("Train epoch")
plt.ylabel("accuracy")
plt.plot(range(1, len(acc) + 1), acc)
plt.show()
# test
y, predicty = rnn_test(data.testset, rnn)
evaluator = Evaluation()  # avoid shadowing the builtin eval
evaluator.accuracy(y, predicty, data)
Example No. 6
    #set up CNN and RNN
    cnn = CNN(embedding_size=embedding_size)
    rnn = RNN(embedding_dim=embedding_size,
              hidden_dim=hidden_size,
              vocab_size=vocab.index)

    #running with CUDA
    if torch.cuda.is_available():
        with torch.cuda.device(gpu_device):
            cnn.cuda()
            rnn.cuda()

    # set up loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(cnn.linear.parameters()) + list(rnn.parameters())
    optimizer = torch.optim.Adam(params, lr=learning_rate)
    losses, iters = [], []  # 'iters' avoids shadowing the builtin iter
    n = 0

    # training
    print('start training')
    for epoch in range(epochs):
        #img_sf, cap_sf = shuffle_data(data, seed= epoch)
        img_sf, cap_sf = data.shuffle(seed=epoch)
        cap_len = len(cap_sf)
        loss_tot = []
        tic = time.time()
        for i in range(cap_len):
            img_id = img_sf[i]
            image = data.get_img(img_id)
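            # Hypothetical continuation of the truncated epoch loop; the
            # caption lookup, tensor types, and forward signatures are
            # assumptions based on the CNN encoder + RNN decoder setup above.
            caption = torch.tensor(cap_sf[i])          # token ids assumed
            if torch.cuda.is_available():
                image, caption = image.cuda(), caption.cuda()
            features = cnn(image.unsqueeze(0))         # image -> embedding
            outputs = rnn(features, caption[:-1])      # teacher forcing assumed
            loss = criterion(outputs, caption[1:])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_tot.append(loss.item())
            n += 1
            if n % 100 == 0:                           # logging interval assumed
                losses.append(sum(loss_tot) / len(loss_tot))
                iters.append(n)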
Example No. 7
class TENG:
    def __init__(self):
        ############visualize################

        self.log_dir = './tf'
        self.writer = SummaryWriter(self.log_dir)

        self.input_size = 9
        self.output_size = 25
        self.hidden_size = 300
        self.num_layers = 5
        self.learning_rate = 0.01  # 0.1
        self.sequence_length = 1

        self.batch_size = 100  # 400
        self.epochs = 50

        self.model = RNN(input_size=self.input_size,
                         hidden_size=self.hidden_size,
                         num_layers=self.num_layers,
                         output_size=self.output_size)
        # print('\nModel Info: ', self.model)
        '''
        Sample output from an earlier, smaller configuration:
        (rnn): RNN(9, 25, num_layers=2, batch_first=True, dropout=0.1)
        (fc): Linear(in_features=25, out_features=25, bias=True)
        (relu): ReLU()
        '''
        print(self.model.rnn)
        self.model.to(device)
        self.loss_function = nn.CrossEntropyLoss()
        # self.updateParams = optim.SGD(
        # self.model.parameters(), lr=self.learning_rate, momentum=0.9, weight_decay=5e-4, nesterov=True)
        self.updateParams = optim.Adam(self.model.parameters(),
                                       weight_decay=5e-4,
                                       lr=self.learning_rate)
        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.updateParams, milestones=[10, 20, 30], gamma=0.1)

        #######get the data########
        train_datasets, test_datasets = get_data()
        self.train_loader = torch.utils.data.DataLoader(
            train_datasets, batch_size=self.batch_size, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(
            test_datasets, batch_size=self.batch_size, shuffle=True)

        # mini-batch
        print(
            '#####Model Initialization is completed and ready for the training process.#####'
        )
        print('\n')
        time.sleep(0.1)
        model_file = "better_RNN_model_checkpoint.pth.tar"
        if os.path.isfile(model_file):
            print("#############Loading the pre-trained model#############")
            checkpoint = torch.load(model_file)
            self.start_epoch = checkpoint['epoch']
            self.best_accuracy = checkpoint['best_accuracy']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.updateParams.load_state_dict(checkpoint['optimizer'])
            self.training_accuracy = checkpoint['training_accuracy']
            self.validation_accuracy = checkpoint['validation_accuracy']
            self.training_loss = checkpoint['training_loss']
            self.validation_loss = checkpoint['validation_loss']
            self.time_list = checkpoint['time']
            print('\n')
            print('previous model accuracy:', self.best_accuracy)
            print('\n')
        else:
            self.start_epoch = 0
            self.best_accuracy = 0
            self.training_accuracy = []
            self.validation_accuracy = []
            self.training_loss = []
            self.validation_loss = []
            self.time_list = []
            print('NEW model accuracy:', self.best_accuracy)

    def train(self):
        def save_checkpoint(state,
                            better,
                            file='RNN_model_checkpoint.pth.tar'):
            torch.save(state, file)
            if better:
                shutil.copyfile(file, 'better_RNN_model_checkpoint.pth.tar')

        def training(epochs):

            step = 0
            self.model.train()  # initializing the training
            print("CNN training starts__epoch: {}, LR= {}".format(
                epochs, self.scheduler.get_lr()))
            training_loss = 0
            total = 0
            final_score = 0
            self.scheduler.step()  # decay the LR; recent PyTorch recommends stepping after the epoch's optimizer updates
            self.loss = 0

            for batch_id, (X_batch, y_batch) in enumerate(self.train_loader):

                X_batch = X_batch.view(-1, self.sequence_length,
                                       self.input_size)

                X_batch = X_batch.float().to(device)
                y_batch = y_batch.to(device)
                if not X_batch.requires_grad:
                    print('AutoGrad is OFF!')

                self.updateParams.zero_grad()  # zero gradients before the backward pass
                result = self.model(X_batch)
                batch_loss = self.loss_function(result, y_batch)
                # without .item() the graph is retained and GPU memory runs out
                training_loss += batch_loss.item()
                batch_loss.backward()
                self.updateParams.step()  # parameter update from the current gradient
                _, predict = torch.max(result, 1)  # argmax over classes (dim=1)
                final_score += predict.eq(y_batch).cpu().sum().type(
                    torch.DoubleTensor).item()

                # check the gradient
                # print('ID', batch_id)
                # print('after back prop--parameter: ', list(self.model.parameters())
                #       [0].grad)  # the gradient is so very small

            training_loss_mean = training_loss / \
                (len(self.train_loader.dataset)/(self.batch_size))
            training_accuracy = 100*final_score / \
                (len(self.train_loader.dataset))
            print("Training-epoch-{}-training_loss_mean: {:.4f}".format(
                epochs, training_loss_mean))
            print("Training-epoch-{}-training_accuracy: {:.4f}%".format(
                epochs, training_accuracy))
            # self.writer.add_image('Output', vutils.make_grid(output.data, normalize=True, scale_each=True), niter)
            return (training_loss_mean, training_accuracy)

        def validation(epochs):

            self.model.eval()
            validation_loss = 0
            total = 0
            final_score = 0

            with torch.no_grad():  # temporarily disable gradient tracking

                for batch_id, (test_data,
                               target_test) in enumerate(self.test_loader):

                    test_data = test_data.view(-1, self.sequence_length,
                                               self.input_size)

                    test_data = test_data.float().to(device)
                    target_test = target_test.to(device)

                    result = self.model(test_data)
                    batch_loss = self.loss_function(result, target_test)
                    validation_loss += batch_loss.item()  # .item() avoids retaining the graph
                    _, predict = torch.max(result, 1)  # argmax over classes (dim=1)
                    final_score += predict.eq(target_test).cpu().sum().type(
                        torch.DoubleTensor).item()

            validation_loss_mean = validation_loss / \
                (len(self.test_loader.dataset)/(self.batch_size))
            validation_accuracy = 100*final_score / \
                (len(self.test_loader.dataset))

            print("Validation-epoch-{}-Validation_loss_mean: {:.4f}".format(
                epochs, validation_loss_mean))
            print('Validation Accuracy: {:.4f}%'.format(validation_accuracy))

            self.model_accuracy_cur_epoch = validation_accuracy

            return (validation_loss_mean, validation_accuracy)

        if __name__ == "__main__":
            print("######CIFAR100 Training-Validation Starts######")
            epoch_iter = range(1, self.epochs)

            self.model_accuracy_cur_epoch = 0
            if self.start_epoch == self.epochs:
                pass
            else:
                for i in range(self.start_epoch + 1, self.epochs):
                    time_begin = time.time()
                    training_result = training(i)
                    self.training_loss.append(training_result[0])
                    self.training_accuracy.append(training_result[1])
                    vali_result = validation(i)
                    self.validation_loss.append(vali_result[0])
                    self.validation_accuracy.append(vali_result[1])
                    time_end = time.time() - time_begin
                    self.time_list.append(time_end)
                    progress = float(i * 100 // len(epoch_iter))
                    print('Progress: {:.4f}%'.format(progress))
                    print('\n')
                    #######################################
                    # Tensorboard Visualization
                    niter = i
                    # tensorboard --logdir=tf --port 6066

                    self.writer.add_scalars(
                        'Loss Curve', {
                            'Training Loss': training_result[0],
                            'Validation Loss': vali_result[0]
                        }, niter)  # attention->add_scalarS

                    self.writer.add_scalars(
                        'Accuracy Curve', {
                            'Training Accuracy': training_result[1],
                            'Validation Accuracy': vali_result[1]
                        }, niter)

                    #######################################
                    better = self.model_accuracy_cur_epoch > self.best_accuracy
                    self.best_accuracy = max(self.best_accuracy,
                                             self.model_accuracy_cur_epoch)
                    # if better:
                    #    torch.save(self.model.state_dict(), 'CNN_MODEL.pt')
                    save_checkpoint(
                        {
                            'epoch': i,
                            'best_accuracy': self.best_accuracy,
                            'state_dict': self.model.state_dict(),
                            'optimizer': self.updateParams.state_dict(),
                            'training_loss': self.training_loss,
                            'training_accuracy': self.training_accuracy,
                            'validation_loss': self.validation_loss,
                            'validation_accuracy': self.validation_accuracy,
                            'time': self.time_list,
                        }, better)
                    print(
                        'Model Updated, proceeding to next epoch, best accuracy= {}'
                        .format(self.best_accuracy))
                # save the model after training
                torch.save(self.model.state_dict(), 'CNN_MODEL.pt')

            # plotting

            # loss function

            plt.figure(1)
            sns.set_style('whitegrid')
            plt.plot(epoch_iter,
                     self.training_loss,
                     color='red',
                     linestyle='solid',
                     linewidth=3.0,
                     marker='p',
                     markerfacecolor='red',
                     markersize=10,
                     label='Training Loss')
            plt.plot(epoch_iter,
                     self.validation_loss,
                     color='green',
                     linestyle='solid',
                     linewidth=3.0,
                     marker='o',
                     markerfacecolor='green',
                     markersize=10,
                     label='Validation Loss')
            plt.ylabel('Loss', fontsize=18)
            plt.xlabel('Epochs', fontsize=18)
            plt.title("RNN Result-loss", fontsize=12)
            plt.legend(fontsize=14)
            plt.grid(True)
            plt.show()

            # Training/validation accuracy
            plt.figure(2)
            sns.set_style('whitegrid')
            plt.plot(epoch_iter,
                     self.training_accuracy,
                     color='blue',
                     linestyle='solid',
                     linewidth=3.0,
                     marker='s',
                     markerfacecolor='blue',
                     markersize=10,
                     label='Training Accuracy')
            plt.plot(epoch_iter,
                     self.validation_accuracy,
                     color='green',
                     linestyle='solid',
                     linewidth=3.0,
                     marker='s',
                     markerfacecolor='green',
                     markersize=10,
                     label='Validation Accuracy')
            plt.ylabel('Accuracy (%)', fontsize=18)
            plt.xlabel('Epochs', fontsize=18)
            plt.title("RNN Result-accuracy", fontsize=14)
            plt.legend(fontsize=14)
            plt.show()

            plt.figure(3)
            sns.set_style('whitegrid')
            plt.plot(epoch_iter,
                     self.time_list,
                     color='blue',
                     linestyle='solid',
                     linewidth=3.0,
                     marker='s',
                     markerfacecolor='blue',
                     markersize=10,
                     label='Epoch Time')
            plt.ylabel('Time (s)', fontsize=18)
            plt.xlabel('Epochs', fontsize=18)
            plt.title("Speed", fontsize=14)
            plt.legend(fontsize=14)
            plt.show()

    def forward_EM(self, filepath, target):
        # data processing
        df = pd.read_csv(filepath, header=None)  # no column names!!!

        df_x = df.iloc[:, :9]
        df_x = df_x.div(df_x.sum(axis=1), axis=0)  # normalize

        X = df_x
        X_scaling = StandardScaler().fit_transform(X)  # numpy.array
        input_data = torch.tensor(X_scaling, requires_grad=True)
        input_data = input_data.view(-1, self.sequence_length, self.input_size)

        y_new = df.iloc[:, -1]
        y_new -= 1

        input_data = input_data.float().to(device)

        ##############
        self.model.eval()
        result = self.model(input_data)
        _, predict = torch.max(result, 1)
        predict = predict.cpu()
        i = 0
        for elem in predict:
            if elem == target:
                i += 1
        # for i in range(len(predict)):
        #    # print(predict)
        #    if predict[i] == y_new[i]:
        #        count += 1
        acc = float(i / len(predict))
        # print('Accuracy: {}%'.format(acc*100))
        # from sklearn.metrics import confusion_matrix

        # confusion_matrix = confusion_matrix(
        #     y_true=y_new, y_pred=predict)

        # # #Normalize CM
        # confusion_matrix = cm = confusion_matrix.astype(
        #     'float') / confusion_matrix.sum(axis=1)[:, np.newaxis]
        # df_cm = pd.DataFrame(confusion_matrix)

        # # plot confusion matrix
        # fig, ax = plt.subplots()
        # sns.heatmap(df_cm, cmap="coolwarm", annot=False)
        # fig.set_size_inches(8, 6)
        # ax.set_title("Confusion Matrix of RNN, Data: {}".format(filepath))
        # ax.set_xlabel('Predicted Label', fontsize=12)
        # ax.set_ylabel('Actual Label', fontsize=12)

        # plt.show()

        return predict, acc

    def forward_ni(self, filepath):
        # data processing
        df = pd.read_csv(filepath, header=None)  # no column names!!!

        df_x = df.iloc[:, :9]
        df_x = df_x.div(df_x.sum(axis=1), axis=0)  # normalize

        X = df_x
        X_scaling = StandardScaler().fit_transform(X)  # numpy.array
        input_data = torch.tensor(X_scaling, requires_grad=True)
        input_data = input_data.view(-1, self.sequence_length, self.input_size)

        y_new = df.iloc[:, -1]

        input_data = input_data.float().to(device)

        ##############
        self.model.eval()
        result = self.model(input_data)
        _, predict = torch.max(result, 1)
        predict = predict.cpu()
        predict = predict.numpy()
        i = 0
        print(predict)
        print(y_new.head(10))
        count = 0
        for i in range(len(predict)):
            # print(predict)
            if predict[i] == y_new[i]:
                count += 1

        acc = float(count / len(predict))
        # print('Accuracy: {}%'.format(acc*100))
        from sklearn.metrics import confusion_matrix

        cm = confusion_matrix(y_true=y_new, y_pred=predict)

        # normalize the confusion matrix row-wise
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        df_cm = pd.DataFrame(cm)

        # plot confusion matrix
        fig, ax = plt.subplots()
        sns.heatmap(df_cm, cmap="coolwarm", annot=False)
        fig.set_size_inches(8, 6)
        ax.set_title("Confusion Matrix of RNN, Data: {}".format(filepath))
        ax.set_xlabel('Predicted Label', fontsize=12)
        ax.set_ylabel('Actual Label', fontsize=12)

        plt.show()

        return predict, acc
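
The RNN class used by TENG is not shown. A minimal sketch consistent with the printed module info (an nn.RNN followed by a Linear head and a ReLU); the forward wiring is an assumption.

# Hypothetical definition of the RNN module used above; module names
# (rnn, fc, relu) follow the printed model info, the wiring is assumed.
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size,
                          num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        out, _ = self.rnn(x)                       # (batch, seq, hidden)
        return self.fc(self.relu(out[:, -1, :]))   # last step -> class logits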
Example No. 8
class Trainer:
    def __init__(self, TRAIN_CONFIGS, GRU_CONFIGS, FFN_CONFIGS=None):
        self.TRAIN_CONFIGS = TRAIN_CONFIGS
        self.GRU_CONFIGS = self._process_gru_configs(GRU_CONFIGS)
        self.model = RNN(target=TRAIN_CONFIGS['target'],
                         **self.GRU_CONFIGS,
                         FFN_CONFIGS=FFN_CONFIGS)
        self.epochs_trained = 0
        self.trained = False
        # Storage for later
        self.loss = self.val_loss = None
        self.train_y_hat = self.train_y_true = None
        self.val_y_hat = self.val_y_true = None

    def _process_gru_configs(self, GRU_CONFIGS):
        lti = self._load_data_source
        GRU_IMPLIED_CONFIGS = {
            "hidden_size": lti.A.shape[-1],
            "input_size": 1 if lti.B is None else lti.U.shape[-1]
        }
        GRU_CONFIGS.update(GRU_IMPLIED_CONFIGS)
        return GRU_CONFIGS

    @property
    def _load_data_source(self):
        data_dir = self.TRAIN_CONFIGS.get("data_dir")
        lti_file = self.TRAIN_CONFIGS.get("lti_file")
        with open(path.join(data_dir, lti_file), "rb") as f:
            lti = pickle.load(f)
        return lti

    @property
    def _load_train_data(self):
        def unsqueeze(*args):
            return (_unsqueeze(M) for M in args)

        def _unsqueeze(M):
            if M is not None:
                M = M.unsqueeze(-2)
            return M

        lti = self._load_data_source
        Y, H, X, h0 = lti.torch
        _Y, _H, _X = unsqueeze(Y, H, X)
        _h0 = None if self.TRAIN_CONFIGS.get(
            "init_h") is False else h0.reshape(self.GRU_CONFIGS["num_layers"],
                                               1,
                                               self.GRU_CONFIGS["hidden_size"])
        return _Y, _H, _X, _h0

    @property
    def fit(self):
        if not self.trained:
            # get configs (for readability)
            nEpochs = self.TRAIN_CONFIGS['epochs']
            train_steps = self.TRAIN_CONFIGS['train_steps']
            init_h = self.TRAIN_CONFIGS['init_h']
            base = self.TRAIN_CONFIGS['base']
            # load data
            Y, H, X, h0 = tensor_to_cuda(*self._load_train_data)
            # split data
            if self.TRAIN_CONFIGS['target'] == 'states':
                y_train, y_val = H[:train_steps], H[train_steps:]
            elif self.TRAIN_CONFIGS['target'] == 'outputs':
                y_train, y_val = Y[:train_steps], Y[train_steps:]
            x_train, x_val = X[:train_steps], X[train_steps:]
            # prep model and optimizers
            self.model.cuda()
            optimizer = optim.Adam(self.model.parameters(), lr=1e-3)
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                             patience=2)
            # train
            loss = [None] * nEpochs
            val_loss = [None] * nEpochs
            pbar = tqdm(total=nEpochs, leave=False)
            for i in range(nEpochs):
                # reset gradient
                optimizer.zero_grad()
                # generate prediction
                y_hat, h_plus1 = self.model(
                    x_train) if not init_h else self.model(x_train, h0)
                y_hat = y_hat.squeeze()
                # calculate loss
                l = loss_func(y_hat,
                              y_train,
                              base=self.TRAIN_CONFIGS['base'],
                              epoch=i)
                loss[i] = l.item()
                # learn from loss
                l.backward()
                optimizer.step()
                scheduler.step(l.item())
                # validate
                with torch.no_grad():
                    val_y_hat, _ = self.model(
                        x_val) if not init_h else self.model(x_val, h_plus1)
                    val_y_hat = val_y_hat.squeeze()
                    l = loss_func(val_y_hat,
                                  y_val,
                                  base=self.TRAIN_CONFIGS['base'],
                                  epoch=i)
                    val_loss[i] = l.item()
                # decorator
                pbar.set_description(
                    f"Loss={loss[i]:.3f}. Val={val_loss[i]:.3f}")
                pbar.update(1)

            pbar.close()
            self.epochs_trained += nEpochs

            self.loss, self.val_loss = loss, val_loss
            self.train_y_hat = y_hat.detach().cpu().squeeze()
            self.train_y_true = y_train.detach().cpu().squeeze()
            self.val_y_hat = val_y_hat.detach().cpu().squeeze()
            self.val_y_true = y_val.detach().cpu().squeeze()
            self.trained = True
        else:
            # this shouldn't ever be reached. It's a safety.
            raise ValueError("Model has already been trained.")
        return (self.loss, self.val_loss), \
               (self.train_y_hat, self.train_y_true), \
               (self.val_y_hat, self.val_y_true)

    def pickle_save(self, trial_num):
        p = Trainer._pickle_path(self.TRAIN_CONFIGS, trial_num)
        with open(p, "wb") as f:
            pickle.dump(self, f)

    # TODO remove
    # def _gen_relative_graphs(self, hat, true, dimH, val_begins, trial_num=0, fname_prefix=None, freq=10):
    #     Trainer.gen_relative_graphs(hat, true, dimH, val_begins, trial_num, self.TRAIN_CONFIGS.get("fig_dir"), fname_prefix, freq)

    @staticmethod
    def pickled_exists(TRAIN_CONFIGS, trial_num):
        p = Trainer._pickle_path(TRAIN_CONFIGS, trial_num)
        return path.exists(p)

    @staticmethod
    def _pickle_path(TRAIN_CONFIGS, trial_num):
        name = Trainer.model_name(TRAIN_CONFIGS, trial_num)
        if not np.char.endswith(name, ".pickle"):
            name += ".pickle"
        model_dir = TRAIN_CONFIGS.get("model_dir")
        return path.join(model_dir, name)

    @staticmethod
    def _gen_relative_graphs(hat,
                             true,
                             dimOut,
                             val_begins,
                             trial_num,
                             isState,
                             fig_dir=None,
                             fname_prefix=None,
                             freq=10,
                             pause=False):
        val_ends = hat.shape[0]
        palette = {
            "H1": "C0",
            "H2": "C1",
            "H3": "C2",
            "Y1": "C0",
            "Y2": "C1",
            "Y3": "C2"
        }
        for _base in range(dimOut):
            _dif = rel_space_dif(hat, true, _base)
            df = pd.DataFrame(_dif)
            df = df.drop(_base, axis=1)
            _pre = "H" if isState else "Y"
            df.columns = _pre + (df.columns + 1).astype(str)
            df.columns.name = "Hidden States" if isState else "Output Indices"
            df.index.name = "Itteration"
            df = df.stack()
            df.name = "Error"
            df = df.reset_index()
            _df = df[df['Itteration'] % freq == 0]
            plt.axhline(0, color="k", alpha=0.5)
            _hue = "Hidden States" if isState else "Output Indices"
            sns.lineplot(data=_df,
                         x="Itteration",
                         y="Error",
                         hue=_hue,
                         alpha=1,
                         palette=palette)

            plt.title(f"Relative Difference (Base: {_pre}{_base+1})")
            plt.axvspan(val_begins, val_ends, facecolor="0.1", alpha=0.25)
            if fname_prefix is not None and fig_dir is not None:
                fname = fname_prefix + f"-relgraph-{_pre}{_base+1}-trial{trial_num}"
                plt.savefig(path.join(fig_dir, fname))
            else:
                print(f"fname_prefix='{fname_prefix}'; fig_dir='{fig_dir}'")
            if pause:
                plt.show()
            else:
                plt.show(block=False)
            plt.clf()

    @staticmethod
    def model_name(TRAIN_CONFIGS, trial_num):
        fprefix = TRAIN_CONFIGS.get("lti_file").split(".")[0]
        name = fprefix + f"-trial{trial_num}"
        return name

    @staticmethod
    def load_trained(TRAIN_CONFIGS, trial_num):
        model_dir = TRAIN_CONFIGS.get("model_dir")
        name = Trainer.model_name(TRAIN_CONFIGS, trial_num)
        if not np.char.endswith(name, ".pickle"):
            name += ".pickle"
        with open(path.join(model_dir, name), "rb") as f:
            trainer = pickle.load(f)
        return trainer

    def gen_relative_graphs(self, trial_num, freq=10, pause=False):
        train_hat, train_true = self.train_y_hat, self.train_y_true
        val_hat, val_true = self.val_y_hat, self.val_y_true
        # derived
        dimOut = train_hat.shape[-1]
        val_begins = train_hat.shape[0]
        # combine predictions and true values
        hat = np.concatenate([train_hat, val_hat])
        true = np.concatenate([train_true, val_true])
        # graph
        fprefix = self.TRAIN_CONFIGS.get("lti_file").split(".pickle")[0]

        isState = self.TRAIN_CONFIGS.get("target") == "states"
        Trainer._gen_relative_graphs(hat,
                                     true,
                                     dimOut,
                                     val_begins,
                                     trial_num,
                                     isState,
                                     self.TRAIN_CONFIGS.get("fig_dir"),
                                     fprefix,
                                     freq=freq,
                                     pause=pause)

    @property
    def get_train_test_metrics(self):
        isState = self.TRAIN_CONFIGS.get("target") == "states"

        state_tups = [(self.train_y_hat, self.train_y_true),
                      (self.val_y_hat, self.val_y_true)]
        train, test = [
            all_state_metrics(state_hat, state_true, isState)
            for state_hat, state_true in state_tups
        ]

        return train, test

    def save_train_test_metrics(self, trial_num):
        metrics_dir = self.TRAIN_CONFIGS.get("metrics_dir")

        train, test = self.get_train_test_metrics
        _name = Trainer.model_name(self.TRAIN_CONFIGS, trial_num)

        train.to_csv(path.join(metrics_dir, _name + "-train.csv"))
        test.to_csv(path.join(metrics_dir, _name + "-val.csv"))

        return train, test
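
For reference, a minimal usage sketch of Trainer. The config keys mirror the ones the class reads above; the paths and numeric values are assumptions.

# Hypothetical usage of Trainer; paths and values are assumptions.
TRAIN_CONFIGS = {
    "data_dir": "data", "lti_file": "lti_sys.pickle",     # paths assumed
    "model_dir": "models", "fig_dir": "figs", "metrics_dir": "metrics",
    "target": "states",        # or "outputs"
    "epochs": 200, "train_steps": 800, "init_h": True, "base": 0,
}
GRU_CONFIGS = {"num_layers": 1}  # hidden_size/input_size are implied from the LTI data

trainer = Trainer(TRAIN_CONFIGS, GRU_CONFIGS)
(loss, val_loss), (train_hat, train_true), (val_hat, val_true) = trainer.fit
trainer.save_train_test_metrics(trial_num=0)
trainer.pickle_save(trial_num=0)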