Example #1
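# NOTE: this snippet assumes the usual imports, which are not shown here:
# time, random, numpy as np, torch, torch.nn as nn, torch.optim as optim,
# torch.autograd.Variable, matplotlib.pyplot as plt, plus the project's own
# Data, GRU and Evaluate classes.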
class Train():
    def __init__(self, difficulty):
        self.data_path = "../data"
        self.model_path = "../models"
        self.output_path = "../outputs"
        self.difficulty = difficulty
        self.timestamp = str(int(time.time()))
        self.model_name = "gru_" + self.difficulty
        self.data = Data(difficulty=self.difficulty, data_path=self.data_path)
        (self.img_features, self.w2i, self.i2w, self.nwords, self.UNK,
         self.PAD) = self.data()
        self.train = list(self.data.get_train_data())
        self.dev = list(self.data.get_validation_data())
        self.test = list(self.data.get_test_data())
        self.image_feature_size = 2048
        self.output_vector_size = 10

    def __call__(self,
                 number_of_iterations=2,
                 learning_rate=0.005,
                 embedding_size=300,
                 hidden_size=100,
                 batch_size=100):
        print("Starting 'Image Retrieval' in 'GRU' mode with '" +
              self.difficulty + "' data")

        self.model_full_path = "{}/{}_{}_{}_{}.pty".format(
            self.model_path, self.model_name, self.timestamp, learning_rate,
            embedding_size)
        self.output_file_name = "{}/{}_{}_{}_{}.csv".format(
            self.output_path, self.model_name, self.timestamp, learning_rate,
            embedding_size)

        self.number_of_iterations = number_of_iterations
        self.learning_rate = learning_rate
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.model = GRU(self.nwords, self.embedding_size,
                         self.image_feature_size, self.output_vector_size,
                         self.hidden_size, self.batch_size)
        self.criterion = nn.CrossEntropyLoss()

        self.evaluate = Evaluate(self.model, self.img_features, self.minibatch,
                                 self.preprocess, self.image_feature_size,
                                 self.output_vector_size)
        print(self.model)

        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.learning_rate)

        self.train_loss_values = []

        self.magic()

        self.save_model()

        self.save_data()

    def minibatch(self, data, batch_size=50):
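        """Yield successive batch_size-sized slices of data."""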
        for i in range(0, len(data), batch_size):
            yield data[i:i + batch_size]

    def preprocess(self, batch):
        """Helper function for functional batches"""
        correct_indexes = [observation[2] for observation in batch]
        img_ids = [observation[1] for observation in batch]
        text_features = [observation[0] for observation in batch]
        last_words = [len(dialog) for dialog in text_features]

        # Pad each sentence to the maximum length in the batch
        max_length = max(map(len, text_features))
        text_features = [
            txt + [self.PAD] * (max_length - len(txt)) for txt in text_features
        ]

        # Return in "stacked" format; last_words lets the model exclude padding effects in the GRU
        return text_features, img_ids, correct_indexes, last_words

    def magic(self):
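        """Training loop: shuffle, batch, forward, and backpropagate for each iteration."""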
        for ITER in range(self.number_of_iterations):

            random.shuffle(self.train)
            train_loss = 0.0
            start = time.time()
            iteration = 0

            for batch in self.minibatch(self.train, self.batch_size):
                self.model.zero_grad()
                self.optimizer.zero_grad()
                self.model.hidden = self.model.init_hidden()

                #Load data for model
                text_features, h5_ids, correct_index, last_words = self.preprocess(
                    batch)
                lookup_text_tensor = Variable(torch.LongTensor([text_features
                                                                ])).squeeze()

                full_img_batch = np.empty([
                    len(batch), self.output_vector_size,
                    self.image_feature_size
                ])

                for obs, img_ids in enumerate(h5_ids):
                    for index, h5_id in enumerate(img_ids):
                        full_img_batch[obs, index] = self.img_features[h5_id]

                full_img_batch = Variable(
                    torch.from_numpy(full_img_batch).type(torch.FloatTensor))

                #Target
                target = Variable(torch.LongTensor([correct_index])).squeeze()
                #Vector for excluding padding effects
                last_words = Variable(torch.LongTensor(last_words))

                #Run model and calculate loss
                prediction = self.model(lookup_text_tensor, full_img_batch,
                                        last_words)
                loss = self.criterion(prediction, target)
                train_loss += loss.item()  # accumulate the scalar loss value

                iteration += self.batch_size
                print(iteration)

                loss.backward()
                self.optimizer.step()

            print(
                "ITERATION %r: train loss/sent=%.4f, time=%.2fs" %
                (ITER + 1, train_loss / len(self.train), time.time() - start))
            self.train_loss_values.append(train_loss / len(self.train))

    def save_model(self):
        #Save model
        torch.save(self.model, self.model_full_path)
        print("Saved model has test score",
              self.evaluate(self.test, self.batch_size))

    def plot(self):
        plt.plot(self.train_loss_values, label="Train loss")
        plt.legend(loc='best')
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.title(self.model_name +
                  " - loss with lr = %.4f, embedding size = %r" %
                  (self.learning_rate, self.embedding_size))
        plt.show()

    def save_data(self):
        with open(self.output_file_name, "w") as output_file:
            output_file.write(", ".join(map(str, self.train_loss_values)))
            output_file.write("\n")
            output_file.write(str(self.evaluate(self.test, self.batch_size)))
            output_file.write("\n")
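A minimal usage sketch for this class; the difficulty label and hyperparameter values below are illustrative, not taken from the original:

trainer = Train(difficulty="easy")  # "easy" is a hypothetical difficulty label
trainer(number_of_iterations=5, learning_rate=0.005,
        embedding_size=300, hidden_size=100, batch_size=100)
trainer.plot()  # plot the recorded training loss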
Example #2
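# NOTE: this snippet assumes imports along the lines of numpy as np, torch.nn as nn,
# torch.optim as opt, matplotlib.pyplot as plt, plus the project's GRU model and its
# generate_data, train and predict helpers.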
batch_size = 256
hidden_size = 128
num_layers = 1
dropout = 0
testnum = 500
# input_length and output_length are not defined in the original snippet;
# the values below are placeholder assumptions for the input/output window sizes.
input_length = 50
output_length = 10
# interval is the sample interval between the last input and the first output.
interval = 0

epoch = 100
device = 'cuda'

# Generate sin dataset for training and testing.
dataset = np.sin([i / 50 * 2 * np.pi for i in range(2000)])
x_train, y_train, x_test, y_test, normalizer = generate_data(
    dataset, 'minmax', input_length, output_length, testnum, interval)

# Build, train and predict.
model = GRU(1, hidden_size, num_layers, 1, dropout)
optimizer = opt.Adam(model.parameters())
loss = nn.MSELoss()
batch_train_loss, batch_val_loss = train(model, x_train, y_train, epoch,
                                         batch_size, optimizer, loss, device)
y_predict, y_real, _ = predict(model, x_test, y_test, loss, device, normalizer,
                               batch_size)

# Draw result
plt.plot(y_predict, label='prediction')
plt.plot(y_real, label='real')
plt.legend()
plt.show()
Example #3
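# NOTE: this snippet assumes imports such as time, numpy as np, torch,
# torch.nn as nn, torch.optim as optim, torch.autograd.Variable, plus the
# project's C2D, GRU, get_config and vis_tool modules.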
class Trainer(object):
    def __init__(self, config, h_loader, r_loader, test_loader):
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader
        self.test_loader = test_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)  # cast in case the config stores strings
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf
        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
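        # the GRU model is built on top of the C2D feature extractor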
        self.c2d = C2D().cuda()
        self.gru = GRU(self.c2d).cuda()

    def train(self):
        cfig = get_config()
        opt = optim.Adam(filter(lambda p: p.requires_grad,
                                self.gru.parameters()),
                         lr=self.lr,
                         betas=(self.beta1, self.beta2),
                         weight_decay=self.weight_decay)

        start_time = time.time()
        criterion = nn.BCELoss()

        max_acc = 0.

        for epoch in range(self.n_epochs):
            self.gru.train()
            epoch_loss = []
            for step, (h, r) in enumerate(zip(self.h_loader, self.r_loader)):
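                # h comes from the h_loader (trained toward target 1); r from the r_loader (target 0)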
                h_video = h[0]
                r_video = r[0]

                h_video = Variable(h_video).cuda()
                r_video = Variable(r_video).cuda()

                self.gru.zero_grad()

                predicted = self.gru(h_video)
                target = torch.ones(len(predicted), dtype=torch.float32).cuda()

                h_loss = criterion(predicted, target)  # compute loss
                h_loss.backward()
                opt.step()

                self.gru.zero_grad()
                predicted = self.gru(r_video)  # predicted snippet's score
                target = torch.zeros(len(predicted),
                                     dtype=torch.float32).cuda()

                r_loss = criterion(predicted, target)  # compute loss
                r_loss.backward()
                opt.step()

                step_end_time = time.time()

                total_loss = r_loss + h_loss
                epoch_loss.append((total_loss.data).cpu().numpy())

                print(
                    '[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f, r_loss: %.3f, total_loss: %.3f'
                    % (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                       step_end_time - start_time, h_loss, r_loss, total_loss))

                self.vis.plot(
                    'H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f' %
                    (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                    (h_loss.data).cpu().numpy())

                self.vis.plot(
                    'R_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f' %
                    (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                    (r_loss.data).cpu().numpy())

            self.vis.plot("Avg loss plot", np.mean(epoch_loss))

            if epoch % self.checkpoint_step == 0:
                accuracy, savelist = self.test(self.test_loader)

                if accuracy > max_acc:
                    max_acc = accuracy
                    torch.save(
                        self.gru.state_dict(),
                        './samples/lr_%.4f_chkpoint' % cfig.lr +
                        str(epoch + 1) + '.pth')
                    for f in savelist:
                        np.save("./samples/" + f[0][0] + ".npy", f[1])
                    print(np.load("./samples/testRV04(198,360).mp4.npy"))
                    print("checkpoint saved")

    def test(self, t_loader):
        # self.gru.eval()
        # accuracy = 0.
        #
        # savelist = []
        #
        # total_len = len(t_loader)
        #
        # for step, (tv, label, filename) in enumerate(t_loader):
        #     filename = filename[0].split(".")[0]
        #
        #     label = label.squeeze()
        #
        #     start = 0
        #     end = 24
        #
        #     correct = 0
        #     count = 0
        #
        #     npy = np.zeros(tv.shape[1])
        #
        #     while end < tv.shape[1]:
        #
        #         t_video = Variable(tv[:, start:end, :, :, :]).cuda()
        #         predicted = self.gru(t_video)
        #
        #         gt_label = label[start:end]
        #
        #         if len(gt_label[gt_label == 1.]) > 12:
        #             gt_label = torch.ones(predicted.shape, dtype=torch.float32).cuda()
        #
        #         else:
        #             gt_label = torch.zeros(predicted.shape, dtype=torch.float32).cuda()
        #
        #         if predicted < 0.5:
        #             npy[start:end] = 1.
        #
        #         predicted[predicted < 0.5] = 1.
        #         predicted[predicted >= 0.5] = 0.
        #
        #         correct += (predicted == gt_label).item()
        #
        #         start += 24
        #         end += 24
        #         count += 1
        #
        #     accuracy += (correct / count) / total_len
        #
        #     savelist.append([filename, npy])

        # Test accuracy
        self.gru.eval()
        test_avg_acc = 0.
        test_cnt = 0
        savelist = []

        for idx, (video, label, filename) in enumerate(t_loader):
            video = Variable(video).cuda()
            predicted = self.gru(video)  # [num_frames, 1]

            predicted = predicted.view(1, -1)
            predicted = predicted.cpu().detach().numpy()

            predicted = predicted[0]
            label = label.cpu().numpy()

            # print(type(predicted), type(label))

            gt_label_predicted_score = predicted * label
            gt_label_predicted_score = list(gt_label_predicted_score)

            # gt_label_predicted_score = gt_label_predicted_score.cpu().numpy()
            # print("Highlight frame predicted score:", gt_label_predicted_score)

            # print(gt_label_predicted_score)
            # print(gt_label_predicted_score.shape)

            # print(gt_label_predicted_score)

            for sc in gt_label_predicted_score[0]:
                if sc != 0.:
                    print("%.3f" % sc, end=' ')

            for i in range(len(predicted)):
                if predicted[i] >= 0.45:
                    predicted[i] = 1.
                else:
                    predicted[i] = 0.

            # print("After threshold predicted:", predicted)
            # print("Actual label:", label)

            acc = (predicted == label).sum().item() / float(len(predicted))
            print("filename: %s accuracy: %.4f" % (filename, acc))
            test_avg_acc += acc
            test_cnt += 1

            savelist.append([filename, predicted])

            print()

        test_avg_acc = test_avg_acc / test_cnt

        print("Accuracy:", round(test_avg_acc, 4))
        self.vis.plot("Accuracy with lr:%.3f" % self.lr, test_avg_acc)

        return test_avg_acc, savelist
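A hedged usage sketch; the config object and the three data loaders are assumed to be built elsewhere in the project:

config = get_config()
trainer = Trainer(config, h_loader, r_loader, test_loader)  # loaders built elsewhere
trainer.train()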
Example #4
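# NOTE: this snippet assumes imports such as argparse, torch, torch.optim as optim,
# tqdm.trange, a SummaryWriter (e.g. from torch.utils.tensorboard or tensorboardX),
# plus the project's GRU model and PairedComparison data loader.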
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='From PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=32,
                        metavar='N',
                        help='input batch size for training')

    parser.add_argument('--epochs',
                        type=int,
                        default=10000,
                        metavar='E',
                        help='number of epochs to train')
    parser.add_argument('--warmup-epochs',
                        type=int,
                        default=5000,
                        metavar='WE',
                        help='number of epochs to warmup')
    parser.add_argument('--num-steps',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of batches in one epoch')

    parser.add_argument('--num-points',
                        type=int,
                        default=10,
                        metavar='NS',
                        help='number of query points')

    parser.add_argument('--num-hidden',
                        type=int,
                        default=128,
                        metavar='NE',
                        help='number of hidden units')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0003,
                        metavar='LR',
                        help='learning rate')
    parser.add_argument('--alpha',
                        type=float,
                        default=0,
                        metavar='A',
                        help='kl factor')
    parser.add_argument('--sampling',
                        action='store_true',
                        default=False,
                        help='uses sampling')

    parser.add_argument('--direction',
                        action='store_true',
                        default=False,
                        help='uses directed data-sets')
    parser.add_argument('--ranking',
                        action='store_true',
                        default=False,
                        help='sort data-set according to importance')

    parser.add_argument('--num-runs',
                        type=int,
                        default=1,
                        metavar='NR',
                        help='number of runs')
    parser.add_argument('--save-path',
                        default='trained_models/random_',
                        help='directory to save results')
    parser.add_argument('--load-path',
                        default='trained_models/default_model_0.pth',
                        help='path to load model')

    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')

    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    for i in range(args.num_runs):
        writer = SummaryWriter()
        performance = torch.zeros(args.epochs)
        accuracy_test = 0

        data_loader = PairedComparison(4,
                                       direction=args.direction,
                                       ranking=args.ranking)
        model = GRU(data_loader.num_inputs, data_loader.num_targets,
                    args.num_hidden).to(device)
        if args.alpha > 0:
            print('Loading pretrained network...')
            params, _ = torch.load(args.load_path)
            model.load_state_dict(params)
            model.reset_log_sigma()
            max_alpha = args.alpha
        optimizer = optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)

        with trange(args.epochs) as t:
            for j in t:
                loss_train = 0
                for k in range(args.num_steps):
                    inputs, targets, _, _ = data_loader.get_batch(
                        args.batch_size, args.num_points, device=device)
                    predictive_distribution, _, _ = model(
                        inputs, targets, args.sampling)

                    loss = -predictive_distribution.log_prob(targets).mean()
                    writer.add_scalar('NLL', loss.item(),
                                      j * args.num_steps + k)

                    if args.alpha > 0:
                        alpha = min(j / args.warmup_epochs, 1.0) * max_alpha
                        kld = model.regularization(alpha)
                        loss = loss + kld
                        writer.add_scalar('KLD', kld.item(),
                                          j * args.num_steps + k)

                    loss_train += loss.detach()  # detach so the running total does not keep the graph alive

                    optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 40.0)
                    optimizer.step()

                t.set_description('Loss (train): {:5.4f}'.format(
                    loss_train.item() / args.num_steps))
                performance[j] = loss_train.item() / args.num_steps

        torch.save([model.state_dict(), performance],
                   args.save_path + str(i) + '.pth')
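An illustrative command-line invocation; the script name is hypothetical and the flag values are only examples:

python main.py --batch-size 32 --epochs 10000 --num-hidden 128 --lr 0.0003 --sampling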