def main():
    extractor = resnet50(pretrained=True).to(DEVICE)
    recurrent = utils.loadModel(
        opt.model,
        LSTM_Net(2048,
                 opt.hidden_dim,
                 opt.output_dim,
                 num_layers=opt.layers,
                 bias=True,
                 dropout=opt.dropout,
                 bidirectional=opt.bidirectional,
                 seq_predict=False)).to(DEVICE)

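    # The Normalize mean/std below are the standard ImageNet statistics that
    # the pretrained ResNet-50 feature extractor expects.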
    predict_set = dataset.TrimmedVideos(opt.video,
                                        opt.label,
                                        None,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                            transforms.Normalize(
                                                mean=[0.485, 0.456, 0.406],
                                                std=[0.229, 0.224, 0.225]),
                                        ]))

    print("Dataset: {}".format(len(predict_set)))
    predict_loader = DataLoader(predict_set,
                                batch_size=opt.batch_size,
                                shuffle=False,
                                num_workers=opt.threads)

    # Predict
    predict(extractor, recurrent, predict_loader)
def main():
    if not os.path.exists(opt.output):
        os.makedirs(opt.output, exist_ok=True)

    opt.output = os.path.join(opt.output, 'p1_valid.txt')

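    # Dimension note (an inference from the code, not documented here): the
    # classifier's 8192-d input presumably corresponds to 2048-d ResNet-50
    # features concatenated over the 4 frames sampled per video (sample=4).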
    extractor = resnet50(pretrained=True)
    classifier = utils.loadModel(opt.resume, Classifier(8192, [2048], 11))
    extractor, classifier = extractor.to(DEVICE), classifier.to(DEVICE)

    predict_set = dataset.TrimmedVideos(opt.video,
                                        opt.label,
                                        None,
                                        sample=4,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                            transforms.Normalize(
                                                mean=[0.485, 0.456, 0.406],
                                                std=[0.229, 0.224, 0.225]),
                                        ]))
    print("Dataset: {}".format(len(predict_set)))
    predict_loader = DataLoader(predict_set,
                                batch_size=opt.batch_size,
                                shuffle=False,
                                num_workers=opt.threads)

    # Predict
    results = predict(extractor, classifier, predict_loader)
    np.savetxt(opt.output, results, fmt='%d')
    print("Output File have been written to {}".format(opt.output))
def video_to_features(data_path):
    """ Transfer the training set and validation set videos into features """

    for train in (True, False):
        datasets = dataset.TrimmedVideos(data_path,
                                         train=train,
                                         downsample=1,
                                         transform=transforms.Compose([
                                             transforms.ToTensor(),
                                             transforms.Normalize(
                                                 mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225]),
                                         ]))

        dataloader = DataLoader(datasets,
                                batch_size=1,
                                shuffle=False,
                                num_workers=0)

        extractor = resnet50(pretrained=True).to(device).eval()

        train_val = "train" if train else "valid"

        for index, (data, _, category, name) in enumerate(dataloader, 1):
            data = data.squeeze(0)
            datas = np.zeros((data.shape[0], 2048), dtype=np.float64)
            remain = data.shape[0]
            finish = 0

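            # Feed the extractor at most 50 frames at a time so a long clip
            # does not exhaust GPU memory.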
            while remain > 0:
                step = min(remain, 50)
                todo = data[finish:finish + step].to(device)
                # Inference only: no_grad avoids building the autograd graph
                with torch.no_grad():
                    datas[finish:finish +
                          step] = extractor(todo).cpu().numpy()

                remain -= step
                finish += step

            print("{:4d} {:16d} {}".format(
                index, datas.shape,
                os.path.join(data_path, "feature", train_val, category[0],
                             name[0] + ".npy")))

            # ------------------------------------
            # Save the feature tensor in .npy file
            # ------------------------------------
            os.makedirs(os.path.join(data_path, "feature", train_val,
                                     category[0]),
                        exist_ok=True)

            np.savetxt(os.path.join(data_path, "feature", train_val,
                                    category[0], name[0] + ".npy"),
                       datas,
                       delimiter=',')

    return
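
# A hypothetical helper, added purely as an illustration and not part of the
# original scripts: it reads back one feature file written by
# video_to_features(). Those files are comma-separated text produced by
# np.savetxt (despite the ".npy" extension), so np.loadtxt is the matching
# reader. The argument names are assumptions.
def load_video_feature(data_path, train_val, category, name):
    """ Illustrative sketch: load one feature matrix saved by video_to_features(). """
    return np.loadtxt(os.path.join(data_path, "feature", train_val, category,
                                   name + ".npy"),
                      delimiter=',')
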
def main():
    opt.output = os.path.join(opt.output, 'p2_result.txt')

    extractor = resnet50(pretrained=True).to(DEVICE)
    recurrent = utils.loadModel(
        opt.resume,
        LSTM_Net(2048,
                 opt.hidden_dim,
                 opt.output_dim,
                 num_layers=opt.layers,
                 bias=True,
                 dropout=opt.dropout,
                 bidirectional=opt.bidirectional,
                 seq_predict=False)).to(DEVICE)

    predict_set = dataset.TrimmedVideos(opt.video,
                                        opt.label,
                                        None,
                                        downsample=opt.downsample,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                            transforms.Normalize(
                                                mean=[0.485, 0.456, 0.406],
                                                std=[0.229, 0.224, 0.225]),
                                        ]))

    print("Dataset: {}".format(len(predict_set)))
    predict_loader = DataLoader(predict_set,
                                batch_size=opt.batch_size,
                                shuffle=False,
                                collate_fn=utils.collate_fn_valid,
                                num_workers=opt.threads)

    # Predict
    results = predict(extractor, recurrent, predict_loader)
    np.savetxt(opt.output, results, fmt='%d')
    print("Output File have been written to {}".format(opt.output))
def continuous_frame_recognition():
    """ Using RNN network to recognize the action. """
    start_epoch = 1

    # -----------------------------------------------------
    # Create Model, optimizer, scheduler, and loss function
    # -----------------------------------------------------
    # extractor = resnet50(pretrained=True).to(DEVICE)
    recurrent = LSTM_Net(2048,
                         opt.hidden_dim,
                         opt.output_dim,
                         num_layers=opt.layers,
                         bias=True,
                         batch_first=False,
                         dropout=opt.dropout,
                         bidirectional=opt.bidirection,
                         seq_predict=False).to(DEVICE)

    # ----------------------------------------------
    # For single-direction LSTM
    #   weight_ih_l0 torch.Size([512, 2048])
    #   weight_hh_l0 torch.Size([512, 128])
    #   bias_ih_l0 torch.Size([512])
    #   bias_hh_l0 torch.Size([512])
    #
    # For bidirectional LSTM, reverse layer is added.
    #   weight_ih_l0_reverse torch.Size([512, 2048])
    #   weight_hh_l0_reverse torch.Size([512, 128])
    #   bias_ih_l0_reverse torch.Size([512])
    #   bias_hh_l0_reverse torch.Size([512])
    # ----------------------------------------------

    # Weight_init
    if "orthogonal" in opt.weight_init:
        for layer, param in recurrent.recurrent.named_parameters():
            print("{} {}".format(layer, param.shape))
            if len(param.shape) >= 2:
                nn.init.orthogonal_(param)

    # Bias_init
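    # PyTorch packs each LSTM bias vector as (b_i | b_f | b_g | b_o), each
    # chunk of length hidden_size, so the slice [size*0.25 : size*0.5] below
    # addresses the forget-gate bias.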
    if "forget_bias_0" in opt.bias_init:
        for layer, param in recurrent.recurrent.named_parameters():
            if layer.startswith("bias"):
                size = param.shape[0]
                start = int(size * 0.25)
                end = int(size * 0.5)
                param[start:end].data.fill_(0)

    if "forget_bias_1" in opt.bias_init:
        for layer, param in recurrent.recurrent.named_parameters():
            if layer.startswith("bias"):
                size = param.shape[0]
                start = int(size * 0.25)
                end = int(size * 0.5)
                param[start:end].data.fill_(1)

    # Set optimizer
    if opt.optimizer == "Adam":
        optimizer = optim.Adam(recurrent.parameters(),
                               lr=opt.lr,
                               betas=(opt.b1, opt.b2),
                               weight_decay=opt.weight_decay)
    elif opt.optimizer == "SGD":
        optimizer = optim.SGD(recurrent.parameters(),
                              lr=opt.lr,
                              momentum=opt.momentum,
                              weight_decay=opt.weight_decay)
    elif opt.optimizer == "ASGD":
        optimizer = optim.ASGD(recurrent.parameters(),
                               lr=opt.lr,
                               lambd=1e-4,
                               alpha=0.75,
                               t0=1000000.0,
                               weight_decay=opt.weight_decay)
    elif opt.optimizer == "Adadelta":
        optimizer = optim.Adadelta(recurrent.parameters(),
                                   lr=opt.lr,
                                   rho=0.9,
                                   eps=1e-06,
                                   weight_decay=opt.weight_decay)
    elif opt.optimizer == "Adagrad":
        optimizer = optim.Adagrad(recurrent.parameters(),
                                  lr=opt.lr,
                                  lr_decay=0,
                                  weight_decay=opt.weight_decay,
                                  initial_accumulator_value=0)
    elif opt.optimizer == "SparseAdam":
        optimizer = optim.SparseAdam(recurrent.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.b1, opt.b2),
                                     eps=1e-08)
    elif opt.optimizer == "Adamax":
        optimizer = optim.Adamax(recurrent.parameters(),
                                 lr=opt.lr,
                                 betas=(opt.b1, opt.b2),
                                 eps=1e-08,
                                 weight_decay=opt.weight_decay)
    else:
        raise ValueError("Unsupported optimizer: {}".format(opt.optimizer))

    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=opt.milestones,
                                               gamma=opt.gamma)

    # Load parameter
    if opt.pretrain:
        recurrent = utils.loadModel(opt.pretrain, recurrent)
    if opt.resume:
        recurrent, optimizer, start_epoch, scheduler = utils.loadCheckpoint(
            opt.resume, recurrent, optimizer, scheduler)

    # Set criterion
    criterion = nn.CrossEntropyLoss().to(DEVICE)

    # Set dataloader
    transform = transforms.ToTensor()

    trainlabel = os.path.join(opt.train, "label", "gt_train.csv")
    trainfeature = os.path.join(opt.train, "feature", "train")
    vallabel = os.path.join(opt.val, "label", "gt_valid.csv")
    valfeature = os.path.join(opt.val, "feature", "valid")

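    # The video path is None and a feature directory is given, so training
    # presumably runs on the precomputed ResNet-50 features written by
    # video_to_features() rather than on raw frames.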
    train_set = dataset.TrimmedVideos(None,
                                      trainlabel,
                                      trainfeature,
                                      downsample=opt.downsample,
                                      transform=transform)
    train_loader = DataLoader(train_set,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              collate_fn=utils.collate_fn,
                              num_workers=opt.threads)

    # Show the memory used by neural network
    print("The neural network allocated GPU with {:.1f} MB".format(
        torch.cuda.memory_allocated() / 1024 / 1024))

    #------------------
    # Train the models
    #------------------
    trainloss = []
    trainaccs = []
    valloss = []
    valaccs = []
    epochs = []

    for epoch in range(start_epoch, opt.epochs + 1):
        scheduler.step()

        # Save the train loss and train accuracy
        max_trainaccs = max(trainaccs) if len(trainaccs) else 0
        min_trainloss = min(trainloss) if len(trainloss) else 0
        recurrent, loss, acc = train(recurrent, train_loader, optimizer, epoch,
                                     criterion, max_trainaccs, min_trainloss)
        trainloss.append(loss)
        trainaccs.append(acc)

        # Validate the model with several downsample ratios
        loss_list, acc_list, label_list = [], [], []
        for downsample in [1, 2, 4, 6, 12]:
            val_set = dataset.TrimmedVideos(None,
                                            vallabel,
                                            valfeature,
                                            downsample=downsample,
                                            transform=transform)
            val_loader = DataLoader(val_set,
                                    batch_size=1,
                                    shuffle=True,
                                    collate_fn=utils.collate_fn,
                                    num_workers=opt.threads)
            print("[Epoch {}] [Validation] [Downsample: {:2d}]".format(
                epoch, downsample))
            acc, loss = val(recurrent, val_loader, epoch, criterion)

            loss_list.append(loss)
            acc_list.append(acc)
            label_list.append('val_{}'.format(downsample))

        valloss.append(loss_list)
        valaccs.append(acc_list)

        # Save the epochs
        epochs.append(epoch)

        # with open(os.path.join(opt.log, "problem_2", opt.tag, 'statistics.txt'), 'w') as textfile:
        #     textfile.write("\n".join(map(lambda x: str(x), (trainloss, trainaccs, valloss, valaccs, epochs))))

        records = [np.array(x) for x in
                   (trainloss, trainaccs, valloss, valaccs, epochs)]
        for record, name in zip(records,
                                ('trainloss.txt', 'trainaccs.txt',
                                 'valloss.txt', 'valaccs.txt', 'epochs.txt')):
            np.savetxt(os.path.join(opt.log, "problem_2", opt.tag, name),
                       record)

        if epoch % opt.save_interval == 0:
            savepath = os.path.join(opt.checkpoints, "problem_2", opt.tag,
                                    str(epoch) + '.pth')
            utils.saveCheckpoint(savepath, recurrent, optimizer, scheduler,
                                 epoch)

        # Draw the accuracy / loss curve
        draw_graphs(trainloss, valloss, trainaccs, valaccs, epochs,
                    "problem_2", label_list)

    return recurrent
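
# A minimal sketch, only for illustration, of what a padding collate function
# such as utils.collate_fn could look like for the variable-length feature
# sequences used above: it pads every (frames, 2048) tensor to a common
# length and also returns the original lengths. The real utils.collate_fn may
# differ; each batch item is assumed to be a (feature_tensor, label) pair.
def collate_fn_sketch(batch):
    from torch.nn.utils.rnn import pad_sequence  # keeps the sketch self-contained
    features, labels = zip(*batch)
    lengths = torch.tensor([feature.shape[0] for feature in features])
    # batch_first=False matches the LSTM_Net configuration above
    padded = pad_sequence(features, batch_first=False)
    return padded, torch.tensor(labels), lengths
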
def single_frame_recognition():
    """ Using 2D CNN network to recognize the action. """
    #-----------------------------------------------------
    # Create Model, optimizer, scheduler, and loss function
    #------------------------------------------------------
    extractor = resnet50(pretrained=True).to(DEVICE)
    classifier = Classifier(2048 * opt.sample, [2048],
                            num_class=opt.output_dim).to(DEVICE)
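    # Classifier input dimension: 2048 ResNet-50 features per sampled frame,
    # concatenated across opt.sample frames.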

    print(classifier)

    # Set optimizer
    if opt.optimizer == "Adam":
        optimizer = optim.Adam(classifier.parameters(),
                               lr=opt.lr,
                               betas=(opt.b1, opt.b2),
                               weight_decay=opt.weight_decay)
    elif opt.optimizer == "SGD":
        optimizer = optim.SGD(classifier.parameters(),
                              lr=opt.lr,
                              momentum=opt.momentum,
                              weight_decay=opt.weight_decay)
    elif opt.optimizer == "ASGD":
        optimizer = optim.ASGD(classifier.parameters(),
                               lr=opt.lr,
                               lambd=1e-4,
                               alpha=0.75,
                               t0=1000000.0,
                               weight_decay=opt.weight_decay)
    elif opt.optimizer == "Adadelta":
        optimizer = optim.Adadelta(classifier.parameters(),
                                   lr=opt.lr,
                                   rho=0.9,
                                   eps=1e-06,
                                   weight_decay=opt.weight_decay)
    elif opt.optimizer == "Adagrad":
        optimizer = optim.Adagrad(classifier.parameters(),
                                  lr=opt.lr,
                                  lr_decay=0,
                                  weight_decay=opt.weight_decay,
                                  initial_accumulator_value=0)
    elif opt.optimizer == "SparseAdam":
        optimizer = optim.SparseAdam(classifier.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.b1, opt.b2),
                                     eps=1e-08)
    elif opt.optimizer == "Adamax":
        optimizer = optim.Adamax(classifier.parameters(),
                                 lr=opt.lr,
                                 betas=(opt.b1, opt.b2),
                                 eps=1e-08,
                                 weight_decay=opt.weight_decay)
    else:
        raise ValueError("Unsupported optimizer: {}".format(opt.optimizer))

    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=opt.milestones,
                                               gamma=opt.gamma)

    criterion = nn.CrossEntropyLoss().to(DEVICE)

    transform = transforms.ToTensor()

    trainlabel = os.path.join(opt.train, "label", "gt_train.csv")
    trainfeature = os.path.join(opt.train, "feature", "train")
    vallabel = os.path.join(opt.val, "label", "gt_valid.csv")
    valfeature = os.path.join(opt.val, "feature", "valid")

    train_set = dataset.TrimmedVideos(None,
                                      trainlabel,
                                      trainfeature,
                                      sample=4,
                                      transform=transform)
    val_set = dataset.TrimmedVideos(None,
                                    vallabel,
                                    valfeature,
                                    sample=4,
                                    transform=transform)
    train_loader = DataLoader(train_set,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=opt.threads)
    val_loader = DataLoader(val_set,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            drop_last=True,
                            num_workers=opt.threads)

    # Show the memory used by neural network
    print("The neural network allocated GPU with {:.1f} MB".format(
        torch.cuda.memory_allocated() / 1024 / 1024))

    #------------------
    # Train the models
    #------------------
    trainloss = []
    trainaccs = []
    valloss = []
    valaccs = []
    epochs = []
    for epoch in range(1, opt.epochs + 1):
        scheduler.step()

        # Save the train loss and train accuracy
        extractor, classifier, loss, acc = train(extractor, classifier,
                                                 train_loader, optimizer,
                                                 epoch, criterion)
        trainloss.append(loss)
        trainaccs.append(acc)

        # Save the validation loss and validation accuracy
        acc, loss = val(extractor, classifier, val_loader, epoch, criterion)
        valloss.append(loss)
        valaccs.append(acc)

        # Save the epochs
        epochs.append(epoch)

        records = [np.array(x) for x in
                   (trainloss, trainaccs, valloss, valaccs, epochs)]
        for record, name in zip(records,
                                ('trainloss.txt', 'trainaccs.txt',
                                 'valloss.txt', 'valaccs.txt', 'epochs.txt')):
            np.savetxt(os.path.join(opt.log, "problem_1", opt.tag, name),
                       record)

        if epoch % opt.save_interval == 0:
            savepath = os.path.join(opt.checkpoints, "problem_1", opt.tag,
                                    str(epoch) + '.pth')
            utils.saveCheckpoint(savepath, classifier, optimizer, scheduler,
                                 epoch)

        draw_graphs(trainloss, valloss, trainaccs, valaccs, epochs)

    return extractor, classifier