value_scale = 1
input_type = 'rgb'
sample_t_stride = 1
sample_duration = 16
inference_stride = 16

#normalize = get_normalize_method(mean, std, no_mean_norm, no_std_norm)
normalize = Normalize(mean, std)
spatial_transform = [Resize(sample_size)]
if inference_crop == 'center':
    spatial_transform.append(CenterCrop(sample_size))
if input_type == 'flow':
    spatial_transform.append(PickFirstChannels(n=2))
spatial_transform.append(ToTensor())
spatial_transform.extend([ScaleValue(value_scale), normalize])
spatial_transform = Compose(spatial_transform)

temporal_transform = []
if sample_t_stride > 1:
    temporal_transform.append(TemporalSubsampling(sample_t_stride))
temporal_transform.append(SlidingWindow(sample_duration, inference_stride))
temporal_transform = TemporalCompose(temporal_transform)

# Load the model
#print('load model begin!')
model = generate_model_resnet(1)  # build the ResNet model
#model = torch.load('./save_200.pth')
checkpoint = torch.load('./save_200.pth', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
#print(model)
model.eval()  # switch batchnorm, dropout, etc. to inference mode; do not skip this
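For reference, a minimal sketch of how the transforms and model above might be driven for sliding-window inference. The PIL frame list, the (C, T, H, W) clip layout, and SlidingWindow returning a list of index windows are assumptions following 3D-ResNets-PyTorch conventions, not part of the snippet itself:

import torch

def predict_video(frames, model, spatial_transform, temporal_transform):
    # frames: list of PIL images for one video (assumed input format)
    frame_indices = list(range(len(frames)))
    windows = temporal_transform(frame_indices)  # assumed: list of index windows
    scores = []
    with torch.no_grad():
        for window in windows:
            clip = [spatial_transform(frames[i]) for i in window]
            clip = torch.stack(clip, dim=1).unsqueeze(0)  # (1, C, T, H, W)
            scores.append(model(clip))
    return torch.cat(scores).mean(dim=0)  # average the per-window scores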
Code example #2
def main_run(dataset, stage, trainDatasetDir, valDatasetDir, stage1_dict,
             stackSize, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize, alphaX,
             alphaY):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    model_folder = os.path.join(
        './', out_dir, 'attConvLSTM', str(seqLen),
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)
    ])
    spatial_transform2 = Compose([Scale((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(trainDatasetDir,
                                spatial_transform2,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                numSeg=1,
                                stackSize=stackSize,
                                fmt='.png',
                                seqLen=seqLen)

    trainInstances = vid_seq_train.__len__()

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    if valDatasetDir is not None:
        vid_seq_val = makeDataset(valDatasetDir,
                                  spatial_transform2,
                                  spatial_transform=Compose(
                                      [Scale(256), CenterCrop(224)]),
                                  sequence=False,
                                  numSeg=1,
                                  stackSize=stackSize,
                                  fmt='.png',
                                  phase='Test',
                                  seqLen=seqLen)
        valInstances = vid_seq_val.__len__()

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []
    if stage == 1:
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:  # stage == 2
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)

        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    loss_fn_regression = nn.MSELoss()  # Loss function for the regression model

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        x_loss = 0
        y_loss = 0
        trainSamples = 0
        iterPerEpoch = 0

        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)

        #for i, (inputs, targets) in enumerate(train_loader):
        for flowX, flowY, inputs, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()

            flowX = flowX.cuda()
            flowY = flowY.cuda()

            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)

            output_label, _, flowXprediction, flowYprediction = model(
                inputVariable)

            #Reshaping predictions and inputs in order
            #to correctly regress on the inputs
            flowXprediction = flowXprediction.view(-1)
            flowX = torch.reshape(flowX, (-1, )).float()

            flowYprediction = flowYprediction.view(-1)
            flowY = torch.reshape(flowY, (-1, )).float()

            #print(f'Prediction: {flowXprediction.size()}')
            #print(f'Input : {flowX.size()}')

            #sys.exit()

            lossX = alphaX * loss_fn_regression(flowXprediction, flowX)
            lossY = alphaY * loss_fn_regression(flowYprediction, flowY)
            loss = loss_fn(output_label, labelVariable)

            #Weighting the loss of the ss task
            #by multiplying it by alpha
            total_loss = loss + lossX + lossY
            total_loss.backward()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            x_loss += lossX.item()
            y_loss += lossY.item()
            epoch_loss += loss.item()

        optim_scheduler.step()
        avg_x_loss = x_loss / iterPerEpoch
        avg_y_loss = y_loss / iterPerEpoch
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        print('X loss after {} epochs = {}'.format(epoch + 1, avg_x_loss))
        print('Y loss after {} epochs = {}'.format(epoch + 1, avg_y_loss))

        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        writer.add_scalar('x_train_loss', avg_x_loss, epoch + 1)
        writer.add_scalar('y_train_loss', avg_y_loss, epoch + 1)

        train_log_loss.write('Training X loss after {} epochs = {}\n'.format(
            epoch + 1, avg_x_loss))
        train_log_loss.write('Training Y loss after {} epochs = {}\n'.format(
            epoch + 1, avg_y_loss))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if valDatasetDir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_x_loss = 0
            val_y_loss = 0
            val_samples = 0
            numCorr = 0
            mmap_loss = 0

            with torch.no_grad():
                #for j, (inputs, targets) in enumerate(val_loader):
                for flowX, flowY, inputs, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)

                    flowX = flowX.cuda()
                    flowY = flowY.cuda()

                    inputVariable = Variable(
                        inputs.permute(1, 0, 2, 3, 4).cuda())
                    # 'async' became a reserved word in Python 3.7; use non_blocking
                    labelVariable = Variable(targets.cuda(non_blocking=True))

                    output_label, _, flowXprediction, flowYprediction = model(
                        inputVariable)

                    #Reshaping predictions and inputs in order
                    #to correctly regress on the inputs
                    flowXprediction = flowXprediction.view(-1)
                    flowX = torch.reshape(flowX, (-1, )).float()

                    # fixed copy-paste bug: these previously reused flowXprediction/flowX
                    flowYprediction = flowYprediction.view(-1)
                    flowY = torch.reshape(flowY, (-1, )).float()

                    lossX = alphaX * loss_fn_regression(flowXprediction, flowX)
                    lossY = alphaY * loss_fn_regression(flowYprediction, flowY)

                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    val_x_loss += lossX.item()
                    val_y_loss += lossY.item()

                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()

            avg_x_val_loss = val_x_loss / val_iter
            avg_y_val_loss = val_y_loss / val_iter
            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter

            print('Val X Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_x_val_loss))
            print('Val Y Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_y_val_loss))
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))

            writer.add_scalar('val x/epoch_loss', avg_x_val_loss, epoch + 1)
            writer.add_scalar('val y/epoch_loss', avg_y_val_loss, epoch + 1)
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val X Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_x_val_loss))
            val_log_loss.write('Val Y Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_y_val_loss))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))

            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
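The stage-2 branch above repeats an identical unfreeze loop for every fine-tuned layer. A compact helper expressing the same pattern (a sketch, assuming the same model attributes as above) could replace those blocks:

def unfreeze(modules, train_params):
    # mark every parameter of the given modules as trainable and collect them
    for module in modules:
        for params in module.parameters():
            params.requires_grad = True
            train_params.append(params)
        module.train(True)

# usage, equivalent to the stage-2 blocks above:
# unfreeze([model.resNet.layer4[b].conv1 for b in range(3)]
#          + [model.resNet.layer4[b].conv2 for b in range(3)]
#          + [model.resNet.fc], train_params)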
Code example #3
File: main.py Project: shuxiao0312/STRG
def get_train_utils(opt, model_parameters):
    assert opt.train_crop in ['random', 'corner', 'center']
    spatial_transform = []
    if opt.train_crop == 'random':
        spatial_transform.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        scales = [1.0]
        scale_step = 1 / (2**(1 / 4))
        for _ in range(1, 5):
            scales.append(scales[-1] * scale_step)
        spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_transform.append(Resize(opt.sample_size))
        spatial_transform.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_transform.append(RandomHorizontalFlip())
    if opt.colorjitter:
        spatial_transform.append(ColorJitter())
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.append(ScaleValue(opt.value_scale))
    spatial_transform.append(normalize)
    spatial_transform = Compose(spatial_transform)

    assert opt.train_t_crop in ['random', 'center']
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_transform.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_transform.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=opt.n_threads,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               worker_init_fn=worker_init_fn)

    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    else:
        train_logger = None
        train_batch_logger = None

    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
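A sketch of how the tuple returned by get_train_utils is typically consumed; the train_epoch call is a placeholder, and the opt field names (begin_epoch, n_epochs) are assumptions based on the same codebase:

(train_loader, train_sampler, train_logger, train_batch_logger,
 optimizer, scheduler) = get_train_utils(opt, model.parameters())

for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
    if opt.distributed:
        train_sampler.set_epoch(epoch)  # reshuffle the distributed shards
    # train_epoch(epoch, train_loader, model, optimizer, ...) would run here
    if opt.lr_scheduler == 'multistep':
        scheduler.step()  # the plateau scheduler instead steps on validation loss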
Code example #4
def main_run(model_state_dict, dataset_dir, seqLen, memSize, out_dir):
    model_folder = os.path.join('./', out_dir, 'attConvLSTMDoubleResnet',
                                str(seqLen))
    #dataset = 'gtea61'
    num_classes = 61

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               seqLen=seqLen,
                               fmt='.png',
                               train=False,
                               spatial_transform=spatial_transform,
                               users=['S2'])

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = attentionDoubleResnet(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        #for j, (inputs, targets) in enumerate(test_loader):
        for inputs, inputsSN, targets in test_loader:
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            inputSNVariable = Variable(inputsSN.permute(1, 0, 2, 3, 4).cuda())
            output_label, _ = model(inputVariable, inputSNVariable)

            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.cuda()).sum()
            true_labels.append(targets)
            predicted_labels.append(predicted.cpu())

    test_accuracy = torch.true_divide(numCorr, test_samples) * 100
    test_accuracy = 'Test Accuracy = {}%'.format(test_accuracy)
    print(test_accuracy)
    fil = open(model_folder + "/test_log_acc.txt", "w")
    fil.write(test_accuracy)
    fil.close()

    # concatenate the per-batch tensors so sklearn sees flat 1-D label arrays
    cnf_matrix = confusion_matrix(torch.cat(true_labels),
                                  torch.cat(predicted_labels)).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.figure(1, figsize=(12, 12), dpi=100.0)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    xy = np.arange(start=0, stop=61)
    plt.plot(xy, xy)
    plt.savefig(model_folder + '/cnf_matrix_normalized.png',
                bbox_inches='tight')
    plt.show()
Code example #5
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, regressor):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    model_folder = os.path.join('./', out_dir, dataset, 'MS', str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')
    train_log_loss_ms = open((model_folder + '/train_log_loss_ms.txt'), 'w')
    val_log_loss_ms = open((model_folder + '/val_log_loss_ms.txt'), 'w')
    train_log_acc_ms = open((model_folder + '/train_log_acc_ms.txt'), 'w')
    val_log_acc_ms = open((model_folder + '/val_log_acc_ms.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)])

    vid_seq_train = makeDataset(train_data_dir,
                                spatial_transform=spatial_transform, seqLen=seqLen,
                                fmt='.png', phase='train', regressor=regressor)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, num_workers=4, pin_memory=True)
    if val_data_dir is not None:

        vid_seq_val = makeDataset(val_data_dir,
                                  spatial_transform=Compose([Scale(256), CenterCrop(224)]),
                                  seqLen=seqLen, fmt='.png', phase='test', regressor=regressor)

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
        valInstances = vid_seq_val.__len__()

    trainInstances = vid_seq_train.__len__()

    train_params = []
    if stage == 1:

        model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:

        model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor)
        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.conv.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.clas.parameters():
            params.requires_grad = True
            train_params += [params]
        model.conv.train(True)
        model.clas.train(True)
        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]
        


    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    loss_fms = nn.NLLLoss()
    loss_reg = nn.MSELoss()
    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step,
                                                           gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        numCorrTrain_ms = 0
        trainSamples = 0
        iterPerEpoch = 0
        epoch_loss_ms = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)
        if stage == 2:
            model.conv.train(True)
            model.clas.train(True)
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
        for i, (inputs, binary_map, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, output_ms = model(inputVariable)

            loss = loss_fn(output_label, labelVariable)

            if stage == 2:
                loss.backward(retain_graph=True)
            else:
                loss.backward()
            if regressor == 0:
                binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).type(torch.LongTensor).cuda())
                output_ms = output_ms.view(-1, 2)
            elif regressor == 1:
                binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).cuda())
                output_ms = output_ms.view(-1)
            binary_map = binary_map.contiguous().view(-1)
            if stage == 2:
                if regressor == 1:
                    loss_ms = loss_reg(output_ms, binary_map)
                    loss_ms.backward()
                    epoch_loss_ms += loss_ms.item()
                elif regressor == 0:
                    loss_ms = loss_fn(output_ms, binary_map)
                    loss_ms.backward()
                    _, predicted = torch.max(output_ms.data, 1)
                    numCorrTrain_ms += torch.sum(predicted == binary_map.data).data.item()
                    epoch_loss_ms += loss_ms.item()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += torch.sum(predicted == labelVariable.data).data.item()
            epoch_loss += loss.item()

        # step the LR schedule once per epoch; the original called this only once,
        # after the whole training loop, so the decay never took effect
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        if stage == 2:
            trainAccuracy_ms = (numCorrTrain_ms / trainSamples) * 100
            avg_loss_ms = epoch_loss_ms / iterPerEpoch
            train_log_loss_ms.write('Train Loss MS after {} epochs = {}\n'.format(epoch + 1, avg_loss_ms))
            if regressor == 0:
                train_log_acc_ms.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy_ms))

        trainAccuracy = (numCorrTrain / trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
        
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))
        if val_data_dir is not None:
            if (epoch+1) % 1 == 0:
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                numCorr_ms = 0
                epoch_loss_ms_val = 0
                
                for j, (inputs, binary_map, targets) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    # volatile/async were removed from PyTorch; wrap this loop in
                    # torch.no_grad() to disable autograd during validation
                    inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
                    labelVariable = Variable(targets.cuda(non_blocking=True))
                    output_label, output_ms = model(inputVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    if regressor == 0:
                        binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).type(torch.LongTensor).cuda())
                        output_ms = output_ms.view(-1, 2)
                    elif regressor == 1:
                        binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).cuda())
                        output_ms = output_ms.view(-1)
                    binary_map = binary_map.contiguous().view(-1)
                    if stage == 2:
                        if regressor == 1:
                            loss_ms = loss_reg(output_ms, binary_map)
                            epoch_loss_ms_val += loss_ms.item()
                        elif regressor == 0:
                            loss_ms = loss_fn(output_ms, binary_map)
                            _, predicted = torch.max(output_ms.data, 1)
                            numCorr_ms += torch.sum(predicted == binary_map.data).data.item()
                            epoch_loss_ms_val += loss_ms.item()

                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += torch.sum(predicted == labelVariable.data).data.item()
                
                avg_val_loss = val_loss_epoch / val_iter
                if stage == 2:
                    avg_loss_ms = epoch_loss_ms_val / val_iter
                    val_accuracy_ms = (numCorr_ms / val_samples) * 100
                    val_log_loss_ms.write('Val Loss MS after {} epochs = {}\n'.format(epoch + 1, avg_loss_ms))
                    if regressor == 0:
                        val_log_acc_ms.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy_ms))
                val_accuracy = (numCorr / val_samples) * 100
                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_ms_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_ms_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    train_log_loss_ms.close()
    val_log_loss_ms.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
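To make the two regressor modes above concrete, here is a standalone illustration of the shape handling; all sizes are assumptions for illustration, the real maps come from the dataset:

import torch
import torch.nn as nn

T, B, H, W = 7, 4, 7, 7                       # assumed seq/batch/map sizes
binary_map = torch.randint(0, 2, (B, T, 1, H, W))
flat_targets = binary_map.permute(1, 0, 2, 3, 4).contiguous().view(-1)

# regressor == 0: two logits per map location, cross-entropy on the flat map
output_ms = torch.randn(T * B * H * W, 2)
loss_cls = nn.CrossEntropyLoss()(output_ms, flat_targets)

# regressor == 1: one value per location, MSE against the flat map
output_ms_reg = torch.randn(T * B * H * W)
loss_reg = nn.MSELoss()(output_ms_reg, flat_targets.float())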
Code example #6
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize, valBatchSize, numEpochs, lr1,
             decay_factor, decay_step):


    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    min_accuracy = 0

    model_folder = os.path.join('./', outDir, dataset, 'flow')  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        # the original notebook line here was "!rm -rf ./experiments", which is
        # IPython shell syntax; shutil.rmtree (requires import shutil) is the
        # plain-Python equivalent
        shutil.rmtree(model_folder)
        #sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')


    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])

    vid_seq_train = makeDataset(trainDir, spatial_transform=spatial_transform, sequence=False,
                                stackSize=stackSize, fmt='.png')

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, sampler=None, num_workers=4, pin_memory=True)
    valInstances=0
    if valDir is not None:

        vid_seq_val = makeDataset(valDir, spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                   sequence=False, stackSize=stackSize, fmt='.png', phase='Test')

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
        valInstances = vid_seq_val.__len__()

    trainInstances = vid_seq_train.__len__()
    print('Number of samples in the dataset: training = {} | validation = {}'.format(trainInstances, valInstances))

    model = flow_resnet34(True, channels=2*stackSize, num_classes=num_classes)
    model.train(True)
    train_params = list(model.parameters())

    model.cuda()

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.SGD(train_params, lr=lr1, momentum=0.9, weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = Variable(inputs.cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.item()  # loss.data[0] is pre-0.4 PyTorch
        avg_loss = epoch_loss/iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch+1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch+1, trainAccuracy))
        if valDir is not None:
            if (epoch+1) % 1 == 0:
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                for j, (inputs, targets) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    # volatile/async were removed from PyTorch; wrap this loop in
                    # torch.no_grad() to disable autograd during validation
                    inputVariable = Variable(inputs.cuda())
                    labelVariable = Variable(targets.cuda(non_blocking=True))
                    output_label, _ = model(inputVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()
                val_accuracy = (numCorr / val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_flow_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_flow_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
        # step the LR schedule after the optimizer updates (PyTorch >= 1.1 order);
        # the original called it at the top of each epoch instead
        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
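flow_resnet34 above consumes stacked optical flow as extra input channels rather than a temporal dimension; a quick illustration of the expected batch shape (sizes are assumptions):

import torch

stackSize = 5
# x/y flow for stackSize frames packed into 2*stackSize channels
inputs = torch.randn(8, 2 * stackSize, 224, 224)  # (batch, channels, H, W)
# output_label, _ = model(inputs.cuda())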
Code example #7
    print('##########################################')
    print('####### train')
    print('##########################################')
    assert cfg.train_crop in ['random', 'corner', 'center']
    if cfg.train_crop == 'random':
        # the original assigned a bare tuple here; a crop transform was clearly
        # intended (cf. the MultiScaleRandomCrop pattern in the later examples)
        crop_method = MultiScaleRandomCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(cfg.scales,
                                           cfg.sample_size,
                                           crop_positions=['c'])
    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(cfg.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(cfg, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=cfg.n_threads,
                                               drop_last=True,
                                               pin_memory=True)
    train_logger = Logger(os.path.join(cfg.custom_logdir, 'train.log'),
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
Code example #8
def main_run(dataset, flowModel_state_dict, RGBModel_state_dict, dataset_dir,
             stackSize, seqLen, memSize, numSeg):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    flow_wt = 0.5
    testBatchSize = 1
    sequence = True
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=sequence,
                               numSeg=numSeg,
                               stackSize=stackSize,
                               fmt='.jpg',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    modelFlow = flow_resnet34(False,
                              channels=2 * stackSize,
                              num_classes=num_classes)
    modelFlow.load_state_dict(torch.load(flowModel_state_dict))
    modelRGBSN = attentionMDoubleResnet(num_classes=num_classes,
                                        mem_size=memSize)
    # fixed NameError: the function parameter is named RGBModel_state_dict
    modelRGBSN.load_state_dict(torch.load(RGBModel_state_dict))

    for params in modelFlow.parameters():
        params.requires_grad = False

    for params in modelRGBSN.parameters():
        params.requires_grad = False

    modelFlow.train(False)
    modelRGBSN.train(False)
    modelFlow.cuda()
    modelRGBSN.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0

    true_labels = []
    predicted_labels = []

    with torch.no_grad():  # replaces the removed volatile=True flags
        for j, (inputFlow, inputFrame, inputSN, targets) in enumerate(test_loader):
            inputVariableFlow = inputFlow[0].cuda()
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda()
            # was assigned to inputSN, causing a NameError at the model call
            inputVariableSN = inputSN.permute(1, 0, 2, 3, 4).cuda()
            output_labelFlow, _ = modelFlow(inputVariableFlow)
            output_labelFrameSN, _ = modelRGBSN(inputVariableFrame,
                                                inputVariableSN)
            output_label_meanFlow = torch.mean(output_labelFlow.data, 0, True)
            output_label_meanTwoStream = (flow_wt * output_label_meanFlow) + (
                (1 - flow_wt) * output_labelFrameSN.data)
            _, predictedTwoStream = torch.max(output_label_meanTwoStream, 1)
            predictedTwoStream = predictedTwoStream.cpu()
            numCorrTwoStream += (predictedTwoStream == targets[0]).sum()
            true_labels.append(targets)
            predicted_labels.append(predictedTwoStream)
    test_accuracyTwoStream = (numCorrTwoStream / test_samples) * 100
    print('Test Accuracy = {}'.format(test_accuracyTwoStream))

    # concatenate per-batch tensors so sklearn sees flat 1-D label arrays
    cnf_matrix = confusion_matrix(torch.cat(true_labels),
                                  torch.cat(predicted_labels)).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamDoubleResnet.jpg', bbox_inches='tight')
    plt.show()
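The fusion step above is plain late fusion of class scores: average the flow logits over segments, then blend with the RGB/SN logits. Isolated, it amounts to the following (shapes are illustrative assumptions):

import torch

flow_wt = 0.5
flow_scores = torch.randn(5, 61)   # one row per flow segment (numSeg)
rgb_scores = torch.randn(1, 61)    # single RGB/SN prediction

fused = flow_wt * torch.mean(flow_scores, 0, True) + (1 - flow_wt) * rgb_scores
_, predicted = torch.max(fused, 1)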
Code example #9
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(
                opt.scales, opt.sample_size, crop_positions=['c'])
        if opt.dataset == 'gtea':
            spatial_transform = Compose([
                crop_method,
                RandomHorizontalFlip(),
                ToTensor(opt.norm_value), norm_method,
            ])
        else:
            spatial_transform = Compose([
                crop_method,
                RandomHorizontalFlip(),
                RGB2Gray(),
                ToTensor(opt.norm_value), norm_method,
            ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        if opt.compress == 'mask':
            spatio_temporal_transform = Coded(opt.mask_path)
        elif opt.compress == 'avg':
            spatio_temporal_transform = Averaged()
        elif opt.compress == 'one':
Code example #10
    def __init__(self,
                 root_path,
                 annotation_path,
                 subset,
                 n_samples_for_each_video=1,
                 spatial_transform=None,
                 temporal_transform=None,
                 target_transform=None,
                 sample_duration=16,
                 modality='rgb',
                 get_loader=get_default_video_loader):

        if subset == 'training':
            self.data, self.class_names = make_dataset(
                root_path, annotation_path, subset, n_samples_for_each_video,
                sample_duration)
            # self.val_data, _ = make_dataset(
            #     root_path, annotation_path, 'validation', n_samples_for_each_video,
            #     sample_duration)
            # self.data += self.val_data
        else:
            self.data, self.class_names = make_dataset(
                root_path, annotation_path, 'testing',
                n_samples_for_each_video, sample_duration)

        print('loaded', len(self.data))

        self.spatial_transform = spatial_transform
        self.temporal_transform = temporal_transform
        self.target_transform = target_transform

        self.subset = subset
        self.modality = modality
        if self.modality == 'flow':
            self.loader = get_default_video_loader_flow()
        elif self.modality == 'depth':
            self.loader = get_default_video_loader_depth()
        else:
            self.loader = get_loader()

        sometimes = lambda aug: iaa.Sometimes(0.3, aug)
        self.aug_seq = iaa.Sequential([
            # iaa.Fliplr(0.5),
            # sometimes(iaa.MotionBlur(k=2)),
            # sometimes(iaa.ChangeColorTemperature((1100, 10000))),
            sometimes(
                iaa.MultiplyAndAddToBrightness(mul=(0.8, 1.2), add=(-30, 30))),
            # sometimes(iaa.Affine(scale={'x': (0.8, 1.2), 'y': (0.8, 1.2)},
            #                      translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            #                      rotate=(-20, 20),
            #                      shear=(-10, 10),
            #                      cval=(0, 255),
            #                      mode=ia.ALL, )),
            # sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.15))),
            # sometimes(iaa.AdditiveGaussianNoise(scale=0.05 * 255)),
        ])
        self.aug_seq.to_deterministic()

        # added by alexhu
        self.root_path = root_path
        if self.modality != 'pose':
            self.to_tensor = Compose(self.spatial_transform.transforms[-2:])
            self.spatial_transform.transforms = self.spatial_transform.transforms[:-2]
Code example #11
        # if opt.train_crop == 'random':
        #     crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        # elif opt.train_crop == 'corner':
        #     crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        # elif opt.train_crop == 'center':
        #     crop_method = MultiScaleCornerCrop(
        #         opt.scales, opt.sample_size, crop_positions=['c'])
        # spatial_transform = Compose([
        #     crop_method,
        #     RandomHorizontalFlip(),
        #     ToTensor(opt.norm_value), norm_method
        # ])

        spatial_transform = Compose([
            ScaleQC(opt.sample_size),
            CenterCrop(opt.sample_size),
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        # training_data = get_training_set(opt, spatial_transform,
        #                                  temporal_transform, target_transform)
        # train_loader = torch.utils.data.DataLoader(
        #     training_data,
        #     batch_size=opt.batch_size,
        #     shuffle=True,
        #     num_workers=opt.n_threads,
        #     pin_memory=True)

        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
Code example #12
File: confusion.py Project: B10oh4z4rd/FPAR_project
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen,
             memSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    testBatchSize = 1
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=False,
                               numSeg=1,
                               stackSize=stackSize,
                               fmt='.png',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = twoStreamAttentionModel(stackSize=5,
                                    memSize=512,
                                    num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    classes = sorted(
        os.listdir(
            "/content/drive/My Drive/testingGithub/FPAR_project/GTEA61/processed_frames2/train/S1"
        ))[1:]
    print(classes)
    print(len(classes))

    model.train(False)
    model.cuda()

    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0
    predicted_labels = []
    true_labels = []
    with torch.no_grad():
        test_preds = get_all_preds(model, test_loader)
        labels = vid_seq_test.labels
        predictions = test_preds.argmax(dim=1)
        cm = confusion_matrix(labels, predictions)
        plt.figure(figsize=(25, 25))
        plot_confusion_matrix(cm, classes)
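get_all_preds is not defined in this snippet; a plausible minimal version is sketched below. The loader item layout and the two-argument model call are assumptions inferred from the surrounding two-stream code, not the project's actual helper:

@torch.no_grad()
def get_all_preds(model, loader):
    # stack the model's class scores for the whole test set
    all_preds = []
    for inputFlow, inputFrame, _ in loader:
        output = model(inputFlow.cuda(),
                       inputFrame.permute(1, 0, 2, 3, 4).cuda())
        all_preds.append(output.cpu())
    return torch.cat(all_preds)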
Code example #13
File: main.py Project: poem2018/AVSS2019
def main(config):
    # load model
    if config.model == 'c3d':
        model, params = VioNet_C3D(config)
    elif config.model == 'convlstm':
        model, params = VioNet_ConvLSTM(config)
    elif config.model == 'densenet':
        model, params = VioNet_densenet(config)
    elif config.model == 'densenet_lean':
        model, params = VioNet_densenet_lean(config)
    # default densenet
    else:
        model, params = VioNet_densenet_lean(config)

    # dataset
    dataset = config.dataset
    sample_size = config.sample_size
    stride = config.stride
    sample_duration = config.sample_duration

    # cross validation phase
    cv = config.num_cv

    # train set
    crop_method = GroupRandomScaleCenterCrop(size=sample_size)
    norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    spatial_transform = Compose(
        [crop_method,
         GroupRandomHorizontalFlip(),
         ToTensor(), norm])
    temporal_transform = RandomCrop(size=sample_duration, stride=stride)
    target_transform = Label()

    train_batch = config.train_batch

    train_data = VioDB('../VioDB/{}_jpg/'.format(dataset),
                       '../VioDB/{}_jpg{}.json'.format(dataset,
                                                       cv), 'training',
                       spatial_transform, temporal_transform, target_transform)
    train_loader = DataLoader(train_data,
                              batch_size=train_batch,
                              shuffle=True,
                              num_workers=4,
                              pin_memory=True)

    # val set
    crop_method = GroupScaleCenterCrop(size=sample_size)
    norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    spatial_transform = Compose([crop_method, ToTensor(), norm])
    temporal_transform = CenterCrop(size=sample_duration, stride=stride)
    target_transform = Label()

    val_batch = config.val_batch

    val_data = VioDB('../VioDB/{}_jpg/'.format(dataset),
                     '../VioDB/{}_jpg{}.json'.format(dataset,
                                                     cv), 'validation',
                     spatial_transform, temporal_transform, target_transform)
    val_loader = DataLoader(val_data,
                            batch_size=val_batch,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True)

    # make dir
    if not os.path.exists('./pth'):
        os.mkdir('./pth')
    if not os.path.exists('./log'):
        os.mkdir('./log')

    # log
    batch_log = Log(
        './log/{}_fps{}_{}_batch{}.log'.format(
            config.model,
            sample_duration,
            dataset,
            cv,
        ), ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    epoch_log = Log(
        './log/{}_fps{}_{}_epoch{}.log'.format(config.model, sample_duration,
                                               dataset, cv),
        ['epoch', 'loss', 'acc', 'lr'])
    val_log = Log(
        './log/{}_fps{}_{}_val{}.log'.format(config.model, sample_duration,
                                             dataset, cv),
        ['epoch', 'loss', 'acc'])

    # prepare
    criterion = nn.CrossEntropyLoss().to(device)

    learning_rate = config.learning_rate
    momentum = config.momentum
    weight_decay = config.weight_decay

    optimizer = torch.optim.SGD(params=params,
                                lr=learning_rate,
                                momentum=momentum,
                                weight_decay=weight_decay)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, verbose=True, factor=config.factor, min_lr=config.min_lr)

    acc_baseline = config.acc_baseline
    loss_baseline = 1

    for i in range(config.num_epoch):
        train(i, train_loader, model, criterion, optimizer, device, batch_log,
              epoch_log)
        val_loss, val_acc = val(i, val_loader, model, criterion, device,
                                val_log)
        scheduler.step(val_loss)
        if val_acc > acc_baseline or (val_acc >= acc_baseline
                                      and val_loss < loss_baseline):
            torch.save(
                model.state_dict(),
                './pth/{}_fps{}_{}{}_{}_{:.4f}_{:.6f}.pth'.format(
                    config.model, sample_duration, dataset, cv, i, val_acc,
                    val_loss))
            acc_baseline = val_acc
            loss_baseline = val_loss
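The temporal RandomCrop(size=sample_duration, stride=stride) used above operates on lists of frame indices. A minimal sketch of the usual behaviour (an assumption; the AVSS2019 implementation may differ):

import random

class TemporalRandomCropSketch:
    """Pick `size` frame indices at a fixed stride from a random start."""

    def __init__(self, size, stride=1):
        self.size = size
        self.stride = stride

    def __call__(self, frame_indices):
        # Assumes a non-empty index list.
        span = self.size * self.stride
        start = random.randint(0, max(0, len(frame_indices) - span))
        out = frame_indices[start:start + span:self.stride]
        while len(out) < self.size:  # loop short clips
            out = out + out
        return out[:self.size]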
Code example #14
import os
import torch
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import random
import glob
import sys


from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize = Normalize(mean=mean, std=std)
spatial_transform2 = Compose([Scale((7,7)), ToTensor(), normalize])

def gen_split(root_dir, stackSize):
    DatasetX = []
    DatasetY = []
    DatasetF = []
    Labels = []
    NumFrames = []
    # The root directory should be flow_x_processed/train or test
    for dir_user in sorted(os.listdir(root_dir)):
        class_id = 0
        dir = os.path.join(root_dir, dir_user)
        for target in sorted(os.listdir(dir)):
            dir1 = os.path.join(dir, target)
            insts = sorted(os.listdir(dir1))
            if insts != []:
Code example #15
    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center', 'custom']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        elif opt.train_crop == 'custom':
            crop_method = RandomSampleCrop(opt.sample_size)
        clip_transform = None

        spatial_transform = Compose(
            [ToTensor(opt.norm_value),
             ColorJitter(0.05, 0.05), norm_method])
        temporal_transform = TemporalRandomCrop(
            int(opt.sample_duration * opt.t_stride))

        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform)

        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
Code example #16
    if not opt.no_train:

        ##--------------------------------------------------------------------------------------------
        if opt.model == 'I3D':
            assert opt.train_crop in ['random', 'corner', 'center']
            if opt.train_crop == 'random':
                crop_method = MultiScaleRandomCrop([0.875], opt.sample_size)
            elif opt.train_crop == 'corner':
                crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
            elif opt.train_crop == 'center':
                crop_method = MultiScaleCornerCrop(opt.scales,
                                                   opt.sample_size,
                                                   crop_positions=['c'])
            spatial_transform = Compose([
                Scale((256, 256)), crop_method,
                RandomHorizontalFlip(),
                ToTensor(opt.norm_value), norm_method
            ])
            temporal_transform = TemporalRandomCrop(opt.sample_duration, 1)
        elif opt.model == 'resnet_50':
            assert opt.train_crop in ['random', 'corner', 'center']
            if opt.train_crop == 'random':
                crop_method = MultiScaleRandomCrop(
                    opt.scales, opt.sample_size)  # [1, 0.9, 0.875]
            elif opt.train_crop == 'corner':
                crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
            elif opt.train_crop == 'center':
                crop_method = MultiScaleCornerCrop(opt.scales,
                                                   opt.sample_size,
                                                   crop_positions=['c'])
            spatial_transform = Compose([
Code example #17
def main():
    global epochs

    # Config
    parser = argparse.ArgumentParser(
        description="To read EgoGesture Dataset and run through SSAR network")
    parser.add_argument('--path',
                        default='',
                        help='full path to EgoGesture Dataset')
    args = parser.parse_args()
    path = args.path

    # Setup multiscale random crop
    scales = [initial_scale]
    for _ in range(1, n_scales):
        scales.append(scales[-1] * scale_step)

    # Setup datasets / dataloaders
    if do_data_augmentation:
        train_spatial_transforms = Compose([
            MultiScaleRandomCrop(scales, (126, 224)),
            SpatialElasticDisplacement()
        ])
    else:
        train_spatial_transforms = transforms.Resize((126, 224))
    image_transform_train = Compose([
        train_spatial_transforms,
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    image_transform_val = Compose([
        transforms.Resize((126, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    image_transform_test = Compose([
        transforms.Resize((126, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    mask_transform = Compose([train_spatial_transforms, transforms.ToTensor()])
    if not do_data_augmentation:
        image_transform_train = image_transform_val

    subject_ids_train = [
        3, 4, 5, 6, 8, 10, 15, 16, 17, 20, 21, 22, 23, 25, 26, 27, 30, 32, 36,
        38, 39, 40, 42, 43, 44, 45, 46, 48, 49, 50
    ]
    subject_ids_val = [1, 7, 12, 13, 24, 29, 33, 34, 35, 37]
    subject_ids_test = [2, 9, 11, 14, 18, 19, 28, 31, 41, 47]

    if mode == 'training':
        train_dataset = EgoGestDataSequence(path,
                                            'train_dataset',
                                            image_transform_train,
                                            mask_transform,
                                            get_mask=use_mask_loss,
                                            subject_ids=subject_ids_train)
        val_dataset = EgoGestDataSequence(path,
                                          'val_dataset',
                                          image_transform_val,
                                          mask_transform,
                                          get_mask=use_mask_loss,
                                          subject_ids=subject_ids_val)
    # If we're not in training mode, swap the training dataset for the validation or test split
    elif mode == 'validation':
        train_dataset = EgoGestDataSequence(path,
                                            'val_dataset',
                                            image_transform_val,
                                            mask_transform,
                                            get_mask=use_mask_loss,
                                            subject_ids=subject_ids_val)
    else:
        train_dataset = EgoGestDataSequence(path,
                                            'val_dataset',
                                            image_transform_test,
                                            mask_transform,
                                            get_mask=use_mask_loss,
                                            subject_ids=subject_ids_test)

    # train_indices, val_indices, test_indices = check_and_split_data(host_name=hostname,
    #                                                                 data_folder=path,
    #                                                                 dataset_len=len(dataset),
    #                                                                 train_fraction=0.6,
    #                                                                 validation_fraction=0.2)

    torch.manual_seed(42)
    torch.backends.cudnn.deterministic = True

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               pin_memory=True,
                                               shuffle=True,
                                               collate_fn=collate_fn_padd)
    if mode == 'training':
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 num_workers=num_workers,
                                                 pin_memory=True,
                                                 shuffle=True,
                                                 collate_fn=collate_fn_padd)

    # Init model and load pre-trained weights
    rnet = resnet.resnet18(False)
    model = SSAR(ResNet=rnet,
                 input_size=83,
                 number_of_classes=83,
                 batch_size=batch_size,
                 dropout=dropout).cuda()
    model_weights = './weights/final_weights.pth'
    state = model.state_dict()
    loaded_weights = torch.load(model_weights)
    state.update(loaded_weights)
    model.load_state_dict(state)

    # Setup optimizer and loss
    criterion = torch.nn.CrossEntropyLoss(ignore_index=label_mask_value)
    criterion = criterion.cuda()
    if mode == 'training':
        set_train_mode(
            model, train=True
        )  # Need this here so the optimizer has the correct parameters to be trained
        optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()),
                         lr=learning_rate)
    else:
        optimizer = None

    # Continue from previous training checkpoint
    epoch_resume, step_resume, best_val_loss = load_latest(
        model, results_path, training_mode, optimizer)
    if not restore_training_variables:
        epoch_resume = 0
        step_resume = 0
        best_val_loss = np.inf

    # Train / test / val setup
    if mode != 'training':
        epoch_resume = 0
        step_resume = 0
        epochs = 1

    # old_tensor_set = set()

    # Accuracy bar plot
    plt.ion()
    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(10, 7), dpi=100)
    train_acc_bars = ax[0].bar(rel_poses, 0, 1 / accuracy_bins)
    val_acc_bars = ax[1].bar(rel_poses, 0, 1 / accuracy_bins)
    train_loss_line, = ax[2].plot([], [], label='Train Loss')
    val_loss_line, = ax[2].plot([], [], label='Val Loss')
    train_acc_texts = [
        ax[0].text(x,
                   y,
                   "",
                   horizontalalignment='center',
                   verticalalignment='bottom')
        for x, y in zip(rel_poses, np.ones_like(rel_poses))
    ]
    val_acc_texts = [
        ax[1].text(x,
                   y,
                   "",
                   horizontalalignment='center',
                   verticalalignment='bottom')
        for x, y in zip(rel_poses, np.ones_like(rel_poses))
    ]
    ax[0].set_ylim(0., 1.1)
    ax[0].set_title('Relative Gesture Position vs Training Accuracy')
    ax[1].set_ylim(0., 1.1)
    ax[1].set_title('Relative Gesture Position vs Validation Accuracy')
    ax[2].legend(loc='best')
    # loss_text = ax[2].text(0, -0.2, "Loss: ")
    plt.show()

    # Setup movie moviewriter for writing accuracy plot over time
    # moviewriter = FFMpegWriter(fps=1)
    # moviewriter.setup(fig, os.path.join(results_path, 'accuracy_over_time.mp4'), dpi=100)

    # Main training loop
    if mode == 'training':
        optimizer.zero_grad()
    train_history = {}
    val_history = {}
    patience_counter = 0
    for epoch in range(epoch_resume, epochs):
        # Display info
        print(f"Epoch: {epoch}")

        if epoch == epoch_resume and step_resume > 0:
            print(f"Fast forwarding to train step {step_resume}")

        # Reset epoch stats
        train_metrics = {}

        # Switch to train mode while freezing parts of the model we don't want to train
        set_train_mode(model, train=True)

        # Train
        if mode == 'training':
            print('Training:')
        for train_step, batch in enumerate(train_loader):
            # Advance train_loader to resume training from last checkpointed position (Note: Assumes same batch size)
            if epoch == epoch_resume and train_step < step_resume:
                del batch
                continue

            # Save model
            if mode == 'training' and train_step % 100 == 0 and (
                    train_step != step_resume or epoch != epoch_resume):
                save_model(model, optimizer, training_mode, epoch, train_step,
                           best_val_loss, results_path)

            # Run one training step (the optimizer may not step on every batch when doing gradient accumulation)
            loss, batch_correct_count_samples = process_batch(model,
                                                              train_step,
                                                              batch,
                                                              criterion,
                                                              optimizer,
                                                              mode=mode)
            del batch

            # Update metrics
            update_metrics(train_metrics, epoch, loss,
                           batch_correct_count_samples)

            if (train_step + 1) % 10 == 0:
                # Display metrics
                print_metrics(train_metrics, train_step)
                update_accuracy_plot(train_acc_bars, train_acc_texts,
                                     train_metrics['accuracy_hist'])

        # Update train metric history and plots for this epoch
        update_epoch_history(train_history, train_metrics)
        update_loss_plot(train_loss_line, train_history)

        # Validation
        if mode == 'training':
            print('Validation:')
            val_metrics = {}

            # Switch to evaluation mode for validation
            set_train_mode(model, train=False)

            for val_step, batch in enumerate(val_loader):
                loss, batch_correct_count_samples = process_batch(
                    model,
                    val_step,
                    batch,
                    criterion,
                    optimizer,
                    mode='validation')

                # Update metrics
                update_metrics(val_metrics, epoch, loss,
                               batch_correct_count_samples)

                if (val_step + 1) % 10 == 0:
                    # Display metrics
                    print_metrics(val_metrics, val_step)
                    update_accuracy_plot(val_acc_bars, val_acc_texts,
                                         val_metrics['accuracy_hist'])

            # Update validation metric history and plots
            update_epoch_history(val_history, val_metrics)
            update_loss_plot(val_loss_line, val_history)

            # Early stopping
            if val_metrics['loss_epoch'] < best_val_loss:
                best_val_loss = val_metrics['loss_epoch']
                patience_counter = 0
                save_model(model,
                           optimizer,
                           training_mode,
                           epoch,
                           val_step,
                           best_val_loss,
                           results_path,
                           filename_override='model_best.pth')
            else:
                patience_counter += 1
            if patience_counter >= early_stoppping_patience:
                print(
                    f'Validation loss did not improve for {patience_counter} epochs, stopping'
                )
                break

    # Save final model
    if mode == 'training' and (train_step != step_resume
                               or epoch != epoch_resume):
        save_model(model, optimizer, training_mode, epoch, train_step,
                   best_val_loss, results_path)

    print('Done!')

    plt.ioff()
    plt.show()
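The patience bookkeeping above is a standard early-stopping pattern; the same logic factored into a small helper (a sketch, not part of the original code):

class EarlyStopping:
    """Signal a stop when the monitored loss fails to improve."""

    def __init__(self, patience):
        self.patience = patience
        self.best = float('inf')
        self.counter = 0

    def step(self, loss):
        if loss < self.best:
            self.best = loss
            self.counter = 0
        else:
            self.counter += 1
        return self.counter >= self.patience  # True means stop training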
Code example #18
def main_run(dataset, flowModel, rgbModel, stackSize, seqLen, memSize,
             trainDatasetDir, valDatasetDir, outDir, trainBatchSize,
             valBatchSize, lr1, numEpochs, decay_step, decay_factor,
             uniformSampling):
    # GTEA 61
    num_classes = 61

    # Train/Validation/Test split
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]
    directory = trainDatasetDir

    model_folder = os.path.join(
        './', outDir, dataset,
        'twoStream')  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(directory,
                                train_splits,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                numSeg=1,
                                stackSize=stackSize,
                                fmt='.png',
                                seqLen=seqLen,
                                uniform_sampling=uniformSampling)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    vid_seq_val = makeDataset(directory,
                              val_splits,
                              spatial_transform=Compose([
                                  Scale(256),
                                  CenterCrop(224),
                                  ToTensor(), normalize
                              ]),
                              sequence=False,
                              numSeg=1,
                              stackSize=stackSize,
                              fmt='.png',
                              phase='Test',
                              seqLen=seqLen,
                              uniform_sampling=uniformSampling)

    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=2,
                                             pin_memory=True)
    valSamples = vid_seq_val.__len__()

    model = twoStreamAttentionModel(flowModel=flowModel,
                                    frameModel=rgbModel,
                                    stackSize=stackSize,
                                    memSize=memSize,
                                    num_classes=num_classes)

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    train_params = []

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True

    for params in model.frameModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]

    base_params = []
    for params in model.flowModel.layer4.parameters():
        base_params += [params]
        params.requires_grad = True

    model.cuda()

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD([
        {
            'params': train_params
        },
        {
            'params': base_params,
            'lr': 1e-4
        },
    ],
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn,
                                                      step_size=decay_step,
                                                      gamma=decay_factor)
    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        model.flowModel.layer4.train(True)
        for j, (inputFlow, inputFrame, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariableFlow = inputFlow.to(DEVICE)
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
            labelVariable = targets.to(DEVICE)
            output_label = model(inputVariableFlow, inputVariableFrame)
            loss = loss_fn(torch.log_softmax(output_label, dim=1),
                           labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == labelVariable).sum()
            epoch_loss += loss.data.item()

        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        # Validation Phase
        #if valDatasetDir is not None:
        if (epoch + 1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            numCorr = 0
            for j, (inputFlow, inputFrame, targets) in enumerate(val_loader):
                val_iter += 1
                inputVariableFlow = inputFlow.to(DEVICE)
                inputVariableFrame = inputFrame.permute(1, 0, 2, 3,
                                                        4).to(DEVICE)
                labelVariable = targets.to(DEVICE)
                output_label = model(inputVariableFlow, inputVariableFrame)
                loss = loss_fn(torch.log_softmax(output_label, dim=1),
                               labelVariable)
                val_loss_epoch += loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == labelVariable.data).sum()
            val_accuracy = (numCorr.item() / valSamples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_val_loss))
            print('Val Accuracy after {} epochs = {}%'.format(
                epoch + 1, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder +
                                   '/model_twoStream_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
        #else:
        #    if (epoch + 1) % 10 == 0:
        #        save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' + str(epoch + 1) + '.pth')
        #        torch.save(model.state_dict(), save_path_model)

        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
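The block of near-identical loops above (freeze everything, then re-enable the classifier, the LSTM cell, and selected layer4 convolutions) can be written once as a helper. A minimal sketch with the same semantics, where the module list is assumed from the code above:

def make_trainable(model, submodules):
    # Freeze every parameter, then re-enable gradients only for the listed
    # submodules; returns the trainable parameters for the optimizer.
    for p in model.parameters():
        p.requires_grad = False
    train_params = []
    for module in submodules:
        for p in module.parameters():
            p.requires_grad = True
            train_params.append(p)
    return train_params

# Usage sketch (mirrors the loops above; note the original enables only the
# conv sublayers of layer4, so those would be listed individually):
# train_params = make_trainable(model, [
#     model.classifier,
#     model.frameModel.lstm_cell,
#     model.frameModel.resNet.layer4[0].conv1,
#     model.frameModel.resNet.layer4[0].conv2,
#     model.frameModel.resNet.fc,
# ])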
Code example #19
File: main.py Project: ThanThoai/Action_Recognition
def main(config):
    if config.model == 'c3d':
        model, params = C3D(config)
    elif config.model == 'convlstm':
        model, params = ConvLSTM(config)
    elif config.model == 'densenet':
        model, params = densenet(config)
    elif config.model == 'densenet_lean':
        model, params = densenet_lean(config)
    elif config.model == 'resnext':
        model, params = resnext(config)
    else:
        model, params = densenet_lean(config)

    dataset = config.dataset
    sample_size = config.sample_size
    stride = config.stride
    sample_duration = config.sample_duration

    cv = config.num_cv

    # crop_method = GroupRandomScaleCenterCrop(size=sample_size)
    crop_method = MultiScaleRandomCrop(config.scales, config.sample_size[0])
    # norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    norm = Normalize([114.7748, 107.7354, 99.475], [1, 1, 1])
    # spatial_transform = Compose(
    #     [crop_method,
    #      GroupRandomHorizontalFlip(),
    #      ToTensor(1), norm])
    spatial_transform = Compose([
        RandomHorizontalFlip(), crop_method,
        ToTensor(config.norm_value), norm
    ])
    # temporal_transform = RandomCrop(size=sample_duration, stride=stride)
    temporal_transform = TemporalRandomCrop(config.sample_duration,
                                            config.downsample)
    target_transform = Label()

    train_batch = config.train_batch
    train_data = RWF2000('/content/RWF_2000/frames/',
                         g_path + '/RWF-2000.json', 'training',
                         spatial_transform, temporal_transform,
                         target_transform, dataset)
    train_loader = DataLoader(train_data,
                              batch_size=train_batch,
                              shuffle=True,
                              num_workers=4,
                              pin_memory=True)

    crop_method = GroupScaleCenterCrop(size=sample_size)
    norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    spatial_transform = Compose([crop_method, ToTensor(), norm])
    temporal_transform = CenterCrop(size=sample_duration, stride=stride)
    target_transform = Label()

    val_batch = config.val_batch

    val_data = RWF2000('/content/RWF_2000/frames/', g_path + '/RWF-2000.json',
                       'validation', spatial_transform, temporal_transform,
                       target_transform, dataset)
    val_loader = DataLoader(val_data,
                            batch_size=val_batch,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True)

    if not os.path.exists('{}/pth'.format(config.output)):
        os.mkdir('{}/pth'.format(config.output))
    if not os.path.exists('{}/log'.format(config.output)):
        os.mkdir('{}/log'.format(config.output))

    batch_log = Log(
        '{}/log/{}_fps{}_{}_batch{}.log'.format(
            config.output,
            config.model,
            sample_duration,
            dataset,
            cv,
        ), ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    epoch_log = Log(
        '{}/log/{}_fps{}_{}_epoch{}.log'.format(config.output, config.model,
                                                sample_duration, dataset, cv),
        ['epoch', 'loss', 'acc', 'lr'])
    val_log = Log(
        '{}/log/{}_fps{}_{}_val{}.log'.format(config.output, config.model,
                                              sample_duration, dataset, cv),
        ['epoch', 'loss', 'acc'])

    criterion = nn.CrossEntropyLoss().to(device)
    # criterion = nn.BCELoss().to(device)

    learning_rate = config.learning_rate
    momentum = config.momentum
    weight_decay = config.weight_decay

    optimizer = torch.optim.SGD(params=params,
                                lr=learning_rate,
                                momentum=momentum,
                                weight_decay=weight_decay,
                                dampening=False,
                                nesterov=False)

    # optimizer = torch.optim.Adam(params=params, lr = learning_rate, weight_decay= weight_decay)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, verbose=True, factor=config.factor, min_lr=config.min_lr)

    acc_baseline = config.acc_baseline
    loss_baseline = 1

    for p in range(1, config.num_prune):
        if p > 0:
            model = torch.load('{}/pth/prune_{}.pth'.format(
                config.output, p - 1))
        print(f"Prune {p}/{config.num_prune}")
        params = sum([np.prod(p.size()) for p in model.parameters()])
        print("Number of Parameters: %.1fM" % (params / 1e6))
        model = prune_model(model)
        params = sum([np.prod(p.size()) for p in model.parameters()])
        print("Number of Parameters: %.1fM" % (params / 1e6))
        model.to(config.device)
        acc_baseline = 0
        for i in range(5):
            train(i, train_loader, model, criterion, optimizer, device,
                  batch_log, epoch_log)
            val_loss, val_acc = val(i, val_loader, model, criterion, device,
                                    val_log)
            scheduler.step(val_loss)
            if val_acc > acc_baseline or (val_acc >= acc_baseline
                                          and val_loss < loss_baseline):
                # torch.save(
                # model.state_dict(),
                # '{}/pth/prune_{}_{}_fps{}_{}{}_{}_{:.4f}_{:.6f}.pth'.format(
                #     config.output, p, config.model, sample_duration, dataset, cv, i, val_acc,
                #     val_loss))
                torch.save(model,
                           '{}/pth/prune_{}.pth'.format(config.output, p))
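prune_model is defined elsewhere in this repo, and since the printed parameter count drops, it evidently removes filters outright. For illustration, a sketch of magnitude pruning using PyTorch's built-in utilities, which zeroes weights rather than removing them (an assumption, not the repo's actual method):

import torch.nn as nn
import torch.nn.utils.prune as prune

def prune_model_sketch(model, amount=0.2):
    # Zero out the smallest-magnitude conv weights, layer by layer.
    for module in model.modules():
        if isinstance(module, (nn.Conv2d, nn.Conv3d)):
            prune.l1_unstructured(module, name='weight', amount=amount)
            prune.remove(module, 'weight')  # bake the mask into the weights
    return model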
Code example #20
    cf = confusion_matrix(labels, pred).astype(float)
    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)
    cls_acc = cls_hit / cls_cnt
    print(cls_acc)
    print('Accuracy {:.02f}%'.format(np.mean(cls_acc) * 100))
    with open(opt.result_path, 'w') as f:
        json.dump(test_results, f)

if __name__ == '__main__':
    opt = parse_opts()
    opt.mean = get_mean()
    opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth)
    opt.sample_duration = 16
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(1),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(opt.val_list, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration, n_samples_for_each_video=0)
    data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size,
                                              shuffle=False, num_workers=opt.n_threads, pin_memory=True)
    model, _ = generate_model(opt)
    model = nn.DataParallel(model, device_ids=opt.gpus).cuda()
    print('loading model {}'.format(opt.model))
    model_data = torch.load(opt.model)
    assert opt.arch == model_data['arch']
    model.load_state_dict(model_data['state_dict'])
    model.eval()
    test(data_loader, model, opt)
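LoopPadding(opt.sample_duration) pads clips shorter than sample_duration by repeating their frame indices. A minimal sketch of that behaviour, assumed from the 3D-ResNets codebase conventions:

class LoopPaddingSketch:
    """Repeat frame indices until the clip holds `size` entries."""

    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        out = list(frame_indices)
        for index in out:
            if len(out) >= self.size:
                break
            out.append(index)
        return out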
Code example #21
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if opt.train_crop == 'random':
        crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(opt.scales,
                                           opt.sample_size,
                                           crop_positions=['c'])

    if not opt.no_val:
        spatial_transform = Compose([
            ToTensor(opt.norm_value),
            norm_method,
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        if opt.compress == 'mask':
            spatio_temporal_transform = None
            temporal_transform = None
        elif opt.compress == 'avg':
            spatio_temporal_transform = Averaged()
        elif opt.compress == 'one':
            spatio_temporal_transform = OneFrame()
        elif opt.compress == 'spatial':
            spatial_transform = Compose([
                LowResolution(opt.spatial_compress_size, use_cv2=opt.use_cv2),
                ToTensor(opt.norm_value),
                norm_method,
Code example #22
    elif opt.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(opt.scales,
                                           opt.sample_size,
                                           crop_positions=['c'])
    # spatial_transform = Compose([
    #     crop_method,
    #     RandomHorizontalFlip(),
    #     ToTensor(opt.norm_value), norm_method
    # ])
    spatial_transform = Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        #transforms.Resize(224),
        #transforms.RandomCrop(224),
        transforms.ToTensor(),
        #Threshold(200),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    target_transform = ClassLabel()
    training_data = get_training_set(opt, spatial_transform,
                                     target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.n_threads,
                                               pin_memory=True)
    train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
Code example #23
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             memSize, evalInterval, evalMode, numWorkers, outDir,
             fightsDir_train, noFightsDir_train, fightsDir_test,
             noFightsDir_test):

    train_dataset_dir_fights = fightsDir_train
    train_dataset_dir_noFights = noFightsDir_train
    test_dataset_dir_fights = fightsDir_test
    test_dataset_dir_noFights = noFightsDir_test

    trainDataset, trainLabels, trainNumFrames = make_split(
        train_dataset_dir_fights, train_dataset_dir_noFights)
    testDataset, testLabels, testNumFrames = make_split(
        test_dataset_dir_fights, test_dataset_dir_noFights)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vidSeqTrain = makeDataset(trainDataset,
                              trainLabels,
                              trainNumFrames,
                              spatial_transform=spatial_transform,
                              seqLen=seqLen)

    # trainLoader = torch.utils.data.DataLoader(vidSeqTrain, batch_size=trainBatchSize,
    #                         shuffle=True, num_workers=numWorkers, pin_memory=True, drop_last=True)
    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              pin_memory=True,
                                              drop_last=True)

    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224),
             ToTensor(), normalize])
        testBatchSize = 1
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])
        testBatchSize = 1

    vidSeqTest = makeDataset(testDataset,
                             testLabels,
                             testNumFrames,
                             seqLen=seqLen,
                             spatial_transform=test_spatial_transform)

    # testLoader = torch.utils.data.DataLoader(vidSeqTest, batch_size=testBatchSize,
    #                         shuffle=False, num_workers=int(numWorkers/2), pin_memory=True)
    testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                             batch_size=testBatchSize,
                                             shuffle=False,
                                             pin_memory=True)

    numTrainInstances = vidSeqTrain.__len__()
    numTestInstances = vidSeqTest.__len__()

    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of testing samples = {}'.format(numTestInstances))

    modelFolder = './experiments_' + outDir  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(modelFolder):
        print(modelFolder + ' exists!!!')
        sys.exit()
    else:
        os.makedirs(modelFolder)
    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'w')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'w')
    testLogLoss = open((modelFolder + '/testLogLoss.txt'), 'w')
    testLogAcc = open((modelFolder + '/testLogAcc.txt'), 'w')

    model = ViolenceModel(mem_size=memSize)

    trainParams = []
    for params in model.parameters():
        params.requires_grad = True
        trainParams += [params]
    model.train(True)
    # model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    minAccuracy = 50

    for epoch in range(numEpochs):
        if epoch != 0:
            optimScheduler.step()
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            # inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4))
            # labelVariable = Variable(targets.cuda())
            # print(inputVariable1.shape)
            labelVariable = Variable(targets)
            # print("labelVariable")
            # print(labelVariable)
            # print("targets")
            # print(targets)
            outputLabel = model(inputVariable1)
            # print("outputs")
            # print(outputLabel)
            loss = lossFn(outputLabel, labelVariable)

            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            # print("outputProb.data going into torch.max ")
            # print(outputProb.data)
            _, predicted = torch.max(outputProb.data, 1)
            # numCorrTrain += (predicted == targets.cuda()).sum()
            # print("predicted")
            # print(predicted)
            numCorrTrain += (predicted == targets).sum()
            # epochLoss += loss.data[0]
            # print(numCorrTrain)
            epochLoss += loss.data.item()
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (numCorrTrain / numTrainInstances) * 100
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            testLossEpoch = 0
            testIter = 0
            numCorrTest = 0
            for j, (inputs, targets) in enumerate(testLoader):
                testIter += 1
                if evalMode == 'centerCrop':
                    # inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4).cuda(), volatile=True)
                    inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4),
                                              volatile=True)
                else:
                    # inputVariable1 = Variable(inputs[0].cuda(), volatile=True)
                    inputVariable1 = Variable(inputs[0], volatile=True)
                # labelVariable = Variable(targets.cuda(async =True), volatile=True)
                labelVariable = Variable(targets)
                outputLabel = model(inputVariable1)
                outputLabel_mean = torch.mean(outputLabel, 0, True)
                testLoss = lossFn(outputLabel_mean, labelVariable)
                testLossEpoch += testLoss.data.item()
                _, predicted = torch.max(outputLabel_mean.data, 1)
                numCorrTest += (predicted == targets[0]).sum()
            testAccuracy = (numCorrTest / numTestInstances) * 100
            avgTestLoss = testLossEpoch / testIter
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgTestLoss, testAccuracy))
            writer.add_scalar('test/epochloss', avgTestLoss, epoch + 1)
            writer.add_scalar('test/accuracy', testAccuracy, epoch + 1)
            testLogLoss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avgTestLoss))
            testLogAcc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, testAccuracy))
            if testAccuracy > minAccuracy:
                savePathClassifier = (modelFolder + '/bestModel.pth')
                torch.save(model, savePathClassifier)
                minAccuracy = testAccuracy
    trainLogAcc.close()
    testLogAcc.close()
    trainLogLoss.close()
    testLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return True
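Note the two checkpoint styles used across these examples: torch.save(model, path) above pickles the whole module, while earlier examples save only model.state_dict(). A short sketch of loading each (the path and ViolenceModel constructor come from the code above; weights_path is a placeholder):

# Whole-module checkpoint: the model class must be importable at load time.
model = torch.load(modelFolder + '/bestModel.pth', map_location='cpu')

# state_dict checkpoint: rebuild the architecture first, then load weights.
# model = ViolenceModel(mem_size=memSize)
# model.load_state_dict(torch.load(weights_path, map_location='cpu'))
model.eval()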
Code example #24
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen,
             memSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    testBatchSize = 1
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=False,
                               numSeg=1,
                               stackSize=stackSize,
                               fmt='.jpg',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = twoStreamAttentionModel(stackSize=5,
                                    memSize=512,
                                    num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()

    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0

    predicted_labels = []
    true_labels = []
    for j, (inputFlow, inputFrame, targets) in enumerate(test_loader):
        inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).cuda(),
                                      volatile=True)
        inputVariableFlow = Variable(inputFlow.cuda(), volatile=True)
        output_label = model(inputVariableFlow, inputVariableFrame)
        _, predictedTwoStream = torch.max(output_label.data, 1)
        numCorrTwoStream += (predictedTwoStream == targets.cuda()).sum()
        predicted_labels.extend(predictedTwoStream.cpu().numpy())
        true_labels.extend(targets.numpy())
    test_accuracyTwoStream = (numCorrTwoStream / float(test_samples)) * 100
    print('Accuracy {:.02f}%'.format(test_accuracyTwoStream))
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamJoint.jpg', bbox_inches='tight')
    plt.show()
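The per-class accuracies printed by the earlier test script can be read directly off a confusion matrix like this one; a short sketch using the same sklearn call:

import numpy as np
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(true_labels, predicted_labels).astype(float)
per_class_acc = np.diag(cm) / cm.sum(axis=1)  # diagonal over row sums
print('Mean class accuracy {:.02f}%'.format(np.mean(per_class_acc) * 100))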
Code example #25
        img_prefix = ''

    whole_model, parameters = generate_model(args)
    print(whole_model)
    # input('...')

    if args.no_mean_norm and not args.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not args.std_norm:
        norm_method = Normalize(args.mean, [1, 1, 1])
    else:
        norm_method = Normalize(args.mean, args.std)

    spatial_transform = Compose([
        Scale(args.sample_size),
        CenterCrop(args.sample_size),
        ToTensor(args.norm_value), norm_method
    ])
    # if not args.test_temp_crop == 'sparse':
    if args.compared_temp_transform == 'shuffle':
        temp_transform = ShuffleFrames(args.sample_duration)
    else:
        temp_transform = ReverseFrames(args.sample_duration)

    temp_crop_method = TemporalRandomCrop(args.sample_duration)
    # if args.compared_temp_transform == 'reverse':
    # temp_transform = Compose([
    # ReverseFrames(args.sample_duration),
    # temp_crop_method
    # ])
    # elif args.compared_temp_transform == 'shuffle':
Code example #26
File: rawrun.py Project: dribnet/3D-ResNets-PyTorch
def main():
    parser = argparse.ArgumentParser(description="Run model against images")
    parser.add_argument(
        '--input-glob',
        default=
        'data/kinetics_videos/jpg/yoga/0wHOYxjRmlw_000041_000051/image_000{41,42,43,44,45,46,47,48,49,50,41,42,43,44,45,46}.jpg',
        help="inputs")
    parser.add_argument("--depth", default="50", help="which model depth")
    args = parser.parse_args()

    model_file = model_files[args.depth]
    model_depth = int(args.depth)

    model = resnet.generate_model(model_depth=model_depth,
                                  n_classes=700,
                                  n_input_channels=3,
                                  shortcut_type="B",
                                  conv1_t_size=7,
                                  conv1_t_stride=1,
                                  no_max_pool=False,
                                  widen_factor=1.0)

    # model = load_pretrained_model(model, args.model, "resnet", 700)

    checkpoint = torch.load(model_file, map_location='cpu')
    arch = '{}-{}'.format("resnet", model_depth)
    print(arch, checkpoint['arch'])
    assert arch == checkpoint['arch']

    if hasattr(model, 'module'):
        # I think this is only needed for legacy (DataParallel-wrapped) models
        model.module.load_state_dict(checkpoint['state_dict'])
    else:
        model.load_state_dict(checkpoint['state_dict'])

    model.eval()

    image_clips = []
    files = real_glob(args.input_glob)
    files = extend_to_length(files, 16)
    print(files)
    for f in files:
        img = Image.open(f).convert("RGB")
        image_clips.append(img)

    # print("EARLY", image_clips[0][0:4,0:4,0])

    mean = [0.4345, 0.4051, 0.3775]
    std = [0.2768, 0.2713, 0.2737]
    normalize = Normalize(mean, std)

    sample_size = 112

    spatial_transform = [Resize(sample_size)]
    spatial_transform.append(CenterCrop(sample_size))
    spatial_transform.append(ToTensor())
    spatial_transform.extend([ScaleValue(1), normalize])
    spatial_transform = Compose(spatial_transform)

    # c = spatial_transform(image_clips[0])
    # c.save("raw.png")

    model_clips = []
    clip = [spatial_transform(img) for img in image_clips]
    model_clips.append(torch.stack(clip, 0).permute(1, 0, 2, 3))
    model_clips = torch.stack(model_clips, 0)

    print("Final", model_clips.shape)
    print("PEEK", model_clips[0, 0, 0, 0:4, 0:4])

    with torch.no_grad():
        outputs = model(model_clips)
        print(outputs[0][0:10])
        outputs = F.softmax(outputs, dim=1).cpu()

    sorted_scores, locs = torch.topk(outputs[0], k=3)

    print(locs[0])

    video_results = []
    for i in range(sorted_scores.size(0)):
        video_results.append({
            'label': magic_labels_700[locs[i].item()],
            'score': sorted_scores[i].item()
        })

    print(video_results)
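real_glob and extend_to_length are helpers defined elsewhere; from its use above, extend_to_length apparently repeats a file list until it reaches the requested length. A plausible sketch under that assumption:

def extend_to_length_sketch(files, n):
    # Cycle through the list until it holds n entries (assumes a non-empty list).
    out = list(files)
    while len(out) < n:
        out.append(files[len(out) % len(files)])
    return out[:n]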
Code example #27
File: main.py Project: sophiazy/3D-ResNets-PyTorch
    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    model = generate_model(opt)
    print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if not opt.no_train:
        spatial_transform = Compose([
            MultiScaleCornerCrop(opt.scales, opt.sample_size),
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value),
            Normalize(opt.mean, [1, 1, 1])
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        if opt.dataset == 'kinetics':
            training_data = Kinetics(opt.video_path,
                                     opt.annotation_path,
                                     'training',
                                     spatial_transform=spatial_transform,
                                     temporal_transform=temporal_transform,
                                     target_transform=target_transform)
        else:
            training_data = ActivityNet(opt.video_path,
                                        opt.annotation_path,
                                        'training',
Code example #28
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict,
             out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1,
             decay_factor, decay_step, memSize, regression, rloss, debug,
             verbose, CAM):
    # GTEA 61
    num_classes = 61

    # Train/Validation/Test split
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]

    if debug:
        n_workers = 0
        device = 'cpu'
    else:
        n_workers = 4
        device = 'cuda'

    model_folder = os.path.join(
        './', out_dir, dataset, 'rgb',
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        # ToTensor(),
        # normalize
    ])
    transform_rgb = Compose([ToTensor(), normalize])
    transform_MS = Compose([Resize((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(train_data_dir,
                                splits=train_splits,
                                spatial_transform=spatial_transform,
                                transform_rgb=transform_rgb,
                                transform_MS=transform_MS,
                                seqLen=seqLen,
                                fmt='.png',
                                regression=regression)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=n_workers,
                                               pin_memory=True)

    vid_seq_val = makeDataset(train_data_dir,
                              splits=val_splits,
                              spatial_transform=Compose(
                                  [Scale(256), CenterCrop(224)]),
                              transform_rgb=transform_rgb,
                              transform_MS=transform_MS,
                              seqLen=seqLen,
                              fmt='.png',
                              regression=regression,
                              verbose=False)

    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=n_workers,
                                             pin_memory=True)
    valInstances = len(vid_seq_val)
    '''
    if val_data_dir is not None:

        vid_seq_val = makeDataset(val_data_dir,
                                  spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                  seqLen=seqLen, fmt='.jpg')

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                                 shuffle=False, num_workers=2, pin_memory=True)
        valInstances = vid_seq_val.__len__()
    '''
    trainInstances = len(vid_seq_train)

    train_params = []
    if stage == 1:
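        # Stage 1: freeze the whole network; only the LSTM cell and the
        # classifier (re-enabled further below) receive gradients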
        if regression:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize,
                                                 n_channels=1)
        else:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:
        if regression:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize,
                                                 n_channels=1)
        else:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize)

        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        # Unfreeze the last ResNet block's convolutions and the final fc
        # layer, and put them in train mode
        finetune_modules = [
            model.resNet.layer4[0].conv1, model.resNet.layer4[0].conv2,
            model.resNet.layer4[1].conv1, model.resNet.layer4[1].conv2,
            model.resNet.layer4[2].conv1, model.resNet.layer4[2].conv2,
            model.resNet.fc
        ]
        for module in finetune_modules:
            for params in module.parameters():
                params.requires_grad = True
                train_params += [params]
            module.train(True)

        # Add params from ms_module
        for params in model.ms_module.parameters():
            params.requires_grad = True
            train_params += [params]

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.ms_module.train(True)
    model.to(device)

    # wandb.init(project="first_person_action_recognition")

    loss_fn = nn.CrossEntropyLoss()
    if regression:
        if rloss == 'MSE':
            # Mean Squared Error loss
            loss_ms_fn = nn.MSELoss()
        elif rloss == 'L1':
            # L1 loss
            loss_ms_fn = nn.L1Loss()
        elif rloss == 'SmoothL1':
            # Huber loss (Smooth L1)
            loss_ms_fn = nn.SmoothL1Loss()
        elif rloss == 'KLdiv':
            # Kullback-Leibler divergence loss
            loss_ms_fn = nn.KLDivLoss()
        else:
            raise ValueError('Unknown regression loss: {}'.format(rloss))
    else:
        # classification
        loss_ms_fn = nn.CrossEntropyLoss()  # TODO: check paper Planamente

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)

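    # MultiStepLR multiplies the learning rate by decay_factor (gamma) at
    # every epoch index listed in decay_step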
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    best_accuracy = 0  # best validation accuracy seen so far

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0

        #model.train(True)
        model.lstm_cell.train(True)
        model.classifier.train(True)
        if stage == 2:
            # re-enable train mode for the fine-tuned backbone modules
            for module in finetune_modules:
                module.train(True)
            model.ms_module.train(True)

        for i, (inputsRGB, inputsMS, targets) in enumerate(train_loader):
            # Inputs:
            #   - inputsRGB : the RGB frame sequence
            # Labels:
            #   - inputsMS  : the motion-segmentation task label
            #   - targets   : the action class label

            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
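            # permute to (seqLen, batch, C, H, W): the model consumes
            # time-first sequences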
            inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to(device)
            labelVariable = targets.to(device)
            msVariable = inputsMS.to(device)
            trainSamples += inputsRGB.size(0)
            output_label, _, output_ms = model(inputVariable, device)
            loss_c = loss_fn(output_label, labelVariable)
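            # The MS loss is computed per 7x7 grid cell: targets and
            # predictions are flattened to (seqLen*49, batch); in the
            # classification variant dim 1 carries the 2 class scores, the
            # layout nn.CrossEntropyLoss expects for (N, C, d1) inputs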
            if regression:
                msVariable = torch.reshape(
                    msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                output_ms = torch.sigmoid(output_ms)
                output_ms = torch.reshape(output_ms,
                                          (seqLen * 7 * 7, output_ms.size(0)))
            else:
                # classification task
                msVariable = torch.reshape(
                    msVariable, (seqLen * 7 * 7, msVariable.size(0))).long()
                output_ms = torch.reshape(
                    output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))  #

            loss_ms = loss_ms_fn(output_ms, msVariable)
            loss = loss_c + loss_ms
            if verbose:
                print(loss_c)
                print(loss_ms)
                print(loss)
                print()
            # loss = loss_fn(output_label, labelVariable) + loss_ms_fn(output_ms, inputsMS) # TODO (maybe): swap dims 0 and 1 for inputsMS # output1 = F.softmax(torch.reshape(output_ms, (32, 7, 2, 7*7))[0, 0, :, :], dim=0)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.data.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100

        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))

        # VALIDATION PHASE
        #if val_data_dir is not None:
        if (epoch + 1) % 1 == 0:  # validate every epoch
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            for j, (inputsRGB, inputsMS, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputsRGB.size(0)
                inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to(
                    device)  # the permutation only rearranges data for computation
                labelVariable = targets.to(device)
                msVariable = inputsMS.to(device)
                output_label, _, output_ms = model(inputVariable, device)
                loss_c = loss_fn(output_label, labelVariable)
                if regression:
                    msVariable = torch.reshape(
                        msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                    output_ms = torch.sigmoid(output_ms)
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, output_ms.size(0)))
                else:
                    # classification task
                    msVariable = torch.reshape(
                        msVariable,
                        (seqLen * 7 * 7, msVariable.size(0))).long()
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))
                loss_ms = loss_ms_fn(output_ms, msVariable)
                val_loss = loss_c + loss_ms
                # val_loss = loss_fn(output_label, labelVariable) # TODO: add ms Loss
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == targets.to(device)).sum()
            val_accuracy = (numCorr.data.item() / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Valid: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))

            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > best_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                best_accuracy = val_accuracy
            '''else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
                '''
        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
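For reference, a minimal sketch of a stage-1 call to the routine above; only the argument names come from the signature, while the path and all hyperparameter values are illustrative assumptions:

# Hypothetical invocation; every value here is an assumption chosen for
# illustration, not taken from the source.
main_run(dataset='gtea61',
         stage=1,
         train_data_dir='./GTEA61/frames',  # assumed dataset root
         val_data_dir=None,                 # unused: validation is split-based
         stage1_dict=None,                  # only needed for stage 2
         out_dir='experiments',
         seqLen=7,
         trainBatchSize=32,
         valBatchSize=32,
         numEpochs=200,
         lr1=1e-3,
         decay_factor=0.1,
         decay_step=[25, 75],
         memSize=512,
         regression=True,
         rloss='MSE',
         debug=False,
         verbose=False,
         CAM=False)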
Code example #29
File: main.py Project: DeLightCMU/3D-ResNets-PyTorch
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
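            # restricting MultiScaleCornerCrop to position 'c' keeps the
            # multi-scale jitter but always crops the center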
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=False,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)
        train_logger = Logger(
            os.path.join(opt.result_path, 'train.log'),
            ['epoch', 'loss', 'acc', 'acc_img', 'lr', 'epoch_time'])
        train_batch_logger = Logger(
Code example #30
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = Compose([TemporalRandomCrop(opt.sample_duration)])
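        # functionally the same as the bare TemporalRandomCrop in the
        # previous example; Compose merely allows chaining more transforms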
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])