Example #1
def main():
    in_frames = 2
    batch_size = 2

    train_dataloader = torch.utils.data.DataLoader(
        VideoDataset(frames_dir='test_vid/train', img_size=1024, num_in_frames=in_frames),
        batch_size=batch_size, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(
        VideoDataset(frames_dir='test_vid/test', img_size=1024, num_in_frames=in_frames),
        batch_size=batch_size, shuffle=False)

    model_dict = init_model(res_blocks=1, in_frames=in_frames,
                            batch_size=batch_size, epoch_to_load=None)
    train(model_dict['Unet'], model_dict['Discriminator'], train_dataloader,
          model_dict['Unet_optimizer'], model_dict['Discriminator_optimizer'],
          test_dataloader=test_dataloader, epochs=50)
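
Every example on this page assumes a project-specific VideoDataset class. Below is a minimal sketch of a Dataset compatible with the constructor call above; the on-disk layout and the returned tensor shape are assumptions, not the original implementation:

import os
import torch
from torch.utils.data import Dataset

class VideoDataset(Dataset):
    """Hypothetical skeleton: serves num_in_frames consecutive frames per item."""

    def __init__(self, frames_dir, img_size, num_in_frames):
        self.frames_dir = frames_dir
        self.img_size = img_size
        self.num_in_frames = num_in_frames
        self.frame_files = sorted(os.listdir(frames_dir))  # one image file per frame

    def __len__(self):
        return max(0, len(self.frame_files) - self.num_in_frames + 1)

    def __getitem__(self, idx):
        # Placeholder: real code would decode and resize the files at
        # self.frame_files[idx : idx + self.num_in_frames].
        return torch.zeros(self.num_in_frames, 3, self.img_size, self.img_size)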
Example #2
def load(fname='../../jackson-clips'):
    print("Starting to load data.")
    batch_size = 128
    train_data = VideoDataset.VideoDataset(fname=fname,
                                           transform=[transforms.ToTensor()])
    train_loader = torch.utils.data.DataLoader(train_data, shuffle=True,
                                               batch_size=batch_size,
                                               num_workers=8, drop_last=True)
    return train_loader
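
A call sketch for the helper above; what each batch contains depends on this project's VideoDataset, so only the loader mechanics are shown:

train_loader = load(fname='../../jackson-clips')
first_batch = next(iter(train_loader))  # one shuffled batch of 128 clips
print('batches per epoch:', len(train_loader))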
Example #3
def preprocess_frame(bb, frameNum, frame):
    bb_dims = ['xmin', 'ymin', 'xmax', 'ymax']

    # crop frame
    x_min, y_min, x_max, y_max = [
        bb.loc[bb['frame'] == frameNum][dim].tolist()[0] for dim in bb_dims
    ]
    frame = frame[int(y_min):int(y_max), int(x_min):int(x_max), :]
    # resize frame
    frame = VideoDataset.resize_frame(frame)
    return frame
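
A usage sketch under assumed inputs: bb is a pandas DataFrame with frame, xmin, ymin, xmax and ymax columns (matching how the snippet indexes it), and the raw frame comes from OpenCV; both file names are hypothetical:

import cv2
import pandas as pd

bb = pd.read_csv('labels.csv')        # hypothetical bounding-box table
cap = cv2.VideoCapture('clip.mp4')    # hypothetical source video
ok, raw = cap.read()
if ok:
    crop = preprocess_frame(bb, frameNum=0, frame=raw)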
Example #4
    def __next__(self):
        if self.pos >= len(self.lists):
            raise StopIteration

        pos = self.pos
        self.pos += 1

        if len(self.labels) == 0:
            label = None
        else:
            label = self.labels[pos]

        dataset = VideoDataset(self.lists[pos], label, self.seq_num,
                               self.img_size)
        dataLoader = torch.utils.data.DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=True,
        )

        return dataLoader
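
The attributes used above (self.lists, self.labels, self.pos, self.seq_num, self.img_size, self.batch_size) imply an enclosing iterator class. A reconstruction sketch; only __next__ comes from the example, so the class name, __init__, and __iter__ are assumptions:

class VideoLoaderIterator:
    def __init__(self, lists, labels, seq_num, img_size, batch_size):
        self.lists = lists          # one clip list per video
        self.labels = labels        # may be empty for unlabeled data
        self.seq_num = seq_num
        self.img_size = img_size
        self.batch_size = batch_size
        self.pos = 0

    def __iter__(self):
        return self

    # __next__ is exactly the method shown in the example above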
Example #5
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             memSize, evalInterval, evalMode, numWorkers, outDir,
             fightsDir_train, noFightsDir_train, fightsDir_test,
             noFightsDir_test):

    train_dataset_dir_fights = fightsDir_train
    train_dataset_dir_noFights = noFightsDir_train
    test_dataset_dir_fights = fightsDir_test
    test_dataset_dir_noFights = noFightsDir_test

    trainDataset, trainLabels, trainNumFrames = make_split(
        train_dataset_dir_fights, train_dataset_dir_noFights)
    testDataset, testLabels, testNumFrames = make_split(
        test_dataset_dir_fights, test_dataset_dir_noFights)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vidSeqTrain = VideoDataset(trainDataset,
                               trainLabels,
                               trainNumFrames,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen)

    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)

    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224),
             ToTensor(), normalize])
        testBatchSize = 1
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])
        testBatchSize = 1
    else:
        # without this guard, an unknown evalMode would leave
        # test_spatial_transform and testBatchSize undefined below
        raise ValueError('Unsupported evalMode: {}'.format(evalMode))

    vidSeqTest = VideoDataset(testDataset,
                              testLabels,
                              testNumFrames,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)

    testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                             batch_size=testBatchSize,
                                             shuffle=False,
                                             num_workers=int(numWorkers / 2),
                                             pin_memory=True)

    numTrainInstances = len(vidSeqTrain)
    numTestInstances = len(vidSeqTest)

    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of testing samples = {}'.format(numTestInstances))

    modelFolder = './experiments_' + outDir  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(modelFolder):
        print(modelFolder + ' already exists; aborting to avoid overwriting.')
        sys.exit()
    else:
        os.makedirs(modelFolder)
    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'w')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'w')
    testLogLoss = open((modelFolder + '/testLogLoss.txt'), 'w')
    testLogAcc = open((modelFolder + '/testLogAcc.txt'), 'w')

    model = ViolenceModel(mem_size=memSize)

    trainParams = []
    for params in model.parameters():
        params.requires_grad = True
        trainParams += [params]
    model.train(True)
    model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    minAccuracy = 50  # accuracy floor for checkpointing; later tracks the best test accuracy

    for epoch in range(numEpochs):
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            # move tensors to the GPU directly; the Variable wrapper is deprecated
            inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
            labelVariable = targets.cuda()
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            numCorrTrain += (predicted == labelVariable).sum().item()
            epochLoss += loss.item()
        optimScheduler.step()  # step the LR schedule once per epoch, after the optimizer updates
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (numCorrTrain / numTrainInstances) * 100
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % evalInterval == 0:
            model.eval()
            print('Evaluating...')
            testLossEpoch = 0
            testIter = 0
            numCorrTest = 0
            with torch.no_grad():  # replaces the deprecated volatile=True flags
                for j, (inputs, targets) in enumerate(testLoader):
                    testIter += 1
                    if evalMode == 'centerCrop':
                        inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
                    else:
                        inputVariable1 = inputs[0].cuda()
                    labelVariable = targets.cuda(non_blocking=True)  # `async` is a keyword in Python 3
                    outputLabel = model(inputVariable1)
                    outputLabel_mean = torch.mean(outputLabel, 0, True)
                    testLoss = lossFn(outputLabel_mean, labelVariable)
                    testLossEpoch += testLoss.item()
                    _, predicted = torch.max(outputLabel_mean.data, 1)
                    numCorrTest += (predicted.cpu() == targets[0]).sum().item()
            testAccuracy = (numCorrTest / numTestInstances) * 100
            avgTestLoss = testLossEpoch / testIter
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgTestLoss, testAccuracy))
            writer.add_scalar('test/epochloss', avgTestLoss, epoch + 1)
            writer.add_scalar('test/accuracy', testAccuracy, epoch + 1)
            testLogLoss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avgTestLoss))
            testLogAcc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, testAccuracy))
            if testAccuracy > minAccuracy:
                savePathClassifier = (modelFolder + '/bestModel.pth')
                torch.save(model, savePathClassifier)
                minAccuracy = testAccuracy
    trainLogAcc.close()
    testLogAcc.close()
    trainLogLoss.close()
    testLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return True
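
An invocation sketch for main_run; every value and path below is an illustrative placeholder, not a recommended setting:

main_run(numEpochs=50, lr=1e-4, stepSize=25, decayRate=0.1,
         trainBatchSize=16, seqLen=20, memSize=256,
         evalInterval=5, evalMode='centerCrop', numWorkers=4,
         outDir='violence',
         fightsDir_train='data/train/fights',
         noFightsDir_train='data/train/noFights',
         fightsDir_test='data/test/fights',
         noFightsDir_test='data/test/noFights')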
Example #6
def getDataset(path, label, seq_num, img_size, mode):
    return VideoDataset(path, label, seq_num, img_size, mode)
Example #7
import torch
#import torchvision
from torch import optim, nn
import VideoDataset
import Model

batch_size = 32
dataset_train = VideoDataset.VideoDataset()
dataset_val = VideoDataset.VideoDataset(dataset='val')
dataset_test = VideoDataset.VideoDataset(dataset='test')
loaders = {
    'train':
    torch.utils.data.DataLoader(dataset_train,
                                batch_size=batch_size,
                                shuffle=True,
                                num_workers=1),
    'val':
    torch.utils.data.DataLoader(dataset_val,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=1),
    'test':
    torch.utils.data.DataLoader(dataset_test,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=1)
}
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())
print(device)
#exit(2)
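
The script stops after building the loaders; below is a minimal training-epoch sketch over them. Model.Model() and the (inputs, targets) batch layout are assumptions, since neither class is shown:

model = Model.Model().to(device)  # hypothetical constructor name
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

model.train()
for inputs, targets in loaders['train']:
    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()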
Example #8
    def load_data(self,
                  mode='auto',
                  data_path='../data/',
                  numFramesToLoad=1000,
                  need_split=True):
        labels_fname = 'jackson-town-square-2017-12-14.csv'

        # load model
        # encoder = init_encoder(mode)
        if mode == 'auto':
            model_fname = 'models/autoencoder_0.0001.pth'
            # torch.load returns the full pickled AutoEncoder, so the fresh
            # AutoEncoder() instance the original built here was immediately overwritten
            encoder = torch.load(model_fname)
            for param in encoder.parameters():
                param.requires_grad = False
        elif mode == 'res18':
            encoder = models.resnet18(pretrained=True)
            encoder = nn.Sequential(*list(encoder.children())[:-1])
            encoder = encoder.to(device)
            # turn off intermediate state saving
            for param in encoder.parameters():
                param.requires_grad = False
        elif mode == 'res50':
            encoder = models.resnet50(pretrained=True)
            encoder = nn.Sequential(*list(encoder.children())[:-1])
            encoder = encoder.to(device)
            # turn off intermediate state saving
            for param in encoder.parameters():
                param.requires_grad = False
        else:
            raise Exception("Illegal parameter for mode")

        # get unique frames with vehicles
        vehicleFrames = getVehicleFrames(data_path + labels_fname)[0]
        shuffle(vehicleFrames)

        # bb = preprocess_bb()
        bb = pd.read_csv(data_path + labels_fname, header=0)
        bb_dims = ['xmin', 'ymin', 'xmax', 'ymax']

        video_fname = '../../jackson-clips'
        video = swag.VideoCapture(video_fname)

        carCount = 0
        truckCount = 0
        margin = 5
        numFramesLoaded = 0

        frameNums = np.empty((0))
        for frameIter in range(len(vehicleFrames)):
            frameNum = vehicleFrames[frameIter]  # frame number as it appears in the BB dataset

            # force class balancing
            vehicleType = bb.loc[bb['frame'] ==
                                 frameNum]['object_name'].to_string()
            vehicleType = vehicleType.split(' ')[-1]
            if vehicleType == 'car':
                if carCount - truckCount > margin:
                    continue  # more cars than trucks, so skip
                carCount += 1
            if vehicleType == 'truck':
                if truckCount - carCount > margin:
                    continue  # more trucks than cars, so skip
                truckCount += 1

            # read in frame of interest
            video.set(1, frameNum)
            ret, frame = video.read()
            if not ret:
                break  # EOF reached

            # crop and resize frame
            # frame = preprocess_frame(bb, frameNum, frame)
            # print(frame.shape)

            # crop frame
            x_min, y_min, x_max, y_max = [
                bb.loc[bb['frame'] == frameNum][dim].tolist()[0]
                for dim in bb_dims
            ]
            # print(x_min, y_min, x_max, y_max)
            frame = frame[int(y_min):int(y_max), int(x_min):int(x_max), :]
            # print(frame.shape)
            # resize frame
            frame = VideoDataset.resize_frame(frame)
            # print(frame.shape)

            # use autoencoder to generate 1D code from 3D image
            # frameTensor = frame2tensor(frame)

            frameTensor = transforms.ToTensor()(frame)  # HWC uint8 -> CHW float in [0, 1]
            frameTensor = frameTensor.unsqueeze(0)      # add a batch dimension
            frameTensor = frameTensor.to(device=device, dtype=dtype)  # `device`/`dtype` from module scope

            # encode
            # code = encode(encoder, frameTensor, mode)
            if mode == 'auto':
                # This is an autoencoder
                code = encoder.encode(frameTensor)
            elif mode == 'res18':
                # This is a resnet_18
                code = encoder(frameTensor).squeeze(3).squeeze(2)
            elif mode == 'res50':
                # This is a resnet_50
                code_50 = encoder(frameTensor).squeeze(3).squeeze(2)
                code_50 = code_50.view(1, 512, 4)
                code = code_50.max(dim=2, keepdim=False)[0]
            else:
                # unreachable given the identical mode check above; kept as a safeguard
                raise Exception('Illegal parameter for mode')

            # Codes should all be 512 now
            self.codes.append(code)

            # get labels associated with each frame
            self.labels.append(
                (vehicleType == 'car') -
                (vehicleType == 'truck'))  # -1 is truck, 1 is car

            numFramesLoaded += 1
            frameNums = np.append(frameNums, frameNum)
            # if numFramesLoaded % 20 == 0:
            print(numFramesLoaded, "frames successfully loaded out of",
                  numFramesToLoad)
            if numFramesLoaded >= numFramesToLoad: break

        # report vehicle statistics for class balancing
        print("\nCar count:", carCount)
        print("Truck count:", truckCount)
        print(carCount - truckCount, "more cars than trucks.")

        # convert to encoded images and labels to tensors
        codeMatrix = torch.stack(self.codes, 0)
        labelTensor = torch.Tensor(self.labels)

        if need_split:
            # split dataset into train, val, test
            train_primitive_matrix, val_primitive_matrix, test_primitive_matrix, \
                train_ground, val_ground, test_ground, frameNums_train = \
                split_data(codeMatrix, labelTensor, frameNums)

            return train_primitive_matrix, val_primitive_matrix, test_primitive_matrix, \
                np.array(train_ground), np.array(val_ground), np.array(test_ground), \
                mode, frameNums_train
        else:
            # keep the tuple shape of the split branch; unused slots are None
            # (the original returned bare underscores, which is a NameError)
            return None, codeMatrix, None, \
                None, np.array(labelTensor), None, mode, frameNums
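
A call sketch for load_data; `dataset` stands in for an instance of the unshown owning class, which the method body implies has codes and labels list attributes:

# hypothetical owning instance; attribute names inferred from the method body
train_X, val_X, test_X, train_y, val_y, test_y, mode, frameNums_train = \
    dataset.load_data(mode='res18', numFramesToLoad=500)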