def main():
    in_frames = 2
    batch_size = 2
    train_dataloader = torch.utils.data.DataLoader(
        VideoDataset(frames_dir='test_vid/train', img_size=1024, num_in_frames=in_frames),
        batch_size=batch_size, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(
        VideoDataset(frames_dir='test_vid/test', img_size=1024, num_in_frames=in_frames),
        batch_size=batch_size, shuffle=False)
    model_dict = init_model(res_blocks=1, in_frames=in_frames,
                            batch_size=batch_size, epoch_to_load=None)
    train(model_dict['Unet'], model_dict['Discriminator'], train_dataloader,
          model_dict['Unet_optimizer'], model_dict['Discriminator_optimizer'],
          test_dataloader=test_dataloader, epochs=50)
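# Hedged usage sketch: a standard entry-point guard so main() runs only when
# this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()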
def load(fname='../../jackson-clips'):
    print("Starting to load data.")
    batch_size = 128
    train_data = VideoDataset.VideoDataset(fname=fname,
                                           transform=[transforms.ToTensor()])
    train_loader = torch.utils.data.DataLoader(train_data, shuffle=True,
                                               batch_size=batch_size,
                                               num_workers=8, drop_last=True)
    return train_loader
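# A minimal smoke test for load(), assuming the clip file exists at the
# default path and that each batch is a tensor of stacked frames.
def _check_loader():
    train_loader = load()
    batch = next(iter(train_loader))
    print(type(batch), getattr(batch, 'shape', None))  # expect batch size 128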
def preprocess_frame(bb, frameNum, frame):
    bb_dims = ['xmin', 'ymin', 'xmax', 'ymax']
    # crop frame to the bounding box recorded for this frame number
    x_min, y_min, x_max, y_max = [
        bb.loc[bb['frame'] == frameNum][dim].tolist()[0] for dim in bb_dims
    ]
    frame = frame[int(y_min):int(y_max), int(x_min):int(x_max), :]
    # resize frame to the model's input size
    frame = VideoDataset.resize_frame(frame)
    return frame
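# Hedged usage sketch for preprocess_frame(). The CSV path and frame number
# are illustrative assumptions; the DataFrame must carry the 'frame', 'xmin',
# 'ymin', 'xmax', and 'ymax' columns used above.
def _example_preprocess(csv_path='../data/labels.csv', frameNum=0):
    import numpy as np
    import pandas as pd
    bb = pd.read_csv(csv_path, header=0)
    dummy_frame = np.zeros((720, 1280, 3), dtype=np.uint8)  # stand-in frame
    return preprocess_frame(bb, frameNum, dummy_frame)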
def __next__(self):
    if self.pos >= len(self.lists):
        raise StopIteration
    pos = self.pos
    self.pos += 1
    # a label list may be absent when iterating unlabeled data
    label = None if len(self.labels) == 0 else self.labels[pos]
    dataset = VideoDataset(self.lists[pos], label, self.seq_num, self.img_size)
    dataLoader = torch.utils.data.DataLoader(
        dataset,
        batch_size=self.batch_size,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
    )
    return dataLoader
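# Hedged usage sketch: since __next__ yields one DataLoader per video list,
# the enclosing iterator can be consumed with nested loops. The model call is
# a placeholder assumption about the batch layout.
def _run_all_videos(loader_iter, model):
    for dataLoader in loader_iter:  # one DataLoader per video
        for batch in dataLoader:    # batches within that video
            _ = model(batch)        # placeholder inference step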
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             memSize, evalInterval, evalMode, numWorkers, outDir,
             fightsDir_train, noFightsDir_train, fightsDir_test,
             noFightsDir_test):
    trainDataset, trainLabels, trainNumFrames = make_split(
        fightsDir_train, noFightsDir_train)
    testDataset, testLabels, testNumFrames = make_split(
        fightsDir_test, noFightsDir_test)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vidSeqTrain = VideoDataset(trainDataset, trainLabels, trainNumFrames,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen)
    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)

    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224), ToTensor(), normalize])
        testBatchSize = 1
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256), CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])
        testBatchSize = 1
    else:
        raise ValueError('Unknown evalMode: {}'.format(evalMode))

    vidSeqTest = VideoDataset(testDataset, testLabels, testNumFrames,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)
    testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                             batch_size=testBatchSize,
                                             shuffle=False,
                                             num_workers=int(numWorkers / 2),
                                             pin_memory=True)

    numTrainInstances = len(vidSeqTrain)
    numTestInstances = len(vidSeqTest)
    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of testing samples = {}'.format(numTestInstances))

    # Dir for saving models and log files
    modelFolder = './experiments_' + outDir
    if os.path.exists(modelFolder):
        print(modelFolder + ' exists!!!')
        sys.exit()
    else:
        os.makedirs(modelFolder)

    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open(modelFolder + '/trainLogLoss.txt', 'w')
    trainLogAcc = open(modelFolder + '/trainLogAcc.txt', 'w')
    testLogLoss = open(modelFolder + '/testLogLoss.txt', 'w')
    testLogAcc = open(modelFolder + '/testLogAcc.txt', 'w')

    model = ViolenceModel(mem_size=memSize)
    trainParams = []
    for params in model.parameters():
        params.requires_grad = True
        trainParams += [params]
    model.train(True)
    model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    bestAccuracy = 50  # only save checkpoints that beat 50% test accuracy
    for epoch in range(numEpochs):
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            # (seqLen, batch, C, H, W) ordering expected by the model
            inputs = inputs.permute(1, 0, 2, 3, 4).cuda()
            targets = targets.cuda()
            outputLabel = model(inputs)
            loss = lossFn(outputLabel, targets)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb, 1)
            numCorrTrain += (predicted == targets).sum().item()
            epochLoss += loss.item()
        optimScheduler.step()  # decay the learning rate once per epoch
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (numCorrTrain / numTrainInstances) * 100
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epochs = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epochs = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % evalInterval == 0:
            model.eval()
            print('Evaluating...')
            testLossEpoch = 0
            testIter = 0
            numCorrTest = 0
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(testLoader):
                    testIter += 1
                    if evalMode == 'centerCrop':
                        inputs = inputs.permute(1, 0, 2, 3, 4).cuda()
                    else:
                        # multi-crop modes pack the crops along dim 0
                        inputs = inputs[0].cuda()
                    labels = targets.cuda(non_blocking=True)
                    outputLabel = model(inputs)
                    # average predictions over crops before scoring
                    outputLabel_mean = torch.mean(outputLabel, 0, True)
                    testLoss = lossFn(outputLabel_mean, labels)
                    testLossEpoch += testLoss.item()
                    _, predicted = torch.max(outputLabel_mean, 1)
                    numCorrTest += (predicted.cpu() == targets[0]).sum().item()
            testAccuracy = (numCorrTest / numTestInstances) * 100
            avgTestLoss = testLossEpoch / testIter
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgTestLoss, testAccuracy))
            writer.add_scalar('test/epochLoss', avgTestLoss, epoch + 1)
            writer.add_scalar('test/accuracy', testAccuracy, epoch + 1)
            testLogLoss.write('Test loss after {} epochs = {}\n'.format(
                epoch + 1, avgTestLoss))
            testLogAcc.write('Test accuracy after {} epochs = {}%\n'.format(
                epoch + 1, testAccuracy))
            if testAccuracy > bestAccuracy:
                torch.save(model, modelFolder + '/bestModel.pth')
                bestAccuracy = testAccuracy

    trainLogAcc.close()
    testLogAcc.close()
    trainLogLoss.close()
    testLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return True
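# Hedged example invocation of main_run(); every value below is illustrative,
# not a tuned configuration, and the data directories are assumptions.
def _example_main_run():
    return main_run(numEpochs=50, lr=1e-4, stepSize=25, decayRate=0.5,
                    trainBatchSize=16, seqLen=20, memSize=256, evalInterval=5,
                    evalMode='centerCrop', numWorkers=4, outDir='run1',
                    fightsDir_train='data/train/fights',
                    noFightsDir_train='data/train/noFights',
                    fightsDir_test='data/test/fights',
                    noFightsDir_test='data/test/noFights')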
def getDataset(path, label, seq_num, img_size, mode):
    return VideoDataset(path, label, seq_num, img_size, mode)
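# Hedged usage sketch for getDataset(); all argument values are illustrative.
def _example_loader():
    import torch
    dataset = getDataset('clips/video_0', 1, 20, 224, 'train')
    return torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=False)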
import torch
from torch import optim, nn

import VideoDataset
import Model

batch_size = 32
dataset_train = VideoDataset.VideoDataset()
dataset_val = VideoDataset.VideoDataset(dataset='val')
dataset_test = VideoDataset.VideoDataset(dataset='test')
loaders = {
    'train': torch.utils.data.DataLoader(dataset_train, batch_size=batch_size,
                                         shuffle=True, num_workers=1),
    'val': torch.utils.data.DataLoader(dataset_val, batch_size=batch_size,
                                       shuffle=False, num_workers=1),
    'test': torch.utils.data.DataLoader(dataset_test, batch_size=batch_size,
                                        shuffle=False, num_workers=1)
}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())
print(device)
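# Hedged sketch of the next step after this setup: instantiate the network and
# push the model plus one batch onto the chosen device. Model.Model() and the
# (inputs, targets) batch layout are assumptions about this repo.
def _device_smoke_test():
    model = Model.Model().to(device)
    inputs, targets = next(iter(loaders['train']))
    outputs = model(inputs.to(device))
    print(outputs.shape, targets.shape)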
def load_data(self, mode='auto', data_path='../data/', numFramesToLoad=1000,
              need_split=True):
    labels_fname = 'jackson-town-square-2017-12-14.csv'

    # load the encoder for the requested mode
    if mode == 'auto':
        # pretrained autoencoder checkpoint
        encoder = torch.load('models/autoencoder_0.0001.pth')
        for param in encoder.parameters():
            param.requires_grad = False
    elif mode == 'res18':
        encoder = models.resnet18(pretrained=True)
        encoder = nn.Sequential(*list(encoder.children())[:-1])
        encoder = encoder.to(device)
        # turn off intermediate state saving
        for param in encoder.parameters():
            param.requires_grad = False
    elif mode == 'res50':
        encoder = models.resnet50(pretrained=True)
        encoder = nn.Sequential(*list(encoder.children())[:-1])
        encoder = encoder.to(device)
        # turn off intermediate state saving
        for param in encoder.parameters():
            param.requires_grad = False
    else:
        raise Exception("Illegal parameter for mode")

    # get unique frames with vehicles
    vehicleFrames = getVehicleFrames(data_path + labels_fname)[0]
    shuffle(vehicleFrames)

    bb = pd.read_csv(data_path + labels_fname, header=0)
    bb_dims = ['xmin', 'ymin', 'xmax', 'ymax']

    video_fname = '../../jackson-clips'
    video = swag.VideoCapture(video_fname)

    carCount = 0
    truckCount = 0
    margin = 5
    numFramesLoaded = 0
    frameNums = np.empty((0))

    for frameIter in range(len(vehicleFrames)):
        # frame number as it appears in the bounding-box dataset
        frameNum = vehicleFrames[frameIter]

        # force class balancing: skip whichever class is ahead by > margin
        vehicleType = bb.loc[bb['frame'] == frameNum]['object_name'].to_string()
        vehicleType = vehicleType.split(' ')[-1]
        if vehicleType == 'car':
            if carCount - truckCount > margin:
                continue  # more cars than trucks, so skip
            carCount += 1
        if vehicleType == 'truck':
            if truckCount - carCount > margin:
                continue  # more trucks than cars, so skip
            truckCount += 1

        # read in frame of interest
        video.set(1, frameNum)
        ret, frame = video.read()
        if not ret:
            break  # EOF reached

        # crop frame to its bounding box
        x_min, y_min, x_max, y_max = [
            bb.loc[bb['frame'] == frameNum][dim].tolist()[0]
            for dim in bb_dims
        ]
        frame = frame[int(y_min):int(y_max), int(x_min):int(x_max), :]
        # resize frame
        frame = VideoDataset.resize_frame(frame)

        # convert frame to a batched tensor on the target device
        frameTensor = transforms.ToTensor()(frame)
        frameTensor = frameTensor.unsqueeze_(0)
        frameTensor = frameTensor.to(device=device, dtype=dtype)

        # use the encoder to generate a 1D code from the 3D image
        if mode == 'auto':
            # this is an autoencoder
            code = encoder.encode(frameTensor)
        elif mode == 'res18':
            # this is a resnet18
            code = encoder(frameTensor).squeeze(3).squeeze(2)
        elif mode == 'res50':
            # fold the 2048-dim resnet50 feature down to 512 via max-pooling
            code_50 = encoder(frameTensor).squeeze(3).squeeze(2)
            code_50 = code_50.view(1, 512, 4)
            code = code_50.max(dim=2, keepdim=False)[0]
        else:
            raise Exception(
                "Illegal parameter for mode but how did we even get here?")
        # codes should all be 512-dimensional now
        self.codes.append(code)

        # get labels associated with each frame: -1 is truck, 1 is car
        self.labels.append((vehicleType == 'car') - (vehicleType == 'truck'))

        numFramesLoaded += 1
        frameNums = np.append(frameNums, frameNum)
        # if numFramesLoaded % 20 == 0:
        print(numFramesLoaded, "frames successfully loaded out of",
              numFramesToLoad)
        if numFramesLoaded >= numFramesToLoad:
            break

    # report vehicle statistics for class balancing
    print("\nCar count:", carCount)
    print("Truck count:", truckCount)
    print(carCount - truckCount, "more cars than trucks.")

    # convert encoded images and labels to tensors
    codeMatrix = torch.stack(self.codes, 0)
    labelTensor = torch.Tensor(self.labels)

    if need_split:
        # split dataset into train, val, test
        (train_primitive_matrix, val_primitive_matrix, test_primitive_matrix,
         train_ground, val_ground, test_ground,
         frameNums_train) = split_data(codeMatrix, labelTensor, frameNums)
        return (train_primitive_matrix, val_primitive_matrix,
                test_primitive_matrix, np.array(train_ground),
                np.array(val_ground), np.array(test_ground), mode,
                frameNums_train)
    else:
        # keep the tuple shape of the split branch, with None placeholders
        return (None, codeMatrix, None, None, np.array(labelTensor), None,
                mode, frameNums)
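# Hedged usage sketch for load_data(): it is written as a method, so the call
# below assumes a dataset-like instance is passed in; arguments are
# illustrative, not the repo's actual configuration.
def _example_load(ds):
    return ds.load_data(mode='res18', data_path='../data/',
                        numFramesToLoad=1000, need_split=True)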