Example 1
def get_loaders(opt):
    """ Make dataloaders for train and validation sets
	"""
    # train loader
    norm_method = Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        # NOTE: Resize(256) and CenterCrop(224) override the preceding
        # Scale((sample_size, sample_size)); one of the two resizing
        # strategies is redundant (cf. the same function in Example 5).
        Scale((opt.sample_size, opt.sample_size)),
        Resize(256),
        CenterCrop(224),
        ToTensor(), norm_method
    ])
    temporal_transform = TemporalRandomCrop(25)
    target_transform = ClassLabel()
    training_data = get_training_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True)

    # validation loader
    target_transform = ClassLabel()
    temporal_transform = LoopPadding(25)
    validation_data = get_validation_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers,
                                             pin_memory=True)
    return train_loader, val_loader
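A minimal sketch of how get_loaders would be driven (the SimpleNamespace stand-in and the attribute values are assumptions; only the attribute names come from the snippet, and get_training_set may read further options):

from types import SimpleNamespace

# Hypothetical options object; get_loaders reads at least these attributes.
opt = SimpleNamespace(sample_size=112, batch_size=32, num_workers=4)
train_loader, val_loader = get_loaders(opt)

for clips, labels in train_loader:
    # clips: (batch, channels, frames, height, width) for 3D-CNN input
    print(clips.shape, labels.shape)
    break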
Example 2
def get_dataloader(opt):

    mean = [110.63666788 / 255, 103.16065604 / 255, 96.29023126 / 255]
    std = [1, 1, 1]

    norm_method = Normalize(mean, std)

    spatial_transform = Compose(
        [Scale(112),
         CornerCrop(112, 'c'),
         ToTensor(255), norm_method])

    temporal_transform = LoopPadding(16)
    target_transform = ClassLabel()

    test_data = SurgicalDataset(os.path.abspath(opt.frames_path),
                                os.path.abspath(
                                    opt.video_phase_annotation_path),
                                opt.class_names,
                                spatial_transform=spatial_transform,
                                temporal_transform=temporal_transform,
                                target_transform=target_transform,
                                sample_duration=16)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)

    return test_loader
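ToTensor(255) divides raw pixel values by 255, which is why the per-channel means above are expressed on the [0, 1] scale while std stays at 1. The equivalent tensor arithmetic, as a sketch with assumed shapes:

import torch

# A (C, T, H, W) clip already scaled to [0, 1] by ToTensor(255).
clip = torch.rand(3, 16, 112, 112)
mean = torch.tensor([110.63666788, 103.16065604, 96.29023126]) / 255
normalized = clip - mean.view(3, 1, 1, 1)  # std == [1, 1, 1], so no division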
Example 3
def get_traininfo(opt, norm_method):
    assert opt.train_crop in ['random', 'corner', 'center']
    if opt.train_crop == 'random':
        crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(opt.scales,
                                           opt.sample_size,
                                           crop_positions=['c'])
    spatial_transform = Compose([
        RandomRotate(),
        RandomResize(), crop_method,
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(opt.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.n_threads,
                                               pin_memory=True)
    train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                          ['epoch', 'loss', 'prec1', 'prec5', 'lr'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'prec1', 'prec5', 'lr'])
    return train_loader, train_logger, train_batch_logger
Example 4
    def __init__(self, model_file, sample_duration, model_type, cuda_id=0):

        self.opt = parse_opts()

        self.opt.model = model_type

        self.opt.root_path = './C3D_ResNet/data'

        self.opt.resume_path = os.path.join(self.opt.root_path, model_file)
        self.opt.pretrain_path = os.path.join(self.opt.root_path,
                                              'models/resnet-18-kinetics.pth')

        self.opt.cuda_id = cuda_id
        self.opt.dataset = 'ucf101'
        self.opt.n_classes = 400
        self.opt.n_finetune_classes = 3
        self.opt.ft_begin_index = 4
        self.opt.model_depth = 18
        self.opt.resnet_shortcut = 'A'
        self.opt.sample_duration = sample_duration
        self.opt.batch_size = 1
        self.opt.n_threads = 1
        self.opt.checkpoint = 5

        self.opt.arch = '{}-{}'.format(self.opt.model, self.opt.model_depth)
        self.opt.mean = get_mean(self.opt.norm_value,
                                 dataset=self.opt.mean_dataset)
        self.opt.std = get_std(self.opt.norm_value)
        # print(self.opt)

        print('Loading C3D action-recognition model...')

        self.model, parameters = generate_model(self.opt)
        # print(self.model)

        if self.opt.no_mean_norm and not self.opt.std_norm:
            norm_method = Normalize([0, 0, 0], [1, 1, 1])
        elif not self.opt.std_norm:
            norm_method = Normalize(self.opt.mean, [1, 1, 1])
        else:
            norm_method = Normalize(self.opt.mean, self.opt.std)

        if self.opt.resume_path:
            print('    loading checkpoint {}'.format(self.opt.resume_path))
            checkpoint = torch.load(self.opt.resume_path)
            # assert self.opt.arch == checkpoint['arch']

            self.opt.begin_epoch = checkpoint['epoch']
            self.model.load_state_dict(checkpoint['state_dict'])

        self.spatial_transform = Compose([
            ScaleQC(int(self.opt.sample_size / self.opt.scale_in_test)),
            CornerCrop(self.opt.sample_size, self.opt.crop_position_in_test),
            ToTensor(self.opt.norm_value), norm_method
        ])

        self.target_transform = ClassLabel()

        self.model.eval()
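The three-way Normalize selection in __init__ above recurs in nearly every example on this page; a consolidated helper (hypothetical, not part of any of the quoted repositories) states the intent once:

def build_norm_method(opt):
    if opt.no_mean_norm and not opt.std_norm:
        # no normalization at all: identity mean and std
        return Normalize([0, 0, 0], [1, 1, 1])
    if not opt.std_norm:
        # mean subtraction only
        return Normalize(opt.mean, [1, 1, 1])
    # full mean/std normalization
    return Normalize(opt.mean, opt.std)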
Example 5
def get_loaders(opt):
	""" Make dataloaders for train and validation sets
	"""
	# train loader
	opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
	if opt.no_mean_norm and not opt.std_norm:
		norm_method = Normalize([0, 0, 0], [1, 1, 1])
	elif not opt.std_norm:
		norm_method = Normalize(opt.mean, [1, 1, 1])
	else:
		norm_method = Normalize(opt.mean, opt.std)
	spatial_transform = Compose([
		# crop_method,
		Scale((opt.sample_size, opt.sample_size)),
		# RandomHorizontalFlip(),
		ToTensor(opt.norm_value), norm_method
	])
	temporal_transform = TemporalRandomCrop(16)
	target_transform = ClassLabel()
	training_data = get_training_set(opt, spatial_transform,
									 temporal_transform, target_transform)
	train_loader = torch.utils.data.DataLoader(
		training_data,
		batch_size=opt.batch_size,
		shuffle=True,
		num_workers=opt.num_workers,
		pin_memory=True)

	# validation loader
	spatial_transform = Compose([
		Scale((opt.sample_size, opt.sample_size)),
		# CenterCrop(opt.sample_size),
		ToTensor(opt.norm_value), norm_method
	])
	target_transform = ClassLabel()
	temporal_transform = LoopPadding(16)
	validation_data = get_validation_set(
		opt, spatial_transform, temporal_transform, target_transform)
	val_loader = torch.utils.data.DataLoader(
		validation_data,
		batch_size=opt.batch_size,
		shuffle=False,
		num_workers=opt.num_workers,
		pin_memory=True)
	return train_loader, val_loader
Example 6
def get_testinfo(opt, norm_method):
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value), norm_method
    ])
    # temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()

    test_data = get_test_set(opt, spatial_transform, target_transform)
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=1,  # batchsize must be 1
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True)
    return test_loader
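A sketch of the evaluation loop this test loader would feed (the model variable, no-grad handling, and the softmax are assumptions mirroring the later examples):

import torch
import torch.nn.functional as F

model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = F.softmax(model(inputs), dim=1)
        # batch_size is 1, so outputs[0] holds this sample's class scores
        print(int(targets[0]), int(outputs[0].argmax()))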
Example 7
def get_valinfo(opt, norm_method):
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = TemporalCenterCrop(opt.sample_duration)

    target_transform = ClassLabel()
    validation_data = get_validation_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=True)
    val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                        ['epoch', 'loss', 'prec1', 'prec5'])
    return validation_data, val_loader, val_logger
Example 8
def create_dataloader(args):
    if args.root_path != '':
        args.video_path = os.path.join(args.root_path, args.video_path)
        args.annotation_path = os.path.join(args.root_path,
                                            args.annotation_path)
        args.result_path = os.path.join(args.root_path, args.result_path)
        if args.resume_path:
            args.resume_path = os.path.join(args.root_path, args.resume_path)
        if args.pretrain_path:
            # args.pretrain_path = os.path.join(args.root_path, args.pretrain_path)
            args.pretrain_path = os.path.abspath(args.pretrain_path)
    args.scales = [args.initial_scale]
    for i in range(1, args.n_scales):
        args.scales.append(args.scales[-1] * args.scale_step)

    args.mean = get_mean(args.norm_value, dataset=args.mean_dataset)
    args.std = get_std(args.norm_value)

    if args.no_mean_norm and not args.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not args.std_norm:
        norm_method = Normalize(args.mean, [1, 1, 1])
    else:
        norm_method = Normalize(args.mean, args.std)

    assert args.train_crop in ['random', 'corner', 'center']
    if args.train_crop == 'random':
        crop_method = MultiScaleRandomCrop(args.scales, args.sample_size)
    elif args.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(args.scales, args.sample_size)
    elif args.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(args.scales,
                                           args.sample_size,
                                           crop_positions=['c'])

    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(args.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(args.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(args, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.n_threads,
                                               pin_memory=True)

    spatial_transform = Compose([
        # Scale(args.sample_size),
        Scale(int(args.sample_size / args.scale_in_test)),
        # CenterCrop(args.sample_size),
        CornerCrop(args.sample_size, args.crop_position_in_test),
        ToTensor(args.norm_value),
        norm_method
    ])
    temporal_transform = TemporalCenterCrop(args.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(args, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.n_threads,
                                             pin_memory=True)

    return train_loader, val_loader
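For concreteness, the multi-scale list built at the top of create_dataloader, with commonly used values (initial_scale=1.0, n_scales=5, scale_step=2 ** -0.25 are assumptions, not read from the snippet):

initial_scale, n_scales, scale_step = 1.0, 5, 2 ** -0.25
scales = [initial_scale]
for _ in range(1, n_scales):
    scales.append(scales[-1] * scale_step)
# scales -> [1.0, 0.8409, 0.7071, 0.5946, 0.5] (rounded)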
Example 9
    parser.add_argument('--manual_seed',
                        default=1,
                        type=int,
                        help='Manually set random seed')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = opt()  # read the parsed command-line args
    args.arch = "ResNet-{}".format(args.model_depth)  # record the architecture being run
    spatial_transform = Compose([
        ToTensor(),  # convert each frame (a PIL Image) read per iteration to a Tensor
    ])
    temporal_transform = TemporalRandomCrop4flow()  # temporal preprocessing; none in this case
    target_transform = ClassLabel()  # ground-truth targets: labels for the 2-class task
    # accuracies = AverageMeter()  # per-step accuracy and its running average

    model = test_generate_model(args)  # load the model (and pretrained weights, if any)

    test_data = get_training_set(args, spatial_transform, temporal_transform,
                                 target_transform)  # build the dataset fed to the DataLoader
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=20)

    pred = []
    Y = []
    for i, (x, y) in enumerate(test_loader):
        x = x.cuda()  # x is already a Tensor; re-wrapping with torch.tensor() is unnecessary
        with torch.no_grad():
            output = model(x)
        pred += [int(l.argmax()) for l in output]
Example 10
    def test(self, annotation_path='', video_path=''):
        opt = self.opt

        if annotation_path != '':
            opt.annotation_path = annotation_path
            if opt.root_path != '':
                opt.annotation_path = os.path.join(opt.root_path,
                                                   opt.annotation_path)

        # if video_path != '':
        #     opt.video_path = video_path
        #     if opt.root_path != '':
        #         opt.video_path = os.path.join(opt.root_path, opt.video_path)

        if not os.path.exists(opt.result_path):
            os.makedirs(opt.result_path)

        with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
            json.dump(vars(opt), opt_file)

        if opt.no_mean_norm and not opt.std_norm:
            norm_method = Normalize([0, 0, 0], [1, 1, 1])
        elif not opt.std_norm:
            norm_method = Normalize(opt.mean, [1, 1, 1])
        else:
            norm_method = Normalize(opt.mean, opt.std)

        # original
        spatial_transform = Compose([
            #Scale(opt.sample_size),
            Scale(112),
            CenterCrop(112),
            ToTensor(opt.norm_value),
            norm_method
        ])

        temporal_transform = TemporalCenterCrop(opt.sample_duration)

        target_transform = ClassLabel()
        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)

        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        test_logger = Logger(os.path.join(opt.result_path, 'test.log'),
                             ['top1', 'precision', 'recall'])

        if opt.resume_path:
            print('loading checkpoint {}'.format(opt.resume_path))
            checkpoint = torch.load(opt.resume_path)
            assert opt.arch == checkpoint['arch']

            opt.begin_epoch = checkpoint['epoch']
            self.model.load_state_dict(checkpoint['state_dict'])

        recorder = []

        self.model.eval()

        batch_time = AverageMeter()
        top1 = AverageMeter()
        precisions = AverageMeter()
        recalls = AverageMeter()

        y_true = []
        y_pred = []
        end_time = time.time()

        for i, (inputs, targets) in enumerate(test_loader):
            if not opt.no_cuda:
                targets = targets.cuda(non_blocking=True)
            #inputs = Variable(torch.squeeze(inputs), volatile=True)
            with torch.no_grad():
                inputs = Variable(inputs)
                targets = Variable(targets)
                outputs = self.model(inputs)
                if not opt.no_softmax_in_test:
                    outputs = F.softmax(outputs, dim=1)
                recorder.append(outputs.data.cpu().numpy().copy())
            y_true.extend(targets.cpu().numpy().tolist())
            y_pred.extend(outputs.argmax(1).cpu().numpy().tolist())

            _cls = outputs.argmax(1).cpu().numpy().tolist()[0]

            prec1 = self.calculate_accuracy(outputs, targets, topk=(1, ))
            precision = calculate_precision(outputs, targets)
            recall = calculate_recall(outputs, targets)

            top1.update(prec1[0], inputs.size(0))
            precisions.update(precision, inputs.size(0))
            recalls.update(recall, inputs.size(0))

            batch_time.update(time.time() - end_time)
            end_time = time.time()

        test_logger.log({
            'top1': top1.avg,
            'precision': precisions.avg,
            'recall': recalls.avg
        })

        print('-----Evaluation is finished------')
        print('Overall Prec@1 {:.05f}%'.format(top1.avg * 100))

        return y_pred, y_true, test_data
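Given the y_pred and y_true lists that test() returns, the aggregate metrics can be recomputed offline, e.g. with scikit-learn (an assumption; the original relies on its own calculate_precision/calculate_recall helpers, and the tester object here is hypothetical):

from sklearn.metrics import accuracy_score, precision_score, recall_score

y_pred, y_true, test_data = tester.test()
print('acc: ', accuracy_score(y_true, y_pred))
print('prec:', precision_score(y_true, y_pred, average='macro'))
print('rec: ', recall_score(y_true, y_pred, average='macro'))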
Example 11
def main():
    detector, classifier = load_models(opt)

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    spatial_transform = Compose([
        Scale(112),
        CenterCrop(112),
        ToTensor(opt.norm_value), norm_method
    ])

    target_transform = ClassLabel()

    ## Get list of videos to test
    if opt.dataset == 'egogesture':
        subject_list = ['Subject{:02d}'.format(i) for i in [2, 9, 11, 14, 18, 19, 28, 31, 41, 47]]
        test_paths = []
        for subject in subject_list:
            for x in glob.glob(os.path.join(opt.video_path, subject, '*/*/rgb*')):
                test_paths.append(x)
    elif opt.dataset == 'nvgesture':
        df = pd.read_csv(os.path.join(opt.video_path, 'nvgesture_test_correct_cvpr2016_v2.lst'), delimiter=' ', header=None)
        test_paths = []
        for x in df[0].values:
            test_paths.append(os.path.join(opt.video_path, x.replace('path:', ''), 'sk_color_all'))

    print('Start Evaluation')
    detector.eval()
    classifier.eval()

    levenshtein_accuracies = AverageMeter()
    videoidx = 0
    for path in test_paths[:]:
        if opt.dataset == 'egogesture':
            opt.whole_path = os.path.join(*path.rsplit(os.sep, 4)[1:])
        elif opt.dataset == 'nvgesture':
            opt.whole_path = os.path.join(*path.rsplit(os.sep, 5)[1:])

        videoidx += 1
        active_index = 0
        passive_count = 0
        active = False
        prev_active = False
        finished_prediction = None
        pre_predict = False

        cum_sum = np.zeros(opt.n_classes_clf, )
        clf_selected_queue = np.zeros(opt.n_classes_clf, )
        det_selected_queue = np.zeros(opt.n_classes_det, )
        myqueue_det = Queue(opt.det_queue_size, n_classes=opt.n_classes_det)
        myqueue_clf = Queue(opt.clf_queue_size, n_classes=opt.n_classes_clf)

        print('[{}/{}]============'.format(videoidx, len(test_paths)))
        print(path)
        opt.sample_duration = max(opt.sample_duration_clf, opt.sample_duration_det)
        temporal_transform = TemporalRandomCrop(opt.sample_duration, opt.downsample)
        test_data = get_online_data(
            opt, spatial_transform, None, target_transform)

        test_loader = torch.utils.data.DataLoader(
            test_data,
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=opt.n_threads,
            pin_memory=True)

        results = []
        prev_best1 = opt.n_classes_clf
        dataset_len = len(test_loader.dataset)
        for i, (inputs, targets) in enumerate(test_loader):
            if not opt.no_cuda:
                targets = targets.cuda()
            ground_truth_array = np.zeros(opt.n_classes_clf + 1, )
            with torch.no_grad():
                inputs = Variable(inputs)
                targets = Variable(targets)
                if opt.modality_det == 'RGB':
                    inputs_det = inputs[:, :-1, -opt.sample_duration_det:, :, :]
                elif opt.modality_det == 'Depth':
                    inputs_det = inputs[:, -1, -opt.sample_duration_det:, :, :].unsqueeze(1)
                elif opt.modality_det == 'RGB-D':
                    inputs_det = inputs[:, :, -opt.sample_duration_det:, :, :]

                outputs_det = detector(inputs_det)
                outputs_det = F.softmax(outputs_det, dim=1)
                outputs_det = outputs_det.cpu().numpy()[0].reshape(-1, )

                # enqueue the probabilities to the detector queue
                myqueue_det.enqueue(outputs_det.tolist())

                if opt.det_strategy == 'raw':
                    det_selected_queue = outputs_det
                elif opt.det_strategy == 'median':
                    det_selected_queue = myqueue_det.median
                elif opt.det_strategy == 'ma':
                    det_selected_queue = myqueue_det.ma
                elif opt.det_strategy == 'ewma':
                    det_selected_queue = myqueue_det.ewma

                prediction_det = np.argmax(det_selected_queue)
                prob_det = det_selected_queue[prediction_det]

                #### State of the detector is checked here as detector act as a switch for the classifier
                if prediction_det == 1:
                    if opt.modality_clf == 'RGB':
                        inputs_clf = inputs[:, :-1, :, :, :]
                    elif opt.modality_clf == 'Depth':
                        inputs_clf = inputs[:, -1, :, :, :].unsqueeze(1)
                    elif opt.modality_clf == 'RGB-D':
                        inputs_clf = inputs[:, :, :, :, :]
                    inputs_clf = torch.Tensor(inputs_clf.numpy()[:,:,::2,:,:])
                    outputs_clf = classifier(inputs_clf)
                    outputs_clf = F.softmax(outputs_clf, dim=1)
                    outputs_clf = outputs_clf.cpu().numpy()[0].reshape(-1, )

                    # Push the probabilities to queue
                    myqueue_clf.enqueue(outputs_clf.tolist())
                    passive_count = 0

                    if opt.clf_strategy == 'raw':
                        clf_selected_queue = outputs_clf
                    elif opt.clf_strategy == 'median':
                        clf_selected_queue = myqueue_clf.median
                    elif opt.clf_strategy == 'ma':
                        clf_selected_queue = myqueue_clf.ma
                    elif opt.clf_strategy == 'ewma':
                        clf_selected_queue = myqueue_clf.ewma

                else:
                    outputs_clf = np.zeros(opt.n_classes_clf, )
                    # Push the probabilities to queue
                    myqueue_clf.enqueue(outputs_clf.tolist())
                    passive_count += 1

            if passive_count >= opt.det_counter or i == (dataset_len - 2):
                active = False
            else:
                active = True

            # exactly one of the two cum_sum updates below should be active
            if active:
                active_index += 1
                cum_sum = ((cum_sum * (active_index - 1)) +
                           (weighting_func(active_index) * clf_selected_queue)) / active_index  # weighted approach
                # cum_sum = ((cum_sum * (active_index - 1)) + (1.0 * clf_selected_queue)) / active_index  # unweighted approach

                best2, best1 = tuple(cum_sum.argsort()[-2:][::1])
                if float(cum_sum[best1] - cum_sum[best2]) > opt.clf_threshold_pre:
                    finished_prediction = True
                    pre_predict = True

            else:
                active_index = 0

            if active == False and prev_active == True:
                finished_prediction = True
            elif active == True and prev_active == False:
                finished_prediction = False

            if finished_prediction == True:
                best2, best1 = tuple(cum_sum.argsort()[-2:][::1])
                if cum_sum[best1] > opt.clf_threshold_final:
                    if pre_predict == True:
                        if best1 != prev_best1:
                            if cum_sum[best1] > opt.clf_threshold_final:
                                results.append(((i * opt.stride_len) + opt.sample_duration_clf, best1))
                                print('Early Detected - class : {} with prob : {} at frame {}'.format(
                                    best1, cum_sum[best1],
                                    (i * opt.stride_len) + opt.sample_duration_clf))
                    else:
                        if cum_sum[best1] > opt.clf_threshold_final:
                            if best1 == prev_best1:
                                if cum_sum[best1] > 5:
                                    results.append(((i * opt.stride_len) + opt.sample_duration_clf, best1))
                                    print('Late Detected - class : {} with prob : {} at frame {}'.format(
                                        best1, cum_sum[best1],
                                        (i * opt.stride_len) + opt.sample_duration_clf))
                            else:
                                results.append(((i * opt.stride_len) + opt.sample_duration_clf, best1))
                                print('Late Detected - class : {} with prob : {} at frame {}'.format(
                                    best1, cum_sum[best1],
                                    (i * opt.stride_len) + opt.sample_duration_clf))

                    finished_prediction = False
                    prev_best1 = best1

                cum_sum = np.zeros(opt.n_classes_clf, )

            if active == False and prev_active == True:
                pre_predict = False

            prev_active = active


        if opt.dataset == 'egogesture':
            target_csv_path = os.path.join(
                opt.video_path, 'labels-final-revised1',
                opt.whole_path.rsplit(os.sep, 2)[0],
                'Group' + opt.whole_path[-1] + '.csv').replace('Subject', 'subject')
            true_classes = []
            with open(target_csv_path) as csvfile:
                readCSV = csv.reader(csvfile, delimiter=',')
                for row in readCSV:
                    true_classes.append(int(row[0]) - 1)
        elif opt.dataset == 'nvgesture':
            true_classes = []
            with open('./annotation_nvGesture/vallistall.txt') as csvfile:
                readCSV = csv.reader(csvfile, delimiter=' ')
                for row in readCSV:
                    if row[0] == opt.whole_path:
                        if row[1] != '26':
                            true_classes.append(int(row[1]) - 1)
        if len(results) != 0:
            predicted = np.array(results)[:, 1]
        else:
            predicted = []
        true_classes = np.array(true_classes)
        levenshtein_distance = LevenshteinDistance(true_classes, predicted)
        levenshtein_accuracy = 1 - (levenshtein_distance / len(true_classes))
        if levenshtein_accuracy < 0:  # distance can exceed len(true_classes); clamp accuracy at 0
            levenshtein_accuracies.update(0, len(true_classes))
        else:
            levenshtein_accuracies.update(levenshtein_accuracy, len(true_classes))

        print('predicted classes: \t', predicted)
        print('True classes :\t\t', true_classes)
        print('Levenshtein Accuracy = {} ({})'.format(levenshtein_accuracies.val, levenshtein_accuracies.avg))

    print('Average Levenshtein Accuracy= {}'.format(levenshtein_accuracies.avg))

    print('-----Evaluation is finished------')
    with open("./results/online-results.log", "a") as myfile:
        myfile.write("{}, {}, {}, {}, {}, {}".format(datetime.datetime.now(),
                                        opt.resume_path_clf, 
                                        opt.model_clf,
                                        opt.width_mult_clf,
                                        opt.modality_clf,
                                        levenshtein_accuracies.avg))
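The Levenshtein accuracy above is 1 - d / len(true_classes), where d is the edit distance between the predicted and ground-truth gesture sequences. A minimal reference implementation of the distance, assuming the repository's LevenshteinDistance is the standard insert/delete/substitute edit distance:

def levenshtein(a, b):
    # One-row dynamic-programming edit distance.
    dp = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        prev, dp[0] = dp[0], i
        for j, cb in enumerate(b, 1):
            prev, dp[j] = dp[j], min(dp[j] + 1,          # deletion
                                     dp[j - 1] + 1,      # insertion
                                     prev + (ca != cb))  # substitution
    return dp[-1]

# e.g. true [3, 7, 7, 12] vs predicted [3, 7, 12]: distance 1, accuracy 0.75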
Example 12
        RandomHorizontalFlip(),
        ucf_crop,
        ToTensor(args.norm_value),
        norm_method,
    ])

    ucf_temporal = TemporalRandomCrop(args.sample_duration, args.downsample)
    # ucf_temporal = TemporalCenterCrop(args.sample_duration, args.downsample)
    #    kinetics_temporal = TransformTwice(TemporalRandomCrop(args.sample_duration, args.downsample))
    kinetics_temporal = TemporalRandomCrop(args.sample_duration,
                                           args.downsample)
    #     kinetics_temporal = TemporalCenterCrop(args.sample_duration, args.downsample)

    spatial_transform = [ucf_spatial, kinetics_spatial]
    temporal_transform = [ucf_temporal, kinetics_temporal]
    target_transform = [ClassLabel(), ClassLabel_fromarray(labels)]
    # here the second is irrelevant since we don't use their labels

    combined_dataset = UCF_with_Kinetics(args.l_vids_path,
                                         args.l_annotation_path,
                                         args.ul_vids_path,
                                         args.ul_annotation_path,
                                         'training',
                                         1,
                                         spatial_transform=spatial_transform,
                                         temporal_transform=temporal_transform,
                                         target_transform=target_transform,
                                         sample_duration=args.sample_duration)
    label_length = combined_dataset.labeled_length
    unlabel_length = combined_dataset.unlabeled_length
    assert label_length + unlabel_length == len(
Example 13
def classify_video(opt, video_path):

    classifier = load_models(opt)

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    spatial_transform = Compose(
        [Scale(112),
         CenterCrop(112),
         ToTensor(opt.norm_value), norm_method])

    target_transform = ClassLabel()

    # video open
    idx2label = [
        "Zoom_in_with_fingers", "Click_with_index_finger", "Sweep_diagonal",
        "Sweep_circle", "Sweep_cross", "Make_a_phone_call", "Wave_finger",
        "Knock", "Dual_hands_heart", "Move_fingers_left"
    ]
    opt.sample_duration = opt.sample_duration_clf

    fps = ""
    #

    cap = cv2.VideoCapture(video_path)
    # cap = cv2.VideoCapture(0)
    num_frame = 0
    clip = []
    active_index = 0
    passive_count = 0
    active = False
    prev_active = False
    finished_prediction = None
    pre_predict = False
    classifier.eval()
    cum_sum = np.zeros(opt.n_classes_clf, )
    clf_selected_queue = np.zeros(opt.n_classes_clf, )
    myqueue_clf = Queue(opt.clf_queue_size, n_classes=opt.n_classes_clf)
    results = []
    prev_best1 = opt.n_classes_clf
    spatial_transform.randomize_parameters()
    temporal_transform = TemporalRandomCrop(opt.sample_duration,
                                            opt.downsample)
    pre_len_result = 0
    cur_label = ""
    step = 2
    fps_r = []

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    t1 = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if num_frame == 0:
            active = True
            cur_frame = cv2.resize(frame, (320, 240))
            cur_frame = Image.fromarray(
                cv2.cvtColor(cur_frame, cv2.COLOR_BGR2RGB))
            cur_frame = cur_frame.convert('RGB')
            for i in range(opt.sample_duration):
                clip.append(cur_frame)
            clip = [spatial_transform(img) for img in clip]

        elif num_frame == total_frames:
            break
        elif num_frame == total_frames - 3:
            active = False

        if num_frame % step == 0:
            clip.pop(0)
            _frame = cv2.resize(frame, (320, 240))
            _frame = Image.fromarray(cv2.cvtColor(_frame, cv2.COLOR_BGR2RGB))
            _frame = _frame.convert('RGB')
            _frame = spatial_transform(_frame)
            clip.append(_frame)
            im_dim = clip[0].size()[-2:]

            try:
                test_data = torch.cat(clip,
                                      0).view((opt.sample_duration, -1) +
                                              im_dim).permute(1, 0, 2, 3)
            except Exception as e:
                pdb.set_trace()
                raise e
            inputs = torch.cat([test_data], 0).view(1, 3, opt.sample_duration,
                                                    112, 112)
            # print(inputs.size())

            with torch.no_grad():
                inputs = Variable(inputs)
                if opt.modality_clf == 'RGB':
                    inputs_clf = inputs[:, :, :, :, :]
                inputs_clf = torch.Tensor(inputs_clf.numpy()[:, :, ::2, :, :])
                outputs_clf = classifier(inputs_clf)
                outputs_clf = F.softmax(outputs_clf, dim=1)
                outputs_clf = outputs_clf.cpu().numpy()[0].reshape(-1, )

                myqueue_clf.enqueue(outputs_clf.tolist())

                if opt.clf_strategy == 'raw':
                    clf_selected_queue = outputs_clf
                elif opt.clf_strategy == 'median':
                    clf_selected_queue = myqueue_clf.median
                elif opt.clf_strategy == 'ma':
                    clf_selected_queue = myqueue_clf.ma
                elif opt.clf_strategy == 'ewma':
                    clf_selected_queue = myqueue_clf.ewma

                # print(clf_selected_queue)

            # exactly one weighting strategy should be active here
            if active:
                active_index += 1

                cum_sum = ((cum_sum * (active_index - 1)) +
                           (1.0 * clf_selected_queue)) / active_index  # unweighted approach
                best2, best1 = tuple(cum_sum.argsort()[-2:][::1])
                if float(cum_sum[best1] -
                         cum_sum[best2]) > opt.clf_threshold_pre:
                    finished_prediction = True
                    pre_predict = True
            else:
                active_index = 0
            if active == False and prev_active == True:
                finished_prediction = True
            elif active == True and prev_active == False:
                finished_prediction = False

            if finished_prediction == True:
                # print("finished_prediction")
                # print(finished_prediction, pre_predict)
                best2, best1 = tuple(cum_sum.argsort()[-2:][::1])
                if cum_sum[best1] > opt.clf_threshold_final:
                    # NOTE: 'i' here is the stale index left over from the
                    # clip-initialization loop above, not a frame counter;
                    # the analogous code in Example 11 uses the loader index.
                    results.append(
                        ((i * opt.stride_len) + opt.sample_duration_clf,
                         best1))
                    finished_prediction = False
                    prev_best1 = best1

                cum_sum = np.zeros(opt.n_classes_clf, )

            if active == False and prev_active == True:
                pre_predict = False

            prev_active = active

            if len(results) != 0:
                predicted = np.array(results)[:, 1]
                prev_best1 = -1
            else:
                predicted = []

            if len(results) > pre_len_result:
                cur_label = idx2label[predicted[pre_len_result]]
                pre_len_result = len(results)
        num_frame += 1
    elapsedTime = time.time() - t1
    return cur_label, elapsedTime
Example 14
def classify_video(opt, video_path):

    classifier = load_models(opt)

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    spatial_transform = Compose(
        [Scale(112),
         CenterCrop(112),
         ToTensor(opt.norm_value), norm_method])

    target_transform = ClassLabel()

    # video open
    idx2label = [
        "Zoom_in_with_fingers", "Click_with_index_finger", "Sweep_diagonal",
        "Sweep_circle", "Sweep_cross", "Make_a_phone_call", "Wave_finger",
        "Knock", "Dual_hands_heart", "Move_fingers_left"
    ]
    opt.sample_duration = opt.sample_duration_clf

    cap = cv2.VideoCapture(video_path)
    # cap = cv2.VideoCapture(0)
    num_frame = 0
    clip = []
    classifier.eval()
    spatial_transform.randomize_parameters()
    temporal_transform = TemporalRandomCrop(opt.sample_duration,
                                            opt.downsample)
    cur_label = ""
    step = 2

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    t1 = time.time()
    print('total:', total_frames)
    while cap.isOpened():
        num_frame += 1
        if num_frame == total_frames - 1:
            break
        ret, frame = cap.read()
        cur_frame = cv2.resize(frame, (320, 240))
        cur_frame = Image.fromarray(cv2.cvtColor(cur_frame, cv2.COLOR_BGR2RGB))
        cur_frame = cur_frame.convert('RGB')
        if num_frame % step == 0:
            clip.append(cur_frame)

    indexes = temporal_transform([i for i in range(len(clip))])
    new_clip = []
    for i in indexes:
        new_clip.append(clip[i])
    new_clip = [spatial_transform(img) for img in new_clip]
    im_dim = new_clip[0].size()[-2:]
    try:
        test_data = torch.cat(new_clip, 0).view((opt.sample_duration, -1) +
                                                im_dim).permute(1, 0, 2, 3)
    except Exception as e:
        pdb.set_trace()
        raise e
    inputs = torch.cat([test_data], 0).view(1, 3, opt.sample_duration, 112,
                                            112)
    # print(inputs.size())

    with torch.no_grad():
        inputs = Variable(inputs)
        if opt.modality_clf == 'RGB':
            inputs_clf = inputs[:, :, :, :, :]
        inputs_clf = torch.Tensor(inputs_clf.numpy()[:, :, ::2, :, :])
        outputs_clf = classifier(inputs_clf)
        outputs_clf = F.softmax(outputs_clf, dim=1)
        outputs_clf = outputs_clf.cpu().numpy()[0].reshape(-1, )

        best2, best1 = tuple(outputs_clf.argsort()[-2:][::1])
        cur_label = idx2label[best1]

    elapsedTime = time.time() - t1
    return cur_label, elapsedTime
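A hypothetical invocation of classify_video (the video path is a placeholder; parse_opts_online is borrowed from Example 18's option parser and is an assumption here):

opt = parse_opts_online()
label, seconds = classify_video(opt, 'samples/gesture_demo.mp4')
print('predicted {} in {:.2f}s'.format(label, seconds))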
Example 15
def objective(trial):
    opt = parse_opts()

    if trial:
        opt.weight_decay = trial.suggest_uniform('weight_decay', 0.01, 0.1)
        opt.learning_rate = trial.suggest_uniform('learning_rate', 1e-5, 1e-4)

    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    model, parameters = generate_model(opt)
    print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    # norm_method = Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(
            training_data,
            batch_size=opt.batch_size,
            # sampler option is mutually exclusive with shuffle
            shuffle=False,
            sampler=ImbalancedDatasetSampler(training_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

        optimizer = optim.Adam(parameters,
                               lr=opt.learning_rate,
                               weight_decay=opt.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               verbose=True,
                                                               factor=0.1**0.5)
    if not opt.no_val:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=opt.batch_size,
            shuffle=False,
            sampler=ImbalancedDatasetSampler(validation_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                            ['epoch', 'loss', 'acc'])

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])

    print('run')
    writer = SummaryWriter(comment=(
        f"_wd{opt.weight_decay}_lr{opt.learning_rate}"
        f"_ft_begin{opt.ft_begin_index}"
        f"_pretrain{not opt.pretrain_path == ''}"))
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            epoch, losses_avg, accuracies_avg = train_epoch(
                i, train_loader, model, criterion, optimizer, opt,
                train_logger, train_batch_logger)
            writer.add_scalar('loss/train', losses_avg, epoch)
            writer.add_scalar('acc/train', accuracies_avg, epoch)

        if not opt.no_val:
            epoch, val_losses_avg, val_accuracies_avg = val_epoch(
                i, val_loader, model, criterion, opt, val_logger)
            writer.add_scalar('loss/val', val_losses_avg, epoch)
            writer.add_scalar('acc/val', val_accuracies_avg, epoch)

        if not opt.no_train and not opt.no_val:
            scheduler.step(val_losses_avg)
        print('=' * 100)

    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()

        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        test.test(test_loader, model, opt, test_data.class_names)

    writer.close()
    return val_losses_avg
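objective() is shaped for Optuna; a sketch of the driving study (minimizing the returned validation loss is the assumption implied by return val_losses_avg):

import optuna

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)
print(study.best_params)  # e.g. {'weight_decay': ..., 'learning_rate': ...}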
Example 16
def main():

    resnet_in = generate_model(opt)
    resnet_in.module.fc = Identity()
    model = ReNet34(resnet_in, encode_length=encode_length)

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])

        ## train loader
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)

        ## test loader
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)

        target_transform = ClassLabel()
        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)

        ## Database loader
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        database_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=opt.n_threads,
            pin_memory=True)

        if opt.nesterov:
            dampening = 0
        else:
            dampening = opt.dampening

        optimizer = optim.SGD(model.parameters(),
                              lr=opt.learning_rate,
                              momentum=opt.momentum,
                              dampening=dampening,
                              weight_decay=opt.weight_decay,
                              nesterov=opt.nesterov)
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   'min',
                                                   patience=opt.lr_patience)

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

    print('run')
    for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
        model.cuda().train()
        for i, (images, labels) in enumerate(train_loader):

            images = Variable(images.cuda())
            labels = Variable(labels.cuda().long())

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            x, _, b = model(images)

            target_b = F.cosine_similarity(b[:int(labels.size(0) / 2)],
                                           b[int(labels.size(0) / 2):])
            target_x = F.cosine_similarity(x[:int(labels.size(0) / 2)],
                                           x[int(labels.size(0) / 2):])
            loss = F.mse_loss(target_b, target_x)
            loss.backward()
            optimizer.step()

        # ReduceLROnPlateau.step() requires a metric; step once per epoch on
        # the last training loss (the original called it per batch, with no
        # metric, which raises a TypeError).
        scheduler.step(loss.item())

        # Test the Model
        if (epoch + 1) % 10 == 0:
            model.eval()
            retrievalB, retrievalL, queryB, queryL = compress(
                database_loader, test_loader, model)
            result_map = calculate_top_map(qB=queryB,
                                           rB=retrievalB,
                                           queryL=queryL,
                                           retrievalL=retrievalL,
                                           topk=100)
            print('--------mAP@100: {}--------'.format(result_map))
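The training loss in main() pairs the first half of each batch with the second half and regresses the cosine similarity of the binary codes b onto that of the continuous features x; a self-contained miniature (the shapes and the tanh surrogate for b are assumptions):

import torch
import torch.nn.functional as F

x = torch.randn(8, 64)  # continuous features for a batch of 8 clips
b = torch.tanh(x)       # relaxed binary codes (assumed surrogate)
target_b = F.cosine_similarity(b[:4], b[4:])  # 4 pairwise similarities
target_x = F.cosine_similarity(x[:4], x[4:])
loss = F.mse_loss(target_b, target_x)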
Example 17
elif opt.train_crop == 'corner':
    crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
elif opt.train_crop == 'center':
    crop_method = MultiScaleCornerCrop(opt.scales,
                                       opt.sample_size,
                                       crop_positions=['c'])
elif opt.train_crop == 'driver focus':
    crop_method = DriverFocusCrop(opt.scales, opt.sample_size)
train_spatial_transform = Compose([
    crop_method,
    MultiScaleRandomCrop(opt.scales, opt.sample_size),
    ToTensor(opt.norm_value), norm_method
])
train_temporal_transform = UniformRandomSample(opt.sample_duration,
                                               opt.end_second)
train_target_transform = ClassLabel()
train_horizontal_flip = RandomHorizontalFlip()
training_data = get_training_set(opt, train_spatial_transform,
                                 train_horizontal_flip,
                                 train_temporal_transform,
                                 train_target_transform)
train_loader = torch.utils.data.DataLoader(training_data,
                                           batch_size=opt.batch_size,
                                           shuffle=True,
                                           num_workers=opt.n_threads,
                                           pin_memory=True)
train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                      ['epoch', 'loss', 'acc', 'lr'])
train_batch_logger = Logger(
    os.path.join(opt.result_path, 'train_batch.log'),
    ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
Example 18
def main(clf_threshold_pre):
    print(f'Early-detection threshold: {clf_threshold_pre}')

    opt = parse_opts_online()

    detector, classifier = load_models(opt)

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if opt.model_clf == 'ssar':
        opt.sample_size_clf = (126, 224)
        opt.mean_clf = (0.485, 0.456, 0.406)
        opt.std_clf = (0.229, 0.224, 0.225)

        spatial_transform_clf = transforms.Compose([
            transforms.Resize(opt.sample_size_clf),
            transforms.ToTensor(),
            transforms.Normalize(opt.mean_clf, opt.std_clf)
        ])

    spatial_transform = Compose(
        [Scale(112),
         CenterCrop(112),
         ToTensor(opt.norm_value), norm_method])

    target_transform = ClassLabel()

    ## Get list of videos to test
    if opt.dataset == 'egogesture':
        subject_list = [
            'Subject{:02d}'.format(i)
            for i in [2, 9, 11, 14, 18, 19, 28, 31, 41, 47]
        ]
        test_paths = []
        for subject in subject_list:
            for x in glob.glob(
                    os.path.join(opt.video_path, subject, '*/*/rgb*/')):
                test_paths.append(x)
    elif opt.dataset == 'nv':
        df = pd.read_csv(os.path.join(
            opt.video_path, 'nvgesture_test_correct_cvpr2016_v2.lst'),
                         delimiter=' ',
                         header=None)
        test_paths = []
        for x in df[0].values:
            test_paths.append(
                os.path.join(opt.video_path, x.replace('path:', ''),
                             'sk_color_all').replace(os.sep, '/'))

    # Figures setup
    # fig, ax = plt.subplots(nrows=6, ncols=1)

    # x_data, y_datas = [], []
    # lines = []
    # for j in range(6):
    #     if j != 0:
    #         ax[j].set_xlim(0, 400)
    #         ax[j].set_ylim(0, 1)
    #     y_datas.append([])
    #     lines.append([])
    #     for _ in range(opt.n_classes_clf):
    #         y_data = []
    #         y_datas[j].append(y_data)
    #         line, = ax[j].plot(x_data, y_data)
    #         lines[j].append(line)

    print('Start Evaluation')
    detector.eval()
    classifier.eval()

    levenshtein_accuracies = AverageMeter()
    frames_early_meter = AverageMeter()
    videoidx = 0
    for path in test_paths[4:]:  # NOTE: evaluation starts from the fifth test video
        path = os.path.normpath(path)
        if opt.dataset == 'egogesture':
            opt.whole_path = path.rsplit(os.sep, 4)[-4:]
            opt.whole_path = os.sep.join(opt.whole_path)
        elif opt.dataset == 'nv':
            opt.whole_path = path.split(
                os.sep,
                3)  # TODO: fix bad dependency on fixed depth file locations
            opt.whole_path = opt.whole_path[-1]

        videoidx += 1
        active_index = 0
        passive_count = 999  # start well above det_counter so the video begins inactive
        active = False
        prev_active = False
        finished_prediction = None
        pre_predict = False

        cum_sum = np.zeros(opt.n_classes_clf, )
        cum_sum_unweighted = np.zeros(opt.n_classes_clf, )
        clf_selected_queue = np.zeros(opt.n_classes_clf, )
        det_selected_queue = np.zeros(opt.n_classes_det, )
        myqueue_det = Queue(opt.det_queue_size, n_classes=opt.n_classes_det)
        myqueue_clf = Queue(opt.clf_queue_size, n_classes=opt.n_classes_clf)

        print('[{}/{}]============'.format(videoidx, len(test_paths)))
        print(path)
        opt.sample_duration = max(opt.sample_duration_clf,
                                  opt.sample_duration_det)

        if opt.model_clf == 'ssar':
            test_data = get_online_data(
                opt, [spatial_transform, spatial_transform_clf],
                None,
                target_transform,
                modality='RGB')
        else:
            test_data = get_online_data(opt, spatial_transform, None,
                                        target_transform)

        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)

        results = []
        prev_best1 = opt.n_classes_clf  # sentinel: no class predicted yet

        if opt.model_clf == 'ssar':
            # Initialize the recurrent state
            lstm_hidden = [None, None, None, None]

        for i, (inputs, targets) in enumerate(test_loader):
            if opt.model_clf == 'ssar':
                inputs, inputs_clf = inputs
            if not opt.no_cuda:
                targets = targets.cuda(non_blocking=True)
            ground_truth_array = np.zeros(opt.n_classes_clf + 1, )
            with torch.no_grad():
                inputs = Variable(inputs)
                targets = Variable(targets)
                if opt.modality_det == 'RGB':
                    inputs_det = inputs[:, :3, -opt.sample_duration_det:, :, :]
                elif opt.modality_det == 'Depth':
                    inputs_det = inputs[:, -1,
                                        -opt.sample_duration_det:, :, :].unsqueeze(1)
                elif opt.modality_det == 'RGB-D':
                    inputs_det = inputs[:, :, -opt.sample_duration_det:, :, :]

                # print(inputs_det[0, :, -1, 0:4, 0:4])
                outputs_det = detector(inputs_det)
                outputs_det = F.softmax(outputs_det, dim=1)
                outputs_det = outputs_det.cpu().numpy()[0].reshape(-1, )

                # enqueue the probabilities to the detector queue
                myqueue_det.enqueue(outputs_det.tolist())

                if opt.det_strategy == 'raw':
                    det_selected_queue = outputs_det
                elif opt.det_strategy == 'median':
                    det_selected_queue = myqueue_det.median
                elif opt.det_strategy == 'ma':
                    det_selected_queue = myqueue_det.ma
                elif opt.det_strategy == 'ewma':
                    det_selected_queue = myqueue_det.ewma

                prediction_det = np.argmax(det_selected_queue)
                prob_det = det_selected_queue[1]

                #### The detector state is checked here: the detector acts as a switch for the classifier
                if prediction_det == 1:
                    if opt.model_clf == 'ssar':
                        inputs_clf = Variable(inputs_clf)
                        if not opt.no_cuda:
                            inputs_clf = inputs_clf.cuda()
                        if opt.modality_clf == 'RGB':
                            inputs_clf = inputs_clf[:, :3, -1, :, :]
                        elif opt.modality_clf == 'Depth':
                            inputs_clf = inputs_clf[:, -1,
                                                    -1, :, :].unsqueeze(1)
                        elif opt.modality_clf == 'RGB-D':
                            inputs_clf = inputs_clf[:, :, -1, :, :]

                        outputs_clf, lstm_hidden = classifier(
                            inputs_clf, lstm_hidden, get_lstm_state=True)
                    else:
                        if opt.modality_clf == 'RGB':
                            inputs_clf = inputs[:, :3, :, :, :]
                        elif opt.modality_clf == 'Depth':
                            inputs_clf = inputs[:, -1, :, :, :].unsqueeze(1)
                        elif opt.modality_clf == 'RGB-D':
                            inputs_clf = inputs[:, :, :, :, :]

                        outputs_clf = classifier(inputs_clf)

                    outputs_clf = F.softmax(outputs_clf, dim=1)
                    outputs_clf = outputs_clf.cpu().numpy()[0].reshape(-1, )

                    # Push the probabilities to queue
                    myqueue_clf.enqueue(outputs_clf.tolist())
                    passive_count = 0

                    if opt.clf_strategy == 'raw':
                        clf_selected_queue = outputs_clf
                    elif opt.clf_strategy == 'median':
                        clf_selected_queue = myqueue_clf.median
                    elif opt.clf_strategy == 'ma':
                        clf_selected_queue = myqueue_clf.ma
                    elif opt.clf_strategy == 'ewma':
                        clf_selected_queue = myqueue_clf.ewma

                else:
                    if opt.model_clf == 'ssar':
                        # Reset recurrent state
                        lstm_hidden = [None, None, None, None]

                    outputs_clf = np.zeros(opt.n_classes_clf, )
                    # Push the probabilities to queue
                    myqueue_clf.enqueue(outputs_clf.tolist())
                    passive_count += 1

            if passive_count >= opt.det_counter:
                active = False
            else:
                active = True

            # NOTE: keep only one of the two accumulation strategies below (weighted or unweighted).
            if active:
                active_index += 1
                cum_sum = ((cum_sum * (active_index - 1)) +
                           (weighting_func(active_index) * clf_selected_queue)
                           ) / active_index  # Weighted approach
                cum_sum_unweighted = ((cum_sum_unweighted *
                                       (active_index - 1)) +
                                      (1.0 * clf_selected_queue)
                                      ) / active_index  # Unweighted approach

                best2, best1 = tuple(cum_sum.argsort()[-2:])  # (second-best, best)
                if float(cum_sum[best1] - cum_sum[best2]) > clf_threshold_pre:
                    finished_prediction = True
                    pre_predict = True

            else:
                active_index = 0

            # Visualize
            # x_data.append(i)
            # y_datas[1][0].append(prob_det)
            # lines[1][0].set_xdata(x_data)
            # lines[1][0].set_ydata(y_datas[1][0])
            # for j in range(opt.n_classes_clf):
            #     y_datas[2][j].append(cum_sum[j])
            #     y_datas[3][j].append(cum_sum_unweighted[j])
            #     y_datas[4][j].append(clf_selected_queue[j] if active else 0)
            #     for k in range(2, 5):
            #         lines[k][j].set_xdata(x_data)
            #         lines[k][j].set_ydata(y_datas[k][j])
            # for k in range(1, 6):
            #     ax[k].set_xlim(i - 400, i)
            # mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, -1)
            # img = inputs_det[0, :, -1].permute(1, 2, 0).cpu().numpy() + mean
            # img = img.astype(int)
            # if i == 0:
            #     im_plt = ax[0].imshow(img)
            # else:
            #     im_plt.set_data(img)
            # Refreshes the visualization figure; only meaningful when the
            # plotting block above is re-enabled.
            if i % 10 == 0:
                plt.draw()
                plt.pause(0.001)

            if not active and prev_active:
                finished_prediction = True
            elif active and not prev_active:
                finished_prediction = False

            if finished_prediction:
                detection_frame = (i * opt.stride_len) + opt.sample_duration_clf
                best2, best1 = tuple(cum_sum.argsort()[-2:])  # (second-best, best)
                if cum_sum[best1] > opt.clf_threshold_final:
                    if pre_predict:
                        if best1 != prev_best1:
                            if cum_sum[best1] > opt.clf_threshold_final:
                                results.append((detection_frame, best1))
                                print(
                                    'Early Detected - class : {} with prob : {} at frame {}'
                                    .format(best1, cum_sum[best1],
                                            detection_frame))
                    else:
                        # raw_best = clf_selected_queue.argsort()[-1]
                        # results.append((detection_frame,raw_best))
                        # print( 'Late Detected - class : {} with prob : {} at frame {}'.format(raw_best, clf_selected_queue[raw_best], detection_frame))
                        if cum_sum[best1] > opt.clf_threshold_final:
                            if best1 == prev_best1:
                                # With softmax-derived probabilities this bound is
                                # never reached, so repeating the previous class
                                # is effectively suppressed here.
                                if cum_sum[best1] > 5:
                                    results.append((detection_frame, best1))
                                    print(
                                        'Late Detected - class : {} with prob : {} at frame {}'
                                        .format(best1, cum_sum[best1],
                                                detection_frame))
                            else:
                                results.append((detection_frame, best1))

                                print(
                                    'Late Detected - class : {} with prob : {} at frame {}'
                                    .format(best1, cum_sum[best1],
                                            detection_frame))

                    prev_best1 = best1
                    finished_prediction = False

                # prev_best1 = best1
                # finished_prediction = False

                cum_sum = np.zeros(opt.n_classes_clf, )
                cum_sum_unweighted = np.zeros(opt.n_classes_clf, )

            if not active and prev_active:
                pre_predict = False

            prev_active = active

        if opt.dataset == 'egogesture':
            opt.video_path = os.path.normpath(opt.video_path)
            opt.whole_path = os.path.normpath(opt.whole_path)
            target_csv_path = os.path.join(
                opt.video_path, 'labels-final-revised1',
                opt.whole_path.rsplit(os.sep, 2)[0],
                'Group' + opt.whole_path.rsplit('.', 1)[0][-1] +
                '.csv').replace('Subject', 'subject')
            true_classes = []
            end_frames = []
            with open(target_csv_path) as csvfile:
                readCSV = csv.reader(csvfile, delimiter=',')
                for row in readCSV:
                    true_classes.append(int(row[0]) - 1)
                    end_frames.append(int(row[2]))
        elif opt.dataset == 'nv':
            true_classes = []
            with open('./annotation_nvGesture/vallistall.txt') as csvfile:
                readCSV = csv.reader(csvfile, delimiter=' ')
                for row in readCSV:
                    if row[0] == opt.whole_path:
                        if row[1] != '26':
                            true_classes.append(int(row[1]) - 1)

        predicted = np.array(results)[:, 1]
        detection_frames = np.array(results)[:, 0]

        true_classes = np.array(true_classes)
        # NOTE: end_frames is only populated in the egogesture branch above.
        levenshtein_distance, avg_frames_early = LevenshteinDistancePlusAvgFramesEarly(
            true_classes, predicted, end_frames, detection_frames)
        levenshtein_accuracy = 1 - (levenshtein_distance / len(true_classes))
        if levenshtein_accuracy < 0:  # distance can exceed the sequence length
            levenshtein_accuracies.update(0, len(true_classes))
        else:
            levenshtein_accuracies.update(levenshtein_accuracy,
                                          len(true_classes))
        frames_early_meter.update(avg_frames_early)

        print('Predicted classes:\t', predicted)
        print('True classes:\t\t', true_classes)
        print('Levenshtein Accuracy = {} ({})'.format(
            levenshtein_accuracies.val, levenshtein_accuracies.avg))
        print(
            f'Average frames early = {frames_early_meter.val} ({frames_early_meter.avg})'
        )

    print('Average Levenshtein Accuracy= {}'.format(
        levenshtein_accuracies.avg))

    print('-----Evaluation is finished------')

    # early_x_data, early_y_data and early_plot are module-level plotting state.
    early_x_data.append(clf_threshold_pre)
    early_y_data.append(frames_early_meter.avg)
    early_plot.set_xdata(early_x_data)
    early_plot.set_ydata(early_y_data)
    plt.annotate(f'{levenshtein_accuracies.avg * 100:.2f}',
                 (clf_threshold_pre, frames_early_meter.avg),
                 textcoords='offset pixels',
                 xytext=(10, 10))
    plt.gca().relim()
    plt.gca().autoscale_view()
    plt.pause(0.0001)
    plt.draw()
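
Esempio n. 18 relies on a `Queue` with `median`, `ma`, and `ewma` filters and on a `weighting_func`, neither of which is shown. The sketch below is one plausible reading, assuming the queue holds the most recent per-frame probability vectors and the weighting function is a sigmoid ramp that down-weights the first frames after the detector fires; the original implementations may differ:

import numpy as np

class Queue(object):
    """Fixed-size FIFO of class-probability vectors, newest entry first,
    with three smoothing strategies over the stored window."""

    def __init__(self, max_size, n_classes):
        self.max_size = max_size
        self.buffer = np.zeros((max_size, n_classes))

    def enqueue(self, probs):
        self.buffer = np.roll(self.buffer, 1, axis=0)  # shift older rows down
        self.buffer[0] = np.asarray(probs)

    @property
    def median(self):
        return np.median(self.buffer, axis=0)  # per-class median filter

    @property
    def ma(self):
        return self.buffer.mean(axis=0)  # plain moving average

    @property
    def ewma(self):
        w = 0.5 ** np.arange(self.max_size)  # newest frame weighted most
        return (w[:, None] * self.buffer).sum(axis=0) / w.sum()

def weighting_func(x):
    # Assumed sigmoid ramp: early active frames contribute little,
    # later ones contribute (almost) fully.
    return 1.0 / (1.0 + np.exp(-0.2 * (x - 9)))
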
Esempio n. 19
0
def main():
    opt = parse_opts()

    ecd_name, cls_name = opt.model_name.split('-')
    ecd_model = get_encoder_net(ecd_name)
    cls_model = get_end_net(cls_name)

    cfg.encoder_model = ecd_name
    cfg.classification_model = cls_name

    if opt.debug:
        cfg.debug = opt.debug
    else:
        if opt.tensorboard == 'TEST':
            cfg.tensorboard = opt.model_name
        else:
            cfg.tensorboard = opt.tensorboard
            cfg.flag = opt.flag
    model = cls_model(cfg,
                      encoder=CNNencoder(
                          cfg,
                          ecd_model(pretrained=True, path=opt.encoder_model)))
    cfg.video_path = os.path.join(cfg.root_path, cfg.video_path)
    cfg.annotation_path = os.path.join(cfg.root_path, cfg.annotation_path)

    cfg.list_all_member()

    torch.manual_seed(cfg.manual_seed)
    print('##########################################')
    print('####### model: only single-GPU is supported')
    print('##########################################')
    model = model.cuda()
    print(model)
    criterion = nn.CrossEntropyLoss()
    if cfg.cuda:
        criterion = criterion.cuda()

    norm_method = Normalize([0, 0, 0], [1, 1, 1])

    print('##########################################')
    print('####### train')
    print('##########################################')
    assert cfg.train_crop in ['random', 'corner', 'center']
    if cfg.train_crop == 'random':
        crop_method = MultiScaleRandomCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(cfg.scales,
                                           cfg.sample_size,
                                           crop_positions=['c'])
    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(cfg.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(cfg, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=cfg.n_threads,
                                               drop_last=False,
                                               pin_memory=True)
    optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=cfg.lr_patience)
    print('##########################################')
    print('####### val')
    print('##########################################')
    spatial_transform = Compose([
        Scale(cfg.sample_size),
        CenterCrop(cfg.sample_size),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(cfg.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(cfg, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=cfg.batch_size,
                                             shuffle=False,
                                             num_workers=cfg.n_threads,
                                             drop_last=False,
                                             pin_memory=True)
    print('##########################################')
    print('####### run')
    print('##########################################')
    if cfg.debug:
        logger = None
    else:
        path = get_log_dir(cfg.logdir, name=cfg.tensorboard, flag=cfg.flag)
        logger = Logger(logdir=path)
        cfg.save_config(path)

    for i in range(cfg.begin_epoch, cfg.n_epochs + 1):
        train_epoch(i, train_loader, model, criterion, optimizer, cfg, logger)
        validation_loss = val_epoch(i, val_loader, model, criterion, cfg,
                                    logger)

        scheduler.step(validation_loss)
Esempio n. 20
0
train_loader = None
val_loader = None

# Register each image-set path and its channel count in a dictionary
paths = {opt.video_path: '3ch'}
if opt.add_gray_image_paths:
    for one_ch in opt.add_gray_image_paths:
        paths[one_ch] = '1ch'
if opt.add_RGB_image_paths:
    for three_ch in opt.add_RGB_image_paths:
        paths[three_ch] = '3ch'

spatial_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize([0, 0, 0], [1, 1, 1])])
target_transform = ClassLabel(True)
training_data = data_set[opt.data_set](
    paths,
    opt.annotation_path,
    'training',
    spatial_transform=spatial_transform,
    target_transform=target_transform,
)
train_loader = torch.utils.data.DataLoader(training_data,
                                           batch_size=opt.batch_size,
                                           shuffle=True,
                                           num_workers=opt.n_threads,
                                           pin_memory=True,
                                           worker_init_fn=worker_init_fn)

dampening = 0 if opt.nesterov else opt.dampening
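
`worker_init_fn` is referenced above but not defined in this snippet. A common pattern (an assumption here, not necessarily the author's version) seeds each DataLoader worker deterministically, so random augmentations differ across workers but remain reproducible:

import random

import numpy as np
import torch

def worker_init_fn(worker_id):
    # torch already assigns each worker a distinct seed; propagate it to
    # the other RNGs used inside the dataset's transforms.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)
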
Esempio n. 21
0
def get_ucf_data(opt):

    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])

    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()  # switch to VideoID() to target video ids instead

    # get training data
    training_data = UCF101(opt.video_path,
                           opt.annotation_path,
                           'training',
                           0,
                           spatial_transform=spatial_transform,
                           temporal_transform=temporal_transform,
                           target_transform=target_transform,
                           sample_duration=16)

    # wrap training data
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=False,
                                               num_workers=opt.n_threads,
                                               pin_memory=False)  # True

    # get validation data
    val_data = UCF101(opt.video_path,
                      opt.annotation_path,
                      'validation',
                      0,
                      spatial_transform=spatial_transform,
                      temporal_transform=temporal_transform,
                      target_transform=target_transform,
                      sample_duration=16)

    # wrap validation data
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=False)

    target_transform = VideoID()
    # get test data
    test_data = UCF101(opt.video_path,
                       opt.annotation_path,
                       'testing',
                       0,
                       spatial_transform=spatial_transform,
                       temporal_transform=temporal_transform,
                       target_transform=target_transform,
                       sample_duration=16)

    # wrap test data
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)

    return train_loader, val_loader, test_loader, test_data
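
The target transforms used throughout these examples are small callables over a per-clip annotation dict. A minimal sketch, assuming each annotation carries 'label' and 'video_id' keys as in the common 3D-ResNet video pipelines; `VideoID` is what lets test-time clip predictions be grouped back per video:

class ClassLabel(object):
    """Extracts the integer class label from a clip's annotation dict."""
    def __call__(self, target):
        return target['label']

class VideoID(object):
    """Extracts the video identifier instead, for aggregating clip-level
    predictions per video at test time."""
    def __call__(self, target):
        return target['video_id']
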
Esempio n. 22
0
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(opt.dataset),
            ToTensor(), norm_method
        ])
        temporal_transform = TemporalSampling(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True,
                                                   drop_last=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

        if opt.nesterov:
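
`TemporalRandomCrop` and `LoopPadding`, used across several of these snippets, operate on lists of frame indices rather than on pixels. A sketch of the usual semantics (random window of `size` indices for training, loop-padding short clips for evaluation); variants that also accept a `downsample` argument additionally keep only every n-th index:

import random

class LoopPadding(object):
    """Repeats the clip's frame indices cyclically until `size` are available."""
    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        out = list(frame_indices)
        base = len(out)
        while len(out) < self.size:
            out.append(out[len(out) % base])
        return out

class TemporalRandomCrop(object):
    """Picks a random window of `size` consecutive indices, loop-padding
    clips shorter than the window."""
    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        begin = random.randint(0, max(0, len(frame_indices) - self.size))
        out = list(frame_indices[begin:begin + self.size])
        base = len(out)
        while len(out) < self.size:
            out.append(out[len(out) % base])
        return out
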
Esempio n. 23
0
        ToTensor(opt.norm_value), norm_method,
    ])

    kinetics_transform = transforms.Compose([
        transforms.Resize(128),
        transforms.CenterCrop(112),
        ToTensor(opt.norm_value), norm_method,
    ])

    spatial_transform.append(ucf_transform)
    spatial_transform.append(kinetics_transform)

    temporal_transform.append(TemporalRandomCrop(opt.sample_duration, opt.downsample))
    temporal_transform.append(TemporalCenterCrop(opt.sample_duration, opt.downsample))

    target_transform.append(ClassLabel())
    target_transform.append(ClassLabel_fromarray(labels))

    kinetics_clustered = Kinetics_clustered(opt.ul_vids_path,
                                            opt.ul_annotation_path,
                                            'training',
                                            1,
                                            spatial_transform=spatial_transform[0],
                                            temporal_transform=temporal_transform[0],
                                            target_transform=target_transform[1])
    train_loader = torch.utils.data.DataLoader(dataset=kinetics_clustered,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.n_threads,
                                               pin_memory=True)
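
`ClassLabel_fromarray(labels)` pairs the unlabeled Kinetics clips with precomputed cluster assignments; its definition is not shown. A purely hypothetical sketch, assuming `labels` is an array indexed by an integer id stored in the annotation dict:

import numpy as np

class ClassLabel_fromarray(object):
    """Hypothetical: returns a pseudo-label looked up in a precomputed
    array of cluster assignments (e.g. from clustering video features)."""
    def __init__(self, labels):
        self.labels = np.asarray(labels)

    def __call__(self, target):
        # Assumes the annotation carries an integer index into `labels`.
        return int(self.labels[target['video_id']])
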
Esempio n. 24
0
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

        if opt.nesterov:
            dampening = 0
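
`MultiScaleCornerCrop` appears in almost every training branch above. The usual behaviour: pick one scale from `opt.scales` and one crop position, crop a square of (scale × shorter side), then resize to the sample size. A simplified sketch of that idea; note the real class typically exposes a `randomize_parameters()` hook so that one random choice is shared by all frames of a clip, which this per-call version does not do:

import random
from PIL import Image

class MultiScaleCornerCrop(object):
    """Simplified sketch: square crop at a random scale and position,
    resized to a fixed output size."""

    def __init__(self, scales, size, crop_positions=('c', 'tl', 'tr', 'bl', 'br')):
        self.scales = scales
        self.size = size
        self.crop_positions = crop_positions

    def __call__(self, img):
        crop = int(min(img.size) * random.choice(self.scales))
        w, h = img.size
        corners = {
            'c': ((w - crop) // 2, (h - crop) // 2),
            'tl': (0, 0),
            'tr': (w - crop, 0),
            'bl': (0, h - crop),
            'br': (w - crop, h - crop),
        }
        x1, y1 = corners[random.choice(self.crop_positions)]
        img = img.crop((x1, y1, x1 + crop, y1 + crop))
        return img.resize((self.size, self.size), Image.BILINEAR)
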
Esempio n. 25
0
def main():
    opt = parse_opts()
    # Path configurations
    opt.annotation_path = os.path.join(opt.annotation_directory,
                                       opt.annotation_path)
    save_result_dir_name = \
        os.path.join(opt.result_path,
                     get_prefix() + '_{}{}_{}_epochs'.format(opt.model, opt.model_depth, opt.n_epochs))
    if not os.path.exists(save_result_dir_name):
        os.mkdir(save_result_dir_name)
    opt.result_path = save_result_dir_name  # already joined with the base result path above

    # For data generator
    opt.scales = [opt.initial_scale]
    for _ in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)

    # Model
    model, parameters = generate_model(opt)
    # print(model)

    # Loss function
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    # Normalizing
    if not opt.no_mean_norm:
        opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
        opt.std = get_std(opt.norm_value, dataset=opt.std_dataset)
        norm_method = Normalize(opt.mean, opt.std)
    else:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])

    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    # **************************** TRAINING CONFIGURATIONS ************************************
    assert opt.train_crop in ['corner', 'center']
    if opt.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(opt.scales,
                                           opt.sample_size,
                                           crop_positions=['c'])

    # Spatial transform
    spatial_transform = Compose([
        crop_method,
        #RandomHorizontalFlip(),
        ToTensor(opt.norm_value),
        norm_method
    ])
    # Temporal transform
    temporal_transform = TemporalRandomCrop(opt.sample_duration)
    # Target transform
    target_transform = ClassLabel()

    train_loader_list = []
    if not opt.no_cross_validation:
        annotation_list = os.listdir(opt.annotation_directory)
        for annotation in annotation_list:
            opt.annotation_path = os.path.join(opt.annotation_directory,
                                               annotation)
            training_data = get_training_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
            train_loader = torch.utils.data.DataLoader(
                training_data,
                batch_size=opt.batch_size,
                shuffle=True,
                num_workers=opt.n_threads,
                pin_memory=True)
            train_loader_list.append(train_loader)
    else:
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)
        train_loader_list.append(train_loader)

    train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    optimizer = optim.SGD(parameters,
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          dampening=opt.dampening,
                          weight_decay=opt.weight_decay)

    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=opt.lr_patience)

    # ***************************** VALIDATION CONFIGURATIONS *********************************
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()

    val_loader_list = []
    if not opt.no_cross_validation:
        annotation_list = os.listdir(opt.annotation_directory)
        for annotation in annotation_list:
            opt.annotation_path = os.path.join(opt.annotation_directory,
                                               annotation)
            validation_data = get_validation_set(opt, spatial_transform,
                                                 temporal_transform,
                                                 target_transform)
            val_loader = torch.utils.data.DataLoader(validation_data,
                                                     batch_size=opt.batch_size,
                                                     shuffle=False,
                                                     num_workers=opt.n_threads,
                                                     pin_memory=True)
            val_loader_list.append(val_loader)
    else:
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(validation_data,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.n_threads,
                                                 pin_memory=True)
        val_loader_list.append(val_loader)

    val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                        ['epoch', 'loss', 'acc'])

    # **************************************** TRAINING ****************************************
    epoch_avg_time = AverageMeter()
    train_loss_list = []
    train_acc_list = []
    valid_acc_list = []
    best_accuracy = 0
    current_train_data = 0
    current_valid_data = 0
    # Ceiling division: folds are switched this many epochs apart.
    opt.frequence_cross_validation = round(opt.n_epochs /
                                           opt.n_cross_validation_sets + 0.5)

    for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
        epoch_start_time = time.time()
        print('Epoch #' + str(epoch))

        # optimizer = regulate_learning_rate(optimizer, epoch, opt.frequence_regulate_lr)

        train_loader = train_loader_list[current_train_data]
        if not opt.no_cross_validation and epoch % opt.frequence_cross_validation == 0:
            print('\t##### Cross-validation: switch training data #####')
            current_train_data = (current_train_data +
                                  1) % len(train_loader_list)
            train_loader = train_loader_list[current_train_data]
        train_loss, train_acc = train_epoch(epoch, train_loader, model,
                                            criterion, optimizer, opt,
                                            train_logger, train_batch_logger)

        val_loader = val_loader_list[current_valid_data]
        if not opt.no_cross_validation and epoch % opt.frequence_cross_validation == 0:
            print('\t##### Cross-validation: switch validation data #####')
            current_valid_data = (current_valid_data +
                                  1) % len(val_loader_list)
            val_loader = val_loader_list[current_valid_data]
        validation_acc = val_epoch(epoch, val_loader, model, criterion, opt,
                                   val_logger)

        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        valid_acc_list.append(validation_acc)

        # Save model with best accuracy
        if validation_acc > best_accuracy:
            best_accuracy = validation_acc
            save_file_path = os.path.join(opt.result_path, 'best_model.pth')
            states = {
                'epoch': epoch + 1,
                'arch': opt.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(states, save_file_path)

        epoch_end_time = time.time() - epoch_start_time
        epoch_avg_time.update(epoch_end_time)
        print('\tTime left: ' +
              str(round(epoch_avg_time.avg *
                        (opt.n_epochs - epoch) / 60, 1)) + ' minutes')

    # ******************************* SAVING RESULTS OF TRAINING ******************************
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs),
                  train_loss_list, 'red', 'Loss',
                  os.path.join(opt.result_path, 'train_loss.png'))
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), train_acc_list,
                  'blue', 'Accuracy',
                  os.path.join(opt.result_path, 'train_accuracy.png'))
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), valid_acc_list,
                  'blue', 'Accuracy',
                  os.path.join(opt.result_path, 'validation_accuracy.png'))
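
Two helpers that Esempio n. 25 (and n. 18) depend on are not shown: `AverageMeter`, which tracks a latest value alongside a running average, and `save_pictures`, whose call sites suggest it plots a single curve and writes it to disk. Sketches under those assumptions:

import matplotlib.pyplot as plt

class AverageMeter(object):
    """Keeps the most recent value (`val`) and the running average (`avg`)."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def save_pictures(x, y, color, ylabel, path):
    # Assumed signature, matching the call sites above: x values, y values,
    # line colour, y-axis label, and the output file path.
    plt.figure()
    plt.plot(x, y, color=color)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.savefig(path)
    plt.close()
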