def __init__(self, initial_frame, first_box, opts, args, positive=True):
        self.opts = opts
        self.positive = positive

        if positive:
            self.max_num_past_frames = opts['nFrames_long']
        else:
            self.max_num_past_frames = opts['nFrames_short']

        self.transform = ADNet_Augmentation(opts)

        # list of train_db_
        # train_db_ = {
        #             'past_frame': [],
        #             'bboxes': [],
        #             'labels': [],
        #             'score_labels': []
        #         }
        self.train_db = []

        self.add_frame_then_generate_samples(initial_frame, first_box)
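
# A minimal sketch (an assumption, not code from this snippet) of the train_db_
# entry that add_frame_then_generate_samples is expected to append, following
# the structure documented in __init__ above:
#
#     train_db_ = {
#         'past_frame': frame,           # raw frame the samples were drawn from
#         'bboxes': sampled_boxes,       # boxes sampled around the tracked box
#         'labels': action_labels,       # per-sample action labels
#         'score_labels': score_labels,  # 1 for positive samples, 0 for negative
#     }
#     self.train_db.append(train_db_)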
def adnet_train_sl(args, opts):

    if torch.cuda.is_available():
        if args.cuda:
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        if not args.cuda:
            print(
                "WARNING: It looks like you have a CUDA device, but aren't " +
                "using CUDA.\nRun with --cuda for optimal training speed.")
            torch.set_default_tensor_type('torch.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    if args.visualize:
        writer = SummaryWriter(log_dir=os.path.join('tensorboardx_log', args.save_file))

    train_videos = get_train_videos(opts)
    opts['num_videos'] = len(train_videos['video_names'])

    net, domain_specific_nets = adnet(opts=opts, trained_file=args.resume, multidomain=args.multidomain)

    if args.cuda:
        net = nn.DataParallel(net)
        cudnn.benchmark = True

        net = net.cuda()

    if args.cuda:
        optimizer = optim.SGD([
            {'params': net.module.base_network.parameters(), 'lr': 1e-4},
            {'params': net.module.fc4_5.parameters()},
            {'params': net.module.fc6.parameters()},
            {'params': net.module.fc7.parameters()}],  # as action dynamic is zero, it doesn't matter
            lr=1e-3, momentum=opts['train']['momentum'], weight_decay=opts['train']['weightDecay'])
    else:
        optimizer = optim.SGD([
            {'params': net.base_network.parameters(), 'lr': 1e-4},
            {'params': net.fc4_5.parameters()},
            {'params': net.fc6.parameters()},
            {'params': net.fc7.parameters()}],
            lr=1e-3, momentum=opts['train']['momentum'], weight_decay=opts['train']['weightDecay'])
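    # the pretrained base network uses a smaller learning rate (1e-4) than the
    # freshly initialized fc4_5/fc6/fc7 heads, which fall back to the group
    # default lr=1e-3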

    if args.resume:
        # net.load_weights(args.resume)
        checkpoint = torch.load(args.resume)

        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    net.train()


    if not args.resume:
        print('Initializing weights...')

        if args.cuda:
            scal = torch.Tensor([0.01])
            # fc 4
            nn.init.normal_(net.module.fc4_5[0].weight.data)
            net.module.fc4_5[0].weight.data = net.module.fc4_5[0].weight.data * scal.expand_as(net.module.fc4_5[0].weight.data)
            net.module.fc4_5[0].bias.data.fill_(0.1)
            # fc 5
            nn.init.normal_(net.module.fc4_5[3].weight.data)
            net.module.fc4_5[3].weight.data = net.module.fc4_5[3].weight.data * scal.expand_as(net.module.fc4_5[3].weight.data)
            net.module.fc4_5[3].bias.data.fill_(0.1)

            # fc 6
            nn.init.normal_(net.module.fc6.weight.data)
            net.module.fc6.weight.data = net.module.fc6.weight.data * scal.expand_as(net.module.fc6.weight.data)
            net.module.fc6.bias.data.fill_(0)
            # fc 7
            nn.init.normal_(net.module.fc7.weight.data)
            net.module.fc7.weight.data = net.module.fc7.weight.data * scal.expand_as(net.module.fc7.weight.data)
            net.module.fc7.bias.data.fill_(0)
        else:
            scal = torch.Tensor([0.01])
            # fc 4
            nn.init.normal_(net.fc4_5[0].weight.data)
            net.fc4_5[0].weight.data = net.fc4_5[0].weight.data * scal.expand_as(net.fc4_5[0].weight.data )
            net.fc4_5[0].bias.data.fill_(0.1)
            # fc 5
            nn.init.normal_(net.fc4_5[3].weight.data)
            net.fc4_5[3].weight.data = net.fc4_5[3].weight.data * scal.expand_as(net.fc4_5[3].weight.data)
            net.fc4_5[3].bias.data.fill_(0.1)
            # fc 6
            nn.init.normal_(net.fc6.weight.data)
            net.fc6.weight.data = net.fc6.weight.data * scal.expand_as(net.fc6.weight.data)
            net.fc6.bias.data.fill_(0)
            # fc 7
            nn.init.normal_(net.fc7.weight.data)
            net.fc7.weight.data = net.fc7.weight.data * scal.expand_as(net.fc7.weight.data)
            net.fc7.bias.data.fill_(0)
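    # NOTE: drawing from N(0, 1) and scaling by scal = 0.01 is equivalent to
    # nn.init.normal_(w, std=0.01); adnet_train_sl_mot below uses that form directly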

    action_criterion = nn.CrossEntropyLoss()
    score_criterion = nn.CrossEntropyLoss()


    print('generating Supervised Learning dataset..')
    # dataset = SLDataset(train_videos, opts, transform=

    datasets_pos, datasets_neg = initialize_pos_neg_dataset(train_videos, opts, transform=ADNet_Augmentation(opts))
    number_domain = opts['num_videos']

    batch_iterators_pos = []
    batch_iterators_neg = []

    # calculating number of data
    len_dataset_pos = 0
    len_dataset_neg = 0
    for dataset_pos in datasets_pos:
        len_dataset_pos += len(dataset_pos)
    for dataset_neg in datasets_neg:
        len_dataset_neg += len(dataset_neg)

    epoch_size_pos = len_dataset_pos // opts['minibatch_size']
    epoch_size_neg = len_dataset_neg // opts['minibatch_size']
    epoch_size = epoch_size_pos + epoch_size_neg  # 1 epoch, how many iterations
    print("1 epoch = " + str(epoch_size) + " iterations")

    max_iter = opts['numEpoch'] * epoch_size
    print("maximum iteration = " + str(max_iter))

    data_loaders_pos = []
    data_loaders_neg = []

    for dataset_pos in datasets_pos:
        data_loaders_pos.append(data.DataLoader(dataset_pos, opts['minibatch_size'], num_workers=args.num_workers, shuffle=True, pin_memory=True))
    for dataset_neg in datasets_neg:
        data_loaders_neg.append(data.DataLoader(dataset_neg, opts['minibatch_size'], num_workers=args.num_workers, shuffle=True, pin_memory=True))

    epoch = args.start_epoch
    if epoch != 0 and args.start_iter == 0:
        start_iter = epoch * epoch_size
    else:
        start_iter = args.start_iter

    which_dataset = list(np.full(epoch_size_pos, fill_value=1))
    which_dataset.extend(np.zeros(epoch_size_neg, dtype=int))
    shuffle(which_dataset)
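    # e.g. with epoch_size_pos = 3 and epoch_size_neg = 2, which_dataset might
    # shuffle to [1, 0, 1, 1, 0]: a per-iteration schedule of positive (1) and
    # negative (0) minibatches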

    which_domain = np.random.permutation(number_domain)

    action_loss = 0
    score_loss = 0

    # training loop
    for iteration in range(start_iter, max_iter):
        if args.multidomain:
            curr_domain = which_domain[iteration % len(which_domain)]
        else:
            curr_domain = 0
        # if new epoch (not including the very first iteration)
        if (iteration != start_iter) and (iteration % epoch_size == 0):
            epoch += 1
            shuffle(which_dataset)
            np.random.shuffle(which_domain)

            print('Saving state, epoch:', epoch)
            domain_specific_nets_state_dict = []
            for domain_specific_net in domain_specific_nets:
                domain_specific_nets_state_dict.append(domain_specific_net.state_dict())

            torch.save({
                'epoch': epoch,
                'adnet_state_dict': net.state_dict(),
                'adnet_domain_specific_state_dict': domain_specific_nets_state_dict,
                'optimizer_state_dict': optimizer.state_dict(),
            }, os.path.join(args.save_folder, args.save_file) +
                       'epoch' + repr(epoch) + '.pth')

            if args.visualize:
                writer.add_scalars('data/epoch_loss', {'action_loss': action_loss / epoch_size,
                                                       'score_loss': score_loss / epoch_size,
                                                       'total': (action_loss + score_loss) / epoch_size}, global_step=epoch)

            # reset epoch loss counters
            action_loss = 0
            score_loss = 0

        # if new epoch (including the first iteration), or when resuming before the
        # iterators exist, (re)build the batch iterators; rebuilding (rather than
        # appending) keeps batch_iterators_*[curr_domain] pointing at fresh iterators
        if iteration % epoch_size == 0 or len(batch_iterators_pos) == 0 or len(batch_iterators_neg) == 0:
            batch_iterators_pos = [iter(data_loader_pos) for data_loader_pos in data_loaders_pos]
            batch_iterators_neg = [iter(data_loader_neg) for data_loader_neg in data_loaders_neg]

        # if not batch_iterators_pos[curr_domain]:
        #     # create batch iterator
        #     batch_iterators_pos[curr_domain] = iter(data_loaders_pos[curr_domain])
        #
        # if not batch_iterators_neg[curr_domain]:
        #     # create batch iterator
        #     batch_iterators_neg[curr_domain] = iter(data_loaders_neg[curr_domain])

        # load train data
        if which_dataset[iteration % len(which_dataset)]:  # if positive
            try:
                images, bbox, action_label, score_label, vid_idx = next(batch_iterators_pos[curr_domain])
            except StopIteration:
                batch_iterators_pos[curr_domain] = iter(data_loaders_pos[curr_domain])
                images, bbox, action_label, score_label, vid_idx = next(batch_iterators_pos[curr_domain])
        else:
            try:
                images, bbox, action_label, score_label, vid_idx = next(batch_iterators_neg[curr_domain])
            except StopIteration:
                batch_iterators_neg[curr_domain] = iter(data_loaders_neg[curr_domain])
                images, bbox, action_label, score_label, vid_idx = next(batch_iterators_neg[curr_domain])

        # torch.Tensor(...) copies the batch and yields requires_grad=False,
        # matching the old Variable(..., requires_grad=False) semantics
        if args.cuda:
            images = torch.Tensor(images.cuda())
            bbox = torch.Tensor(bbox.cuda())
            action_label = torch.Tensor(action_label.cuda())
            score_label = torch.Tensor(score_label.float().cuda())

        else:
            images = torch.Tensor(images)
            bbox = torch.Tensor(bbox)
            action_label = torch.Tensor(action_label)
            score_label = torch.Tensor(score_label)

        t0 = time.time()

        # load ADNetDomainSpecific with video index
        if args.cuda:
            net.module.load_domain_specific(domain_specific_nets[curr_domain])
        else:
            net.load_domain_specific(domain_specific_nets[curr_domain])

        # forward
        action_out, score_out = net(images)

        # backprop
        optimizer.zero_grad()
        if which_dataset[iteration % len(which_dataset)]:  # if positive
            action_l = action_criterion(action_out, torch.max(action_label, 1)[1])
        else:
            action_l = torch.Tensor([0])
        score_l = score_criterion(score_out, score_label.long())
        loss = action_l + score_l
        loss.backward()
        optimizer.step()

        action_loss += action_l.item()
        score_loss += score_l.item()

        # save the ADNetDomainSpecific back to their module
        if args.cuda:
            domain_specific_nets[curr_domain].load_weights_from_adnet(net.module)
        else:
            domain_specific_nets[curr_domain].load_weights_from_adnet(net)

        t1 = time.time()

        if iteration % 10 == 0:
            print('Timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data.item()), end=' ')
            if args.visualize and args.send_images_to_visualization:
                random_batch_index = np.random.randint(images.size(0))
                writer.add_image('image', images.data[random_batch_index].cpu().numpy(), random_batch_index)

        if args.visualize:
            writer.add_scalars('data/iter_loss', {'action_loss': action_l.item(),
                                                  'score_loss': score_l.item(),
                                                  'total': (action_l.item() + score_l.item())}, global_step=iteration)
            # hacky fencepost solution for 0th epoch plot
            if iteration == 0:
                writer.add_scalars('data/epoch_loss', {'action_loss': action_loss,
                                                       'score_loss': score_loss,
                                                       'total': (action_loss + score_loss)}, global_step=epoch)

        if iteration % 5000 == 0:
            print('Saving state, iter:', iteration)

            domain_specific_nets_state_dict = []
            for domain_specific_net in domain_specific_nets:
                domain_specific_nets_state_dict.append(domain_specific_net.state_dict())

            torch.save({
                'epoch': epoch,
                'adnet_state_dict': net.state_dict(),
                'adnet_domain_specific_state_dict': domain_specific_nets_state_dict,
                'optimizer_state_dict': optimizer.state_dict(),
            }, os.path.join(args.save_folder, args.save_file) +
                       repr(iteration) + '_epoch' + repr(epoch) +'.pth')

    # final save
    domain_specific_nets_state_dict = [dsn.state_dict() for dsn in domain_specific_nets]
    torch.save({
        'epoch': epoch,
        'adnet_state_dict': net.state_dict(),
        'adnet_domain_specific_state_dict': domain_specific_nets_state_dict,
        'optimizer_state_dict': optimizer.state_dict(),
    }, os.path.join(args.save_folder, args.save_file) + '.pth')

    return net, domain_specific_nets, train_videos
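
# Hedged usage sketch (the args/opts objects are assumed to come from the
# project's argparse/option setup, which is not shown in this snippet):
#
#     net, domain_specific_nets, train_videos = adnet_train_sl(args, opts)
#     # when args.cuda is set, net comes back wrapped in nn.DataParallel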
    def reset(self, net, domain_specific_nets, train_videos, opts, args):
        self.action_list = []  # a_t,l: argmax of self.action_prob_list
        self.action_prob_list = []  # output of network (fc6_out)
        self.log_probs_list = []  # log probs from each self.action_prob_list member
        self.reward_list = []  # tracking score
        self.patch_list = []  # input of network
        self.action_dynamic_list = []  # action_dynamic used for inference (i.e. before updating the action_dynamic)
        self.result_box_list = []
        self.vid_idx_list = []

        print('generating reinforcement learning dataset')
        transform = ADNet_Augmentation(opts)

        self.env = TrackingEnvironment(train_videos,
                                       opts,
                                       transform=transform,
                                       args=args)
        clip_idx = 0
        while True:  # for every clip (l)

            num_step_history = []  # T_l

            num_frame = 1  # the first frame won't be tracked..
            t = 0
            box_history_clip = []  # for checking oscillation in a clip

            if args.cuda:
                net.module.reset_action_dynamic()
            else:
                # the action dynamic is scoped to a clip, so reset it here
                net.reset_action_dynamic()

            while True:  # for every frame in a clip (t)
                tic = time.time()

                if args.display_images:
                    im_with_bb = display_result(self.env.get_current_img(),
                                                self.env.get_state())
                    cv2.imshow('patch',
                               self.env.get_current_patch_unprocessed())
                    cv2.waitKey(1)
                else:
                    im_with_bb = draw_box(self.env.get_current_img(),
                                          self.env.get_state())

                if args.save_result_images:
                    cv2.imwrite(
                        'images/' + str(clip_idx) + '-' + str(t) + '.jpg',
                        im_with_bb)

                curr_patch = self.env.get_current_patch()
                if args.cuda:
                    curr_patch = curr_patch.cuda()

                # self.patch_list.append(curr_patch.cpu().data.numpy())  # TODO: saving patch takes cuda memory

                # TODO: saving action_dynamic takes cuda memory
                # if args.cuda:
                #     self.action_dynamic_list.append(net.module.get_action_dynamic())
                # else:
                #     self.action_dynamic_list.append(net.get_action_dynamic())

                curr_patch = curr_patch.unsqueeze(
                    0)  # 1 batch input [1, curr_patch.shape]

                # load ADNetDomainSpecific with video index
                if args.multidomain:
                    vid_idx = self.env.get_current_train_vid_idx()
                else:
                    vid_idx = 0
                if args.cuda:
                    net.module.load_domain_specific(
                        domain_specific_nets[vid_idx])
                else:
                    net.load_domain_specific(domain_specific_nets[vid_idx])

                fc6_out, fc7_out = net.forward(curr_patch,
                                               update_action_dynamic=True)

                # detaching is fine here: this loop only generates the RL dataset,
                # so no gradient needs to flow through the argmax action
                if args.cuda:
                    action = np.argmax(fc6_out.detach().cpu().numpy())
                    action_prob = fc6_out.detach().cpu().numpy()[0][action]
                else:
                    action = np.argmax(fc6_out.detach().numpy())
                    action_prob = fc6_out.detach().numpy()[0][action]

                m = Categorical(probs=fc6_out)
                action_ = m.sample()  # sampled action (tensor); may differ from the argmax action above

                self.log_probs_list.append(
                    m.log_prob(action_).cpu().data.numpy())
                self.vid_idx_list.append(vid_idx)

                self.action_list.append(action)
                # TODO: saving action_prob_list takes cuda memory
                # self.action_prob_list.append(action_prob)

                new_state, reward, done, info = self.env.step(action)

                # check for oscillation: stop if the new (rounded) box was already visited in this clip
                if any((np.array(new_state).round() == x).all()
                       for x in np.array(box_history_clip).round()):
                    action = opts['stop_action']
                    reward, done, finish_epoch = self.env.go_to_next_frame()
                    info['finish_epoch'] = finish_epoch

                # stop if the number of action steps in this frame exceeds the maximum
                if t > opts['num_action_step_max']:
                    action = opts['stop_action']
                    reward, done, finish_epoch = self.env.go_to_next_frame()
                    info['finish_epoch'] = finish_epoch

                # TODO: saving result_box takes cuda memory
                # self.result_box_list.append(list(new_state))
                box_history_clip.append(list(new_state))

                t += 1

                if action == opts['stop_action']:
                    num_frame += 1
                    num_step_history.append(t)
                    t = 0

                toc = time.time() - tic
                print('forward time (clip ' + str(clip_idx) + " - frame " +
                      str(num_frame) + " - t " + str(t) + ") = " + str(toc) +
                      " s")

                if done:  # if finish the clip
                    break

            tracking_scores_size = np.array(num_step_history).sum()
            tracking_scores = np.full(
                tracking_scores_size,
                reward)  # seems no discount factor whatsoever
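            # e.g. num_step_history = [3, 2] with final clip reward r gives
            # tracking_scores = [r, r, r, r, r]: every action step in the clip
            # receives the same terminal score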

            self.reward_list.extend(tracking_scores)
            # self.reward_list.append(tracking_scores)

            clip_idx += 1

            if info['finish_epoch']:
                break

        print('generating reinforcement learning dataset finish')
def adnet_test(net, vid_path, opts, args):

    if torch.cuda.is_available():
        if args.cuda:
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        if not args.cuda:
            print(
                "WARNING: It looks like you have a CUDA device, but aren't " +
                "using CUDA.\nRun with --cuda for optimal training speed.")
            torch.set_default_tensor_type('torch.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    transform = ADNet_Augmentation(opts)

    print('Testing sequences in ' + str(vid_path) + '...')
    t_sum = 0

    if args.visualize:
        writer = SummaryWriter(
            log_dir=os.path.join('tensorboardx_log', 'online_adaptation_' +
                                 args.save_result_npy))

    ################################
    # Load video sequences
    ################################

    vid_info = {'gt': [], 'img_files': [], 'nframes': 0}

    vid_info['img_files'] = glob.glob(os.path.join(vid_path, 'color', '*.jpg'))
    vid_info['img_files'].sort(key=str.lower)

    gt_path = os.path.join(vid_path, 'groundtruth.txt')

    if not os.path.exists(gt_path):
        bboxes = []
        t = 0
        return bboxes, t_sum

    # parse gt
    gtFile = open(gt_path, 'r')
    gt = gtFile.read().split('\n')
    for i in range(len(gt)):
        if gt[i] == '' or gt[i] is None:
            continue

        if ',' in gt[i]:
            separator = ','
        elif '\t' in gt[i]:
            separator = '\t'
        elif ' ' in gt[i]:
            separator = ' '
        else:
            separator = ','

        gt[i] = gt[i].split(separator)
        gt[i] = list(map(float, gt[i]))
    gtFile.close()

    if len(gt[0]) >= 6:
        for gtidx in range(len(gt)):
            if gt[gtidx] == "":
                continue
            x = gt[gtidx][0:len(gt[gtidx]):2]
            y = gt[gtidx][1:len(gt[gtidx]):2]
            gt[gtidx] = [min(x), min(y), max(x) - min(x), max(y) - min(y)]
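    # worked example (assumed VOT-style polygon annotation): the line
    # "10,20,50,20,50,60,10,60" gives x = [10, 50, 50, 10], y = [20, 20, 60, 60]
    # and collapses to the axis-aligned box [10, 20, 40, 40] = [x, y, w, h]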

    vid_info['gt'] = gt
    if vid_info['gt'][-1] == '':  # small hack
        vid_info['gt'] = vid_info['gt'][:-1]
    vid_info['nframes'] = min(len(vid_info['img_files']), len(vid_info['gt']))

    # catch the first box
    curr_bbox = vid_info['gt'][0]

    # init containers
    bboxes = np.zeros(np.array(
        vid_info['gt']).shape)  # tracking result containers

    ntraining = 0

    # setup training
    if args.cuda:
        optimizer = optim.SGD([{
            'params': net.module.base_network.parameters(),
            'lr': 0
        }, {
            'params': net.module.fc4_5.parameters()
        }, {
            'params': net.module.fc6.parameters()
        }, {
            'params': net.module.fc7.parameters(),
            'lr': 1e-3
        }],
                              lr=1e-3,
                              momentum=opts['train']['momentum'],
                              weight_decay=opts['train']['weightDecay'])
    else:
        optimizer = optim.SGD([{
            'params': net.base_network.parameters(),
            'lr': 0
        }, {
            'params': net.fc4_5.parameters()
        }, {
            'params': net.fc6.parameters()
        }, {
            'params': net.fc7.parameters(),
            'lr': 1e-3
        }],
                              lr=1e-3,
                              momentum=opts['train']['momentum'],
                              weight_decay=opts['train']['weightDecay'])

    action_criterion = nn.CrossEntropyLoss()
    score_criterion = nn.CrossEntropyLoss()

    dataset_storage_pos = None
    dataset_storage_neg = None
    is_negative = False  # is_negative = True if the tracking failed
    target_score = 0
    all_iteration = 0
    t = 0

    for idx in range(vid_info['nframes']):
        # for frame_idx, frame_path in enumerate(vid_info['img_files']):
        frame_idx = idx
        frame_path = vid_info['img_files'][idx]
        t0_wholetracking = time.time()
        frame = cv2.imread(frame_path)

        # draw box or with display, then save
        if args.display_images:
            im_with_bb = display_result(frame,
                                        curr_bbox)  # draw box and display
        else:
            im_with_bb = draw_box(frame, curr_bbox)

        if args.save_result_images:
            filename = os.path.join(args.save_result_images,
                                    str(frame_idx) + '-' + str(t) + '.jpg')
            cv2.imwrite(filename, im_with_bb)

        curr_bbox_old = curr_bbox
        cont_negatives = 0

        if frame_idx > 0:
            # tracking
            if args.cuda:
                net.module.set_phase('test')
            else:
                net.set_phase('test')
            t = 0
            while True:
                curr_patch, curr_bbox, _, _ = transform(
                    frame, curr_bbox, None, None)
                if args.cuda:
                    curr_patch = curr_patch.cuda()

                curr_patch = curr_patch.unsqueeze(
                    0)  # 1 batch input [1, curr_patch.shape]

                fc6_out, fc7_out = net.forward(curr_patch)

                curr_score = fc7_out.detach().cpu().numpy()[0][1]

                if ntraining > args.believe_score_result:
                    if curr_score < opts['failedThre']:
                        cont_negatives += 1

                if args.cuda:
                    action = np.argmax(fc6_out.detach().cpu().numpy()
                                       )  # TODO: really okay to detach?
                    action_prob = fc6_out.detach().cpu().numpy()[0][action]
                else:
                    action = np.argmax(fc6_out.detach().numpy()
                                       )  # TODO: really okay to detach?
                    action_prob = fc6_out.detach().numpy()[0][action]

                # do action
                curr_bbox = do_action(curr_bbox, opts, action, frame.shape)

                # bound the curr_bbox size: enforce a 10 px minimum side while keeping
                # the box centre (max, not min, so the box is clamped into the image)
                if curr_bbox[2] < 10:
                    curr_bbox[0] = max(
                        0, curr_bbox[0] + curr_bbox[2] / 2 - 10 / 2)
                    curr_bbox[2] = 10
                if curr_bbox[3] < 10:
                    curr_bbox[1] = max(
                        0, curr_bbox[1] + curr_bbox[3] / 2 - 10 / 2)
                    curr_bbox[3] = 10

                t += 1

                # draw box or with display, then save
                if args.display_images:
                    im_with_bb = display_result(
                        frame, curr_bbox)  # draw box and display
                else:
                    im_with_bb = draw_box(frame, curr_bbox)

                if args.save_result_images:
                    filename = os.path.join(
                        args.save_result_images,
                        str(frame_idx) + '-' + str(t) + '.jpg')
                    cv2.imwrite(filename, im_with_bb)

                if action == opts['stop_action'] or t >= opts['num_action_step_max']:
                    break

            print('final curr_score: %.4f' % curr_score)

            # redetect when confidence < 0.5, but only once fc7 is reliable (after
            # enough online updates); otherwise just trust ADNet
            if ntraining > args.believe_score_result:
                if curr_score < 0.5:
                    print('redetection')
                    is_negative = True

                    # redetection process
                    redet_samples = gen_samples(
                        'gaussian', curr_bbox_old, opts['redet_samples'], opts,
                        min(1.5, 0.6 * 1.15**cont_negatives),
                        opts['redet_scale_factor'])
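                    # the Gaussian search radius widens with consecutive failures:
                    # 0.6 * 1.15**cont_negatives is 0.60 at 0 failures, ~1.21 at 5,
                    # and is capped at 1.5 from 7 failures onward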
                    score_samples = []

                    for redet_sample in redet_samples:
                        temp_patch, temp_bbox, _, _ = transform(
                            frame, redet_sample, None, None)
                        if args.cuda:
                            temp_patch = temp_patch.cuda()

                        temp_patch = temp_patch.unsqueeze(
                            0)  # 1 batch input [1, curr_patch.shape]

                        fc6_out_temp, fc7_out_temp = net.forward(temp_patch)

                        score_samples.append(
                            fc7_out_temp.detach().cpu().numpy()[0][1])

                    score_samples = np.array(score_samples)
                    max_score_samples_idx = np.argmax(score_samples)

                    # replace the curr_box with the samples with maximum score
                    curr_bbox = redet_samples[max_score_samples_idx]

                    # update the final result image
                    if args.display_images:
                        im_with_bb = display_result(
                            frame, curr_bbox)  # draw box and display
                    else:
                        im_with_bb = draw_box(frame, curr_bbox)

                    if args.save_result_images:
                        filename = os.path.join(args.save_result_images,
                                                str(frame_idx) + '-redet.jpg')
                        cv2.imwrite(filename, im_with_bb)
                else:
                    is_negative = False
            else:
                is_negative = False

        if args.save_result_images:
            filename = os.path.join(args.save_result_images,
                                    'final-' + str(frame_idx) + '.jpg')
            cv2.imwrite(filename, im_with_bb)

        # record the curr_bbox result
        bboxes[frame_idx] = curr_bbox

        # create or update storage + set iteration_range for training
        if frame_idx == 0:
            dataset_storage_pos = OnlineAdaptationDatasetStorage(
                initial_frame=frame,
                first_box=curr_bbox,
                opts=opts,
                args=args,
                positive=True)
            if opts['nNeg_init'] != 0:  # (thanks to small hack in adnet_test) the nNeg_online is also 0
                dataset_storage_neg = OnlineAdaptationDatasetStorage(
                    initial_frame=frame,
                    first_box=curr_bbox,
                    opts=opts,
                    args=args,
                    positive=False)

            iteration_range = range(opts['finetune_iters'])
        else:
            assert dataset_storage_pos is not None
            if opts['nNeg_init'] != 0:  # (thanks to small hack in adnet_test) the nNeg_online is also 0
                assert dataset_storage_neg is not None

            # while fc7 has not seen enough online updates, its confidence cannot
            # be trusted to decide, so new samples are generated every frame;
            # afterwards generate only when the tracker is confident (below)
            always_generate_samples = ntraining < args.believe_score_result

            if always_generate_samples or (not is_negative or
                                           target_score > opts['successThre']):
                dataset_storage_pos.add_frame_then_generate_samples(
                    frame, curr_bbox)

            iteration_range = range(opts['finetune_iters_online'])

        # run online adaptation at the configured frame frequency
        # NOTE: the `False` guard leaves this entire training block disabled
        if False and frame_idx % args.online_adaptation_every_I_frames == 0:
            ntraining += 1
            # generate dataset just before training
            dataset_pos = OnlineAdaptationDataset(dataset_storage_pos)
            data_loader_pos = data.DataLoader(dataset_pos,
                                              opts['minibatch_size'],
                                              num_workers=args.num_workers,
                                              shuffle=True,
                                              pin_memory=False)
            batch_iterator_pos = None

            if opts['nNeg_init'] != 0:  # (thanks to small hack in adnet_test) the nNeg_online is also 0
                dataset_neg = OnlineAdaptationDataset(dataset_storage_neg)
                data_loader_neg = data.DataLoader(dataset_neg,
                                                  opts['minibatch_size'],
                                                  num_workers=args.num_workers,
                                                  shuffle=True,
                                                  pin_memory=False)
                batch_iterator_neg = None
            else:
                dataset_neg = []

            epoch_size_pos = len(dataset_pos) // opts['minibatch_size']
            epoch_size_neg = len(dataset_neg) // opts['minibatch_size']
            epoch_size = epoch_size_pos + epoch_size_neg  # 1 epoch, how many iterations

            which_dataset = list(np.full(epoch_size_pos, fill_value=1))
            which_dataset.extend(np.zeros(epoch_size_neg, dtype=int))
            shuffle(which_dataset)

            print("1 epoch = " + str(epoch_size) + " iterations")

            if args.cuda:
                net.module.set_phase('train')
            else:
                net.set_phase('train')

            # training loop
            for iteration in iteration_range:
                all_iteration += 1  # use this for update the visualization
                # create batch iterator
                if (not batch_iterator_pos) or (iteration % epoch_size == 0):
                    batch_iterator_pos = iter(data_loader_pos)

                if opts['nNeg_init'] != 0:
                    if (not batch_iterator_neg) or (iteration % epoch_size
                                                    == 0):
                        batch_iterator_neg = iter(data_loader_neg)

                # load train data
                if which_dataset[iteration %
                                 len(which_dataset)]:  # if positive
                    images, bbox, action_label, score_label = next(
                        batch_iterator_pos)
                else:
                    images, bbox, action_label, score_label = next(
                        batch_iterator_neg)

                if args.cuda:
                    images = torch.Tensor(images.cuda())
                    bbox = torch.Tensor(bbox.cuda())
                    action_label = torch.Tensor(action_label.cuda())
                    score_label = torch.Tensor(score_label.float().cuda())

                else:
                    images = torch.Tensor(images)
                    bbox = torch.Tensor(bbox)
                    action_label = torch.Tensor(action_label)
                    score_label = torch.Tensor(score_label)

                # forward
                t0 = time.time()
                action_out, score_out = net(images)

                # backprop
                optimizer.zero_grad()
                if which_dataset[iteration %
                                 len(which_dataset)]:  # if positive
                    action_l = action_criterion(action_out,
                                                torch.max(action_label, 1)[1])
                else:
                    action_l = torch.Tensor([0])
                score_l = score_criterion(score_out, score_label.long())
                loss = action_l + score_l
                loss.backward()
                optimizer.step()
                t1 = time.time()

                if all_iteration % 10 == 0:
                    print('Timer: %.4f sec.' % (t1 - t0))
                    print('iter ' + repr(all_iteration) + ' || Loss: %.4f ||' %
                          (loss.data.item()),
                          end=' ')
                    if args.visualize and args.send_images_to_visualization:
                        random_batch_index = np.random.randint(images.size(0))
                        writer.add_image(
                            'image',
                            images.data[random_batch_index].cpu().numpy(),
                            random_batch_index)

                if args.visualize:
                    writer.add_scalars(
                        'data/iter_loss', {
                            'action_loss': action_l.item(),
                            'score_loss': score_l.item(),
                            'total': (action_l.item() + score_l.item())
                        },
                        global_step=all_iteration)

        t1_wholetracking = time.time()
        t_sum += t1_wholetracking - t0_wholetracking
        print('whole tracking time = %.4f sec.' %
              (t1_wholetracking - t0_wholetracking))

    # evaluate the precision
    bboxes = np.array(bboxes)
    vid_info['gt'] = np.array(vid_info['gt'])

    # iou_precisions = iou_precision_plot(bboxes, vid_info['gt'], vid_path, show=args.display_images, save_plot=args.save_result_images)
    #
    # distance_precisions = distance_precision_plot(bboxes, vid_info['gt'], vid_path, show=args.display_images, save_plot=args.save_result_images)
    #
    # precisions = [distance_precisions, iou_precisions]

    np.save(args.save_result_npy + '-bboxes.npy', bboxes)
    np.save(args.save_result_npy + '-ground_truth.npy', vid_info['gt'])

    # return bboxes, t_sum, precisions
    return bboxes, t_sum
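
# Hedged usage sketch (the VOT-style sequence path is an assumption):
#
#     bboxes, t_sum = adnet_test(net, 'datasets/vot2015/bag', opts, args)
#     print('%d frames tracked in %.2f s' % (len(bboxes), t_sum))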
def adnet_train_sl_mot(args, opts, mot, num_obj_to_track=2):
    if torch.cuda.is_available():
        if args.cuda:
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        if not args.cuda:
            print(
                "WARNING: It looks like you have a CUDA device, but aren't " +
                "using CUDA.\nRun with --cuda for optimal training speed.")
            torch.set_default_tensor_type('torch.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    if args.visualize:
        writer = SummaryWriter(
            log_dir=os.path.join('tensorboardx_log', args.save_file))

    train_videos = get_train_videos(opts)
    opts['num_videos'] = len(train_videos['video_names'])

    net, domain_specific_nets = adnet_mot(opts=opts,
                                          trained_file=args.resume,
                                          multidomain=args.multidomain)

    if args.cuda:
        net = nn.DataParallel(net)
        cudnn.benchmark = True

        net = net.cuda()

    if args.cuda:
        optimizer = optim.Adam(
            [{
                'params': net.module.base_network.parameters(),
                'lr': 1e-4
            }, {
                'params': net.module.fc4_5.parameters()
            }, {
                'params': net.module.fc6.parameters()
            }, {
                'params': net.module.fc7.parameters()
            }],  # as action dynamic is zero, it doesn't matter
            lr=1e-3,
            weight_decay=opts['train']['weightDecay'])
    else:
        optimizer = optim.SGD([{
            'params': net.base_network.parameters(),
            'lr': 1e-4
        }, {
            'params': net.fc4_5.parameters()
        }, {
            'params': net.fc6.parameters()
        }, {
            'params': net.fc7.parameters()
        }],
                              lr=1e-3,
                              momentum=opts['train']['momentum'],
                              weight_decay=opts['train']['weightDecay'])

    if args.resume:
        # net.load_weights(args.resume)
        checkpoint = torch.load(args.resume)
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    net.train()

    if not args.resume:
        print('Initializing weights...')

        if args.cuda:
            norm_std = 0.01
            # fc 4
            nn.init.normal_(net.module.fc4_5[0].weight.data, std=norm_std)
            net.module.fc4_5[0].bias.data.fill_(0.1)
            # fc 5
            nn.init.normal_(net.module.fc4_5[3].weight.data, std=norm_std)
            net.module.fc4_5[3].bias.data.fill_(0.1)

            # fc 6
            nn.init.normal_(net.module.fc6.weight.data, std=norm_std)
            net.module.fc6.bias.data.fill_(0)
            # fc 7
            nn.init.normal_(net.module.fc7.weight.data, std=norm_std)
            net.module.fc7.bias.data.fill_(0)
        else:
            norm_std = 0.01
            # fc 4
            nn.init.normal_(net.fc4_5[0].weight.data, std=norm_std)
            net.fc4_5[0].bias.data.fill_(0.1)
            # fc 5
            nn.init.normal_(net.fc4_5[3].weight.data, std=norm_std)
            net.fc4_5[3].bias.data.fill_(0.1)
            # fc 6
            nn.init.normal_(net.fc6.weight.data, std=norm_std)
            net.fc6.bias.data.fill_(0)
            # fc 7
            nn.init.normal_(net.fc7.weight.data, std=norm_std)
            net.fc7.bias.data.fill_(0)

    action_criterion = nn.BCEWithLogitsLoss()
    score_criterion = nn.BCEWithLogitsLoss()

    print('generating Supervised Learning dataset..')
    # dataset = SLDataset(train_videos, opts, transform=
    datasets_pos, datasets_neg = initialize_pos_neg_dataset_adnet_mot(
        train_videos, opts, transform=ADNet_Augmentation(opts))
    number_domain = opts['num_videos']
    assert number_domain == len(datasets_pos), \
        "Num videos given in opts is incorrect! It should be {}".format(len(datasets_pos))

    batch_iterators_pos_train = []
    batch_iterators_neg_train = []

    action_loss_tr = 0
    score_loss_tr = 0

    # calculating number of data
    len_dataset_pos = 0
    len_dataset_neg = 0
    for dataset_pos in datasets_pos:
        len_dataset_pos += len(dataset_pos)
    for dataset_neg in datasets_neg:
        len_dataset_neg += len(dataset_neg)

    epoch_size_pos = len_dataset_pos // opts['minibatch_size']
    epoch_size_neg = len_dataset_neg // opts['minibatch_size']
    epoch_size = epoch_size_pos + epoch_size_neg  # 1 epoch, how many iterations
    print("1 epoch = " + str(epoch_size) + " iterations")

    max_iter = opts['numEpoch'] * epoch_size
    print("maximum iteration = " + str(max_iter))

    data_loaders_pos_train = []
    data_loaders_pos_val = []

    data_loaders_neg_train = []
    data_loaders_neg_val = []

    for dataset_pos in datasets_pos:
        # num_val = int(opts['val_percent'] * len(dataset_pos))
        num_val = 1
        num_train = len(dataset_pos) - num_val
        train, valid = torch.utils.data.random_split(dataset_pos,
                                                     [num_train, num_val])
        data_loaders_pos_train.append(
            data.DataLoader(train,
                            opts['minibatch_size'],
                            num_workers=2,
                            shuffle=True,
                            pin_memory=True))
        data_loaders_pos_val.append(
            data.DataLoader(valid,
                            opts['minibatch_size'],
                            num_workers=0,
                            shuffle=True,
                            pin_memory=False))
    for dataset_neg in datasets_neg:
        num_val = int(opts['val_percent'] * len(dataset_neg))
        num_train = len(dataset_neg) - num_val
        train, valid = torch.utils.data.random_split(dataset_neg,
                                                     [num_train, num_val])
        data_loaders_neg_train.append(
            data.DataLoader(train,
                            opts['minibatch_size'],
                            num_workers=1,
                            shuffle=True,
                            pin_memory=True))
        data_loaders_neg_val.append(
            data.DataLoader(valid,
                            opts['minibatch_size'],
                            num_workers=0,
                            shuffle=True,
                            pin_memory=False))

    epoch = args.start_epoch
    if epoch != 0 and args.start_iter == 0:
        start_iter = epoch * epoch_size
    else:
        start_iter = args.start_iter

    which_dataset = list(np.full(epoch_size_pos, fill_value=1))
    which_dataset.extend(np.zeros(epoch_size_neg, dtype=int))
    shuffle(which_dataset)
    which_dataset = torch.Tensor(which_dataset)
    if args.cuda:
        which_dataset = which_dataset.cuda()

    which_domain = np.random.permutation(number_domain)

    # training loop
    time_arr = np.zeros(10)
    for iteration in tqdm(range(start_iter, max_iter)):
        t0 = time.time()
        if args.multidomain:
            curr_domain = which_domain[iteration % len(which_domain)]
        else:
            curr_domain = 0

        # if new epoch (not including the very first iteration)
        if (iteration != start_iter) and (iteration % epoch_size == 0):
            epoch += 1
            shuffle(which_dataset)
            np.random.shuffle(which_domain)

            print('Saving state, epoch: {}'.format(epoch))
            domain_specific_nets_state_dict = []
            for domain_specific_net in domain_specific_nets:
                domain_specific_nets_state_dict.append(
                    domain_specific_net.state_dict())

            torch.save(
                {
                    'epoch': epoch,
                    'adnet_state_dict': net.state_dict(),
                    'adnet_domain_specific_state_dict': domain_specific_nets_state_dict,
                    'optimizer_state_dict': optimizer.state_dict(),
                },
                os.path.join(args.save_folder, args.save_file) + 'epoch' +
                repr(epoch) + '.pth')

            # VAL
            # for curr_domain_temp in range(number_domain):
            #     accuracy = []
            #     action_loss_val = []
            #     score_loss_val = []
            #
            #     # load ADNetDomainSpecific with video index
            #     if args.cuda:
            #         net.module.load_domain_specific(domain_specific_nets[curr_domain_temp])
            #     else:
            #         net.load_domain_specific(domain_specific_nets[curr_domain_temp])
            #     for i, temp in enumerate(
            #             [data_loaders_pos_val[curr_domain_temp], data_loaders_neg_val[curr_domain_temp]]):
            #         for images, bbox, action_label, score_label, _ in temp:
            #             images = images.to('cuda', non_blocking=True)
            #             action_label = action_label.to('cuda', non_blocking=True)
            #             score_label = score_label.float().to('cuda', non_blocking=True)
            #
            #             # forward
            #             action_out, score_out = net(images)
            #
            #             if i == 0:  # if positive
            #                 action_l = action_criterion(action_out, torch.max(action_label, 1)[1])
            #                 accuracy.append(
            #                     int(action_label.argmax(axis=1).eq(action_out.argmax(axis=1)).sum()) / len(
            #                         action_label))
            #                 action_loss_val.append(action_l.item())
            #
            #             score_l = score_criterion(score_out, score_label.reshape(-1, 1))
            #             score_loss_val.append(score_l.item())
            #     print("Vid. {}".format(curr_domain))
            #     print("\tAccuracy: {}".format(np.mean(accuracy)))
            #     print("\tScore loss: {}".format(np.mean(score_loss_val)))
            #     print("\tAction loss: {}".format(np.mean(action_loss_val)))
            #     if args.visualize:
            #         writer.add_scalars('data/val_video_{}'.format(curr_domain_temp),
            #                            {'action_loss_val': np.mean(action_loss_val),
            #                             'score_loss_val': np.mean(score_loss_val),
            #                             'total_val': np.mean(score_loss_val) + np.mean(
            #                                 action_loss_val),
            #                             'accuracy': np.mean(accuracy)},
            #                            global_step=epoch)

            if args.visualize:
                writer.add_scalars('data/epoch_loss', {
                    'action_loss_tr': action_loss_tr / epoch_size_pos,
                    'score_loss_tr': score_loss_tr / epoch_size,
                    'total_tr': action_loss_tr / epoch_size_pos + score_loss_tr / epoch_size,
                }, global_step=epoch)

            # reset epoch loss counters
            action_loss_tr = 0
            score_loss_tr = 0

        # initialize the batch iterators on the first iteration (or when resuming);
        # exhausted iterators are refreshed on StopIteration below
        if len(batch_iterators_pos_train) == 0 or len(
                batch_iterators_neg_train) == 0:
            # create batch iterator
            for data_loader_pos in data_loaders_pos_train:
                batch_iterators_pos_train.append(iter(data_loader_pos))

            for data_loader_neg in data_loaders_neg_train:
                batch_iterators_neg_train.append(iter(data_loader_neg))

        # if not batch_iterators_pos_train[curr_domain]:
        #     # create batch iterator
        #     batch_iterators_pos_train[curr_domain] = iter(data_loaders_pos_train[curr_domain])
        #
        # if not batch_iterators_neg_train[curr_domain]:
        #     # create batch iterator
        #     batch_iterators_neg_train[curr_domain] = iter(data_loaders_neg_train[curr_domain])

        # load train data
        if which_dataset[iteration % len(which_dataset)]:  # if positive
            try:
                images_list, bbox_list, action_labels, score_label, vid_idx = next(
                    batch_iterators_pos_train[curr_domain])
            except StopIteration:
                batch_iterators_pos_train[curr_domain] = iter(
                    data_loaders_pos_train[curr_domain])
                images_list, bbox_list, action_labels, score_label, vid_idx = next(
                    batch_iterators_pos_train[curr_domain])
        else:
            try:
                images_list, bbox_list, action_labels, score_label, vid_idx = next(
                    batch_iterators_neg_train[curr_domain])
            except StopIteration:
                batch_iterators_neg_train[curr_domain] = iter(
                    data_loaders_neg_train[curr_domain])
                images_list, bbox_list, action_labels, score_label, vid_idx = next(
                    batch_iterators_neg_train[curr_domain])

        # TODO: make sure different objects are paired differently, so it is not always pos with pos
        if args.cuda:
            images_list = images_list.to('cuda', non_blocking=True)
            # bbox = torch.Tensor(bbox.cuda())
            action_labels = action_labels.to('cuda', non_blocking=True)
            score_label = score_label.float().to('cuda', non_blocking=True)

        else:
            images_list = torch.Tensor(images_list)
            action_labels = torch.Tensor(action_labels)
            score_label = score_label.float()

        # load ADNetDomainSpecific with video index before the forward pass,
        # as in adnet_train_sl above
        if args.cuda:
            net.module.load_domain_specific(domain_specific_nets[curr_domain])
        else:
            net.load_domain_specific(domain_specific_nets[curr_domain])

        # TRAIN
        net.train()
        action_out, score_out = net(images_list)

        # backprop
        optimizer.zero_grad()
        accuracy_arr = []
        score_l = score_criterion(
            score_out,
            torch.cat((score_label.reshape(-1, 1), score_label.reshape(-1, 1)),
                      dim=1))
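        # with num_obj_to_track == 2 (the default above), score_out is treated as
        # [batch, 2] logits, so the single score_label column is duplicated to
        # give both tracked objects the same target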
        if which_dataset[iteration % len(which_dataset)]:  # if positive
            action_l = action_criterion(
                action_out,
                action_labels.reshape(-1,
                                      num_obj_to_track * opts['num_actions']))
            loss = action_l + score_l
            for i in range(num_obj_to_track):
                pred = action_out[:, i * opts['num_actions']:(i + 1) * opts['num_actions']].argmax(axis=1)
                target = action_labels[:, i, :].argmax(axis=1)
                accuracy_arr.append(int(target.eq(pred).sum()) / len(action_labels))
        else:
            action_l = -1
            accuracy_arr = [-1] * num_obj_to_track
            loss = score_l

        loss.backward()
        optimizer.step()

        if action_l != -1:
            action_loss_tr += action_l.item()
        score_loss_tr += score_l.item()

        # save the ADNetDomainSpecific back to their module
        if args.cuda:
            domain_specific_nets[curr_domain].load_weights_from_adnet(
                net.module)
        else:
            domain_specific_nets[curr_domain].load_weights_from_adnet(net)

        if args.visualize:
            if action_l != -1:
                writer.add_scalars(
                    'data/iter_loss', {
                        'action_loss_tr': action_l.item(),
                        'score_loss_tr': score_l.item(),
                        'total_tr': (action_l.item() + score_l.item())
                    },
                    global_step=iteration)
            else:
                writer.add_scalars('data/iter_loss', {
                    'score_loss_tr': score_l.item(),
                    'total_tr': score_l.item()
                },
                                   global_step=iteration)
            for i in range(num_obj_to_track):
                accuracy = accuracy_arr[i]
                if accuracy >= 0:
                    writer.add_scalars('data/iter_acc_{}'.format(i),
                                       {'accuracy_tr': accuracy},
                                       global_step=iteration)

        t1 = time.time()
        time_arr[iteration % 10] = t1 - t0

        if iteration % 10 == 0:
            # print('Avg. 10 iter time: %.4f sec.' % time_arr.sum())
            # print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data.item()), end=' ')
            if args.visualize and args.send_images_to_visualization:
                random_batch_index = np.random.randint(images_list.size(0))
                writer.add_image('image',
                                 images_list.data[random_batch_index].cpu().numpy(),
                                 random_batch_index)

        if args.visualize:
            writer.add_scalars('data/time', {'time_10_it': time_arr.sum()},
                               global_step=iteration)

        if iteration % 5000 == 0:
            print('Saving state, iter:', iteration)

            domain_specific_nets_state_dict = []
            for domain_specific_net in domain_specific_nets:
                domain_specific_nets_state_dict.append(
                    domain_specific_net.state_dict())

            torch.save(
                {
                    'epoch': epoch,
                    'adnet_state_dict': net.state_dict(),
                    'adnet_domain_specific_state_dict': domain_specific_nets_state_dict,
                    'optimizer_state_dict': optimizer.state_dict(),
                },
                os.path.join(args.save_folder, args.save_file) +
                repr(iteration) + '_epoch' + repr(epoch) + '.pth')

    # final save
    domain_specific_nets_state_dict = [dsn.state_dict() for dsn in domain_specific_nets]
    torch.save(
        {
            'epoch': epoch,
            'adnet_state_dict': net.state_dict(),
            'adnet_domain_specific_state_dict': domain_specific_nets_state_dict,
            'optimizer_state_dict': optimizer.state_dict(),
        },
        os.path.join(args.save_folder, args.save_file) + '.pth')

    return net, domain_specific_nets, train_videos
def adnet_test_sl(args, opts, mot):
    if torch.cuda.is_available():
        if args.cuda:
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        if not args.cuda:
            print(
                "WARNING: It looks like you have a CUDA device, but aren't " +
                "using CUDA.\nRun with --cuda for optimal training speed.")
            torch.set_default_tensor_type('torch.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    if args.visualize:
        writer = SummaryWriter(
            log_dir=os.path.join('tensorboardx_log', args.save_file))

    train_videos = get_train_videos(opts)
    opts['num_videos'] = len(train_videos['video_names'])

    net, domain_specific_nets = adnet(opts=opts,
                                      trained_file=args.resume,
                                      multidomain=args.multidomain)

    if args.cuda:
        net = nn.DataParallel(net)
        cudnn.benchmark = True

        net = net.cuda()

    net.eval()

    action_criterion = nn.CrossEntropyLoss()
    score_criterion = nn.BCELoss()

    print('generating Supervised Learning dataset..')
    # dataset = SLDataset(train_videos, opts, transform=
    if mot:
        datasets_pos, datasets_neg = initialize_pos_neg_dataset_mot(
            train_videos, opts, transform=ADNet_Augmentation(opts))
    else:
        datasets_pos, datasets_neg = initialize_pos_neg_dataset(
            train_videos, opts, transform=ADNet_Augmentation(opts))
    number_domain = opts['num_videos']
    assert number_domain == len(datasets_pos), \
        "Num videos given in opts is incorrect! It should be {}".format(len(datasets_pos))

    batch_iterators_pos_val = []
    batch_iterators_neg_val = []

    # calculating number of data
    len_dataset_pos = 0
    len_dataset_neg = 0
    for dataset_pos in datasets_pos:
        len_dataset_pos += len(dataset_pos)
    for dataset_neg in datasets_neg:
        len_dataset_neg += len(dataset_neg)

    epoch_size_pos = len_dataset_pos // opts['minibatch_size']
    epoch_size_neg = len_dataset_neg // opts['minibatch_size']
    epoch_size = epoch_size_pos + epoch_size_neg  # 1 epoch, how many iterations
    print("1 epoch = " + str(epoch_size) + " iterations")

    max_iter = opts['numEpoch'] * epoch_size
    print("maximum iteration = " + str(max_iter))

    data_loaders_pos_val = []
    data_loaders_neg_val = []

    for dataset_pos in datasets_pos:
        data_loaders_pos_val.append(
            data.DataLoader(dataset_pos,
                            opts['minibatch_size'],
                            num_workers=2,
                            shuffle=True,
                            pin_memory=True))
    for dataset_neg in datasets_neg:
        data_loaders_neg_val.append(
            data.DataLoader(dataset_neg,
                            opts['minibatch_size'],
                            num_workers=2,
                            shuffle=True,
                            pin_memory=True))

    net.eval()

    for curr_domain in range(number_domain):
        accuracy = []
        action_loss_val = []
        score_loss_val = []

        # load ADNetDomainSpecific with video index
        if args.cuda:
            net.module.load_domain_specific(domain_specific_nets[curr_domain])
        else:
            net.load_domain_specific(domain_specific_nets[curr_domain])
        for i, temp in enumerate([
                data_loaders_pos_val[curr_domain],
                data_loaders_neg_val[curr_domain]
        ]):
            dont_show = False
            for images, bbox, action_label, score_label, indices in tqdm(temp):
                images = images.to('cuda', non_blocking=True)
                action_label = action_label.to('cuda', non_blocking=True)
                score_label = score_label.float().to('cuda', non_blocking=True)

                # forward
                action_out, score_out = net(images)

                if i == 0:  # if positive
                    action_l = action_criterion(action_out,
                                                torch.max(action_label, 1)[1])
                    action_loss_val.append(action_l.item())
                    accuracy.append(
                        int(
                            action_label.argmax(axis=1).eq(
                                action_out.argmax(axis=1)).sum()) /
                        len(action_label))

                score_l = score_criterion(score_out,
                                          score_label.reshape(-1, 1))
                score_loss_val.append(score_l.item())

                if args.display_images and not dont_show:
                    if i == 0:
                        dataset = datasets_pos[curr_domain]
                        color = (0, 255, 0)
                        conf = 1
                    else:
                        dataset = datasets_neg[curr_domain]
                        color = (0, 0, 255)
                        conf = 0
                    for j, index in enumerate(indices):
                        im = cv2.imread(dataset.train_db['img_path'][index])
                        bbox = dataset.train_db['bboxes'][index]
                        action_label = np.array(
                            dataset.train_db['labels'][index])
                        cv2.rectangle(im, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      color, 2)

                        print("\n\nTarget actions: {}".format(
                            action_label.argmax()))
                        print("Predicted actions: {}".format(
                            action_out.data[j].argmax()))

                        print("Target conf: {}".format(conf))
                        print("Predicted conf: {}".format(score_out.data[j]))
                        # print("Score loss: {}".format(score_l.item()))
                        # print("Action loss: {}".format(action_l.item()))
                        cv2.imshow("Test", im)
                        key = cv2.waitKey(0) & 0xFF

                        # if the `q` key was pressed, break from the loop
                        if key == ord("q"):
                            dont_show = True
                            break
                        elif key == ord("s"):
                            cv2.imwrite(
                                "vid {} t:{} p:{} c:{}.png".format(
                                    curr_domain, action_label.argmax(),
                                    action_out.data[i].argmax(),
                                    score_out.data[i].item()), im)

        print("Vid. {}".format(curr_domain))
        print("\tAccuracy: {}".format(np.mean(accuracy)))
        print("\tScore loss: {}".format(np.mean(score_loss_val)))
        print("\tAction loss: {}".format(np.mean(action_loss_val)))

    sys.exit(0)  # evaluation-only entry point; the return below is never reached
    return net, domain_specific_nets, train_videos