Example #1
def BSN_Train_TEM(opt):
    writer = SummaryWriter()
    model = TEM(opt)
    model = torch.nn.DataParallel(model, device_ids=GPU_IDs).cuda()  # GPU_IDs: list of GPU indices, assumed defined at module scope

    optimizer = optim.Adam(model.parameters(),
                           lr=opt["tem_training_lr"],
                           weight_decay=opt["tem_weight_decay"])

    train_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="train"),
        batch_size=model.module.batch_size,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
        drop_last=True)

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        drop_last=True)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt["tem_step_size"],
                                                gamma=opt["tem_step_gamma"])

    for epoch in range(opt["tem_epoch"]):
        train_TEM(train_loader, model, optimizer, epoch, writer, opt)
        test_TEM(test_loader, model, epoch, writer, opt)
        # step the scheduler after the epoch's optimizer updates
        # (stepping it first skips the initial LR in PyTorch >= 1.1)
        scheduler.step()
    writer.close()
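For reference, a minimal sketch of the opt entries this trainer reads; the key names are taken from the lookups above, the values are purely illustrative assumptions:

opt = {
    "tem_training_lr": 1e-3,   # illustrative value
    "tem_weight_decay": 1e-4,  # illustrative value
    "tem_step_size": 10,       # illustrative value
    "tem_step_gamma": 0.1,     # illustrative value
    "tem_epoch": 20,           # illustrative value
}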
Example #2
def BMN_Train(opt):
    writer = SummaryWriter()
    model = BMN(opt).cuda()

    optimizer = optim.Adam(model.parameters(),
                           lr=opt["training_lr"],
                           weight_decay=opt["weight_decay"])

    train_loader = torch.utils.data.DataLoader(VideoDataSet(opt,
                                                            subset="train"),
                                               batch_size=opt["batch_size"],
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(VideoDataSet(
        opt, subset="validation"),
                                              batch_size=opt["batch_size"],
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt["step_size"],
                                                gamma=opt["step_gamma"])

    for epoch in range(opt["train_epochs"]):
        train_BMN(train_loader, model, optimizer, epoch, writer, opt)
        test_BMN(test_loader, model, epoch, writer, opt)
        # step the scheduler after the epoch's optimizer updates
        scheduler.step()
    writer.close()
Example #3
def BMN_Train(opt):
    model = BMN(opt)
    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=opt["training_lr"],
                           weight_decay=opt["weight_decay"])

    train_loader = torch.utils.data.DataLoader(VideoDataSet(opt,
                                                            subset="train"),
                                               batch_size=opt["batch_size"],
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(VideoDataSet(
        opt, subset="validation"),
                                              batch_size=opt["batch_size"],
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt["step_size"],
                                                gamma=opt["step_gamma"])
    bm_mask = get_mask(opt["temporal_scale"])
    for epoch in range(opt["train_epochs"]):
        train_BMN(train_loader, model, optimizer, epoch, bm_mask)
        test_BMN(test_loader, model, epoch, bm_mask)
        # step the scheduler after the epoch's optimizer updates
        scheduler.step()
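get_mask is not shown in these examples, and its signature varies across projects (Example #4 below passes two arguments). A plausible single-argument sketch matching the call above, treating the mask as a (duration, start) validity map; this is an assumption based on how the map is indexed in the inference examples further down:

import numpy as np
import torch

def get_mask(tscale):
    # row idx = duration bin, column jdx = start bin; a proposal is valid
    # only while start + duration stays inside the temporal scale
    mask = [[1] * (tscale - idx) + [0] * idx for idx in range(tscale)]
    return torch.Tensor(np.array(mask, dtype=np.float32))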
Example #4
    def train(self, n_epochs):
        # make sure the log root exists before scanning it for previous runs
        if not os.path.isdir(self.cfg.TRAIN.LOG_DIR):
            os.makedirs(self.cfg.TRAIN.LOG_DIR)
        exp_id = max([0] + [
            int(run.split('_')[-1])
            for run in os.listdir(self.cfg.TRAIN.LOG_DIR)
        ]) + 1
        log_dir = os.path.join(self.cfg.TRAIN.LOG_DIR, 'run_' + str(exp_id))

        writer = SummaryWriter(log_dir)
        checkpoint_dir = os.path.join(self.cfg.MODEL.CHECKPOINT_DIR,
                                      'checkpoint_' + str(exp_id))
        assert not os.path.isdir(
            checkpoint_dir
        ), 'Checkpoint directory %s has already been created.' % checkpoint_dir
        os.makedirs(checkpoint_dir)

        train_loader = torch.utils.data.DataLoader(
            VideoDataSet(self.cfg, split=self.cfg.TRAIN.SPLIT),
            batch_size=self.cfg.TRAIN.BATCH_SIZE,
            shuffle=True,
            num_workers=12,
            pin_memory=True,
            collate_fn=self.train_collator)

        eval_loader = torch.utils.data.DataLoader(
            VideoDataSet(self.cfg, split=self.cfg.VAL.SPLIT),
            batch_size=self.cfg.VAL.BATCH_SIZE,
            shuffle=False,
            num_workers=12,
            pin_memory=True,
            drop_last=False,
            collate_fn=self.test_collator)

        bm_mask = get_mask(self.temporal_dim, self.max_duration).cuda()
        scores = []
        for epoch in range(n_epochs):
            #print('Current LR: {}'.format(self.scheduler.get_last_lr()[0]))
            self.train_epoch(train_loader, bm_mask, epoch, writer)
            #self.scheduler.step()
            score = self.evaluate(eval_loader, self.cfg.VAL.SPLIT)

            state = {
                'epoch': epoch + 1,
                'score': score,
                'state_dict': self.model.state_dict()
            }
            if len(scores) == 0 or score > max(scores):
                torch.save(
                    state,
                    os.path.join(checkpoint_dir,
                                 "best_{}.pth".format(self.cfg.EVAL_SCORE)))
            torch.save(
                state,
                os.path.join(checkpoint_dir, "model_{}.pth".format(epoch + 1)))

            writer.add_scalar(self.cfg.EVAL_SCORE, score, epoch)
            scores.append(score)
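A minimal usage sketch, inside the same class, for restoring the best checkpoint saved above; the path layout follows the torch.save calls, everything else is assumed:

state = torch.load(os.path.join(checkpoint_dir, "best_{}.pth".format(self.cfg.EVAL_SCORE)))
self.model.load_state_dict(state['state_dict'])
print("restored epoch {} with {} = {}".format(state['epoch'], self.cfg.EVAL_SCORE, state['score']))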
Example #5
def BSN_inference_TEM(opt):
    model = TEM(opt)
    checkpoint = torch.load(opt["checkpoint_path"] + "/tem_best.pth.tar")
    # strip the "module." prefix that DataParallel adds to state-dict keys
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()
    model.eval()
    
    test_loader = torch.utils.data.DataLoader(VideoDataSet(opt, subset="full"),
                                              batch_size=model.module.batch_size, shuffle=False,
                                              num_workers=8, pin_memory=True, drop_last=False)
    
    columns = ["action", "start", "end", "xmin", "xmax"]
    for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:

        TEM_output = model(input_data).detach().cpu().numpy()
        batch_action = TEM_output[:, 0, :]
        batch_start = TEM_output[:, 1, :]
        batch_end = TEM_output[:, 2, :]

        index_list = index_list.numpy()
        anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
        anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

        for batch_idx, full_idx in enumerate(index_list):
            video = test_loader.dataset.video_list[full_idx]
            video_action = batch_action[batch_idx]
            video_start = batch_start[batch_idx]
            video_end = batch_end[batch_idx]
            video_result = np.stack(
                (video_action, video_start, video_end, anchor_xmin, anchor_xmax), axis=1)
            video_df = pd.DataFrame(video_result, columns=columns)
            video_df.to_csv("./output/TEM_results/" + video + ".csv", index=False)
Example #6
def BMN_inference(opt):
    model = BMN(opt)
    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(VideoDataSet(
        opt, subset="validation"),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True,
                                              drop_last=False)
    tscale = opt["temporal_scale"]
    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            confidence_map, start, end = model(input_data)

            # print(start.shape,end.shape,confidence_map.shape)
            start_scores = start[0].detach().cpu().numpy()
            end_scores = end[0].detach().cpu().numpy()
            clr_confidence = (confidence_map[0][1]).detach().cpu().numpy()
            reg_confidence = (confidence_map[0][0]).detach().cpu().numpy()

            # enumerate all combinations of start and end boundary points
            new_props = []
            for idx in range(tscale):
                for jdx in range(tscale):
                    start_index = idx
                    end_index = jdx + 1
                    if start_index < end_index and end_index < tscale:
                        xmin = start_index / tscale
                        xmax = end_index / tscale
                        xmin_score = start_scores[start_index]
                        xmax_score = end_scores[end_index]
                        clr_score = clr_confidence[idx, jdx]
                        reg_score = reg_confidence[idx, jdx]
                        score = xmin_score * xmax_score * clr_score * reg_score
                        new_props.append([
                            xmin, xmax, xmin_score, xmax_score, clr_score,
                            reg_score, score
                        ])
            new_props = np.stack(new_props)
            #########################################################################

            col_name = [
                "xmin", "xmax", "xmin_score", "xmax_score", "clr_score",
                "reg_score", "score"
            ]
            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv",
                          index=False)
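The O(tscale^2) double loop above vectorizes cleanly; a sketch under the same (idx, jdx) -> (start_index, end_index) convention, producing the same rows in the same order:

import numpy as np

idx_grid, jdx_grid = np.meshgrid(np.arange(tscale), np.arange(tscale), indexing="ij")
start_index = idx_grid           # rows sweep the start position
end_index = jdx_grid + 1         # columns sweep the shifted end position
valid = (start_index < end_index) & (end_index < tscale)

xmin_score = start_scores[start_index[valid]]
xmax_score = end_scores[end_index[valid]]
score = xmin_score * xmax_score * clr_confidence[valid] * reg_confidence[valid]
new_props = np.stack([start_index[valid] / tscale, end_index[valid] / tscale,
                      xmin_score, xmax_score, clr_confidence[valid],
                      reg_confidence[valid], score], axis=1)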
Example #7
def BSN_inference_TEM(opt):
    model = TEM(opt)
    checkpoint = torch.load(opt["checkpoint_path"]+"/"+opt["arch"]+"_tem_best.pth.tar")
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=GPU_IDs).cuda()
    model.eval()
    
    test_loader = torch.utils.data.DataLoader(VideoDataSet(opt, subset="full"),
                                              batch_size=model.module.batch_size, shuffle=False,
                                              num_workers=8, pin_memory=True, drop_last=False)
#     test_loader = torch.utils.data.DataLoader(VideoDataSet(opt,subset="trainval"),
#                                                 batch_size=model.module.batch_size, shuffle=False,
#                                                 num_workers=8, pin_memory=True,drop_last=False)    
    columns = ["action", "start", "end", "xmin", "xmax"]
    count = 0
    for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:
        # for videos of varying length, rebuild the anchors to match the input
#         if opt['fix_scale'] is False:
        if opt['fix_scale'] == 'nonrescale':
            if len(anchor_xmin) != input_data.shape[2]:
                temporal_scale = input_data.shape[2]
                temporal_gap = 1. / temporal_scale
                anchor_xmin = [temporal_gap * i for i in range(temporal_scale)]
                anchor_xmin = [torch.tensor([x]) for x in anchor_xmin]
                anchor_xmax = [temporal_gap * i for i in range(1, temporal_scale + 1)]
                anchor_xmax = [torch.tensor([x]) for x in anchor_xmax]

        #############################################################
        TEM_output = model(input_data).detach().cpu().numpy()
        batch_action = TEM_output[:, 0, :]
        batch_start = TEM_output[:, 1, :]
        batch_end = TEM_output[:, 2, :]

        index_list = index_list.numpy()
        anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
        anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

        for batch_idx, full_idx in enumerate(index_list):
            video = test_loader.dataset.video_list[full_idx]
            video_action = batch_action[batch_idx]
            video_start = batch_start[batch_idx]
            video_end = batch_end[batch_idx]
            video_result = np.stack(
                (video_action, video_start, video_end, anchor_xmin, anchor_xmax), axis=1)
            video_df = pd.DataFrame(video_result, columns=columns)
            video_df.to_csv("./output/" + opt["arch"] + opt["fix_scale"] +
                            "_TEM_results/" + video + ".csv", index=False)
            count += 1
        if count % 100 == 0:
            print('finish', count)
            sys.stdout.flush()
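The per-video CSV writes above assume the output directory already exists; a small guard one could run before the loop (os.makedirs with exist_ok=True is the assumed fix):

out_dir = "./output/" + opt["arch"] + opt["fix_scale"] + "_TEM_results/"
os.makedirs(out_dir, exist_ok=True)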
Example #8
def BMN_Train(opt):
    start_time = time.time()
    model = BMN(opt)
    model = torch.nn.DataParallel(model,
                                  device_ids=list(range(opt['n_gpu']))).cuda()
    print('using {} gpus to train!'.format(opt['n_gpu']))
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=opt["training_lr"],
                           weight_decay=opt["weight_decay"])

    train_loader = torch.utils.data.DataLoader(VideoDataSet(opt,
                                                            subset="train"),
                                               batch_size=opt["batch_size"],
                                               shuffle=True,
                                               num_workers=opt['num_workers'],
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(VideoDataSet(
        opt, subset="validation"),
                                              batch_size=opt["batch_size"],
                                              shuffle=False,
                                              num_workers=opt['num_workers'],
                                              pin_memory=True)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt["step_size"],
                                                gamma=opt["step_gamma"])
    bm_mask = get_mask(opt["temporal_scale"])
    best_loss = 1e10
    for epoch in range(opt["train_epochs"]):
        train_BMN(train_loader, model, optimizer, epoch, bm_mask)
        best_loss = test_BMN(test_loader, model, epoch, bm_mask, best_loss)
        scheduler.step()

    print("Total time (BMN_Train):",
          datetime.timedelta(seconds=time.time() - start_time))
Example #9
def BSN_Train_TEM(opt):
    writer = SummaryWriter()
    model = TEM(opt)
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()
    state_dict = torch.load('checkpoint/tem_best.pth.tar')['state_dict']
    model.load_state_dict(state_dict)
    optimizer = optim.Adam(model.parameters(),
                           lr=opt["tem_training_lr"],
                           weight_decay=opt["tem_weight_decay"])

    train_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="train"),
        batch_size=model.module.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
        drop_last=True)

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
        drop_last=True)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt["tem_step_size"],
                                                gamma=opt["tem_step_gamma"])

    for epoch in range(opt["tem_epoch"]):
        train_TEM(train_loader, model, optimizer, epoch, writer, opt)
        scheduler.step()
        if (epoch + 1) % 3 == 0:
            test_TEM(test_loader, model, epoch, writer, opt)
    writer.close()
Example #10
    def inference(self, data_loader=None, split=None, batch_size=None):
        if not os.path.isdir('results/outputs/'):
            os.makedirs('results/outputs/')

        annotations = getDatasetDict(
            self.cfg.DATA.ANNOTATION_FILE,
            split) if self.cfg.DATASET == 'thumos' else None
        self.prop_gen = ProposalGenerator(self.temporal_dim, self.max_duration,
                                          annotations)
        self.post_processing = PostProcessor(self.cfg, split)
        if data_loader is None:
            data_loader = torch.utils.data.DataLoader(
                VideoDataSet(self.cfg, split=split),
                batch_size=batch_size,
                shuffle=False,
                num_workers=12,
                pin_memory=True,
                drop_last=False,
                collate_fn=self.test_collator)

        col_name = [
            "xmin", "xmax", "xmin_score", "xmax_score", "clr_score",
            "reg_score", "score"
        ]
        self.model.eval()
        with torch.no_grad():
            for video_names, env_features, agent_features, agent_masks in tqdm(
                    data_loader):
                env_features = env_features.cuda() if self.cfg.USE_ENV else None
                agent_features = agent_features.cuda() if self.cfg.USE_AGENT else None
                agent_masks = agent_masks.cuda() if self.cfg.USE_AGENT else None

                confidence_map, start_map, end_map = self.model(
                    env_features, agent_features, agent_masks)
                confidence_map = confidence_map.cpu().numpy()
                start_map = start_map.cpu().numpy()
                end_map = end_map.cpu().numpy()

                batch_props = self.prop_gen(start_map, end_map, confidence_map,
                                            video_names)
                for video_name, new_props in zip(video_names, batch_props):
                    new_df = pd.DataFrame(new_props, columns=col_name)
                    new_df.to_feather("./results/outputs/" + video_name +
                                      ".feather")
        self.post_processing()
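A usage sketch for consuming the per-video proposal files written above; pandas reads feather directly (pyarrow backend), and video_name stands for any name produced by the loop:

import pandas as pd

props = pd.read_feather("./results/outputs/" + video_name + ".feather")
top = props.sort_values("score", ascending=False).head(100)  # e.g. keep the 100 best proposals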
Example #11
def BSN_inference_TEM(opt):
    '''
    Inference of TEM
    step - 1. load the best model
    step - 2. TEM outputs three probability curves (action, start, end) for each rescaled video
    '''

    # step - 1
    model = TEM(opt)
    checkpoint = torch.load(opt["checkpoint_path"]+"/tem_best.pth.tar")
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()

    model.eval()

    # step - 2
    # set subset = 'full' to generate curves for all videos
    test_loader = torch.utils.data.DataLoader(VideoDataSet(opt, subset="full"),
                                              batch_size=model.module.batch_size, shuffle=False,
                                              num_workers=8, pin_memory=True, drop_last=False)

    columns = ['action', 'start', 'end', 'xmin', 'xmax']

    for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:

        TEM_output = model(input_data).detach().cpu().numpy()
        batch_action = TEM_output[:, 0, :]
        batch_start = TEM_output[:, 1, :]
        batch_end = TEM_output[:, 2, :]

        index_list = index_list.numpy()
        anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
        anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

        for batch_idx, full_idx in enumerate(index_list):

            video_name = test_loader.dataset.video_list[full_idx]
            video_action = batch_action[batch_idx]
            video_start = batch_start[batch_idx]
            video_end = batch_end[batch_idx]
            video_result = np.stack((video_action, video_start, video_end, anchor_xmin, anchor_xmax), axis=1)
            video_df = pd.DataFrame(video_result, columns=columns)
            video_df.to_csv('./output/TEM_results/' + video_name + '.csv', index=False)
Example #12
def BMN_inference(opt):
    model = BMN(opt).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(VideoDataSet(
        opt, subset="validation"),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True,
                                              drop_last=False)
    tscale = opt["temporal_scale"]
    tgap = 1. / tscale
    peak_thres = opt["pgm_threshold"]
    # confidence_mask is not defined in this snippet; assume the (duration, start)
    # validity mask used during training, e.g. get_mask(tscale), moved to the GPU
    confidence_mask = get_mask(tscale).cuda()
    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            start_end, confidence_map = model(input_data)

            start_scores = start_end[0][0].detach().cpu().numpy()
            end_scores = start_end[0][1].detach().cpu().numpy()
            clr_confidence = (confidence_map[0][0] *
                              confidence_mask).detach().cpu().numpy()
            reg_confidence = (confidence_map[0][1] *
                              confidence_mask).detach().cpu().numpy()

            max_start = max(start_scores)
            max_end = max(end_scores)

            ####################################################################################################
            # generate the set of start points and end points
            start_bins = np.zeros(len(start_scores))
            start_bins[[0, -1]] = 1  # [1,0,0,...,0,1]: always keep the first and last temporal points
            for idx in range(1, tscale - 1):
                if start_scores[idx] > start_scores[idx + 1] and \
                        start_scores[idx] > start_scores[idx - 1]:
                    start_bins[idx] = 1
                elif start_scores[idx] > (peak_thres * max_start):
                    start_bins[idx] = 1

            end_bins = np.zeros(len(end_scores))
            end_bins[[0, -1]] = 1
            for idx in range(1, tscale - 1):
                if end_scores[idx] > end_scores[idx + 1] and \
                        end_scores[idx] > end_scores[idx - 1]:
                    end_bins[idx] = 1
                elif end_scores[idx] > (peak_thres * max_end):
                    end_bins[idx] = 1
            ########################################################################################################

            xmin_list = []
            xmin_score_list = []
            xmax_list = []
            xmax_score_list = []
            for j in range(tscale):
                if start_bins[j] == 1:
                    # a bin spans [tgap*j, tgap*(j+1)]; if e.g. [0.01, 0.02] overlaps the
                    # GT well, the true boundary is actually the midpoint of that interval
                    xmin_list.append(tgap / 2 + tgap * j)
                    xmin_score_list.append(start_scores[j])
                if end_bins[j] == 1:
                    xmax_list.append(tgap / 2 + tgap * j)
                    xmax_score_list.append(end_scores[j])

            #########################################################################
            # enumerate all combinations of start and end boundary points
            new_props = []
            for ii in range(len(xmax_list)):
                tmp_xmax = xmax_list[ii]
                tmp_xmax_score = xmax_score_list[ii]
                for ij in range(len(xmin_list)):
                    tmp_xmin = xmin_list[ij]
                    tmp_xmin_score = xmin_score_list[ij]
                    if tmp_xmin >= tmp_xmax:
                        break
                    start_point = int((tmp_xmin - tgap / 2) / tgap)
                    end_point = int((tmp_xmax - tgap / 2) / tgap)
                    duration = end_point - start_point
                    clr_score = clr_confidence[duration, start_point]
                    reg_score = reg_confidence[duration, start_point]
                    score = tmp_xmin_score * tmp_xmax_score * np.sqrt(
                        clr_score * reg_score)
                    if score == 0:
                        print(video_name, tmp_xmin, tmp_xmax, tmp_xmin_score,
                              tmp_xmax_score, clr_score, reg_score, score,
                              confidence_map[0, 0, duration, start_point],
                              duration, start_point)
                    new_props.append([
                        tmp_xmin, tmp_xmax, tmp_xmin_score, tmp_xmax_score,
                        clr_score, reg_score, score
                    ])
            new_props = np.stack(new_props)
            #########################################################################

            col_name = [
                "xmin", "xmax", "xmin_score", "xmax_score", "clr_score",
                "reg_score", "score"
            ]
            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv",
                          index=False)
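The peak tests above (strict local maximum, or above peak_thres of the global maximum) also vectorize; a sketch assuming the same rules, shown for the start branch:

interior = start_scores[1:-1]
is_peak = (interior > start_scores[2:]) & (interior > start_scores[:-2])
above_thres = interior > peak_thres * max_start
start_bins[1:-1][is_peak | above_thres] = 1  # writes through the slice view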
Example #13
def main():
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, train_list, val_list, root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.root_path)
    num_class = len(categories)

    global store_name
    store_name = '_'.join([
        args.type, args.dataset, args.arch,
        'segment%d' % args.num_segments, args.store_name
    ])
    print(('storing name: ' + store_name))

    if args.dataset == 'somethingv1' or args.dataset == 'somethingv2':
        # label transformation for left/right categories
        # please refer to the labels.json file in somethingv2 for details.
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
    else:
        target_transforms = None

    model = TemporalModel(num_class,
                          args.num_segments,
                          model=args.type,
                          backbone=args.arch,
                          alpha=args.alpha,
                          beta=args.beta,
                          dropout=args.dropout,
                          target_transforms=target_transforms)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = get_optim_policies(model)
    train_augmentation = model.get_augmentation()

    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)

            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.module.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)

    train_loader = torch.utils.data.DataLoader(VideoDataSet(
        root_path,
        train_list,
        num_segments=args.num_segments,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               drop_last=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(VideoDataSet(
        root_path,
        val_list,
        num_segments=args.num_segments,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        prec1 = validate(val_loader, model, criterion, 0)
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': args.start_epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)

        return

    log_training = open(
        os.path.join(args.checkpoint_dir, 'log', '%s.csv' % store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        # adjust learning rate
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
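adjust_learning_rate is not shown here; a plausible sketch of the step-decay policy it implies, decaying by an assumed factor of 0.1 at each milestone in args.lr_steps and scaling per parameter group by the lr_mult/decay_mult that get_optim_policies sets up:

import numpy as np

def adjust_learning_rate(optimizer, epoch, lr_steps):
    decay = 0.1 ** sum(epoch >= np.array(lr_steps))  # assumed decay factor per milestone
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.lr * decay * param_group['lr_mult']
        param_group['weight_decay'] = args.weight_decay * param_group['decay_mult']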
Example #14
def main():

    global args, best_loss, writer

    configs = get_and_save_args(parser)
    parser.set_defaults(**configs)
    dataset_configs = configs["dataset_configs"]
    model_configs = configs["model_configs"]
    args = parser.parse_args()
    if 'batch_size' in model_configs:
        args.batch_size = model_configs['batch_size']
    if 'iter_size' in model_configs:
        args.iter_size = model_configs['iter_size']

    model = TwoStageDetector(model_configs,
                             roi_size=dataset_configs['roi_pool_size'])
    cnt = 0
    for p in model.parameters():
        cnt += p.data.numel()
    print(cnt)  # total number of model parameters
    """copy code and create dirs for saving models and logs"""
    if not os.path.isdir(args.snapshot_pref):
        os.makedirs(args.snapshot_pref)

    date = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
    logfile = os.path.join(args.snapshot_pref, date + '_train.log')
    get_logger(args, logfile)
    logging.info(' '.join(sys.argv))
    logging.info('\ncreating folder: ' + args.snapshot_pref)

    if not args.evaluate:
        pass
        # writer = SummaryWriter(args.snapshot_pref)
        # make a copy of the entire project folder, which can cost huge space
        # recorder = Recorder(args.snapshot_pref, ["models", "__pycache__"])
        # recorder.writeopt(args)

    logging.info('\nruntime args\n\n{}\n\nconfig\n\n{}'.format(
        args, dataset_configs))
    logging.info(str(model))
    logging.info(str(cnt))
    if 'lr' in model_configs:
        args.lr = model_configs['lr']
        logging.info('Using learning rate {}'.format(args.lr))
    """construct model"""

    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            logging.info(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            logging.info(
                ("=> no checkpoint found at '{}'".format(args.resume)))
    """construct dataset"""

    train_dataset = VideoDataSet(
        dataset_configs,
        prop_file=dataset_configs['train_prop_file'],
        ft_path=dataset_configs['train_ft_path'],
        epoch_multiplier=dataset_configs['training_epoch_multiplier'],
        test_mode=False)
    kwargs = {}
    kwargs['shuffle'] = True

    loss_kwargs = {}

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
        drop_last=True,
        **kwargs)  # in training we drop the last incomplete minibatch

    # val_loader = None
    val_loader = torch.utils.data.DataLoader(VideoDataSet(
        dataset_configs,
        prop_file=dataset_configs['test_prop_file'],
        ft_path=dataset_configs['test_ft_path'],
        epoch_multiplier=dataset_configs['testing_epoch_multiplier'],
        reg_stats=train_loader.dataset.stats,
        test_mode=False),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             drop_last=True,
                                             num_workers=args.workers,
                                             pin_memory=True)
    logging.info('Dataloaders constructed')
    """loss and optimizer"""
    activity_criterion = torch.nn.CrossEntropyLoss(**loss_kwargs).cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()

    # for group in policies:
    #     logging.info(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
    #         group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, activity_criterion, completeness_criterion,
                 regression_criterion, 0, -1)
        return

    print('Start training loop')

    for epoch in range(args.start_epoch, args.epochs):

        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        train(train_loader, model, activity_criterion, completeness_criterion,
              regression_criterion, optimizer, epoch)

        # evaluate on validation set
        latest_ckpt_path = args.snapshot_pref + \
            '_'.join((args.dataset, 'latest', 'checkpoint.pth.tar'))
        ckpt = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_loss': 1000,
            'reg_stats': torch.from_numpy(train_loader.dataset.stats)
        }

        torch.save(ckpt, latest_ckpt_path)

        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, activity_criterion,
                            completeness_criterion, regression_criterion,
                            (epoch + 1) * len(train_loader), epoch)
            # remember best validation loss and save checkpoint
            # loss = np.exp(-epoch/100)
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            ckpt['best_loss'] = best_loss
            save_checkpoint(ckpt,
                            is_best,
                            epoch,
                            filename='checkpoint.pth.tar')
Example #15

    # This net is used to provide setup settings. It is not used for testing.

    checkpoint = torch.load(args.weights)
    # pdb.set_trace()
    print("model epoch {} loss: {}".format(checkpoint['epoch'], checkpoint['best_loss']))
    base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())}

    stats = checkpoint['reg_stats'].numpy()

    prop_file = dataset_configs['test_prop_file']
    print('using prop_file ' + prop_file)
    
    dataset = VideoDataSet(dataset_configs,
                           prop_file=prop_file,
                           ft_path=dataset_configs['test_ft_path'],
                           test_mode=True)
    print('Dataset initialized')


    index_queue = ctx.Queue()
    result_queue = ctx.Queue()
    workers = [ctx.Process(target=runner_func,
                           args=(dataset, base_dict, stats, gpu_list[i % len(gpu_list)],
                                 index_queue, result_queue))
               for i in range(args.workers)]


    for w in workers:
        w.daemon = True
        w.start()
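A sketch of the assumed driving protocol for the workers started above: the parent enqueues dataset indices and drains one result per index (runner_func's exact result format is not shown, so this is illustrative):

for i in range(len(dataset)):
    index_queue.put(i)

results = [result_queue.get() for _ in range(len(dataset))]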
Example #16
def main():

    #*************************Processing Data**************************
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    # preprocess the Something-Something dataset: read the .txt list files into memory
    categories, train_list, val_list, root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.root_path)
    num_class = len(categories)

    if args.dataset == 'somethingv1' or args.dataset == 'somethingv2':
        # label transformation for left/right categories
        # please refer to the labels.json file in somethingv2 for details.
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
    else:
        target_transforms = None

    #****************************Create Model***************************
    model = getattr(CSN, args.arch)(num_class,
                                    target_transforms=target_transforms,
                                    mode=args.mode)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = get_optim_policies(model)
    train_augmentation = model.get_augmentation()

    # ***************************Data loading code****************************
    normalize = GroupNormalize(input_mean, input_std)

    train_loader = torch.utils.data.DataLoader(VideoDataSet(
        root_path,
        train_list,
        num_segments=args.num_segments,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               drop_last=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(VideoDataSet(
        root_path,
        val_list,
        num_segments=args.num_segments,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    #**************************Training config**************************
    device = 'cuda'
    if torch.cuda.is_available():
        devices = ['cuda:' + gpu_id for gpu_id in args.gpus.split(',')]
        if len(devices) > 1:
            model = torch.nn.DataParallel(
                model, device_ids=devices)  # multi-GPU training on a single machine
    else:
        device = 'cpu'
    model = model.to(device)

    if args.resume:  # resume a previously interrupted run
        if os.path.isfile(args.resume):  # restore training state from the given checkpoint
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)

            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()  # cross-entropy loss

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    #******************************Training**********************************
    if args.evaluate:
        prec1 = validate(val_loader, model, criterion, 0)
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': args.start_epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            },
            is_best,
            filename='evaluate')

        return

    # name under which checkpoints are stored
    global store_name
    store_name = '_'.join([
        args.type, args.dataset, args.arch,
        'segment%d' % args.num_segments, args.store_name
    ])
    log('storing name: ' + store_name, file=log_stream)

    for epoch in range(args.start_epoch, args.epochs):
        log("********************************\n", file=log_stream)
        log("EPOCH:" + str(epoch + 1) + "\n", file=log_stream)
        # adjust learning rate
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                },
                is_best,
                filename=str(epoch + 1))

        log("********************************\n", file=log_stream)
Example #17
print('Merge detection scores from {} sources...'.format(
    len(score_pickle_list)))
detection_scores = {k: merge_scores(k) for k in score_pickle_list[0]}
print('Done.')

if 'deploy_prop_file' in dataset_configs:
    prop_file = dataset_configs['deploy_prop_file']
else:
    prop_file = dataset_configs['test_prop_file']
if 'deploy_online_slice' in dataset_configs:
    online_slice = dataset_configs['deploy_online_slice']
else:
    online_slice = dataset_configs.get('online_slice', False)

dataset = VideoDataSet(dataset_configs,
                       prop_file=prop_file,
                       ft_path=dataset_configs['train_ft_path'],
                       test_mode=True)
from functools import reduce

gt_lens = np.array(
    reduce(lambda x, y: x + y, [[(x.end_frame - x.start_frame) / 6
                                 for x in v.gt] for v in dataset.video_list]))
# pdb.set_trace()
dataset_detections = [dict() for i in range(num_class)]


def merge_all_vid_scores(pickle_list):
    def merge_op(arrs, index, weights):
        if arrs[0][index] is not None:
            return np.sum([a[index] * w for a, w in zip(arrs, weights)],
                          axis=0)
Example #18
File: main.py Project: cxqj/44-BMN
def BMN_inference(opt):
    model = BMN(opt)
    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(VideoDataSet(opt, subset="validation"),
                                              batch_size=1, shuffle=False,
                                              num_workers=8, pin_memory=True, drop_last=False)
    tscale = opt["temporal_scale"]  # 100
    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            confidence_map, start, end = model(input_data)  #(1,2,100,100),(1,100),(1,100)

            #print(start.shape,end.shape,confidence_map.shape)
            start_scores = start[0].detach().cpu().numpy()  # (100,)
            end_scores = end[0].detach().cpu().numpy()  # (100,)
            clr_confidence = (confidence_map[0][1]).detach().cpu().numpy()  # (100,100)
            reg_confidence = (confidence_map[0][0]).detach().cpu().numpy()  # (100,100)

            # locate the peak scores
            max_start = max(start_scores)
            max_end = max(end_scores)

            ####################################################################################################
            # generate the set of start points and end points
            start_bins = np.zeros(len(start_scores))  # [0,0,...,0], one bin per temporal point
            start_bins[0] = 1   # mark the first temporal point as a start candidate
            for idx in range(1, tscale - 1):
                if start_scores[idx] > start_scores[idx + 1] and start_scores[idx] > start_scores[idx - 1]:
                    start_bins[idx] = 1
                elif start_scores[idx] > (0.5 * max_start):
                    start_bins[idx] = 1

            end_bins = np.zeros(len(end_scores))  
            end_bins[-1] = 1   # mark the last temporal point as an end candidate
            for idx in range(1, tscale - 1):
                if end_scores[idx] > end_scores[idx + 1] and end_scores[idx] > end_scores[idx - 1]:
                    end_bins[idx] = 1
                elif end_scores[idx] > (0.5 * max_end):
                    end_bins[idx] = 1
            ########################################################################################################

            #########################################################################
            # enumerate all combinations of start and end boundary points
            new_props = []
            # equivalently: for every proposal duration, sweep every start point
            for idx in range(tscale):  # indexes the duration; all proposals sharing an idx have the same length
                for jdx in range(tscale):  # sweeps the temporal (start) points
                    start_index = jdx
                    end_index = start_index + idx + 1
                    if end_index < tscale and start_bins[start_index] == 1 and end_bins[end_index] == 1:
                        xmin = start_index / tscale
                        xmax = end_index / tscale
                        xmin_score = start_scores[start_index]
                        xmax_score = end_scores[end_index]
                        clr_score = clr_confidence[idx, jdx]
                        reg_score = reg_confidence[idx, jdx]
                        score = xmin_score * xmax_score * clr_score * reg_score
                        new_props.append([xmin, xmax, xmin_score, xmax_score, clr_score, reg_score, score])
            new_props = np.stack(new_props)
            #########################################################################

            col_name = ["xmin", "xmax", "xmin_score", "xmax_score", "clr_score", "reg_score", "score"]
            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv", index=False)