def _train_one_epoch(self, epoch: int):
    """Run one PEM training epoch followed by a validation pass.

    At the start of epoch 10 the learning rate is dropped to 1/10 of the
    configured base rate (one-shot step decay). Train/val losses are
    accumulated in a Statistic and logged; model state is checkpointed
    after every epoch.

    Args:
        epoch: index of the epoch being trained.
    """
    stat = Statistic()
    self.model.train()
    if epoch == 10:
        # One-shot step decay of the learning rate.
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.cfg.pem.learning_rate / 10
        log.log_info(
            "The learning rate is adjusted in the beginning of epoch 10 ")
    t = time.time()
    for idx, (props, features, _) in enumerate(self.train_loader):
        props = props.cuda()
        features = features.cuda()
        self.model.zero_grad()
        pred_scores = self.model(features)
        # Column 5 of `props` holds the regression target (presumably the
        # proposal IoU score) -- TODO confirm against the dataset layout.
        loss = self.loss(pred_scores, props[:, 5])
        stat.update("train_loss", loss.item())
        # Scale the loss by 10 before backprop (acts as a 10x multiplier
        # on the gradient for this objective).
        cost = 10 * loss
        cost.backward()
        self.optimizer.step()  # fixed: step(None) passed a pointless closure arg
    # Validation pass.
    self.model.eval()
    for idx, (props, features, _) in enumerate(self.val_loader):
        props = props.cuda()
        features = features.cuda()
        pred_scores = self.model(features)
        loss = self.loss(pred_scores, props[:, 5])
        stat.update("val_loss", loss.item())
    self.save_state(epoch, self.cfg.pem.save_root)
    # Fixed format spec: "{:.2}" means 2 *significant digits* (e.g. prints
    # "1.2e+01" for 12.3 seconds); "{:.2f}" gives 2 decimal places.
    log.log_info("[{:.2f}s] Epoch {}: {}".format(time.time() - t, epoch,
                                                 stat.format()))
def main():
    """Extract PGM features for every train/val video.

    Fills a shared queue with (subset, index) work items and spawns
    cfg.pgm.pgm_feature_workers worker processes (running `proc_cb`) that
    drain it; the main process polls the queue once per second to report
    progress.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--yml_cfg_file", type=str, default="./cfgs/bsn.yml")
    args = parser.parse_args()
    cfg = load_yml(args.yml_cfg_file)
    anet = cfg.anet
    train_dataset = get_pgm_feature_dataset(cfg.tem.tem_csv_dir,
                                            cfg.pgm.proposal_csv_path,
                                            anet.json_path,
                                            anet.video_info_new_csv_path,
                                            "training")
    val_dataset = get_pgm_feature_dataset(cfg.tem.tem_csv_dir,
                                          cfg.pgm.proposal_csv_path,
                                          anet.json_path,
                                          anet.video_info_new_csv_path,
                                          "validation")
    queue = mp.Queue()
    procs = []
    mkdir_p(cfg.pgm.pgm_feature_path)
    # Feed one (subset, index) item per video.
    for i in range(len(train_dataset)):
        queue.put(('training', i))
    for i in range(len(val_dataset)):
        queue.put(('validation', i))
    for i in range(cfg.pgm.pgm_feature_workers):
        proc = mp.Process(target=proc_cb,
                          args=(queue, cfg, {
                              "training": train_dataset,
                              "validation": val_dataset
                          }))
        procs.append(proc)
        proc.start()
    t = 0
    # Progress monitor: poll once per second until the queue drains.
    while not queue.empty():
        log.log_info('{}s: Remain {} videos to be handled.'.format(
            t, queue.qsize()))
        time.sleep(1)
        t += 1
    # Fixed: completion is a normal event, not a warning (matches the other
    # pipeline entry points); also corrected the message grammar.
    log.log_info("All videos processed.")
    # NOTE(review): an empty queue only means every item was *taken*;
    # terminate() may kill a worker still processing its last video --
    # consider join() with worker-side exit instead. Behavior kept as-is.
    for proc in procs:
        proc.terminate()
def test(self):
    """Run TEM inference over the train and val sets and dump results.

    For each video a CSV with columns [action, start, end, xmin, xmax] is
    written to cfg.tem.tem_csv_dir, where xmin/xmax are the normalized
    temporal bounds of each of the L snippets of the score curve.
    """
    mkdir_p(self.cfg.tem.tem_csv_dir)
    self.load_state(self.cfg.tem.save_root)
    self.model.eval()
    # Fix: disable autograd during inference -- the original tracked
    # gradients for every forward pass, wasting memory.
    with torch.no_grad():
        for dataloader_id, dataloader in enumerate(
                [self.train_dataloader, self.val_dataloader]):
            for idx, (batch_feature, batch_proposals,
                      video_records) in enumerate(dataloader):
                features = batch_feature.cuda()
                pred_scores: torch.Tensor = self.model(features)
                pred_scores: np.ndarray = pred_scores.detach().cpu().numpy()
                for i, video_record in enumerate(video_records):
                    latent_df = pd.DataFrame(
                        columns=["action", "start", "end", "xmin", "xmax"])
                    pred_score = pred_scores[i].T  # [L, 3]
                    latent_df["start"] = pred_score[:, 0]
                    latent_df["action"] = pred_score[:, 1]
                    latent_df["end"] = pred_score[:, 2]
                    # Fix: build the snippet bounds from an integer index.
                    # The original float-step np.arange(0, 1, 1/L) can yield
                    # L+1 elements through rounding, which would crash the
                    # column assignment with a length mismatch.
                    num_snippets = pred_score.shape[0]
                    step = 1.0 / num_snippets
                    offsets = np.arange(num_snippets)
                    latent_df["xmin"] = offsets * step
                    latent_df["xmax"] = (offsets + 1) * step
                    latent_df.to_csv(os.path.join(
                        self.cfg.tem.tem_csv_dir,
                        "{}.csv".format(video_record.video_name)),
                                     index=False)
                log.log_info("[{}/2 dataset] Handled {}/{} videos.".format(
                    dataloader_id + 1, idx * self.cfg.tem.batch_size,
                    len(dataloader.dataset)))
def test(self):
    """Run PEM inference on the validation set and dump per-video CSVs.

    Writes one CSV per video to cfg.pem.pem_csv_dir with columns
    [xmin, xmax, xmin_score, xmax_score, iou_score], where iou_score is
    the model's predicted confidence for each proposal.

    NOTE(review): `props` and `pred_scores` appear to be flat,
    batch-concatenated arrays; the inner loop consumes `length` rows per
    video and slices them off -- the statement order is load-bearing.
    """
    mkdir_p(self.cfg.pem.pem_csv_dir)
    self.load_state(self.cfg.pem.save_root)
    self.model.eval()
    # Fix: disable autograd during inference to avoid tracking gradients.
    with torch.no_grad():
        for idx, (props, features, lengths,
                  video_records) in enumerate(self.val_loader):
            features = features.cuda()
            pred_scores: torch.Tensor = self.model(features)
            pred_scores = pred_scores.detach().cpu().numpy()
            log.log_debug(lengths)
            for i, (length, video_record) in enumerate(
                    zip(lengths, video_records)):
                latent_df = pd.DataFrame(columns=[
                    "xmin", "xmax", "xmin_score", "xmax_score", "iou_score"
                ])
                latent_df["xmin"] = props[:length, 0]
                latent_df["xmax"] = props[:length, 1]
                latent_df["xmin_score"] = props[:length, 2]
                latent_df["xmax_score"] = props[:length, 3]
                # Consume this video's rows before moving to the next one.
                props = props[length:, :]
                latent_df["iou_score"] = pred_scores[:length]
                pred_scores = pred_scores[length:]
                latent_df.to_csv(os.path.join(
                    self.cfg.pem.pem_csv_dir,
                    "{}.csv".format(video_record.video_name)),
                                 index=False)
            log.log_info("Handled {}/{} videos.".format(
                idx * self.cfg.pem.batch_size, len(self.val_dataset)))
def _train_one_epoch(self, epoch: int):
    """Run one TEM training epoch followed by a validation pass.

    At the start of epoch 10 the learning rate is dropped to 1/10 of the
    configured base rate (one-shot step decay). Train/val losses are
    accumulated in a Statistic and logged; model state is checkpointed
    after every epoch.

    Args:
        epoch: index of the epoch being trained.
    """
    t = time.time()
    statistic = Statistic()
    # --- train ---
    self.model.train()
    if epoch == 10:
        # One-shot step decay of the learning rate.
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.cfg.tem.learning_rate / 10
        log.log_info(
            "The learning rate is adjusted in the beginning of epoch {}".
            format(epoch))
    for idx, (batch_feature, batch_proposals,
              video_records) in enumerate(self.train_dataloader):
        batch_feature = batch_feature.cuda()
        batch_proposals = batch_proposals.cuda()
        self.optimizer.zero_grad()
        batch_pred = self.model(batch_feature)
        loss_start, loss_action, loss_end = self.loss(
            batch_pred, batch_proposals)
        # The action branch is weighted 2x relative to the boundary losses.
        loss = (loss_start + 2 * loss_action + loss_end).mean()
        statistic.update('train_loss', loss.item())
        loss.backward()
        self.optimizer.step()  # fixed: step(None) passed a pointless closure arg
    # --- validate ---
    self.model.eval()
    # Fix: no gradients are needed for validation; avoid tracking them.
    with torch.no_grad():
        for idx, (batch_feature, batch_proposals,
                  video_records) in enumerate(self.val_dataloader):
            batch_feature = batch_feature.cuda()
            batch_proposals = batch_proposals.cuda()
            batch_pred = self.model(batch_feature)
            loss_start, loss_action, loss_end = self.loss(
                batch_pred, batch_proposals)
            loss: torch.Tensor = (loss_start + 2 * loss_action +
                                  loss_end).mean()
            statistic.update('val_loss', loss.item())
    log.log_info("[{:.2f}s]: epoch {}: {}".format(time.time() - t, epoch,
                                                  statistic.format()))
    self.save_state(epoch, self.cfg.tem.save_root)
def main():
    """Generate proposals for every train/val video.

    Builds the TEM-output datasets, fills a shared queue with
    (subset, index) work items, and spawns cfg.pgm.proposal_workers
    worker processes (running `sub_proc`) to drain it, while the main
    process polls once per second to report progress.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--yml_cfg_file", type=str, default="./cfgs/bsn.yml")
    args = parser.parse_args()

    cfg = load_yml(args.yml_cfg_file)
    anet = cfg.anet
    datasets = {
        "training":
        get_tem_dataset(cfg.tem.tem_csv_dir, anet.json_path,
                        anet.video_info_new_csv_path, "training"),
        "validation":
        get_tem_dataset(cfg.tem.tem_csv_dir, anet.json_path,
                        anet.video_info_new_csv_path, "validation"),
    }
    mkdir_p(cfg.pgm.proposal_csv_path)

    # Fill the work queue: one (subset, index) pair per video.
    queue = mp.Queue()
    for subset in ("training", "validation"):
        for video_idx in range(len(datasets[subset])):
            queue.put((subset, video_idx))

    # Spawn the worker pool.
    procs = []
    for _ in range(cfg.pgm.proposal_workers):
        worker = mp.Process(target=sub_proc, args=(queue, cfg, datasets))
        worker.start()
        procs.append(worker)

    # Poll once per second until the queue is drained.
    elapsed = 0
    while not queue.empty():
        log.log_info("Time: {}s, remain {} videos to be handled.".format(
            elapsed, queue.qsize()))
        time.sleep(1)
        elapsed += 1
    log.log_info("All videos handled.")

    for worker in procs:
        worker.terminate()
def evaluate(self):
    """Evaluate the loaded proposal file against the ground truth.

    Computes recall statistics and the area under the average-recall vs
    average-number-of-proposals-per-video (AR-AN) curve, the standard
    metric for the ActivityNet proposal task. Results are stored on
    ``self.recall``, ``self.avg_recall`` and ``self.proposals_per_video``.
    """
    recall, avg_recall, proposals_per_video = \
        average_recall_vs_avg_nr_proposals(
            self.ground_truth,
            self.proposal,
            max_avg_nr_proposals=self.max_avg_nr_proposals,
            tiou_thresholds=self.tiou_thresholds)

    # Area under the AR-AN curve via trapezoidal integration.
    area_under_curve = np.trapz(avg_recall, proposals_per_video)

    if self.verbose:
        log.log_info('[RESULTS] Performance on ActivityNet proposal task.')
        # Normalize by the largest AN value so the score is a percentage.
        log.log_info('\tArea Under the AR vs AN curve: {}%'.format(
            100. * float(area_under_curve) / proposals_per_video[-1]))

    self.recall = recall
    self.avg_recall = avg_recall
    self.proposals_per_video = proposals_per_video
def main():
    """Post-process PEM outputs into an ActivityNet-style results JSON.

    Distributes the validation videos across cfg.post_processing.pp_workers
    worker processes (running `sub_proc`), collects their results through a
    managed dict, and serializes everything to cfg.eval.results_json.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--yml_cfg_file", default="./cfgs/bsn.yml")
    args = parser.parse_args()

    cfg = load_yml(args.yml_cfg_file)
    anet = cfg.anet
    val_dataset = get_post_processing_dataset(cfg.pem.pem_csv_dir,
                                              anet.json_path,
                                              anet.video_info_new_csv_path,
                                              'validation')

    # Shared work queue of video indices plus a managed dict for results.
    queue = mp.Queue()
    mp_dict = mp.Manager().dict()
    for video_idx in range(len(val_dataset)):
        queue.put(video_idx)

    workers = []
    for _ in range(cfg.post_processing.pp_workers):
        worker = mp.Process(target=sub_proc,
                            args=(queue, mp_dict, val_dataset, cfg))
        workers.append(worker)
        worker.start()
    # Wait for every worker to finish before serializing results.
    for worker in workers:
        worker.join()

    output_dict = {
        "version": "VERSION 1.3",
        "results": dict(mp_dict),
        "external_data": {}
    }
    log.log_info(len(mp_dict.keys()))

    mkdir_p(os.path.split(cfg.eval.results_json)[0])
    with open(cfg.eval.results_json, "w") as f:
        json.dump(output_dict, f)
    log.log_info("Dump results to {}.".format(cfg.eval.results_json))
def sub_proc(queue: mp.Queue, mp_dict, dataset: ActivityNetDataset,
             cfg: EasyDict):
    """Worker process: drain video indices from `queue`, run soft-NMS on
    each video's proposals, and store the top results in `mp_dict`.

    Args:
        queue: shared queue of dataset indices; the worker exits when it
            is empty.
        mp_dict: managed dict mapping video name (without prefix) to a
            list of {"score", "segment"} proposal entries.
        dataset: dataset yielding (proposals, video_record) per index.
        cfg: pipeline configuration (unused here, kept for interface).
    """
    # Local import so the stdlib `queue` module is reachable despite the
    # parameter of the same name.
    from queue import Empty

    while True:
        try:
            idx = queue.get(block=False)
        except Empty:
            # Queue drained: this worker is done. (Fix: the original bare
            # `except:` also swallowed KeyboardInterrupt and real errors.)
            break
        if idx % 100 == 0:
            log.log_info("Handled {}/{} videos.".format(idx, len(dataset)))
        props, video_record = dataset[idx]
        props = soft_nms(props)
        props_list = []
        # Keep at most 100 proposals, in the order soft_nms produced them.
        for i in range(min(100, props.shape[0])):
            props_list.append({
                "score": props[i, 2],
                # Columns 0-1 are normalized [start, end]; rescale to seconds.
                "segment": list(props[i, :2] * video_record.duration)
            })
        # video_name[2:] strips a 2-char prefix (presumably "v_") -- verify
        # this matches the ground-truth keys used by the evaluator.
        mp_dict[video_record.video_name[2:]] = props_list
def __init__(self,
             ground_truth_filename=None,
             proposal_filename=None,
             ground_truth_fields=GROUND_TRUTH_FIELDS,
             proposal_fields=PROPOSAL_FIELDS,
             tiou_thresholds=np.linspace(0.5, 0.95, 10),
             max_avg_nr_proposals=None,
             subset='validation',
             verbose=False,
             check_status=True):
    """Set up the ActivityNet proposal evaluator.

    Loads ground-truth annotations and proposals from the given JSON
    files and prepares the evaluation parameters.

    Raises:
        IOError: if either filename is missing or empty.
    """
    # Guard clauses: both input files are mandatory.
    if not ground_truth_filename:
        raise IOError('Please input a valid ground truth file.')
    if not proposal_filename:
        raise IOError('Please input a valid proposal file.')

    # Evaluation configuration.
    self.subset = subset
    self.tiou_thresholds = tiou_thresholds
    self.max_avg_nr_proposals = max_avg_nr_proposals
    self.verbose = verbose
    self.gt_fields = ground_truth_fields
    self.pred_fields = proposal_fields
    self.check_status = check_status

    # Results are filled in by evaluate().
    self.recall = None
    self.avg_recall = None
    self.proposals_per_video = None

    # Some videos are blocked on the evaluation server; skip them.
    self.blocked_videos = (get_blocked_videos()
                           if self.check_status else list())

    # Import ground truth and proposals.
    self.ground_truth, self.activity_index = self._import_ground_truth(
        ground_truth_filename)
    self.proposal = self._import_proposal(proposal_filename)

    if self.verbose:
        log.log_info(
            '[INIT] Loaded annotations from {} subset.'.format(subset))
        log.log_info('\tNumber of ground truth instances: {}'.format(
            len(self.ground_truth)))
        log.log_info('\tNumber of proposals: {}'.format(len(self.proposal)))
        log.log_info('\tFixed threshold for tiou score: {}'.format(
            self.tiou_thresholds))
"{}.csv".format(video_record.video_name)), index=False) log.log_info("[{}/2 dataset] Handled {}/{} videos.".format( dataloader_id + 1, idx * self.cfg.tem.batch_size, len(dataloader.dataset))) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--yml_cfg_file", type=str, default="./cfgs/bsn.yml", help="The config file path.") parser.add_argument("--run_type", type=str, default="train", help="train or test.") args = parser.parse_args() cfg = load_yml(args.yml_cfg_file) bsn_trainer = TemTrainer(cfg, cfg.tem) if args.run_type == "train": log.log_info("Start to train TEM.") bsn_trainer.train() elif args.run_type == "test": log.log_info("Start to test TEM.") bsn_trainer.test() else: log.log_error( "You can only use `train` or `test` as run_type. Found `{}`.". format(args.run_type))