def _save(self, i):
    if self._folder is None:
        log.debug('Isaver folder is None, no saving')
        return
    ifiles = self._get_filenames(i)
    savepath = ifiles['pkl']
    vst.mkdir(savepath.parent)
    vst.save_pkl(savepath, self.result)
    ifiles['finished'].touch()
def manage_workfolder(path, ycfg, co_commit_sha):
    if ycfg['_experiment']['output']['enable']:
        # Separate output enabled: create the output folder and symlink to it
        outputfolder = get_outputfolder_given_path(
            path,
            Path(ycfg['_experiment']['output']['dervo_root']),
            Path(ycfg['_experiment']['output']['store_root']))
        create_symlink_to_outputfolder(
            outputfolder, path,
            ycfg['_experiment']['output']['sl_relative'],
            ycfg['_experiment']['output']['sl_prefix'])
    else:
        # Separate output disabled: output goes to a local subfolder
        outputfolder = vst.mkdir(path/'_workfolder')
    # Workfolder - specified by commit
    workfolder = vst.mkdir(outputfolder/co_commit_sha)
    return workfolder
def get_outputfolder_given_path(
        path: Path, dervo_root: Path, output_root: Path):
    """Create the output folder; its name encodes the path relative to dervo_root"""
    # Folder name is the relative path wrt dervo_root, with '/' replaced by '.'
    output_foldername = str(path.relative_to(dervo_root)).replace('/', '.')
    workfolder = vst.mkdir(output_root/output_foldername)
    return workfolder
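# The companion helper `create_symlink_to_outputfolder` (called from
# manage_workfolder above) is not shown in this section. Below is a minimal
# sketch of what it could look like, assuming it places a prefixed symlink
# inside the experiment path that points at the output folder, with
# `sl_relative` choosing between a relative and an absolute link target.
# Name, signature, and behaviour here are assumptions, not the original code.
import os
from pathlib import Path

def create_symlink_to_outputfolder_sketch(
        outputfolder: Path, path: Path, sl_relative: bool, sl_prefix: str):
    # Hypothetical illustration only
    link = path / f'{sl_prefix}{outputfolder.name}'
    target = os.path.relpath(outputfolder, path) if sl_relative else outputfolder
    if not link.exists():
        link.symlink_to(target, target_is_directory=True)
    return link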
def __init__(self, folder, total):
    self._re_finished = r'item_(?P<i>\d+)_of_(?P<N>\d+).finished'
    self._fmt_finished = 'item_{:04d}_of_{:04d}.finished'
    self._history_size = 3
    self._folder = folder
    self._total = total
    if self._folder is None:
        log.debug('Isaver without folder, no saving will be performed')
    else:
        self._folder = vst.mkdir(self._folder)
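# `_save` above calls `self._get_filenames(i)`, which is not shown in this
# section. A minimal sketch of such a method, assuming it derives both the
# pickle path and the '.finished' marker path inside `self._folder` from the
# naming attributes set in `__init__`; the exact pickle filename is an
# assumption, not the original code.
def _get_filenames(self, i):
    finished_name = self._fmt_finished.format(i, self._total)
    return {
        'finished': self._folder / finished_name,
        'pkl': self._folder / finished_name.replace('.finished', '.pkl'),
    }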
def add_logging_filehandlers(workfolder):
    # Create two log files in the '_log' subfolder and start logging to them
    assert isinstance(
        logging.getLogger().handlers[0],
        logging.StreamHandler), 'First handler should be StreamHandler'
    logfolder = vst.mkdir(workfolder / '_log')
    id_string = vst.get_experiment_id_string()
    logfilename_debug = vst.add_filehandler(
        logfolder / f'{id_string}.DEBUG.log', logging.DEBUG, 'extended')
    logfilename_info = vst.add_filehandler(
        logfolder / f'{id_string}.INFO.log', logging.INFO, 'short')
    return logfilename_debug, logfilename_info
def git_repo_perform_checkout_and_postcmd(
        repo, co_repo_fold, co_commit_sha, post_cmd, n_post_cmd_tries=2):
    """Checkout repo to co_repo_fold, copy submodules, run post_cmd code"""
    # Create the checkout folder and clone the repo at the requested commit
    vst.mkdir(co_repo_fold)
    git_shared_clone(repo, '.', co_repo_fold, co_commit_sha)
    # Don't initialize submodules; instead clone each one individually.
    # This avoids querying the remote url over the network. Useful w/o internet
    # TODO: Make it work for submodules included at lower levels
    co_repo = git.Repo(str(co_repo_fold))
    for line in co_repo.git.submodule('status').split('\n'):
        if not line.strip():
            continue
        # Status line format: "<prefix><sha> <path> (<describe>)"
        sm_commit_sha, sm_name = line.split()[:2]
        sm_commit_sha = sm_commit_sha.removeprefix('-')
        git_shared_clone(repo, sm_name, co_repo_fold/sm_name, sm_commit_sha)
    # Perform post-checkout actions if set
    if post_cmd is not None:
        post_output = None
        for i in range(n_post_cmd_tries):
            try:
                post_output = subprocess.check_output(
                    f'cd {co_repo_fold} && {post_cmd}',
                    shell=True, stderr=subprocess.STDOUT,
                    executable='/bin/bash').strip().decode()
                break
            except subprocess.CalledProcessError as e:
                log.info('({}) Waiting a bit. Caught ({}):\n{}'.format(
                    i, e, e.output.decode()))
                time.sleep(5)
        if post_output is None:
            raise OSError(f'Could not execute {post_cmd}')
        log.info(f'Executed {post_cmd} at {co_repo_fold}')
        log.debug(f'Output of execution:\n{post_output}')
    # Create 'FINISHED' file to indicate that repo is ready
    (co_repo_fold/'FINISHED').touch()
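# `git_shared_clone` is used above but not defined in this section. A minimal
# sketch of what such a helper could look like, assuming it clones a path
# inside the source repo with `--shared` (borrowing objects from the source
# instead of fetching over the network) and then checks out the requested
# commit. The name, signature, and behaviour here are assumptions, not the
# original code.
import git  # GitPython
from pathlib import Path

def git_shared_clone_sketch(repo, subpath, dst_fold, commit_sha):
    src = Path(repo.working_tree_dir) / subpath
    # GitPython forwards keyword options to `git clone`, so shared=True
    # becomes `git clone --shared`
    cloned = git.Repo.clone_from(str(src), str(dst_fold), shared=True)
    cloned.git.checkout(commit_sha)
    return cloned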
def train_frame_classifier(workfolder, cfg_dict, add_args):
    out, = vst.exp.get_subfolders(workfolder, ['out'])
    cfg = vst.exp.YConfig(cfg_dict)
    Ncfg_daly.set_defcfg_v2(cfg)
    cfg.set_defaults_yaml("""
    seed: 42
    inputs:
        tubes_dwein: ~
    split_assignment: !def ['train/val', ['train/val', 'trainval/test']]
    CN:
        SOLVER:
            BASE_LR: 0.0375
            LR_POLICY: steps_with_relative_lrs
            LRS: [1, 0.1, 0.01, 0.001, 0.0001, 0.00001]
            STEPS: [0, 41, 49]
            MAX_EPOCH: 57
            MOMENTUM: 0.9
            WEIGHT_DECAY: 1e-4
            WARMUP_EPOCHS: 4.0
            WARMUP_START_LR: 0.0001
            OPTIMIZING_METHOD: sgd
    period:
        i_batch:
            loss_log: '0::10'
            eval_krgb: '::'
        i_epoch:
            eval_krgb: '0::1'
    train:
        num_workers: 8
        augment:
            scale: False
            hflip: False
    """)
    cf = cfg.parse()
    cn = _config_preparations_c2d_1x1(cfg.without_prefix('CN.'))
    initial_seed = cf['seed']
    enforce_all_seeds(initial_seed)
    # prepare data
    dataset: Dataset_daly_ocv = Ncfg_daly.get_dataset(cf)
    vgroup = Ncfg_daly.get_vids(cf, dataset)
    sset_train, sset_eval = cf['split_assignment'].split('/')
    vids_train, vids_eval = vgroup[sset_train], vgroup[sset_eval]
    # wein tubes
    tubes_dwein_d, tubes_dgt_d = load_gt_and_wein_tubes(
        cf['inputs.tubes_dwein'], dataset, vgroup)
    tubes_dgt_train = tubes_dgt_d[sset_train]
    # Means
    norm_mean_cu = np_to_gpu(cn.DATA.MEAN)
    norm_std_cu = np_to_gpu(cn.DATA.STD)
    # Model
    model = C2D_1x1_fullframe(cn, 11, 0.5, False)
    optimizer = tsf_optim.construct_optimizer(model, cn)
    loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
    model.init_weights(0.01)
    device = get_device()
    model.to(device)
    # Training setup
    max_epoch = cn.SOLVER.MAX_EPOCH
    NUM_WORKERS = cf['train.num_workers']
    man_ckpt = Manager_model_checkpoints(model, optimizer, 'c2d_1x1')
    # Restore previous run
    rundir = vst.mkdir(out / 'rundir')
    checkpoint_path = Manager_checkpoint_name.find_last_checkpoint(rundir)
    start_epoch = man_ckpt.restore_model_magic(checkpoint_path)
    # Positives (from training videos)
    stride = 1
    max_distance = np.inf
    labeled_frames: List[Frame_labeled] = \
        prepare_label_fullframes_for_training(
            tubes_dgt_train, dataset, stride, max_distance)
    # Get all negative frames from training videos
    negative_frames = []
    for vid in vids_train:
        v = dataset.videos_ocv[vid]
        instance_franges = get_keyframe_ranges(v, include_diff=True)
        good_frames = np.arange(0, v['nframes'], stride)
        for s, e, kf in instance_franges:
            bad_frames = np.arange(s, e)
            good_frames = np.setdiff1d(good_frames, bad_frames)
        for frame_ind in good_frames:
            negative_frame = {
                'vid': vid, 'frame_ind': frame_ind, 'label': 10}
            negative_frames.append(negative_frame)
    import pudb
    pudb.set_trace()  # XXX BREAKPOINT
    # Kinda sparsely sampled frames from all videos
    sparse_samples = []
    for vid in vids_eval:
        v = dataset.videos_ocv[vid]
        instance_franges = get_keyframe_ranges(v, include_diff=True)
    # Training
    for i_epoch in range(start_epoch, max_epoch):
        rgen = enforce_all_seeds(initial_seed + i_epoch)
        # Sample negative frames
        sample_ids = rgen.choice(
            len(negative_frames), size=len(labeled_frames))
        sampled_negative_frames = [negative_frames[i] for i in sample_ids]
        all_frames = labeled_frames + sampled_negative_frames
        random.shuffle(all_frames)
        # Train on the shuffled mix of positives and sampled negatives
        tdataset = TDataset_over_frames(cf, cn, all_frames, dataset)
        train_loader = torch.utils.data.DataLoader(
            tdataset, num_workers=NUM_WORKERS,
            collate_fn=sequence_batch_collate_v2)
        pbar = tqdm(train_loader, total=len(tdataset))
        total_batches = len(tdataset)
        avg_loss = vst.Averager()
        for i_batch, data_input in enumerate(pbar):
            model.train()
            # Update learning rate
            lr = tsf_optim.get_lr_at_epoch(
                cn, i_epoch + float(i_batch) / total_batches)
            set_lr(optimizer, lr)
            frame_list, metas, = data_input
            labels_np = np.array([m['label'] for m in metas])
            labels_t = torch.from_numpy(labels_np)
            labels_c = labels_t.cuda()
            inputs = [x.type(torch.cuda.FloatTensor) for x in frame_list]
            result = model(inputs, None)
            preds = result['x_final']
            # Compute loss
            loss = loss_fn(preds, labels_c)
            # Check for nan loss
            sf_misc.check_nan_losses(loss)
            # Perform the backward pass
            optimizer.zero_grad()
            loss.backward()
            # Update the parameters
            optimizer.step()
            # Loss update
            avg_loss.update(loss.item())
            if vst.check_step(i_batch, cf['period.i_batch.loss_log']):
                log.info(f'[{i_epoch}, {i_batch}/{total_batches}]'
                         f' {lr=} loss={avg_loss}')
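# `vst.check_step` gates the logging above on period specs such as '0::10'
# (from `period.i_batch.loss_log`) and '::'. The helper itself is not shown
# in this section; below is a minimal sketch, assuming the spec follows
# Python's 'start:stop:step' slice syntax with empty fields meaning
# "no bound" / "every step". This is an illustration, not the original
# implementation.
def check_step_sketch(step: int, period_spec: str) -> bool:
    start_s, stop_s, stride_s = (period_spec.split(':') + ['', '', ''])[:3]
    start = int(start_s) if start_s else 0
    stop = int(stop_s) if stop_s else None
    stride = int(stride_s) if stride_s else 1
    if step < start or (stop is not None and step >= stop):
        return False
    return (step - start) % stride == 0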
def get_subfolders(folder, subfolder_names=['out', 'temp']):
    return [vst.mkdir(folder / name) for name in subfolder_names]
def extract_dataset_fullframe_features(workfolder, cfg_dict, add_args):
    out, = vst.exp.get_subfolders(workfolder, ['out'])
    cfg = vst.exp.YConfig(cfg_dict)
    Ncfg_daly.set_defcfg_v2(cfg)
    Ncfg_extractor.set_defcfg_v2(cfg)
    cf = cfg.parse()
    # prepare extractor
    norm_mean_t, norm_std_t, sampler_grid, frameloader_vsf, fextractor = \
        Ncfg_extractor.prepare(cf)
    BATCH_SIZE = cf['extraction.batch_size']
    NUM_WORKERS = cf['extraction.num_workers']
    # prepare data
    dataset: Dataset_daly_ocv = Ncfg_daly.get_dataset(cf)

    # / extract
    def prepare_func(start_i):
        remaining_keyframes_dict = dict(
            list(keyframes_dict.items())[start_i + 1:])
        tdataset_kf = TDataset_over_keyframes(
            remaining_keyframes_dict, sampler_grid, frameloader_vsf)
        loader = torch.utils.data.DataLoader(
            tdataset_kf, batch_size=BATCH_SIZE, shuffle=False,
            num_workers=NUM_WORKERS, pin_memory=True,
            collate_fn=sequence_batch_collate_v2)
        return loader

    bboxes_batch_index = torch.arange(BATCH_SIZE).type(
        torch.DoubleTensor)[:, None]

    def func(data_input):
        metas, Xts, bboxes = data_input
        kkeys = [tuple(m['kkey']) for m in metas]
        Xts_f32c = [
            to_gpu_normalize_permute(x, norm_mean_t, norm_std_t)
            for x in Xts]
        bsize = bboxes.shape[0]
        bboxes0 = torch.cat((bboxes_batch_index[:bsize], bboxes), axis=1)
        bboxes0_c = bboxes0.type(torch.cuda.FloatTensor)
        with torch.no_grad():
            result = fextractor.forward(Xts_f32c, bboxes0_c)
        result_dict = {k: v.cpu().numpy() for k, v in result.items()}
        last_i = list(keyframes_dict.keys()).index(kkeys[-1])
        return result_dict, last_i

    def extract_func(key):
        pass

    stride = 4
    features_temp = vst.mkdir(out / 'features')
    for vid, video in dataset.videos_ocv.items():
        output_file = features_temp / f'{vid}.pkl'
        if output_file.exists():
            continue
        # Extract keyframes specifically
        all_keyframes = []
        for action_name, instances in video['instances'].items():
            for ins_ind, instance in enumerate(instances):
                keyframes = [int(kf['frame']) for kf in instance['keyframes']]
                all_keyframes.extend(keyframes)
        # Sample at stride
        strided_frames = set(range(0, video['nframes'], stride))
        frames_to_sample = np.array(
            sorted(set(all_keyframes) | strided_frames))
        # Dataset
        tdataset_kf = TDataset_over_frames(
            video['path'], video['nframes'], frames_to_sample,
            sampler_grid, frameloader_vsf)
        loader = torch.utils.data.DataLoader(
            tdataset_kf, batch_size=BATCH_SIZE, shuffle=False,
            num_workers=NUM_WORKERS, pin_memory=True,
            collate_fn=sequence_batch_collate_v2)
        pbar = tqdm(loader, total=len(tdataset_kf))
        features = []
        for data_input in pbar:
            frame_list, metas = data_input
            Xts_f32c = [
                to_gpu_normalize_permute(x, norm_mean_t, norm_std_t)
                for x in frame_list]
            with torch.no_grad():
                result = fextractor.forward(Xts_f32c, None)
            features.append(result)
        import pudb
        pudb.set_trace()  # XXX BREAKPOINT
        pass
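# The per-video loop above skips videos whose `output_file` already exists,
# but as written it stops at a breakpoint without saving anything. A minimal
# sketch of a save step, assuming each batch `result` is a dict of tensors
# and reusing `vst.save_pkl` (seen in `_save` earlier); the layout of the
# saved dict and the helper name are assumptions, not the original code.
import numpy as np

def save_video_features_sketch(output_file, frames_to_sample, features):
    # Concatenate per-batch outputs key-wise into one array per key
    stacked = {}
    for key in features[0]:
        stacked[key] = np.concatenate(
            [batch[key].cpu().numpy() for batch in features], axis=0)
    vst.save_pkl(output_file, {
        'frames': frames_to_sample, 'features': stacked})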