def main():
    # setup arguments
    parser = utils.ArgParser(description=__doc__)
    arguments.add_multi_experiment_args(parser)  # support multi experiment groups and search
    arguments.add_show_args(parser)  # options for the output table
    arguments.add_path_args(parser, dataset_path=False)  # source path for experiments
    arguments.add_default_args(parser)
    args = parser.parse_args()
    utils.create_logger_without_file(utils.LOGGER_NAME, log_level=args.log_level, no_print=True)

    # find experiments to show depending on arguments
    exp_groups_names = utils.match_folder(args.log_dir, EXP_TYPE, args.exp_group, args.exp_list, args.search)
    collector = collect_results_data(
        EXP_TYPE, exp_groups_names, log_dir=args.log_dir, read_last_epoch=args.last, add_group=args.add_group)

    # ---------- Define the custom retrieval metrics to show for these experiments ----------

    # retrieval validation metrics are constructed as the product of the modality and metric lists
    retrieval_metrics = {}
    re_retrieval_at = re.compile(r"r[0-9]+")
    for modality, shortcut in zip(CootMetersConst.RET_MODALITIES, CootMetersConst.RET_MODALITIES_SHORT):
        # modality: retrieval from where to where
        for metric in CootMetersConst.RET_METRICS:
            # metric: retrieval@1, mean, ...
            if metric == "r1":
                # log r1 metric to the overview class
                metric_class = "val_base"
            else:
                # log all other metrics to the detail class
                metric_class = "val_ret"
            decimals = 2
            formatting = "%" if re_retrieval_at.match(metric) else "f"
            key = f"{metric_class}/{modality}-{metric}"
            print_group = CootPrintGroupConst.VID if "vid" in modality else CootPrintGroupConst.CLIP
            retrieval_metrics[f"{shortcut}-{metric}"] = PrintMetric(key, formatting, decimals, print_group)

    # define the average of R@1 text->video and video->text to get a single metric,
    # and the same for clip->sentence and sentence->clip
    retrieval_metrics["vp-r1"] = PrintMetric("vp-r1", "%", 2, CootPrintGroupConst.RETRIEVAL)
    retrieval_metrics["cs-r1"] = PrintMetric("cs-r1", "%", 2, CootPrintGroupConst.RETRIEVAL)

    # calculate those R@1 averages for each model
    for model_name, metrics in collector.items():
        try:
            metrics["vp-r1"] = (metrics["val_base/vid2par-r1"] + metrics["val_base/par2vid-r1"]) / 2
            # only calculate the average clip-sentence R@1 if clips were evaluated
            if "val_base/cli2sen-r1" in metrics:
                metrics["cs-r1"] = (metrics["val_base/cli2sen-r1"] + metrics["val_base/sen2cli-r1"]) / 2
        except KeyError as e:
            print(f"WARNING: {e} for {model_name}")

    # ---------- Define which metrics to print ----------
    default_metrics = []
    default_fields = ["v2p-r1", "p2v-r1", "c2s-r1", "s2c-r1", "time (h)"]
    output_results(collector, custom_metrics=retrieval_metrics, metrics=args.metrics, default_metrics=default_metrics,
                   fields=args.fields, default_fields=default_fields, mean=args.mean, mean_all=args.mean_all,
                   sort=args.sort, sort_asc=args.sort_asc, compact=args.compact)
def main():
    parser = utils.ArgParser(description=__doc__)
    parser.add_argument("path_to_embeddings", type=str, help="Provide path to h5 embeddings file.")
    args = parser.parse_args()
    path_to_embeddings = Path(args.path_to_embeddings)
    print(f"Testing retrieval on embeddings: {path_to_embeddings}")

    # load embeddings
    with h5py.File(path_to_embeddings, "r") as h5:
        data_collector = dict((key, np.array(h5[key])) for key in ["vid_emb", "par_emb", "clip_emb", "sent_emb"])

    # compute retrieval
    print(retrieval.VALHEADER)
    retrieval.compute_retrieval(data_collector, "vid_emb", "par_emb")
    retrieval.compute_retrieval(data_collector, "clip_emb", "sent_emb")
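# Minimal sketch, not part of the original script: write a toy embeddings h5 file with the four
# datasets expected above ("vid_emb", "par_emb", "clip_emb", "sent_emb") so the retrieval test can be
# exercised without a trained model. The helper name, the sizes and the 384-d embedding dimension are
# arbitrary assumptions.
def write_dummy_embeddings(path: str, num_videos: int = 100, num_clips: int = 500, dim: int = 384):
    import h5py
    import numpy as np
    rng = np.random.default_rng(0)
    with h5py.File(path, "w") as h5:
        # paired video/paragraph embeddings and paired clip/sentence embeddings
        h5["vid_emb"] = rng.standard_normal((num_videos, dim)).astype(np.float32)
        h5["par_emb"] = rng.standard_normal((num_videos, dim)).astype(np.float32)
        h5["clip_emb"] = rng.standard_normal((num_clips, dim)).astype(np.float32)
        h5["sent_emb"] = rng.standard_normal((num_clips, dim)).astype(np.float32)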
def main():
    # setup arguments
    parser = utils.ArgParser(description=__doc__)
    arguments.add_multi_experiment_args(parser)  # support multi experiment groups and search
    arguments.add_show_args(parser)  # options for the output table
    arguments.add_path_args(parser, dataset_path=False)  # source path for experiments
    arguments.add_default_args(parser)
    args = parser.parse_args()
    utils.create_logger_without_file(utils.LOGGER_NAME, log_level=args.log_level, no_print=True)

    # find experiments to show depending on arguments
    exp_groups_names = utils.match_folder(args.log_dir, EXP_TYPE, args.exp_group, args.exp_list, args.search)
    collector = collect_results_data(
        EXP_TYPE, exp_groups_names, log_dir=args.log_dir, read_last_epoch=args.last, add_group=args.add_group)
    collector = update_performance_profile(collector)

    # ---------- Define which metrics to print ----------
    default_metrics = []
    default_fields = ["bleu4", "meteo", "rougl", "cider", "re4"]
    output_results(collector, custom_metrics=TEXT_METRICS, metrics=args.metrics, default_metrics=default_metrics,
                   fields=args.fields, default_fields=default_fields, mean=args.mean, mean_all=args.mean_all,
                   sort=args.sort, sort_asc=args.sort_asc, compact=args.compact)
def main():
    parser = utils.ArgParser(description=__doc__)
    parser.add_argument("path_to_embeddings", type=str, help="Provide path to h5 embeddings file.")
    args = parser.parse_args()
    path_to_embeddings = Path(args.path_to_embeddings)
    print(f"Testing retrieval on embeddings: {path_to_embeddings}")

    # load embeddings
    with h5py.File(path_to_embeddings, "r") as h5:
        if "vid_emb" not in h5:
            # backwards compatibility
            (f_vid_emb, f_vid_emb_before_norm, f_clip_emb, f_clip_emb_before_norm, f_vid_context,
             f_vid_context_before_norm, f_par_emb, f_par_emb_before_norm, f_sent_emb, f_sent_emb_before_norm,
             f_par_context, f_par_context_before_norm) = (
                "vid_norm", "vid", "clip_norm", "clip", "vid_ctx_norm", "vid_ctx", "par_norm", "par",
                "sent_norm", "sent", "par_ctx_norm", "par_ctx")
            data_collector = dict(
                (key_target, np.array(h5[key_source])) for key_target, key_source in zip(
                    ["vid_emb", "par_emb", "clip_emb", "sent_emb"],
                    [f_vid_emb, f_par_emb, f_clip_emb, f_sent_emb]))
        else:
            # new version
            data_collector = dict((key, np.array(h5[key])) for key in ["vid_emb", "par_emb", "clip_emb", "sent_emb"])

    # compute retrieval
    print(retrieval.VALHEADER)
    retrieval.compute_retrieval(data_collector, "vid_emb", "par_emb")
    retrieval.compute_retrieval(data_collector, "clip_emb", "sent_emb")
def main():
    # setup arguments
    parser = utils.ArgParser(description=__doc__)
    arguments.add_default_args(parser)
    arguments.add_trainer_args(parser)
    arguments.add_exp_identifier_args(parser)
    arguments.add_dataset_test_arg(parser)
    args = parser.parse_args()

    # load experiment config
    exp_group, exp_name, config_file = arguments.setup_experiment_identifier_from_args(args, EXP_TYPE)
    config = load_yaml_config_file(config_file)

    # update experiment config and dataset path given the script arguments
    config = arguments.update_config_from_args(config, args)
    dataset_path = arguments.update_path_from_args(args)

    # create configuration object
    cfg = MLPMNISTExperimentConfig(config)
    if args.print_config:
        print(cfg)

    # set seed
    if cfg.random_seed is not None:
        print(f"Set seed to {cfg.random_seed}")
        set_seed(cfg.random_seed, set_deterministic=False)  # set deterministic via config if needed

    # create datasets
    train_set = MNIST(str(dataset_path), train=True, download=True, transform=ToTensor())
    val_set = MNIST(str(dataset_path), train=False, download=True, transform=ToTensor())

    # make datasets smaller if requested in config
    if cfg.dataset_train.max_datapoints > -1:
        train_set.data = train_set.data[:cfg.dataset_train.max_datapoints]
    if cfg.dataset_val.max_datapoints > -1:
        val_set.data = val_set.data[:cfg.dataset_val.max_datapoints]

    # create dataloaders
    train_loader = create_loader(train_set, cfg.dataset_train, batch_size=cfg.train.batch_size)
    val_loader = create_loader(val_set, cfg.dataset_val, batch_size=cfg.val.batch_size)

    # create model
    model_mgr = MLPModelManager(cfg)

    if args.test_dataset:
        # run dataset test and exit
        run_mlpmnist_dataset_test(train_set, train_loader)
        return

    # always load best epoch during validation
    load_best = args.load_best or args.validate

    # create trainer
    trainer = MLPMNISTTrainer(
        cfg, model_mgr, exp_group, exp_name, args.run_name, len(train_loader), log_dir=args.log_dir,
        log_level=args.log_level, logger=None, print_graph=args.print_graph, reset=args.reset, load_best=load_best,
        load_epoch=args.load_epoch, inference_only=args.validate)

    if args.validate:
        # run validation
        trainer.validate_epoch(val_loader)
    else:
        # run training
        trainer.train_model(train_loader, val_loader)
def main():
    parser = utils.ArgParser()
    parser.add_argument("dataset_name", type=str, help="dataset name")
    arguments.add_dataset_path_arg(parser)
    arguments.add_test_arg(parser)
    parser.add_argument("--metadata_name", type=str, default="text_data", help="change which metadata to load")
    parser.add_argument("--cuda", action="store_true", help="use cuda")
    parser.add_argument("--multi_gpu", action="store_true", help="use multiple gpus")
    parser.add_argument("--model_path", type=str, default=None, help="Cache path for transformers package.")
    parser.add_argument("--model_name", type=str, default="bert-base-uncased", help="Which model to use.")
    parser.add_argument("--model_source", type=str, default="transformers", help="Where to get the models from.")
    parser.add_argument("--layers", type=str, default="-2,-1",
                        help="Read the features from these layers. Careful: Multiple layers must be specified like "
                             "this: --layers=-2,-1 because argparse handles the minus as a new argument.")
    parser.add_argument("--batch_size", type=int, default=1, help="Batch size.")
    parser.add_argument("--workers", type=int, default=0, help="Dataloader workers.")
    parser.add_argument("--add_name", type=str, default="", help="Add additional identifier to output files.")
    parser.add_argument("-f", "--force", action="store_true", help="Overwrite embedding if exists.")
    parser.add_argument("--encoder_only", action="store_true",
                        help="Flag for hybrid models (e.g. BART) that return both encoder and decoder output, "
                             "if the decoder output should be discarded.")
    parser.add_argument("--set_tokenizer", type=str, default="",
                        help=f"Manually define the tokenizer instead of determining it from model name. "
                             f"Options: {nntrainer.data_text.TextPreprocessing.values()}")
    parser.add_argument("--add_special_tokens", action="store_true",
                        help="Set the tokenizer to add special tokens (like [CLS], [SEP] for BERT).")
    parser.add_argument("--token_stride", action="store_true",
                        help="If set, too long texts will be strided over instead of cut to max.")
    parser.add_argument("--token_stride_factor", type=int, default=2,
                        help="Default 2 means to stride half the window size. Set to 1 for non-overlapping windows.")
    parser.add_argument("--print_model", action="store_true", help="Print model and config")
    args = parser.parse_args()

    data_path = Path("data")
    dataset_path = data_path / args.dataset_name
    model_name = args.model_name
    token_stride = args.token_stride
    model_ident = f"{args.model_source}_{model_name.replace('/', '--')}_{args.layers}"
    full_ident = f"text_feat_{args.dataset_name}_meta_{args.metadata_name}_{model_ident}{args.add_name}"

    # setup paths
    text_features_path = dataset_path
    os.makedirs(text_features_path, exist_ok=True)
    lengths_file = text_features_path / f"{full_ident}_sentence_splits.json"
    data_file_only = f"{full_ident}.h5"
    data_file = text_features_path / data_file_only

    # skip the computation if the features already exist and --force is not given
    if data_file.exists() and lengths_file.exists() and not args.force:
        print(f"{data_file} already exists. nothing to do.")
        return

    # Load pretrained model
    print("*" * 20, f"Loading model {model_name} from {args.model_source}")
    if args.model_source == "transformers":
        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=args.model_path)
        model: BertModel = AutoModel.from_pretrained(model_name, cache_dir=args.model_path)
        if args.print_model:
            print("*" * 40, "Model")
            print(f"{model}")
            print("*" * 40, "Config")
            print(model.config)
        # noinspection PyUnresolvedReferences
        max_text_len = model.config.max_position_embeddings
        model.eval()
    else:
        raise NotImplementedError(f"Model source unknown: {args.model_source}")

    if args.cuda:
        if args.multi_gpu:
            model = nn.DataParallel(model).cuda()
        else:
            model = model.cuda()
    print(f"Running model on device {next(model.parameters()).device}")
    print(f"Maximum input length {max_text_len}")

    # define preprocessor
    is_tp = False
    add_special_tokens = args.add_special_tokens
    if args.set_tokenizer != "":
        print(f"Set tokenizer via flag to {args.set_tokenizer}")
        preprocessor = get_text_preprocessor(args.set_tokenizer)
    elif model_name == "bert-base-uncased":
        # paper results
        preprocessor = get_text_preprocessor(nntrainer.data_text.TextPreprocessing.BERT_PAPER)
    elif model_name.startswith(TextModelConst.BERT) or model_name.startswith(TextModelConst.DISTILBERT):
        # new results bert-large-cased
        preprocessor = get_text_preprocessor(nntrainer.data_text.TextPreprocessing.BERT_NEW)
    elif model_name.startswith(TextModelConst.GPT2):
        # new results with gpt2
        preprocessor = get_text_preprocessor(nntrainer.data_text.TextPreprocessing.GPT2)
    else:
        print(f"WARNING: no text preprocessing defined for model {model_name}, using default preprocessing which "
              f"does not add any special tokens.")
        preprocessor = get_text_preprocessor(nntrainer.data_text.TextPreprocessing.SIMPLE)

    # define feature layers to extract
    layer_list_int = [int(layer.strip()) for layer in args.layers.strip().split(",")]

    # load metadata
    meta_file = dataset_path / f"{args.metadata_name}.json"
    print(f"Loading meta file of {meta_file.stat().st_size // 1024 ** 2:.0f} MB")
    timer_start = timer()
    meta_dict = json.load(meta_file.open("rt", encoding="utf8"))
    print(f"Took {timer() - timer_start:.1f} seconds for {len(meta_dict)} entries.")
    text_dict: Dict[str, List[str]] = {}
    for key, meta in meta_dict.items():
        text_dict[key] = [item for _, item in meta.items()]
        # text_dict[key] = [seg["text"] for seg in meta["segments"]]

    # get total and maximum number of words
    total_words = 0
    max_words = 0
    for key, val in tqdm(text_dict.items(), desc="Compute total_words and max_words"):
        num_words = sum(len(text.split(" ")) for text in val)
        total_words += num_words
        max_words = max(num_words, max_words)
    print(f"Total {total_words} average {total_words / len(meta_dict):.2f} max {max_words}")

    # create dataset and loader
    print("*" * 20, "Loading and testing dataset.")
    dataset = TextConverterDataset(tokenizer, text_dict, preprocessor, max_text_len=max_text_len,
                                   token_stride=token_stride, add_special_tokens=add_special_tokens)
    dataloader = data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers,
                                 collate_fn=dataset.collate_fn)

    # print first datapoint
    for key, value in dataset[0].items():
        print(f"{key}: {value}\n")

    if args.test:
        # print all datapoints and exit
        for point in dataset:
            for key, value in dict(point).items():
                print(f"{key}: {value}\n")
        print("Test, stopping here.")
        return

    # loop videos and encode features
    print("*" * 20, "Running the encoding.")
print(f"Encoding text with model: {model_name}, layers: {layer_list_int}, " f"batch size: {args.batch_size}, workers: {args.workers}") temp_file = text_features_path / f"TEMP_{utils.get_timestamp_for_filename()}_{data_file_only}" data_h5 = h5py.File(temp_file, "w") lengths = {} total_feat_dim = None printed_warning = False pbar = tqdm(desc="compute text features", total=maths.ceil(len(dataset) / args.batch_size)) for i, batch in enumerate(dataloader): # type: TextDataBatchPoint if args.cuda: batch.to_cuda(non_blocking=True) batch_size = len(batch.key) total_max_seq_len = batch.tokens.shape[1] if total_max_seq_len <= max_text_len: # everything is fine # compute model output and read hidden states model_outputs = model(input_ids=batch.tokens, attention_mask=batch.mask, output_hidden_states=True) hidden_states = model_outputs["hidden_states"] # pbar.write(f"tokens {batch.tokens.shape[1]}") # pbar.write(f"outputs {list(state.shape[1] for state in hidden_states)}") # concatenate the features from the requested layers of the hidden state (-1 is the output layer) features = [] for layer_num in layer_list_int: layer_features = hidden_states[layer_num] features.append(layer_features.detach().cpu().numpy()) # concatenate features of individual hidden layers features = np.concatenate(features, axis=-1) # shape (batch_size, max_sent_len, num_layers * feat_dim) # pbar.write(f"features {features.shape}") else: print('Hoy') # if batch tokens is too long we need multiple steps depending on stride stride = max_text_len // args.token_stride_factor positions = list(range(0, total_max_seq_len - stride, stride)) all_model_outputs = [] pbar.write(f"Length {total_max_seq_len}! Split with window {max_text_len} stride {stride} " f"into {len(positions)} batches at positions {positions} ") for pos in positions: end_pos = pos + max_text_len these_tokens = batch.tokens[:, pos:end_pos] these_masks = batch.mask[:, pos:end_pos] these_model_outputs = model(input_ids=these_tokens, attention_mask=these_masks, output_hidden_states=True) these_hidden_states = these_model_outputs["hidden_states"] # pbar.write(f"tokens {these_tokens.shape[1]}") # pbar.write(f"outputs {list(state.shape[1] for state in these_hidden_states)}") # concatenate the features from the requested layers of the hidden state (-1 is the output layer) features = [] for layer_num in layer_list_int: layer_features = these_hidden_states[layer_num] if pos != 0: layer_features = layer_features[:, stride:] features.append(layer_features.detach().cpu().numpy()) # concatenate features of individual hidden layers features = np.concatenate(features, axis=-1) # shape (batch_size, max_sent_len, num_layers * feat_dim) # pbar.write(f"features {features.shape}") all_model_outputs.append(features) # concatenate outputs back together features = np.concatenate(all_model_outputs, axis=1) # compute total output size, need to know this for model architecture if total_feat_dim is None: total_feat_dim = features.shape[-1] # extract single datapoint information from the batch for batch_num in range(batch_size): key = batch.key[batch_num] length = batch.lengths[batch_num] # given length (number of tokens), cut off the padded tokens feature = features[batch_num, :length] # store sentence lengths so features can be mapped to sentences later sentence_lengths = batch.sentence_lengths[batch_num] if is_tp: sentence_lengths = [int(np.round(length / 4)) for length in sentence_lengths] # make sure correspondence between paragraph features and sentence lengths is still there if 
feature.shape[0] != sum(sentence_lengths) and not printed_warning: pbar.write("*" * 40) pbar.write(f"WARNING: Feature sequence length {feature.shape[0]} is not equal sum of the sentence " f"lengths: "f"{sum(sentence_lengths)}") pbar.write(f"{sentence_lengths}") pbar.write(f"It may be hard to get the correspondence between tokens and features back and the " f"correct hierarchical sentence structure back from these features..") printed_warning = True # write features data_h5[key] = feature lengths[key] = sentence_lengths pbar.update() pbar.close() data_h5.close() print(f"Wrote data to {temp_file}, moving to {data_file}") if data_file.is_file(): os.remove(data_file) time.sleep(0.1) shutil.move(temp_file, data_file) # write lengths file json.dump(lengths, lengths_file.open("wt", encoding="utf8")) print(f"Wrote sentence splits to {lengths_file}") print(f"Total feature dim of {len(layer_list_int)} is {total_feat_dim}")
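# Minimal sketch (not part of the original script; the helper name and arguments are hypothetical):
# read the h5 feature file and the "<ident>_sentence_splits.json" file written above and split each
# paragraph feature matrix back into per-sentence chunks using the stored token counts.
def load_sentence_features(data_file: str, lengths_file: str):
    import json
    import h5py
    import numpy as np
    with open(lengths_file, "rt", encoding="utf8") as fh:
        sentence_splits = json.load(fh)  # key -> list of token counts per sentence
    per_sentence = {}
    with h5py.File(data_file, "r") as h5:
        for key, sent_lens in sentence_splits.items():
            feature = np.array(h5[key])  # shape (num_tokens, total_feat_dim)
            # split at the cumulative sentence boundaries
            bounds = np.cumsum(sent_lens)[:-1]
            per_sentence[key] = np.split(feature, bounds, axis=0)
    return per_sentence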
def main():
    # ---------- Setup script arguments. ----------
    parser = utils.ArgParser(description=__doc__)
    arguments.add_default_args(parser)  # logging level etc.
    arguments.add_exp_identifier_args(parser)  # arguments to identify the experiment to run
    arguments.add_trainer_args(parser, dataset_path=False)  # general trainer arguments
    parser.add_argument("--preload", action="store_true", help="Preload everything.")  # feature preloading
    arguments_mart.add_mart_args(parser)  # some more paths for mart
    parser.add_argument("--load_model", type=str, default=None, help="Load model from file.")
    parser.add_argument("--print_model", action="store_true", help="Print model")
    args = parser.parse_args()

    # load repository config yaml file to dict
    exp_group, exp_name, config_file = arguments.setup_experiment_identifier_from_args(args, EXP_TYPE)
    config = load_yaml_config_file(config_file)

    # update experiment config given the script arguments
    config = arguments.update_config_from_args(config, args)
    config = arguments_mart.update_mart_config_from_args(config, args)

    # read experiment config dict
    cfg = Config(config)
    if args.print_config:
        print(cfg)

    # set seed
    if cfg.random_seed is not None:
        print(f"Set seed to {cfg.random_seed}")
        set_seed(cfg.random_seed, set_deterministic=False)  # set deterministic via config if needed

    # create dataset
    train_set, val_set, train_loader, val_loader = create_mart_datasets_and_loaders(
        cfg, args.coot_feat_dir, args.annotations_dir, args.video_feature_dir)

    for i, run_number in enumerate(range(args.start_run, args.start_run + args.num_runs)):
        run_name = f"{args.run_name}{run_number}"

        # create model from config
        model = create_mart_model(cfg, len(train_set.word2idx), cache_dir=args.cache_dir)

        # print model for debug if requested
        if args.print_model and i == 0:
            print(model)

        # always load best epoch during validation
        load_best = args.load_best or args.validate

        # create trainer
        trainer = MartTrainer(
            cfg, model, exp_group, exp_name, run_name, len(train_loader), log_dir=args.log_dir,
            log_level=args.log_level, logger=None, print_graph=args.print_graph, reset=args.reset,
            load_best=load_best, load_epoch=args.load_epoch, load_model=args.load_model,
            inference_only=args.validate, annotations_dir=args.annotations_dir)

        if args.validate:
            # run validation
            if not trainer.load and not args.ignore_untrained:
                raise ValueError("Validating an untrained model! No checkpoints were loaded. Add --ignore_untrained "
                                 "to ignore this error.")
            trainer.validate_epoch(val_loader)
        else:
            # run training
            trainer.train_model(train_loader, val_loader)

        # done with this round
        trainer.close()
        del model
        del trainer
def main():
    # ---------- Setup script arguments. ----------
    parser = utils.ArgParser(description=__doc__)
    arguments.add_default_args(parser)  # logging level etc.
    arguments.add_exp_identifier_args(parser)  # arguments to identify the experiment to run
    arguments.add_trainer_args(parser)  # general trainer arguments
    arguments.add_dataset_test_arg(parser)  # flag for dataset testing
    arguments_coot.add_dataloader_args(parser)  # feature preloading
    parser.add_argument("--load_model", type=str, default=None, help="Load model from file.")
    parser.add_argument("--save_embeddings", action="store_true", help="Save generated COOT embeddings.")
    args = parser.parse_args()

    if args.save_embeddings:
        assert args.validate, "Saving embeddings only works in validation with --validate"

    # load repository config yaml file to dict
    exp_group, exp_name, config_file = arguments.setup_experiment_identifier_from_args(args, EXP_TYPE)
    config = load_yaml_config_file(config_file)

    # update experiment config and dataset path given the script arguments
    path_data = arguments.update_path_from_args(args)
    config = arguments.update_config_from_args(config, args)
    config = arguments_coot.update_coot_config_from_args(config, args)

    # read experiment config dict
    cfg = Config(config, is_train=not args.validate and not args.test_dataset)
    if args.print_config:
        print(cfg)

    # set seed
    if cfg.random_seed is not None:
        print(f"Set seed to {cfg.random_seed}")
        set_seed(cfg.random_seed, set_deterministic=False)  # set deterministic via config if needed

    # raise the open file limit when preloading features to avoid a TooManyFiles error
    if (cfg.dataset_train.preload_vid_feat or cfg.dataset_train.preload_text_feat or cfg.dataset_val.preload_vid_feat
            or cfg.dataset_val.preload_text_feat):
        cmd = "ulimit -n 100000"
        print(f"Run system command to avoid TooManyFiles error:\n{cmd}")
        os.system(cmd)

    # create datasets and dataloaders from the data path
    train_set, val_set, train_loader, val_loader = create_retrieval_datasets_and_loaders(cfg, path_data)

    if args.test_dataset:
        # run dataset test and exit
        run_retrieval_dataset_test(train_set, train_loader)
        return

    print("---------- Setup done!")

    for run_number in range(1, args.num_runs + 1):
        run_name = f"{args.run_name}{run_number}"

        # create coot models
        model_mgr = ModelManager(cfg)

        # always load best epoch during validation
        load_best = args.load_best or args.validate

        # create trainer
        trainer = Trainer(
            cfg, model_mgr, exp_group, exp_name, run_name, len(train_loader), log_dir=args.log_dir,
            log_level=args.log_level, logger=None, print_graph=args.print_graph, reset=args.reset,
            load_best=load_best, load_epoch=args.load_epoch, load_model=args.load_model,
            inference_only=args.validate)

        if args.validate:
            # run validation
            trainer.validate_epoch(val_loader, val_clips=cfg.val.val_clips, save_embs=args.save_embeddings)
        else:
            # run training
            trainer.train_model(train_loader, val_loader)

        # done with this round
        trainer.close()
        del model_mgr
        del trainer
def main():
    # argparser
    parser = utils.ArgParser(description=__doc__)
    arguments.add_path_args(parser)
    args = parser.parse_args()

    # setup dataset path
    path_data = args.data_path if args.data_path is not None else repo_config.DATA_PATH
    path_dataset = Path(path_data) / "activitynet"
    captions_path = Path("annotations") / "activitynet"
    print(f"Working on dataset path {path_dataset} captions from {captions_path}")

    # setup other paths
    meta_file = path_dataset / "meta_all.json"

    meta_dict = {}
    for split in ["train", "val_1", "val_2"]:
        raw_data = json.load((captions_path / f"{split}.json").open("rt", encoding="utf8"))
        for key, val in raw_data.items():
            # load video information
            timestamps = val["timestamps"]
            sentences = val["sentences"]
            duration_sec = val["duration"]

            # build segments
            segments = []
            for num_seg in range(len(timestamps)):
                # load narration sentence and preprocess line separators
                sentence = sentences[num_seg]
                sentence = RE_SPACELIKES.sub(" ", sentence)

                # load start and stop timestamps
                start_sec = timestamps[num_seg][0]
                stop_sec = timestamps[num_seg][1]

                # swap them in case stop < start
                if stop_sec < start_sec:
                    print(f"swap: stop_sec {stop_sec} < start_sec {start_sec}")
                    start_sec, stop_sec = stop_sec, start_sec
                segments.append({"text": sentence, "start_sec": start_sec, "stop_sec": stop_sec})

            # shorten video key to the 11 youtube letters for consistency
            assert key[:2] == "v_"
            short_key = key[2:]

            # multiple datapoints with different annotations point to the same video. add split to the key
            item_key = f"{short_key}_{split}"
            meta_dict[item_key] = {
                "data_key": short_key, "split": split, "segments": segments, "duration_sec": duration_sec}

    # write meta to file
    json.dump(meta_dict, meta_file.open("wt", encoding="utf8"), sort_keys=True)
    print(f"wrote {meta_file}")
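# Minimal sketch (hypothetical helper, not part of the original preprocessing): read the
# meta_all.json written above and verify the segment structure. Each entry maps an item key
# "<video_id>_<split>" to its data_key, split, duration_sec and a list of segments with
# text, start_sec and stop_sec.
def check_activitynet_meta(meta_file: str):
    import json
    from pathlib import Path
    meta_dict = json.load(Path(meta_file).open("rt", encoding="utf8"))
    num_segments = 0
    for item_key, meta in meta_dict.items():
        for seg in meta["segments"]:
            # start <= stop is guaranteed by the swap in the preprocessing above
            assert seg["start_sec"] <= seg["stop_sec"]
            num_segments += 1
    print(f"{len(meta_dict)} items, {num_segments} segments total")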
def main():
    # setup arguments
    parser = utils.ArgParser(description=__doc__)
    arguments.add_default_args(parser)
    arguments.add_exp_identifier_args(parser)
    arguments.add_trainer_args(parser)
    arguments.add_dataset_test_arg(parser)
    args = parser.parse_args()

    # load repository config yaml file to dict
    exp_group, exp_name, config_file = arguments.setup_experiment_identifier_from_args(args, EXP_TYPE)
    config = load_yaml_config_file(config_file)

    # update experiment config and dataset path given the script arguments
    config = arguments.update_config_from_args(config, args)
    dataset_path = arguments.update_path_from_args(args)

    # read experiment config dict
    cfg = MLPMNISTExperimentConfig(config)
    if args.print_config:
        print(cfg)

    # set seed, generate a random one if none is given
    verb = "Set seed"
    if cfg.random_seed is None:
        cfg.random_seed = np.random.randint(0, 2 ** 15, dtype=np.int32)
        verb = "Randomly generated seed"
    print(f"{verb} {cfg.random_seed} deterministic {cfg.cudnn_deterministic} benchmark {cfg.cudnn_benchmark}")
    set_seed(cfg.random_seed, cudnn_deterministic=cfg.cudnn_deterministic, cudnn_benchmark=cfg.cudnn_benchmark)

    # create datasets
    train_set = MNIST(str(dataset_path), train=True, download=True, transform=ToTensor())
    val_set = MNIST(str(dataset_path), train=False, download=True, transform=ToTensor())

    # make datasets smaller if requested in config
    if cfg.dataset_train.max_datapoints > -1:
        train_set.data = train_set.data[:cfg.dataset_train.max_datapoints]
    if cfg.dataset_val.max_datapoints > -1:
        val_set.data = val_set.data[:cfg.dataset_val.max_datapoints]

    # create dataloaders
    train_loader = create_loader(train_set, cfg.dataset_train, batch_size=cfg.train.batch_size)
    val_loader = create_loader(val_set, cfg.dataset_val, batch_size=cfg.val.batch_size)

    if args.test_dataset:
        # run dataset test and exit
        run_mlpmnist_dataset_test(train_set, train_loader)
        return

    print("---------- Setup done!")

    for run_number in range(1, args.num_runs + 1):
        run_name = f"{args.run_name}{run_number}"

        # create model
        model_mgr = MLPModelManager(cfg)

        # always load best epoch during validation
        load_best = args.load_best or args.validate

        # create trainer
        trainer = MLPMNISTTrainer(
            cfg, model_mgr, exp_group, exp_name, run_name, len(train_loader), log_dir=args.log_dir,
            log_level=args.log_level, logger=None, print_graph=args.print_graph, reset=args.reset,
            load_best=load_best, load_epoch=args.load_epoch, inference_only=args.validate)

        if args.validate:
            # run validation
            trainer.validate_epoch(val_loader)
        else:
            # run training
            trainer.train_model(train_loader, val_loader)

        # done with this round
        trainer.close()
        del model_mgr
        del trainer
def main():
    # argparser
    parser = utils.ArgParser(description=__doc__)
    arguments.add_path_args(parser)
    args = parser.parse_args()

    # setup dataset path
    path_data = Path("data/")
    path_dataset = path_data / "bksnmovies"
    captions_path = Path("annotations") / "bksnmovies"
    print(f"Working on dataset path {path_dataset} captions from {captions_path}")

    # setup other paths
    meta_file = path_dataset / "meta_all.json"

    # load input meta
    meta_in_file = captions_path / "bknmovies_v0_split_1_nonempty.json"
    with meta_in_file.open("rt", encoding="utf8") as fh:
        meta_raw = json.load(fh)["database"]

    # load text, video and positives meta
    text_in_file = captions_path / "text_data.json"
    vid_in_file = captions_path / "vid_data.json"
    pos_in_file = captions_path / "pos_data.json"
    text_data = json.load(text_in_file.open("rt", encoding="utf8"))
    vid_data = json.load(vid_in_file.open("rt", encoding="utf8"))
    pos_data = json.load(pos_in_file.open("rt", encoding="utf8"))

    # loop all videos in the dataset
    meta_dict = {}
    for key, meta in meta_raw.items():
        # load relevant meta fields
        duration_sec = meta["duration"]
        split = SPLIT_MAP[meta["subset"]]

        # get text and video segments, as well as positive pairs annotations
        text_segs = [{"text": sent} for sent_id, sent in text_data[key].items()]
        vid_segs = [{"start_sec": clip[0], "stop_sec": clip[1]} for clip_id, clip in vid_data[key].items()]

        # get positive bags: each bag is a list of (clip_id, sent_id) pairs
        positives = []
        for anno in pos_data[key]:
            bop = []
            for clip_id in anno["positive_shots"]:
                for sent_id in anno["positive_sentences"]:
                    bop.append((clip_id, sent_id))
            positives.append(bop)

        # create video meta
        meta_dict[key] = {
            "data_key": key, "duration_sec": duration_sec, "split": split, "text_segments": text_segs,
            "vid_segments": vid_segs, "positives": positives}

    # write meta to file
    json.dump(meta_dict, meta_file.open("wt", encoding="utf8"), sort_keys=True)
    print(f"wrote {meta_file}")
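# Minimal sketch (hypothetical helper, not part of the original preprocessing): read the bksnmovies
# meta_all.json written above and flatten the positive bags into plain (clip_id, sent_id) pairs per
# video, e.g. to count the annotated positives.
def count_positive_pairs(meta_file: str):
    import json
    from pathlib import Path
    meta_dict = json.load(Path(meta_file).open("rt", encoding="utf8"))
    pairs_per_video = {}
    for key, meta in meta_dict.items():
        # "positives" is a list of bags; after the json round-trip each bag is a list of [clip_id, sent_id] lists
        pairs_per_video[key] = [tuple(pair) for bag in meta["positives"] for pair in bag]
    total = sum(len(pairs) for pairs in pairs_per_video.values())
    print(f"{len(meta_dict)} videos, {total} positive clip-sentence pairs")
    return pairs_per_video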