def test_build_indexers(self):
    """Indexers must be keyed by input module; mismatched BERT tokenizer/model must fail.

    Bug fix: the original comparisons were bare expressions
    (``len(indexer) == 1 and ...``) whose results were discarded, so the
    test could never fail. They are now real unittest assertions.
    """
    self.params2 = params_from_file(self.DEFAULTS_PATH, self.HOCON2)
    self.params3 = params_from_file(self.DEFAULTS_PATH, self.HOCON3)
    self.params4 = params_from_file(self.DEFAULTS_PATH, self.HOCON4)

    indexer = build_indexers(self.params1)
    self.assertEqual(len(indexer), 1)
    self.assertEqual(list(indexer.keys())[0], "bert_cased")

    indexer = build_indexers(self.params2)
    self.assertEqual(len(indexer), 1)
    self.assertEqual(list(indexer.keys())[0], "words")

    indexer = build_indexers(self.params3)
    self.assertEqual(len(indexer), 1)
    self.assertEqual(list(indexer.keys())[0], "openai_gpt")

    with self.assertRaises(AssertionError):
        # BERT model and tokenizer must be equal, so this should throw an error.
        build_indexers(self.params4)
def setUp(self):
    """Build processed trainer params (pretrain + per-task target-train) from HOCON overrides."""
    overrides = """
lr = 123.456
pretrain_data_fraction = .123
target_train_data_fraction = .1234
mnli = {
    lr = 4.56,
    batch_size = 123
    max_epochs = 456
    training_data_fraction = .456
}
qqp = {
    max_epochs = 789
}
"""
    # The defaults file supplies every other required option.
    defaults_path = resource_filename("jiant", "config/defaults.conf")
    params = params_from_file(defaults_path, overrides)
    cuda_device = -1
    self.processed_pretrain_params = build_trainer_params(
        params, cuda_device, ["mnli", "qqp"], phase="pretrain"
    )
    self.processed_mnli_target_params = build_trainer_params(
        params, cuda_device, ["mnli"], phase="target_train"
    )
    self.processed_qqp_target_params = build_trainer_params(
        params, cuda_device, ["qqp"], phase="target_train"
    )
def setUp(self):
    """Build processed trainer params for pretrain and per-task target-train phases."""
    overrides = """
lr = 123.456
pretrain_data_fraction = .123
target_train_data_fraction = .1234
mnli = {
    lr = 4.56,
    batch_size = 123
    max_epochs = 456
    training_data_fraction = .456
}
qqp = {
    max_epochs = 789
}
"""
    # The defaults file supplies every other required value.
    defaults_path = "config/defaults.conf"
    params = params_from_file(defaults_path, overrides)
    self.processed_pretrain_params = build_trainer_params(
        params, ["mnli", "qqp"], phase="pretrain"
    )
    self.processed_mnli_target_params = build_trainer_params(
        params, ["mnli"], phase="target_train"
    )
    self.processed_qqp_target_params = build_trainer_params(
        params, ["qqp"], phase="target_train"
    )
def setUp(self):
    """Config fixtures for build_indexers tests.

    HOCON1/HOCON2/HOCON3 pair each input module with a compatible tokenizer;
    HOCON4 deliberately mismatches BERT model and tokenizer to trigger the
    assertion in build_indexers.

    Bug fix: HOCON2 and HOCON3 contained the typo ``pretrain_task s= mnli``
    (space before ``s``), which silently set a bogus key instead of
    ``pretrain_tasks``; corrected to ``pretrain_tasks = mnli``.
    """
    self.HOCON1 = """
pretrain_tasks = mnli
target_tasks = qqp
tokenizer = bert-large-cased
input_module = bert-large-cased
"""
    self.HOCON2 = """
pretrain_tasks = mnli
target_tasks = qqp
input_module = glove
tokenizer = MosesTokenizer
"""
    self.HOCON3 = """
pretrain_tasks = mnli
target_tasks = qqp
input_module = openai-gpt
tokenizer = openai-gpt
"""
    self.HOCON4 = """
pretrain_tasks = mnli
target_tasks = qqp
tokenizer = bert-large-cased
input_module = bert-base-cased
"""
    # The defaults file supplies every other required option.
    self.DEFAULTS_PATH = resource_filename("jiant", "config/defaults.conf")
    self.params1 = params_from_file(self.DEFAULTS_PATH, self.HOCON1)
def test_steps_between_gradient_accumulations_cannot_be_set_to_a_negative_number(self):
    """A negative accumulation_steps override must be rejected by check_configurations."""
    defaults_path = resource_filename("jiant", "config/defaults.conf")
    self.args = params_from_file(defaults_path, "accumulation_steps = -1")
    self.assertRaises(AssertionError, check_configurations, self.args, [], [])
def setUp(self):
    """Config fixtures for build_indexers tests (gpt / OpenAI.BPE variant).

    HOCON4 deliberately mismatches BERT model and tokenizer to trigger the
    assertion in build_indexers.

    Bug fix: HOCON2 and HOCON3 contained the typo ``pretrain_task s= mnli``
    (space before ``s``), which silently set a bogus key instead of
    ``pretrain_tasks``; corrected to ``pretrain_tasks = mnli``.
    """
    self.HOCON1 = """
pretrain_tasks = mnli
target_tasks = qqp
tokenizer = bert-large-cased
input_module = bert-large-cased
"""
    self.HOCON2 = """
pretrain_tasks = mnli
target_tasks = qqp
input_module = glove
tokenizer = MosesTokenizer
"""
    self.HOCON3 = """
pretrain_tasks = mnli
target_tasks = qqp
input_module = gpt
tokenizer = OpenAI.BPE
"""
    self.HOCON4 = """
pretrain_tasks = mnli
target_tasks = qqp
tokenizer = bert-large-cased
input_module = bert-base-cased
"""
    # The defaults file supplies every other required value.
    self.DEFAULTS_PATH = "config/defaults.conf"
    self.params1 = params_from_file(self.DEFAULTS_PATH, self.HOCON1)
def main(cl_arguments): """ Run REPL for a CoLA model """ # Arguments handling # cl_args = handle_arguments(cl_arguments) args = config.params_from_file(cl_args.config_file, cl_args.overrides) check_arg_name(args) assert args.target_tasks == "cola", "Currently only supporting CoLA. ({})".format( args.target_tasks ) if args.cuda >= 0: try: if not torch.cuda.is_available(): raise EnvironmentError("CUDA is not available, or not detected" " by PyTorch.") log.info("Using GPU %d", args.cuda) torch.cuda.set_device(args.cuda) except Exception: log.warning( "GPU access failed. You might be using a CPU-only" " installation of PyTorch. Falling back to CPU." ) args.cuda = -1 # Prepare data # _, target_tasks, vocab, word_embs = build_tasks(args) tasks = sorted(set(target_tasks), key=lambda x: x.name) # Build or load model # model = build_model(args, vocab, word_embs, tasks) log.info("Loading existing model from %s...", cl_args.model_file_path) load_model_state(model, cl_args.model_file_path, args.cuda, [], strict=False) # Inference Setup # model.eval() vocab = Vocabulary.from_files(os.path.join(args.exp_dir, "vocab")) indexers = build_indexers(args) task = take_one(tasks) # Run Inference # if cl_args.inference_mode == "repl": assert cl_args.input_path is None assert cl_args.output_path is None print("Running REPL for task: {}".format(task.name)) run_repl(model, vocab, indexers, task, args) elif cl_args.inference_mode == "corpus": run_corpus_inference( model, vocab, indexers, task, args, cl_args.input_path, cl_args.input_format, cl_args.output_path, cl_args.eval_output_path, ) else: raise KeyError(cl_args.inference_mode)
def test_by_default_steps_between_gradient_accumulations_is_set_to_1(self):
    """With untouched defaults, build_trainer must report accumulation_steps == 1."""
    with mock.patch("jiant.models.MultiTaskModel") as MockModel:
        defaults_path = resource_filename("jiant", "config/defaults.conf")
        self.args = params_from_file(defaults_path)
        self.args.cuda = -1
        self.args.run_dir = self.temp_dir
        self.args.exp_dir = self.temp_dir
        mock_model = MockModel()
        _, train_params, _, _ = build_trainer(
            self.args,
            self.args.cuda,
            ["wic"],
            mock_model,
            self.args.run_dir,
            self.wic.val_metric_decreases,
            phase="pretrain",
        )
        self.assertEqual(train_params["accumulation_steps"], 1)
def setUp(self):
    """Build trainer params from HOCON overrides plus a small list of dummy pretrain tasks."""
    overrides = """
lr = 123.456
pretrain_data_fraction = .123
target_train_data_fraction = .1234
mnli = {
    lr = 4.56,
    batch_size = 123
    max_epochs = 456
    training_data_fraction = .456
}
qqp = {
    max_epochs = 789
}
"""
    # The defaults file supplies every other required value.
    defaults_path = resource_filename("jiant", "config/defaults.conf")
    params = params_from_file(defaults_path, overrides)
    cuda_device = -1
    self.processed_pretrain_params = build_trainer_params(
        params, cuda_device, ["mnli", "qqp"], phase="pretrain"
    )
    self.processed_mnli_target_params = build_trainer_params(
        params, cuda_device, ["mnli"], phase="target_train"
    )
    self.processed_qqp_target_params = build_trainer_params(
        params, cuda_device, ["qqp"], phase="target_train"
    )
    # Instantiate a handful of registry tasks with dummy paths/tokenizers.
    self.pretrain_tasks = []
    registry_subset = {
        "sst": REGISTRY["sst"],
        "winograd-coreference": REGISTRY["winograd-coreference"],
        "commitbank": REGISTRY["commitbank"],
    }
    for task_name, (task_cls, _, task_kw) in registry_subset.items():
        self.pretrain_tasks.append(
            task_cls(
                "dummy_path",
                max_seq_len=1,
                name=task_name,
                tokenizer_name="dummy_tokenizer_name",
                **task_kw,
            )
        )
def strip_exp(exps_dir, model, task):
    """Strip one task's artifacts from an experiment directory, keeping only the
    best model checkpoint (renamed to model.th), log.log and a patched params.conf.

    Fixes: local ``dir`` no longer shadows the builtin; items whose name does
    not match the task-name pattern (e.g. dotfiles) are skipped instead of
    crashing on ``re.match(...).group``.
    """
    exp_dir = join(exps_dir, model)
    for subdir in ('preproc', 'tasks'):
        subdir_path = join(exp_dir, subdir)
        for item in listdir(subdir_path):
            # rwsd__test_data
            # rwsd__train_data
            # rwsd.DeepPavlov
            match = re.match(r'([^_\.]+)', item)
            if match is None:
                # Name has no leading task prefix (e.g. starts with '_' or '.'); leave it.
                continue
            name = match.group(1)
            # lidirus artifacts belong to the terra model.
            if name == task or (task == TERRA and name == LIDIRUS):
                rm_any(join(subdir_path, item))
        if not listdir(subdir_path):
            rmdir(subdir_path)
    task_dir = join(exp_dir, task)
    for item in listdir(task_dir):
        # metric_state_pretrain_val_10.th
        # metric_state_pretrain_val_3.best.th
        # model_state_pretrain_val_10.th
        # params.conf
        # RWSD.jsonl
        # tensorboard
        # training_state_pretrain_val_3.best.th
        if is_best_model(item):
            rename(
                join(task_dir, item),
                join(task_dir, 'model.th')
            )
        elif item not in ('log.log', 'params.conf'):
            rm_any(join(task_dir, item))
    path = join(task_dir, 'params.conf')
    with no_loggers([LOGGER]):
        params = params_from_file(path)
    patch_exp_params(params, model)
    write_params(params, path)
def main(cl_arguments):
    """ Train a model for multitask-training."""
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    # Trainer class is configurable; anything non-default is loudly echoed.
    train_type = args.get('train_type', "SamplingMultiTaskTrainer")
    if train_type != "SamplingMultiTaskTrainer":
        print("\n\n\n", train_type, "\n\n\n")
    # Check for deprecated arg names
    check_arg_name(args)
    args, seed = initial_setup(args, cl_args)
    # Load tasks
    log.info("Loading tasks...")
    start_time = time.time()
    pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(args)
    tasks = sorted(set(pretrain_tasks + target_tasks), key=lambda x: x.name)
    log.info("\tFinished loading tasks in %.3fs", time.time() - start_time)
    log.info("\t Tasks: {}".format([task.name for task in tasks]))
    # Build model
    log.info("Building model...")
    start_time = time.time()
    model = build_model(args, vocab, word_embs, tasks)
    log.info("Finished building model in %.3fs", time.time() - start_time)
    # Start Tensorboard if requested
    if cl_args.tensorboard:
        tb_logdir = os.path.join(args.run_dir, "tensorboard")
        _run_background_tensorboard(tb_logdir, cl_args.tensorboard_port)
    check_configurations(args, pretrain_tasks, target_tasks)
    if args.do_pretrain:
        # Train on pretrain tasks
        log.info("Training...")
        # With a single pretrain task, stop on its own metric; otherwise macro average.
        stop_metric = pretrain_tasks[0].val_metric if len(
            pretrain_tasks) == 1 else "macro_avg"
        should_decrease = (pretrain_tasks[0].val_metric_decreases
                           if len(pretrain_tasks) == 1 else False)
        trainer, _, opt_params, schd_params = build_trainer(
            args, [], model, args.run_dir,
            should_decrease, phase="pretrain", train_type=train_type)
        # Only parameters with requires_grad participate in pretraining.
        to_train = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
        _ = trainer.train(
            pretrain_tasks,
            stop_metric,
            args.batch_size,
            args.weighting_method,
            args.scaling_method,
            to_train,
            opt_params,
            schd_params,
            args.load_model,
            phase="pretrain",
        )
    # For checkpointing logic
    # strict loading only when no target-task heads will be added later.
    if not args.do_target_task_training:
        strict = True
    else:
        strict = False
    if args.do_target_task_training:
        # Train on target tasks
        pre_target_train_path = setup_target_task_training(
            args, target_tasks, model, strict)
        target_tasks_to_train = copy.deepcopy(target_tasks)
        # Check for previous target train checkpoints
        task_to_restore, _, _ = check_for_previous_checkpoints(
            args.run_dir, target_tasks_to_train, "target_train", args.load_model)
        if task_to_restore is not None:
            # If there is a task to restore from, target train only on target tasks
            # including and following that task.
            last_task_index = [task.name for task in target_tasks_to_train
                               ].index(task_to_restore)
            target_tasks_to_train = target_tasks_to_train[last_task_index:]
        for task in target_tasks_to_train:
            # Skip tasks that should not be trained on.
            if task.eval_only_task:
                continue
            params_to_train = load_model_for_target_train_run(
                args, pre_target_train_path, model, strict, task)
            trainer, _, opt_params, schd_params = build_trainer(
                args, [task.name], model, args.run_dir,
                task.val_metric_decreases, phase="target_train",
                train_type=train_type)
            _ = trainer.train(
                tasks=[task],
                stop_metric=task.val_metric,
                batch_size=args.batch_size,
                weighting_method=args.weighting_method,
                scaling_method=args.scaling_method,
                train_params=params_to_train,
                optimizer_params=opt_params,
                scheduler_params=schd_params,
                # Resume only the task we are restoring from; others start fresh.
                load_model=(task.name == task_to_restore),
                phase="target_train",
            )
    if args.do_full_eval:
        log.info("Evaluating...")
        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)
        # Evaluate on target_tasks.
        for task in target_tasks:
            # Find the task-specific best checkpoint to evaluate on.
            task_to_use = model._get_task_params(task.name).get(
                "use_classifier", task.name)
            ckpt_path = get_best_checkpoint_path(args, "eval", task_to_use)
            assert ckpt_path is not None
            load_model_state(model, ckpt_path, args.cuda, skip_task_models=[], strict=strict)
            evaluate_and_write(args, model, [task], splits_to_write)
    if args.delete_checkpoints_when_done and not args.keep_all_checkpoints:
        log.info("Deleting all checkpoints.")
        delete_all_checkpoints(args.run_dir)
    log.info("Done!")
def infer_jiant(exp_dir, task, items, batch_size=4):
    """Run inference for `task` with a trained jiant experiment and return predictions.

    Mocks the experiment layout in temp dirs so jiant's build_tasks/evaluate
    machinery can run on ad-hoc `items` without touching the real data dirs.
    """
    # use cached tokenizer
    path = join(exp_dir, 'transformers_cache')
    with env(PYTORCH_TRANSFORMERS_CACHE=path):
        # reload so the module re-reads the env var at import-time constants
        reload(transformers.file_utils)
    # use terra model for lidirus
    run_dir = join(
        exp_dir,
        TERRA if task == LIDIRUS else task
    )
    # Silence every chatty logger from the involved libraries during setup.
    loggers = [
        LOGGER,
        pytorch_pretrained_bert.modeling.logger,
        transformers.file_utils.logger,
        transformers.configuration_utils.logger,
        transformers.modeling_utils.logger,
        transformers.tokenization_utils.logger,
        allennlp.nn.initializers.logger
    ]
    with no_loggers(loggers):
        path = join(run_dir, 'params.conf')
        args = params_from_file(path)
        cuda_device = parse_cuda_list_arg('auto')
    args.local_log_path = join(run_dir, 'log.log')
    args.exp_dir = args.project_dir = exp_dir
    args.run_dir = run_dir
    log('Build tasks')
    with no_loggers(loggers), TemporaryDirectory() as dir:
        args.exp_dir = args.data_dir = dir  # hide pkl, preproc
        dump_task(dir, task, items=[])  # mock empty train, val, test
        if task in (TERRA, LIDIRUS):
            # terra and lidirus share a run; the counterpart must exist too
            dump_task(dir, LIDIRUS if task == TERRA else TERRA, items=[])
        _, tasks, vocab, word_embs = build_tasks(args, cuda_device)
    log('Build model, load transformers pretrain')
    with no_loggers(loggers):
        args.exp_dir = exp_dir  # use transformers cache
        model = build_model(args, vocab, word_embs, tasks, cuda_device)
    path = join(run_dir, 'model.th')
    log(f'Load state {path!r}')
    load_model_state(model, path, cuda_device)
    log(f'Build mock task, infer via eval, batch_size={batch_size}')
    with no_loggers(loggers), TemporaryDirectory() as dir:
        args.exp_dir = args.data_dir = dir
        # This time dump the real items as the "test" split.
        dump_task(dir, task, items)
        if task in (TERRA, LIDIRUS):
            # choose one at inference
            args.pretrain_tasks = task
            args.target_tasks = task
        _, tasks, _, _ = build_tasks(args, cuda_device)
        _, preds = evaluate.evaluate(
            model, tasks,
            batch_size, cuda_device, 'test'
        )
        evaluate.write_preds(
            tasks, preds, dir,
            'test', args.write_strict_glue_format
        )
        return list(load_preds(dir, task))
def main(cl_arguments):
    """ Train a model for multitask-training."""
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    # Check for deprecated arg names
    check_arg_name(args)
    args, seed = initial_setup(args, cl_args)
    #XXX Dylan's code
    # Log projection-matrix hyperparameters when present; absence is fine.
    try:
        log.info(f'\nK syn is {args.k_syn}')
        log.info(f'\nK sem is {args.k_sem}\n')
    except Exception:
        log.info('No projection matrices.')
        pass
    #XXX
    # Load tasks
    log.info("Loading tasks...")
    start_time = time.time()
    pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(args)
    #pretrain_tasks[0].load_data()
    #exit()
    tasks = sorted(set(pretrain_tasks + target_tasks), key=lambda x: x.name)
    log.info("\tFinished loading tasks in %.3fs", time.time() - start_time)
    log.info("\t Tasks: {}".format([task.name for task in tasks]))
    training_flag = args.do_pretrain
    # Seed an empty records pickle used later to track best/last validation results.
    if training_flag and args.records_pickle_path:
        with open(args.records_pickle_path, 'wb') as f:
            records_dict = dict()
            records_dict['run_name'] = args.run_name
            records_dict['last_checkpoint'] = ''
            records_dict['training'] = dict()
            records_dict['best_val'] = dict()
            records_dict['last_val'] = dict()
            pickle.dump(records_dict, f)
    # Build model
    log.info("Building model...")
    start_time = time.time()
    model = build_model(args, vocab, word_embs, tasks)
    log.info("Finished building model in %.3fs", time.time() - start_time)
    # Start Tensorboard if requested
    if cl_args.tensorboard:
        tb_logdir = os.path.join(args.run_dir, "tensorboard_" + str(args.run_name))
        _run_background_tensorboard(tb_logdir, cl_args.tensorboard_port)
    check_configurations(args, pretrain_tasks, target_tasks)
    if args.do_pretrain:
        # Train on pretrain tasks
        log.info("Training...")
        # With a single pretrain task, stop on its own metric; otherwise macro average.
        stop_metric = pretrain_tasks[0].val_metric if len(
            pretrain_tasks) == 1 else "macro_avg"
        should_decrease = (pretrain_tasks[0].val_metric_decreases
                           if len(pretrain_tasks) == 1 else False)
        trainer, _, opt_params, schd_params = build_trainer(args, [], model,
                                                            args.run_dir,
                                                            should_decrease,
                                                            phase="pretrain")
        to_train = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
        _ = trainer.train(pretrain_tasks, stop_metric, args.batch_size,
                          args.weighting_method, args.scaling_method, to_train,
                          opt_params, schd_params, args.load_model,
                          phase="pretrain", args=args)
    # For checkpointing logic
    # strict loading only when no target-task heads will be added later.
    if not args.do_target_task_training:
        strict = True
    else:
        strict = False
    if args.do_target_task_training:
        # Train on target tasks
        pre_target_train_path = setup_target_task_training(
            args, target_tasks, model, strict)
        target_tasks_to_train = copy.deepcopy(target_tasks)
        # Check for previous target train checkpoints
        task_to_restore, _, _ = check_for_previous_checkpoints(
            args.run_dir, target_tasks_to_train, "target_train", args.load_model)
        if task_to_restore is not None:
            # If there is a task to restore from, target train only on target tasks
            # including and following that task.
            last_task_index = [task.name for task in target_tasks_to_train
                               ].index(task_to_restore)
            target_tasks_to_train = target_tasks_to_train[last_task_index:]
        for task in target_tasks_to_train:
            # Skip tasks that should not be trained on.
            if task.eval_only_task:
                continue
            params_to_train = load_model_for_target_train_run(
                args, pre_target_train_path, model, strict, task)
            trainer, _, opt_params, schd_params = build_trainer(
                args,
                [task.name],
                model,
                args.run_dir,
                task.val_metric_decreases,
                phase="target_train",
            )
            _ = trainer.train(
                tasks=[task],
                stop_metric=task.val_metric,
                batch_size=args.batch_size,
                weighting_method=args.weighting_method,
                scaling_method=args.scaling_method,
                train_params=params_to_train,
                optimizer_params=opt_params,
                scheduler_params=schd_params,
                # Resume only the task we are restoring from; others start fresh.
                load_model=(task.name == task_to_restore),
                phase="target_train",
            )
    # Adversarial/discriminator auxiliary tasks are excluded from evaluation.
    tasks_for_eval = [
        task for task in target_tasks
        if (not 'adv' in task.name and not 'discriminator' in task.name)
    ]
    if args.do_full_eval:
        log.info("Evaluating...")
        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)
        # Evaluate on target_tasks.
        #for task in target_tasks:
        for task in tasks_for_eval:
            # Find the task-specific best checkpoint to evaluate on.
            task_to_use = model._get_task_params(task.name).get(
                "use_classifier", task.name)
            ckpt_path = get_best_checkpoint_path(args, "eval", task_to_use)
            assert ckpt_path is not None
            load_model_state(model, ckpt_path, args.cuda, skip_task_models=[], strict=strict)
            records_dict = get_records_dict(
                args.records_pickle_path) if args.evaluate_final else None
            # When evaluate_final is set, write best-val preds only if the best
            # checkpoint is also the last one recorded.
            evaluate_and_write(
                args, model, [task], splits_to_write, mode='best_val',
                do_write=(not args.evaluate_final)
                or (records_dict != None and ckpt_path == records_dict['last_checkpoint']))
        if args.evaluate_final:
            records_dict = get_records_dict(args.records_pickle_path)
            if ckpt_path != records_dict['last_checkpoint']:
                # Best and last checkpoints differ: re-load the last one and
                # evaluate all tasks again under the 'last_val' mode.
                try:
                    load_model_state(model, records_dict['last_checkpoint'],
                                     args.cuda, skip_task_models=[], strict=strict)
                    for task in tasks_for_eval:
                        evaluate_and_write(args, model, [task], splits_to_write,
                                           mode='last_val', do_write=True)
                except Exception:
                    log.info(
                        f"Did not record last_checkpoint path properly. Looks like: {records_dict['last_checkpoint']}"
                    )
            else:
                # Best == last: just copy the results over.
                records_dict['last_val'] = records_dict['best_val']
                write_records_dict(records_dict, args.records_pickle_path)
    log.info("Done!")
"""Overwrite a config file by renaming args. Require one argument: path_to_file."""
import sys

from jiant.utils import config  # use symlink from scripts to jiant

# Mapping - key: old name, value: new name
# Fix: the original literal listed "eval_data_fraction" twice; the duplicate
# key was silently dropped by the dict literal and has been removed.
name_dict = {
    "task_patience": "lr_patience",
    "do_train": "do_pretrain",
    "train_for_eval": "do_target_task_training",
    "do_eval": "do_full_eval",
    "train_tasks": "pretrain_tasks",
    "eval_tasks": "target_tasks",
    "eval_data_fraction": "target_train_data_fraction",
    "eval_val_interval": "target_train_val_interval",
    "eval_max_vals": "target_train_max_vals",
    "load_eval_checkpoint": "load_target_train_checkpoint",
}

path = sys.argv[1]
params = config.params_from_file(path)
# Move each deprecated key's value to its new name, then drop the old key.
for old_name, new_name in name_dict.items():
    if old_name in params:
        params[new_name] = params[old_name]
        del params[old_name]
config.write_params(params, path)
def test_steps_between_gradient_accumulations_must_be_defined(self):
    """Removing accumulation_steps entirely must make check_configurations fail."""
    defaults_path = resource_filename("jiant", "config/defaults.conf")
    self.args = params_from_file(defaults_path)
    del self.args.accumulation_steps
    self.assertRaises(AssertionError, check_configurations, self.args, [], [])
def main(cl_arguments):
    """ Train a model for multitask-training."""
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    # Check for deprecated arg names
    check_arg_name(args)
    args, seed = initial_setup(args, cl_args)
    #Store the run description, if any
    if FLAGS.description:
        with open(Path(args.run_dir, 'description.txt'), 'w') as f:
            f.write(FLAGS.description)
    # Load tasks
    log.info("Loading tasks...")
    start_time = time.time()
    # cuda_device = parse_cuda_list_arg(args.cuda)
    # Device selection comes from the FLAGS object, not args.cuda.
    cuda_device = FLAGS.device_idxs
    pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(
        args, cuda_device)
    tasks = sorted(set(pretrain_tasks + target_tasks), key=lambda x: x.name)
    log.info("\tFinished loading tasks in %.3fs", time.time() - start_time)
    log.info("\t Tasks: {}".format([task.name for task in tasks]))
    # Build model
    log.info("Building model...")
    start_time = time.time()
    model = build_model(args, vocab, word_embs, tasks, cuda_device)
    log.info("Finished building model in %.3fs", time.time() - start_time)
    # Start Tensorboard if requested
    if cl_args.tensorboard:
        tb_logdir = os.path.join(args.run_dir, "tensorboard")
        _run_background_tensorboard(tb_logdir, cl_args.tensorboard_port)
    check_configurations(args, pretrain_tasks, target_tasks)
    if args.do_pretrain:
        # Train on pretrain tasks
        log.info("Training...")
        # With a single pretrain task, stop on its own metric; otherwise macro average.
        stop_metric = pretrain_tasks[0].val_metric if len(
            pretrain_tasks) == 1 else "macro_avg"
        should_decrease = (pretrain_tasks[0].val_metric_decreases
                           if len(pretrain_tasks) == 1 else False)
        trainer, _, opt_params, schd_params = build_trainer(args, cuda_device,
                                                            [], model,
                                                            args.run_dir,
                                                            should_decrease,
                                                            phase="pretrain")
        to_train = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
        _ = trainer.train(
            pretrain_tasks,
            stop_metric,
            args.batch_size,
            args.weighting_method,
            args.scaling_method,
            to_train,
            opt_params,
            schd_params,
            args.load_model,
            phase="pretrain",
        )
    # For checkpointing logic
    # strict loading only when no target-task heads will be added later.
    if not args.do_target_task_training:
        strict = True
    else:
        strict = False
    if args.do_target_task_training:
        # Train on target tasks
        pre_target_train_path = setup_target_task_training(
            args, target_tasks, model, strict)
        target_tasks_to_train = copy.deepcopy(target_tasks)
        # Check for previous target train checkpoints
        task_to_restore, _, _ = check_for_previous_checkpoints(
            args.run_dir, target_tasks_to_train, "target_train", args.load_model)
        if task_to_restore is not None:
            # If there is a task to restore from, target train only on target tasks
            # including and following that task.
            last_task_index = [task.name for task in target_tasks_to_train
                               ].index(task_to_restore)
            target_tasks_to_train = target_tasks_to_train[last_task_index:]
        for task in target_tasks_to_train:
            # Skip tasks that should not be trained on.
            if task.eval_only_task:
                continue
            params_to_train = load_model_for_target_train_run(
                args, pre_target_train_path, model, strict, task, cuda_device)
            trainer, _, opt_params, schd_params = build_trainer(
                args,
                cuda_device,
                [task.name],
                model,
                args.run_dir,
                task.val_metric_decreases,
                phase="target_train",
            )
            _ = trainer.train(
                tasks=[task],
                stop_metric=task.val_metric,
                batch_size=args.batch_size,
                weighting_method=args.weighting_method,
                scaling_method=args.scaling_method,
                train_params=params_to_train,
                optimizer_params=opt_params,
                scheduler_params=schd_params,
                # Resume only the task we are restoring from; others start fresh.
                load_model=(task.name == task_to_restore),
                phase="target_train",
            )
    if args.do_full_eval:
        log.info("Evaluating...")
        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)
        results_dict = {'run_name': [args.run_name]}
        # Evaluate on target_tasks.
        for task in target_tasks:
            # Find the task-specific best checkpoint to evaluate on.
            # Model may be wrapped for multi-device; fetch the attribute indirectly.
            task_params = get_model_attribute(model, "_get_task_params", cuda_device)
            task_to_use = task_params(task.name).get("use_classifier", task.name)
            ckpt_path = get_best_checkpoint_path(args, "eval", task_to_use)
            assert ckpt_path is not None
            load_model_state(model, ckpt_path, cuda_device, skip_task_models=[], strict=strict)
            current_tasks_val_results = evaluate_and_write(
                args, model, [task], splits_to_write, cuda_device)
            results_dict = {**results_dict, **current_tasks_val_results}
        # Prepend this run's results to the shared tabular results CSV.
        tabular_results_csv = os.path.join(SMALL_SHARED_SERVER_DIR, "tabular_results.csv")
        existing_results_df = pd.read_csv(tabular_results_csv, index_col=False)
        new_results_df = pd.DataFrame.from_dict(results_dict)
        updated_results_df = new_results_df.append(existing_results_df, sort=False)
        with open(tabular_results_csv, 'w') as f:
            log.info(f"Prepending results to {tabular_results_csv}.")
            updated_results_df.to_csv(f, header=True, index=False)
    if args.delete_checkpoints_when_done and not args.keep_all_checkpoints:
        log.info("Deleting all checkpoints.")
        delete_all_checkpoints(args.run_dir)
    log.info("Done!")