def main(cfg: DictConfig):
    """Entry point for reader training / validation driven by a Hydra config.

    Creates the output directory, finalizes GPU/distributed settings, seeds
    RNGs, saves the resolved config, then trains if train files are given,
    validates if only dev files are given, and warns otherwise.

    :param cfg: Hydra/OmegaConf configuration object.
    """
    if cfg.output_dir is not None:
        os.makedirs(cfg.output_dir, exist_ok=True)

    cfg = setup_cfg_gpu(cfg)
    set_seed(cfg)
    get_gpu_info(rank=cfg.local_rank)  # for now only work with single-GPU and DDP mode

    if cfg.local_rank in [-1, 0]:
        logger.info("CFG (after gpu configuration):")
        logger.info("%s", OmegaConf.to_yaml(cfg))
        # Save config.
        # FIX: the previous code did `yaml.dump(eval(str(cfg)), fout)`, which
        # executes the config's repr with eval() — unsafe (arbitrary code
        # execution) and fragile (breaks on any value whose repr is not a
        # Python literal). Serialize through OmegaConf instead.
        with open("config.yaml", "w") as fout:
            fout.write(OmegaConf.to_yaml(cfg))

    trainer = ReaderTrainer(cfg)

    if cfg.train_files is not None:
        trainer.run_train()
    elif cfg.dev_files:
        logger.info("No train files are specified. Run validation.")
        trainer.validate()
    else:
        logger.warning(
            "Neither train_file or (model_file & dev_file) parameters are specified. Nothing to do."
        )
def main(cfg: DictConfig):
    """Train or validate a bi-encoder retriever according to *cfg*.

    Validates the gradient-accumulation setting, prepares the output
    directory and GPU config, then dispatches to training or the two
    retriever validation passes depending on which datasets are configured.

    :param cfg: Hydra/OmegaConf configuration object.
    """
    accum_steps = cfg.train.gradient_accumulation_steps
    if accum_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
                accum_steps
            )
        )

    if cfg.output_dir is not None:
        os.makedirs(cfg.output_dir, exist_ok=True)

    cfg = setup_cfg_gpu(cfg)
    set_seed(cfg)

    # Only the main process logs the resolved configuration.
    if cfg.local_rank in [-1, 0]:
        logger.info("CFG (after gpu configuration):")
        logger.info("%s", OmegaConf.to_yaml(cfg))

    trainer = BiEncoderTrainer(cfg)

    has_train_data = bool(cfg.train_datasets) and len(cfg.train_datasets) > 0
    if has_train_data:
        trainer.run_train()
    elif cfg.model_file and cfg.dev_datasets:
        logger.info(
            "No train files are specified. Run 2 types of validation for specified model file"
        )
        trainer.validate_nll()
        trainer.validate_average_rank()
    else:
        logger.warning(
            "Neither train_file or (model_file & dev_file) parameters are specified. Nothing to do."
        )
def main():
    """CLI entry point for reader training/validation (argparse variant).

    Builds the argument parser from the shared parameter groups plus
    reader-specific options, then runs training or validation depending on
    which file arguments were supplied.
    """
    arg_parser = argparse.ArgumentParser()

    # Shared parameter groups.
    for add_group in (
        add_encoder_params,
        add_training_params,
        add_tokenizer_params,
        add_reader_preprocessing_params,
    ):
        add_group(arg_parser)

    # Reader-specific params.
    arg_parser.add_argument(
        "--max_n_answers",
        default=10,
        type=int,
        help="Max amount of answer spans to marginalize per singe passage",
    )
    arg_parser.add_argument(
        "--passages_per_question",
        type=int,
        default=2,
        help="Total amount of positive and negative passages per question",
    )
    arg_parser.add_argument(
        "--passages_per_question_predict",
        type=int,
        default=50,
        help="Total amount of positive and negative passages per question for evaluation",
    )
    arg_parser.add_argument(
        "--max_answer_length",
        default=10,
        type=int,
        help="The maximum length of an answer that can be generated. This is needed because the start "
        "and end predictions are not conditioned on one another.",
    )
    arg_parser.add_argument(
        "--eval_top_docs",
        nargs="+",
        type=int,
        help="top retrival passages thresholds to analyze prediction results for",
    )
    arg_parser.add_argument("--checkpoint_file_name", type=str, default="dpr_reader")
    arg_parser.add_argument(
        "--prediction_results_file",
        type=str,
        help="path to a file to write prediction results to",
    )

    # Training parameters.
    arg_parser.add_argument(
        "--eval_step",
        default=2000,
        type=int,
        help="batch steps to run validation and save checkpoint",
    )
    arg_parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        help="The output directory where the model checkpoints will be written to",
    )
    arg_parser.add_argument(
        "--fully_resumable",
        action="store_true",
        help="Enables resumable mode by specifying global step dependent random seed before shuffling "
        "in-batch data",
    )

    args = arg_parser.parse_args()

    if args.output_dir is not None:
        os.makedirs(args.output_dir, exist_ok=True)

    setup_args_gpu(args)
    set_seed(args)
    print_args(args)

    trainer = ReaderTrainer(args)

    if args.train_file is not None:
        trainer.run_train()
    elif args.dev_file:
        logger.info("No train files are specified. Run validation.")
        trainer.validate()
    else:
        logger.warning(
            "Neither train_file or (model_file & dev_file) parameters are specified. Nothing to do."
        )
def setup_reader(model_file):
    """Initialize the module-level ``reader`` from a saved model file.

    Parses CLI arguments with the shared parameter groups, configures the
    GPU environment and random seed, then constructs the global Reader.

    :param model_file: path to the serialized reader model checkpoint.
    """
    global reader

    cli = argparse.ArgumentParser()
    for add_group in (
        add_encoder_params,
        add_training_params,
        add_tokenizer_params,
        add_reader_preprocessing_params,
    ):
        add_group(cli)
    args = cli.parse_args()

    setup_args_gpu(args)
    set_seed(args)
    print_args(args)

    reader = Reader(args, model_file)
def main(cfg: DictConfig):
    """Train or validate a bi-encoder retriever, optionally under DeepSpeed.

    When ``cfg.deepspeed`` is set, single-process distributed environment
    variables are defaulted before GPU setup (distributed init happens
    inside ``setup_cfg_gpu``) and the process group is torn down on exit.

    :param cfg: Hydra/OmegaConf configuration object.
    """
    accum_steps = cfg.train.gradient_accumulation_steps
    if accum_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
                accum_steps
            )
        )

    if cfg.output_dir is not None:
        os.makedirs(cfg.output_dir, exist_ok=True)

    if cfg.deepspeed:
        # Fill in sane single-node defaults so torch.distributed can init.
        for env_var, default in (
            ("RANK", "0"),
            ("LOCAL_RANK", "0"),
            ("WORLD_SIZE", "1"),
            ("MASTER_PORT", "3600"),
            ("MASTER_ADDR", "127.0.0.1"),
        ):
            os.environ.setdefault(env_var, default)

    cfg = setup_cfg_gpu(cfg)
    set_seed(cfg)

    if cfg.local_rank in [-1, 0]:
        logger.info("CFG (after gpu configuration):")
        logger.info("%s", OmegaConf.to_yaml(cfg))

    trainer = BiEncoderTrainer(cfg)

    if cfg.train_datasets and len(cfg.train_datasets) > 0:
        trainer.run_train()
    elif cfg.model_file and cfg.dev_datasets:
        logger.info(
            "No train files are specified. Run 2 types of validation for specified model file"
        )
        trainer.validate_nll()
        trainer.validate_average_rank()
    else:
        logger.warning(
            "Neither train_file or (model_file & dev_file) parameters are specified. Nothing to do."
        )

    if cfg.deepspeed:
        dist_cleanup()
def main(cfg: DictConfig):
    """Entry point for joint retriever+reader ("one for all") training.

    Validates the gradient-accumulation setting, prepares output/GPU state,
    saves the resolved config, then either trains or runs the requested
    validation passes (retriever and/or reader) for a given model file.

    :param cfg: Hydra/OmegaConf configuration object.
    """
    if cfg.train.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
                cfg.train.gradient_accumulation_steps
            )
        )

    if cfg.output_dir is not None:
        os.makedirs(cfg.output_dir, exist_ok=True)

    cfg = setup_cfg_gpu(cfg)
    set_seed(cfg)

    if cfg.local_rank in [-1, 0]:
        logger.info("CFG (after gpu configuration):")
        logger.info("%s", OmegaConf.to_yaml(cfg))
        # Save config.
        # FIX: the previous code did `yaml.dump(eval(str(cfg)), fout)`, which
        # executes the config's repr with eval() — unsafe (arbitrary code
        # execution) and fragile (breaks on any value whose repr is not a
        # Python literal). Serialize through OmegaConf instead.
        with open("config.yaml", "w") as fout:
            fout.write(OmegaConf.to_yaml(cfg))

    trainer = OneForAllTrainer(cfg)

    if cfg.train_datasets and len(cfg.train_datasets) > 0:
        trainer.run_train()
    elif cfg.model_file and cfg.dev_datasets:
        logger.info("No train files are specified.")
        if cfg.evaluate_retriever:
            logger.info("Run 2 types of retriever validation for specified model file")
            trainer.validate_biencoder_nll()
            trainer.validate_biencoder_average_rank()
        if cfg.evaluate_reader:
            logger.info("Run reader validation for specified model file")
            trainer.validate_reader()
    else:
        logger.warning(
            "Neither train_file or (model_file & dev_file) parameters are specified. Nothing to do."
        )
def main(cfg: DictConfig):
    """Run reader training when train files exist, else validation.

    :param cfg: Hydra/OmegaConf configuration object.
    """
    if cfg.output_dir is not None:
        os.makedirs(cfg.output_dir, exist_ok=True)

    cfg = setup_cfg_gpu(cfg)
    set_seed(cfg)

    # Only the main process logs the resolved configuration.
    if cfg.local_rank in [-1, 0]:
        logger.info("CFG (after gpu configuration):")
        logger.info("%s", OmegaConf.to_yaml(cfg))

    trainer = ReaderTrainer(cfg)

    if cfg.train_files is not None:
        trainer.run_train()
        return
    if cfg.dev_files:
        logger.info("No train files are specified. Run validation.")
        trainer.validate()
        return
    logger.warning(
        "Neither train_file or (model_file & dev_file) parameters are specified. Nothing to do."
    )
def main():
    """CLI entry point for bi-encoder training/validation (argparse variant).

    Builds the argument parser from the shared parameter groups plus
    bi-encoder-specific options, validates gradient accumulation, then
    runs training or the two validation passes.
    """
    arg_parser = argparse.ArgumentParser()

    # Shared parameter groups.
    for add_group in (add_encoder_params, add_training_params, add_tokenizer_params):
        add_group(arg_parser)

    # Biencoder-specific training features.
    arg_parser.add_argument(
        "--eval_per_epoch",
        default=1,
        type=int,
        help="How many times it evaluates on dev set per epoch and saves a checkpoint",
    )
    arg_parser.add_argument(
        "--global_loss_buf_sz",
        type=int,
        default=150000,
        help='Buffer size for distributed mode representations al gather operation. '
        'Increase this if you see errors like "encoded data exceeds max_size ..."',
    )
    arg_parser.add_argument("--fix_ctx_encoder", action="store_true")
    arg_parser.add_argument("--shuffle_positive_ctx", action="store_true")

    # Input/output src params.
    arg_parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        help="The output directory where the model checkpoints will be written or resumed from",
    )

    # Data handling parameters.
    arg_parser.add_argument(
        "--hard_negatives",
        default=1,
        type=int,
        help="amount of hard negative ctx per question",
    )
    arg_parser.add_argument(
        "--other_negatives",
        default=0,
        type=int,
        help="amount of 'other' negative ctx per question",
    )
    arg_parser.add_argument(
        "--train_files_upsample_rates",
        type=str,
        help="list of up-sample rates per each train file. Example: [1,2,1]",
    )

    # Parameters for Av.rank validation method.
    arg_parser.add_argument(
        "--val_av_rank_start_epoch",
        type=int,
        default=10000,
        help="Av.rank validation: the epoch from which to enable this validation",
    )
    arg_parser.add_argument(
        "--val_av_rank_hard_neg",
        type=int,
        default=30,
        help="Av.rank validation: how many hard negatives to take from each question pool",
    )
    arg_parser.add_argument(
        "--val_av_rank_other_neg",
        type=int,
        default=30,
        help="Av.rank validation: how many 'other' negatives to take from each question pool",
    )
    arg_parser.add_argument(
        "--val_av_rank_bsz",
        type=int,
        default=128,
        help="Av.rank validation: batch size to process passages",
    )
    arg_parser.add_argument(
        "--val_av_rank_max_qs",
        type=int,
        default=10000,
        help="Av.rank validation: max num of questions",
    )
    arg_parser.add_argument(
        "--checkpoint_file_name",
        type=str,
        default="dpr_biencoder",
        help="Checkpoints file prefix",
    )

    # My Model specific params.
    arg_parser.add_argument("--use_linear", default=False, action="store_true")

    args = arg_parser.parse_args()

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
                args.gradient_accumulation_steps
            )
        )

    if args.output_dir is not None:
        os.makedirs(args.output_dir, exist_ok=True)

    setup_args_gpu(args)
    set_seed(args)
    print_args(args)

    trainer = BiEncoderTrainer(args)

    if args.train_file is not None:
        trainer.run_train()
    elif args.model_file and args.dev_file:
        logger.info(
            "No train files are specified. Run 2 types of validation for specified model file"
        )
        trainer.validate_nll()
        trainer.validate_average_rank()
    else:
        logger.warning(
            "Neither train_file or (model_file & dev_file) parameters are specified. Nothing to do."
        )