Example #1
    def add_model_specific_args(parser, root_dir):
        BaseTransformer.add_model_specific_args(parser, root_dir)
        parser.add_argument(
            "--max_seq_length",
            default=128,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )

        parser.add_argument(
            "--task",
            default="",
            type=str,
            required=True,
            help="The GLUE task to run",
        )

        parser.add_argument(
            "--data_dir",
            default=None,
            type=str,
            required=True,
            help="The input data dir. Should contain the training files for the CoNLL-2003 NER task.",
        )

        parser.add_argument(
            "--overwrite_cache",
            action="store_true",
            help="Overwrite the cached training and evaluation sets")

        return parser
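
A minimal, self-contained sketch of how such a staticmethod is typically wired up (hypothetical driver code; in the real scripts the method lives on the model class, which is why it is called through the class after BaseTransformer has added the shared options):

    import argparse

    # Stand-in for the staticmethod above, so this sketch runs on its own.
    def add_model_specific_args(parser, root_dir):
        parser.add_argument("--max_seq_length", default=128, type=int)
        parser.add_argument("--task", default="", type=str, required=True)
        return parser

    parser = add_model_specific_args(argparse.ArgumentParser(), root_dir=".")
    args = parser.parse_args(["--task", "mrpc"])
    print(args.max_seq_length, args.task)  # -> 128 mrpc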
Example #2
    def add_model_specific_args(parser, root_dir):
        BaseTransformer.add_model_specific_args(parser, root_dir)
        # Add BART specific options
        parser.add_argument(
            "--max_source_length",
            default=1024,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--max_target_length",
            default=56,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )

        parser.add_argument(
            "--data_dir",
            default=None,
            type=str,
            required=True,
            help="The input data dir. Should contain the dataset files for the CNN/DM summarization task.",
        )
        parser.add_argument(
            "--logging_dir",
            default='tensorboard_logs',
            type=str,
            required=False,
            help="The directory for tensorboard_logs",
        )
        return parser
Example #3
    def add_model_specific_args(parser, root_dir):
        BaseTransformer.add_model_specific_args(parser, root_dir)
        add_generic_args(parser, root_dir)
        parser.add_argument(
            "--max_source_length",
            default=128,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--max_target_length",
            default=25,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--val_max_target_length",
            default=25,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--test_max_target_length",
            default=25,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument("--logger_name", type=str, choices=["default", "wandb", "wandb_shared"], default="default")
        parser.add_argument("--n_train", type=int, default=-1, required=False, help="# examples. -1 means use all.")
        parser.add_argument("--n_val", type=int, default=-1, required=False, help="# examples. -1 means use all.")
        parser.add_argument("--n_test", type=int, default=-1, required=False, help="# examples. -1 means use all.")
        parser.add_argument("--label_smoothing", type=float, default=0.0, required=False)
        parser.add_argument(
            "--prefix",
            type=str,
            default=None,
            help="Prefix added at the beginning of each text, typically used with T5-based models.",
        )
        parser.add_argument(
            "--early_stopping_patience",
            type=int,
            default=-1,
            required=False,
            help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will effect it.",
        )
        parser.add_argument(
            "--distributed-port", type=int, default=-1, required=False, help="Port number for distributed training."
        )
        parser.add_argument(
            "--model_type",
            choices=["rag_sequence", "rag_token", "bart", "t5"],
            type=str,
            help="RAG model type: sequence or token, if none specified, the type is inferred from the model_name_or_path",
        )

        return parser
Example #4
    def add_model_specific_args(parser, root_dir):
        BaseTransformer.add_model_specific_args(parser, root_dir)
        parser.add_argument(
            "--max_seq_length",
            default=128,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )

        parser.add_argument(
            "--task",
            default="",
            type=str,
            required=True,
            help="The GLUE task to run",
        )
        parser.add_argument(
            "--gpus",
            default=0,
            type=int,
            help="The number of GPUs to allocate; 0 (the default) means none",
        )

        parser.add_argument(
            "--overwrite_cache",
            action="store_true",
            help="Overwrite the cached training and evaluation sets")

        return parser
Example #5
    def add_model_specific_args(parser, root_dir):
        BaseTransformer.add_model_specific_args(parser, root_dir)
        parser.add_argument(
            "--max_seq_length",
            default=128,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )

        parser.add_argument(
            "--gpus",
            default=0,
            type=int,
            help="The number of GPUs allocated for this, it is by default 0 meaning none",
        )

        parser.add_argument(
            "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets"
        )

        def int_or_float(value):
            # argparse type converter: values containing '.' parse as float,
            # everything else as int
            if "." in value:
                return float(value)
            return int(value)

        parser.add_argument(
            "--val_check_interval",
            default=1.0,
            type=int_or_float,
            help="How often to check the validation set. Use a float to check within a training epoch, an int to check every n training batches.",
        )

        return parser
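
A quick sanity check of the int-or-float converter above (hypothetical snippet, reusing the int_or_float helper from the listing):

    # PyTorch Lightning interprets val_check_interval by type:
    # a float is a fraction of a training epoch, an int is every n batches.
    assert int_or_float("0.25") == 0.25 and isinstance(int_or_float("0.25"), float)
    assert int_or_float("500") == 500 and isinstance(int_or_float("500"), int)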
Example #6
 def add_model_specific_args(parser, root_dir):
     BaseTransformer.add_model_specific_args(parser, root_dir)
     add_generic_args(parser, root_dir)
     parser.add_argument(
         "--max_source_length",
         default=1024,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--max_target_length",
         default=100,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--val_max_target_length",
         default=100,  # these defaults are optimized for CNNDM. For xsum, see README.md.
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--test_max_target_length",
         default=100,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument("--freeze_encoder", action="store_true")
     parser.add_argument("--freeze_embeds", action="store_true")
     parser.add_argument("--sortish_sampler", action="store_true", default=False)
     parser.add_argument("--max_tokens_per_batch", type=int, default=None)
     parser.add_argument("--logger_name", type=str, choices=["default", "wandb", "wandb_shared"], default="default")
     parser.add_argument("--n_train", type=int, default=-1, required=False, help="# examples. -1 means use all.")
     parser.add_argument("--n_val", type=int, default=500, required=False, help="# examples. -1 means use all.")
     parser.add_argument("--n_test", type=int, default=-1, required=False, help="# examples. -1 means use all.")
     parser.add_argument(
         "--task", type=str, default="summarization", required=False, help="Task name, e.g. summarization."
     )
     parser.add_argument("--label_smoothing", type=float, default=0.0, required=False)
     parser.add_argument("--src_lang", type=str, default="", required=False)
     parser.add_argument("--tgt_lang", type=str, default="", required=False)
     parser.add_argument("--eval_beams", type=int, default=None, required=False)
     parser.add_argument(
         "--val_metric", type=str, default=None, required=False, choices=["bleu", "rouge2", "loss", None]
     )
     parser.add_argument("--eval_max_gen_length", type=int, default=None, help="never generate more than n tokens")
     parser.add_argument("--length_penalty", type=float, default=1.0, help="never generate more than n tokens")
     parser.add_argument("--save_top_k", type=int, default=1, required=False, help="How many checkpoints to save")
     parser.add_argument(
         "--early_stopping_patience",
         type=int,
         default=-1,
         required=False,
         help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will effect it.",
     )
     return parser
Example #7
    def add_model_specific_args(parser, root_dir):
        # Add NER specific options
        BaseTransformer.add_model_specific_args(parser, root_dir)
        parser.add_argument(
            "--task_type", default="NER", type=str, help="Task type to fine tune in training (e.g. NER, POS, etc)"
        )
        parser.add_argument(
            "--max_seq_length",
            default=128,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )

        parser.add_argument(
            "--labels",
            default="",
            type=str,
            help="Path to a file containing all labels. If not specified, CoNLL-2003 labels are used.",
        )
        parser.add_argument(
            "--gpus",
            default=0,
            type=int,
            help="The number of GPUs allocated for this, it is by default 0 meaning none",
        )

        parser.add_argument(
            "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets"
        )

        return parser
Example #8
 def add_model_specific_args(parser, root_dir):
     BaseTransformer.add_model_specific_args(parser, root_dir)
     add_generic_args(parser, root_dir)
     parser.add_argument(
         "--max_source_length",
         default=1024,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--max_target_length",
         default=56,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--val_max_target_length",
         default=142,  # these defaults are optimized for CNNDM. For xsum, see README.md.
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--test_max_target_length",
         default=142,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--data_dir",
         type=str,
         required=True,
         help="The input data dir. Should contain train.source, train.target, val.source, val.target, test.source, test.target",
     )
     parser.add_argument("--freeze_encoder", action="store_true")
     parser.add_argument("--freeze_embeds", action="store_true")
     parser.add_argument("--sortish_sampler", action="store_true", default=False)
     parser.add_argument("--logger_name", type=str, choices=["default", "wandb", "wandb_shared"], default="default")
     parser.add_argument("--n_train", type=int, default=-1, required=False, help="# examples. -1 means use all.")
     parser.add_argument("--n_val", type=int, default=500, required=False, help="# examples. -1 means use all.")
     parser.add_argument("--n_test", type=int, default=-1, required=False, help="# examples. -1 means use all.")
     parser.add_argument(
         "--task", type=str, default="summarization", required=False, help="Task name, e.g. summarization."
     )
     parser.add_argument("--label_smoothing", type=float, default=0.0, required=False)
     parser.add_argument("--src_lang", type=str, default="", required=False)
     parser.add_argument("--tgt_lang", type=str, default="", required=False)
     parser.add_argument(
         "--early_stopping_patience",
         type=int,
         default=-1,
         required=False,
         help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will effect it.",
     )
     return parser
Example #9
 def add_model_specific_args(parser, root_dir):
     BaseTransformer.add_model_specific_args(parser, root_dir)
     add_generic_args(parser, root_dir)
     parser.add_argument(
         "--max_source_length",
         default=48,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--max_target_length",
         default=24,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--val_max_target_length",
         default=24,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--test_max_target_length",
         default=24,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--data_dir",
         type=str,
         required=True,
         help="The input data dir. Should contain train.source, train.target, val.source, val.target, test.source, test.target",
     )
     parser.add_argument("--freeze_encoder", action="store_true")
     parser.add_argument("--freeze_embeds", action="store_true")
     parser.add_argument("--sortish_sampler",
                         action="store_true", default=False)
     parser.add_argument("--logger_name", type=str,
                         choices=["default", "wandb", "wandb_shared"], default="default")
     parser.add_argument("--n_train", type=int, default=-1,
                         required=False, help="# examples. -1 means use all.")
     parser.add_argument("--n_val", type=int, default=500,
                         required=False, help="# examples. -1 means use all.")
     parser.add_argument("--n_test", type=int, default=-1,
                         required=False, help="# examples. -1 means use all.")
     parser.add_argument(
         "--task", type=str, default="summarization", required=False, help="Task name, e.g. summarization."
     )
     parser.add_argument("--src_lang", type=str, default="", required=False)
     parser.add_argument("--tgt_lang", type=str, default="", required=False)
     parser.add_argument("--atomic", action="store_true")
     return parser
Example #10
    def add_model_specific_args(parser, root_dir):
        BaseTransformer.add_model_specific_args(parser, root_dir)
        # Add BART specific options
        parser.add_argument(
            "--max_source_length",
            default=384,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--max_target_length",
            default=512,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )

        parser.add_argument(
            "--data_dir",
            default=None,
            type=str,
            required=True,
            help="The input data dir. Should contain the dataset files for the CNN/DM summarization task.",
        )

        parser.add_argument(
            "--early_stopping_patience",
            type=int,
            default=-1,
            required=False,
            help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will affect it.",
        )
        parser.add_argument(
            "--checkpoint",
            default=None,
            type=str,
            help="The checkpoint to initialize model",
        )
        parser.add_argument(
            "--checkpoint_model",
            default=None,
            type=str,
            help="Path to the model checkpoint to load",
        )
        return parser
Example #11
 def add_model_specific_args(parser, root_dir):
     BaseTransformer.add_model_specific_args(parser, root_dir)
     add_generic_args(parser, root_dir)
     # fmt: off
     parser.add_argument(
         "--max_source_length",
         default=1024,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--max_target_length",
         default=56,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--val_max_target_length",
         default=142,  # these defaults are optimized for CNNDM. For xsum, see README.md.
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--test_max_target_length",
         default=142,
         type=int,
         help="The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.",
     )
     parser.add_argument(
         "--data_dir",
         type=str,
         required=True,
         help="The input data dir. Should contain train.source, train.target, val.source, val.target, test.source, test.target",
     )
     parser.add_argument("--freeze_encoder", action="store_true")
     parser.add_argument("--freeze_embeds", action="store_true")
     parser.add_argument("--sortish_sampler", action="store_true", default=False)
     parser.add_argument("--logger", type=str, choices=["default", "wandb", "wandb_shared"], default="default")
     # parser.add_argument("--wandb_project", type=str, default="default")
     parser.add_argument("--n_train", type=int, default=-1, required=False, help="# examples. -1 means use all.")
     parser.add_argument("--n_val", type=int, default=500, required=False, help="# examples. -1 means use all.")
     parser.add_argument("--n_test", type=int, default=-1, required=False, help="# examples. -1 means use all.")
     # fmt: on
     return parser
Example #12
def parse_args():
    parser = ArgumentParser()

    # add some script specific args
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument(
        "--output_dir", type=str, default="", help="Directory to write outputs to or load checkpoint from"
    )
    parser.add_argument("--do_train", action="store_true", help="Run training loop")
    parser.add_argument("--do_predict", action="store_true", help="Run test loop")

    # enable all trainer args
    parser = Trainer.add_argparse_args(parser)

    # add the base module args
    parser = BaseTransformer.add_model_specific_args(parser)

    # add the glue module args
    parser = GLUETransformer.add_model_specific_args(parser)

    # cook them all up :)
    args = parser.parse_args()

    return args
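
A hedged sketch of how the parsed args might then drive a run; Trainer.from_argparse_args is PyTorch Lightning's counterpart to the Trainer.add_argparse_args call above, and GLUETransformer stands in for whatever LightningModule the script actually trains:

    # Hypothetical wiring, not part of the original example.
    args = parse_args()
    model = GLUETransformer(args)               # build the module from CLI args
    trainer = Trainer.from_argparse_args(args)  # consume the trainer flags
    if args.do_train:
        trainer.fit(model)
    if args.do_predict:
        trainer.test(model)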
Example #13
    def add_model_specific_args(parser, root_dir):
        BaseTransformer.add_model_specific_args(parser, root_dir)
        add_generic_args(parser, root_dir)
        parser.add_argument(
            "--max_source_length",
            default=1024,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--max_target_length",
            default=56,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--val_max_target_length",
            default=
            142,  # these defaults are optimized for CNNDM. For xsum, see README.md.
            type=int,
            help=
            "The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--test_max_target_length",
            default=142,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument("--freeze_encoder", action="store_true")
        parser.add_argument("--freeze_embeds", action="store_true")
        parser.add_argument("--sortish_sampler",
                            action="store_true",
                            default=False)
        parser.add_argument("--max_tokens_per_batch", type=int, default=None)
        parser.add_argument("--logger_name",
                            type=str,
                            choices=["default", "wandb", "wandb_shared"],
                            default="default")
        parser.add_argument("--n_train",
                            type=int,
                            default=-1,
                            required=False,
                            help="# examples. -1 means use all.")
        parser.add_argument("--n_val",
                            type=int,
                            default=500,
                            required=False,
                            help="# examples. -1 means use all.")
        parser.add_argument("--n_test",
                            type=int,
                            default=-1,
                            required=False,
                            help="# examples. -1 means use all.")
        parser.add_argument("--task",
                            type=str,
                            default="summarization",
                            required=False,
                            help="# examples. -1 means use all.")
        parser.add_argument("--label_smoothing",
                            type=float,
                            default=0.0,
                            required=False)
        parser.add_argument("--src_lang", type=str, default="", required=False)
        parser.add_argument("--tgt_lang", type=str, default="", required=False)
        parser.add_argument(
            "--eval_beams",
            type=int,
            default=None,
            required=False,
            help="# beams to use. 0 corresponds to not using beam search.")
        parser.add_argument("--val_metric",
                            type=str,
                            default=None,
                            required=False,
                            choices=["bleu", "rouge2", "loss", None])
        parser.add_argument("--eval_max_gen_length",
                            type=int,
                            default=None,
                            help="never generate more than n tokens")
        parser.add_argument("--save_top_k",
                            type=int,
                            default=1,
                            required=False,
                            help="How many checkpoints to save")
        parser.add_argument(
            "--early_stopping_patience",
            type=int,
            default=-1,
            required=False,
            help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will affect it.",
        )
        parser.add_argument(
            '--json-summary',
            type=str,
            default="results/dllogger.json",
            help='If provided, the json summary will be written to '
            'the specified file.')
        parser.add_argument(
            '--distill',
            type=str,
            default=None,
            help="string indicating distillation to perform, only sft supported",
            choices=["sft", None])
        parser.add_argument(
            '--layers',
            type=str,
            default=None,
            help="string indicating which layers to distill for SFT, split by '-' (ex. 0-6-11)"
        )
        parser.add_argument('--do_encoder',
                            action="store_true",
                            default=False,
                            help="if true distills the encoder")
        parser.add_argument('--do_decoder',
                            action="store_true",
                            default=False,
                            help="if true distills the decoder")

        return parser
Example #14
    def add_model_specific_args(parser, root_dir):
        BaseTransformer.add_model_specific_args(parser, root_dir)
        add_generic_args(parser, root_dir)
        parser.add_argument(
            "--max_source_length",
            default=128,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--max_target_length",
            default=25,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--val_max_target_length",
            default=25,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--test_max_target_length",
            default=25,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument("--logger_name",
                            type=str,
                            choices=["default", "wandb", "wandb_shared"],
                            default="default")
        parser.add_argument("--n_train",
                            type=int,
                            default=-1,
                            required=False,
                            help="# examples. -1 means use all.")
        parser.add_argument("--n_val",
                            type=int,
                            default=-1,
                            required=False,
                            help="# examples. -1 means use all.")
        parser.add_argument("--n_test",
                            type=int,
                            default=-1,
                            required=False,
                            help="# examples. -1 means use all.")
        parser.add_argument("--label_smoothing",
                            type=float,
                            default=0.0,
                            required=False)
        parser.add_argument(
            "--prefix",
            type=str,
            default=None,
            help="Prefix added at the beginning of each text, typically used with T5-based models.",
        )
        parser.add_argument(
            "--early_stopping_patience",
            type=int,
            default=-1,
            required=False,
            help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will affect it.",
        )
        parser.add_argument("--distributed-port",
                            type=int,
                            default=-1,
                            required=False,
                            help="Port number for distributed training.")
        parser.add_argument(
            "--model_type",
            choices=["rag_sequence", "rag_token", "bart", "t5"],
            type=str,
            help="RAG model type: sequence or token; if none is specified, the type is inferred from model_name_or_path",
        )
        parser.add_argument(
            "--context_encoder_name",
            default="facebook/dpr-ctx_encoder-multiset-base",
            type=str,
            help="Name of the pre-trained context encoder checkpoint from the DPR",
        )
        parser.add_argument(
            "--csv_path",
            default=str(Path(__file__).parent / "test_run" / "dummy-kb" / "my_knowledge_dataset.csv"),
            type=str,
            help="path of the raw KB csv",
        )
        parser.add_argument("--end2end",
                            action="store_true",
                            help="whether to train the system end2end or not")
        parser.add_argument("--index_gpus",
                            type=int,
                            help="how many GPUs used in re-encoding process")
        parser.add_argument(
            "--shard_dir",
            type=str,
            default=str(Path(__file__).parent / "test_run" / "kb-shards"),
            help="directory used to keep temporary shards during the re-encode process",
        )

        parser.add_argument(
            "--gpu_order",
            type=str,
            help="order of the GPUs used during fine-tuning. Used to find free GPUs during the re-encode process. I do not have many GPUs :)",
        )

        parser.add_argument("--indexing_freq",
                            type=int,
                            help="frequency of re-encode process")
        return parser
Example #15
    def add_model_specific_args(parser, root_dir):
        BaseTransformer.add_model_specific_args(parser, root_dir)
        add_generic_args(parser, root_dir)
        parser.add_argument(
            "--max_source_length",
            default=1024,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument(
            "--max_target_length",
            default=56,
            type=int,
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded.",
        )
        parser.add_argument("--freeze_encoder", action="store_true")
        parser.add_argument("--freeze_embeds", action="store_true")
        parser.add_argument("--sortish_sampler", action="store_true", default=False)
        parser.add_argument("--max_tokens_per_batch", type=int, default=None)
        parser.add_argument("--logger_name", type=str, choices=["default", "wandb", "wandb_shared"], default="default")
        parser.add_argument("--n_train", type=int, default=-1, required=False, help="# examples. -1 means use all.")
        parser.add_argument("--n_val", type=int, default=500, required=False, help="# examples. -1 means use all.")
        parser.add_argument("--n_test", type=int, default=-1, required=False, help="# examples. -1 means use all.")
        parser.add_argument(
            "--task", type=str, default="summarization", required=False, help="# examples. -1 means use all."
        )
        parser.add_argument("--label_smoothing", type=float, default=0.0, required=False)
        parser.add_argument("--src_lang", type=str, default="", required=False)
        parser.add_argument("--tgt_lang", type=str, default="", required=False)
        parser.add_argument("--eval_beams", type=int, default=None, required=False)
        parser.add_argument(
            "--val_metric", type=str, default=None, required=False, choices=["bleu", "rouge2", "loss", None]
        )
        parser.add_argument("--eval_max_gen_length", type=int, default=None, help="never generate more than n tokens")
        parser.add_argument(
            "--early_stopping_patience",
            type=int,
            default=-1,
            required=False,
            help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will effect it.",
        )

        ####################################
        ##        Decoding Strategy       ##
        ####################################

        parser.add_argument('--decode_method', choices=['greedy', 'beam', 'nucleus'])
        parser.add_argument("--decode_num_beams", default=5, type=int, required=False, help="")
        parser.add_argument("--decode_p", default=0.9, type=float, required=False, help="")

        ####################################
        ##  Unlikelihood Loss Parameters  ##
        ####################################

        parser.add_argument("--unlikelihood_training", action="store_true", help="whether to use unlikelihood training")
        parser.add_argument("--unlikelihood_training_mode", choices=["cochrane", "newsela", "both"], help="which weights to use for unlikelihood training")

        parser.add_argument("--unlikelihood_cochrane_weights_file", 
                            default="data/logr_weights/bart_freq_normalized_ids.txt", 
                            type=str, required=False, 
                            help="The file containing logistic regression weights learned on the Cochrane dataset for use in unlikelihood training")

        parser.add_argument("--unlikelihood_newsela_weights_file", 
                            default="data/logr_weights/bart_freq_newsela_ids.txt", 
                            type=str, required=False, 
                            help="The file containing logistic regression weights learned on the Newsela dataset for use in unlikelihood training")

        parser.add_argument("--unlikelihood_exclude_tokens", default="", type=str, required=False, help="Comma-separated numbers")
        parser.add_argument("--unlikelihood_num_weights", default=100, type=int, required=False, help="The number of weights in unlikelihood training, if -1 use all of them")
        parser.add_argument("--unlikelihood_softmax", action="store_true", help="whether to softmax the token weights in unlikelihood training")
        parser.add_argument("--unlikelihood_temperature", default=2, type=int, help="temperature to use in softmax when normalizing logistic regression weights")
        parser.add_argument("--unlikelihood_selective_penalty", action="store_true", help="whether to use unlikelihood loss only if argmax is the penalty token")
        parser.add_argument("--unlikelihood_alpha", default=100.0, type=float, required=False, help="")


        ##########################################
        ##  Unlikelihood Loss Token Parameters  ##
        ##########################################

        parser.add_argument("--unlikelihood_training_tokens", action="store_true", help="whether to use unlikelihood training at token level")
        parser.add_argument("--unlikelihood_tokens_alpha", default=1.0, type=float, required=False, help="")

        return parser