Ejemplo n.º 1
0
if __name__ == '__main__':
    # Command line interface for the training script.
    parser = argparse.ArgumentParser(
        description="\nMain training loop.\n",
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Only the model configuration file is mandatory; it gets its own
    # group so the help output lists it separately.
    required_args = parser.add_argument_group("Required arguments")
    required_args.add_argument(
        "-C", "--model-conf",
        type=str,
        required=True,
        help="Model configuration file",
    )
    parser.add_argument(
        "-s", "--suffix",
        type=str,
        default=None,
        help="Suffix to append to model weights file",
    )
    parser.add_argument(
        "-d", "--dry-run",
        action='store_true',
        default=False,
        help="Don't save the weights of the model",
    )
    args = parser.parse_args()

    # Build the Corpora object and read the model configuration, then load
    # the "corpora" data stored under <output_path>/<short_name>.
    c = Corpora()
    conf = Config(args.model_conf)
    corpora_dir = os.path.join(conf.output_path, conf.short_name)
    try:
        c.load(corpora_dir, "corpora")
    except IOError:
        # Loading failed: point the user at the preparation step and stop.
        print("Please run nmt-prepare to prepare your corpora and vocab files.")
        sys.exit(1)

    # Model subfolder, e.g. ~/models/iwslt15-en20k-fr20k
    # (will include information about the dataset as well).
    base_model_path = os.path.join(conf.model_path, conf.short_name)

    # Identifier built from the model type and its main hyper-parameters,
    # e.g. cstm_modelv2-emb384-h1536-h256-bs128.
    # This will be the architecture folder as well.
    model_id = "%s-emb%d-h%d-h%d-bs%d" % (
        conf.model_type,
        conf.model_params["embedding"],
        conf.model_params["hidden1"],
        conf.model_params["hidden2"],
        conf.batch_size,
    )
Ejemplo n.º 2
0
    # Training corpora are addressed by a prefix path; a ".<lang>" suffix is
    # appended to it further down. Examples of prefixes:
    #   ~/data/iwslt15/en-fr/tok/ted.tok
    #   ~/data/iwslt15/en-fr/raw/ted
    required_args.add_argument(
        "-R", "--train",
        type=str,
        required=True,
        help="Prefix path for training corpora",
    )

    # Optional arguments
    parser.add_argument(
        "-C", "--conf",
        type=str,
        default=None,
        help="PyNMT configuration file (Overrides other arguments)",
    )
    parser.add_argument(
        "-V", "--dev",
        type=str,
        default=None,
        help="Prefix path for validation corpora",
    )
    parser.add_argument(
        "-E", "--test",
        type=str,
        default=None,
        help="Prefix path for test corpora",
    )
    parser.add_argument(
        "-S", "--source-vocab-size",
        type=int,
        default=20000,
        help="Source vocab size (Default: 20k)",
    )
    parser.add_argument(
        "-T", "--target-vocab-size",
        type=int,
        default=20000,
        help="Target vocab size (Default: 20k)",
    )

    args = parser.parse_args()
    c = Corpora()

    if args.conf:
        # A configuration file was given: its values replace the
        # corresponding command line arguments.
        conf = Config(args.conf)
        print("-C passed, ignoring other arguments except -o and -n.")

        args.source_lang = conf.src_lang
        args.target_lang = conf.trg_lang
        args.source_vocab_size = conf.src_vocab_size
        args.target_vocab_size = conf.trg_vocab_size
        args.train = conf.train_prefix

        # Validation and test prefixes are only taken over when the
        # configuration enables them; otherwise they are set to False.
        if conf.do_validate:
            args.dev = conf.valid_prefix
        else:
            args.dev = False
        if conf.do_test:
            args.test = conf.test_prefix
        else:
            args.test = False

        args.max_len = conf.max_seq_len

    # First, training: source-side file is "<train prefix>.<source lang>"
    train_src_file = "%s.%s" % (args.train, args.source_lang)
Ejemplo n.º 3
0
    # Remaining mandatory arguments: target vocabulary, input sentences and
    # the n-best output file.
    required_args.add_argument(
        "-T", "--target-vocab",
        type=str,
        required=True,
        help="Target vocab file",
    )
    required_args.add_argument(
        "-s", "--source",
        type=str,
        required=True,
        help="Source sentences file",
    )
    required_args.add_argument(
        "-o", "--output",
        type=str,
        required=True,
        help="Output n-best file",
    )

    # Optional arguments
    parser.add_argument(
        "-b", "--beamsize",
        type=int,
        default=12,
        help="Beam size",
    )
    parser.add_argument(
        "-l", "--lm",
        type=str,
        default=None,
        help="LM model file for beam search (optional)",
    )
    parser.add_argument(
        "-r", "--reference",
        type=str,
        default=None,
        help="Reference sentences file for BLEU evaluation",
    )
    parser.add_argument(
        "-u", "--generate-unk",
        action='store_true',
        default=False,
        help="Whether to generate UNK symbols (default: False)",
    )
    parser.add_argument(
        "-U", "--remove-unk",
        action='store_true',
        default=False,
        help="Whether to filter out source sentences with UNK (default: False)",
    )
    # Disabled option, kept for reference:
    #parser.add_argument("-w", "--weights", type=str, help="TM & LM weights (optional)")

    args = parser.parse_args()

    # Read the model configuration file and create the Corpora object
    conf = Config(args.model_conf)
    corp = Corpora()

    # Optional reference file path (None unless -r/--reference was given).
    ref_trans_file = args.reference

    # Read all source sentences up front; abort when the file is missing.
    src_sents = None
    if args.source and os.path.exists(args.source):
        # The context manager closes the handle as soon as the lines are
        # read instead of leaving it open until garbage collection.
        with open(args.source, "rb") as src_file:
            src_sents = src_file.readlines()
    else:
        # A parenthesized single-argument print behaves the same as a
        # Python 2 print statement and is a valid call on Python 3.
        print("%s not found." % args.source)
        sys.exit(1)

    # Load the model
    cstm_model = None
    try:
        cstm_model = importlib.import_module(conf.model_type)
    except ImportError as e:
        logging.error("No python module %s found." % conf.model_type)