Beispiel #1
0
def parse_args():
    parser = argparse.ArgumentParser(
        description='umt.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    opts.add_md_help_argument(parser)
    opts.preprocess_opts(parser)
    opt = parser.parse_args()
    return opt
Beispiel #2
0
def parse_args():
    parser = argparse.ArgumentParser(
        description='preprocess.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    opts.add_md_help_argument(parser)
    opts.preprocess_opts(parser)

    opt = parser.parse_args()
    torch.manual_seed(opt.seed)

    return opt
Beispiel #3
0
def parse_args():
    """ Parsing arguments """
    parser = argparse.ArgumentParser(
        description='preprocess.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    opts.preprocess_opts(parser)

    opt = parser.parse_args()
    #  torch.manual_seed(opt.seed)

    return opt
Beispiel #4
0
def parse_args():
    parser = argparse.ArgumentParser(
        description='umt.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    opts.add_md_help_argument(parser)
    opts.model_opts(parser)
    opts.preprocess_opts(parser)
    opts.train_opts(parser)

    opt = parser.parse_args()
    torch.manual_seed(opt.seed)

    if opt.word_vec_size != -1:
        opt.src_word_vec_size = opt.word_vec_size
        opt.tgt_word_vec_size = opt.word_vec_size

    if opt.layers != -1:
        opt.enc_layers = opt.layers
        opt.dec_layers = opt.layers

    opt.brnn = (opt.encoder_type == "brnn")

    # if opt.seed > 0:
    random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    if torch.cuda.is_available() and not opt.gpuid:
        print("WARNING: You have a CUDA device, should run with -gpuid 0")

    if opt.gpuid:
        cuda.set_device(opt.gpuid[0])
        if opt.seed > 0:
            torch.cuda.manual_seed(opt.seed)

    if len(opt.gpuid) > 1:
        sys.stderr.write("Sorry, multigpu isn't supported yet, coming soon!\n")
        sys.exit(1)

    # Set up the Crayon logging server.
    if opt.exp_host != "":
        from pycrayon import CrayonClient

        cc = CrayonClient(hostname=opt.exp_host)

        experiments = cc.get_experiment_names()
        print(experiments)
        if opt.exp in experiments:
            cc.remove_experiment(opt.exp)

    return opt
Beispiel #5
0
def parse_args():
    parser = argparse.ArgumentParser(
        description='preprocess.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    opts.add_md_help_argument(parser)
    opts.preprocess_opts(parser)

    opt = parser.parse_args()
    torch.manual_seed(opt.seed)

    check_existing_pt_files(opt)

    return opt
Beispiel #6
0
def parse_args():
    parser = configargparse.ArgumentParser(
        description='preprocess.py',
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)

    opts.add_md_help_argument(parser)
    opts.config_opts(parser)
    opts.preprocess_opts(parser)

    opt = parser.parse_args()
    torch.manual_seed(opt.seed)

    check_existing_pt_files(opt)

    return opt
def parse_args():
    parser = argparse.ArgumentParser(
        description='template_preprocess.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    opts.add_md_help_argument(parser)
    opts.preprocess_opts(parser)
    group = parser.add_argument_group('Template')
    group.add_argument('-train_template', required=True,
                       help="Path to the training template")
    group.add_argument('-valid_template', required=True,
                       help="Path to the valid template")

    opt = parser.parse_args()
    torch.manual_seed(opt.seed)

    check_existing_pt_files(opt)

    return opt
Beispiel #8
0
                    required=True,
                    help="Output file for the prepared data")

parser.add_argument('-src_vocab', help="Path to an existing source vocabulary")
parser.add_argument('-tgt_vocab', help="Path to an existing target vocabulary")
parser.add_argument('-features_vocabs_prefix',
                    type=str,
                    default='',
                    help="Path prefix to existing features vocabularies")
parser.add_argument('-seed', type=int, default=3435, help="Random seed")
parser.add_argument('-report_every',
                    type=int,
                    default=100000,
                    help="Report status every this many sentences")

opts.preprocess_opts(parser)

opt = parser.parse_args()
torch.manual_seed(opt.seed)


def main():
    print('Preparing training ...')
    with codecs.open(opt.train_src, "r", "utf-8") as src_file:
        src_line = src_file.readline().strip().split()
        _, _, nFeatures = onmt.IO.extractFeatures(src_line)

    fields = onmt.IO.ONMTDataset.get_fields(nFeatures)
    print("Building Training...")
    train = onmt.IO.ONMTDataset(opt.train_src, opt.train_tgt, fields, opt)
    print("Building Vocab...")
import copy
import unittest
import glob
import os
from collections import Counter

import torchtext

import onmt
import onmt.io
import opts
import preprocess


parser = argparse.ArgumentParser(description='preprocess.py')
opts.preprocess_opts(parser)


SAVE_DATA_PREFIX = 'data/test_preprocess'

default_opts = [
    '-data_type', 'text',
    '-train_src', 'data/src-train.txt',
    '-train_tgt', 'data/tgt-train.txt',
    '-valid_src', 'data/src-val.txt',
    '-valid_tgt', 'data/tgt-val.txt',
    '-save_data', SAVE_DATA_PREFIX
]

opt = parser.parse_known_args(default_opts)[0]