Exemple #1
0
def main(args):
    """Run the training / evaluation loop for a single training corpus.

    On entry only ``args.model`` is read: the selected model class registers
    its options on the module-level ``argparser`` and the command line is
    parsed again, replacing ``args``.  The function then creates the run
    directory, installs the global ``outputManager``, builds the corpora,
    model and optimizer, evaluates once before training, trains for
    ``args.max_epoch`` epochs and finally saves the model if
    ``args.save_model`` is set.
    """
    global outputManager

    model_class = get_model_class(args.model)
    model_class.add_config(argparser)
    args = argparser.parse_args()

    args.run_id = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    root_dir = os.path.dirname(os.path.realpath(__file__))
    # only use generated run_path if none provided by user
    if args.run_path is None:
        args.run_path = os.path.join(root_dir, args.run_dir, args.run_id)
    # Create first and inspect the failure afterwards: an exists() check
    # followed by makedirs() can race with another process creating the
    # same directory.
    try:
        os.makedirs(args.run_path)
    except OSError:
        if not os.path.isdir(args.run_path):
            raise

    outputManager = OutputManager(args.run_path)

    outputManager.say(args)
    outputManager.say("\nRun ID: {}\nRun Path: {}\n\n".format(
        args.run_id, args.run_path))

    # The corpus file is looked up next to the train / eval paths.
    # os.path.join keeps the lookup relative when the path has no directory
    # part (plain "+" concatenation would point at "/corpus.tsv.gz").
    train_dir = os.path.dirname(args.train)
    train_corpus = Corpus(
        [(os.path.join(train_dir, "corpus.tsv.gz"), train_dir)])
    eval_dir = os.path.dirname(args.eval)
    valid_corpus = Corpus(
        [(os.path.join(eval_dir, "corpus.tsv.gz"), eval_dir)])
    outputManager.say("Corpus loaded.\n")

    embs = load_embedding(args.embedding) if args.embedding else None
    embedding_layer = EmbeddingLayer(args.n_d, ['<s>', '</s>'], embs)

    model = model_class(embedding_layer, args)
    if args.cuda:
        model.cuda()
    outputManager.say("\n{}\n\n".format(model))
    outputManager.say(model.state_dict().keys())

    # Only parameters that require gradients are handed to the optimizer.
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable, lr=args.lr)
    outputManager.say(optimizer.state_dict())

    if args.load_model:
        outputManager.say("Loading pretrained model")
        # Map storages to CPU so a checkpoint written on a GPU machine can
        # be read on a CPU-only host; load_state_dict() then copies the
        # values into the model's existing parameters.
        # NOTE: torch.load unpickles the file -- only load trusted files.
        state = torch.load(args.load_model,
                           map_location=lambda storage, loc: storage)
        model.load_state_dict(state)
    else:
        outputManager.say("Training will begin from scratch")

    best_dev = 0
    iter_cnt = 0

    # Evaluation pass before any training step; the returned values are
    # not used for model selection.
    evaluate(iter_cnt, args.eval + "/dev", model, valid_corpus, args)
    evaluate(iter_cnt, args.eval + "/test", model, valid_corpus, args, False)

    for _ in range(args.max_epoch):
        iter_cnt = train(iter_cnt, model, train_corpus, args, optimizer)
        current_dev = evaluate(iter_cnt, args.eval + "/dev", model,
                               valid_corpus, args)
        # The "/test" evaluation only runs when the dev result improves.
        if current_dev > best_dev:
            best_dev = current_dev
            evaluate(iter_cnt, args.eval + "/test", model, valid_corpus, args,
                     False)
        outputManager.say("\n")

    if args.save_model:
        # Save both the bare state dict and the whole model object; this is
        # the model as it stands after the final epoch.
        torch.save(model.state_dict(), args.save_model)
        torch.save(model, args.save_model + '-complete')
Exemple #2
0
def main(args):
    """Run training with a domain discriminator over two training corpora.

    On entry only ``args.model`` is read: the selected model class and
    ``ClassificationD`` register their options on the module-level
    ``argparser`` and the command line is parsed again (unknown options are
    ignored), replacing ``args``.  The function then creates the run
    directory, installs the global ``outputManager``, builds a training
    corpus from ``args.train`` and ``args.cross_train``, a validation corpus
    from ``args.eval``, the model, the domain discriminator and one Adam
    optimizer for each, trains for ``args.max_epoch`` epochs and finally
    saves the model if ``args.save_model`` is set.
    """
    global outputManager

    model_class = get_model_class(args.model)
    model_class.add_config(argparser)
    ClassificationD.add_config(argparser)
    args, _ = argparser.parse_known_args()

    args.run_id = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    root_dir = os.path.dirname(os.path.realpath(__file__))
    # only use generated run_path if none provided by user
    if args.run_path is None:
        args.run_path = os.path.join(root_dir, args.run_dir, args.run_id)
    # Create first and inspect the failure afterwards: an exists() check
    # followed by makedirs() can race with another process creating the
    # same directory.
    try:
        os.makedirs(args.run_path)
    except OSError:
        if not os.path.isdir(args.run_path):
            raise

    outputManager = OutputManager(args.run_path)

    outputManager.say(args)
    outputManager.say("\nRun ID: {}\nRun Path: {}\n\n".format(
        args.run_id, args.run_path))

    # The corpus file is looked up next to each data path.  os.path.join
    # keeps the lookup relative when the path has no directory part (plain
    # "+" concatenation would point at "/corpus.tsv.gz").
    train_dir = os.path.dirname(args.train)
    cross_train_dir = os.path.dirname(args.cross_train)
    train_corpus = Corpus([
        (os.path.join(train_dir, "corpus.tsv.gz"), train_dir),
        (os.path.join(cross_train_dir, "corpus.tsv.gz"), cross_train_dir),
    ])
    eval_dir = os.path.dirname(args.eval)
    valid_corpus = Corpus(
        [(os.path.join(eval_dir, "corpus.tsv.gz"), eval_dir)])
    outputManager.say("Corpus loaded.\n")

    embs = load_embedding(args.embedding) if args.embedding else None
    embedding_layer = EmbeddingLayer(args.n_d, ['<s>', '</s>'], embs)

    model = model_class(embedding_layer, args)
    if args.wasserstein:
        domain_d = WassersteinD(model, args)
        # The Wasserstein variant runs with the sign of lambda_d flipped.
        args.lambda_d = -args.lambda_d
    else:
        domain_d = ClassificationD(model, args)

    if args.cuda:
        model.cuda()
        domain_d.cuda()
    outputManager.say("\n{}\n\n".format(model))
    outputManager.say("\n{}\n\n".format(domain_d))

    # Only parameters that require gradients are handed to the optimizers.
    encoder_params = [p for p in model.parameters() if p.requires_grad]
    domain_params = [p for p in domain_d.parameters() if p.requires_grad]

    optimizer_encoder = optim.Adam(encoder_params, lr=args.lr)
    # NOTE(review): the discriminator optimizer is given a negated learning
    # rate (-args.lr2); presumably this makes it step against the gradient
    # of a loss shared with the encoder -- confirm in train().  Newer
    # PyTorch releases validate lr >= 0 in Adam and would raise ValueError
    # here; verify against the installed version.
    optimizer_domain_d = optim.Adam(domain_params, lr=-args.lr2)

    best_dev = 0
    iter_cnt = 0
    for _ in range(args.max_epoch):
        iter_cnt = train(iter_cnt, model, domain_d, train_corpus, args,
                         optimizer_encoder, optimizer_domain_d)
        current_dev = evaluate(iter_cnt, args.eval + "/dev", model,
                               valid_corpus, args)
        # The "/test" evaluation only runs when the dev result improves.
        if current_dev > best_dev:
            best_dev = current_dev
            evaluate(iter_cnt, args.eval + "/test", model, valid_corpus, args,
                     False)
        outputManager.say("\n")

    if args.save_model:
        # Save both the bare state dict and the whole model object; this is
        # the model as it stands after the final epoch.
        torch.save(model.state_dict(), args.save_model)
        torch.save(model, args.save_model + '-complete')
Exemple #3
0
def main(args):
    """Run the training / evaluation loop for a single training corpus.

    On entry only ``args.model`` is read: the selected model class registers
    its options on the module-level ``argparser`` and the command line is
    parsed again, replacing ``args``.  The function then builds the corpora,
    model and optimizer, evaluates once before training, trains for
    ``args.max_epoch`` epochs and finally saves the model state dict if
    ``args.save_model`` is set.
    """
    model_class = get_model_class(args.model)
    model_class.add_config(argparser)
    args = argparser.parse_args()
    say(args)

    args.run_id = random.randint(0, 10**9)
    args.run_path = "{}/{}".format(args.run_dir, args.run_id)
    # NOTE: run_path is only computed and reported; this function does not
    # create the directory.
    say("\nRun ID: {}\nRun Path: {}\n\n".format(args.run_id, args.run_path))

    # The corpus file is looked up next to the train / eval paths.
    # os.path.join keeps the lookup relative when the path has no directory
    # part (plain "+" concatenation would point at "/corpus.tsv.gz").
    train_dir = os.path.dirname(args.train)
    train_corpus = Corpus(
        [(os.path.join(train_dir, "corpus.tsv.gz"), train_dir)])
    eval_dir = os.path.dirname(args.eval)
    valid_corpus = Corpus(
        [(os.path.join(eval_dir, "corpus.tsv.gz"), eval_dir)])
    say("Corpus loaded.\n")

    embs = load_embedding(args.embedding) if args.embedding else None
    embedding_layer = EmbeddingLayer(args.n_d, ['<s>', '</s>'], embs)

    model = model_class(embedding_layer, args)
    if args.cuda:
        model.cuda()
    say("\n{}\n\n".format(model))

    # Single-argument, parenthesised print behaves the same as a print
    # statement on Python 2 and is also valid on Python 3.
    print(model.state_dict().keys())

    # Only parameters that require gradients are handed to the optimizer.
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable, lr=args.lr)

    if args.load_model:
        print("Loading pretrained model")
        # Map storages to CPU so a checkpoint written on a GPU machine can
        # be read on a CPU-only host; load_state_dict() then copies the
        # values into the model's existing parameters.
        # NOTE: torch.load unpickles the file -- only load trusted files.
        state = torch.load(args.load_model,
                           map_location=lambda storage, loc: storage)
        model.load_state_dict(state)
    else:
        print("Training will begin from scratch")

    best_dev = 0
    iter_cnt = 0

    # Evaluation pass before any training step; the returned values are
    # not used for model selection.
    evaluate(iter_cnt, args.eval + "/dev", model, valid_corpus, args)
    evaluate(iter_cnt, args.eval + "/test", model, valid_corpus, args, False)

    for _ in range(args.max_epoch):
        iter_cnt = train(iter_cnt, model, train_corpus, args, optimizer)
        current_dev = evaluate(iter_cnt, args.eval + "/dev", model,
                               valid_corpus, args)
        # The "/test" evaluation only runs when the dev result improves.
        if current_dev > best_dev:
            best_dev = current_dev
            evaluate(iter_cnt, args.eval + "/test", model, valid_corpus, args,
                     False)
        say("\n")

    if args.save_model:
        # This stores the model as it stands after the final epoch.
        torch.save(model.state_dict(), args.save_model)
Exemple #4
0
def main(args):
    """Run training with a domain discriminator over two training corpora.

    On entry only ``args.model`` is read: the selected model class and
    ``ClassificationD`` register their options on the module-level
    ``argparser`` and the command line is parsed again (unknown options are
    ignored), replacing ``args``.  The function then builds a training
    corpus from ``args.train`` and ``args.cross_train``, a validation corpus
    from ``args.eval``, the model, the domain discriminator and one Adam
    optimizer for each, trains for ``args.max_epoch`` epochs and finally
    saves the model state dict if ``args.save_model`` is set.
    """
    model_class = get_model_class(args.model)
    model_class.add_config(argparser)
    ClassificationD.add_config(argparser)
    args, _ = argparser.parse_known_args()
    say(args)

    args.run_id = random.randint(0, 10**9)
    args.run_path = "{}/{}".format(args.run_dir, args.run_id)
    # NOTE: run_path is only computed and reported; this function does not
    # create the directory.
    say("\nRun ID: {}\nRun Path: {}\n\n".format(args.run_id, args.run_path))

    # The corpus file is looked up next to each data path.  os.path.join
    # keeps the lookup relative when the path has no directory part (plain
    # "+" concatenation would point at "/corpus.tsv.gz").
    train_dir = os.path.dirname(args.train)
    cross_train_dir = os.path.dirname(args.cross_train)
    train_corpus = Corpus([
        (os.path.join(train_dir, "corpus.tsv.gz"), train_dir),
        (os.path.join(cross_train_dir, "corpus.tsv.gz"), cross_train_dir),
    ])
    eval_dir = os.path.dirname(args.eval)
    valid_corpus = Corpus(
        [(os.path.join(eval_dir, "corpus.tsv.gz"), eval_dir)])
    say("Corpus loaded.\n")

    embs = load_embedding(args.embedding) if args.embedding else None
    embedding_layer = EmbeddingLayer(args.n_d, ['<s>', '</s>'], embs)

    model = model_class(embedding_layer, args)
    if args.wasserstein:
        domain_d = WassersteinD(model, args)
        # The Wasserstein variant runs with the sign of lambda_d flipped.
        args.lambda_d = -args.lambda_d
    else:
        domain_d = ClassificationD(model, args)

    if args.cuda:
        model.cuda()
        domain_d.cuda()
    say("\n{}\n\n".format(model))
    say("\n{}\n\n".format(domain_d))

    # Only parameters that require gradients are handed to the optimizers.
    encoder_params = [p for p in model.parameters() if p.requires_grad]
    domain_params = [p for p in domain_d.parameters() if p.requires_grad]

    optimizer_encoder = optim.Adam(encoder_params, lr=args.lr)
    # NOTE(review): the discriminator optimizer is given a negated learning
    # rate (-args.lr2); presumably this makes it step against the gradient
    # of a loss shared with the encoder -- confirm in train().  Newer
    # PyTorch releases validate lr >= 0 in Adam and would raise ValueError
    # here; verify against the installed version.
    optimizer_domain_d = optim.Adam(domain_params, lr=-args.lr2)

    best_dev = 0
    iter_cnt = 0
    for _ in range(args.max_epoch):
        iter_cnt = train(iter_cnt, model, domain_d, train_corpus, args,
                         optimizer_encoder, optimizer_domain_d)
        current_dev = evaluate(iter_cnt, args.eval + "/dev", model,
                               valid_corpus, args)
        # The "/test" evaluation only runs when the dev result improves.
        if current_dev > best_dev:
            best_dev = current_dev
            evaluate(iter_cnt, args.eval + "/test", model, valid_corpus, args,
                     False)
        say("\n")

    if args.save_model:
        # This stores the model as it stands after the final epoch.
        torch.save(model.state_dict(), args.save_model)