Example 1
def train(args):
    ''' Training Strategy

    Input: source = {S1, S2, ..., Sk}, target = {T}

    Train:
        Approach 1: fix metric and learn encoder only
        Approach 2: learn metric and encoder alternately
    '''

    # test_mahalanobis_metric() and return

    encoder_class = get_model_class("mlp")
    encoder_class.add_config(argparser)
    critic_class = get_critic_class(args.critic)
    critic_class.add_config(argparser)

    args = argparser.parse_args()
    say(args)

    # encoder is shared across domains
    encoder = encoder_class(args)

    say("Transferring from %s to %s\n" % (args.train, args.test))
    source_train_sets = args.train.split(',')
    train_loaders = []
    Us = []
    Ps = []
    Ns = []
    Ws = []
    Vs = []
    # Ms = []
    for source in source_train_sets:
        filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (source))
        assert (os.path.exists(filepath))
        train_dataset = AmazonDataset(filepath)
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=0)
        train_loaders.append(train_loader)

        if args.metric == "biaffine":
            U = torch.FloatTensor(encoder.n_d, encoder.n_d)
            W = torch.FloatTensor(encoder.n_d, 1)
            nn.init.xavier_uniform_(W)
            Ws.append(W)
            V = torch.FloatTensor(encoder.n_d, 1)
            nn.init.xavier_uniform_(V)
            Vs.append(V)
        else:
            U = torch.FloatTensor(encoder.n_d, args.m_rank)

        nn.init.xavier_uniform_(U)
        Us.append(U)
        P = torch.FloatTensor(encoder.n_d, args.m_rank)
        nn.init.xavier_uniform_(P)
        Ps.append(P)
        N = torch.FloatTensor(encoder.n_d, args.m_rank)
        nn.init.xavier_uniform_(N)
        Ns.append(N)
        # Ms.append(U.mm(U.t()))

    unl_filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (args.test))
    assert (os.path.exists(unl_filepath))
    unl_dataset = AmazonDomainDataset(unl_filepath)
    unl_loader = data.DataLoader(unl_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=0)

    valid_filepath = os.path.join(DATA_DIR, "%s_dev.svmlight" % (args.test))
    if os.path.exists(valid_filepath):
        valid_dataset = AmazonDataset(valid_filepath)
        valid_loader = data.DataLoader(valid_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=False,
                                       num_workers=0)
    else:
        valid_loader = None

    test_filepath = os.path.join(DATA_DIR, "%s_test.svmlight" % (args.test))
    assert (os.path.exists(test_filepath))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)
    say("Corpus loaded.\n")

    classifiers = []
    for source in source_train_sets:
        classifier = nn.Linear(encoder.n_out, 2)  # binary classification
        nn.init.xavier_normal_(classifier.weight)
        nn.init.constant_(classifier.bias, 0.1)
        classifiers.append(classifier)

    critic = critic_class(encoder, args)

    # if args.save_model:
    #     say(colored("Save model to {}\n".format(args.save_model + ".init"), 'red'))
    #     torch.save([encoder, classifiers, Us, Ps, Ns], args.save_model + ".init")

    if args.cuda:
        # map(lambda m: m.cuda(), [encoder, critic] + classifiers)
        encoder = encoder.cuda()
        critic = critic.cuda()
        classifiers = [x.cuda() for x in classifiers]
        Us = [Variable(U.cuda(), requires_grad=True) for U in Us]
        Ps = [Variable(P.cuda(), requires_grad=True) for P in Ps]
        Ns = [Variable(N.cuda(), requires_grad=True) for N in Ns]
        if args.metric == "biaffine":
            Ws = [Variable(W.cuda(), requires_grad=True) for W in Ws]
            Vs = [Variable(V.cuda(), requires_grad=True) for V in Vs]
    else:
        # the metric parameters still need requires_grad=True on CPU,
        # otherwise the requires_grad filter below drops them from the optimizer
        Us = [Variable(U, requires_grad=True) for U in Us]
        Ps = [Variable(P, requires_grad=True) for P in Ps]
        Ns = [Variable(N, requires_grad=True) for N in Ns]
        if args.metric == "biaffine":
            Ws = [Variable(W, requires_grad=True) for W in Ws]
            Vs = [Variable(V, requires_grad=True) for V in Vs]

    # Ms = [ U.mm(U.t()) for U in Us ]

    say("\nEncoder: {}\n".format(encoder))
    for i, classifier in enumerate(classifiers):
        say("Classifier-{}: {}\n".format(i, classifier))
    say("Critic: {}\n".format(critic))

    requires_grad = lambda x: x.requires_grad
    task_params = list(encoder.parameters())
    for classifier in classifiers:
        task_params += list(classifier.parameters())
    task_params += list(critic.parameters())
    task_params += Us
    task_params += Ps
    task_params += Ns
    if args.metric == "biaffine":
        task_params += Ws
        task_params += Vs

    optim_model = optim.Adam(filter(requires_grad, task_params),
                             lr=args.lr,
                             weight_decay=1e-4)

    say("Training will begin from scratch\n")

    best_dev = 0
    best_test = 0
    iter_cnt = 0

    for epoch in range(args.max_epoch):
        if args.metric == "biaffine":
            mats = [Us, Ws, Vs]
        else:
            mats = [Us, Ps, Ns]

        iter_cnt = train_epoch(iter_cnt, encoder, classifiers, critic, mats,
                               [train_loaders, unl_loader, valid_loader], args,
                               optim_model)

        if valid_loader:
            (curr_dev, oracle_curr_dev), confusion_mat = evaluate(
                encoder, classifiers, mats, [train_loaders, valid_loader],
                args)
            say("Dev accuracy/oracle: {:.4f}/{:.4f}\n".format(
                curr_dev, oracle_curr_dev))
        (curr_test, oracle_curr_test), confusion_mat = evaluate(
            encoder, classifiers, mats, [train_loaders, test_loader], args)
        say("Test accuracy/oracle: {:.4f}/{:.4f}\n".format(
            curr_test, oracle_curr_test))

        if valid_loader and curr_dev >= best_dev:
            best_dev = curr_dev
            best_test = curr_test
            print(confusion_mat)
            if args.save_model:
                say(
                    colored(
                        "Save model to {}\n".format(args.save_model + ".best"),
                        'red'))
                torch.save([encoder, classifiers, Us, Ps, Ns],
                           args.save_model + ".best")
            say("\n")

    if valid_loader:
        say(colored("Best test accuracy {:.4f}\n".format(best_test), 'red'))
    say(
        colored("Test accuracy after training {:.4f}\n".format(curr_test),
                'red'))
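
The commented-out `test_mahalanobis_metric()` call and `Ms.append(U.mm(U.t()))` lines suggest that each source domain carries a low-rank Mahalanobis metric M = U Uᵀ over the encoder output. Below is a minimal sketch of how such a metric could score an encoded example against a source-domain prototype, assuming `n_d`-dimensional encodings; the helper name `mahalanobis_similarity` and the toy shapes are hypothetical and not taken from the code above.

import torch

def mahalanobis_similarity(h, mu, U):
    """Hypothetical helper: negative squared Mahalanobis distance under the
    low-rank metric M = U @ U.T (shapes: h, mu -> (n_d,), U -> (n_d, m_rank))."""
    diff = (h - mu).unsqueeze(0)   # (1, n_d)
    proj = diff.mm(U)              # (1, m_rank); equivalent to diff M diff.T
    return -proj.pow(2).sum()      # scalar similarity (higher = closer)

# toy usage with the same shapes as the training loop above
n_d, m_rank = 64, 8
U = torch.empty(n_d, m_rank)
torch.nn.init.xavier_uniform_(U)
h = torch.randn(n_d)    # encoded target example
mu = torch.randn(n_d)   # encoded source-domain prototype
print(mahalanobis_similarity(h, mu, U).item())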
Example 2
def train(args):
    encoder_class = get_model_class(args.encoder)
    encoder_class.add_config(argparser)
    critic_class = get_critic_class(args.critic)
    critic_class.add_config(argparser)

    args = argparser.parse_args()
    say(args)

    say("Transferring from %s to %s\n" % (args.train, args.test))

    source_train_sets = args.train.split(',')
    train_loaders = []
    for source in source_train_sets:
        filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (source))
        assert (os.path.exists(filepath))
        train_dataset = AmazonDataset(filepath)
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=0)
        train_loaders.append(train_loader)

    target_d_filepath = os.path.join(DATA_DIR,
                                     "%s_train.svmlight" % (args.test))
    assert (os.path.exists(target_d_filepath))
    train_target_d_dataset = AmazonDomainDataset(target_d_filepath, domain=1)
    train_target_d_loader = data.DataLoader(train_target_d_dataset,
                                            batch_size=args.batch_size_d,
                                            shuffle=True,
                                            num_workers=0)

    valid_filepath = os.path.join(DATA_DIR, "%s_dev.svmlight" % (args.test))
    # assert (os.path.exists(valid_filepath))
    if os.path.exists(valid_filepath):
        valid_dataset = AmazonDataset(valid_filepath)
        valid_loader = data.DataLoader(valid_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=False,
                                       num_workers=0)
    else:
        valid_loader = None

    test_filepath = os.path.join(DATA_DIR, "%s_test.svmlight" % (args.test))
    assert (os.path.exists(test_filepath))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)
    say("Corpus loaded.\n")

    encoder = encoder_class(args)
    critic = critic_class(encoder, args)
    classifier = nn.Linear(encoder.n_out, 2)  # binary classification
    nn.init.xavier_normal_(classifier.weight)
    nn.init.constant_(classifier.bias, 0.1)

    gan_gen = encoder_class(args)
    gan_disc = MMD(gan_gen, args)

    if args.cuda:
        encoder = encoder.cuda()
        critic = critic.cuda()
        classifier = classifier.cuda()
        gan_gen = gan_gen.cuda()
        gan_disc = gan_disc.cuda()

    say("\n{}\n\n".format(encoder))
    say("\n{}\n\n".format(critic))
    say("\n{}\n\n".format(classifier))
    say("\n{}\n\n".format(gan_gen))
    say("\n{}\n\n".format(gan_disc))

    print(encoder.state_dict().keys())
    print(critic.state_dict().keys())
    print(classifier.state_dict().keys())
    print(gan_gen.state_dict().keys())
    print(gan_disc.state_dict().keys())

    requires_grad = lambda x: x.requires_grad
    task_params = list(encoder.parameters()) + \
                  list(classifier.parameters()) + \
                  list(critic.parameters())
    optimizer = optim.Adam(filter(requires_grad, task_params),
                           lr=args.lr,
                           weight_decay=1e-4)

    reg_params = list(encoder.parameters()) + \
                 list(gan_gen.parameters())
    optimizer_reg = optim.Adam(filter(requires_grad, reg_params),
                               lr=args.lr,
                               weight_decay=1e-4)

    say("Training will begin from scratch\n")

    best_dev = 0
    best_test = 0
    iter_cnt = 0

    for epoch in range(args.max_epoch):
        iter_cnt = train_epoch(iter_cnt, encoder, classifier, critic,
                               train_loaders, train_target_d_loader,
                               valid_loader, args, optimizer)

        if args.advreg:
            for loader in train_loaders + [train_target_d_loader]:
                train_advreg_mmd(iter_cnt, encoder, gan_gen, gan_disc, loader,
                                 args, optimizer_reg)

        if valid_loader:
            curr_dev, confusion_mat, _ = evaluate(encoder, classifier,
                                                  valid_loader, args)
            say("Dev accuracy: {:.4f}\n".format(curr_dev))

        curr_test, confusion_mat, _ = evaluate(encoder, classifier,
                                               test_loader, args)
        say("Test accuracy: {:.4f}\n".format(curr_test))

        if valid_loader and curr_dev >= best_dev:
            best_dev = curr_dev
            best_test = curr_test
            # print(confusion_mat)
            if args.save_model:
                say(
                    colored(
                        "Save model to {}\n".format(args.save_model + ".best"),
                        'red'))
                torch.save([encoder, classifier], args.save_model + ".best")
            say("\n")

    if valid_loader:
        say(colored("Best test accuracy {:.4f}\n".format(best_test), 'red'))
    say(
        colored("Test accuracy after training {:.4f}\n".format(curr_test),
                'red'))
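
The `MMD` critic constructed as `gan_disc = MMD(gan_gen, args)` is not shown in this example. As a rough sketch only, assuming a single Gaussian RBF kernel and a biased estimator, a maximum mean discrepancy between two batches of encodings could look like the following; the repo's actual critic may use a different kernel mix or estimator.

import torch

def mmd_rbf(x, y, sigma=1.0):
    """Biased MMD^2 estimate between batches x (n, d) and y (m, d)
    with a Gaussian RBF kernel. Sketch only."""
    def kernel(a, b):
        dist = torch.cdist(a, b).pow(2)          # pairwise squared distances
        return torch.exp(-dist / (2 * sigma ** 2))
    return kernel(x, x).mean() + kernel(y, y).mean() - 2 * kernel(x, y).mean()

# toy usage: encodings of a source batch and a (shifted) target batch
src = torch.randn(32, 100)
tgt = torch.randn(32, 100) + 0.5
print(mmd_rbf(src, tgt).item())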
Example 3
def train_model(args):

    CEMBED_SIZE = args.CEMBED_SIZE
    WEMBED_SIZE = args.WEMBED_SIZE
    HIDDEN_SIZE = args.HIDDEN_SIZE
    MLP_SIZE = args.MLP_SIZE
    SPARSE = args.SPARSE
    TIMEOUT = args.TIMEOUT

    num_train_files = 0

    Us = []
    batch_trains = []

    if args.train:
        train = file_conll(args.train).tupled_data
    if args.multi_train:
        train = []
        for file_name in args.multi_train:
            if "ontonotes" in file_name:
                print(len(file_conll(file_name).tupled_data))
                train += file_conll(file_name).tupled_data[:args.num_twitter]
            else:
                train += file_conll(file_name).\
                         tupled_data[:args.train_samples]
            num_train_files += 1
            U = torch.FloatTensor(2 * args.HIDDEN_SIZE, args.m_rank)
            nn.init.xavier_uniform_(U)
            Us.append(U)
        train_combined = []
        train = []
        max_samples = args.train_samples
        for file_name in args.multi_train:
            train += file_conll(file_name).tupled_data[:args.train_samples]
            if "ontonotes" in file_name:
                train_combined.append(file_conll(file_name).tupled_data\
                                     [:args.train_samples])
            elif "total.conllu" in file_name:
                train_combined.append(file_conll(file_name).tupled_data\
                                      [:args.num_twitter])
                max_samples = max(args.num_twitter, args.train_samples)
            else:
                train_combined.append(file_conll(file_name).tupled_data\
                                      [:args.train_samples])
        args.train_samples = max_samples
        for j in range(max_samples):
            current_batch = []
            for i in range(num_train_files):
                current_batch.append(train_combined[i][j %
                                                       len(train_combined[i])])
            batch_trains.append(current_batch)

    if args.dev:
        dev = file_conll(args.dev).tupled_data
    if args.test:
        test = file_conll(args.test).tupled_data

    if args.dev == args.test:
        print("Dividing test and dev equally")
        test = test[len(test) // 2:]
        dev = dev[:len(dev) // 2]

    args.num_train_files = num_train_files
    args.cuda = args.CUDA

    words = []
    tags = []
    chars = set()
    wc = Counter()
    for sent in (train + dev + test):
        for w, p in sent:
            words.append(w)
            tags.append(p)
            wc[w] += 1
            chars.update(w)
    words.append("_UNK_")
    chars.add("_UNK_")
    chars.add("<*>")

    vw = Vocab.from_corpus([words])
    vt = Vocab.from_corpus([tags])
    vc = Vocab.from_corpus([chars])
    UNK = vw.w2i["_UNK_"]
    CUNK = vc.w2i["_UNK_"]
    pad_char = vc.w2i["<*>"]

    nwords = vw.size()
    ntags = vt.size()
    nchars = vc.size()
    print("nwords=%r, ntags=%r, nchars=%r" % (nwords, ntags, nchars))

    args.ntags = ntags
    args.nwords = nwords
    args.nchars = nchars
    encoder_class = get_model_class("tagger")
    encoder_class.add_config(parser)
    encoder = encoder_class(args, vw, vc, vt, wc, UNK, CUNK, pad_char)

    classifier = Classifier(2 * HIDDEN_SIZE, MLP_SIZE, ntags)

    critic_class = get_critic_class(args.critic)
    critic_class.add_config(parser)
    critic = critic_class(encoder, args)

    classifiers = []
    for ind in range(num_train_files):
        classifiers.append(Classifier(2 * HIDDEN_SIZE, MLP_SIZE, ntags))

    requires_grad = lambda x: x.requires_grad

    if args.CUDA:
        # map() is lazy in Python 3, so the modules must be moved explicitly
        for m in [encoder, classifier, critic] + classifiers:
            m.cuda()
        Us = [Variable(U.cuda(), requires_grad=True) for U in Us]

    else:
        Us = [Variable(U, requires_grad=True) for U in Us]

    optimizer_encoder = optim.Adam(encoder.parameters(),
                                   lr=1e-3,
                                   weight_decay=1e-4)
    task_params = list(classifier.parameters())
    for x in classifiers:
        task_params += list(x.parameters())
    task_params += Us
    task_params += list(critic.parameters())
    optimizer_classifier = optim.Adam(filter(requires_grad, task_params),
                                      lr=1e-3,
                                      weight_decay=1e-4)

    print("startup time: %r" % (time.time() - start))
    start_time = time.time()

    i = 0

    best_test = 0
    best_dev = 0

    for ITER in range(args.epochs):
        #random.shuffle(batch_trains)
        encoder, classifier, optimizer_encoder, optimizer_classifier = \
                   train_epoch(encoder, classifier, classifiers, critic,\
                   batch_trains, dev, test, optimizer_encoder,\
                   optimizer_classifier, start_time, i, Us)
        print("epoch %r finished" % ITER)
        domain_encs = domain_encoding(batch_trains, args, encoder)
        curr_dev = evaluate(encoder, args, batch_trains, classifier, classifiers, \
                 dev, domain_encs, Us)
        curr_test = evaluate(encoder, args, batch_trains, classifier, classifiers, \
                 test, domain_encs, Us)
        if curr_dev > best_dev:
            best_dev = curr_dev
            best_test = curr_test

    print(best_dev, best_test)
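
The `batch_trains` construction above pairs the j-th sentence from every training file, wrapping around the shorter corpora so each "batch" holds one sentence per file. The same round-robin pairing, isolated into a small self-contained helper with toy data (the helper name `round_robin_batches` is illustrative only):

def round_robin_batches(per_file_sentences, num_batches):
    """For each step j, take the j-th sentence from every file, wrapping
    around shorter files (same logic as the batch_trains loop above)."""
    batches = []
    for j in range(num_batches):
        batches.append([sents[j % len(sents)] for sents in per_file_sentences])
    return batches

# toy usage: three "files" of unequal length
files = [["a1", "a2", "a3", "a4"], ["b1", "b2"], ["c1", "c2", "c3"]]
for batch in round_robin_batches(files, 4):
    print(batch)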