def visualize(args):
    """Project source+target encodings to 2-D with t-SNE and plot them.

    Loads a saved (encoder, classifier) pair, encodes every source-domain
    training set plus the target training set via `evaluate`, reduces the
    concatenated hidden vectors with t-SNE, saves the result under `vis/`,
    and renders the embedding image.
    """
    encoder, classifier = torch.load(args.load_model)
    if args.cuda:
        encoder = encoder.cuda()
        classifier = classifier.cuda()

    # One (non-shuffled) loader per comma-separated source domain.
    train_loaders = []
    for domain_name in args.train.split(','):
        domain_path = os.path.join(DATA_DIR, "%s_train.svmlight" % (domain_name))
        train_loaders.append(
            data.DataLoader(
                AmazonDataset(domain_path),
                batch_size=args.batch_size,
                shuffle=False,
                num_workers=0
            )
        )

    # Target-domain training data, used only for visualization here.
    target_d_filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (args.test))
    train_target_d_loader = data.DataLoader(
        AmazonDataset(target_d_filepath),
        batch_size=args.batch_size_d,
        shuffle=False,
        num_workers=0
    )

    # Collect hidden representations and labels for every source domain.
    # NOTE(review): assumes `evaluate` returns (acc, confusion, (hiddens, labels)).
    source_hs, source_ys = [], []
    source_num = 0
    for loader in train_loaders:
        _, _, (hs, ys) = evaluate(encoder, classifier, loader, args)
        source_hs.append(hs)
        source_ys.append(ys)
        source_num += ys.shape[0]  # total source sample count, used to split the plot

    # Target-domain representations.
    _, _, (ht, yt) = evaluate(encoder, classifier, train_target_d_loader, args)

    h_both = torch.cat(source_hs + [ht]).cpu().numpy()
    y_both = torch.cat(source_ys + [yt]).cpu().numpy()

    tsne = TSNE(perplexity=30, n_components=2, n_iter=3300)
    vdata = tsne.fit_transform(h_both)
    print(vdata.shape, source_num)
    torch.save([vdata, y_both, source_num],
               'vis/%s-%s-mdan.vdata' % (args.train, args.test))
    plot_embedding(vdata, y_both, source_num, args.save_image)
def predict(args):
    """Evaluate a saved multi-source model (encoder + per-domain classifiers
    + metric tensors Us/Ps/Ns) on the target test set.

    Fix: the original used `map(lambda m: m.eval(), ...)` / `map(..., m.cuda(), ...)`;
    `map` is lazy in Python 3, so eval mode and the GPU move were never applied.
    Explicit loops make the side effects actually run.
    """
    encoder, classifiers, Us, Ps, Ns = torch.load(args.load_model)
    # Put all modules in eval mode (disables dropout/batch-norm updates).
    for m in [encoder] + classifiers:
        m.eval()
    # args = argparser.parse_args()
    # say(args)
    if args.cuda:
        # Module.cuda() moves parameters in place, so no rebinding is needed.
        for m in [encoder] + classifiers:
            m.cuda()
        Us = [U.cuda() for U in Us]
        Ps = [P.cuda() for P in Ps]
        Ns = [N.cuda() for N in Ns]

    say("\nTransferring from %s to %s\n" % (args.train, args.test))

    # One loader per comma-separated source domain (no shuffling for eval).
    source_train_sets = args.train.split(',')
    train_loaders = []
    for source in source_train_sets:
        filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (source))
        train_dataset = AmazonDataset(filepath)
        train_loader = data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=0
        )
        train_loaders.append(train_loader)

    test_filepath = os.path.join(DATA_DIR, "%s_test.svmlight" % (args.test))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=0
    )
    say("Corpus loaded.\n")

    mats = [Us, Ps, Ns]
    (acc, oracle_acc), confusion_mat = evaluate(
        encoder, classifiers, mats, [train_loaders, test_loader], args
    )
    say(colored("Test accuracy/oracle {:.4f}/{:.4f}\n".format(acc, oracle_acc), 'red'))
def predict(args):
    """Evaluate a saved (encoder, classifier) pair on the target domain's
    training split and report accuracy plus the confusion matrix.

    Fixes: (1) `map(lambda m: m.eval(), ...)` is lazy in Python 3, so eval
    mode / GPU placement never happened — replaced with explicit loops;
    (2) Python-2 `print confusion_mat` statement replaced with the `print()`
    function used elsewhere in this file.
    """
    encoder, classifier = torch.load(args.load_model)
    # Put both modules in eval mode (disables dropout/batch-norm updates).
    for m in (encoder, classifier):
        m.eval()
    if args.cuda:
        # Module.cuda() moves parameters in place, so no rebinding is needed.
        for m in (encoder, classifier):
            m.cuda()

    # NOTE(review): evaluates on the target *train* split, mirroring the
    # original code — confirm this is intentional rather than "_test".
    test_filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (args.test))
    assert (os.path.exists(test_filepath))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)

    acc, confusion_mat, _ = evaluate(encoder, classifier, test_loader, args)
    say(colored("Test accuracy {:.4f}\n".format(acc), 'red'))
    print(confusion_mat)
def train(args):
    """Train encoder + binary classifier with a domain critic, optionally
    adding adversarial MMD regularization (``--advreg``).

    Builds loaders for every comma-separated source domain plus the target
    domain, trains for ``args.max_epoch`` epochs via ``train_epoch``, tracks
    the best dev accuracy when a dev split exists, and saves the best
    (encoder, classifier) pair to ``args.save_model + ".best"``.

    NOTE: ``args`` is re-bound below by re-parsing the module-level
    ``argparser`` after the encoder/critic classes add their own options —
    the incoming ``args`` is only used to pick those classes.
    """
    encoder_class = get_model_class(args.encoder)
    encoder_class.add_config(argparser)
    critic_class = get_critic_class(args.critic)
    critic_class.add_config(argparser)
    # Re-parse so the newly registered encoder/critic options are present.
    args = argparser.parse_args()
    say(args)

    say("Transferring from %s to %s\n" % (args.train, args.test))

    # One shuffled loader per source domain.
    source_train_sets = args.train.split(',')
    train_loaders = []
    for source in source_train_sets:
        filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (source))
        assert (os.path.exists(filepath))
        train_dataset = AmazonDataset(filepath)
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=0)
        train_loaders.append(train_loader)

    # Target-domain data (domain label 1) used for the domain critic.
    target_d_filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (args.test))
    assert (os.path.exists(target_d_filepath))
    train_target_d_dataset = AmazonDomainDataset(target_d_filepath, domain=1)
    train_target_d_loader = data.DataLoader(train_target_d_dataset,
                                            batch_size=args.batch_size_d,
                                            shuffle=True,
                                            num_workers=0)

    # Dev split is optional: fall back to None when the file is absent.
    valid_filepath = os.path.join(DATA_DIR, "%s_dev.svmlight" % (args.test))
    # assert (os.path.exists(valid_filepath))
    if os.path.exists(valid_filepath):
        valid_dataset = AmazonDataset(valid_filepath)
        valid_loader = data.DataLoader(valid_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=False,
                                       num_workers=0)
    else:
        valid_loader = None

    test_filepath = os.path.join(DATA_DIR, "%s_test.svmlight" % (args.test))
    assert (os.path.exists(test_filepath))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)
    say("Corpus loaded.\n")

    encoder = encoder_class(args)
    critic = critic_class(encoder, args)
    classifier = nn.Linear(encoder.n_out, 2)  # binary classification
    nn.init.xavier_normal_(classifier.weight)
    nn.init.constant_(classifier.bias, 0.1)

    # Generator/discriminator pair for the optional adversarial MMD regularizer.
    gan_gen = encoder_class(args)
    gan_disc = MMD(gan_gen, args)

    if args.cuda:
        encoder = encoder.cuda()
        critic = critic.cuda()
        classifier = classifier.cuda()
        gan_gen = gan_gen.cuda()
        gan_disc = gan_disc.cuda()

    say("\n{}\n\n".format(encoder))
    say("\n{}\n\n".format(critic))
    say("\n{}\n\n".format(classifier))
    say("\n{}\n\n".format(gan_gen))
    say("\n{}\n\n".format(gan_disc))
    print(encoder.state_dict().keys())
    print(critic.state_dict().keys())
    print(classifier.state_dict().keys())
    print(gan_gen.state_dict().keys())
    print(gan_disc.state_dict().keys())

    requires_grad = lambda x: x.requires_grad
    # Main-task optimizer: encoder + classifier + critic parameters.
    task_params = list(encoder.parameters()) + \
        list(classifier.parameters()) + \
        list(critic.parameters())
    optimizer = optim.Adam(filter(requires_grad, task_params),
                           lr=args.lr,
                           weight_decay=1e-4)
    # Separate optimizer for the adversarial regularizer (shares the encoder).
    reg_params = list(encoder.parameters()) + \
        list(gan_gen.parameters())
    optimizer_reg = optim.Adam(filter(requires_grad, reg_params),
                               lr=args.lr,
                               weight_decay=1e-4)

    say("Training will begin from scratch\n")
    best_dev = 0
    best_test = 0
    iter_cnt = 0
    for epoch in range(args.max_epoch):
        iter_cnt = train_epoch(iter_cnt, encoder, classifier, critic,
                               train_loaders, train_target_d_loader,
                               valid_loader, args, optimizer)

        if args.advreg:
            # Regularize on every domain, source and target alike.
            for loader in train_loaders + [train_target_d_loader]:
                train_advreg_mmd(iter_cnt, encoder, gan_gen, gan_disc,
                                 loader, args, optimizer_reg)

        if valid_loader:
            curr_dev, confusion_mat, _ = evaluate(encoder, classifier,
                                                  valid_loader, args)
            say("Dev accuracy: {:.4f}\n".format(curr_dev))

        curr_test, confusion_mat, _ = evaluate(encoder, classifier,
                                               test_loader, args)
        say("Test accuracy: {:.4f}\n".format(curr_test))

        # Model selection: keep the test score at the best dev epoch.
        if valid_loader and curr_dev >= best_dev:
            best_dev = curr_dev
            best_test = curr_test
            # print(confusion_mat)
            if args.save_model:
                say(
                    colored(
                        "Save model to {}\n".format(args.save_model + ".best"),
                        'red'))
                torch.save([encoder, classifier], args.save_model + ".best")
        say("\n")

    if valid_loader:
        say(colored("Best test accuracy {:.4f}\n".format(best_test), 'red'))
    say(
        colored("Test accuracy after training {:.4f}\n".format(curr_test),
                'red'))
def train(args):
    '''
    Training Strategy

    Input: source = {S1, S2, ..., Sk}, target = {T}

    Train:
        Approach 1: fix metric and learn encoder only
        Approach 2: learn metric and encoder alternatively
    '''
    # test_mahalanobis_metric() and return

    # NOTE: ``args`` is re-bound below by re-parsing the module-level
    # ``argparser`` after the encoder/critic classes register their options.
    encoder_class = get_model_class("mlp")
    encoder_class.add_config(argparser)
    critic_class = get_critic_class(args.critic)
    critic_class.add_config(argparser)
    args = argparser.parse_args()
    say(args)

    # encoder is shared across domains
    encoder = encoder_class(args)

    say("Transferring from %s to %s\n" % (args.train, args.test))
    source_train_sets = args.train.split(',')
    train_loaders = []
    # Per-source metric parameters:
    #   Us/Ps/Ns — low-rank factors (default metric);
    #   Ws/Vs   — extra vectors used only when --metric biaffine.
    Us = []
    Ps = []
    Ns = []
    Ws = []
    Vs = []
    # Ms = []
    for source in source_train_sets:
        filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (source))
        assert (os.path.exists(filepath))
        train_dataset = AmazonDataset(filepath)
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=0)
        train_loaders.append(train_loader)

        # U is square for the biaffine metric, low-rank otherwise; it is
        # initialized and collected after the branch either way.
        if args.metric == "biaffine":
            U = torch.FloatTensor(encoder.n_d, encoder.n_d)
            W = torch.FloatTensor(encoder.n_d, 1)
            nn.init.xavier_uniform_(W)
            Ws.append(W)
            V = torch.FloatTensor(encoder.n_d, 1)
            nn.init.xavier_uniform_(V)
            Vs.append(V)
        else:
            U = torch.FloatTensor(encoder.n_d, args.m_rank)
        nn.init.xavier_uniform_(U)
        Us.append(U)
        P = torch.FloatTensor(encoder.n_d, args.m_rank)
        nn.init.xavier_uniform_(P)
        Ps.append(P)
        N = torch.FloatTensor(encoder.n_d, args.m_rank)
        nn.init.xavier_uniform_(N)
        Ns.append(N)
        # Ms.append(U.mm(U.t()))

    # Unlabeled target-domain data for the domain critic.
    unl_filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (args.test))
    assert (os.path.exists(unl_filepath))
    unl_dataset = AmazonDomainDataset(unl_filepath)
    unl_loader = data.DataLoader(unl_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=0)

    # Dev split is optional: fall back to None when the file is absent.
    valid_filepath = os.path.join(DATA_DIR, "%s_dev.svmlight" % (args.test))
    if os.path.exists(valid_filepath):
        valid_dataset = AmazonDataset(valid_filepath)
        valid_loader = data.DataLoader(valid_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=False,
                                       num_workers=0)
    else:
        valid_loader = None

    test_filepath = os.path.join(DATA_DIR, "%s_test.svmlight" % (args.test))
    assert (os.path.exists(test_filepath))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)
    say("Corpus loaded.\n")

    # One binary classifier per source domain.
    classifiers = []
    for source in source_train_sets:
        classifier = nn.Linear(encoder.n_out, 2)  # binary classification
        nn.init.xavier_normal_(classifier.weight)
        nn.init.constant_(classifier.bias, 0.1)
        classifiers.append(classifier)

    critic = critic_class(encoder, args)

    # if args.save_model:
    #     say(colored("Save model to {}\n".format(args.save_model + ".init"), 'red'))
    #     torch.save([encoder, classifiers, Us, Ps, Ns], args.save_model + ".init")

    if args.cuda:
        # map(lambda m: m.cuda(), [encoder, critic] + classifiers)
        encoder = encoder.cuda()
        critic = critic.cuda()
        classifiers = [x.cuda() for x in classifiers]
        # Wrap metric tensors as trainable Variables on the GPU.
        Us = [Variable(U.cuda(), requires_grad=True) for U in Us]
        Ps = [Variable(P.cuda(), requires_grad=True) for P in Ps]
        Ns = [Variable(N.cuda(), requires_grad=True) for N in Ns]
        if args.metric == "biaffine":
            Ws = [Variable(W.cuda(), requires_grad=True) for W in Ws]
            Vs = [Variable(V.cuda(), requires_grad=True) for V in Vs]
    # Ms = [ U.mm(U.t()) for U in Us ]

    say("\nEncoder: {}\n".format(encoder))
    for i, classifier in enumerate(classifiers):
        say("Classifier-{}: {}\n".format(i, classifier))
    say("Critic: {}\n".format(critic))

    requires_grad = lambda x: x.requires_grad
    # Single optimizer over encoder, classifiers, critic and metric tensors.
    task_params = list(encoder.parameters())
    for classifier in classifiers:
        task_params += list(classifier.parameters())
    task_params += list(critic.parameters())
    task_params += Us
    task_params += Ps
    task_params += Ns
    if args.metric == "biaffine":
        task_params += Ws
        task_params += Vs
    optim_model = optim.Adam(filter(requires_grad, task_params),
                             lr=args.lr,
                             weight_decay=1e-4)

    say("Training will begin from scratch\n")
    best_dev = 0
    best_test = 0
    iter_cnt = 0
    for epoch in range(args.max_epoch):
        # Select the metric tensor triple matching the configured metric.
        if args.metric == "biaffine":
            mats = [Us, Ws, Vs]
        else:
            mats = [Us, Ps, Ns]

        iter_cnt = train_epoch(iter_cnt, encoder, classifiers, critic, mats,
                               [train_loaders, unl_loader, valid_loader],
                               args, optim_model)

        if valid_loader:
            (curr_dev, oracle_curr_dev), confusion_mat = evaluate(
                encoder, classifiers, mats, [train_loaders, valid_loader],
                args)
            say("Dev accuracy/oracle: {:.4f}/{:.4f}\n".format(
                curr_dev, oracle_curr_dev))
        (curr_test, oracle_curr_test), confusion_mat = evaluate(
            encoder, classifiers, mats, [train_loaders, test_loader], args)
        say("Test accuracy/oracle: {:.4f}/{:.4f}\n".format(
            curr_test, oracle_curr_test))

        # Model selection: keep the test score at the best dev epoch.
        if valid_loader and curr_dev >= best_dev:
            best_dev = curr_dev
            best_test = curr_test
            print(confusion_mat)
            if args.save_model:
                say(
                    colored(
                        "Save model to {}\n".format(args.save_model + ".best"),
                        'red'))
                torch.save([encoder, classifiers, Us, Ps, Ns],
                           args.save_model + ".best")
        say("\n")

    if valid_loader:
        say(colored("Best test accuracy {:.4f}\n".format(best_test), 'red'))
    say(
        colored("Test accuracy after training {:.4f}\n".format(curr_test),
                'red'))
def visualize(args):
    """Encode source + target data with a saved multi-source model, reduce
    to 2-D with t-SNE, and plot/save the embedding (per --mop variant).

    Fixes: (1) `map(lambda m: m.eval(), ...)` is lazy in Python 3, so eval
    mode / GPU placement never happened — replaced with explicit loops;
    (2) Python-2 `print vdata.shape, source_num` statement replaced with the
    `print()` function used elsewhere in this file.
    """
    if args.mop == 3:
        encoder, classifiers, source_classifier = torch.load(args.load_model)
    elif args.mop == 2:
        encoder, classifiers, Us, Ps, Ns = torch.load(args.load_model)
    else:
        say("\nUndefined --mop\n")
        return

    # Put all modules in eval mode (disables dropout/batch-norm updates).
    for m in [encoder] + classifiers:
        m.eval()
    if args.cuda:
        # Module.cuda() moves parameters in place, so no rebinding is needed.
        for m in [encoder] + classifiers:
            m.cuda()

    source_train_sets = args.train.split(',')
    train_loaders = []
    for source in source_train_sets:
        filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (source))
        train_dataset = AmazonDataset(filepath)
        train_loader = data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=0
        )
        train_loaders.append(train_loader)

    # NOTE(review): visualizes the target *train* split — confirm intended.
    test_filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (args.test))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=0
    )
    say("Corpus loaded.\n")

    # Encode each source domain, accumulating hidden vectors and labels.
    source_hs = []
    source_ys = []
    source_num = []  # per-domain sample counts (used to split the plot)
    for loader in train_loaders:
        encoding_vecs = torch.FloatTensor()
        labels = torch.LongTensor()
        if args.cuda:
            encoding_vecs = encoding_vecs.cuda()
            labels = labels.cuda()
        for batch, label in loader:
            if args.cuda:
                batch = batch.cuda()
                label = label.cuda()
            batch = Variable(batch)
            hidden = encoder(batch)
            encoding_vecs = torch.cat([encoding_vecs, hidden.data])
            labels = torch.cat([labels, label.view(-1, 1)])
        source_hs.append(encoding_vecs)
        source_ys.append(labels)
        source_num.append(labels.shape[0])

    # Encode the target domain the same way.
    ht = torch.FloatTensor()
    yt = torch.LongTensor()
    if args.cuda:
        ht = ht.cuda()
        yt = yt.cuda()
    for batch, label in test_loader:
        if args.cuda:
            batch = batch.cuda()
            label = label.cuda()
        batch = Variable(batch)
        hidden = encoder(batch)
        ht = torch.cat([ht, hidden.data])
        yt = torch.cat([yt, label.view(-1, 1)])

    h_both = torch.cat(source_hs + [ht]).cpu().numpy()
    y_both = torch.cat(source_ys + [yt]).cpu().numpy()

    say("Dimension reduction...\n")
    tsne = TSNE(perplexity=30, n_components=2, n_iter=3300)
    vdata = tsne.fit_transform(h_both)
    print(vdata.shape, source_num)
    torch.save([vdata, y_both, source_num],
               'vis/%s-%s-mop%d.vdata' % (args.train, args.test, args.mop))
    ms_plot_embedding_sep(vdata, y_both, source_num, args.save_image)