def run():
    # load source dataset
    src_data_loader = get_data_loader(params.src_dataset)
    src_data_loader_eval = get_data_loader(params.src_dataset, train=False)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    src_classifier = init_model(net=LeNetClassifier(),
                                restore=params.src_classifier_restore)

    # pre-train source model
    print("=== Training classifier for source domain ===")
    print(">>> Source Encoder <<<")
    im, _ = next(iter(src_data_loader))
    summary(src_encoder, input_size=im[0].size())
    print(">>> Source Classifier <<<")
    print(src_classifier)

    if not (src_encoder.restored and src_classifier.restored and
            params.src_model_trained):
        src_encoder, src_classifier = train_src(
            src_encoder, src_classifier, src_data_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    eval_src(src_encoder, src_classifier, src_data_loader_eval)
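# These driver scripts all lean on an init_model() helper from utils whose body is not
# shown here. A minimal sketch of the usual pattern in ADDA-style repositories: restore
# weights when the snapshot exists, set the `restored` flag that the
# `if not (...restored...)` checks above rely on, then move the net to the GPU. This is
# an assumption about its behaviour, not this repository's actual implementation.
import os
import torch

def init_model(net, restore=None):
    """Hypothetical sketch: load a snapshot if present and flag it on the module."""
    net.restored = False
    if restore is not None and os.path.exists(restore):
        net.load_state_dict(torch.load(restore))
        net.restored = True
        print("Restored model from: {}".format(os.path.abspath(restore)))
    if torch.cuda.is_available():
        net.cuda()
    return net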
def run():
    # load dataset
    src_data_loader = get_data_loader(params.src_dataset)
    tgt_data_loader = get_data_loader(params.tgt_dataset)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    tgt_encoder = init_model(net=LeNetEncoder(),
                             restore=params.tgt_encoder_restore)
    critic = init_model(Discriminator(input_dims=params.d_input_dims,
                                      hidden_dims=params.d_hidden_dims,
                                      output_dims=params.d_output_dims),
                        restore=params.d_model_restore)

    # adapt target encoder by GAN
    print("=== Training encoder for target domain ===")
    print(">>> Target Encoder <<<")
    im, _ = next(iter(tgt_data_loader))
    summary(tgt_encoder, input_size=im[0].size())
    print(">>> Critic <<<")
    print(critic)

    # init weights of target encoder with those of source encoder
    if not tgt_encoder.restored:
        tgt_encoder.load_state_dict(src_encoder.state_dict())

    # train target encoder
    if not (tgt_encoder.restored and critic.restored and
            params.tgt_model_trained):
        tgt_encoder = train_tgt(src_encoder, tgt_encoder, critic,
                                src_data_loader, tgt_data_loader)
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature
    train_path = params.train_path
    assert torch.cuda.is_available()

    print("loading_data...")
    # at training time, reuse the preprocessed vocabulary if one already exists
    if os.path.exists("vocab.json"):
        vocab = Vocabulary()
        with open('vocab.json', 'r') as fp:
            vocab.stoi = json.load(fp)
        for key, value in vocab.stoi.items():
            vocab.itos.append(key)
    else:
        vocab = build_vocab(train_path, n_vocab)
        # save vocab
        with open('vocab.json', 'w') as fp:
            json.dump(vocab.stoi, fp)

    train_X, train_y, train_K = load_data(train_path, vocab)
    train_loader = get_data_loader(train_X, train_y, train_K, n_batch)
    print("successfully loaded")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()

    if args.restore:
        encoder = init_model(encoder, restore=params.encoder_restore)
        Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
        manager = init_model(manager, restore=params.manager_restore)
        decoder = init_model(decoder, restore=params.decoder_restore)

    # TODO: every embedding is currently independent; parameters could be shared by
    # direct assignment, as in the Transformer reference implementation:
    # if emb_src_trg_weight_sharing:
    #     self.encoder.src_word_emb.weight = self.decoder.trg_word_emb.weight

    model = [encoder, Kencoder, manager, decoder]
    parameters = list(encoder.parameters()) + list(Kencoder.parameters()) + \
                 list(manager.parameters()) + list(decoder.parameters())
    optimizer = optim.Adam(parameters, lr=args.lr)

    # pre-train knowledge manager
    print("start pre-training")
    pre_train(model, optimizer, train_loader, args)

    print("start training")
    train(model, optimizer, train_loader, args)

    # save final model
    save_models(model, params.all_restore)
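# The vocabulary round-trip above only touches two attributes: stoi (token -> id), which
# is serialised to vocab.json, and itos (id -> token), which is rebuilt on load by
# iterating over stoi. That rebuild relies on the JSON object preserving insertion order,
# which holds for Python 3.7+ dicts. A minimal sketch of such a Vocabulary class, assuming
# exactly that structure (not the project's actual definition):
class Vocabulary:
    def __init__(self):
        self.stoi = {}   # token -> index, dumped to vocab.json
        self.itos = []   # index -> token, rebuilt from stoi on load

    def add_token(self, token):
        if token not in self.stoi:
            self.stoi[token] = len(self.itos)
            self.itos.append(token)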
def train(args): """Train.""" start_time = time.time() if args.one_per_line: corpus: Corpus = ClassificationCorpus( args.data_dir, train_file=args.train_file, dev_file=args.dev_file, ) else: assert args.label_symbol is not None corpus: Corpus = FlyClassificationCorpus( args.data_dir, train_file=args.train_file, dev_file=args.dev_file, comment_symbol=args.comment_symbol, label_symbol=args.label_symbol, ) label_dict = corpus.make_label_dictionary() vocab = corpus.make_vocab_dictionary().get_items() embeddings = utils.init_embeddings(vocab, args) document_embeddings = DocumentRNNEmbeddings( [embeddings], hidden_size=args.hidden_size, use_attn=args.use_attn, num_heads=args.num_heads, scaling=args.scaling, pooling_operation=args.pooling_operation, use_sent_query=args.use_sent_query, ) model = TextClassifier(document_embeddings, label_dictionary=label_dict) utils.init_model(model, args) trainer: ModelTrainer = ModelTrainer(model, corpus, utils.optim_method(args.optim)) trainer.train( args.model_dir, mini_batch_size=args.mini_batch_size, max_epochs=args.max_epochs, anneal_factor=args.anneal_factor, learning_rate=args.learning_rate, patience=args.patience, min_learning_rate=args.min_learning_rate, embeddings_storage_mode=args.embeddings_storage_mode, ) logger.info("End of training: time %.1f min", (time.time() - start_time) / 60)
def train(args): """Train.""" start_time = time.time() column_format = {i: col for i, col in enumerate(args.data_columns)} corpus: Corpus = ColumnCorpus( args.data_dir, column_format, train_file=args.train_file, dev_file=args.dev_file, comment_symbol=args.comment_symbol, ) tag_type = args.data_columns[-1] tag_dict = corpus.make_tag_dictionary(tag_type=tag_type) vocab = corpus.make_vocab_dictionary().get_items() embeddings = utils.init_embeddings(vocab, args) model: SequenceTagger = SequenceTagger( hidden_size=args.hidden_size, embeddings=embeddings, tag_dictionary=tag_dict, tag_type=tag_type, column_format=column_format, use_crf=True, use_attn=args.use_attn, attn_type=args.attn_type, num_heads=args.num_heads, scaling=args.scaling, pooling_operation=args.pooling_operation, use_sent_query=args.use_sent_query, ) utils.init_model(model, args) trainer: ModelTrainer = ModelTrainer(model, corpus, utils.optim_method(args.optim)) trainer.train( args.model_dir, mini_batch_size=args.mini_batch_size, max_epochs=args.max_epochs, anneal_factor=args.anneal_factor, learning_rate=args.learning_rate, patience=args.patience, min_learning_rate=args.min_learning_rate, embeddings_storage_mode=args.embeddings_storage_mode, ) logger.info("End of training: time %.1f min", (time.time() - start_time) / 60)
def __init__(
    self,
    net_G=None,
    net_D=None,
    opt_G=None,
    opt_D=None,
    scaler_G=None,
    scaler_D=None,
    device=None,
    lambda_L1=100.0,
):
    super().__init__()
    self.device = device

    if net_G:
        self.net_G = net_G.to(self.device)
    else:
        self.net_G = init_model(Generator_Res_Unet().get_model(), self.device)
    if net_D:
        self.net_D = net_D.to(self.device)
    else:
        self.net_D = init_model(Discriminator(input_channels=3), self.device)

    if scaler_G:
        self.scaler_G = scaler_G
    else:
        self.scaler_G = amp.GradScaler()
    if scaler_D:
        self.scaler_D = scaler_D
    else:
        self.scaler_D = amp.GradScaler()

    if opt_G:
        self.opt_G = opt_G
    else:
        self.opt_G = optim.Adam(
            self.net_G.parameters(), lr=lr_G, betas=(beta1, beta2)
        )
    if opt_D:
        self.opt_D = opt_D
    else:
        self.opt_D = optim.Adam(
            self.net_D.parameters(), lr=lr_D, betas=(beta1, beta2)
        )

    self.GANcriterion = GANLoss(gan_mode="vanilla").to(self.device)
    self.L1criterion = nn.L1Loss().to(self.device)
    self.lambda_L1 = lambda_L1
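# The constructor above only wires the pieces together; how the paired GradScalers are
# typically driven is easier to see in a training step. A minimal sketch under
# pix2pix-style assumptions (1-channel L input, 2-channel ab target, and a GANLoss called
# as (prediction, target_is_real)); real_L and real_ab are illustrative names and the
# method is not part of this codebase. Assumes `import torch` and
# `from torch.cuda import amp`, matching the amp.GradScaler() calls above.
def optimize_step(self, real_L, real_ab):
    # discriminator update
    self.opt_D.zero_grad()
    with amp.autocast():
        fake_ab = self.net_G(real_L)
        fake_pred = self.net_D(torch.cat([real_L, fake_ab.detach()], dim=1))
        real_pred = self.net_D(torch.cat([real_L, real_ab], dim=1))
        loss_D = 0.5 * (self.GANcriterion(fake_pred, False) +
                        self.GANcriterion(real_pred, True))
    self.scaler_D.scale(loss_D).backward()
    self.scaler_D.step(self.opt_D)
    self.scaler_D.update()

    # generator update
    self.opt_G.zero_grad()
    with amp.autocast():
        fake_pred = self.net_D(torch.cat([real_L, fake_ab], dim=1))
        loss_G = (self.GANcriterion(fake_pred, True) +
                  self.L1criterion(fake_ab, real_ab) * self.lambda_L1)
    self.scaler_G.scale(loss_G).backward()
    self.scaler_G.step(self.opt_G)
    self.scaler_G.update()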
def main(): args = arguments() # init random seed init_random_seed(manual_seed) src_train_loader, src_test_loader, tgt_train_loader, tgt_test_loader = get_dataset( args) print("=== Datasets successfully loaded ===") src_encoder_restore = "snapshots/src-encoder-{}.pt".format(args.src) src_classifier_restore = "snapshots/src-classifier-{}.pt".format(args.src) # load models src_encoder = init_model(BERTEncoder(), restore=src_encoder_restore) src_classifier = init_model(BERTClassifier(), restore=src_classifier_restore) # if torch.cuda.device_count() > 1: # print('Let\'s use {} GPUs!'.format(torch.cuda.device_count())) # src_encoder = nn.DataParallel(src_encoder) # src_classifier = nn.DataParallel(src_classifier) # argument setting print("=== Argument Setting ===") print("src: " + args.src) print("tgt: " + args.tgt) print("seqlen: " + str(args.seqlen)) print("num_epochs: " + str(args.num_epochs)) print("batch_size: " + str(args.batch_size)) print("learning_rate: " + str(args.lr)) if args.enc_train: for param in src_encoder.parameters(): param.requires_grad = True # train source model print("=== Training classifier for source domain ===") src_encoder, src_classifier = train_no_da(args, src_encoder, src_classifier, src_train_loader, src_test_loader) # eval source model print("Evaluate classifier for source domain: {}".format(args.src)) eval_src(src_encoder, src_classifier, src_test_loader) # eval target encoder on test set of target dataset print("Evaluate classifier for encoded target domain: {}".format(args.tgt)) eval_tgt(src_encoder, src_classifier, tgt_test_loader)
def office(): init_random_seed(params.manual_seed) # load dataset src_data_loader = get_data_loader(params.src_dataset) src_data_loader_eval = get_data_loader(params.src_dataset, train=False) tgt_data_loader = get_data_loader(params.tgt_dataset) tgt_data_loader_eval = get_data_loader(params.tgt_dataset, train=False) # load models src_encoder = init_model(net=LeNetEncoder(), restore=params.src_encoder_restore) src_classifier = init_model(net=LeNetClassifier(), restore=params.src_classifier_restore) tgt_encoder = init_model(net=LeNetEncoder(), restore=params.tgt_encoder_restore) critic = init_model(Discriminator(input_dims=params.d_input_dims, hidden_dims=params.d_hidden_dims, output_dims=params.d_output_dims), restore=params.d_model_restore) if not (src_encoder.restored and src_classifier.restored and params.src_model_trained): src_encoder, src_classifier = train_src( src_encoder, src_classifier, src_data_loader) # eval source model # print("=== Evaluating classifier for source domain ===") # eval_src(src_encoder, src_classifier, src_data_loader_eval) # train target encoder by GAN # init weights of target encoder with those of source encoder if not tgt_encoder.restored: tgt_encoder.load_state_dict(src_encoder.state_dict()) if not (tgt_encoder.restored and critic.restored and params.tgt_model_trained): tgt_encoder = train_tgt(src_encoder, tgt_encoder, critic, src_data_loader, tgt_data_loader) # eval target encoder on test set of target dataset print(">>> domain adaption <<<") acc = eval_tgt(tgt_encoder, src_classifier, tgt_data_loader_eval) return acc
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature
    train_path = params.train_path
    assert torch.cuda.is_available()

    print("loading_data...")
    vocab = build_vocab(train_path, n_vocab)
    # save vocab
    with open('vocab.json', 'w') as fp:
        json.dump(vocab.stoi, fp)

    train_X, train_y, train_K = load_data(train_path, vocab)
    train_loader = get_data_loader(train_X, train_y, train_K, n_batch)
    print("successfully loaded")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()

    if args.restore:
        encoder = init_model(encoder, restore=params.encoder_restore)
        Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
        manager = init_model(manager, restore=params.manager_restore)
        decoder = init_model(decoder, restore=params.decoder_restore)

    model = [encoder, Kencoder, manager, decoder]
    parameters = list(encoder.parameters()) + list(Kencoder.parameters()) + \
                 list(manager.parameters()) + list(decoder.parameters())
    optimizer = optim.Adam(parameters, lr=args.lr)

    # pre-train knowledge manager
    print("start pre-training")
    pre_train(model, optimizer, train_loader, args)

    print("start training")
    train(model, optimizer, train_loader, args)

    # save final model
    save_models(model, params.all_restore)
def trio(tgt_classifier_net, tgt_encoder_net, src_dataset, tgt_dataset, conv):
    print('loading pretrained trio after conv ' + str(conv) + '...')
    tgt_classifier = init_model(net=tgt_classifier_net,
                                restore=str(conv) + "_snapshots/" +
                                src_dataset + "-ADDA-target-classifier-final.pt")
    tgt_encoder = init_model(net=tgt_encoder_net,
                             restore=str(conv) + "_snapshots/" +
                             tgt_dataset + "-ADDA-target-encoder-final.pt")
    critic = init_model(Discriminator(input_dims=params.d_input_dims,
                                      hidden_dims=params.d_hidden_dims,
                                      output_dims=params.d_output_dims),
                        restore=str(conv) + "_snapshots/" +
                        tgt_dataset + "-ADDA-critic-final.pt")
    return tgt_classifier, tgt_encoder, critic
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature
    test_path = params.test_path
    vocab_path = params.vocab_path
    assert torch.cuda.is_available()

    print("loading the vocab...")
    vocab = Vocabulary()
    with open(vocab_path, 'r', encoding='utf-8') as fp:
        vocab.stoi = json.load(fp)
    for key, value in vocab.stoi.items():
        vocab.itos.append(key)

    # load data and convert tokens to ids
    print("loading_data...")
    test_X, test_y, test_K = load_data(test_path, vocab)
    test_loader = get_data_loader(test_X, test_y, test_K, n_batch, False)
    print("successfully loaded test data")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer).cuda()

    encoder = init_model(encoder, restore=params.encoder_restore)
    Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
    manager = init_model(manager, restore=params.manager_restore)
    decoder = init_model(decoder, restore=params.decoder_restore)
    print("models successfully loaded!\n")

    model = [encoder, Kencoder, manager, decoder]
    # evaluate_loss(model, 0, test_loader)
    evaluate_sample(model, vocab, test_X, test_y, test_K, test_loader)
def run():
    # load dataset
    tgt_data_loader_eval = get_data_loader(params.tgt_dataset, train=False)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    src_classifier = init_model(net=LeNetClassifier(),
                                restore=params.src_classifier_restore)
    tgt_encoder = init_model(net=LeNetEncoder(),
                             restore=params.tgt_encoder_restore)

    # evaluate target encoder on test set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    eval_tgt(src_encoder, src_classifier, tgt_data_loader_eval)
    print(">>> domain adaption <<<")
    eval_tgt(tgt_encoder, src_classifier, tgt_data_loader_eval)
def main():
    logger.info('=> PyTorch Version: {}'.format(torch.__version__))

    # Environment initialization
    device, pin_memory = init_device(args.seed, args.cpu, args.gpu, args.cpu_affinity)

    # Create the data loader
    train_loader, val_loader, test_loader = Cost2100DataLoader(
        root=args.data_dir,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=pin_memory,
        scenario=args.scenario)()

    # Define model
    model = init_model(args)
    model.to(device)

    # Define loss function
    criterion = nn.MSELoss().to(device)

    # Inference mode
    if args.evaluate:
        Tester(model, device, criterion)(test_loader)
        return

    # Define optimizer and scheduler
    lr_init = 1e-3 if args.scheduler == 'const' else 2e-3
    optimizer = torch.optim.Adam(model.parameters(), lr_init)
    if args.scheduler == 'const':
        scheduler = FakeLR(optimizer=optimizer)
    else:
        scheduler = WarmUpCosineAnnealingLR(optimizer=optimizer,
                                            T_max=args.epochs * len(train_loader),
                                            T_warmup=30 * len(train_loader),
                                            eta_min=5e-5)

    # Define the training pipeline
    trainer = Trainer(model=model,
                      device=device,
                      optimizer=optimizer,
                      criterion=criterion,
                      scheduler=scheduler,
                      resume=args.resume)

    # Start training
    trainer.loop(args.epochs, train_loader, val_loader, test_loader)

    # Final testing
    loss, rho, nmse = Tester(model, device, criterion)(test_loader)
    print(f"\n=! Final test loss: {loss:.3e}"
          f"\n         test rho: {rho:.3e}"
          f"\n        test NMSE: {nmse:.3e}\n")
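# WarmUpCosineAnnealingLR is constructed above with a warm-up length and a total step
# count but is not defined in this snippet. A minimal sketch of a scheduler with that
# interface (linear warm-up for T_warmup steps, then cosine decay towards eta_min at
# T_max), offered as an assumption about its behaviour rather than the project's actual
# implementation:
import math
from torch.optim.lr_scheduler import _LRScheduler

class WarmUpCosineAnnealingLR(_LRScheduler):
    def __init__(self, optimizer, T_max, T_warmup, eta_min=0.0, last_epoch=-1):
        self.T_max = T_max
        self.T_warmup = T_warmup
        self.eta_min = eta_min
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        step = self.last_epoch
        if step < self.T_warmup:
            # linear warm-up from zero towards each base learning rate
            return [base_lr * step / max(1, self.T_warmup) for base_lr in self.base_lrs]
        # cosine decay from the base learning rate down to eta_min
        progress = (step - self.T_warmup) / max(1, self.T_max - self.T_warmup)
        return [self.eta_min + (base_lr - self.eta_min) * 0.5 * (1.0 + math.cos(math.pi * progress))
                for base_lr in self.base_lrs]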
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature
    test_path = params.test_path
    assert torch.cuda.is_available()

    print("loading_data...")
    if os.path.exists("vocab.json"):
        vocab = Vocabulary()
        with open('vocab.json', 'r') as fp:
            vocab.stoi = json.load(fp)
        for key, value in vocab.stoi.items():
            vocab.itos.append(key)
    else:
        train_path = params.train_path
        vocab = build_vocab(train_path, n_vocab)

    test_X, test_y, test_K = load_data(test_path, vocab)
    test_loader = get_data_loader(test_X, test_y, test_K, n_batch)
    print("successfully loaded")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer).cuda()

    encoder = init_model(encoder, restore=params.encoder_restore)
    Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
    manager = init_model(manager, restore=params.manager_restore)
    decoder = init_model(decoder, restore=params.decoder_restore)

    model = [encoder, Kencoder, manager, decoder]
    print("start evaluating")
    evaluate(model, test_loader)
def main():
    logger.info('=> PyTorch Version: {}'.format(torch.__version__))

    # Environment initialization
    device = init_device(args.seed, args.cpu, args.gpu, args.cpu_affinity)

    # Create the test data loader
    test_loader = Cost2100DataLoader(root=args.data_dir,
                                     batch_size=args.batch_size,
                                     num_workers=args.workers,
                                     scenario=args.scenario)()

    # Define model
    model = init_model(args)
    model.to(device)

    # Define loss function
    criterion = nn.MSELoss().to(device)

    # Inference
    Tester(model, device, criterion, print_freq=20)(test_loader)
def main(args, f):
    # args = parse_arguments()
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        encoder = BertEncoder()
        src_encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        encoder = DistilBertEncoder()
        src_encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        encoder = RobertaEncoder()
        src_encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        encoder = DistilRobertaEncoder()
        src_encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()

    # domain discriminator
    discriminator = AdversarialNetworkCdan(param.input_dim * param.num_labels,
                                           param.hidden_dim)

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        encoder = nn.DataParallel(encoder)
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        discriminator = nn.DataParallel(discriminator)

    if args.load:
        encoder = init_model(args, encoder, restore_path=param.src_encoder_path)
        src_encoder = init_model(args, src_encoder, restore_path=param.tgt_encoder_path)
        classifier = init_model(args, classifier, restore_path=param.src_classifier_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        encoder = init_model(args, encoder)
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)
        discriminator = init_model(args, discriminator)

    # train source model
    print("=== Pretrain encoder for source domain ===")
    if args.pretrain:
        encoder, classifier = pretrain(args, encoder, classifier, src_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, encoder, classifier, src_loader)
    src_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt} no adapt acc on src data: {src_acc}\n')

    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_pretrain'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_pretrain'), x, y)

    # adapt
    print("=== Adapt encoder for target domain ===")
    src_encoder.load_state_dict(encoder.state_dict())
    if args.src_free:
        # reusing the same encoder vs. copying it into src_encoder gives different baseline results
        s_res_features = src_gmm(args, encoder, src_loader)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_src_free(args, encoder, src_encoder, discriminator,
                                                  classifier, src_loader, tgt_train_loader,
                                                  tgt_all_loader)
    elif args.data_free:
        s_res_features = src_gmm(args, encoder, src_loader)
        t_res_features = tgt_gmm(encoder, tgt_all_loader, 1)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        tgt_train_loader = t_numpy_dataloader(t_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_data_free(args, encoder, discriminator, classifier,
                                                   src_loader, tgt_train_loader, tgt_all_loader)
    else:
        encoder, classifier = cdan_adapt(args, encoder, discriminator, classifier,
                                         src_loader, tgt_train_loader, tgt_all_loader)

    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_adapt_cdan'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_adapt_cdan'), x, y)

    # argument setting
    print(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}"
    )

    # eval target encoder on test set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}\n\n"
    )
def experiments(exp): #print(exp, case, affine, num_epochs) # init random seed #params.d_learning_rate = lr_d #params.c_learning_rate = lr_c init_random_seed(params.manual_seed) # load dataset src_dataset, tgt_dataset = exp.split('_') src_data_loader = get_data_loader(src_dataset) src_data_loader_eval = get_data_loader(src_dataset, train=False) tgt_data_loader = get_data_loader(tgt_dataset) tgt_data_loader_eval = get_data_loader(tgt_dataset, train=False) # load models src_encoder = init_model(net=LeNetEncoder(), restore=params.src_encoder_restore, exp=exp) src_classifier = init_model(net=LeNetClassifier(), restore=params.src_classifier_restore, exp=exp) tgt_encoder = init_model(net=LeNetEncoder(), restore=params.tgt_encoder_restore, exp=exp) critic = init_model(Discriminator(input_dims=params.d_input_dims, hidden_dims=params.d_hidden_dims, output_dims=params.d_output_dims), exp=exp, restore=params.d_model_restore) # train source model print("=== Training classifier for source domain ===") print(">>> Source Encoder <<<") print(src_encoder) print(">>> Source Classifier <<<") print(src_classifier) if not (src_encoder.restored and src_classifier.restored and params.src_model_trained): src_encoder, src_classifier = train_src(exp, src_encoder, src_classifier, src_data_loader, src_data_loader_eval) # eval source model print("=== Evaluating classifier for source domain ===") evaluation(src_encoder, src_classifier, src_data_loader_eval) # train target encoder by GAN print("=== Training encoder for target domain ===") print(">>> Target Encoder <<<") print(tgt_encoder) print(">>> Critic <<<") print(critic) # init weights of target encoder with those of source encoder if not tgt_encoder.restored: tgt_encoder.load_state_dict(src_encoder.state_dict()) if not (tgt_encoder.restored and critic.restored and params.tgt_model_trained): tgt_encoder = train_tgt(exp, src_encoder, tgt_encoder, critic, src_classifier, src_data_loader, tgt_data_loader, tgt_data_loader_eval) # eval target encoder on test set of target dataset print("=== Evaluating classifier for encoded target domain ===") print(">>> source only <<<") evaluation(src_encoder, src_classifier, tgt_data_loader_eval) print(">>> domain adaption <<<") evaluation(tgt_encoder, src_classifier, tgt_data_loader_eval)
os.environ["CUDA_VISIBLE_DEVICES"] = '0,1' if __name__ == '__main__': # init random seed init_random_seed(params.manual_seed) # load dataset src_data_loader = get_visda(root=params.data_root, sub_dir='train', split='train') src_data_loader_eval = get_visda(root=params.data_root, sub_dir='train', split='test') tgt_data_loader = get_visda(root=params.data_root, sub_dir='validation', split='train') tgt_data_loader_eval = get_visda(root=params.data_root, sub_dir='validation', split='test') # load models src_encoder = init_model(net=ResNet34Encoder(), restore=params.src_encoder_restore) src_classifier = init_model(net=Classifier(), restore=params.src_classifier_restore) # train source model # print("=== Training classifier for source domain ===") # print(">>> Source Encoder <<<") # print(src_encoder) # print(">>> Source Classifier <<<") # print(src_classifier) # eval source model print("=== Evaluating classifier for source domain ===") eval_src(src_encoder, src_classifier, src_data_loader_eval) # eval target encoder on test set of target dataset
"Target RUBV3D2: Verification IOU Precision = {:.4f}%, F1 IOU= {:.4f}%" .format(iou_acc_v, f1_v)) (self.store_learning).write_file( (self.store_learning).IOU_acc_file_verif, iou_acc_v) (self.store_learning).write_file( (self.store_learning).f1_IOU_file_verif, f1_v) if __name__ == '__main__': ##Final Model Classification from RUBV3D2 import UNet src_encoder = init_model(net=UNet(params.input_channels, params.nb_classes, params.default_layers, params.default_features_root, params.default_filter_width, params.distance_unet, params.bins, params.default_bn), restore=params.src_encoder_restore) tgt_encoder = init_model(net=UNet(params.input_channels, params.nb_classes, params.default_layers, params.default_features_root, params.default_filter_width, params.distance_unet, params.bins, params.default_bn), restore=params.tgt_encoder_restore) discri_net = init_model(net=Discriminator( input_dims=params.d_input_dims, output_dims=params.d_output_dims, len_feature_map=params.d_len_feature_maps,
def main(args, f):
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader, tgt_te = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        # encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        # encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        # encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        # encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()
    discriminator = Discriminator()

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        # encoder = nn.DataParallel(encoder)
        discriminator = nn.DataParallel(discriminator)

    if args.load:
        src_encoder = init_model(args, src_encoder, restore_path=param.src_encoder_path)
        classifier = init_model(args, classifier, restore_path=param.src_classifier_path)
        # encoder = init_model(args, encoder, restore_path=param.tgt_encoder_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)
        # encoder = init_model(args, encoder)
        discriminator = init_model(args, discriminator)

    # train source model
    if args.pretrain:
        print("=== Training classifier for source domain ===")
        src_encoder, classifier = pretrain(args, src_encoder, classifier, src_loader)

        # save pretrained model
        # save_model(args, src_encoder, param.src_encoder_path)
        # save_model(args, classifier, param.src_classifier_path)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, src_encoder, classifier, src_loader)
    src_acc = evaluate(args, src_encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: No adapt acc on src data: {src_acc}\n')

    # adapt
    print("=== Adapt tgt encoder ===")
    # encoder.load_state_dict(src_encoder.state_dict())
    # if args.src_free:
    #     s_res_features = src_gmm(args, src_encoder, src_loader)
    #     src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
    #     encoder = aad_adapt_src_free(args, src_encoder, encoder, discriminator,
    #                                  classifier, src_loader, tgt_train_loader, tgt_all_loader)
    # else:
    if args.adapt:
        encoder, classifier = shot_adapt(args, src_encoder, classifier, tgt_train_loader,
                                         tgt_all_loader, tgt_te)
        # save_model(args, encoder, param.tgt_encoder_path)

    # argument setting
    # print("=== Argument Setting ===")
    print(
        f"model_type: {args.model}; max_seq_len: {args.max_seq_length}; batch_size: {args.batch_size}; "
        f"pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; src: {args.src}; tgt: {args.tgt}; "
        f'src_free: {args.src_free}; dp: {args.dp}')

    # eval target encoder on test set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; pre_epochs: {args.pre_epochs}; "
        f"num_epochs: {args.num_epochs}; src_free: {args.src_free}; src: {args.src}; "
        f"tgt: {args.tgt}; dp: {args.dp}\n\n")
parser.add_argument('--out_dir', type=str, default=None)
parser.add_argument('--fig_save_path', type=str, default=None)
parser.add_argument('--num', type=int, default=10)
parser.add_argument('--sample_h', type=int, default=256)
parser.add_argument('--sample_w', type=int, default=256)
parser.add_argument('--lower', type=float, default=-3)
parser.add_argument('--upper', type=float, default=3)
parser.add_argument('--starter_im1_path', type=str)
parser.add_argument('--starter_im2_path', type=str)
args = parser.parse_args()

print(args)
print('----------')

latent_dim = 512
model = init_model(args.model_class, args.restore_path,
                   restore_required=True, latent_dim=latent_dim)
model.eval()

lower = args.lower
upper = args.upper
num = args.num
sample_h = args.sample_h
sample_w = args.sample_w
fig_save_path = args.fig_save_path
out_dir = args.out_dir
starter_im1_path = args.starter_im1_path
starter_im2_path = args.starter_im2_path

# generate samples
with torch.no_grad():
opts.train_opts(parser)
opt = parser.parse_args()

location = opt.gpu if torch.cuda.is_available() and opt.gpu != -1 else 'cpu'
device = torch.device(location)

TXT, train_iter, valid_iter = \
    preproc.build_iters(ftrain=opt.ftrain,
                        fvalid=opt.fvalid,
                        bsz=opt.bsz,
                        min_freq=opt.min_freq,
                        device=opt.gpu)

model = nets.BaseRNN(voc_size=len(TXT.vocab.itos),
                     edim=opt.edim,
                     hdim=opt.hdim,
                     padding_idx=TXT.vocab.stoi[PAD]).to(device)

utils.init_model(model)

optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()),
                       lr=opt.lr)
# per-element losses (no reduction) so they can be masked and averaged downstream
criterion = nn.CrossEntropyLoss(reduction='none')

train(model, {'train': train_iter, 'valid': valid_iter}, opt, criterion, optimizer)
data_sequences = []
for i in range(len(data.review)):
    tokenized_text = tokenizer.tokenize(data.review[i])
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    data_sequences.append(indexed_tokens)

data_loader = get_data_loader(data_sequences, data.label, args.batch_size, args.seqlen)

encoder = BERTEncoder()
if torch.cuda.device_count() > 1:
    encoder = torch.nn.DataParallel(encoder)

if args.tgt:
    encoder = init_model(encoder, restore=param.tgt_encoder_restore)
else:
    encoder = init_model(encoder, restore=param.src_encoder_restore)

feats = []
labels = []
encoder.eval()
print("=== start encoding data ===")
for step, (reviews, label) in enumerate(data_loader):
    mask = (reviews != 0).long()
    feat = encoder(reviews, mask)
    feats.extend(feat.cpu().detach().numpy())
    labels.extend(label.cpu().numpy())
    print("Step [%.2d/%.2d]" % (step + 1, len(data_loader)), feat.size())

feats = np.array(feats)
        edge = {}
        edge['data'] = result
        scipy.io.savemat(join(save_dir, "%s.mat" % filename), edge)
        # result = Image.fromarray((result * 255).astype(np.uint8))
        # result.save(join(save_dir, "%s.png" % filename))
        print("Running test [%d/%d]" % (idx + 1, len(test_loader)))


test_dataset = BSDS_Loader(root='../DATA/data/HED-BSDS/')
test_loader = DataLoader(test_dataset, batch_size=1,
                         num_workers=0, drop_last=True, shuffle=False)
with open('../DATA/data/HED-BSDS/test.lst', 'r') as f:
    test_list = f.readlines()
test_list = [split(i.rstrip())[1] for i in test_list]
assert len(test_list) == len(test_loader), "%d vs %d" % (len(test_list), len(test_loader))
print('Test size : %d' % len(test_loader))

save_dir = 'bsds'
model = RCF()
init_model(model)
# print(state_dict['conv1_1.weight'].shape)
# quit()
model.cuda()
test_model(model, test_loader, test_list, save_dir)
from utils import get_data_loader, init_model, init_random_seed
from test import eval_tgt
from adopt import train_target

if __name__ == '__main__':
    # init random seed
    init_random_seed(params.manual_seed)

    # load dataset
    src_data_loader = get_data_loader(params.src_dataset)
    src_data_loader_eval = get_data_loader(params.src_dataset, train=False)
    tgt_data_loader = get_data_loader(params.tgt_dataset)
    tgt_data_loader_eval = get_data_loader(params.tgt_dataset, train=False)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    src_classifier = init_model(net=LeNetClassifier(),
                                restore=params.src_classifier_restore)
    tgt_encoder = init_model(net=LeNetEncoder(),
                             restore=params.tgt_encoder_restore)
    critic = init_model(Discriminator(input_dim=params.d_input_dims,
                                      hidden_dim=params.d_hidden_dims,
                                      output_dim=params.d_output_dims),
                        restore=params.d_model_restore)

    # train source model
    print("=== Training classifier for source domain ===")
    print(">>> Source Encoder <<<")
    print(src_encoder)
    print(">>> Source Classifier <<<")
    print(src_classifier)
def main(data, val_data, config): wandb.init(project="prescalenorm") config = load_config(config) dataset = init_dataset(data) dataset.train_percent = config.train_data_percent dataset.set_data_source(config.data_source) loader = init_dataloader(dataset, batch_size=config.batch_size) model = init_model(type_vocab_size=config.type_vocab_size) print(f'from_scratch: {config.from_scratch}', f'prenorm: {config.prenorm}', f'tie_qk: {config.tie_query_key}', f'norm_type: {config.norm_type}') model = configure_model(model, config) val_dataset = init_dataset(val_data) val_dataset.train_percent = config.val_data_percent val_dataset.to_val_mode('scientsbank', 'answer') val_loader = init_dataloader(val_dataset, batch_size=config.batch_size, random=False) wandb.watch(model) model.train() cuda = torch.cuda.is_available() if cuda: model.cuda() optimizer = init_optimizer(model, config) lr_scheduler = transformers.get_cosine_schedule_with_warmup( optimizer, config.warmup_steps, config.total_steps) # best_val_acc = 0.0 # torch.save(config, os.path.join(wandb.run.dir, 'model.config')) json.dump(config.__dict__, open(os.path.join(wandb.run.dir, 'model_config.json'), 'w')) wandb.save('*.config') best_f1 = 0.0 patience = 0 try: while lr_scheduler.last_epoch <= config.total_steps: av_epoch_loss = train_epoch(loader, model, optimizer, lr_scheduler, config, cuda) #tidy stuff up every epoch gc.collect() torch.cuda.empty_cache() p, r, f1, val_acc = val_loop(model, val_loader, cuda) log_line = f'precision: {p:.5f} | recall: {r:.5f} | f1: {f1:.5f} | accuracy: {val_acc:.5f}\n' print(log_line[:-1]) print('av_epoch_loss', av_epoch_loss) if f1 > best_f1: print( "saving to: ", os.path.join(wandb.run.dir, f'full_bert_model_best_acc.pt')) torch.save([model.state_dict(), config.__dict__], os.path.join(wandb.run.dir, f'full_bert_model_best_f1.pt')) wandb.save('*.pt') best_f1 = f1 patience = max((0, patience - 1)) else: patience += 1 if patience >= 3: break if av_epoch_loss < .2: break torch.save([model.state_dict(), config.__dict__], os.path.join( wandb.run.dir, f'full_bert_model_{lr_scheduler.last_epoch}_steps.pt')) #Move stuff off the gpu model.cpu() #This is for sure a kinda dumb way of doing it, but the least mentally taxing right now optimizer = init_optimizer(model, config) gc.collect() torch.cuda.empty_cache() return model except KeyboardInterrupt: wandb.save('*.pt') #Move stuff off the gpu model.cpu() #This is for sure a kinda dumb way of doing it, but the least mentally taxing right now optimizer = init_optimizer(model, config) gc.collect() torch.cuda.empty_cache() return model
def main(): max_len = 50 n_vocab = params.n_vocab n_layer = params.n_layer n_hidden = params.n_hidden n_embed = params.n_embed temperature = params.temperature assert torch.cuda.is_available() if os.path.exists("data/vocab.json"): vocab = Vocabulary() with open('data/vocab.json', 'r') as fp: vocab.stoi = json.load(fp) for key, value in vocab.stoi.items(): vocab.itos.append(key) else: print("vocabulary doesn't exist!") return print("loading model...") encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer).cuda() Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer).cuda() manager = Manager(n_hidden, n_vocab, temperature).cuda() decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer).cuda() encoder = init_model(encoder, restore=params.encoder_restore) Kencoder = init_model(Kencoder, restore=params.Kencoder_restore) manager = init_model(manager, restore=params.manager_restore) decoder = init_model(decoder, restore=params.decoder_restore) print("successfully loaded!\n") utterance = "" while True: if utterance == "exit": break k1 = input("Type first Knowledge: ").lower() while not k1: print("Please type first Knowledge.\n") k1 = input("Type first Knowledge: ").lower() k2 = input("Type second Knowledge: ").lower() while not k2: print("Please type second Knowledge.\n") k2 = input("Type second Knowledge: ").lower() k3 = input("Type third Knowledge: ").lower() while not k3: print("Please type third Knowledge.\n") k3 = input("Type third Knowledge: ").lower() K = [k1, k2, k3] K = knowledgeToIndex(K, vocab) K = Kencoder(K) print() while True: utterance = input("you: ").lower() while not utterance: print("Please type utterance.\n") utterance = input("you: ") if utterance == "change knowledge" or utterance == "exit": print() break X = [] tokens = nltk.word_tokenize(utterance) for word in tokens: if word in vocab.stoi: X.append(vocab.stoi[word]) else: X.append(vocab.stoi["<UNK>"]) X = torch.LongTensor(X).unsqueeze(0).cuda() # X: [1, x_seq_len] encoder_outputs, hidden, x = encoder(X) k_i = manager(x, None, K) outputs = torch.zeros( max_len, 1, n_vocab).cuda() # outputs: [max_len, 1, n_vocab] hidden = hidden[decoder.n_layer:] output = torch.LongTensor([params.SOS]).cuda() for t in range(max_len): output, hidden, attn_weights = decoder(output, k_i, hidden, encoder_outputs) outputs[t] = output output = output.data.max(1)[1] outputs = outputs.max(2)[1] answer = "" for idx in outputs: if idx == params.EOS: break answer += vocab.itos[idx] + " " print("bot:", answer[:-1], "\n")
import gym
import world
import utils
from Buffer import ReplayBuffer
from models import DQN
from world import Print, ARGS
from wrapper import WrapIt
from procedure import train_DQN

# ------------------------------------------------
env = gym.make('RiverraidNoFrameskip-v4')
env = WrapIt(env)
Print('ENV action', env.unwrapped.get_action_meanings())
Print('ENV observation',
      f"Image: {ARGS.imgDIM} X {ARGS.imgDIM} X {1}")  # we assume grayscale observations
# ------------------------------------------------
Optimizer = utils.getOptimizer()
schedule = utils.LinearSchedule(1000000, 0.1)
Game_buffer = ReplayBuffer(ARGS.buffersize, ARGS.framelen)
Q = utils.init_model(env, DQN).train().to(world.DEVICE)
Q_target = utils.init_model(env, DQN).eval().to(world.DEVICE)
# ------------------------------------------------
train_DQN(env,
          Q=Q,
          Q_target=Q_target,
          optimizer=Optimizer,
          replay_buffer=Game_buffer,
          exploration=schedule)
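# utils.LinearSchedule(1000000, 0.1) above supplies the exploration schedule for
# epsilon-greedy action selection. Its implementation is not shown; the conventional form
# (as in the OpenAI Baselines DQN code) anneals a value linearly from an initial
# probability to a final one over a fixed number of timesteps. A sketch under that
# assumption, not necessarily this repository's version:
class LinearSchedule:
    def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
        self.schedule_timesteps = schedule_timesteps
        self.final_p = final_p
        self.initial_p = initial_p

    def value(self, t):
        # fraction of the schedule completed at timestep t, clipped to [0, 1]
        fraction = min(float(t) / self.schedule_timesteps, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)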
def main():
    args = parse_arguments()

    # argument setting
    print("=== Argument Setting ===")
    print("src: " + args.src)
    print("tgt: " + args.tgt)
    print("seed: " + str(args.seed))
    print("train_seed: " + str(args.train_seed))
    print("model_type: " + str(args.model))
    print("max_seq_length: " + str(args.max_seq_length))
    print("batch_size: " + str(args.batch_size))
    print("pre_epochs: " + str(args.pre_epochs))
    print("num_epochs: " + str(args.num_epochs))
    print("AD weight: " + str(args.alpha))
    print("KD weight: " + str(args.beta))
    print("temperature: " + str(args.temperature))
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    print("=== Processing datasets ===")
    if args.src in ['blog', 'airline', 'imdb']:
        src_x, src_y = CSV2Array(
            os.path.join('data', args.src, args.src + '.csv'))
    else:
        src_x, src_y = XML2Array(
            os.path.join('data', args.src, 'negative.review'),
            os.path.join('data', args.src, 'positive.review'))

    src_x, src_test_x, src_y, src_test_y = train_test_split(
        src_x, src_y, test_size=0.2, stratify=src_y, random_state=args.seed)

    if args.tgt in ['blog', 'airline', 'imdb']:
        tgt_x, tgt_y = CSV2Array(
            os.path.join('data', args.tgt, args.tgt + '.csv'))
    else:
        tgt_x, tgt_y = XML2Array(
            os.path.join('data', args.tgt, 'negative.review'),
            os.path.join('data', args.tgt, 'positive.review'))

    tgt_train_x, tgt_test_x, tgt_train_y, tgt_test_y = train_test_split(
        tgt_x, tgt_y, test_size=0.2, stratify=tgt_y, random_state=args.seed)

    if args.model in ['roberta', 'distilroberta']:
        src_features = roberta_convert_examples_to_features(
            src_x, src_y, args.max_seq_length, tokenizer)
        src_test_features = roberta_convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = roberta_convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)
        tgt_train_features = roberta_convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)
    else:
        src_features = convert_examples_to_features(
            src_x, src_y, args.max_seq_length, tokenizer)
        src_test_features = convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)
        tgt_train_features = convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)

    # load dataset
    src_data_loader = get_data_loader(src_features, args.batch_size)
    src_data_eval_loader = get_data_loader(src_test_features, args.batch_size)
    tgt_data_train_loader = get_data_loader(tgt_train_features, args.batch_size)
    tgt_data_all_loader = get_data_loader(tgt_features, args.batch_size)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()

    if args.load:
        src_encoder = init_model(args, src_encoder, restore=param.src_encoder_path)
        src_classifier = init_model(args, src_classifier, restore=param.src_classifier_path)
        tgt_encoder = init_model(args, tgt_encoder, restore=param.tgt_encoder_path)
        discriminator = init_model(args, discriminator, restore=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)
        tgt_encoder = init_model(args, tgt_encoder)
        discriminator = init_model(args, discriminator)

    # train source model
    print("=== Training classifier for source domain ===")
    if args.pretrain:
        src_encoder, src_classifier = pretrain(args, src_encoder, src_classifier,
                                               src_data_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(src_encoder, src_classifier, src_data_loader)
    evaluate(src_encoder, src_classifier, src_data_eval_loader)
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)

    # freeze source encoder and classifier
    for params in src_encoder.parameters():
        params.requires_grad = False
    for params in src_classifier.parameters():
        params.requires_grad = False

    # train target encoder by GAN
    print("=== Training encoder for target domain ===")
    if args.adapt:
        tgt_encoder.load_state_dict(src_encoder.state_dict())
        tgt_encoder = adapt(args, src_encoder, tgt_encoder, discriminator,
                            src_classifier, src_data_loader, tgt_data_train_loader,
                            tgt_data_all_loader)

    # eval target encoder on test set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)
    print(">>> domain adaption <<<")
    evaluate(tgt_encoder, src_classifier, tgt_data_all_loader)
                               train=True)
src_data_loader_eval = get_data_loader(params.src_dataset,
                                       dataset_root=params.dataset_root,
                                       batch_size=params.batch_size,
                                       train=False)
tgt_data_loader = get_data_loader(params.tgt_dataset,
                                  dataset_root=params.dataset_root,
                                  batch_size=params.batch_size,
                                  train=True)
tgt_data_loader_eval = get_data_loader(params.tgt_dataset,
                                       dataset_root=params.dataset_root,
                                       batch_size=params.batch_size,
                                       train=False)

# load models
src_encoder = init_model(net=LeNetEncoder(),
                         restore=params.src_encoder_restore)
src_classifier = init_model(net=LeNetClassifier(),
                            restore=params.src_classifier_restore)
tgt_encoder = init_model(net=LeNetEncoder(),
                         restore=params.tgt_encoder_restore)
critic = init_model(Discriminator(),
                    restore=params.d_model_restore)

# train source model
print("=== Training classifier for source domain ===")
if not (src_encoder.restored and src_classifier.restored and
        params.src_model_trained):
    src_encoder, src_classifier = train_src(src_encoder, src_classifier,
                                            src_data_loader, params)

# eval source model