def attention():
    u = require_token()
    if u.name[:4] == 'tmp_':
        abort(403)

    s = request.form.get('switch')
    if s not in ['0', '1']:
        abort(422)

    pid = get_num(request.form.get('pid'))
    post = Post.query.get(pid)
    if not post:
        abort(404)

    at = Attention.query.filter_by(name_hash=hash_name(u.name), pid=pid).first()
    if not at:
        at = Attention(name_hash=hash_name(u.name), pid=pid, disabled=True)
        db.session.add(at)

    # Only touch the record when the requested state differs from the stored
    # one, so repeated identical requests do not skew the like counter.
    if at.disabled != (s == '0'):
        at.disabled = (s == '0')
        post.likenum += 1 - 2 * int(s == '0')
        db.session.commit()

    return {'code': 0, 'likenum': post.likenum, 'attention': (s == '1')}
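For reference, a minimal client-side sketch of exercising this handler. The route path, port, and token header are assumptions here; only the 'switch' and 'pid' form fields come from the code above.

import requests

# Hypothetical smoke test for the view above; the URL and auth header name
# are assumed, since the snippet does not show how require_token() reads them.
resp = requests.post(
    'http://localhost:5000/attention',       # assumed mount point
    data={'switch': '1', 'pid': 42},         # switch='1' follows post 42
    headers={'User-Token': '<your token>'},  # assumed token transport
)
print(resp.json())  # expected shape: {'code': 0, 'likenum': ..., 'attention': True}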
def train():
    params = {'batch_size': opt.batch_size, 'shuffle': True, 'num_workers': 0}
    torch.backends.cudnn.benchmark = True

    training_set = DatasetCUB(opt)
    training_generator = data.DataLoader(training_set, **params)
    test_set = DatasetCUB(opt, train=False)
    test_generator = data.DataLoader(test_set, **params)

    netA = Attention(text_dim=training_set.text_dim, dimensions=training_set.feature_dim).cuda()
    netA.apply(weights_init)
    optimizerA = optim.Adam(netA.parameters(), lr=opt.lr, betas=(0.5, 0.9), weight_decay=0.0001)

    # criterion = torch.nn.CrossEntropyLoss()  # why use cross entropy when already applied softmax
    criterion = torch.nn.NLLLoss()

    text_feat = Variable(torch.tensor(training_set.train_text_feature)).unsqueeze(0).cuda()
    text_feat_test = Variable(torch.tensor(training_set.test_text_feature)).unsqueeze(0).cuda()

    for it in range(opt.max_epoch):
        print('epoch: ', it)
        for bi, batch in enumerate(training_generator):
            images, labels = batch
            image_representation, y_true = Variable(images).cuda(), labels.cuda()

            attention_weights, attention_scores = netA(image_representation, text_feat)
            loss = criterion(attention_weights.squeeze(), y_true.long())

            topv, topi = attention_scores.squeeze().data.topk(1)
            compare_pred_ground = topi.squeeze() == y_true
            correct = np.count_nonzero(compare_pred_ground.cpu() == 1)

            optimizerA.zero_grad()
            loss.backward()
            optimizerA.step()
            # print("it:", it)
            # print('train accuracy:', correct / y_true.shape[0])

        netA.eval()
        correct = 0
        for bi, batch in enumerate(test_generator):
            images, labels = batch
            image_representation, y_true = Variable(images).cuda(), labels.cuda()
            attention_weights, attention_scores = netA(image_representation, text_feat_test)
            topv, topi = attention_weights.squeeze().data.topk(1)
            correct += torch.sum(topi.squeeze() == y_true).cpu().tolist()
        print(test_set.pfc_feat_data_test.shape)
        print('test accuracy:', 100 * correct / test_set.pfc_feat_data_test.shape[0])
        GZSL_evaluation(text_feat, text_feat_test, training_set.train_cls_num,
                        training_generator, test_generator, netA)
        netA.train()
def __init__(self, embed_size, hidden_size, output_size, n_layers=1, dropout=0.2):
    super(Decoder, self).__init__()
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers

    self.embed = nn.Embedding(output_size, embed_size)
    self.dropout = nn.Dropout(dropout, inplace=True)
    self.attention = Attention(hidden_size)
    self.gru = nn.GRU(hidden_size + embed_size, hidden_size, n_layers, dropout=dropout)
    self.out = nn.Linear(hidden_size * 2, output_size)
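For context, one plausible forward pass consistent with these layer shapes. This is a sketch under assumptions: the attention module's call signature and the repo's actual forward method are not shown above.

# Hypothetical forward() matching the layers defined above; F is torch.nn.functional.
# Shapes: token (B,), last_hidden (n_layers, B, H), encoder_outputs (T, B, H).
def forward(self, token, last_hidden, encoder_outputs):
    embedded = self.dropout(self.embed(token)).unsqueeze(0)            # (1, B, E)
    attn_weights = self.attention(last_hidden[-1], encoder_outputs)    # (B, 1, T), assumed signature
    context = attn_weights.bmm(encoder_outputs.transpose(0, 1))        # (B, 1, H)
    context = context.transpose(0, 1)                                  # (1, B, H)
    rnn_input = torch.cat([embedded, context], dim=2)                  # (1, B, E + H), matches the GRU input size
    output, hidden = self.gru(rnn_input, last_hidden)                  # (1, B, H)
    output = self.out(torch.cat([output, context], dim=2).squeeze(0))  # (B, output_size), via hidden_size * 2
    return F.log_softmax(output, dim=1), hidden, attn_weights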
def main():
    train_iterator, valid_iterator, test_iterator, params = prepare_data()
    (INPUT_DIM, OUTPUT_DIM, ENC_EMB_DIM, DEC_EMB_DIM,
     ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT, DEC_DROPOUT) = params
    # INPUT_DIM = len(SRC.vocab), 7855
    # OUTPUT_DIM = len(TRG.vocab), 5893
    # ENC_EMB_DIM = 256
    # DEC_EMB_DIM = 256
    # ENC_HID_DIM = 512
    # DEC_HID_DIM = 512
    # ENC_DROPOUT = 0.5
    # DEC_DROPOUT = 0.5

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)
    model = Seq2Seq(enc, dec, device).to(device)
    model.apply(init_weights)
    print(f'The model has {count_parameters(model):,} trainable parameters')

    for i, batch in enumerate(train_iterator):
        print(f'ITER: {i}')
        example = batch
        print("Input Length:", example.src.shape, "[src_len, batch_size]")
        output = model.forward(example.src, example.trg)
        print(output.shape)
        print('')
        if i > 3:
            break
def follow(request):
    if request.method == 'POST':
        uid = request.POST['uid']
        print("[INFO]social-music.views.follow: uid=%s" % uid)
        attendedUser = User.objects.get(pk=uid)
        try:
            Attention.objects.get(user=request.user, attendedUser=attendedUser)
        except Attention.DoesNotExist:
            # No existing relation: create the follow record.
            attention = Attention()
            attention.user = request.user
            attention.attendedUser = attendedUser
            attention.save()
            print("[INFO]social-music.views.follow: <%s> following <%s> success." % (request.user, attendedUser))
            return HttpResponse("following success!")
        else:
            # The relation already exists, so the follow request fails.
            print("[ERROR]social-music.views.follow: <%s> following <%s> failure." % (request.user, attendedUser))
            return HttpResponse("following failure!")
def do_post():
    u = require_token()

    content = request.form.get('text')
    content = content.strip() if content else None
    content = '[tmp]\n' + content if u.name[:4] == 'tmp_' else content
    post_type = request.form.get('type')
    cw = request.form.get('cw')
    cw = cw.strip() if cw else None

    if not content or len(content) > 4096:
        abort(422)
    if cw and len(cw) > 32:
        abort(422)

    p = Post(
        name_hash=hash_name(u.name),
        content=content,
        post_type=post_type,
        cw=cw or None,
        likenum=1,
        comments=[]
    )

    if post_type == 'text':
        pass
    elif post_type == 'image':
        # TODO
        p.file_url = 'foo bar'
    else:
        abort(422)

    db.session.add(p)
    db.session.commit()

    tags = re.findall(r'(^|\s)#([^#\s]{1,32})', content)
    # print(tags)
    for t in tags:
        tag = t[1]
        # Skip tags that start with digits.
        if not re.match(r'\d+', tag):
            db.session.add(TagRecord(tag=tag, pid=p.id))

    db.session.add(Attention(name_hash=hash_name(u.name), pid=p.id))
    db.session.commit()

    return {'code': 0, 'date': p.id}
def create_seq2seq_model(args, src, trg, loaded_vectors):
    """
    Args:
        src: Field
        trg: Field
    """
    input_dim = len(src.vocab)
    output_dim = len(trg.vocab)
    pad_idx = src.vocab.stoi['<pad>']
    sos_idx = trg.vocab.stoi['<sos>']
    eos_idx = trg.vocab.stoi['<eos>']

    attn = Attention(args.enc_dim, args.dec_dim)
    enc = Encoder(input_dim, args.emb_dim, args.enc_dim, args.dec_dim, args.dropout,
                  src.vocab.stoi, src.vocab.itos)
    dec = Decoder(output_dim, args.emb_dim, args.enc_dim, args.dec_dim, args.dropout,
                  attn, trg.vocab.stoi, trg.vocab.itos)
    model = Seq2Seq(args, enc, dec, pad_idx, sos_idx, eos_idx, device,
                    args.use_pretrained_embeddings, loaded_vectors,
                    args.trainable_embeddings).to(device)
    print(f'The model has {count_parameters(model):,} trainable parameters')
    return model  # presumably intended: the factory hands the model back to the caller
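A hedged wiring example for this factory. The SRC/TRG Field objects, the args namespace, and the optimizer/criterion choices are assumptions, not shown in the snippet above.

# Hypothetical call site; every name here except create_seq2seq_model is an assumption.
model = create_seq2seq_model(args, SRC, TRG, loaded_vectors=None)
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss(ignore_index=TRG.vocab.stoi['<pad>'])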
INPUT_DIM = src_lang.get_vocab_size()
OUTPUT_DIM = trg_lang.get_vocab_size()
print(f"Input vocab {INPUT_DIM} and output vocab {OUTPUT_DIM}")

ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
ENC_HID_DIM = 512
DEC_HID_DIM = 512
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
PAD_IDX = utils.PAD_token
SOS_IDX = utils.SOS_token
EOS_IDX = utils.EOS_token
SUFFIX = ""

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
if UNIFORM:
    dec = DecoderUniform(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)
    SUFFIX = "_uniform"
elif NO_ATTN or DECODE_WITH_NO_ATTN:
    dec = DecoderNoAttn(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)
    if NO_ATTN:
        SUFFIX = "_no-attn"
else:
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)
def train():
    # Fix Seed for Reproducibility #
    torch.manual_seed(9)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(9)

    # Samples, Weights and Results Path #
    paths = [config.samples_path, config.weights_path, config.plots_path]
    paths = [make_dirs(path) for path in paths]

    # Prepare Data Loader #
    train_horse_loader, train_zebra_loader = get_horse2zebra_loader('train', config.batch_size)
    val_horse_loader, val_zebra_loader = get_horse2zebra_loader('test', config.batch_size)
    total_batch = min(len(train_horse_loader), len(train_zebra_loader))

    # Image Pool #
    masked_fake_A_pool = ImageMaskPool(config.pool_size)
    masked_fake_B_pool = ImageMaskPool(config.pool_size)

    # Prepare Networks #
    Attn_A = Attention()
    Attn_B = Attention()
    G_A2B = Generator()
    G_B2A = Generator()
    D_A = Discriminator()
    D_B = Discriminator()

    networks = [Attn_A, Attn_B, G_A2B, G_B2A, D_A, D_B]
    for network in networks:
        network.to(device)

    # Loss Function #
    criterion_Adversarial = nn.MSELoss()
    criterion_Cycle = nn.L1Loss()

    # Optimizers #
    D_optim = torch.optim.Adam(chain(D_A.parameters(), D_B.parameters()),
                               lr=config.lr, betas=(0.5, 0.999))
    G_optim = torch.optim.Adam(chain(Attn_A.parameters(), Attn_B.parameters(),
                                     G_A2B.parameters(), G_B2A.parameters()),
                               lr=config.lr, betas=(0.5, 0.999))

    D_optim_scheduler = get_lr_scheduler(D_optim)
    G_optim_scheduler = get_lr_scheduler(G_optim)

    # Lists #
    D_A_losses, D_B_losses = [], []
    G_A_losses, G_B_losses = [], []

    # Train #
    print("Training Unsupervised Attention-Guided GAN started with total epoch of {}.".format(config.num_epochs))
    for epoch in range(config.num_epochs):
        for i, (real_A, real_B) in enumerate(zip(train_horse_loader, train_zebra_loader)):

            # Data Preparation #
            real_A = real_A.to(device)
            real_B = real_B.to(device)

            # Initialize Optimizers #
            D_optim.zero_grad()
            G_optim.zero_grad()

            ###################
            # Train Generator #
            ###################

            set_requires_grad([D_A, D_B], requires_grad=False)

            # Adversarial Loss using real A #
            attn_A = Attn_A(real_A)
            fake_B = G_A2B(real_A)
            masked_fake_B = fake_B * attn_A + real_A * (1 - attn_A)
            masked_fake_B *= attn_A

            prob_real_A = D_A(masked_fake_B)
            real_labels = torch.ones(prob_real_A.size()).to(device)
            G_loss_A = criterion_Adversarial(prob_real_A, real_labels)

            # Adversarial Loss using real B #
            attn_B = Attn_B(real_B)
            fake_A = G_B2A(real_B)
            masked_fake_A = fake_A * attn_B + real_B * (1 - attn_B)
            masked_fake_A *= attn_B

            prob_real_B = D_B(masked_fake_A)
            real_labels = torch.ones(prob_real_B.size()).to(device)
            G_loss_B = criterion_Adversarial(prob_real_B, real_labels)

            # Cycle Consistency Loss using real A #
            attn_ABA = Attn_B(masked_fake_B)
            fake_ABA = G_B2A(masked_fake_B)
            masked_fake_ABA = fake_ABA * attn_ABA + masked_fake_B * (1 - attn_ABA)

            # Cycle Consistency Loss using real B #
            attn_BAB = Attn_A(masked_fake_A)
            fake_BAB = G_A2B(masked_fake_A)
            masked_fake_BAB = fake_BAB * attn_BAB + masked_fake_A * (1 - attn_BAB)

            # Cycle Consistency Loss #
            G_cycle_loss_A = config.lambda_cycle * criterion_Cycle(masked_fake_ABA, real_A)
            G_cycle_loss_B = config.lambda_cycle * criterion_Cycle(masked_fake_BAB, real_B)

            # Total Generator Loss #
            G_loss = G_loss_A + G_loss_B + G_cycle_loss_A + G_cycle_loss_B

            # Back Propagation and Update #
            G_loss.backward()
            G_optim.step()

            #######################
            # Train Discriminator #
            #######################

            set_requires_grad([D_A, D_B], requires_grad=True)

            # Train Discriminator A using real B (D_A judges B-domain images) #
            prob_real_A = D_A(real_B)
            real_labels = torch.ones(prob_real_A.size()).to(device)
            D_loss_real_A = criterion_Adversarial(prob_real_A, real_labels)

            # Add Pooling #
            masked_fake_B, attn_A = masked_fake_B_pool.query(masked_fake_B, attn_A)
            masked_fake_B *= attn_A

            # Train Discriminator A using fake B #
            prob_fake_B = D_A(masked_fake_B.detach())
            fake_labels = torch.zeros(prob_fake_B.size()).to(device)
            D_loss_fake_A = criterion_Adversarial(prob_fake_B, fake_labels)

            D_loss_A = (D_loss_real_A + D_loss_fake_A).mean()

            # Train Discriminator B using real A (D_B judges A-domain images) #
            prob_real_B = D_B(real_A)
            real_labels = torch.ones(prob_real_B.size()).to(device)
            D_loss_real_B = criterion_Adversarial(prob_real_B, real_labels)

            # Add Pooling #
            masked_fake_A, attn_B = masked_fake_A_pool.query(masked_fake_A, attn_B)
            masked_fake_A *= attn_B

            # Train Discriminator B using fake A #
            prob_fake_A = D_B(masked_fake_A.detach())
            fake_labels = torch.zeros(prob_fake_A.size()).to(device)
            D_loss_fake_B = criterion_Adversarial(prob_fake_A, fake_labels)

            D_loss_B = (D_loss_real_B + D_loss_fake_B).mean()

            # Calculate Total Discriminator Loss #
            D_loss = D_loss_A + D_loss_B

            # Back Propagation and Update #
            D_loss.backward()
            D_optim.step()

            # Add items to Lists #
            D_A_losses.append(D_loss_A.item())
            D_B_losses.append(D_loss_B.item())
            G_A_losses.append(G_loss_A.item())
            G_B_losses.append(G_loss_B.item())

            ####################
            # Print Statistics #
            ####################

            if (i + 1) % config.print_every == 0:
                print("UAG-GAN | Epoch [{}/{}] | Iteration [{}/{}] | D A Losses {:.4f} | D B Losses {:.4f} | G A Losses {:.4f} | G B Losses {:.4f}"
                      .format(epoch + 1, config.num_epochs, i + 1, total_batch,
                              np.average(D_A_losses), np.average(D_B_losses),
                              np.average(G_A_losses), np.average(G_B_losses)))

                # Save Sample Images #
                save_samples(val_horse_loader, val_zebra_loader, G_A2B, G_B2A,
                             Attn_A, Attn_B, epoch, config.samples_path)

        # Adjust Learning Rate #
        D_optim_scheduler.step()
        G_optim_scheduler.step()

        # Save Model Weights #
        if (epoch + 1) % config.save_every == 0:
            torch.save(G_A2B.state_dict(), os.path.join(config.weights_path, 'UAG-GAN_Generator_A2B_Epoch_{}.pkl'.format(epoch + 1)))
            torch.save(G_B2A.state_dict(), os.path.join(config.weights_path, 'UAG-GAN_Generator_B2A_Epoch_{}.pkl'.format(epoch + 1)))
            torch.save(Attn_A.state_dict(), os.path.join(config.weights_path, 'UAG-GAN_Attention_A_Epoch_{}.pkl'.format(epoch + 1)))
            torch.save(Attn_B.state_dict(), os.path.join(config.weights_path, 'UAG-GAN_Attention_B_Epoch_{}.pkl'.format(epoch + 1)))

    # Make a GIF file #
    make_gifs_train("UAG-GAN", config.samples_path)

    # Plot Losses #
    plot_losses(D_A_losses, D_B_losses, G_A_losses, G_B_losses, config.num_epochs, config.plots_path)

    print("Training finished.")
def create_attention():
    # `inputs` avoids shadowing the built-in `input`.
    inputs = Input(shape=(32, 32, 3))
    model = Model(inputs=inputs,
                  outputs=Attention(inputs, config.Att_filters, config.Att_nBlocks, config.Att_nLayers))
    return model
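A short usage sketch, assuming standard Keras semantics; the loss and optimizer here are placeholders, not choices taken from this repo.

# Hypothetical usage of the factory above.
model = create_attention()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()  # inspect the layer stack produced by the Attention block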
def main():
    # ArgumentParser {{{
    parser = argparse.ArgumentParser()
    # hyper parameters
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--n_epochs', type=int, default=10)
    parser.add_argument('--enc_embd_size', type=int, default=256)
    parser.add_argument('--dec_embd_size', type=int, default=256)
    parser.add_argument('--enc_h_size', type=int, default=512)
    parser.add_argument('--dec_h_size', type=int, default=512)
    # other parameters
    parser.add_argument('--beam_width', type=int, default=3)
    parser.add_argument('--n_best', type=int, default=3)
    parser.add_argument('--max_dec_steps', type=int, default=1000)
    parser.add_argument('--export_dir', type=str, default=modelpath)
    parser.add_argument('--model_name', type=str, default='s2s')
    parser.add_argument('--model_path', type=str, default=modelpath / 's2s-vanilla.pt')
    parser.add_argument('--skip_train', action='store_true')
    parser.add_argument('--attention', action='store_true')
    opts = parser.parse_args()
    # }}}

    # opts.skip_train = True
    opts.attention = True

    # Legacy torchtext Field/BucketIterator pipeline, kept for reference:
    # SOS_token = '<SOS>'
    # EOS_token = '<EOS>'
    # SRC = Field(tokenize=tokenize_de,
    #             init_token=SOS_token,
    #             eos_token=EOS_token,
    #             lower=True)
    # TRG = Field(tokenize=tokenize_en,
    #             init_token=SOS_token,
    #             eos_token=EOS_token,
    #             lower=True)
    # train_data, valid_data, test_data = Multi30k.splits(exts=('.de', '.en'), fields=(SRC, TRG))
    # print(f'Number of training examples: {len(train_data.examples)}')
    # print(f'Number of validation examples: {len(valid_data.examples)}')
    # print(f'Number of testing examples: {len(test_data.examples)}')
    # SRC.build_vocab(train_data, min_freq=2)
    # TRG.build_vocab(train_data, min_freq=2)
    # print(f'Unique tokens in source (de) vocabulary: {len(SRC.vocab)}')
    # print(f'Unique tokens in target (en) vocabulary: {len(TRG.vocab)}')
    # train_itr, valid_itr, test_itr =\
    #     BucketIterator.splits(
    #         (train_data, valid_data, test_data),
    #         batch_size=opts.batch_size,
    #         device=DEVICE)

    train_dataset, valid_dataset, test_dataset = Multi30k(root=dataroot)
    # The raw datasets are one-shot iterators; tee them so we can make two
    # passes (one to build the vocab, one to numericalize).
    train_dataset1, train_dataset2 = tee(train_dataset)
    valid_dataset1, valid_dataset2 = tee(valid_dataset)
    test_dataset1, test_dataset2 = tee(test_dataset)

    spacy_de = spacy.load('de_core_news_sm')
    spacy_en = spacy.load('en_core_web_sm')

    de_counter = Counter()
    en_counter = Counter()
    de_tokenizer = get_tokenizer('spacy', language='de_core_news_sm')
    en_tokenizer = get_tokenizer('spacy', language='en_core_web_sm')

    def build_vocab(dataset):
        for (src_sentence, tgt_sentence) in tqdm(dataset):
            de_counter.update(de_tokenizer(src_sentence))
            en_counter.update(en_tokenizer(tgt_sentence))

    def data_process(dataset):
        data = []
        for (raw_de, raw_en) in tqdm(dataset):
            de_tensor_ = torch.tensor(
                [de_vocab[token] for token in de_tokenizer(raw_de)],
                dtype=torch.long)
            en_tensor_ = torch.tensor(
                [en_vocab[token] for token in en_tokenizer(raw_en)],
                dtype=torch.long)
            data.append((de_tensor_, en_tensor_))
        return data

    def generate_batch(data_batch):
        de_batch, en_batch = [], []
        for (de_item, en_item) in data_batch:
            de_batch.append(
                torch.cat([
                    torch.tensor([TRG_SOS_IDX]), de_item,
                    torch.tensor([TRG_EOS_IDX])
                ], dim=0))
            en_batch.append(
                torch.cat([
                    torch.tensor([TRG_SOS_IDX]), en_item,
                    torch.tensor([TRG_EOS_IDX])
                ], dim=0))
        de_batch = pad_sequence(de_batch, padding_value=TRG_PAD_IDX)
        en_batch = pad_sequence(en_batch, padding_value=TRG_PAD_IDX)
        return de_batch, en_batch

    build_vocab(train_dataset1)
    build_vocab(valid_dataset1)
    build_vocab(test_dataset1)
    de_vocab = Vocab(de_counter, specials=['<unk>', '<pad>', '<bos>', '<eos>'])
    en_vocab = Vocab(en_counter, specials=['<unk>', '<pad>', '<bos>', '<eos>'])
    dec_v_size = len(de_vocab)
    enc_v_size = len(en_vocab)

    TRG_PAD_IDX = en_vocab.stoi['<pad>']
    TRG_SOS_IDX = en_vocab.stoi['<bos>']
    TRG_EOS_IDX = en_vocab.stoi['<eos>']

    train_data = data_process(train_dataset2)
    valid_data = data_process(valid_dataset2)
    test_data = data_process(test_dataset2)
    train_itr = DataLoader(train_data, batch_size=opts.batch_size,
                           shuffle=False, collate_fn=generate_batch)
    valid_itr = DataLoader(valid_data, batch_size=opts.batch_size,
                           shuffle=False, collate_fn=generate_batch)
    test_itr = DataLoader(test_data, batch_size=opts.batch_size,
                          shuffle=False, collate_fn=generate_batch)

    encoder = EncoderRNN(opts.enc_embd_size, opts.enc_h_size, opts.dec_h_size, dec_v_size, DEVICE)
    if opts.attention:
        attn = Attention(opts.enc_h_size, opts.dec_h_size)
        decoder = AttnDecoderRNN(opts.dec_embd_size, opts.enc_h_size, opts.dec_h_size, enc_v_size, attn, DEVICE)
    else:
        decoder = DecoderRNN(opts.dec_embd_size, opts.dec_h_size, enc_v_size, DEVICE)
    model = Seq2Seq(encoder, decoder, DEVICE).to(DEVICE)

    # TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
    # TRG_PAD_IDX = tgt_vocab.stoi['<pad>']
    if opts.skip_train:
        model.load_state_dict(torch.load(opts.model_path))

    if not opts.skip_train:
        optimizer = optim.Adam(model.parameters())
        criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)
        best_valid_loss = float('inf')
        for epoch in range(opts.n_epochs):
            start_time = time.time()
            train_loss = train(model, train_itr, optimizer, criterion)
            valid_loss = evaluate(model, valid_itr, criterion)
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                attn_type = 'attn' if opts.attention else 'vanilla'
                model_path = os.path.join(opts.export_dir, f'{opts.model_name}-{attn_type}.pt')
                print(f'Update model! Saved {model_path}')
                torch.save(model.state_dict(), model_path)
            else:
                print('Model was not updated. Stop training')
                break
            print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
            print(f'\t Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f}')

    # TRG_SOS_IDX = TRG.vocab.stoi[TRG.init_token]
    # TRG_EOS_IDX = TRG.vocab.stoi[TRG.eos_token]
    model.eval()
    with torch.no_grad():
        # for batch_id, batch in enumerate(test_itr):
        for batch in tqdm(test_itr):
            # src = batch.src  # (T, bs)
            # trg = batch.trg  # (T, bs)
            src, trg = batch
            src = src.to(DEVICE)
            trg = trg.to(DEVICE)
            print(f'In: {" ".join(de_vocab.itos[idx] for idx in src[:, 0])}')
            enc_outs, h = model.encoder(src)  # (T, bs, H), (bs, H)
            # decoded_seqs: (bs, T)
            start_time = time.time()
            decoded_seqs = beam_search_decoding(
                decoder=model.decoder,
                enc_outs=enc_outs,
                enc_last_h=h,
                beam_width=opts.beam_width,
                n_best=opts.n_best,
                sos_token=TRG_SOS_IDX,
                eos_token=TRG_EOS_IDX,
                max_dec_steps=opts.max_dec_steps,
                device=DEVICE)
            end_time = time.time()
            print(f'for loop beam search time: {end_time-start_time:.3f}')
            print_n_best(decoded_seqs[0], en_vocab.itos)

            start_time = time.time()
            decoded_seqs = batch_beam_search_decoding(
                decoder=model.decoder,
                enc_outs=enc_outs,
                enc_last_h=h,
                beam_width=opts.beam_width,
                n_best=opts.n_best,
                sos_token=TRG_SOS_IDX,
                eos_token=TRG_EOS_IDX,
                max_dec_steps=opts.max_dec_steps,
                device=DEVICE)
            end_time = time.time()
            print(f'Batch beam search time: {end_time-start_time:.3f}')
            print_n_best(decoded_seqs[0], en_vocab.itos)
def __init__(self, word_emb, graph_emb, graph_type, hidden_size, device):
    super(CommandScorerWithKG, self).__init__()
    self.device = device
    self.hidden_size = hidden_size
    self.dropout_ratio = 0.0  # *
    self.n_heads = 1  # *
    self.use_hints = True  # *
    self.bidirectional = True
    self.graph_type = graph_type

    n_factor = 2  # command
    bi_factor = (2 if self.bidirectional else 1)  # hidden size multiplier when bidirectional is used

    self.word_embedding = PretrainedEmbeddings(word_emb)
    self.word_embedding_size = self.word_embedding.dim  # *
    self.word_embedding_prj = torch.nn.Linear(self.word_embedding_size, self.hidden_size, bias=False)
    if not self.bidirectional:
        self.word_hint_prj = torch.nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False)

    self.graph_embedding = None
    if graph_emb is not None and ('local' in self.graph_type or 'world' in self.graph_type):
        self.graph_embedding = PretrainedEmbeddings(graph_emb, True)
        self.graph_embedding_size = self.graph_embedding.dim
        self.graph_embedding_prj = torch.nn.Linear(self.graph_embedding_size, self.hidden_size, bias=False)
        if not self.bidirectional:
            self.graph_hint_prj = torch.nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False)

    # Encoder for the observation
    self.encoder_gru = nn.GRU(hidden_size, hidden_size, batch_first=True, bidirectional=self.bidirectional)
    # Encoder for the commands
    self.cmd_encoder_gru = nn.GRU(hidden_size, hidden_size, batch_first=True, bidirectional=self.bidirectional)
    # RNN that keeps track of the encoded state over time
    self.state_gru = nn.GRU(hidden_size * bi_factor, hidden_size * bi_factor, batch_first=True)

    self.kg_word_encoder_gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
    self.kg_graph_encoder_gru = nn.GRU(hidden_size, hidden_size, batch_first=True)

    if 'local' in self.graph_type or 'world' in graph_type:
        self.attention = CQAttention(block_hidden_dim=hidden_size * bi_factor, dropout=self.dropout_ratio)
        self.attention_prj = torch.nn.Linear(hidden_size * bi_factor * 4, hidden_size * bi_factor, bias=False)

    if 'world' in self.graph_type:
        n_factor += 1
        self.worldkg_gat = GAT(hidden_size, hidden_size, self.dropout_ratio, alpha=0.2, nheads=self.n_heads)
        self.worldkg_attention_prj = torch.nn.Linear(hidden_size * bi_factor * 4, hidden_size * bi_factor, bias=False)
        self.world_self_attention = SelfAttention(hidden_size * bi_factor, hidden_size * bi_factor,
                                                  self.n_heads, self.dropout_ratio)

    if 'local' in graph_type:
        n_factor += 1
        self.localkg_gat = GAT(hidden_size, hidden_size, self.dropout_ratio, alpha=0.2, nheads=self.n_heads)
        self.localkg_attention_prj = torch.nn.Linear(hidden_size * bi_factor * 4, hidden_size * bi_factor, bias=False)
        self.local_self_attention = SelfAttention(hidden_size * bi_factor, hidden_size * bi_factor,
                                                  self.n_heads, self.dropout_ratio)

    self.state_hidden = []
    # General attention from [cmd + obs ==> graph_nodes]
    self.general_attention = Attention(hidden_size * bi_factor * 2, hidden_size * bi_factor)
    self.world_attention = None
    self.local_attention = None
    self.obs2kg_attention = torch.nn.Linear(hidden_size * bi_factor, hidden_size * bi_factor, bias=False)

    self.critic = nn.Linear(hidden_size * bi_factor, 1)
    self.att_cmd = nn.Sequential(
        nn.Linear(hidden_size * bi_factor * n_factor, hidden_size * bi_factor),
        nn.ReLU(),
        nn.Linear(hidden_size * bi_factor, 1))
    self.count = 1
def inference():
    # Inference Path #
    paths = [config.inference_path_H2Z, config.inference_path_Z2H]
    paths = [make_dirs(path) for path in paths]

    # Prepare Data Loader #
    test_horse_loader, test_zebra_loader = get_horse2zebra_loader('test', config.val_batch_size)

    # Prepare Attention and Generator #
    Attn_A = Attention().to(device)
    Attn_B = Attention().to(device)
    G_A2B = Generator().to(device)
    G_B2A = Generator().to(device)

    Attn_A.load_state_dict(torch.load(os.path.join(
        config.weights_path, 'UAG-GAN_Attention_A_Epoch_{}.pkl'.format(config.num_epochs))))
    Attn_B.load_state_dict(torch.load(os.path.join(
        config.weights_path, 'UAG-GAN_Attention_B_Epoch_{}.pkl'.format(config.num_epochs))))
    G_A2B.load_state_dict(torch.load(os.path.join(
        config.weights_path, 'UAG-GAN_Generator_A2B_Epoch_{}.pkl'.format(config.num_epochs))))
    G_B2A.load_state_dict(torch.load(os.path.join(
        config.weights_path, 'UAG-GAN_Generator_B2A_Epoch_{}.pkl'.format(config.num_epochs))))

    # Test #
    print("UAG-GAN | Generating Horse2Zebra images started...")
    for i, (horse, zebra) in enumerate(zip(test_horse_loader, test_zebra_loader)):

        # Prepare Data #
        real_A = horse.to(device)
        real_B = zebra.to(device)

        # Generate Attention Images #
        attn_A = Attn_A(real_A.detach())
        attn_A = attn_A.repeat(1, 3, 1, 1)
        attn_A = 2 * attn_A - 1

        attn_B = Attn_B(real_B.detach())
        attn_B = attn_B.repeat(1, 3, 1, 1)
        attn_B = 2 * attn_B - 1

        # Generated Fake Images #
        fake_B = G_A2B(real_A.detach())
        fake_A = G_B2A(real_B.detach())

        # Save Images (Horse -> Zebra) #
        result = torch.cat((real_A, attn_A, fake_B), dim=0)
        save_image(denorm(result.data),
                   os.path.join(config.inference_path_H2Z,
                                'UAG-GAN_Horse2Zebra_Results_%03d.png' % (i + 1)))

        # Save Images (Zebra -> Horse) #
        result = torch.cat((real_B, attn_B, fake_A), dim=0)
        save_image(denorm(result.data),
                   os.path.join(config.inference_path_Z2H,
                                'UAG-GAN_Zebra2Horse_Results_%03d.png' % (i + 1)))

    # Make a GIF file #
    make_gifs_test("UAG-GAN", "Horse2Zebra", config.inference_path_H2Z)
    make_gifs_test("UAG-GAN", "Zebra2Horse", config.inference_path_Z2H)
                    type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--train-split', type=float, default=0.8, metavar='E',
                    help='percentage of data to use as train.')
args = parser.parse_args()

use_cuda = torch.cuda.is_available()
torch.manual_seed(args.seed)

batch_size = args.batch_size
epochs = args.epochs
model = Attention()
train_split = args.train_split  # percentage of the data we want in train (as opposed to validation)

transform_train = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),  # Convert Pillow Image to Tensor
    # transforms.Resize(128)
])

train_dataset = LymphocytosisDataset(
    "/data/clinical_annotation.csv",
    "/data",
    train=True,
    valid=False,
def main():
    # ArgumentParser {{{
    parser = argparse.ArgumentParser()
    # hyper parameters
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--n_epochs', type=int, default=10)
    parser.add_argument('--enc_embd_size', type=int, default=256)
    parser.add_argument('--dec_embd_size', type=int, default=256)
    parser.add_argument('--enc_h_size', type=int, default=512)
    parser.add_argument('--dec_h_size', type=int, default=512)
    # other parameters
    parser.add_argument('--beam_width', type=int, default=10)
    parser.add_argument('--n_best', type=int, default=5)
    parser.add_argument('--max_dec_steps', type=int, default=1000)
    parser.add_argument('--export_dir', type=str, default='./ckpts/')
    parser.add_argument('--model_name', type=str, default='s2s')
    parser.add_argument('--model_path', type=str, default='')
    parser.add_argument('--skip_train', action='store_true')
    parser.add_argument('--attention', action='store_true')
    opts = parser.parse_args()
    # }}}

    SOS_token = '<SOS>'
    EOS_token = '<EOS>'
    SRC = Field(tokenize=tokenize_de,
                init_token=SOS_token,
                eos_token=EOS_token,
                lower=True)
    TRG = Field(tokenize=tokenize_en,
                init_token=SOS_token,
                eos_token=EOS_token,
                lower=True)
    train_data, valid_data, test_data = Multi30k.splits(exts=('.de', '.en'), fields=(SRC, TRG))
    print(f'Number of training examples: {len(train_data.examples)}')
    print(f'Number of validation examples: {len(valid_data.examples)}')
    print(f'Number of testing examples: {len(test_data.examples)}')

    SRC.build_vocab(train_data, min_freq=2)
    TRG.build_vocab(train_data, min_freq=2)
    print(f'Unique tokens in source (de) vocabulary: {len(SRC.vocab)}')
    print(f'Unique tokens in target (en) vocabulary: {len(TRG.vocab)}')

    train_itr, valid_itr, test_itr = \
        BucketIterator.splits(
            (train_data, valid_data, test_data),
            batch_size=opts.batch_size,
            device=DEVICE)

    enc_v_size = len(SRC.vocab)
    dec_v_size = len(TRG.vocab)

    encoder = EncoderRNN(opts.enc_embd_size, opts.enc_h_size, opts.dec_h_size, enc_v_size, DEVICE)
    if opts.attention:
        attn = Attention(opts.enc_h_size, opts.dec_h_size)
        decoder = AttnDecoderRNN(opts.dec_embd_size, opts.enc_h_size, opts.dec_h_size, dec_v_size, attn, DEVICE)
    else:
        decoder = DecoderRNN(opts.dec_embd_size, opts.dec_h_size, dec_v_size, DEVICE)
    model = Seq2Seq(encoder, decoder, DEVICE).to(DEVICE)

    TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
    if opts.model_path != '':
        model.load_state_dict(torch.load(opts.model_path))

    if not opts.skip_train:
        optimizer = optim.Adam(model.parameters())
        criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)
        best_valid_loss = float('inf')
        for epoch in range(opts.n_epochs):
            start_time = time.time()
            train_loss = train(model, train_itr, optimizer, criterion)
            valid_loss = evaluate(model, valid_itr, criterion)
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                attn_type = 'attn' if opts.attention else 'vanilla'
                model_path = os.path.join(opts.export_dir, f'{opts.model_name}-{attn_type}.pt')
                print(f'Update model! Saved {model_path}')
                torch.save(model.state_dict(), model_path)
            else:
                print('Model was not updated. Stop training')
                break
            print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
            print(f'\t Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f}')

    TRG_SOS_IDX = TRG.vocab.stoi[TRG.init_token]
    TRG_EOS_IDX = TRG.vocab.stoi[TRG.eos_token]
    model.eval()
    with torch.no_grad():
        for batch_id, batch in enumerate(test_itr):
            src = batch.src  # (T, bs)
            trg = batch.trg  # (T, bs)
            print(f'In: {" ".join(SRC.vocab.itos[idx] for idx in src[:, 0])}')
            enc_outs, h = model.encoder(src)  # (T, bs, H), (bs, H)
            # decoded_seqs: (bs, T)
            start_time = time.time()
            decoded_seqs = beam_search_decoding(
                decoder=model.decoder,
                enc_outs=enc_outs,
                enc_last_h=h,
                beam_width=opts.beam_width,
                n_best=opts.n_best,
                sos_token=TRG_SOS_IDX,
                eos_token=TRG_EOS_IDX,
                max_dec_steps=opts.max_dec_steps,
                device=DEVICE)
            end_time = time.time()
            print(f'for loop beam search time: {end_time-start_time:.3f}')
            print_n_best(decoded_seqs[0], TRG.vocab.itos)

            start_time = time.time()
            decoded_seqs = batch_beam_search_decoding(
                decoder=model.decoder,
                enc_outs=enc_outs,
                enc_last_h=h,
                beam_width=opts.beam_width,
                n_best=opts.n_best,
                sos_token=TRG_SOS_IDX,
                eos_token=TRG_EOS_IDX,
                max_dec_steps=opts.max_dec_steps,
                device=DEVICE)
            end_time = time.time()
            print(f'Batch beam search time: {end_time-start_time:.3f}')
            print_n_best(decoded_seqs[0], TRG.vocab.itos)