Example #1
def attention():
    u = require_token()
    if u.name[:4] == 'tmp_': abort(403)

    s = request.form.get('switch')
    if s not in ['0', '1']: abort(422)

    pid = get_num(request.form.get('pid'))
    
    post = Post.query.get(pid)
    if not post: abort(404)

    at = Attention.query.filter_by(name_hash=hash_name(u.name), pid=pid).first()

    if not at:
        at = Attention(name_hash=hash_name(u.name), pid=pid, disabled=True)
        db.session.add(at)

    if at.disabled != (s == '0'):
        at.disabled = (s == '0')
        post.likenum += 1 if s == '1' else -1
        db.session.commit()

    return {
            'code': 0,
            'likenum': post.likenum,
            'attention': (s=='1')
            }
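Examples #1 and #6 rely on a SQLAlchemy Attention model and a hash_name helper that are defined elsewhere in the project. A minimal sketch of what they might look like, with the field names and the hashing scheme inferred from the usage above (treat them as assumptions, not the project's actual definitions):

import hashlib

from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy()

def hash_name(name):
    # Hypothetical helper: store a salted digest instead of the raw user name.
    return hashlib.sha256(('salt:' + name).encode()).hexdigest()

class Attention(db.Model):
    # One row per (user, post) follow relation; `disabled` toggles it off.
    id = db.Column(db.Integer, primary_key=True)
    name_hash = db.Column(db.String(64), nullable=False, index=True)
    pid = db.Column(db.Integer, nullable=False, index=True)
    disabled = db.Column(db.Boolean, default=False)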
Example #2
def train():
    params = {'batch_size': opt.batch_size, 
              'shuffle': True,
              'num_workers': 0}

    torch.backends.cudnn.benchmark = True
    training_set = DatasetCUB(opt)
    training_generator = data.DataLoader(training_set, **params)
    test_set = DatasetCUB(opt,train=False)
    test_generator = data.DataLoader(test_set, **params)
    netA = Attention(text_dim=training_set.text_dim, dimensions=training_set.feature_dim).cuda()
    netA.apply(weights_init)
    optimizerA = optim.Adam(netA.parameters(), lr=opt.lr, betas=(0.5, 0.9), weight_decay=0.0001)
    # criterion = torch.nn.CrossEntropyLoss()  # why use cross entropy when already applied softmax
    criterion = torch.nn.NLLLoss()

    text_feat = Variable(torch.tensor(training_set.train_text_feature)).unsqueeze(0).cuda()
    text_feat_test = Variable(torch.tensor(training_set.test_text_feature)).unsqueeze(0).cuda()

    for it in range(opt.max_epoch):
        print('epoch: ', it)

        for bi, batch in enumerate(training_generator):

            images, labels = batch
            image_representation, y_true = Variable(images).cuda(), labels.cuda()

            attention_weights, attention_scores = netA(image_representation, text_feat)

            loss = criterion(attention_weights.squeeze(), y_true.long())

            topv, topi = attention_scores.squeeze().data.topk(1)
            compare_pred_ground = topi.squeeze() == y_true
            correct = np.count_nonzero(compare_pred_ground.cpu() == 1)

            optimizerA.zero_grad()
            loss.backward()
            optimizerA.step()

        # print("it:", it)

        # print('train accuracy:', correct / y_true.shape[0])
        netA.eval()

        correct = 0

        for bi, batch in enumerate(test_generator):

            images, labels = batch

            image_representation, y_true = Variable(images).cuda(), labels.cuda()
            attention_weights, attention_scores = netA(image_representation, text_feat_test)
            topv, topi = attention_weights.squeeze().data.topk(1)
            correct += torch.sum(topi.squeeze() == y_true).cpu().item()

        print(test_set.pfc_feat_data_test.shape)
        print('test accuracy:', 100 * correct / test_set.pfc_feat_data_test.shape[0])
        GZSL_evaluation(text_feat, text_feat_test, training_set.train_cls_num, training_generator, test_generator, netA)
        netA.train()
Example #3
    def __init__(self,
                 embed_size,
                 hidden_size,
                 output_size,
                 n_layers=1,
                 dropout=0.2):

        super(Decoder, self).__init__()

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.embed = nn.Embedding(output_size, embed_size)
        self.dropout = nn.Dropout(dropout, inplace=True)
        self.attention = Attention(hidden_size)

        self.gru = nn.GRU(hidden_size + embed_size,
                          hidden_size,
                          n_layers,
                          dropout=dropout)
        self.out = nn.Linear(hidden_size * 2, output_size)
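The snippet above shows only the constructor. A plausible forward pass for this decoder, following the common GRU-plus-attention pattern these layer sizes imply (the attention call signature and tensor shapes are assumptions; torch and F = torch.nn.functional are assumed to be imported):

    def forward(self, input, last_hidden, encoder_outputs):
        # input: (batch,) token ids; last_hidden: (n_layers, batch, hidden)
        # encoder_outputs: (seq_len, batch, hidden)
        embedded = self.embed(input).unsqueeze(0)                         # (1, batch, embed)
        embedded = self.dropout(embedded)
        attn_weights = self.attention(last_hidden[-1], encoder_outputs)   # (batch, 1, seq_len), assumed
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))       # (batch, 1, hidden)
        context = context.transpose(0, 1)                                 # (1, batch, hidden)
        output, hidden = self.gru(torch.cat([embedded, context], dim=2), last_hidden)
        output = self.out(torch.cat([output.squeeze(0), context.squeeze(0)], dim=1))
        return F.log_softmax(output, dim=1), hidden, attn_weights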
Example #4
def main():
    train_iterator, valid_iterator, test_iterator, params = prepare_data()
    (INPUT_DIM, OUTPUT_DIM, ENC_EMB_DIM, DEC_EMB_DIM,
     ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT, DEC_DROPOUT) = params
    # INPUT_DIM = len(SRC.vocab), 7855
    # OUTPUT_DIM = len(TRG.vocab), 5893
    # ENC_EMB_DIM = 256
    # DEC_EMB_DIM = 256
    # ENC_HID_DIM = 512
    # DEC_HID_DIM = 512
    # ENC_DROPOUT = 0.5
    # DEC_DROPOUT = 0.5
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM,
                  DEC_HID_DIM, ENC_DROPOUT)
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM,
                  DEC_HID_DIM, DEC_DROPOUT, attn)
    model = Seq2Seq(enc, dec, device).to(device)

    model.apply(init_weights)
    print(f'The model has {count_parameters(model):,} trainable parameters')

    for i, batch in enumerate(train_iterator):
        print(f'ITER: {i}')
        example = batch
        print("Input Length:", example.src.shape, "[src_len, batch_size]")
        output = model(example.src, example.trg)
        print(output.shape)
        print('')
        if i > 3: break
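Examples #4, #7, #11 and #15 all construct Attention(enc_hid_dim, dec_hid_dim) without showing its definition. A minimal additive (Bahdanau-style) sketch consistent with those constructor arguments, assuming a bidirectional encoder whose outputs have size 2 * enc_hid_dim:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Attention(nn.Module):
    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        self.attn = nn.Linear(enc_hid_dim * 2 + dec_hid_dim, dec_hid_dim)
        self.v = nn.Linear(dec_hid_dim, 1, bias=False)

    def forward(self, hidden, encoder_outputs):
        # hidden: (batch, dec_hid_dim); encoder_outputs: (src_len, batch, enc_hid_dim * 2)
        src_len = encoder_outputs.shape[0]
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        scores = self.v(energy).squeeze(2)       # (batch, src_len)
        return F.softmax(scores, dim=1)          # attention weights over source positions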
Example #5
def follow(request):
    if request.method == 'POST':
        uid = request.POST['uid']
        print("[INFO]social-music.views.follow: uid=%s" % uid)
        attendedUser = User.objects.get(pk=uid)
        try:
            Attention.objects.get(user=request.user, attendedUser=attendedUser)
        except Attention.DoesNotExist:
            attention = Attention()
            attention.user = request.user
            attention.attendedUser = attendedUser
            attention.save()
            print("[INFO]social-music.views.follow: <%s> following <%s> success." % (request.user, attendedUser))
            return HttpResponse("following success!")
        else:
            print("[ERROR]social-music.views.follow: <%s> following <%s> failure." % (request.user, attendedUser))
            return HttpResponse("following failure!")
Example #6
def do_post():
    u = require_token()

    content = request.form.get('text')
    content = content.strip() if content else None
    content = '[tmp]\n' + content if u.name[:4] == 'tmp_' else content
    post_type = request.form.get('type')
    cw = request.form.get('cw')
    cw = cw.strip() if cw else None

    if not content or len(content) > 4096: abort(422)
    if cw and len(cw)>32: abort(422)

    p = Post(
            name_hash = hash_name(u.name),
            content = content,
            post_type = post_type,
            cw = cw or None,
            likenum = 1,
            comments = []
            )

    if post_type == 'text':
        pass
    elif post_type == 'image':
        # TODO
        p.file_url = 'foo bar'
    else:
        abort(422)

    db.session.add(p)
    db.session.commit()

    tags = re.findall(r'(^|\s)#([^#\s]{1,32})', content)
    #print(tags)
    for t in tags:
        tag = t[1]
        if not re.match(r'\d+', tag):
            db.session.add(TagRecord(tag=tag, pid=p.id))

    db.session.add(Attention(name_hash=hash_name(u.name), pid=p.id))
    db.session.commit()

    return {
            'code': 0,
            'date': p.id
            }
Example #7
def create_seq2seq_model(args, src, trg, loaded_vectors):
    """

    Args:
        src: Field
        trg: Field
    """
    input_dim = len(src.vocab)
    output_dim = len(trg.vocab)
    pad_idx = src.vocab.stoi['<pad>']
    sos_idx = trg.vocab.stoi['<sos>']
    eos_idx = trg.vocab.stoi['<eos>']
    attn = Attention(args.enc_dim, args.dec_dim)
    enc = Encoder(input_dim, args.emb_dim, args.enc_dim, args.dec_dim,
                  args.dropout, src.vocab.stoi, src.vocab.itos)
    dec = Decoder(output_dim, args.emb_dim, args.enc_dim, args.dec_dim,
                  args.dropout, attn, trg.vocab.stoi, trg.vocab.itos)
    model = Seq2Seq(args, enc, dec, pad_idx, sos_idx, eos_idx, device,
                    args.use_pretrained_embeddings, loaded_vectors,
                    args.trainable_embeddings).to(device)

    print(f'The model has {count_parameters(model):,} trainable parameters')
    return model
Example #8
INPUT_DIM = src_lang.get_vocab_size()
OUTPUT_DIM = trg_lang.get_vocab_size()
print(f"Input vocab {INPUT_DIM} and output vocab {OUTPUT_DIM}")
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
ENC_HID_DIM = 512
DEC_HID_DIM = 512
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
PAD_IDX = utils.PAD_token
SOS_IDX = utils.SOS_token
EOS_IDX = utils.EOS_token
SUFFIX = ""
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)

if UNIFORM:
    dec = DecoderUniform(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                         DEC_DROPOUT, attn)
    SUFFIX = "_uniform"
elif NO_ATTN or DECODE_WITH_NO_ATTN:
    dec = DecoderNoAttn(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                        DEC_DROPOUT, attn)
    if NO_ATTN:
        SUFFIX = "_no-attn"
else:
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                  DEC_DROPOUT, attn)
Example #9
def train():

    # Fix Seed for Reproducibility #
    torch.manual_seed(9)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(9)

    # Samples, Weights and Results Path #
    paths = [config.samples_path, config.weights_path, config.plots_path]
    paths = [make_dirs(path) for path in paths]

    # Prepare Data Loader #
    train_horse_loader, train_zebra_loader = get_horse2zebra_loader('train', config.batch_size)
    val_horse_loader, val_zebra_loader = get_horse2zebra_loader('test', config.batch_size)
    total_batch = min(len(train_horse_loader), len(train_zebra_loader))

    # Image Pool #
    masked_fake_A_pool = ImageMaskPool(config.pool_size)
    masked_fake_B_pool = ImageMaskPool(config.pool_size)

    # Prepare Networks #
    Attn_A = Attention()
    Attn_B = Attention()
    G_A2B = Generator()
    G_B2A = Generator()
    D_A = Discriminator()
    D_B = Discriminator()

    networks = [Attn_A, Attn_B, G_A2B, G_B2A, D_A, D_B]
    for network in networks:
        network.to(device)

    # Loss Function #
    criterion_Adversarial = nn.MSELoss()
    criterion_Cycle = nn.L1Loss()

    # Optimizers #
    D_optim = torch.optim.Adam(chain(D_A.parameters(), D_B.parameters()), lr=config.lr, betas=(0.5, 0.999))
    G_optim = torch.optim.Adam(chain(Attn_A.parameters(), Attn_B.parameters(), G_A2B.parameters(), G_B2A.parameters()), lr=config.lr, betas=(0.5, 0.999))

    D_optim_scheduler = get_lr_scheduler(D_optim)
    G_optim_scheduler = get_lr_scheduler(G_optim)

    # Lists #
    D_A_losses, D_B_losses = [], []
    G_A_losses, G_B_losses = [], []

    # Train #
    print("Training Unsupervised Attention-Guided GAN started with total epoch of {}.".format(config.num_epochs))

    for epoch in range(config.num_epochs):

        for i, (real_A, real_B) in enumerate(zip(train_horse_loader, train_zebra_loader)):

            # Data Preparation #
            real_A = real_A.to(device)
            real_B = real_B.to(device)

            # Initialize Optimizers #
            D_optim.zero_grad()
            G_optim.zero_grad()

            ###################
            # Train Generator #
            ###################

            set_requires_grad([D_A, D_B], requires_grad=False)

            # Adversarial Loss using real A #
            attn_A = Attn_A(real_A)
            fake_B = G_A2B(real_A)

            masked_fake_B = fake_B * attn_A + real_A * (1-attn_A)

            masked_fake_B *= attn_A
            prob_real_A = D_A(masked_fake_B)
            real_labels = torch.ones(prob_real_A.size()).to(device)

            G_loss_A = criterion_Adversarial(prob_real_A, real_labels)

            # Adversarial Loss using real B #
            attn_B = Attn_B(real_B)
            fake_A = G_B2A(real_B)

            masked_fake_A = fake_A * attn_B + real_B * (1-attn_B)

            masked_fake_A *= attn_B
            prob_real_B = D_B(masked_fake_A)
            real_labels = torch.ones(prob_real_B.size()).to(device)

            G_loss_B = criterion_Adversarial(prob_real_B, real_labels)

            # Cycle Consistency Loss using real A #
            attn_ABA = Attn_B(masked_fake_B)
            fake_ABA = G_B2A(masked_fake_B)
            masked_fake_ABA = fake_ABA * attn_ABA + masked_fake_B * (1 - attn_ABA)

            # Cycle Consistency Loss using real B #
            attn_BAB = Attn_A(masked_fake_A)
            fake_BAB = G_A2B(masked_fake_A)
            masked_fake_BAB = fake_BAB * attn_BAB + masked_fake_A * (1 - attn_BAB)

            # Cycle Consistency Loss #
            G_cycle_loss_A = config.lambda_cycle * criterion_Cycle(masked_fake_ABA, real_A)
            G_cycle_loss_B = config.lambda_cycle * criterion_Cycle(masked_fake_BAB, real_B)

            # Total Generator Loss #
            G_loss = G_loss_A + G_loss_B + G_cycle_loss_A + G_cycle_loss_B

            # Back Propagation and Update #
            G_loss.backward()
            G_optim.step()

            #######################
            # Train Discriminator #
            #######################

            set_requires_grad([D_A, D_B], requires_grad=True)

            # Train Discriminator A using real A #
            prob_real_A = D_A(real_B)
            real_labels = torch.ones(prob_real_A.size()).to(device)
            D_loss_real_A = criterion_Adversarial(prob_real_A, real_labels)

            # Add Pooling #
            masked_fake_B, attn_A = masked_fake_B_pool.query(masked_fake_B, attn_A)
            masked_fake_B *= attn_A

            # Train Discriminator A using fake B #
            prob_fake_B = D_A(masked_fake_B.detach())
            fake_labels = torch.zeros(prob_fake_B.size()).to(device)
            D_loss_fake_A = criterion_Adversarial(prob_fake_B, fake_labels)

            D_loss_A = (D_loss_real_A + D_loss_fake_A).mean()

            # Train Discriminator B using real B #
            prob_real_B = D_B(real_A)
            real_labels = torch.ones(prob_real_B.size()).to(device)
            D_loss_real_B = criterion_Adversarial(prob_real_B, real_labels)

            # Add Pooling #
            masked_fake_A, attn_B = masked_fake_A_pool.query(masked_fake_A, attn_B)
            masked_fake_A *= attn_B

            # Train Discriminator B using fake A #
            prob_fake_A = D_B(masked_fake_A.detach())
            fake_labels = torch.zeros(prob_fake_A.size()).to(device)
            D_loss_fake_B = criterion_Adversarial(prob_fake_A, fake_labels)

            D_loss_B = (D_loss_real_B + D_loss_fake_B).mean()

            # Calculate Total Discriminator Loss #
            D_loss = D_loss_A + D_loss_B

            # Back Propagation and Update #
            D_loss.backward()
            D_optim.step()

            # Add items to Lists #
            D_A_losses.append(D_loss_A.item())
            D_B_losses.append(D_loss_B.item())
            G_A_losses.append(G_loss_A.item())
            G_B_losses.append(G_loss_B.item())

            ####################
            # Print Statistics #
            ####################

            if (i+1) % config.print_every == 0:
                print("UAG-GAN | Epoch [{}/{}] | Iteration [{}/{}] | D A Losses {:.4f} | D B Losses {:.4f} | G A Losses {:.4f} | G B Losses {:.4f}".
                      format(epoch+1, config.num_epochs, i+1, total_batch, np.average(D_A_losses), np.average(D_B_losses), np.average(G_A_losses), np.average(G_B_losses)))

                # Save Sample Images #
                save_samples(val_horse_loader, val_zebra_loader, G_A2B, G_B2A, Attn_A, Attn_B, epoch, config.samples_path)

        # Adjust Learning Rate #
        D_optim_scheduler.step()
        G_optim_scheduler.step()

        # Save Model Weights #
        if (epoch + 1) % config.save_every == 0:
            torch.save(G_A2B.state_dict(), os.path.join(config.weights_path, 'UAG-GAN_Generator_A2B_Epoch_{}.pkl'.format(epoch+1)))
            torch.save(G_B2A.state_dict(), os.path.join(config.weights_path, 'UAG-GAN_Generator_B2A_Epoch_{}.pkl'.format(epoch+1)))
            torch.save(Attn_A.state_dict(), os.path.join(config.weights_path, 'UAG-GAN_Attention_A_Epoch_{}.pkl'.format(epoch+1)))
            torch.save(Attn_B.state_dict(), os.path.join(config.weights_path, 'UAG-GAN_Attention_B_Epoch_{}.pkl'.format(epoch+1)))

    # Make a GIF file #
    make_gifs_train("UAG-GAN", config.samples_path)

    # Plot Losses #
    plot_losses(D_A_losses, D_B_losses, G_A_losses, G_B_losses, config.num_epochs, config.plots_path)

    print("Training finished.")
Example #10
def create_attention():
    inputs = Input(shape=(32, 32, 3))
    model = Model(inputs=inputs,
                  outputs=Attention(inputs, config.Att_filters,
                                    config.Att_nBlocks, config.Att_nLayers))
    return model
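A typical way to exercise the returned model; the optimizer and loss below are placeholders, assuming the Attention block ends in a softmax classification head:

model = create_attention()
model.compile(optimizer='adam',
              loss='categorical_crossentropy',  # assumed task-specific loss
              metrics=['accuracy'])
model.summary()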
Example #11
def main():
    # ArgumentParser {{{
    parser = argparse.ArgumentParser()
    # hyper parameters
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--n_epochs', type=int, default=10)
    parser.add_argument('--enc_embd_size', type=int, default=256)
    parser.add_argument('--dec_embd_size', type=int, default=256)
    parser.add_argument('--enc_h_size', type=int, default=512)
    parser.add_argument('--dec_h_size', type=int, default=512)
    # other parameters
    parser.add_argument('--beam_width', type=int, default=3)
    parser.add_argument('--n_best', type=int, default=3)
    parser.add_argument('--max_dec_steps', type=int, default=1000)
    parser.add_argument('--export_dir', type=str, default=modelpath)
    parser.add_argument('--model_name', type=str, default='s2s')
    parser.add_argument('--model_path',
                        type=str,
                        default=modelpath / 's2s-vanilla.pt')
    parser.add_argument('--skip_train', action='store_true')
    parser.add_argument('--attention', action='store_true')
    opts = parser.parse_args()
    # }}}
    # opts.skip_train = True
    opts.attention = True

    # SOS_token = '<SOS>'
    # EOS_token = '<EOS>'
    # SRC = Field(tokenize=tokenize_de,
    #             init_token=SOS_token,
    #             eos_token=EOS_token,
    #             lower=True)
    # TRG = Field(tokenize=tokenize_en,
    #             init_token=SOS_token,
    #             eos_token=EOS_token,
    #             lower=True)
    # train_data, valid_data, test_data = Multi30k.splits(exts=('.de', '.en'), fields=(SRC, TRG))
    # print(f'Number of training examples: {len(train_data.examples)}')
    # print(f'Number of validation examples: {len(valid_data.examples)}')
    # print(f'Number of testing examples: {len(test_data.examples)}')

    # SRC.build_vocab(train_data, min_freq=2)
    # TRG.build_vocab(train_data, min_freq=2)
    # print(f'Unique tokens in source (de) vocabulary: {len(SRC.vocab)}')
    # print(f'Unique tokens in target (en) vocabulary: {len(TRG.vocab)}')
    # train_itr, valid_itr, test_itr =\
    #         BucketIterator.splits(
    #             (train_data, valid_data, test_data),
    #             batch_size=opts.batch_size,
    #             device=DEVICE)

    # exit

    train_dataset, valid_dataset, test_dataset = Multi30k(root=dataroot)
    train_dataset1, train_dataset2 = tee(train_dataset)
    valid_dataset1, valid_dataset2 = tee(valid_dataset)
    test_dataset1, test_dataset2 = tee(test_dataset)

    spacy_de = spacy.load('de_core_news_sm')
    spacy_en = spacy.load('en_core_web_sm')
    de_counter = Counter()
    en_counter = Counter()
    de_tokenizer = get_tokenizer('spacy', language='de_core_news_sm')
    en_tokenizer = get_tokenizer('spacy', language='en_core_web_sm')

    def build_vocab(dataset):
        for (src_sentence, tgt_sentence) in tqdm(dataset):
            de_counter.update(de_tokenizer(src_sentence))
            en_counter.update(en_tokenizer(tgt_sentence))

    def data_process(dataset):
        data = []
        for (raw_de, raw_en) in tqdm(dataset):
            de_tensor_ = torch.tensor(
                [de_vocab[token] for token in de_tokenizer(raw_de)],
                dtype=torch.long)
            en_tensor_ = torch.tensor(
                [en_vocab[token] for token in en_tokenizer(raw_en)],
                dtype=torch.long)
            data.append((de_tensor_, en_tensor_))
        return data

    def generate_batch(data_batch):
        de_batch, en_batch = [], []
        for (de_item, en_item) in data_batch:
            de_batch.append(
                torch.cat([
                    torch.tensor([TRG_SOS_IDX]), de_item,
                    torch.tensor([TRG_EOS_IDX])
                ],
                          dim=0))
            en_batch.append(
                torch.cat([
                    torch.tensor([TRG_SOS_IDX]), en_item,
                    torch.tensor([TRG_EOS_IDX])
                ],
                          dim=0))
        de_batch = pad_sequence(de_batch, padding_value=TRG_PAD_IDX)
        en_batch = pad_sequence(en_batch, padding_value=TRG_PAD_IDX)
        return de_batch, en_batch

    build_vocab(train_dataset1)
    build_vocab(valid_dataset1)
    build_vocab(test_dataset1)
    de_vocab = Vocab(de_counter, specials=['<unk>', '<pad>', '<bos>', '<eos>'])
    en_vocab = Vocab(en_counter, specials=['<unk>', '<pad>', '<bos>', '<eos>'])

    dec_v_size = len(de_vocab)
    enc_v_size = len(en_vocab)

    TRG_PAD_IDX = en_vocab.stoi['<pad>']
    TRG_SOS_IDX = en_vocab.stoi['<bos>']
    TRG_EOS_IDX = en_vocab.stoi['<eos>']

    train_data = data_process(train_dataset2)
    valid_data = data_process(valid_dataset2)
    test_data = data_process(test_dataset2)

    train_itr = DataLoader(train_data,
                           batch_size=opts.batch_size,
                           shuffle=False,
                           collate_fn=generate_batch)
    valid_itr = DataLoader(valid_data,
                           batch_size=opts.batch_size,
                           shuffle=False,
                           collate_fn=generate_batch)
    test_itr = DataLoader(test_data,
                          batch_size=opts.batch_size,
                          shuffle=False,
                          collate_fn=generate_batch)

    encoder = EncoderRNN(opts.enc_embd_size, opts.enc_h_size, opts.dec_h_size,
                         dec_v_size, DEVICE)

    if opts.attention:
        attn = Attention(opts.enc_h_size, opts.dec_h_size)
        decoder = AttnDecoderRNN(opts.dec_embd_size, opts.enc_h_size,
                                 opts.dec_h_size, enc_v_size, attn, DEVICE)
    else:
        decoder = DecoderRNN(opts.dec_embd_size, opts.dec_h_size, enc_v_size,
                             DEVICE)
    model = Seq2Seq(encoder, decoder, DEVICE).to(DEVICE)

    # TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
    # TRG_PAD_IDX = tgt_vocab.stoi['<pad>']

    if opts.skip_train:
        model.load_state_dict(torch.load(opts.model_path))

    if not opts.skip_train:
        optimizer = optim.Adam(model.parameters())
        criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)
        best_valid_loss = float('inf')
        for epoch in range(opts.n_epochs):
            start_time = time.time()

            train_loss = train(model, train_itr, optimizer, criterion)
            valid_loss = evaluate(model, valid_itr, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                attn_type = 'attn' if opts.attention else 'vanilla'
                model_path = os.path.join(opts.export_dir,
                                          f'{opts.model_name}-{attn_type}.pt')
                print(f'Update model! Saved {model_path}')
                torch.save(model.state_dict(), model_path)
            else:
                print('Model was not updated. Stop training')
                break

            print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}'
            )

    # TRG_SOS_IDX = TRG.vocab.stoi[TRG.init_token]
    # TRG_EOS_IDX = TRG.vocab.stoi[TRG.eos_token]
    model.eval()
    with torch.no_grad():
        # for batch_id, batch in enumerate(test_itr):
        for batch in tqdm(test_itr):
            # src = batch.src # (T, bs)
            # trg = batch.trg # (T, bs)
            src, trg = batch
            src = src.to(DEVICE)
            trg = trg.to(DEVICE)
            print(f'In: {" ".join(de_vocab.itos[idx] for idx in src[:, 0])}')

            enc_outs, h = model.encoder(src)  # (T, bs, H), (bs, H)
            # decoded_seqs: (bs, T)
            start_time = time.time()
            decoded_seqs = beam_search_decoding(
                decoder=model.decoder,
                enc_outs=enc_outs,
                enc_last_h=h,
                beam_width=opts.beam_width,
                n_best=opts.n_best,
                sos_token=TRG_SOS_IDX,
                eos_token=TRG_EOS_IDX,
                max_dec_steps=opts.max_dec_steps,
                device=DEVICE)
            end_time = time.time()
            print(f'for loop beam search time: {end_time-start_time:.3f}')
            print_n_best(decoded_seqs[0], en_vocab.itos)

            start_time = time.time()
            decoded_seqs = batch_beam_search_decoding(
                decoder=model.decoder,
                enc_outs=enc_outs,
                enc_last_h=h,
                beam_width=opts.beam_width,
                n_best=opts.n_best,
                sos_token=TRG_SOS_IDX,
                eos_token=TRG_EOS_IDX,
                max_dec_steps=opts.max_dec_steps,
                device=DEVICE)
            end_time = time.time()
            print(f'Batch beam search time: {end_time-start_time:.3f}')
            print_n_best(decoded_seqs[0], en_vocab.itos)
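Both this example and Example #15 call train, evaluate and epoch_time helpers that are not shown. One common shape for the first two, assuming the Seq2Seq forward returns logits of shape (trg_len, batch, vocab) and that batches arrive as (src, trg) tensor pairs as in the DataLoader above:

def train(model, iterator, optimizer, criterion, clip=1.0):
    model.train()
    epoch_loss = 0
    for src, trg in iterator:
        src, trg = src.to(DEVICE), trg.to(DEVICE)
        optimizer.zero_grad()
        output = model(src, trg)                           # (trg_len, batch, vocab), assumed
        loss = criterion(output[1:].reshape(-1, output.shape[-1]),
                         trg[1:].reshape(-1))              # skip the <sos> position
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

def evaluate(model, iterator, criterion):
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        for src, trg in iterator:
            src, trg = src.to(DEVICE), trg.to(DEVICE)
            output = model(src, trg)                       # teacher forcing kept for simplicity
            loss = criterion(output[1:].reshape(-1, output.shape[-1]),
                             trg[1:].reshape(-1))
            epoch_loss += loss.item()
    return epoch_loss / len(iterator)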
Example #12
    def __init__(self, word_emb, graph_emb, graph_type, hidden_size, device):
        super(CommandScorerWithKG, self).__init__()
        self.device = device
        self.hidden_size = hidden_size

        self.dropout_ratio = 0.0  # *
        self.n_heads = 1  # *
        self.use_hints = True  # *
        self.bidirectional = True
        self.graph_type = graph_type
        n_factor = 2  # command
        # hidden size multiplier when the encoders are bidirectional
        bi_factor = 2 if self.bidirectional else 1

        self.word_embedding = PretrainedEmbeddings(word_emb)
        self.word_embedding_size = self.word_embedding.dim  # *
        self.word_embedding_prj = torch.nn.Linear(self.word_embedding_size,
                                                  self.hidden_size,
                                                  bias=False)
        if not self.bidirectional:
            self.word_hint_prj = torch.nn.Linear(self.hidden_size * 2,
                                                 self.hidden_size,
                                                 bias=False)

        self.graph_embedding = None
        if graph_emb is not None and ('local' in self.graph_type
                                      or 'world' in self.graph_type):
            self.graph_embedding = PretrainedEmbeddings(graph_emb, True)
            self.graph_embedding_size = self.graph_embedding.dim
            self.graph_embedding_prj = torch.nn.Linear(
                self.graph_embedding_size, self.hidden_size, bias=False)
            if not self.bidirectional:
                self.graph_hint_prj = torch.nn.Linear(self.hidden_size * 2,
                                                      self.hidden_size,
                                                      bias=False)

        # Encoder for the observation
        self.encoder_gru = nn.GRU(hidden_size,
                                  hidden_size,
                                  batch_first=True,
                                  bidirectional=self.bidirectional)
        # Encoder for the commands
        self.cmd_encoder_gru = nn.GRU(hidden_size,
                                      hidden_size,
                                      batch_first=True,
                                      bidirectional=self.bidirectional)

        # RNN that keeps track of the encoded state over time
        self.state_gru = nn.GRU(hidden_size * bi_factor,
                                hidden_size * bi_factor,
                                batch_first=True)

        self.kg_word_encoder_gru = nn.GRU(hidden_size,
                                          hidden_size,
                                          batch_first=True)
        self.kg_graph_encoder_gru = nn.GRU(hidden_size,
                                           hidden_size,
                                           batch_first=True)

        if 'local' in self.graph_type or 'world' in graph_type:
            self.attention = CQAttention(block_hidden_dim=hidden_size *
                                         bi_factor,
                                         dropout=self.dropout_ratio)
            self.attention_prj = torch.nn.Linear(hidden_size * bi_factor * 4,
                                                 hidden_size * bi_factor,
                                                 bias=False)

        if 'world' in self.graph_type:
            n_factor += 1
            self.worldkg_gat = GAT(hidden_size,
                                   hidden_size,
                                   self.dropout_ratio,
                                   alpha=0.2,
                                   nheads=self.n_heads)
            self.worldkg_attention_prj = torch.nn.Linear(
                hidden_size * bi_factor * 4,
                hidden_size * bi_factor,
                bias=False)
            self.world_self_attention = SelfAttention(hidden_size * bi_factor,
                                                      hidden_size * bi_factor,
                                                      self.n_heads,
                                                      self.dropout_ratio)
        if 'local' in graph_type:
            n_factor += 1
            self.localkg_gat = GAT(hidden_size,
                                   hidden_size,
                                   self.dropout_ratio,
                                   alpha=0.2,
                                   nheads=self.n_heads)
            self.localkg_attention_prj = torch.nn.Linear(
                hidden_size * bi_factor * 4,
                hidden_size * bi_factor,
                bias=False)
            self.local_self_attention = SelfAttention(hidden_size * bi_factor,
                                                      hidden_size * bi_factor,
                                                      self.n_heads,
                                                      self.dropout_ratio)

        self.state_hidden = []
        # General attention from [cmd + obs ==> graph_nodes]
        self.general_attention = Attention(hidden_size * bi_factor * 2,
                                           hidden_size * bi_factor)
        self.world_attention = None
        self.local_attention = None
        self.obs2kg_attention = torch.nn.Linear(hidden_size * bi_factor,
                                                hidden_size * bi_factor,
                                                bias=False)
        self.critic = nn.Linear(hidden_size * bi_factor, 1)

        self.att_cmd = nn.Sequential(
            nn.Linear(hidden_size * bi_factor * n_factor,
                      hidden_size * bi_factor), nn.ReLU(),
            nn.Linear(hidden_size * bi_factor, 1))
        self.count = 1
Example #13
def inference():

    # Inference Path #
    paths = [config.inference_path_H2Z, config.inference_path_Z2H]
    paths = [make_dirs(path) for path in paths]

    # Prepare Data Loader #
    test_horse_loader, test_zebra_loader = get_horse2zebra_loader(
        'test', config.val_batch_size)

    # Prepare Attention and Generator #
    Attn_A = Attention().to(device)
    Attn_B = Attention().to(device)

    G_A2B = Generator().to(device)
    G_B2A = Generator().to(device)

    Attn_A.load_state_dict(
        torch.load(
            os.path.join(
                config.weights_path,
                'UAG-GAN_Attention_A_Epoch_{}.pkl'.format(config.num_epochs))))
    Attn_B.load_state_dict(
        torch.load(
            os.path.join(
                config.weights_path,
                'UAG-GAN_Attention_B_Epoch_{}.pkl'.format(config.num_epochs))))

    G_A2B.load_state_dict(
        torch.load(
            os.path.join(
                config.weights_path,
                'UAG-GAN_Generator_A2B_Epoch_{}.pkl'.format(
                    config.num_epochs))))
    G_B2A.load_state_dict(
        torch.load(
            os.path.join(
                config.weights_path,
                'UAG-GAN_Generator_B2A_Epoch_{}.pkl'.format(
                    config.num_epochs))))

    # Test #
    print("UAG-GAN | Generating Horse2Zebra images started...")
    for i, (horse,
            zebra) in enumerate(zip(test_horse_loader, test_zebra_loader)):

        # Prepare Data #
        real_A = horse.to(device)
        real_B = zebra.to(device)

        # Generate Attention Images #
        attn_A = Attn_A(real_A.detach())
        attn_A = attn_A.repeat(1, 3, 1, 1)
        attn_A = 2 * attn_A - 1

        attn_B = Attn_B(real_B.detach())
        attn_B = attn_B.repeat(1, 3, 1, 1)
        attn_B = 2 * attn_B - 1

        # Generated Fake Images #
        fake_B = G_A2B(real_A.detach())
        fake_A = G_B2A(real_B.detach())

        # Save Images (Horse -> Zebra) #
        result = torch.cat((real_A, attn_A, fake_B), dim=0)
        save_image(
            denorm(result.data),
            os.path.join(config.inference_path_H2Z,
                         'UAG-GAN_Horse2Zebra_Results_%03d.png' % (i + 1)))

        # Save Images (Zebra -> Horse) #
        result = torch.cat((real_B, attn_B, fake_A), dim=0)
        save_image(
            denorm(result.data),
            os.path.join(config.inference_path_Z2H,
                         'UAG-GAN_Zebra2Horse_Results_%03d.png' % (i + 1)))

    # Make a GIF file #
    make_gifs_test("UAG-GAN", "Horse2Zebra", config.inference_path_H2Z)
    make_gifs_test("UAG-GAN", "Zebra2Horse", config.inference_path_Z2H)
                    type=int,
                    default=1,
                    metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--train-split',
                    type=str,
                    default=0.8,
                    metavar='E',
                    help='percentage of data to use as train.')
args = parser.parse_args()
use_cuda = torch.cuda.is_available()
torch.manual_seed(args.seed)

batch_size = args.batch_size
epochs = args.epochs
model = Attention()
train_split = args.train_split  # percentage of the data we want in train (as opposed to valdation)

transform_train = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),  # Convert Pillow Image to Tensor
    # transforms.Resize(128)
])

train_dataset = LymphocytosisDataset(
    "/data/clinical_annotation.csv",
    "/data",
    train=True,
    valid=False,
Example #15
def main():
    # ArgumentParser {{{
    parser = argparse.ArgumentParser()
    # hyper parameters
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--n_epochs', type=int, default=10)
    parser.add_argument('--enc_embd_size', type=int, default=256)
    parser.add_argument('--dec_embd_size', type=int, default=256)
    parser.add_argument('--enc_h_size', type=int, default=512)
    parser.add_argument('--dec_h_size', type=int, default=512)
    # other parameters
    parser.add_argument('--beam_width', type=int, default=10)
    parser.add_argument('--n_best', type=int, default=5)
    parser.add_argument('--max_dec_steps', type=int, default=1000)
    parser.add_argument('--export_dir', type=str, default='./ckpts/')
    parser.add_argument('--model_name', type=str, default='s2s')
    parser.add_argument('--model_path', type=str, default='')
    parser.add_argument('--skip_train', action='store_true')
    parser.add_argument('--attention', action='store_true')
    opts = parser.parse_args()
    # }}}

    SOS_token = '<SOS>'
    EOS_token = '<EOS>'
    SRC = Field(tokenize=tokenize_de,
                init_token=SOS_token,
                eos_token=EOS_token,
                lower=True)
    TRG = Field(tokenize=tokenize_en,
                init_token=SOS_token,
                eos_token=EOS_token,
                lower=True)
    train_data, valid_data, test_data = Multi30k.splits(exts=('.de', '.en'),
                                                        fields=(SRC, TRG))
    print(f'Number of training examples: {len(train_data.examples)}')
    print(f'Number of validation examples: {len(valid_data.examples)}')
    print(f'Number of testing examples: {len(test_data.examples)}')

    SRC.build_vocab(train_data, min_freq=2)
    TRG.build_vocab(train_data, min_freq=2)
    print(f'Unique tokens in source (de) vocabulary: {len(SRC.vocab)}')
    print(f'Unique tokens in target (en) vocabulary: {len(TRG.vocab)}')

    train_itr, valid_itr, test_itr =\
            BucketIterator.splits(
                (train_data, valid_data, test_data),
                batch_size=opts.batch_size,
                device=DEVICE)

    enc_v_size = len(SRC.vocab)
    dec_v_size = len(TRG.vocab)

    encoder = EncoderRNN(opts.enc_embd_size, opts.enc_h_size, opts.dec_h_size,
                         enc_v_size, DEVICE)
    if opts.attention:
        attn = Attention(opts.enc_h_size, opts.dec_h_size)
        decoder = AttnDecoderRNN(opts.dec_embd_size, opts.enc_h_size,
                                 opts.dec_h_size, dec_v_size, attn, DEVICE)
    else:
        decoder = DecoderRNN(opts.dec_embd_size, opts.dec_h_size, dec_v_size,
                             DEVICE)
    model = Seq2Seq(encoder, decoder, DEVICE).to(DEVICE)

    TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

    if opts.model_path != '':
        model.load_state_dict(torch.load(opts.model_path))

    if not opts.skip_train:
        optimizer = optim.Adam(model.parameters())
        criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)
        best_valid_loss = float('inf')
        for epoch in range(opts.n_epochs):
            start_time = time.time()

            train_loss = train(model, train_itr, optimizer, criterion)
            valid_loss = evaluate(model, valid_itr, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                attn_type = 'attn' if opts.attention else 'vanilla'
                model_path = os.path.join(opts.export_dir,
                                          f'{opts.model_name}-{attn_type}.pt')
                print(f'Update model! Saved {model_path}')
                torch.save(model.state_dict(), model_path)
            else:
                print('Model was not updated. Stop training')
                break

            print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}'
            )

    TRG_SOS_IDX = TRG.vocab.stoi[TRG.init_token]
    TRG_EOS_IDX = TRG.vocab.stoi[TRG.eos_token]
    model.eval()
    with torch.no_grad():
        for batch_id, batch in enumerate(test_itr):
            src = batch.src  # (T, bs)
            trg = batch.trg  # (T, bs)
            print(f'In: {" ".join(SRC.vocab.itos[idx] for idx in src[:, 0])}')

            enc_outs, h = model.encoder(src)  # (T, bs, H), (bs, H)
            # decoded_seqs: (bs, T)
            start_time = time.time()
            decoded_seqs = beam_search_decoding(
                decoder=model.decoder,
                enc_outs=enc_outs,
                enc_last_h=h,
                beam_width=opts.beam_width,
                n_best=opts.n_best,
                sos_token=TRG_SOS_IDX,
                eos_token=TRG_EOS_IDX,
                max_dec_steps=opts.max_dec_steps,
                device=DEVICE)
            end_time = time.time()
            print(f'for loop beam search time: {end_time-start_time:.3f}')
            print_n_best(decoded_seqs[0], TRG.vocab.itos)

            start_time = time.time()
            decoded_seqs = batch_beam_search_decoding(
                decoder=model.decoder,
                enc_outs=enc_outs,
                enc_last_h=h,
                beam_width=opts.beam_width,
                n_best=opts.n_best,
                sos_token=TRG_SOS_IDX,
                eos_token=TRG_EOS_IDX,
                max_dec_steps=opts.max_dec_steps,
                device=DEVICE)
            end_time = time.time()
            print(f'Batch beam search time: {end_time-start_time:.3f}')
            print_n_best(decoded_seqs[0], TRG.vocab.itos)
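Examples #11 and #15 also call an epoch_time helper that is not shown; it is typically just a small formatting utility along these lines:

def epoch_time(start_time, end_time):
    # Convert an elapsed wall-clock interval into whole minutes and seconds.
    elapsed = end_time - start_time
    minutes = int(elapsed / 60)
    seconds = int(elapsed - minutes * 60)
    return minutes, seconds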