Example #1
from torch import optim  # optim.Adam is used below; this import was dropped from the snippet
import pandas as pd
import random
from tqdm import tqdm
from lib.attribute_loss import AttributeLoss

# ModelConfig, ImSitu, CudaDataLoader, and ImsituModel are this project's own
# classes; their imports were also dropped when the snippet was extracted.

# Recommended hyperparameters
args = ModelConfig(lr=1e-5, batch_size=32, eps=1e-1, save_dir='imsitu_trainOURS',
                   imsitu_model='ours', l2_weight=1e-4)

train_data, val_data, test_data = ImSitu.splits(zeroshot=True, 
    vector_type=args.vector_type, word_type='infinitive' if args.use_inf else 'lemma')

train_iter, val_iter, test_iter = CudaDataLoader.splits(
    train_data, val_data, test_data, batch_size=args.batch_size, num_workers=2)

att_crit = AttributeLoss(train_data.attributes.domains, size_average=True)
m = ImsituModel(
    zeroshot=True,
    embed_dim=300 if args.use_emb else None,
    att_domains=att_crit.domains_per_att if args.use_att else None,
    l2_weight=args.l2_weight,
)
m.load_pretrained(
    '/home/rowan/code/verb-attributes/checkpoints/imsitu_pretrain/pretrained_ckpt.tar'
)

for n, p in m.resnet152.named_parameters():
    if not n.startswith('layer4'):
        p.requires_grad = False

# Only the parameters left trainable above (layer4 and the new heads) go to
# the optimizer. The snippet is cut off mid-call here, so the argument list is
# an assumed completion in the style of the other examples.
optimizer = optim.Adam([p for p in m.parameters() if p.requires_grad],
                       lr=args.lr, eps=args.eps)

Example #2
import torch
import numpy as np
from tqdm import tqdm
from lib.attribute_loss import AttributeLoss, evaluate_accuracy
from lib.bucket_iterator import DictionaryAttributesIter
from lib.att_prediction import DictionaryModel
from data.attribute_loader import Attributes  # used below; restored from Example #3's imports
import pandas as pd

# ModelConfig and load_vocab are this project's own helpers; their imports
# were dropped when the snippet was extracted.
train_data, val_data, test_data = Attributes.splits(use_defns=True, cuda=True)
dict_field, _ = load_vocab()
test_iter = DictionaryAttributesIter(dict_field,
                                     test_data,
                                     batch_size=64 * 10,
                                     shuffle=False,
                                     train=False)
att_crit = AttributeLoss(train_data.domains, size_average=True)


def eval(ckpt, use_emb=False):

    # Recommended hyperparameters
    args = ModelConfig(batch_size=64, ckpt=ckpt, dropout=0.5, use_emb=use_emb)

    m = DictionaryModel(dict_field.vocab,
                        output_size=att_crit.input_size,
                        embed_input=args.use_emb,
                        dropout_rate=args.dropout)
    m.load_state_dict(torch.load(args.ckpt)['m_state_dict'])

    if torch.cuda.is_available():
        m.cuda()
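
    # The snippet is cut off here. A hypothetical continuation (the batch
    # layout of DictionaryAttributesIter and the exact model inputs are not
    # shown above, so everything below is an assumption, not the original):
    m.eval()
    preds = []
    for batch in test_iter:
        preds.append(m(batch).data.cpu().numpy())
    return np.concatenate(preds, 0)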
Example #3
from torch import optim
import os
import torch
from lib.misc import CosineRankingLoss, optimize, cosine_ranking_loss
import numpy as np
import time
from data.attribute_loader import Attributes
from lib.att_prediction import FeedForwardModel
from lib.attribute_loss import AttributeLoss, evaluate_accuracy

# Recommended hyperparameters (ModelConfig is this project's config helper;
# its import was dropped when the snippet was extracted)
args = ModelConfig(lr=5e-4, batch_size=16, eps=1e-8, save_dir='nbow2atts')
train_data, val_data, test_data = Attributes.splits(
    use_defns=False, cuda=torch.cuda.is_available())

crit = AttributeLoss(train_data.domains, size_average=True)
m = FeedForwardModel(input_size=300,
                     output_size=crit.input_size,
                     init_dropout=0.05)
optimizer = optim.Adam(m.parameters(),
                       lr=args.lr,
                       eps=args.eps,
                       betas=(args.beta1, args.beta2))

if len(args.ckpt) > 0 and os.path.exists(args.ckpt):
    print("loading checkpoint from {}".format(args.ckpt))
    ckpt = torch.load(args.ckpt)
    m.load_state_dict(ckpt['state_dict'])
    optimizer.load_state_dict(ckpt['optimizer'])
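
For reference, the save side implied by those keys looks roughly like this (the destination filename is illustrative, not from the original):

# Hypothetical checkpoint write matching the 'state_dict'/'optimizer' keys
# read above; the path under args.save_dir is assumed.
torch.save({'state_dict': m.state_dict(),
            'optimizer': optimizer.state_dict()},
           os.path.join(args.save_dir, 'ckpt.tar'))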

if torch.cuda.is_available():
    m.cuda()  # the snippet is cut off here; moving the model to the GPU is the assumed intent

Example #4

# This fragment begins mid-class: only the tail of the forward pass survives.
# A shell is sketched here so the indentation parses; the real RTML defines
# att_emb, dom_sizes, and the per-domain mapping matrices R earlier on, and
# train_data and args come from earlier in the original file.
class RTML(torch.nn.Module):        # assumed base class
    ...                             # earlier class body omitted in the snippet

    def forward(self, word_embs):   # method signature assumed
        s = 0
        preds = []
        for i, att_size in enumerate(dom_sizes):
            e = s + att_size
            att_embs = self.att_emb.weight[s:e].t()
            s = e

            p1 = word_embs @ R[i]
            p2 = p1 @ att_embs
            preds.append(p2)
        preds = torch.cat(preds, 1)
        return preds
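
The loop above scores a batch of word embeddings against each attribute domain separately, then concatenates the per-domain scores. A standalone illustration of the same pattern with made-up sizes (everything here is hypothetical):

import torch

dom_sizes = [3, 5]                              # two attribute domains
att_emb = torch.randn(sum(dom_sizes), 300)      # one embedding per attribute value
word_embs = torch.randn(4, 300)                 # batch of 4 word embeddings
R = [torch.randn(300, 300) for _ in dom_sizes]  # one mapping matrix per domain

preds, s = [], 0
for i, att_size in enumerate(dom_sizes):
    e = s + att_size
    att_embs = att_emb[s:e].t()                 # (300, att_size)
    s = e
    preds.append(word_embs @ R[i] @ att_embs)   # (4, att_size) domain scores
preds = torch.cat(preds, 1)                     # (4, 8): all domains side by side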

m = RTML()
optimizer = optim.Adam(m.parameters(), lr=args.lr, eps=args.eps, betas=(args.beta1, args.beta2))
crit = AttributeLoss(train_data.domains)

if torch.cuda.is_available():
    m.cuda()
    train_data.embeds = train_data.embeds.cuda()
    val_data.embeds = val_data.embeds.cuda()
    test_data.embeds = test_data.embeds.cuda()


@optimize
def train_batch(inds, optimizers=None):
    embs = train_data.embeds[inds]
    atts_list = [a[inds] for a in train_data.atts_list]
    preds = m(embs)
    gt_atts = torch.cat(atts_list, 1)
    # binary_cross_entropy_with_logits lives in torch.nn.functional; its import
    # was dropped from the snippet. Returning the loss lets the @optimize
    # decorator drive the backward pass.
    loss = binary_cross_entropy_with_logits(preds, gt_atts, size_average=True)
    return loss
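
The @optimize decorator comes from lib.misc and is not shown in these snippets. A hypothetical stand-in for what such a decorator usually does (the real one may differ):

def optimize_sketch(fn):
    # Hypothetical: run the decorated step to get a loss, then zero grads,
    # backprop, and apply each optimizer that was passed in.
    def wrapped(*args, optimizers=None, **kwargs):
        loss = fn(*args, optimizers=optimizers, **kwargs)
        for opt in (optimizers or []):
            opt.zero_grad()
        loss.backward()
        for opt in (optimizers or []):
            opt.step()
        return loss
    return wrapped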
Example #5
# This snippet assumes the setup from Example #2 has already run (imports,
# dict_field, the train/val/test splits, and args); print_para below is
# another of this project's own helpers.
train_iter = DictionaryAttributesIter(dict_field,
                                      train_data,
                                      batch_size=args.batch_size)
val_iter = DictionaryAttributesIter(dict_field,
                                    val_data,
                                    batch_size=args.batch_size * 10,
                                    shuffle=False,
                                    train=False)
test_iter = DictionaryAttributesIter(dict_field,
                                     test_data,
                                     batch_size=args.batch_size * 10,
                                     shuffle=False,
                                     train=False)

crit = AttributeLoss(train_data.domains, size_average=True)
m = DictionaryModel(dict_field.vocab,
                    output_size=crit.input_size,
                    embed_input=args.use_emb,
                    dropout_rate=args.dropout)
m.load_pretrained(args.ckpt)

for name, p in m.named_parameters():
    if name.startswith('embed'):
        p.requires_grad = False

print(print_para(m))
optimizer = optim.Adam([p for p in m.parameters() if p.requires_grad],
                       lr=args.lr,
                       eps=args.eps,
                       betas=(args.beta1, args.beta2))
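
A quick sanity check (illustrative, not from the original) that the embedding freeze above actually took effect before training starts:

n_trainable = sum(p.numel() for p in m.parameters() if p.requires_grad)
n_total = sum(p.numel() for p in m.parameters())
print('training {:,} of {:,} parameters'.format(n_trainable, n_total))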