Example #1
0
 def __init__(self, args):
     """Build the sub-modules of the model from the ``args`` namespace.

     Creates, in this order: the text encoder, a visual front-end (either a
     3x3 convolution applied to pre-extracted features or a ``Conv``
     encoder), and the ``g_theta`` / ``f_phi`` MLPs.

     Args:
         args: configuration object; the attributes read are ``cv_pretrained``,
             ``te_pretrained``, ``word_to_idx``, ``q_size``, ``a_size``,
             ``dataset`` and the ``te_*``, ``cv_*`` and ``basern_*`` settings
             used below.
     """
     super(BaseRN, self).__init__()
     self.cv_pretrained = args.cv_pretrained

     # Pretrained word vectors are only loaded when explicitly requested.
     if args.te_pretrained:
         embedding_init = load_pretrained_embedding(args.word_to_idx, args.te_embedding)
     else:
         embedding_init = None
     self.text_encoder = TextEncoder(
         args.q_size,
         args.te_embedding,
         args.te_type,
         args.te_hidden,
         args.te_layer,
         args.te_dropout,
         embedding_init,
     )

     if args.cv_pretrained:
         # Input channel count depends on the dataset: 2048 for 'vqa2', else 1024.
         in_channels = 2048 if args.dataset == 'vqa2' else 1024
         self.visual_resize = nn.Conv2d(
             in_channels, args.cv_filter, kernel_size=3, stride=1, padding=1)
     else:
         self.visual_encoder = Conv(
             args.cv_filter, args.cv_kernel, args.cv_stride,
             args.cv_layer, args.cv_batchnorm)

     # g_theta input width is cv_filter + 2 + te_hidden.
     # NOTE(review): the "+ 2" is presumably a pair of spatial coordinates -
     # confirm against forward().
     g_theta_in = args.cv_filter + 2 + args.te_hidden
     self.g_theta = MLP(
         g_theta_in, args.basern_gt_hidden, args.basern_gt_hidden,
         args.basern_gt_layer)
     self.f_phi = MLP(
         args.basern_gt_hidden, args.basern_fp_hidden, args.a_size,
         args.basern_fp_layer, args.basern_fp_dropout)
Example #2
0
 def init_encoders(self, args):
     """Create ``self.text_encoder`` and then set up the visual encoder.

     With ``args.te_bert`` set, a ``BertEncoder`` is used. Otherwise a
     ``TextEncoder`` is built (optionally seeded with pretrained embeddings)
     and, when ``args.te_bidir`` is set, ``args.te_hidden`` is doubled in
     place afterwards.

     Args:
         args: configuration object; note that ``args.te_hidden`` may be
             mutated by this call.
     """
     if args.te_bert:
         self.text_encoder = BertEncoder(args.te_hidden)
     else:
         embedding_init = None
         if args.te_pretrained:
             embedding_init = load_pretrained_embedding(args.i2q, args.te_embedding)
         self.text_encoder = TextEncoder(
             args.q_size, args.te_embedding, args.te_type, args.te_hidden,
             args.te_layer, args.te_dropout, args.te_bidir, embedding_init)
         # The doubling happens only after the encoder was built with the
         # original hidden size; code that runs later sees the doubled value.
         if args.te_bidir:
             args.te_hidden *= 2
     self.init_ve(args)
Example #3
0
 def __init__(self, args):
     """Build the sub-modules of the FiLM model from the ``args`` namespace.

     Creates, in this order: the text encoder, the visual encoder (a 1x1
     convolution over pre-extracted features, or a ``Conv`` encoder), a linear
     layer sized ``cv_filter * film_res_layer * 2``, the residual blocks and
     the classifier.

     Args:
         args: configuration object; the attributes read are ``cv_pretrained``,
             ``te_pretrained``, ``word_to_idx``, ``q_size``, ``a_size``,
             ``dataset`` and the ``te_*``, ``cv_*`` and ``film_*`` settings
             used below.
     """
     super(Film, self).__init__()
     self.filters = args.cv_filter
     self.layers = args.film_res_layer
     self.cv_pretrained = args.cv_pretrained

     # Pretrained word vectors are only loaded when explicitly requested.
     if args.te_pretrained:
         embedding_init = load_pretrained_embedding(args.word_to_idx, args.te_embedding)
     else:
         embedding_init = None
     self.text_encoder = TextEncoder(
         args.q_size, args.te_embedding, args.te_hidden,
         args.te_layer, args.te_dropout, embedding_init)

     if args.cv_pretrained:
         # Input channel count depends on the dataset: 2048 for 'vqa2', else 1024.
         in_channels = 2048 if args.dataset == 'vqa2' else 1024
         self.visual_encoder = nn.Conv2d(
             in_channels, args.cv_filter, kernel_size=1, stride=1)
     else:
         self.visual_encoder = Conv(
             args.cv_filter, args.cv_kernel, args.cv_stride,
             args.cv_layer, args.cv_batchnorm)

     # NOTE(review): the factor 2 presumably yields one scale and one shift
     # value per channel per residual block - confirm against forward().
     film_param_count = args.cv_filter * args.film_res_layer * 2
     self.fc = nn.Linear(args.te_hidden, film_param_count)

     blocks = []
     for _ in range(args.film_res_layer):
         blocks.append(FilmResBlock(args.cv_filter, args.film_res_kernel))
     self.res_blocks = nn.ModuleList(blocks)

     self.classifier = FilmClassifier(
         args.cv_filter, args.film_cf_filter, args.film_fc_hidden,
         args.a_size, args.film_fc_layer)
Example #4
0
 def __init__(self, args):
     """Build the sub-modules of the FiLM model from the ``args`` namespace.

     Creates, in this order: the text encoder, the visual encoder (from
     ``load_pretrained_conv`` or a ``Conv`` encoder), a linear layer sized
     ``cv_filter * res_layer * 2``, the residual blocks and the classifier.

     Args:
         args: configuration object; the attributes read are ``te_pretrained``,
             ``cv_pretrained``, ``word2idx``, ``q_size``, ``a_size`` and the
             ``te_*``, ``cv_*``, ``res_*``, ``cf_filter`` and ``fc_*``
             settings used below.
     """
     super(Film, self).__init__()
     self.filters = args.cv_filter
     self.layers = args.res_layer

     # Pretrained word vectors are only loaded when explicitly requested.
     embedding_init = (
         load_pretrained_embedding(args.word2idx, args.te_embedding)
         if args.te_pretrained else None
     )
     self.text_encoder = TextEncoder(
         args.q_size, args.te_embedding, args.te_hidden, args.te_layer,
         embedding_init)

     self.visual_encoder = (
         load_pretrained_conv(args.cv_filter)
         if args.cv_pretrained
         else Conv(args.cv_filter, args.cv_kernel, args.cv_stride,
                   args.cv_layer, args.cv_batchnorm)
     )

     # NOTE(review): the factor 2 presumably yields one scale and one shift
     # value per channel per residual block - confirm against forward().
     film_param_count = args.cv_filter * args.res_layer * 2
     self.fc = nn.Linear(args.te_hidden, film_param_count)

     blocks = []
     for _ in range(args.res_layer):
         blocks.append(ResBlock(args.cv_filter, args.res_kernel))
     self.res_blocks = nn.ModuleList(blocks)

     self.classifier = Classifier(
         args.cv_filter, args.cf_filter, args.fc_hidden, args.a_size,
         args.fc_layer)
Example #5
0
 def __init__(self, args):
     """Build the text encoder, visual encoder, MRN blocks and output layer.

     Args:
         args: configuration object; the attributes read are ``cv_pretrained``,
             ``te_pretrained``, ``word_to_idx``, ``q_size``, ``a_size``,
             ``dataset`` and the ``te_*`` and ``mrn_*`` settings used below.

     Raises:
         NotImplementedError: if ``args.cv_pretrained`` is truthy. This is a
             ``RuntimeError`` subclass, so handlers written for the previous
             bare ``raise`` (which surfaced as ``RuntimeError``) still match.
     """
     super().__init__()
     self.cv_pretrained = args.cv_pretrained
     pretrained_weight = load_pretrained_embedding(
         args.word_to_idx,
         args.te_embedding) if args.te_pretrained else None
     self.text_encoder = TextEncoder(args.q_size, args.te_embedding,
                                     args.te_type, args.te_hidden,
                                     args.te_layer, args.te_dropout,
                                     pretrained_weight)
     if args.cv_pretrained:
         # A bare ``raise`` here had no active exception to re-raise and only
         # produced "RuntimeError: No active exception to reraise"; fail with
         # an explicit, descriptive error instead.
         # NOTE(review): this branch looks inverted - the ``else`` path is the
         # one that calls load_pretrained_conv(). Confirm the intended
         # condition with the model's author.
         raise NotImplementedError(
             "args.cv_pretrained=True is not supported by this model")
     else:
         self.visual_encoder = load_pretrained_conv()
         # Feature channel count depends on the dataset: 2048 for 'vqa2', else 1024.
         filters = 2048 if args.dataset == 'vqa2' else 1024
     # The first block takes the text hidden size; the remaining
     # ``mrn_layer - 1`` blocks take ``mrn_hidden`` in that position.
     self.first_block = MrnBlock(filters, args.te_hidden, args.mrn_hidden)
     self.blocks = nn.ModuleList([
         MrnBlock(filters, args.mrn_hidden, args.mrn_hidden)
         for _ in range(args.mrn_layer - 1)
     ])
     self.fc = nn.Linear(args.mrn_hidden, args.a_size)
Example #6
0
    labels = torch.tensor([score for _, score in data])
    return features, labels


# Script section: build the datasets, create the BiRNN, load frozen pretrained
# word vectors into its embedding layer, then train.
batch_size = 64
train_data, test_data = train_test_split(load_data(), test_size=0.2)
vocab = get_vocab(train_data)
print('# words in vocab:', len(vocab))
train_set = Data.TensorDataset(*preprocess(train_data, vocab))
test_set = Data.TensorDataset(*preprocess(test_data, vocab))
train_iter = Data.DataLoader(train_set, batch_size, shuffle=True)
test_iter = Data.DataLoader(test_set, batch_size)

embed_size, num_hiddens, num_layers = 300, 300, 2
net = BiRNN(vocab, embed_size, num_hiddens, num_layers)

cache = '.vector_cache'
# exist_ok=True replaces the former exists()-then-mkdir() pair, which could
# fail if the directory appeared between the check and the creation.
os.makedirs(cache, exist_ok=True)
glove_vocab = Vocab.Vectors(name='./data/sgns.weibo.bigram-char', cache=cache)
net.embedding.weight.data.copy_(
    load_pretrained_embedding(vocab.itos, glove_vocab))
# The embedding is loaded directly from pretrained vectors, so it is not updated.
net.embedding.weight.requires_grad = False

lr, num_epochs = 0.01, 5
# Exclude the embedding parameters that do not require gradients.
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=lr)
loss = nn.CrossEntropyLoss()
trainer = Trainer(net, loss, optimizer)
trainer.train(train_iter, test_iter, device, num_epochs)
def _load_vocab(path):
    """Read a vocabulary from the JSON file at ``path``."""
    with open(path) as fr:
        return json.load(fr)


def _create_vocab(args):
    """Build a vocabulary from the training JSON and save it under ``vocabs/``."""
    print("Creating vocabulary")
    targets = []
    with open(JSON_FILES['train']) as fr:
        for item in json.load(fr):
            if args.target_type == 'hashtag':
                targets.extend(item['hashtag'])
            else:
                targets.extend(tokenize_fn(item['text']))
    vocab = generate_vocab(targets, args.vocab_min_freq)

    # exist_ok=True avoids the race of checking for the directory first.
    os.makedirs('vocabs/', exist_ok=True)
    vocab_path = "vocabs/{0}_vocab_{1}.json".format(args.target_type,
                                                    args.vocab_min_freq)
    with open(vocab_path, "w") as fw:
        json.dump(vocab, fw)
    return vocab


def _build_encoder_decoder(model_name, vocab_size):
    """Construct the (encoder, decoder) pair selected by ``model_name``.

    'showatt' -> Encoder + Decoder; 'resnext_lb' -> ResNextEncoder +
    LookBackDecoder; 'resnext' -> ResNextEncoder + Decoder; any other name
    -> Encoder + LookBackDecoder.
    """
    if model_name in ('resnext', 'resnext_lb'):
        encoder_cls = ResNextEncoder
    else:
        encoder_cls = Encoder
    if model_name in ('showatt', 'resnext'):
        decoder_cls = Decoder
    else:
        decoder_cls = LookBackDecoder

    # The encoder is created before the decoder, as in the original branches.
    encoder = encoder_cls(Config.encoded_size)
    decoder = decoder_cls(Config.encoder_dim, Config.decoder_dim,
                          Config.attention_dim, Config.embed_dim, vocab_size)
    return encoder, decoder


def _run_training(args):
    """Prepare vocabulary, data loaders, model and trainer, then train."""
    if args.vocab_file:
        vocab = _load_vocab(args.vocab_file)
    else:
        # Create vocabulary if not given
        vocab = _create_vocab(args)

    # prepare dataloader
    print("Loading DataLoader")
    train_dataloader = get_dataloader(JSON_FILES['train'],
                                      vocab,
                                      type=args.target_type,
                                      tokenize_fn=tokenize_fn,
                                      batch_size=args.batch_size,
                                      num_workers=Config.num_workers,
                                      load_on_ram=args.load_image_on_ram)
    val_dataloader = get_dataloader(JSON_FILES['val'],
                                    vocab,
                                    type=args.target_type,
                                    tokenize_fn=tokenize_fn,
                                    batch_size=1,
                                    num_workers=Config.num_workers,
                                    load_on_ram=args.load_image_on_ram,
                                    shuffle=False)

    # prepare model
    print("Loading Model")
    print(args.model)
    encoder, decoder = _build_encoder_decoder(args.model, len(vocab))

    if args.target_type == 'text':
        # load pretrained embedding
        decoder.load_embedding(
            load_pretrained_embedding(vocab).to(Config.device))

    # prepare trainer
    trainer = Trainer(encoder,
                      decoder,
                      train_dataloader,
                      val_dataloader,
                      target_type=args.target_type,
                      lr=args.lr)
    if args.checkpoint_load_path:
        # load checkpoint
        trainer.load(args.checkpoint_load_path)

    # train!
    print("Start Training using device {0}".format(Config.device))
    os.makedirs('checkpoint/', exist_ok=True)

    checkpoint_save_path = "checkpoint/{0}_{1}_{2}.pth".format(
        args.model, args.target_type, args.vocab_min_freq)
    trainer.train(args.num_epochs, checkpoint_save_path)


def _run_test(args):
    """Load vocabulary and checkpoint, then print metrics on the test split."""
    assert args.vocab_file is not None, \
        "vocab_file is required outside of train mode"
    assert args.checkpoint_load_path is not None, \
        "checkpoint_load_path is required outside of train mode"

    print("Loading vocab...")
    vocab = _load_vocab(args.vocab_file)

    print("Loading model...")
    print(args.model)
    encoder, decoder = _build_encoder_decoder(args.model, len(vocab))

    encoder = encoder.to(Config.device)
    decoder = decoder.to(Config.device)

    load_model(encoder, decoder, args.checkpoint_load_path)

    encoder.eval()
    decoder.eval()

    test_dataloader = get_dataloader(JSON_FILES['test'],
                                     vocab,
                                     type=args.target_type,
                                     tokenize_fn=tokenize_fn,
                                     batch_size=1,
                                     num_workers=Config.num_workers,
                                     load_on_ram=args.load_image_on_ram,
                                     shuffle=False)

    print("Running test...")
    if args.target_type == 'hashtag':
        f1, prec, rec = test_hashtag(encoder, decoder, test_dataloader,
                                     vocab)
        print("avg F1: {0:.4f}".format(f1))
        print('avg Precision: {0:.4f}'.format(prec))
        print('avg Recall: {0:.4f}'.format(rec))
    elif args.target_type == 'text':
        bleu1, rouge_l, meteor = test_text(encoder, decoder,
                                           test_dataloader, vocab)
        print('avg BLEU-1: {0:.4f}'.format(bleu1))
        print('avg ROUGE-L: {0:.4f}'.format(rouge_l))
        print('avg METEOR: {0:.4f}'.format(meteor))


def main(args):
    """Run training when ``args.mode == 'train'``, otherwise run the test pass.

    Args:
        args: parsed options. Train mode reads ``vocab_file``, ``target_type``,
            ``vocab_min_freq``, ``batch_size``, ``load_image_on_ram``,
            ``model``, ``lr``, ``checkpoint_load_path`` and ``num_epochs``.
            Any other mode requires ``vocab_file`` and
            ``checkpoint_load_path`` and reads ``model``, ``target_type`` and
            ``load_image_on_ram``.

    Raises:
        AssertionError: outside train mode, when ``vocab_file`` or
            ``checkpoint_load_path`` is ``None``.
    """
    if args.mode == 'train':
        _run_training(args)
    else:
        _run_test(args)
            for conv in self.convs
        ],
                             dim=1)
        # 应用丢弃法后使用全连接层得到输出
        outputs = self.decoder(self.dropout(encoding))
        return outputs


# Script section: build the TextCNN, load pretrained GloVe vectors into both
# embedding layers (freezing the constant one), then train and evaluate.
print('构建网络')
embed_size = 100
kernel_sizes = [3, 4, 5]
nums_channels = [100, 100, 100]
net = TextCNN(vocab, embed_size, kernel_sizes, nums_channels)

print('加载预训练词向量')
glove = Vocab.GloVe(name='6B', dim=100, cache='./data/glove')
# The lookup is performed once per layer, in the same order as before.
for _embedding_layer in (net.embedding, net.constant_embedding):
    _embedding_layer.weight.data.copy_(
        utils.load_pretrained_embedding(vocab.itos, glove))
# Only the constant embedding is frozen; net.embedding stays trainable.
net.constant_embedding.weight.requires_grad_(False)

print('训练并评价模型')
lr = 0.001
num_epochs = 5
# Hand the optimizer only the parameters that still require gradients.
optimizer = torch.optim.Adam(
    [param for param in net.parameters() if param.requires_grad], lr=lr)
loss = nn.CrossEntropyLoss()
utils.train(train_iter, test_iter, net, loss, optimizer, device, num_epochs)

print('尝试预测')
print(
    utils.predict_sentiment(net, vocab,