def __init__(self, args):
    """Assemble the BaseRN modules: text encoder, visual front-end, and the
    g_theta / f_phi MLPs, all configured from the parsed CLI args."""
    super(BaseRN, self).__init__()
    self.cv_pretrained = args.cv_pretrained
    # Optionally seed the word embedding from pretrained vectors.
    if args.te_pretrained:
        embed_weights = load_pretrained_embedding(args.word_to_idx, args.te_embedding)
    else:
        embed_weights = None
    self.text_encoder = TextEncoder(args.q_size, args.te_embedding, args.te_type,
                                    args.te_hidden, args.te_layer, args.te_dropout,
                                    embed_weights)
    if args.cv_pretrained:
        # Pretrained feature maps carry 2048 channels for vqa2, 1024 otherwise;
        # a 3x3 conv (stride 1, padding 1) projects them to cv_filter channels.
        in_channels = 2048 if args.dataset == 'vqa2' else 1024
        self.visual_resize = nn.Conv2d(in_channels, args.cv_filter, 3, 1, 1)
    else:
        self.visual_encoder = Conv(args.cv_filter, args.cv_kernel, args.cv_stride,
                                   args.cv_layer, args.cv_batchnorm)
    # g_theta consumes cv_filter visual channels + 2 extra channels (presumably
    # spatial coordinates — confirm against the forward pass) + the question
    # encoding; f_phi maps the aggregate to answer logits.
    self.g_theta = MLP(args.cv_filter + 2 + args.te_hidden, args.basern_gt_hidden,
                       args.basern_gt_hidden, args.basern_gt_layer)
    self.f_phi = MLP(args.basern_gt_hidden, args.basern_fp_hidden, args.a_size,
                     args.basern_fp_layer, args.basern_fp_dropout)
def init_encoders(self, args):
    """Create the text encoder (BERT-based or RNN-based) and then delegate to
    init_ve for the visual encoder."""
    if args.te_bert:
        self.text_encoder = BertEncoder(args.te_hidden)
    else:
        if args.te_pretrained:
            embed_weights = load_pretrained_embedding(args.i2q, args.te_embedding)
        else:
            embed_weights = None
        self.text_encoder = TextEncoder(args.q_size, args.te_embedding, args.te_type,
                                        args.te_hidden, args.te_layer, args.te_dropout,
                                        args.te_bidir, embed_weights)
        if args.te_bidir:
            # A bidirectional encoder emits twice the hidden size, so downstream
            # modules read the doubled value from args.
            # NOTE(review): this mutates the shared args object — calling
            # init_encoders twice would double te_hidden again; confirm it is
            # invoked exactly once.
            args.te_hidden = args.te_hidden * 2
    self.init_ve(args)
def __init__(self, args):
    """Assemble the FiLM network: text encoder, visual front-end, the linear
    layer producing the per-block FiLM parameters, the residual blocks, and
    the classifier head."""
    super(Film, self).__init__()
    self.filters = args.cv_filter
    self.layers = args.film_res_layer
    self.cv_pretrained = args.cv_pretrained
    # Optional pretrained word-embedding matrix for the text encoder.
    pretrained_weight = load_pretrained_embedding(args.word_to_idx, args.te_embedding) if args.te_pretrained else None
    # NOTE(review): other constructors in this file pass args.te_type between
    # te_embedding and te_hidden; this call omits it — confirm the TextEncoder
    # signature expected by this model.
    self.text_encoder = TextEncoder(args.q_size, args.te_embedding, args.te_hidden, args.te_layer, args.te_dropout, pretrained_weight)
    if args.cv_pretrained:
        # Pretrained feature maps carry 2048 channels for vqa2, else 1024; a
        # 1x1 conv projects them down to cv_filter channels.
        filters = 2048 if args.dataset == 'vqa2' else 1024
        self.visual_encoder = nn.Conv2d(filters, args.cv_filter, 1, 1)
    else:
        self.visual_encoder = Conv(args.cv_filter, args.cv_kernel, args.cv_stride, args.cv_layer, args.cv_batchnorm)
    # Two values per filter per residual block — presumably the (gamma, beta)
    # FiLM modulation parameters; confirm against FilmResBlock.
    self.fc = nn.Linear(args.te_hidden, args.cv_filter * args.film_res_layer * 2)
    self.res_blocks = nn.ModuleList([FilmResBlock(args.cv_filter, args.film_res_kernel) for _ in range(args.film_res_layer)])
    self.classifier = FilmClassifier(args.cv_filter, args.film_cf_filter, args.film_fc_hidden, args.a_size, args.film_fc_layer)
def __init__(self, args):
    """Assemble the FiLM model: encoders, FiLM-parameter projection, residual
    blocks, and classifier head, configured from the parsed CLI args."""
    super(Film, self).__init__()
    self.filters = args.cv_filter
    self.layers = args.res_layer
    embed_weights = (load_pretrained_embedding(args.word2idx, args.te_embedding)
                     if args.te_pretrained else None)
    self.text_encoder = TextEncoder(args.q_size, args.te_embedding, args.te_hidden,
                                    args.te_layer, embed_weights)
    if args.cv_pretrained:
        self.visual_encoder = load_pretrained_conv(args.cv_filter)
    else:
        self.visual_encoder = Conv(args.cv_filter, args.cv_kernel, args.cv_stride,
                                   args.cv_layer, args.cv_batchnorm)
    # Two values per filter per residual block — presumably the (gamma, beta)
    # FiLM modulation parameters; confirm against ResBlock.
    self.fc = nn.Linear(args.te_hidden, args.cv_filter * args.res_layer * 2)
    self.res_blocks = nn.ModuleList(
        ResBlock(args.cv_filter, args.res_kernel) for _ in range(args.res_layer))
    self.classifier = Classifier(args.cv_filter, args.cf_filter, args.fc_hidden,
                                 args.a_size, args.fc_layer)
def __init__(self, args):
    """Assemble the MRN model: text encoder, visual encoder, a stack of
    MrnBlocks, and the final answer projection.

    Raises:
        NotImplementedError: if args.cv_pretrained is set — this model has no
            code path for precomputed visual features.
    """
    super().__init__()
    self.cv_pretrained = args.cv_pretrained
    # Optionally seed the word embedding from pretrained vectors.
    pretrained_weight = (load_pretrained_embedding(args.word_to_idx, args.te_embedding)
                         if args.te_pretrained else None)
    self.text_encoder = TextEncoder(args.q_size, args.te_embedding, args.te_type,
                                    args.te_hidden, args.te_layer, args.te_dropout,
                                    pretrained_weight)
    if args.cv_pretrained:
        # Fix: the original used a bare `raise` with no active exception, which
        # fails with "RuntimeError: No active exception to re-raise". Raise an
        # explicit, descriptive error instead.
        # NOTE(review): the branch logic looks inverted — load_pretrained_conv()
        # sits on the NOT-pretrained branch. Confirm the intended behavior.
        raise NotImplementedError(
            'cv_pretrained is not supported by this model')
    else:
        self.visual_encoder = load_pretrained_conv()
    # Pretrained feature maps carry 2048 channels for vqa2, else 1024.
    filters = 2048 if args.dataset == 'vqa2' else 1024
    # First block fuses image features with the question encoding; the
    # remaining mrn_layer - 1 blocks refine in the mrn_hidden space.
    self.first_block = MrnBlock(filters, args.te_hidden, args.mrn_hidden)
    self.blocks = nn.ModuleList([
        MrnBlock(filters, args.mrn_hidden, args.mrn_hidden)
        for _ in range(args.mrn_layer - 1)
    ])
    self.fc = nn.Linear(args.mrn_hidden, args.a_size)
# NOTE(review): this chunk starts mid-function — the two lines below are the
# tail of a preprocessing helper whose `def` lies outside this view; they build
# the label tensor and return the (features, labels) pair.
    labels = torch.tensor([score for _, score in data])
    return features, labels

# --- Script: train a bidirectional RNN sentiment classifier. ---
batch_size = 64
# Hold out 20% of the data for evaluation; build the vocabulary from the
# training split only.
train_data, test_data = train_test_split(load_data(), test_size=0.2)
vocab = get_vocab(train_data)
print('# words in vocab:', len(vocab))
train_set = Data.TensorDataset(*preprocess(train_data, vocab))
test_set = Data.TensorDataset(*preprocess(test_data, vocab))
train_iter = Data.DataLoader(train_set, batch_size, shuffle=True)
test_iter = Data.DataLoader(test_set, batch_size)
embed_size, num_hiddens, num_layers = 300, 300, 2
net = BiRNN(vocab, embed_size, num_hiddens, num_layers)
# Local cache directory for the pretrained word vectors.
cache = '.vector_cache'
if not os.path.exists(cache):
    os.mkdir(cache)
glove_vocab = Vocab.Vectors(name='./data/sgns.weibo.bigram-char', cache=cache)
net.embedding.weight.data.copy_(
    load_pretrained_embedding(vocab.itos, glove_vocab))
net.embedding.weight.requires_grad = False  # weights are loaded pretrained, so they need no updating
lr, num_epochs = 0.01, 5
# Filter out the frozen embedding parameters that do not compute gradients
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                             lr=lr)
loss = nn.CrossEntropyLoss()
trainer = Trainer(net, loss, optimizer)
trainer.train(train_iter, test_iter, device, num_epochs)
def _load_or_build_vocab(args):
    """Return the vocabulary: load it from args.vocab_file when given,
    otherwise build it from the training JSON and persist it under vocabs/."""
    if args.vocab_file:
        with open(args.vocab_file) as fr:
            return json.load(fr)
    # Create vocabulary if not given
    print("Creating vocabulary")
    train_json = JSON_FILES['train']
    targets = []
    with open(train_json) as fr:
        for item in json.load(fr):
            if args.target_type == 'hashtag':
                targets.extend(item['hashtag'])
            else:
                targets.extend(tokenize_fn(item['text']))
    vocab = generate_vocab(targets, args.vocab_min_freq)
    if not os.path.isdir('vocabs/'):
        os.mkdir('vocabs/')
    with open(
            "vocabs/" + args.target_type +
            "_vocab_{0}.json".format(args.vocab_min_freq), "w") as fw:
        json.dump(vocab, fw)
    return vocab


def _build_model(model_name, vocab):
    """Return the (encoder, decoder) pair for a model name.

    'resnext' / 'resnext_lb' use the ResNext encoder, otherwise the plain one;
    'showatt' / 'resnext' use the plain decoder, anything else (including the
    lookback ablation) uses the look-back decoder. This replaces the 4-way
    if/elif chain that was duplicated verbatim in the train and test branches.
    """
    encoder_cls = ResNextEncoder if model_name in ('resnext', 'resnext_lb') else Encoder
    decoder_cls = Decoder if model_name in ('showatt', 'resnext') else LookBackDecoder
    encoder = encoder_cls(Config.encoded_size)
    decoder = decoder_cls(Config.encoder_dim, Config.decoder_dim,
                          Config.attention_dim, Config.embed_dim, len(vocab))
    return encoder, decoder


def main(args):
    """Train (args.mode == 'train') or evaluate a captioning/hashtag model."""
    # train
    if args.mode == 'train':
        vocab = _load_or_build_vocab(args)
        # prepare dataloader
        print("Loading DataLoader")
        train_dataloader = get_dataloader(JSON_FILES['train'], vocab,
                                          type=args.target_type,
                                          tokenize_fn=tokenize_fn,
                                          batch_size=args.batch_size,
                                          num_workers=Config.num_workers,
                                          load_on_ram=args.load_image_on_ram)
        val_dataloader = get_dataloader(JSON_FILES['val'], vocab,
                                        type=args.target_type,
                                        tokenize_fn=tokenize_fn,
                                        batch_size=1,
                                        num_workers=Config.num_workers,
                                        load_on_ram=args.load_image_on_ram,
                                        shuffle=False)
        # prepare model
        print("Loading Model")
        print(args.model)
        encoder, decoder = _build_model(args.model, vocab)
        if args.target_type == 'text':
            # load pretrained embedding
            decoder.load_embedding(
                load_pretrained_embedding(vocab).to(Config.device))
        # prepare trainer
        trainer = Trainer(encoder, decoder, train_dataloader, val_dataloader,
                          target_type=args.target_type, lr=args.lr)
        if args.checkpoint_load_path:
            # load checkpint
            trainer.load(args.checkpoint_load_path)
        # train!
        print("Start Training using device {0}".format(Config.device))
        if not os.path.isdir('checkpoint/'):
            os.makedirs('checkpoint/')
        checkpoint_save_path = "checkpoint/{0}_{1}_{2}.pth".format(
            args.model, args.target_type, args.vocab_min_freq)
        trainer.train(args.num_epochs, checkpoint_save_path)
    # test
    else:
        # Evaluation requires a prebuilt vocab and a trained checkpoint.
        assert args.vocab_file is not None
        assert args.checkpoint_load_path is not None
        print("Loading vocab...")
        with open(args.vocab_file) as fr:
            vocab = json.load(fr)
        print("Loading model...")
        print(args.model)
        encoder, decoder = _build_model(args.model, vocab)
        encoder = encoder.to(Config.device)
        decoder = decoder.to(Config.device)
        load_model(encoder, decoder, args.checkpoint_load_path)
        encoder.eval()
        decoder.eval()
        test_dataloader = get_dataloader(JSON_FILES['test'], vocab,
                                         type=args.target_type,
                                         tokenize_fn=tokenize_fn,
                                         batch_size=1,
                                         num_workers=Config.num_workers,
                                         load_on_ram=args.load_image_on_ram,
                                         shuffle=False)
        print("Running test...")
        if args.target_type == 'hashtag':
            f1, prec, rec = test_hashtag(encoder, decoder,
                                         test_dataloader, vocab)
            print("avg F1: {0:.4f}".format(f1))
            print('avg Precision: {0:.4f}'.format(prec))
            print('avg Recall: {0:.4f}'.format(rec))
        elif args.target_type == 'text':
            bleu1, rouge_l, meteor = test_text(encoder, decoder,
                                               test_dataloader, vocab)
            print('avg BLEU-1: {0:.4f}'.format(bleu1))
            print('avg ROUGE-L: {0:.4f}'.format(rouge_l))
            print('avg METEOR: {0:.4f}'.format(meteor))
# NOTE(review): this chunk starts mid-method — the lines below up to
# `return outputs` are the tail of a TextCNN forward pass whose definition
# starts outside this view.
            for conv in self.convs
        ], dim=1)
        # Apply dropout, then the fully connected layer, to get the output
        outputs = self.decoder(self.dropout(encoding))
        return outputs

# --- Script: build, initialise, train, and probe a TextCNN classifier. ---
print('构建网络')
embed_size, kernel_sizes, nums_channels = 100, [3, 4, 5], [100, 100, 100]
net = TextCNN(vocab, embed_size, kernel_sizes, nums_channels)
print('加载预训练词向量')
# 100-dim GloVe vectors initialise both embedding tables; the `constant`
# copy is then frozen so it keeps the pretrained values during training.
glove = Vocab.GloVe(name='6B', dim=100, cache='./data/glove')
net.embedding.weight.data.copy_(
    utils.load_pretrained_embedding(vocab.itos, glove))
net.constant_embedding.weight.data.copy_(
    utils.load_pretrained_embedding(vocab.itos, glove))
net.constant_embedding.weight.requires_grad = False
print('训练并评价模型')
lr, num_epochs = 0.001, 5
# Only optimize parameters that still require gradients (skips the frozen
# constant embedding).
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    net.parameters()), lr=lr)
loss = nn.CrossEntropyLoss()
utils.train(train_iter, test_iter, net, loss, optimizer, device, num_epochs)
print('尝试预测')
# NOTE(review): the chunk is truncated mid-call below — the remaining
# arguments to predict_sentiment lie outside this view.
print(
    utils.predict_sentiment(net, vocab,