class Net(nn.Module):
    """Sequence tagger: word embeddings -> FFN or BiLSTM -> per-token tag scores,
    with an optional CRF layer for NER."""

    def __init__(self, args):
        super().__init__()
        self.args = args
        self.wemb = Wemb(args)
        self.drop = nn.Dropout(args.dropout)
        odim = len(args.tag_stoi)
        if args.ner:
            self.crf = CRF(args.tag_stoi)
            odim = len(args.tag_stoi) + 2  # the CRF uses two extra label slots
        if not args.lstm:
            self.ffn = nn.Sequential(nn.Linear(300, 400), nn.ReLU(),
                                     nn.Dropout(args.dropout))
        else:
            self.lstm = nn.LSTM(input_size=300,
                                hidden_size=200,
                                num_layers=2,
                                bias=True,
                                batch_first=True,
                                dropout=args.dropout,
                                bidirectional=True)
        self.hid2tag = nn.Linear(400, odim)

    def forward(self, batch):
        # Mask is 1 on real tokens, 0 on padding.
        mask = pad_sequence([torch.ones(len(x)) for x in batch], True,
                            0).byte().cuda()
        if self.args.fix:
            # Frozen embeddings: eval mode and no gradient through Wemb.
            with torch.no_grad():
                x = self.wemb.eval()(batch)
        else:
            x = self.wemb(batch)
        x = self.drop(x)
        if not self.args.lstm:
            x = self.ffn(x)
        else:
            # `Lstm` is a helper defined elsewhere; sentence lengths come from the mask.
            x = Lstm(self.lstm, x, mask.sum(-1))
        x = self.hid2tag(x)
        return x, mask

    def train_batch(self, batch, tags):
        x, mask = self.forward(batch)
        tag_ids = pad_sequence([
            torch.LongTensor([self.args.tag_stoi[t] for t in s]) for s in tags
        ], True, self.args.tag_stoi["<pad>"]).cuda()
        if not self.args.ner:
            # Token-level cross-entropy over non-pad positions only.
            loss = nn.functional.cross_entropy(x[mask], tag_ids[mask])
        else:
            loss = self.crf.neg_log_likelihood_loss(x, mask, tag_ids)
        return loss

    def test_batch(self, batch):
        x, mask = self.forward(batch)
        if not self.args.ner:
            path = x.max(-1)[1]  # greedy argmax decoding
        else:
            _, path = self.crf._viterbi_decode(x, mask)
        # Strip padding and map tag ids back to tag strings.
        path = [p[m].tolist() for p, m in zip(path, mask)]
        tags = [[self.args.tag_itos[i] for i in s] for s in path]
        return tags
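# --- Illustrative sketch (not from the original code) ---
# Shows, on toy tensors, how the padding mask built in Net.forward lets
# Net.train_batch compute cross-entropy over real tokens only. Shapes, values,
# and the number of tags are made up for the example; .bool() is used here
# where the original uses .byte(), but both select the same positions.
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence

lengths = [3, 1]  # two sentences of length 3 and 1
mask = pad_sequence([torch.ones(n) for n in lengths],
                    batch_first=True).bool()      # (2, 3), True on real tokens
scores = torch.randn(2, 3, 5)                     # (batch, time, n_tags)
gold = torch.tensor([[1, 2, 0], [4, 0, 0]])       # 0 fills the padded slots
loss = nn.functional.cross_entropy(scores[mask], gold[mask])
print(mask.sum().item(), "real tokens contribute to loss", loss.item())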
# Load the SST splits, build the task vocabulary, and collect the
# out-of-vocabulary query words.
for K in args.Ks.split(","):
    _, _, t = torch.load(f"{args.dir}/sst_{K}.{args.seed}.pt")
    tess.append(t)

import sys
sys.path.append("../util")
from wordemb import wordemb

args.wtoi, args.wvec, args.w2f = wordemb(args)
args.tsk_words = sorted(
    set(chain(*[s.split() for s, t in tra + dev + tes + sum(tess, [])])))
args.qws = [w for w in args.tsk_words if w not in args.wtoi]
args.tra = []

sys.path.append("../lstm/_sim")
from wemb import Wemb

# Extend the embedding table with an all-zero <unk> row.
wemb = Wemb(args)
wtoi, wvec = wemb.wtoi_, wemb.wvec_
wtoi["<unk>"] = len(wvec)
wvec = torch.cat([wvec, torch.zeros(1, 300)])

from more_itertools import chunked
from tqdm import tqdm
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
from transformers import BertForSequenceClassification
# Pick query words within the frequency band, split them into train/dev,
# and train the embedding predictor one batch of words at a time.
word_qws = [
    w for w in args.wtoi.keys() if args.f_min < args.w2f[w] < args.f_max
]
if len(word_qws) > int(args.n_tradev):
    word_qws = random.sample(word_qws, int(args.n_tradev))
qws = list(set(word_qws + card.ws + toefl_ws))
N = args.n_dev
x = random.sample(word_qws, len(word_qws))
tra, dev = x[:-N], x[-N:]
# print("word_qws", len(word_qws), "qws", len(qws))

out = ""
best = 0
from wemb import Wemb
net = Wemb(args=args, qws=qws).cuda()
optim = torch.optim.Adam(params=net.parameters(), lr=args.lr)
for epoch in range(args.epoch):
    net.train()
    lstloss = []
    for ws in chunked(random.sample(tra, len(tra)), args.batch):
        loss = net.train_ws(ws=ws)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), args.clip)
        optim.step()
        optim.zero_grad()
        lstloss.append(loss.item())
    net.eval()
    lstloss = []
def main(args):
    # Train the embedding predictor on context windows extracted from the
    # corpus and keep the checkpoint with the lowest dev loss.
    stopwords = set(nltk.corpus.stopwords.words("english")) | set(
        string.punctuation)
    args.wtoi, args.wvec, args.w2f = wordemb(args)
    with open(args.corpus_vocab_file, "r") as h:
        x = [r.split(" ") for r in h.read().rstrip().split("\n")]
    w2f_cp = {w: int(f) for w, f in x}
    qws = [
        w for w, f in args.w2f.items()
        if w not in stopwords and w in w2f_cp and args.f_min < f < args.f_max
    ]
    qws = random.sample(qws, args.n_tradev)
    # print(f"qws {len(qws)}")
    wctx = get_ctxs(args, qws, maxctx=args.maxctx, wnd=args.ctxwnd)
    N = args.n_dev
    x = random.sample(wctx, len(wctx))
    tra, dev = x[:-N], x[-N:]
    print(f"tra {len(tra)}, dev {len(dev)}")
    args.qws = list(set([w for w, _ in wctx]))

    net = Wemb(args).cuda()
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)
    best = float("inf")  # lowest dev loss so far
    for epoch in range(args.epoch):
        net.train()
        lstloss = []
        x = random.sample(tra, len(tra))
        for batch in chunked(x, args.batch):
            loss = net.train_batch(batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), args.clip)
            optim.step()
            optim.zero_grad()
            lstloss.append(loss.item())
        net.eval()
        lstloss = []
        for batch in chunked(dev, args.batch):
            with torch.no_grad():
                loss = net.train_batch(batch)
            lstloss.append(loss.item())
        dev_loss = np.mean(lstloss)
        # print(dev_loss)
        if dev_loss < best:
            best = dev_loss
            # best_loss = f"epoch {epoch}: {dev_loss}"
            bestmodel = net.state_dict()
            # print(best_loss)
    torch.save(bestmodel, "model.pt")
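# --- Illustrative driver (assumption, not from the original script) ---
# The flag names mirror the attributes read from `args` in main(); the
# defaults are placeholders, and wordemb()/get_ctxs() may read further
# attributes that are not listed here.
if __name__ == "__main__":
    import argparse
    p = argparse.ArgumentParser()
    p.add_argument("--corpus_vocab_file", required=True)
    p.add_argument("--f_min", type=int, default=5)
    p.add_argument("--f_max", type=int, default=10000)
    p.add_argument("--n_tradev", type=int, default=10000)
    p.add_argument("--n_dev", type=int, default=1000)
    p.add_argument("--maxctx", type=int, default=64)
    p.add_argument("--ctxwnd", type=int, default=5)
    p.add_argument("--lr", type=float, default=1e-3)
    p.add_argument("--epoch", type=int, default=10)
    p.add_argument("--batch", type=int, default=64)
    p.add_argument("--clip", type=float, default=5.0)
    main(p.parse_args())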
# Evaluate the trained predictor on the CARD word-similarity data and the
# TOEFL synonym questions.
import sys
sys.path.append("../util")
from util import wordemb

args.wtoi, args.wvec, args.w2f = wordemb(args)

from rwcard import CARD
card = CARD(args=args)
from toefl import toefl
_toefl = toefl(vocab=args.wtoi, args=args)
toefl_ws = [l for l, r in _toefl]
qws = sorted(set(card.ws + toefl_ws))

from wemb import Wemb
net = Wemb(args=args, qws=qws).cuda()
net.load_state_dict(saved_obj["state_dict"])

with torch.no_grad():
    es = net.pred(ws=card.ws).cpu().detach()
# Back off to the pre-trained vectors for words that are in the vocabulary.
m = torch.ByteTensor([w in args.wtoi for w in card.ws])
es[m] = args.wvec[[args.wtoi[w] for w in card.ws if w in args.wtoi]]
r, wivr, oovr, oovrate, sims = card.evaluate(es.tolist(),
                                             args.wtoi,
                                             ws=card.ws)

with torch.no_grad():
    cs = []
    for x in chunked(_toefl, 10000):
        bmis, btrg = zip(*x)
        bmis = list(bmis)
        with torch.no_grad():
            es = net.pred(ws=bmis).cpu().detach().numpy()