def predict_contacts(model, x, y, use_cuda):
    b = len(x)

    x, order = pack_sequences(x)
    x = PackedSequence(Variable(x.data), x.batch_sizes)
    z = model(x) # embed the sequences
    z = unpack_sequences(z, order)

    logits = []
    y_list = []
    for i in range(b):
        zi = z[i]
        lp = model.predict(zi.unsqueeze(0)).view(-1)
        yi = y[i].view(-1)
        if use_cuda:
            yi = yi.cuda()
        mask = (yi < 0) # drop positions with negative labels (masked/unlabeled pairs)
        lp = lp[~mask]
        yi = yi[~mask]
        logits.append(lp)
        y_list.append(yi)

    return logits, y_list
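# pack_sequences / unpack_sequences are imported from elsewhere in the repository and
# are not shown in this file. The sketch below is only an assumption about their
# contract, not the repository's implementation: pack variable-length sequences into a
# length-sorted PackedSequence and return the permutation needed to undo the sort.
def _pack_sequences_sketch(X):
    # X: list of 1D LongTensors with different lengths
    import numpy as np
    import torch
    from torch.nn.utils.rnn import pack_padded_sequence

    lengths = np.array([len(x) for x in X])
    order = np.argsort(lengths)[::-1]            # longest-first, as required by packing
    padded = X[0].new_zeros(len(X), int(lengths.max()))
    for i, j in enumerate(order):
        padded[i, :len(X[j])] = X[j]             # zero-padded batch in sorted order
    packed = pack_padded_sequence(padded, lengths[order].tolist(), batch_first=True)
    return packed, order

def _unpack_sequences_sketch(packed, order):
    # Invert the packing: strip the padding and restore the caller's original ordering.
    from torch.nn.utils.rnn import pad_packed_sequence

    padded, lengths = pad_packed_sequence(packed, batch_first=True)
    out = [None] * len(order)
    for i, j in enumerate(order):
        out[j] = padded[i, :lengths[i]]
    return out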
def eval_similarity(model, test_iterator, use_cuda):
    y = []
    logits = []
    for x0, x1, y_mb in test_iterator:
        if use_cuda:
            y_mb = y_mb.cuda()
        y.append(y_mb.long())

        b = len(x0)
        x = x0 + x1

        x, order = pack_sequences(x)
        x = PackedSequence(Variable(x.data), x.batch_sizes)
        z = model(x) # embed the sequences
        z = unpack_sequences(z, order)

        z0 = z[:b]
        z1 = z[b:]
        for i in range(b):
            z_a = z0[i]
            z_b = z1[i]
            logits.append(model.score(z_a, z_b))

    y = torch.cat(y, 0)
    logits = torch.stack(logits, 0)

    # convert the ordinal per-threshold probabilities into a distribution over the 5 levels
    p = F.sigmoid(logits).data
    ones = p.new(p.size(0), 1).zero_() + 1
    p_ge = torch.cat([ones, p], 1)
    p_lt = torch.cat([1 - p, ones], 1)
    p = p_ge * p_lt
    p = p / p.sum(1, keepdim=True) # make sure p is normalized

    loss = F.cross_entropy(p, y).item()

    _, y_hard = torch.max(p, 1)
    levels = torch.arange(5).to(p.device)
    y_hat = torch.sum(p * levels, 1)

    accuracy = torch.mean((y == y_hard).float()).item()
    mse = torch.mean((y.float() - y_hat)**2).item()

    y = y.cpu().numpy()
    y_hat = y_hat.cpu().numpy()

    r, _ = pearsonr(y_hat, y)
    rho, _ = spearmanr(y_hat, y)

    return loss, accuracy, mse, r, rho
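# The ordinal construction used above (and again in similarity_grad and the alignment
# scorer): model.score emits 4 logits per pair, sigmoid(logits)[k] is read as the
# probability that the similarity level exceeds k, and multiplying the padded
# "greater-or-equal" and "less-than" cumulative vectors gives an (unnormalized)
# distribution over the 5 discrete levels. A small self-contained illustration:
def _ordinal_to_distribution_example():
    import torch

    p = torch.tensor([0.9, 0.7, 0.2, 0.05])   # P(level > k) for k = 0..3
    ones = torch.ones(1)
    p_ge = torch.cat([ones, p])                # P(level >= k) for k = 0..4
    p_lt = torch.cat([1 - p, ones])            # P(level < k + 1)
    dist = p_ge * p_lt
    dist = dist / dist.sum()                   # normalized 5-class distribution
    expected_level = torch.sum(dist * torch.arange(5).float())
    return dist, expected_level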
def predict_minibatch(model, x, use_cuda):
    b = len(x)

    x, order = pack_sequences(x)
    x = PackedSequence(x.data, x.batch_sizes)
    z = model(x) # embed the sequences
    z = unpack_sequences(z, order)

    logits = []
    for i in range(b):
        zi = z[i]
        # L x L matrix of contact logits for sequence i
        lp = model.predict(zi.unsqueeze(0)).view(zi.size(0), zi.size(0))
        logits.append(lp)

    return logits
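# Hypothetical usage of predict_minibatch: each returned entry is an L_i x L_i matrix
# of contact logits for sequence i, so a sigmoid turns it into a contact probability
# map. `model` and `encoded` (a list of encoded LongTensor sequences) are placeholders
# and are not defined in this file.
def _contact_map_example(model, encoded, use_cuda=False):
    import torch

    logits = predict_minibatch(model, encoded, use_cuda)
    contact_maps = [torch.sigmoid(lp).detach().cpu().numpy() for lp in logits]
    return contact_maps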
def similarity_grad(model, x0, x1, y, use_cuda, weight=0.5):
    if use_cuda:
        y = y.cuda()
    y = Variable(y)

    b = len(x0)
    x = x0 + x1

    x, order = pack_sequences(x)
    x = PackedSequence(Variable(x.data), x.batch_sizes)
    z = model(x) # embed the sequences
    z = unpack_sequences(z, order)

    z0 = z[:b]
    z1 = z[b:]

    logits = []
    for i in range(b):
        z_a = z0[i]
        z_b = z1[i]
        logits.append(model.score(z_a, z_b))
    logits = torch.stack(logits, 0)

    loss = F.binary_cross_entropy_with_logits(logits, y.float())

    # backprop weighted loss
    w_loss = loss * weight
    w_loss.backward()

    # calculate minibatch performance metrics
    with torch.no_grad():
        p = F.sigmoid(logits)
        ones = p.new(b, 1).zero_() + 1
        p_ge = torch.cat([ones, p], 1)
        p_lt = torch.cat([1 - p, ones], 1)
        p = p_ge * p_lt
        p = p / p.sum(1, keepdim=True) # make sure p is normalized

        _, y_hard = torch.max(p, 1)
        levels = torch.arange(5).to(p.device)
        y_hat = torch.sum(p * levels, 1)
        y = torch.sum(y.data, 1) # recover the integer level from the ordinal indicator rows

        loss = F.cross_entropy(p, y).item() # calculate cross entropy loss from p vector
        correct = torch.sum((y == y_hard).float()).item()
        mse = torch.mean((y.float() - y_hat)**2).item()

    return loss, correct, mse, b
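# The similarity labels y above appear to be ordinal indicator rows: a pair at level 3
# (of 0..4) would be encoded as [1, 1, 1, 0], so binary_cross_entropy_with_logits trains
# each "level >= k + 1" output and torch.sum(y, 1) recovers the integer level. This
# encoding is inferred from the code in this file, not documented here; a tiny helper
# under that assumption:
def _encode_similarity_level_sketch(level, num_levels=5):
    import torch

    y = torch.zeros(num_levels - 1)
    y[:level] = 1.0
    return y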
def __call__(self, x):
    c = [torch.from_numpy(x_).long() for x_ in x]
    c, order = pack_sequences(c)
    if self.use_cuda:
        c = c.cuda()

    if self.full_features:
        z = featurize(c, self.lm_embed, self.lstm_stack, self.proj)
    else:
        z = self.model(c) # embed the sequences
    z = unpack_sequences(z, order)

    return z
def __call__(self, x, y):
    n = len(x)
    c = ([torch.from_numpy(x_).long() for x_ in x]
         + [torch.from_numpy(y_).long() for y_ in y])
    c, order = pack_sequences(c)
    if self.use_cuda:
        c = c.cuda()

    with torch.no_grad():
        z = self.model(c) # embed the sequences
        z = unpack_sequences(z, order)

        scores = np.zeros(n)
        if self.mode == 'align':
            for i in range(n):
                z_x = z[i]
                z_y = z[i + n]
                logits = self.model.score(z_x, z_y)
                p = F.sigmoid(logits).cpu()
                p_ge = torch.ones(p.size(0) + 1)
                p_ge[1:] = p
                p_lt = torch.ones(p.size(0) + 1)
                p_lt[:-1] = 1 - p
                p = p_ge * p_lt
                p = p / p.sum() # make sure p is normalized
                levels = torch.arange(5).float()
                scores[i] = torch.sum(p * levels).item()
        elif self.mode == 'coarse':
            z_x = z[:n]
            z_y = z[n:]
            z_x = torch.stack([z.mean(0) for z in z_x], 0)
            z_y = torch.stack([z.mean(0) for z in z_y], 0)
            scores[:] = -torch.sum(torch.abs(z_x - z_y), 1).cpu().numpy()

    return scores
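# Hypothetical usage of the scorer __call__ above. The owning class is not shown in this
# excerpt; `scorer` stands for an instance with .model, .use_cuda, and .mode already set,
# and the inputs are lists of integer-encoded numpy sequences. In 'align' mode each score
# is the expected similarity level (0..4) for a pair; in 'coarse' mode it is a negative L1
# distance between mean-pooled embeddings, so larger still means more similar.
def _pairwise_scores_sketch(scorer, encoded_x, encoded_y):
    import numpy as np

    scores = scorer(encoded_x, encoded_y)   # one score per (encoded_x[i], encoded_y[i]) pair
    ranking = np.argsort(-scores)           # most similar pairs first
    return scores, ranking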
def contacts_grad(model, x, y, use_cuda, weight=0.5):
    b = len(x)

    x, order = pack_sequences(x)
    x = PackedSequence(Variable(x.data), x.batch_sizes)
    z = model(x) # embed the sequences
    z = unpack_sequences(z, order)

    logits = []
    for i in range(b):
        zi = z[i]
        lp = model.predict(zi.unsqueeze(0)).view(-1)
        logits.append(lp)
    logits = torch.cat(logits, 0)

    y = torch.cat([yi.view(-1) for yi in y])
    if use_cuda:
        y = y.cuda()

    mask = (y < 0) # drop positions with negative labels (masked/unlabeled pairs)
    logits = logits[~mask]
    y = Variable(y[~mask])
    b = y.size(0)

    loss = F.binary_cross_entropy_with_logits(logits, y)

    # backprop weighted loss
    w_loss = loss * weight
    w_loss.backward()

    # calculate the recall and precision
    with torch.no_grad():
        p_hat = F.sigmoid(logits)
        tp = torch.sum(p_hat * y).item()  # soft true positives
        gp = y.sum().item()               # ground-truth positives
        pp = p_hat.sum().item()           # predicted positives (soft)

    return loss.item(), tp, gp, pp, b
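# A hedged sketch of how similarity_grad and contacts_grad might be combined in a
# multitask update, and how the returned tp / gp / pp counts yield precision and recall.
# Both *_grad functions compute their loss, scale it by their weight, and call backward()
# themselves, so a step only needs to zero the gradients, invoke each, and step the
# optimizer. `optimizer` and the minibatch arguments are placeholders; the actual training
# script is not shown in this file.
def _multitask_step_sketch(model, optimizer, x0, x1, y_sim, x_ct, y_ct,
                           use_cuda=False, similarity_weight=0.5):
    optimizer.zero_grad()
    sim_loss, correct, mse, n_pairs = similarity_grad(
        model, x0, x1, y_sim, use_cuda, weight=similarity_weight)
    ct_loss, tp, gp, pp, n_pos = contacts_grad(
        model, x_ct, y_ct, use_cuda, weight=1.0 - similarity_weight)
    optimizer.step()

    recall = tp / gp if gp > 0 else 0.0       # soft true positives / ground-truth positives
    precision = tp / pp if pp > 0 else 0.0    # soft true positives / predicted positives
    return sim_loss, ct_loss, precision, recall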
def embed_sequences(model: nn.Module,
                    seqs: List[str],
                    batch_size: int = 32,
                    gpu: bool = False) -> List[np.ndarray]:
    """Embed a list of sequences with the given model.

    Args:
        model (nn.Module): embedding model
        seqs (List[str]): sequences to embed
        batch_size (int): number of sequences per minibatch
        gpu (bool): run the model on the GPU

    Returns:
        List[np.ndarray]: one embedding array per input sequence
    """
    encoded_seqs = encode_seqs(seqs)
    identity_collate = lambda x: x
    loader = torch.utils.data.DataLoader(encoded_seqs,
                                         batch_size=batch_size,
                                         collate_fn=identity_collate)
    if gpu:
        model = model.cuda()

    return_seqs = []
    with torch.no_grad():
        model.eval()
        for batch in tqdm(loader):
            X, order = utils.pack_sequences(batch)
            if gpu:
                X = X.cuda()
            out = model.forward(X)
            unpacked = utils.unpack_sequences(out, order)
            return_seqs.extend([i.detach().cpu().numpy() for i in unpacked])

    return return_seqs
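# Hypothetical usage of embed_sequences. How the model is constructed or loaded, and the
# exact input format encode_seqs expects, are not shown in this excerpt, so `model` is a
# placeholder for an already-loaded embedding nn.Module.
def _embed_sequences_example(model):
    seqs = ['MKTAYIAKQR', 'GSHMSLFDFF']                 # toy amino-acid sequences
    embeddings = embed_sequences(model, seqs, batch_size=2, gpu=False)
    for s, e in zip(seqs, embeddings):
        print(s, e.shape)                               # one array per sequence, typically (length, embed_dim)
    return embeddings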