def predict_contacts(model, x, y, use_cuda):
    b = len(x)
    x, order = pack_sequences(x)
    x = PackedSequence(Variable(x.data), x.batch_sizes)
    z = model(x)  # embed the sequences
    z = unpack_sequences(z, order)

    logits = []
    y_list = []
    for i in range(b):
        zi = z[i]
        lp = model.predict(zi.unsqueeze(0)).view(-1)

        yi = y[i].view(-1)
        if use_cuda:
            yi = yi.cuda()
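        # labels below zero mark residue pairs to exclude (e.g. positions with
        # no ground-truth contact information)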
        mask = (yi < 0)

        lp = lp[~mask]
        yi = yi[~mask]

        logits.append(lp)
        y_list.append(yi)

    return logits, y_list
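
# A minimal evaluation sketch around predict_contacts. It assumes `model`, a
# list of encoded sequence tensors `x`, and matching contact-label tensors `y`
# already exist; the helper name below is illustrative, not part of the
# original code.
def masked_contact_bce(model, x, y, use_cuda=False):
    logits, targets = predict_contacts(model, x, y, use_cuda)
    losses = [F.binary_cross_entropy_with_logits(lp, yi.float())
              for lp, yi in zip(logits, targets)]
    return torch.stack(losses).mean()
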
def eval_similarity(model, test_iterator, use_cuda):
    y = []
    logits = []
    for x0, x1, y_mb in test_iterator:

        if use_cuda:
            y_mb = y_mb.cuda()
        y.append(y_mb.long())

        b = len(x0)
        x = x0 + x1

        x, order = pack_sequences(x)
        x = PackedSequence(Variable(x.data), x.batch_sizes)
        z = model(x)  # embed the sequences
        z = unpack_sequences(z, order)

        z0 = z[:b]
        z1 = z[b:]

        for i in range(b):
            z_a = z0[i]
            z_b = z1[i]
            logits.append(model.score(z_a, z_b))

    y = torch.cat(y, 0)
    logits = torch.stack(logits, 0)

    p = F.sigmoid(logits).data
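    # Ordinal levels: sigmoid(logits)[:, k] is read as P(level > k), so
    # p_ge[:, k] ~ P(level >= k) and p_lt[:, k] ~ P(level <= k); their product
    # approximates P(level == k) over the discrete similarity levels.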
    ones = p.new(p.size(0), 1).zero_() + 1
    p_ge = torch.cat([ones, p], 1)
    p_lt = torch.cat([1 - p, ones], 1)
    p = p_ge * p_lt
    p = p / p.sum(1, keepdim=True)  # make sure p is normalized

    loss = F.cross_entropy(p, y).item()

    _, y_hard = torch.max(p, 1)
    levels = torch.arange(5).to(p.device)
    y_hat = torch.sum(p * levels, 1)

    accuracy = torch.mean((y == y_hard).float()).item()
    mse = torch.mean((y.float() - y_hat)**2).item()

    y = y.cpu().numpy()
    y_hat = y_hat.cpu().numpy()

    r, _ = pearsonr(y_hat, y)
    rho, _ = spearmanr(y_hat, y)

    return loss, accuracy, mse, r, rho
def predict_minibatch(model, x, use_cuda):
    b = len(x)
    x, order = pack_sequences(x)
    x = PackedSequence(x.data, x.batch_sizes)
    z = model(x)  # embed the sequences
    z = unpack_sequences(z, order)

    logits = []
    for i in range(b):
        zi = z[i]
        lp = model.predict(zi.unsqueeze(0)).view(zi.size(0), zi.size(0))
        logits.append(lp)

    return logits
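
# Minimal usage sketch for predict_minibatch: convert the returned LxL logit
# maps to contact probabilities (the `model` and the list of encoded sequence
# tensors `x` are assumed to exist; names are illustrative).
def contact_probabilities(model, x):
    contact_logits = predict_minibatch(model, x, use_cuda=False)
    return [torch.sigmoid(lp) for lp in contact_logits]  # one LxL map per sequence
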
def similarity_grad(model, x0, x1, y, use_cuda, weight=0.5):
    if use_cuda:
        y = y.cuda()
    y = Variable(y)

    b = len(x0)
    x = x0 + x1

    x, order = pack_sequences(x)
    x = PackedSequence(Variable(x.data), x.batch_sizes)
    z = model(x)  # embed the sequences
    z = unpack_sequences(z, order)

    z0 = z[:b]
    z1 = z[b:]

    logits = []
    for i in range(b):
        z_a = z0[i]
        z_b = z1[i]
        logits.append(model.score(z_a, z_b))
    logits = torch.stack(logits, 0)
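    # y encodes the similarity level ordinally: column k is 1 when the true
    # level exceeds k, so each threshold gets its own binary cross-entropy term
    # and summing the columns recovers the integer level.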

    loss = F.binary_cross_entropy_with_logits(logits, y.float())

    # backprop weighted loss
    w_loss = loss * weight
    w_loss.backward()

    # calculate minibatch performance metrics
    with torch.no_grad():
        p = F.sigmoid(logits)
        ones = p.new(b, 1).zero_() + 1
        p_ge = torch.cat([ones, p], 1)
        p_lt = torch.cat([1 - p, ones], 1)
        p = p_ge * p_lt
        p = p / p.sum(1, keepdim=True)  # make sure p is normalized

        _, y_hard = torch.max(p, 1)
        levels = torch.arange(5).to(p.device)
        y_hat = torch.sum(p * levels, 1)
        y = torch.sum(y.data, 1)

        loss = F.cross_entropy(
            p, y).item()  # calculate cross entropy loss from p vector

        correct = torch.sum((y == y_hard).float()).item()
        mse = torch.mean((y.float() - y_hat)**2).item()

    return loss, correct, mse, b
Code example #5
    def __call__(self, x):
        c = [torch.from_numpy(x_).long() for x_ in x]

        c, order = pack_sequences(c)
        if self.use_cuda:
            c = c.cuda()

        if self.full_features:
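            # presumably assembles the full feature set from the language-model
            # embedding, LSTM stack, and projection layers rather than calling
            # the model end to end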
            z = featurize(c, self.lm_embed, self.lstm_stack, self.proj)
        else:
            z = self.model(c)  # embed the sequences
        z = unpack_sequences(z, order)

        return z
    def __call__(self, x, y):
        n = len(x)
        c = [torch.from_numpy(x_).long()
             for x_ in x] + [torch.from_numpy(y_).long() for y_ in y]

        c, order = pack_sequences(c)
        if self.use_cuda:
            c = c.cuda()

        with torch.no_grad():
            z = self.model(c)  # embed the sequences
            z = unpack_sequences(z, order)

            scores = np.zeros(n)
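            # 'align' scores each pair by its expected ordinal similarity level;
            # 'coarse' scores by negative L1 distance between mean-pooled embeddings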
            if self.mode == 'align':
                for i in range(n):
                    z_x = z[i]
                    z_y = z[i + n]

                    logits = self.model.score(z_x, z_y)
                    p = F.sigmoid(logits).cpu()
                    p_ge = torch.ones(p.size(0) + 1)
                    p_ge[1:] = p
                    p_lt = torch.ones(p.size(0) + 1)
                    p_lt[:-1] = 1 - p
                    p = p_ge * p_lt
                    p = p / p.sum()  # make sure p is normalized
                    levels = torch.arange(5).float()
                    scores[i] = torch.sum(p * levels).item()

            elif self.mode == 'coarse':
                z_x = z[:n]
                z_y = z[n:]
                z_x = torch.stack([z.mean(0) for z in z_x], 0)
                z_y = torch.stack([z.mean(0) for z in z_y], 0)
                scores[:] = -torch.sum(torch.abs(z_x - z_y), 1).cpu().numpy()

        return scores
def contacts_grad(model, x, y, use_cuda, weight=0.5):
    b = len(x)
    x, order = pack_sequences(x)
    x = PackedSequence(Variable(x.data), x.batch_sizes)
    z = model(x)  # embed the sequences
    z = unpack_sequences(z, order)

    logits = []
    for i in range(b):
        zi = z[i]
        lp = model.predict(zi.unsqueeze(0)).view(-1)
        logits.append(lp)
    logits = torch.cat(logits, 0)

    y = torch.cat([yi.view(-1) for yi in y])
    if use_cuda:
        y = y.cuda()
    mask = (y < 0)

    logits = logits[~mask]
    y = Variable(y[~mask])
    b = y.size(0)

    loss = F.binary_cross_entropy_with_logits(logits, y)

    # backprop weighted loss
    w_loss = loss * weight
    w_loss.backward()

    # calculate the recall and precision
    with torch.no_grad():
        p_hat = F.sigmoid(logits)
        tp = torch.sum(p_hat * y).item()
        gp = y.sum().item()
        pp = p_hat.sum().item()

    return loss.item(), tp, gp, pp, b
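
# A minimal training-step sketch combining the two *_grad helpers: both
# backpropagate a weighted loss, so a single optimizer step applies the summed
# gradients, and precision/recall follow from the returned counts. The
# optimizer and minibatch variables here are illustrative, not from the
# original code.
def combined_step(model, optimizer, x0, x1, y_sim, x_con, y_con):
    optimizer.zero_grad()
    sim_loss, correct, mse, n = similarity_grad(model, x0, x1, y_sim, use_cuda=False, weight=0.5)
    con_loss, tp, gp, pp, m = contacts_grad(model, x_con, y_con, use_cuda=False, weight=0.5)
    optimizer.step()
    precision = tp / pp if pp > 0 else 0.0
    recall = tp / gp if gp > 0 else 0.0
    return sim_loss, con_loss, precision, recall
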
Code example #8
def embed_sequences(model: nn.Module,
                    seqs: List[str],
                    batch_size: int = 32,
                    gpu: bool = False) -> List[np.ndarray]:
    """Embed a list of sequences with the given model.

    Args:
        model (nn.Module): trained embedding model
        seqs (List[str]): sequences to embed
        batch_size (int): number of sequences per minibatch
        gpu (bool): run the model on the GPU if True

    Returns:
        List[np.ndarray]: one per-residue embedding array per input sequence
    """

    encoded_seqs = encode_seqs(seqs)
    identity_collate = lambda x: x
    loader = torch.utils.data.DataLoader(encoded_seqs,
                                         batch_size=batch_size,
                                         collate_fn=identity_collate)
    if gpu:
        model = model.cuda()
    return_seqs = []
    with torch.no_grad():
        model.eval()
        #
        for batch in tqdm(loader):
            X, order = utils.pack_sequences(batch)
            if gpu:
                X = X.cuda()

            out = model.forward(X)
            unpacked = utils.unpack_sequences(out, order)
            return_seqs.extend([i.detach().cpu().numpy() for i in unpacked])
    return return_seqs
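
# Minimal usage sketch for embed_sequences, assuming a trained `model` and the
# `encode_seqs` helper expected by the function above; the sequences shown are
# illustrative.
embeddings = embed_sequences(model, ["MKTAYIAKQR", "GSHMSLFDFF"], batch_size=16, gpu=False)
print(len(embeddings), embeddings[0].shape)  # one (length, dim) array per input sequence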