コード例 #1
0
def alignment_text(x, y, pred, truth, stats):
    """ Render ground-truth and predicted alignments, with stats, as text

    Parameters
    ----------
    x : str
        Protein X
    y : str
        Protein Y
    pred : list of int
        Predicted states
    truth : list of int
        Ground truth states
    stats : list of float
        List of statistics from roc_edges

    Returns
    -------
    str
        Three newline-separated sections: one line of ``name: value``
        statistics (values rounded to two decimals), the ground truth
        alignment and the predicted alignment.
    """
    # TODO: we got the truth and prediction edges swapped somewhere earlier
    aligned_truth = states2alignment(truth, x, y)
    aligned_pred = states2alignment(pred, x, y)

    labels = ('tp', 'fp', 'fn', 'perc_id', 'ppv', 'fnr', 'fdr')
    rounded = [np.round(value, 2) for value in stats]
    # zip stops at the shorter of the two, so surplus labels/values are
    # dropped silently.
    stats_line = ' '.join(
        [f'{label}: {value}' for label, value in zip(labels, rounded)])

    truth_section = ('# Ground truth\n'
                     f'    {aligned_truth[0]}\n    {aligned_truth[1]}')
    pred_section = ('# Prediction\n'
                    f'    {aligned_pred[0]}\n    {aligned_pred[1]}')

    return '\n'.join((stats_line, truth_section, pred_section))
コード例 #2
0
 def test_states2alignment_8(self):
     """Smoke test: only checks that states2alignment runs on this input."""
     states = np.array([
         1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 0, 0, 1
     ])
     seq_x = 'HECDDCSKQFSRNNHLAKHLRAH'
     seq_y = 'YRCHKVCPYTFVGKSDLDLHQFITAH'
     # Sequences are passed in (y, x) order here.
     states2alignment(states, seq_y, seq_x)
コード例 #3
0
 def test_states2alignment_10(self):
     """Smoke test: only checks that states2alignment runs on this input."""
     states = np.array([
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 1, 1, 0, 1,
         2, 0, 1, 1, 1, 1
     ])
     first_seq = 'YACSGGCGQNFRTMSEFNEHMIRLVH'
     second_seq = 'LICPKHTRDCGKVFKRNSSLRVHEH'
     states2alignment(states, first_seq, second_seq)
コード例 #4
0
 def test_states2alignment_11(self):
     """Smoke test: only checks that states2alignment runs on this input."""
     states = np.array([
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0
     ])
     first_seq = 'LNCKEIKKYCEMSFRNPDDIRKHRGAIH'
     second_seq = 'YTCSSCNESLRTAWCLNKHLR'
     states2alignment(states, first_seq, second_seq)
コード例 #5
0
    def test_decoding2(self):
        """Smoke test: traceback on dm.txt feeds states2alignment cleanly."""
        seq_x = 'HECDRKTCDESFSTKGNLRVHKLGH'
        seq_y = 'LKCSGCGKNFKSQYAYKRHEQTH'

        decoder = NeedlemanWunschDecoder(self.operator)
        matrix = torch.Tensor(np.loadtxt(get_data_path('dm.txt')))
        path = decoder.traceback(matrix)
        # Each traceback entry is a triple; only the third component (the
        # state) is needed here.
        _, _, states = zip(*path)
        states2alignment(np.array(states), seq_x, seq_y)
コード例 #6
0
 def test_states2alignment_3(self):
     """Smoke test: only checks that states2alignment runs on this input."""
     states = np.array([
         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
         0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
     ])
     seq_x = ('XSDHGDVSLPPEDRVRALSQLGSAVEVNEDIPPRRYFRSGVEIIRMA'
              'SIYSEEGNIEHAFILYNKYITLFIEKLPKHRDYKSAVIPEKKDTVK'
              'KLKEIAFPKAEELKAELLKRYTKEYTEYNEEKKKEAEELARNMAIQ'
              'QELX')
     seq_y = 'XIDVLRAKAAKERAERRLQSQQDDIDFKRAELALKRAMNRLSVAEMKX'
     states2alignment(states, seq_x, seq_y)
コード例 #7
0
 def test_states2alignment_7(self):
     """Smoke test: only checks that states2alignment runs on this input."""
     states = np.array([
         1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
     ])
     seq_x = 'XGSSGSSGFDENWGADEELLLIDACETLGLGNWADIADYVGNARTKEECRDHYLKTYIEX'
     seq_y = ('XGEIRVGNRYQADITDLLKEGEEDGRDQSRLETQVWEAHNPLTDKQIDQFLVVARSVGTF'
              'ARALDSLHMSAAAASRDITLFHAMDTLHKNIYDISKAISALVPQGGPVLCRDEMEEWSAS'
              'EANLFEEALEKYGKDFTDIQQDFLPWKSLTSIIEYYYMWKTTX')
     states2alignment(states, seq_x, seq_y)
コード例 #8
0
 def test_states2alignment_2(self):
     """Path "111:::111" keeps X whole and gaps Y at both ends."""
     states = np.array([tmstate_f(symbol) for symbol in "111:::111"])
     aligned_x, aligned_y = states2alignment(states, "123456789", "abc")
     self.assertEqual(aligned_x, "123456789")
     self.assertEqual(aligned_y, "---abc---")
コード例 #9
0
 def test_states2alignment_1(self):
     """Path "111:::222" gaps the tail of X and the head of Y."""
     states = np.array([tmstate_f(symbol) for symbol in "111:::222"])
     aligned_x, aligned_y = states2alignment(states, "123456", "abcdef")
     self.assertEqual(aligned_x, "123456---")
     self.assertEqual(aligned_y, "---abcdef")
コード例 #10
0
def deepblast_align(
    pairings: List[Tuple[str, str]],
    query_by_id: Dict[str, str],
    target_by_id: Dict[str, str],
    model_file: str,
    device: torch.device,
    batch_size: int,
) -> List[Tuple[str, str, str, str]]:
    """Aligns the given pairings using DeepBLAST

    Returns a list of query id, target id, query aligned, target aligned

    The model on its own takes between 740MiB (Titan X, torch 1.5) and 1284MiB (RTX 8000, torch 1.7)

    Note that the batch size has much less of an impact for DeepBLAST than for the embedders

    Parameters
    ----------
    pairings : list of (str, str)
        (query id, target id) pairs to align
    query_by_id : dict of str to str
        Query sequences keyed by query id
    target_by_id : dict of str to str
        Target sequences keyed by target id
    model_file : str
        Checkpoint passed to ``LightningAligner.load_from_checkpoint``
    device : torch.device
        Device the model and the packed sequences are moved to
    batch_size : int
        Upper bound on the number of pairs per batch

    Returns
    -------
    list of (str, str, str, str)
        One ``(query id, target id, query aligned, target aligned)`` tuple
        per pairing, in input order. Empty when ``pairings`` is empty.
    """
    # numpy.array_split rejects zero sections, which is what an empty
    # `pairings` would request. There is nothing to align in that case, so
    # return before paying for the checkpoint load.
    if len(pairings) == 0:
        return []

    model = LightningAligner.load_from_checkpoint(model_file).to(device)
    tokenizer = UniprotTokenizer()
    alignments = []
    # Naive batching: ceil(n / batch_size) near-equal sections, each holding
    # at most batch_size pairs.
    batches = numpy.array_split(pairings,
                                math.ceil(len(pairings) / batch_size))
    for batch in tqdm(batches):
        # noinspection PyArgumentList
        queries = [
            torch.Tensor(tokenizer(query_by_id[query].encode())).long()
            for query, _ in batch
        ]
        # noinspection PyArgumentList
        targets = [
            torch.Tensor(tokenizer(target_by_id[target].encode())).long()
            for _, target in batch
        ]
        seqs, order = pack_sequences(queries, targets)
        gen = model.aligner.traceback(seqs.to(device), order)
        for (decoded, _), (query, target) in zip(gen, batch):
            # Each decoded step is a triple; only the state is needed.
            _, _, pred_states = zip(*decoded)
            pred_alignment = "".join(map(revstate_f, pred_states))
            x_aligned, y_aligned = states2alignment(pred_alignment,
                                                    query_by_id[query],
                                                    target_by_id[target])
            alignments.append((query, target, x_aligned, y_aligned))
    return alignments
コード例 #11
0
 def test_states2alignment_9(self):
     """Smoke test: only checks that states2alignment runs on this input."""
     states = np.array([1, 1, 0, 1])
     seq_x = 'HCH'
     seq_y = 'HCAH'
     # Sequences are passed in (y, x) order here.
     states2alignment(states, seq_y, seq_x)
コード例 #12
0
# Read the input FASTA into {record id: sequence}; sequence lines that belong
# to the same record are concatenated.
seqs = {}
ID = None
with open(args.input) as f:
    for line in f:
        line = line.rstrip()
        if not line:
            # Blank lines carry no data; indexing line[0] on them would fail.
            continue
        if line.startswith('>'):
            ID = line[1:]
            continue
        if ID is None:
            raise ValueError(
                f"{args.input}: sequence data found before the first '>' header")
        seqs[ID] = seqs.get(ID, '') + line

# The first two records (in file order) are the pair to align.
keys_list = list(seqs)
if len(keys_list) < 2:
    raise ValueError(
        f"{args.input}: expected at least two sequences, found {len(keys_list)}")
x = seqs[keys_list[0]]
y = seqs[keys_list[1]]
pred_alignment = model.align(x, y)

x_aligned, y_aligned = states2alignment(pred_alignment, x, y)

# Write the gapped sequences as a two-record FASTA; the context manager
# closes the file even if the write fails.
with open(args.output, "w") as file:
    file.write(f">{keys_list[0]}\n{x_aligned}\n>{keys_list[1]}\n{y_aligned}")

print(x_aligned)
print(pred_alignment)
print(y_aligned)

# Tokenize both sequences and score the pair with the aligner.
x_ = torch.Tensor(model.tokenizer(x.encode())).long()
y_ = torch.Tensor(model.tokenizer(y.encode())).long()

seq, order = pack_sequences([x_], [y_])

score = model.aligner.score(seq, order).item()