Ejemplo n.º 1
0
# Pair up sequences that share an id.
#
# The list is sorted in descending id order so that list.pop(), which removes
# from the end, hands the sequences back in ascending id order with equal ids
# next to each other.
seqordered = sorted(seqs, key=lambda x: x.id, reverse=True)

pairs = []     # Pair objects built from two consecutive sequences with equal ids
unpaired = []  # single-sequence Pair objects for sequences without a partner

first = None   # sequence currently waiting for a partner
nxt = None     # candidate partner popped right after `first`

while seqordered:
    if first is None:
        first = seqordered.pop()
    if not seqordered:
        # `first` was the last sequence available; it is flushed below.
        break
    nxt = seqordered.pop()
    if first.id == nxt.id:
        pairs.append(pair.Pair([first, nxt]))
        first = None
    else:
        # No match: `first` stays single and `nxt` becomes the next candidate.
        unpaired.append(pair.Pair([first]))
        first = nxt
    nxt = None

# A sequence may still be held in `first` once the list is exhausted: either
# it was the very last pop, or it was carried over from a mismatch on the final
# comparison. Without this flush the carried-over sequence would be lost.
if first is not None:
    unpaired.append(pair.Pair([first]))
    first = None

print("Processed", len(pairs), "pairs of sequences")
print("and ", len(unpaired), " unpaired sequences")

# Extracted from Bryan's code to fix output_schief_csv()
Ejemplo n.º 2
0
    cells[k] = {'heavy': [], 'light': []}

# Bucket every sequence under its id: anything whose 'chain' field is not
# 'heavy' is filed as 'light'.
for s in seqordered:
    chain_key = 'heavy' if s['chain'] == 'heavy' else 'light'
    cells[s.id][chain_key].append(s)

# Build pairs from the sequences grouped per (munged) id in `cells`.
# Open question from the original author: unpaired sequences are not
# exported -- should they be?
pairs = []
unpaired = []
# Within a 'cluster' of sequences, every heavy chain is combined with every
# light chain; a cluster holding only one chain type yields unpaired entries.
for cell in cells:
    heavies = cells[cell]['heavy']
    lights = cells[cell]['light']
    if heavies and lights:
        # Full cross product: each heavy with each light.
        for heavy in heavies:
            for light in lights:
                pairs.append(pair.Pair([heavy, light]))
    elif lights:
        unpaired.extend(pair.Pair([s]) for s in lights)
    elif heavies:
        unpaired.extend(pair.Pair([s]) for s in heavies)

print("Processed", len(pairs), "pairs of sequences")
print("and ", len(unpaired), " unpaired sequences")

# Extracted from Bryan's code to fix output_schief_csv()


# FROM https://github.com/briney/abtools/blob/master/abtools/pipeline.py
Ejemplo n.º 3
0
from abutils.core import sequence
from abutils.core import pair
import pandas as pd
import numpy as np
import json
from pandas.io.json import json_normalize
import sys, os

# Read the file named by the first command-line argument, decoding each line
# as a separate JSON document and wrapping it in a single-sequence Pair.
pairs = []
with open(sys.argv[1]) as f:
    for line in f:
        stripped = line.strip()
        if not stripped:
            # json.loads("") raises JSONDecodeError, so a blank line (e.g. a
            # trailing newline at the end of the file) would abort the load.
            continue
        d = json.loads(stripped)
        # abutils provides a Pair object; here each Pair holds one Sequence
        # built from the decoded JSON value.
        seq = sequence.Sequence(d)
        seqs = [seq]
        abpair = pair.Pair(seqs)
        pairs.append(abpair)

# Built in json flatten from pandas does an OK job.
#json_df = json_normalize(d)
#json_df.to_csv("dump.csv", sep=',')


# An example of a more elaborate JSON flatten that we can repurpose
# From https://towardsdatascience.com/flattening-json-objects-in-python-f5343c794b10
def flatten_json(y):
    out = {}

    def flatten(x, name=''):
        if type(x) is dict:
            for a in x: