# Pair up sequences that share an id.
#
# The sequences are sorted by id in descending order so that list.pop(),
# which removes from the end, hands them back in ascending id order.  Two
# consecutive sequences with the same id become one two-member Pair; a
# sequence whose neighbour has a different id becomes a single-member Pair
# in `unpaired`.
seqordered = sorted(seqs, key=lambda x: x.id, reverse=True)
pairs = []
unpaired = []
first = None
nxt = None
while seqordered:
    if first is None:
        first = seqordered.pop()
    if seqordered:
        nxt = seqordered.pop()
        if first.id == nxt.id:
            pairs.append(pair.Pair([first, nxt]))
            first = None
        else:
            # No partner for `first`; `nxt` becomes the next candidate.
            unpaired.append(pair.Pair([first]))
            first = nxt
        nxt = None
    else:
        unpaired.append(pair.Pair([first]))
        first = None

# The loop stops as soon as the list is empty, which can leave a
# carried-over candidate in `first` that was never compared with anything.
# Record it as unpaired instead of silently dropping it.
if first is not None:
    unpaired.append(pair.Pair([first]))
    first = None

print("Processed", len(pairs), "pairs of sequences")
print("and ", len(unpaired), " unpaired sequences")
# Extracted from Bryan's code to fix output_schief_csv()
# NOTE(review): `cells` and `k` are bound outside this chunk; this statement
# presumably belongs to a loop that seeds one entry per cell id -- confirm.
cells[k] = {'heavy': [], 'light': []}

# Bucket every sequence under its id, split by chain type.  Anything that is
# not labelled 'heavy' is treated as a light chain.
for s in seqordered:
    chain_key = 'heavy' if s['chain'] == 'heavy' else 'light'
    cells[s.id][chain_key].append(s)

# Pair sorted sequences based on munged id.
# Unpaired sequences aren't exported. Should they be?
pairs = []
unpaired = []

# Within a 'cluster' of sequences we want to pair each heavy and each light
# chain sequence.
for cell in cells:
    heavies = cells[cell]['heavy']
    lights = cells[cell]['light']
    if heavies and lights:
        # Cross product: every heavy chain is paired with every light chain.
        for heavy in heavies:
            for light in lights:
                pairs.append(pair.Pair([heavy, light]))
    elif lights:
        # Light chains only: each one becomes a single-member Pair.
        unpaired.extend([pair.Pair([s]) for s in lights])
    elif heavies:
        # Heavy chains only: each one becomes a single-member Pair.
        unpaired.extend([pair.Pair([s]) for s in heavies])

print("Processed", len(pairs), "pairs of sequences")
print("and ", len(unpaired), " unpaired sequences")
# Extracted from Bryan's code to fix output_schief_csv()
# FROM https://github.com/briney/abtools/blob/master/abtools/pipeline.py
from abutils.core import sequence
from abutils.core import pair
import pandas as pd
import numpy as np
import json
from pandas.io.json import json_normalize
import sys, os

# Read the file named by the first command-line argument, parsing each line
# as its own JSON document, and collect one single-sequence Pair per line.
pairs = []
with open(sys.argv[1]) as f:
    for line in f:
        d = json.loads(line.strip())
        # Bryan has a pair object
        seq = sequence.Sequence(d)
        # Pair is given a list, so wrap the lone sequence in one.
        seqs = [seq]
        abpair = pair.Pair(seqs)
        pairs.append(abpair)
        # Built in json flatten from pandas does an OK job.
        #json_df = json_normalize(d)
        #json_df.to_csv("dump.csv", sep=',')

# An example of a more elaborate json flatten that we can repurpose
# From https://towardsdatascience.com/flattening-json-objects-in-python-f5343c794b10
def flatten_json(y):
    out = {}
    def flatten(x, name=''):
        if type(x) is dict:
            for a in x: