def filterNetMHC(allele, length, baseScoreTable, baseChainCollection,
                 baseFasta, additionalScoreTable, additionalChainCollection,
                 additionalFasta, k, reverse=False):
    """Map peptides that pass a score threshold to the headers they occur in.

    The function works in two passes over up to two peptide sources, a
    "base" set and an "additional" set.  A source is used only when its
    score table, chain collection and FASTA argument are all truthy.

    1. A score distribution is built for each usable source with
       ``scoreDistribution``; the two are added together and handed to
       ``computeScoreThreshold(combined, k, reverse)`` to get a threshold.
    2. Each usable source is run through ``filterOnThreshold`` with that
       threshold, and every resulting holder contributes its headers to the
       entry for its peptide sequence.

    Args:
        allele: Allele whose scores are read via ``scoreIter(allele)``.
            Must be present in ``baseScoreTable.getAlleles()`` when the
            base source is used.
        length: Peptide length passed to ``peptideGenerator``.
        baseScoreTable: Score table for the base set, or a falsy value.
        baseChainCollection: Chain collection for the base set, or falsy.
        baseFasta: FASTA argument for the base set, or falsy.
        additionalScoreTable: Score table for the additional set, or falsy.
        additionalChainCollection: Chain collection for the additional set,
            or falsy.
        additionalFasta: FASTA argument for the additional set, or falsy.
        k: Forwarded unchanged to ``computeScoreThreshold``
            (presumably the number of peptides to keep -- confirm there).
        reverse: Forwarded to ``computeScoreThreshold`` and
            ``filterOnThreshold``.

    Returns:
        A ``collections.defaultdict(set)`` mapping peptide sequence to the
        set of headers gathered from both sources.

    Raises:
        AssertionError: If the base source is used and ``allele`` is not in
            ``baseScoreTable.getAlleles()``.
    """
    baseScoreDist = collections.Counter()
    if baseScoreTable and baseChainCollection and baseFasta:
        exclusion = None
        if additionalChainCollection:
            # Sequences of the additional set are handed to scoreDistribution
            # as its exclusion argument (presumably so that peptides present
            # in both sets are not counted twice -- confirm in
            # scoreDistribution).
            additionalGen = getPeptideGen(
                additionalChainCollection, additionalFasta, length)
            exclusion = set(additionalGen)
        baseGen = getPeptideGen(baseChainCollection, baseFasta, length)
        assert (allele in baseScoreTable.getAlleles())
        baseScores = list(baseScoreTable.scoreIter(allele))
        baseScoreDist = scoreDistribution(baseScores, baseGen, exclusion)

    additionalScoreDist = collections.Counter()
    if additionalScoreTable and additionalChainCollection and additionalFasta:
        additionalGen = getPeptideGen(
            additionalChainCollection, additionalFasta, length)
        additionalScoreDist = scoreDistribution(
            additionalScoreTable.scoreIter(allele), additionalGen, None)

    combinedScoreDist = additionalScoreDist + baseScoreDist
    threshold = computeScoreThreshold(combinedScoreDist, k, reverse)

    pepToHeader = collections.defaultdict(set)
    # Both sources are filtered the same way, base first, then additional.
    sources = (
        (baseScoreTable, baseChainCollection, baseFasta),
        (additionalScoreTable, additionalChainCollection, additionalFasta),
    )
    for scoreTable, chainCollection, fasta in sources:
        if not (scoreTable and chainCollection and fasta):
            continue
        gen = peptideGenerator(chainCollection, fasta, length)
        passing = filterOnThreshold(
            scoreTable.scoreIter(allele), gen, threshold, reverse)
        for holder in passing:
            pepToHeader[holder.getPeptideSequence()].update(
                holder.getHeaders())
    return pepToHeader
def getPeptideGen(chainCollection, fastaPath, pepLen):
    """Return a lazy iterator over peptide sequences.

    Every item produced by ``peptideGenerator(chainCollection, fastaPath,
    pepLen)`` is reduced to the value of its ``getPeptideSequence()`` call.
    Nothing is consumed until the returned iterator is iterated.
    """
    holders = peptideGenerator(chainCollection, fastaPath, pepLen)
    return (holder.getPeptideSequence() for holder in holders)
# The score table and the chain collection must agree on peptide length.
assert bindingScoreTable.peptideLength == chainCollection.peptideLength

# Refuse to add an allele that the table already holds.
if args.allele in bindingScoreTable.getAlleles():
    print('Allele already in binding score table. Exiting')
    sys.exit(1)


def getPeptideGen(chainCollection, fastaPath, pepLen):
    """Return a lazy iterator over the sequences from peptideGenerator."""
    holders = peptideGenerator(chainCollection, fastaPath, pepLen)
    return (holder.getPeptideSequence() for holder in holders)


predictor = mhcflurry.Class1AffinityPredictor.load()

# One-letter codes that are passed through to the predictor unchanged.
KNOWN_AMINOS = set('ACDEFGHIKLMNPQRSTVWY')


def unknownSub(peptides):
    """Return the peptides with every character outside KNOWN_AMINOS
    replaced by 'X'.

    The result is a new list in the same order as *peptides*.
    """
    return [
        ''.join(residue if residue in KNOWN_AMINOS else 'X'
                for residue in sequence)
        for sequence in peptides
    ]


# Materialise the sequences so they can be sanitised and scored in one batch.
peptides = list(getPeptideGen(chainCollection, args.fasta, args.length))
peptidesWithUnknown = unknownSub(peptides)
scores = list(
    predictor.predict(allele=args.allele, peptides=peptidesWithUnknown))
result = bindingScoreTable.addAllele(args.allele, iter(scores))
parser.add_argument('chains')
parser.add_argument('scoreTable')
parser.add_argument('allele')
parser.add_argument('k', type=int)
args = parser.parse_args()

# NOTE: these checks are asserts, so they are skipped when Python runs
# with -O; the failure mode is then whatever open()/sample() raise later.
assert (os.path.isfile(args.chains))
assert (os.path.isfile(args.fasta))
assert (os.path.isfile(args.scoreTable))
assert (args.k > 0)

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only run this script on chain files that come from a trusted source.
with open(args.chains, 'rb') as f:
    chainCollection = pickle.load(f)

# The score-table file is closed on exit from this block.  It is kept open
# until the last line has been printed because scoreIter may read scores
# from the file lazily (assumption -- see ScoreTable.readExisting).
with open(args.scoreTable, 'rb') as f:
    st = ScoreTable.readExisting(f)
    print('alleles: ')
    print(st.getAlleles())
    assert (args.allele in st.getAlleles())
    length = st.peptideLength
    n = st.numPeptides
    assert (args.k <= n)
    print('num peptides: ' + str(n))

    pepGen = peptideGenerator(chainCollection, args.fasta, length)
    # Pick k distinct peptide indices at random, in ascending order.
    selection = sorted(random.sample(range(0, n), args.k))
    scoreIter = st.scoreIter(args.allele)
    pepAndScores = extractPeptidesAndScores(scoreIter, pepGen, selection)
    # One "<peptide>\t<score>" line per selected peptide.
    for pep, score in pepAndScores:
        print(str(pep) + '\t' + str(score))