def getPairedAndUnpairedSpectra(dtaDir, dtaList, Nmod, Cmod, ppm=5, cutOff=0.1, verbose=False):
    """Split the spectra in dtaList into accepted pairs and spectra without a partner.

    Two entries are pair candidates when the absolute difference of their
    precursor masses is within epsilon of Nmod + Cmod, where epsilon is ppm
    parts-per-million of the larger of the two precursor masses.  A candidate
    is accepted when SA.getSharedPeaksRatio for the two spectra exceeds cutOff.

    Parameters:
        dtaDir  -- unused; kept so existing callers keep working.
        dtaList -- sequence of entries accepted by DataFile.getPrecMassAndCharge
                   and DataFile.getMassIntPairs.
        Nmod, Cmod -- mass offsets; their sum is the expected precursor mass
                   difference, and both are forwarded to SA.getNandCIons.
        ppm     -- precursor mass tolerance in parts per million.
        cutOff  -- shared peaks ratio that a candidate pair must exceed.
        verbose -- when true, print a line per accepted pair and per
                   unpaired entry.

    Returns:
        (specPairs, unpairedSpecs).  specPairs is a list of
        (ratio, lighterEntry, heavierEntry) tuples; unpairedSpecs lists the
        entries of dtaList that are not part of any accepted pair.
    """
    delta = Nmod + Cmod
    numSpecs = len(dtaList)
    # Read every precursor mass once instead of re-reading entry j for each i.
    precMasses = [DataFile.getPrecMassAndCharge(dta)[0] for dta in dtaList]

    specPairs = []
    unpairedSpecs = []
    # Indices that are part of at least one accepted pair.  Tracking both
    # members matters: an entry that was only ever the *second* member of a
    # pair must not be reported as unpaired when its own turn comes.
    pairedInds = set()

    for i in range(numSpecs):
        precMass1 = precMasses[i]
        spec1 = DataFile.getMassIntPairs(dtaList[i])
        for j in range(i + 1, numSpecs):
            precMass2 = precMasses[j]
            epsilon = ppm * 10 ** -6 * max(precMass1, precMass2)
            if not (np.abs(np.abs(precMass1 - precMass2) - delta) < epsilon):
                continue

            spec2 = DataFile.getMassIntPairs(dtaList[j])
            # The spectrum with the smaller precursor mass is passed first.
            if precMass1 < precMass2:
                firstSpec, secondSpec = spec1, spec2
            else:
                firstSpec, secondSpec = spec2, spec1
            N, C = SA.getNandCIons(firstSpec, secondSpec, Nmod, Cmod, epsilon=epsilon)
            ratio = SA.getSharedPeaksRatio(firstSpec, secondSpec, N, C)
            if not (ratio > cutOff):
                continue

            if verbose:
                # Single-argument parenthesised print produces the same text
                # under the Python 2 print statement and the Python 3 function.
                print('Pair found %s %s' % (dtaList[i], dtaList[j]))
            pairedInds.add(i)
            pairedInds.add(j)
            # On an exact precursor mass tie entry i is reported first.
            if precMass2 < precMass1:
                specPairs.append((ratio, dtaList[j], dtaList[i]))
            else:
                specPairs.append((ratio, dtaList[i], dtaList[j]))

        # All pairs (k, i) with k < i have already been examined, so
        # membership in pairedInds is final for index i at this point.
        if i not in pairedInds:
            unpairedSpecs.append(dtaList[i])
            if verbose:
                print('No pairs for %s' % (dtaList[i],))

    return specPairs, unpairedSpecs
def findDeltaPairs(dtaList, delta, ppm=5, intEp=20):
    """Find pairs of scans whose precursor masses differ by delta.

    Precursor masses are binned at a resolution of epsilon / intEp, where
    epsilon is ppm parts-per-million of the largest precursor mass in
    dtaList.  Every mass is registered in its own bin and in the intEp bins
    on either side, together with its signed bin offset.  Each mass is then
    shifted down by delta and looked up; a hit is kept when the bin offset,
    converted back to ppm of the shifted entry's (unshifted) precursor mass,
    is below ppm.

    Parameters:
        dtaList -- sequence of entries accepted by DataFile.getPrecMassAndCharge
                   and DataFile.getScanNum.
        delta   -- expected precursor mass difference between pair members.
        ppm     -- mass tolerance in parts per million.
        intEp   -- number of bins spanning epsilon on each side of a mass.

    Returns:
        List of (scanA, scanB) integer tuples, where scanB is the scan whose
        precursor mass minus delta matched the precursor mass of scanA.
        An empty dtaList yields an empty list.
    """
    # np.max over a zero-size array raises ValueError, so bail out early.
    if len(dtaList) == 0:
        return []

    precMassArr = np.zeros((len(dtaList), 2))
    for row, dta in enumerate(dtaList):
        precMassArr[row] = [DataFile.getPrecMassAndCharge(dta)[0], DataFile.getScanNum(dta)]

    masses = precMassArr[:, 0]
    scanNums = precMassArr[:, 1]

    epsilon = ppm * 10**-6 * np.max(masses)
    resolution = epsilon / intEp

    # bin index -> list of (offset from the entry's own bin, scan number)
    hashedDict = {}
    for binVal, scanNum in zip(np.round(masses / resolution), scanNums):
        hInd = int(binVal)
        for hMass in range(hInd - intEp, hInd + intEp + 1):
            hashedDict.setdefault(hMass, []).append((hMass - hInd, scanNum))

    pairs = []
    shiftedBins = np.round((masses - delta) / resolution)
    for i, binVal in enumerate(shiftedBins):
        candidates = hashedDict.get(int(binVal))
        if candidates is None:
            continue
        for offset, otherScan in candidates:
            # The shared window was sized from the largest mass; re-check the
            # offset against this entry's own precursor mass.
            if abs(offset) * resolution * 10**6 / masses[i] < ppm:
                pairs.append((int(otherScan), int(scanNums[i])))

    return pairs
def findSamePrecMassClusters(dtaList, ppm=5):
    """Group scans whose precursor masses chain together within a ppm tolerance.

    Entries are sorted by precursor mass and walked in ascending order.  An
    entry joins the current cluster when its mass is within epsilon of a
    member of that cluster, where epsilon is ppm parts-per-million of the
    entry's own mass; otherwise it starts a new cluster.

    The previous implementation started its merge loop at index 0, where
    clusters[i - 1] is clusters[-1]: the lightest cluster was compared with
    the heaviest one, and a lone cluster was merged into itself and then
    deleted, so a single spectrum (or a set of spectra all within tolerance)
    produced an empty result.  The walk below only ever compares an entry
    with the cluster immediately preceding it.

    Parameters:
        dtaList -- sequence of entries accepted by DataFile.getPrecMassAndCharge
                   and DataFile.getScanNum.
        ppm     -- mass tolerance in parts per million.

    Returns:
        List of clusters in ascending mass order; each cluster is a list of
        scan numbers (stored as floats, since they come out of a float array).
    """
    precMassArr = np.zeros((len(dtaList), 2))
    for row, dta in enumerate(dtaList):
        precMassArr[row] = [DataFile.getPrecMassAndCharge(dta)[0], DataFile.getScanNum(dta)]
    precMassArr = precMassArr[np.argsort(precMassArr[:, 0])]

    scanFClusters = []
    lastMass = None  # mass of the most recent (heaviest) member of the current cluster
    for mass, scanF in precMassArr:
        epsilon = ppm * 10**-6 * mass
        # Masses are ascending, so the closest member of the current cluster
        # is always its most recently added one.
        if scanFClusters and np.abs(mass - lastMass) < epsilon:
            scanFClusters[-1].append(scanF)
        else:
            scanFClusters.append([scanF])
        lastMass = mass

    return scanFClusters
def getScanFDict(dtaList):
    """Index the entries of dtaList by scan number.

    Returns a dict mapping DataFile.getScanNum(entry) to a record with the
    keys 'dta' (the entry itself), 'precMass' (first element of
    DataFile.getPrecMassAndCharge(entry)) and 'sequenced' (initialised to
    False).  If two entries share a scan number the later one wins.
    """
    byScan = {}
    for path in dtaList:
        scanNum = DataFile.getScanNum(path)
        record = {'dta': path}
        record['precMass'] = DataFile.getPrecMassAndCharge(path)[0]
        record['sequenced'] = False
        byScan[scanNum] = record
    return byScan
def getScanFDict(dtaList):
    """Build a scan-number keyed lookup table for the entries of dtaList.

    Each value is a dict holding the entry under "dta", its precursor mass
    (first element of DataFile.getPrecMassAndCharge) under "precMass", and a
    "sequenced" flag that starts out False.  A later entry with the same scan
    number replaces an earlier one.
    """
    def _keyedRecord(entry):
        # Tuple elements evaluate left to right: scan number first, then mass.
        return (
            DataFile.getScanNum(entry),
            {"dta": entry, "precMass": DataFile.getPrecMassAndCharge(entry)[0], "sequenced": False},
        )

    return dict(_keyedRecord(entry) for entry in dtaList)
def getLADSPScore(seq, dtaPath, PNet, ppm=5, ambigEdges=None, ambigAA='X', ambigPenalty=20):
    """Score a sequence against the spectrum stored at dtaPath.

    A PN.Spectrum is built from the mass/intensity pairs and precursor mass
    read from dtaPath, with epsilon set to ppm parts-per-million of the
    precursor mass.  The score is the sum of spec.getNodeScore over every
    node produced by Constants.nodeInfoGen(seq, ...), plus one
    spec.getPriorScore term for the start of the sequence (prm=0) and one for
    its end (prm = precMass - H+ - H2O), minus ambigPenalty for every node
    whose 'formAA' equals ambigAA and once more if the last node's 'lattAA'
    equals ambigAA.

    Parameters:
        seq          -- sequence handed to Constants.nodeInfoGen.
        dtaPath      -- entry accepted by DataFile.getMassIntPairs and
                        DataFile.getPrecMassAndCharge.
        PNet         -- passed through to PN.Spectrum.
        ppm          -- mass tolerance in parts per million.
        ambigEdges   -- passed through to Constants.nodeInfoGen.
        ambigAA      -- residue symbol that triggers the penalty.
        ambigPenalty -- amount subtracted per ambiguous residue.

    Returns:
        The accumulated score.

    Raises:
        StopIteration if Constants.nodeInfoGen yields no nodes.
    """
    pairs = DataFile.getMassIntPairs(dtaPath)
    precMass = DataFile.getPrecMassAndCharge(dtaPath)[0]
    epsilon = ppm * precMass * 10 ** -6
    spec = PN.Spectrum(PNet, precMass, Nmod=0, Cmod=0, epsilon=epsilon, spectrum=pairs)
    spec.initializeNoiseModel()
    nodeGen = Constants.nodeInfoGen(seq, considerTerminalMods=True, ambigEdges=ambigEdges)

    def _penalty(aa):
        # Amount to subtract for one residue position.
        return ambigPenalty if aa == ambigAA else 0

    # Builtin next() behaves like nodeGen.next() on Python 2.6+ and also
    # works on Python 3, where iterators no longer have a .next() method.
    node = next(nodeGen)
    pScore = 0
    pScore += spec.getNodeScore(**node)
    pScore += spec.getPriorScore(prm=0, formAA=None, lattAA=node['formAA'])
    pScore -= _penalty(node['formAA'])

    # After this loop `node` is the last node produced (or still the first
    # one if the generator had nothing further to yield).
    for node in nodeGen:
        pScore += spec.getNodeScore(**node)
        pScore -= _penalty(node['formAA'])

    endPRM = precMass - Constants.mods['H+'] - Constants.mods['H2O']
    pScore += spec.getPriorScore(prm=endPRM, formAA=node['lattAA'], lattAA=None)
    pScore -= _penalty(node['lattAA'])

    return pScore
if len(cluster) > 1: combMasses += [sum(cluster) / len(cluster)] else: combMasses += cluster return np.sort(np.array(combMasses)) if __name__ == '__main__': dirPath = 'C:\\Users\\Arun\\Pythonprojects\\DeNovoSequencing\\LF2_short_HCD+CID_ath001862_244\\' ppm = 5 heavyPath = dirPath + '244.3611.3611.1.dta' lightPath = dirPath + '244.3619.3619.1.dta' heavyPairs = DataFile.getMassIntPairs(heavyPath) lightPairs = DataFile.getMassIntPairs(lightPath) heavyPrecMass, heavyCharge = DataFile.getPrecMassAndCharge(heavyPath) lightPrecMass, lightCharge = DataFile.getPrecMassAndCharge(lightPath) print ppm * 10 ** -6 * heavyPrecMass print getSharedPeaksRatio(lightPairs, heavyPairs, Nmod=0, Cmod=Constants.mods['*'], epsilon=ppm * heavyPrecMass * 10 ** -6) """ tPath = dirPath + '244.0855.0855.1.dta' tMass = DataFile.getPrecMassAndCharge(tPath)[0] tPairs = DataFile.getMassIntPairs(tPath) tIons = tPairs[:,0] tIons = np.insert(tIons, 0, 0) tIons = np.append(tIons, tMass) tIons = getSymmetrizedSpectrum(tIons, tMass) print tIons
(lightSpec, heavySpec) = pair[1:] if options.verbose: print 'Now sequencing %s %s with shared peaks ratio %f' % (lightSpec, heavySpec, pair[0]) s1 = time.time() heavyPath = heavySpec lightPath = lightSpec sharedInfo = DNS.getPairedSpectraInfoForSequencing(lightPath, heavyPath, options.verbose) DNS.sequencePairedSpectra(sharedInfo['NInd'], sharedInfo['CInd'], sharedInfo['lightPairs'], sharedInfo['heavyPairs'], sharedInfo['lightPrecMass'] - Constants.mods['H+'] - Constants.mods['H2O'], PNet, alpha=options.alpha, unknownPenalty=options.ambigEdgePenalty, maxEdge=options.maxEdge, minEdge=options.minEdge, Nmod=options.Nmod, Cmod=options.Cmod, aas=aas, verbose=options.verbose) if options.verbose: print 'Time taken:', time.time() - s1 for spec in unpaired: if options.verbose: print 'Now sequencing unpaired spectrum %s' % spec s1 = time.time() precMass = DataFile.getPrecMassAndCharge(spec)[0] pairs = DataFile.getMassIntPairs(spec) DNS.sequenceSingleSpectrum(pairs, precMass - Constants.mods['H+'] - Constants.mods['H2O'], PNet, alpha=options.alpha, unknownPenalty=options.ambigEdgePenalty, maxEdge=options.maxEdge, minEdge=options.minEdge, aas=aas, verbose=options.verbose) if options.verbose: print 'Time taken:', time.time() - s1 if options.verbose: print 'Finished sequencing. Time taken: ', time.time() - t2 print 'Total time taken for program: ', time.time() - t1