예제 #1
0
def getPairedAndUnpairedSpectra(dtaDir, dtaList, Nmod, Cmod, ppm=5, cutOff=0.1, verbose=False):
    """Split the spectra in dtaList into light/heavy pairs and unpaired spectra.

    Two spectra are candidates for pairing when the absolute difference of
    their precursor masses is within a ppm tolerance of Nmod + Cmod (the
    tolerance is ppm parts-per-million of the larger of the two masses).
    A candidate is accepted when the value returned by
    SA.getSharedPeaksRatio for the two spectra is greater than cutOff.

    Arguments:
    dtaDir  -- not used by this function; kept so existing calls keep working
    dtaList -- sequence of spectrum identifiers understood by
               DataFile.getPrecMassAndCharge and DataFile.getMassIntPairs
    Nmod    -- forwarded to SA.getNandCIons; added to Cmod to get the
               expected precursor mass difference
    Cmod    -- see Nmod
    ppm     -- precursor mass tolerance in parts-per-million
    cutOff  -- a candidate pair is kept only if its ratio exceeds this value
    verbose -- when true, print a line for every pair found and for every
               spectrum left without a pair

    Returns a tuple (specPairs, unpairedSpecs):
    specPairs     -- list of (ratio, lightSpec, heavySpec) tuples, where
                     lightSpec is the entry with the smaller precursor mass
    unpairedSpecs -- entries of dtaList that are not part of any pair, in
                     input order
    """
    delta = Nmod + Cmod
    numSpecs = len(dtaList)
    # Read every precursor mass once up front; looking it up inside the
    # inner loop would hit DataFile O(n^2) times.
    precMasses = [DataFile.getPrecMassAndCharge(dta)[0] for dta in dtaList]

    specPairs = []
    unpairedSpecs = []
    # Indices of spectra that belong to at least one accepted pair. A
    # per-iteration flag is not enough: a spectrum matched as the *later*
    # member of a pair must not be reported as unpaired when the outer loop
    # reaches it.
    pairedInds = set()
    for i in range(numSpecs):
        precMass1 = precMasses[i]
        spec1 = DataFile.getMassIntPairs(dtaList[i])
        for j in range(i + 1, numSpecs):
            precMass2 = precMasses[j]
            epsilon = ppm * 10 ** -6 * max(precMass1, precMass2)
            if not (np.abs(np.abs(precMass1 - precMass2) - delta) < epsilon):
                continue

            spec2 = DataFile.getMassIntPairs(dtaList[j])
            # The SA helpers are called with the lower-mass spectrum first.
            if precMass1 < precMass2:
                firstSpec, secondSpec = spec1, spec2
            else:
                firstSpec, secondSpec = spec2, spec1
            N, C = SA.getNandCIons(firstSpec, secondSpec, Nmod, Cmod, epsilon=epsilon)
            ratio = SA.getSharedPeaksRatio(firstSpec, secondSpec, N, C)
            if not (ratio > cutOff):
                continue

            if verbose:
                # Single-argument form prints the same text under the
                # Python 2 print statement and the Python 3 print function.
                print('Pair found %s %s' % (dtaList[i], dtaList[j]))
            pairedInds.add(i)
            pairedInds.add(j)
            specs = (dtaList[i], dtaList[j])
            lightInd = int(precMass2 < precMass1)
            specPairs.append((ratio, specs[lightInd], specs[1 - lightInd]))

        # Every pairing that involves i (with an earlier or a later entry)
        # has been examined by now.
        if i not in pairedInds:
            unpairedSpecs.append(dtaList[i])
            if verbose:
                print('No pairs for %s' % (dtaList[i],))
    return specPairs, unpairedSpecs
예제 #2
0
def findDeltaPairs(dtaList, delta, ppm=5, intEp=20):
    """Find pairs of spectra whose precursor masses differ by delta.

    Precursor masses are binned at a resolution of epsilon / intEp, where
    epsilon is ppm parts-per-million of the largest precursor mass. Each
    spectrum is registered under its own bin and the intEp bins on either
    side of it. Every spectrum is then looked up at (mass - delta); each
    registered spectrum found there whose bin offset, converted back to
    ppm of the looked-up spectrum's mass, is below ppm forms a pair.

    Arguments:
    dtaList -- sequence of spectrum identifiers understood by
               DataFile.getPrecMassAndCharge and DataFile.getScanNum
    delta   -- expected precursor mass difference between pair members
    ppm     -- mass tolerance in parts-per-million
    intEp   -- number of bins that epsilon is divided into

    Returns a list of (scanA, scanB) integer scan-number tuples where the
    precursor mass of scanB is approximately that of scanA plus delta.
    An empty dtaList yields an empty list.
    """
    # np.max raises ValueError on a zero-size array, so bail out early.
    if len(dtaList) == 0:
        return []

    precMassArr = np.zeros((len(dtaList), 2))
    for i, dta in enumerate(dtaList):
        precMassArr[i] = [DataFile.getPrecMassAndCharge(dta)[0], DataFile.getScanNum(dta)]

    maxPrecMass = np.max(precMassArr, 0)[0]
    epsilon = ppm * 10**-6 * maxPrecMass
    resolution = epsilon/intEp

    # bin index -> list of (offset of this bin from the spectrum's own bin, scan number)
    hashedDict = {}
    hashedMasses = np.round(precMassArr[:,0]/resolution)
    for hashedMass, scanF in zip(hashedMasses, precMassArr[:,1]):
        hInd = int(hashedMass)
        for hMass in range(hInd-intEp, hInd+intEp+1):
            hashedDict.setdefault(hMass, []).append((hMass-hInd, scanF))

    pairs = []
    shiftedMasses = np.round((precMassArr[:,0] - delta)/resolution)
    for i, shiftedMass in enumerate(shiftedMasses):
        hInd = int(shiftedMass)
        for binOffset, partnerScanF in hashedDict.get(hInd, ()):
            # The bins were sized from the largest mass; re-check the
            # tolerance against this spectrum's own precursor mass.
            if abs(binOffset) * resolution * 10**6/precMassArr[i][0] < ppm:
                pairs.append((int(partnerScanF), int(precMassArr[i][1])))

    return pairs
예제 #3
0
def findSamePrecMassClusters(dtaList, ppm=5):
    """Group spectra whose precursor masses agree to within a ppm tolerance.

    Spectra are sorted by precursor mass and each one starts as its own
    cluster. A cluster is merged into the preceding one when any of its
    members lies within epsilon of any member of the preceding cluster,
    where epsilon is ppm parts-per-million of the mass of the cluster's
    first member.

    Arguments:
    dtaList -- sequence of spectrum identifiers understood by
               DataFile.getPrecMassAndCharge and DataFile.getScanNum
    ppm     -- mass tolerance in parts-per-million

    Returns a list of clusters in ascending precursor mass order; each
    cluster is a list of scan numbers as stored in the float array (i.e.
    numpy floats). An empty dtaList yields an empty list.
    """
    precMassArr = np.zeros((len(dtaList), 2))
    for i, dta in enumerate(dtaList):
        precMassArr[i] = [DataFile.getPrecMassAndCharge(dta)[0], DataFile.getScanNum(dta)]

    precMassArr = precMassArr[np.argsort(precMassArr[:,0])]

    clusters = [[ind] for ind in range(precMassArr.shape[0])]

    # Start at 1: the first cluster has no predecessor. Starting at 0 would
    # make clusters[i - 1] wrap around to the LAST cluster, which merges a
    # lone cluster into itself and then deletes it (empty result for a
    # single spectrum or when every mass is within tolerance).
    i = 1
    while i < len(clusters):
        epsilon = ppm * 10**-6 * precMassArr[clusters[i][0]][0]
        mergeClusters = any(
            np.abs(precMassArr[ind1][0] - precMassArr[ind2][0]) < epsilon
            for ind1 in clusters[i]
            for ind2 in clusters[i - 1]
        )

        if mergeClusters:
            # Do not advance i: the next cluster has slid into position i
            # and must be compared against the cluster that just grew.
            clusters[i - 1].extend(clusters[i])
            del clusters[i]
        else:
            i = i + 1

    return [[precMassArr[ind][1] for ind in cluster] for cluster in clusters]
예제 #4
0
def getScanFDict(dtaList):
    """Index the entries of dtaList by scan number.

    The result maps DataFile.getScanNum(dta) to a dict with three keys:
    'dta' (the entry itself), 'precMass' (first item returned by
    DataFile.getPrecMassAndCharge for the entry) and 'sequenced'
    (initialised to False). When two entries share a scan number the later
    one replaces the earlier one.
    """
    # Tuple elements are evaluated left to right, so the scan number is
    # looked up before the precursor mass for each entry.
    return dict(
        (
            DataFile.getScanNum(entry),
            {'dta': entry, 'precMass': DataFile.getPrecMassAndCharge(entry)[0], 'sequenced': False},
        )
        for entry in dtaList
    )
예제 #5
0
def getScanFDict(dtaList):
    """Build a lookup from scan number to per-spectrum bookkeeping.

    For every entry of dtaList the scan number (DataFile.getScanNum) becomes
    a key whose value is a dict holding the entry under "dta", the first
    item of DataFile.getPrecMassAndCharge under "precMass", and a
    "sequenced" flag that starts out False. A repeated scan number keeps
    only the record of the last entry seen.
    """
    recordsByScan = {}
    for spectrum in dtaList:
        scanNumber = DataFile.getScanNum(spectrum)
        record = {
            "dta": spectrum,
            "precMass": DataFile.getPrecMassAndCharge(spectrum)[0],
            "sequenced": False,
        }
        recordsByScan[scanNumber] = record
    return recordsByScan
예제 #6
0
def getLADSPScore(seq, dtaPath, PNet, ppm=5, ambigEdges=None, ambigAA='X', ambigPenalty=20):
    """Score the sequence seq against the spectrum stored at dtaPath.

    The score is the sum of spec.getNodeScore over every node produced by
    Constants.nodeInfoGen for seq, plus one spec.getPriorScore term at
    prm=0 (before the first node's 'formAA') and one at
    precMass - H+ - H2O (after the last node's 'lattAA'), minus
    ambigPenalty for every residue position equal to ambigAA.

    Arguments:
    seq          -- sequence handed to Constants.nodeInfoGen
    dtaPath      -- spectrum identifier understood by DataFile
    PNet         -- passed through to PN.Spectrum
    ppm          -- tolerance in parts-per-million of the precursor mass,
                    used as the epsilon of the PN.Spectrum
    ambigEdges   -- passed through to Constants.nodeInfoGen
    ambigAA      -- residue symbol that incurs the penalty
    ambigPenalty -- amount subtracted per occurrence of ambigAA

    Returns the accumulated score.

    Raises StopIteration if Constants.nodeInfoGen yields no node for seq.
    """
    pairs = DataFile.getMassIntPairs(dtaPath)
    precMass = DataFile.getPrecMassAndCharge(dtaPath)[0]
    epsilon = ppm * precMass * 10 ** -6
    spec = PN.Spectrum(PNet, precMass, Nmod=0, Cmod=0, epsilon=epsilon, spectrum=pairs)
    spec.initializeNoiseModel()
    nodeGen = Constants.nodeInfoGen(seq, considerTerminalMods=True, ambigEdges=ambigEdges)

    pScore = 0
    # The first node is handled outside the loop because it also carries
    # the prior term for the start of the sequence. The next() builtin is
    # used instead of the Python-2-only .next() method.
    node = next(nodeGen)
    pScore += spec.getNodeScore(**node)
    pScore += spec.getPriorScore(prm=0, formAA=None, lattAA=node['formAA'])
    if node['formAA'] == ambigAA:
        pScore -= ambigPenalty

    for node in nodeGen:
        pScore += spec.getNodeScore(**node)
        if node['formAA'] == ambigAA:
            pScore -= ambigPenalty

    # `node` is now the last node produced (or still the first one when
    # there was only one); its 'lattAA' is the only residue not yet checked
    # as a 'formAA'.
    pScore += spec.getPriorScore(prm=precMass - Constants.mods['H+'] - Constants.mods['H2O'], formAA=node['lattAA'], lattAA=None)
    if node['lattAA'] == ambigAA:
        pScore -= ambigPenalty

    return pScore
예제 #7
0
        if len(cluster) > 1:
            combMasses += [sum(cluster) / len(cluster)]
        else:
            combMasses += cluster
    
    return np.sort(np.array(combMasses))
    
if __name__ == '__main__':
    dirPath = 'C:\\Users\\Arun\\Pythonprojects\\DeNovoSequencing\\LF2_short_HCD+CID_ath001862_244\\'
    
    ppm = 5
    heavyPath = dirPath + '244.3611.3611.1.dta'
    lightPath = dirPath + '244.3619.3619.1.dta'
    heavyPairs = DataFile.getMassIntPairs(heavyPath)
    lightPairs = DataFile.getMassIntPairs(lightPath)
    heavyPrecMass, heavyCharge = DataFile.getPrecMassAndCharge(heavyPath) 
    lightPrecMass, lightCharge = DataFile.getPrecMassAndCharge(lightPath)   
    
    print ppm * 10 ** -6 * heavyPrecMass
    print getSharedPeaksRatio(lightPairs, heavyPairs, Nmod=0, Cmod=Constants.mods['*'], epsilon=ppm * heavyPrecMass * 10 ** -6)
    
     
    """
    tPath = dirPath + '244.0855.0855.1.dta'
    tMass = DataFile.getPrecMassAndCharge(tPath)[0] 
    tPairs = DataFile.getMassIntPairs(tPath)
    tIons = tPairs[:,0]
    tIons = np.insert(tIons, 0, 0)
    tIons = np.append(tIons, tMass)
    tIons = getSymmetrizedSpectrum(tIons, tMass)
    print tIons
예제 #8
0
        (lightSpec, heavySpec) = pair[1:]
        if options.verbose:
            print 'Now sequencing %s %s with shared peaks ratio %f' % (lightSpec, heavySpec, pair[0])
            s1 = time.time()
            
        heavyPath = heavySpec
        lightPath = lightSpec
        sharedInfo = DNS.getPairedSpectraInfoForSequencing(lightPath, heavyPath, options.verbose)
        DNS.sequencePairedSpectra(sharedInfo['NInd'], sharedInfo['CInd'], sharedInfo['lightPairs'], sharedInfo['heavyPairs'], sharedInfo['lightPrecMass'] - Constants.mods['H+'] - Constants.mods['H2O'], PNet, alpha=options.alpha, unknownPenalty=options.ambigEdgePenalty, maxEdge=options.maxEdge, minEdge=options.minEdge, Nmod=options.Nmod, Cmod=options.Cmod, aas=aas, verbose=options.verbose)
        
        if options.verbose:
            print 'Time taken:', time.time() - s1 
    
    for spec in unpaired:
        if options.verbose:
            print 'Now sequencing unpaired spectrum %s' % spec
            s1 = time.time()
            
        precMass = DataFile.getPrecMassAndCharge(spec)[0]
        pairs = DataFile.getMassIntPairs(spec)
        DNS.sequenceSingleSpectrum(pairs, precMass - Constants.mods['H+'] - Constants.mods['H2O'], PNet, alpha=options.alpha, unknownPenalty=options.ambigEdgePenalty, maxEdge=options.maxEdge, minEdge=options.minEdge, aas=aas, verbose=options.verbose)
        
        if options.verbose:
            print 'Time taken:', time.time() - s1
    
    if options.verbose:
        print 'Finished sequencing. Time taken: ', time.time() - t2
        print 'Total time taken for program: ', time.time() - t1