예제 #1
0
	def test_getFeatureConvexhullCoordinates(self):
		"""Check the convexhull coordinates returned for every feature in featurexmlTestFile_1.featureXML."""
		expectedFeatureConvexhull = [[{'mzMax': '338.251376135343', 'rtMin': '7045.7642', 'rtMax': '7053.4848', 'mzMin': '336.124751115092'}], [{'mzMax': '338.251376135343', 'rtMin': '5105.9217', 'rtMax': '5111.6874', 'mzMin': '336.124751115092'}], [{'mzMax': '430.197574989105', 'rtMin': '4001.7973', 'rtMax': '4017.7105', 'mzMin': '428.070943557216'}], [{'mzMax': '339.251376135343', 'rtMin': '5107.9217', 'rtMax': '5112.6874', 'mzMin': '337.124751115092'}]]

		featureXML = parseFeatureXML.Reader(testFolder+'featurexmlTestFile_1.featureXML')
		actualFeatureConvexhull = []
		for feature in featureXML.getSimpleFeatureInfo():
			# look the coordinates up once per feature and reuse the result for both checks
			convexhullCoordinates = featureFunctions.getFeatureConvexhullCoordinates(feature)
			# only the values are compared, because the keys are feature objects stored at locations which differ
			# between calls, so their expected value is unknown. list() makes the result a real list whether
			# values() returns a list or a view
			actualFeatureConvexhull.append(list(convexhullCoordinates.values()))
			# it is unclear where the class Element comes from, so the type of the key is converted to a string and the strings are compared
			self.assertEqual(str(type(list(convexhullCoordinates.keys())[0])), '<type \'Element\'>')

		self.assertListEqual(expectedFeatureConvexhull, actualFeatureConvexhull)
예제 #2
0
    def test_getFeatureConvexhullCoordinates(self):
        """Check the convexhull coordinates returned for every feature in featurexmlTestFile_1.featureXML."""
        expectedFeatureConvexhull = [
            [{
                'mzMax': '338.251376135343',
                'rtMin': '7045.7642',
                'rtMax': '7053.4848',
                'mzMin': '336.124751115092'
            }],
            [{
                'mzMax': '338.251376135343',
                'rtMin': '5105.9217',
                'rtMax': '5111.6874',
                'mzMin': '336.124751115092'
            }],
            [{
                'mzMax': '430.197574989105',
                'rtMin': '4001.7973',
                'rtMax': '4017.7105',
                'mzMin': '428.070943557216'
            }],
            [{
                'mzMax': '339.251376135343',
                'rtMin': '5107.9217',
                'rtMax': '5112.6874',
                'mzMin': '337.124751115092'
            }],
        ]

        featureXML = parseFeatureXML.Reader(testFolder +
                                            'featurexmlTestFile_1.featureXML')
        actualFeatureConvexhull = []
        for feature in featureXML.getSimpleFeatureInfo():
            # Look the coordinates up once per feature and reuse the result
            # for both checks below.
            convexhullCoordinates = (
                featureFunctions.getFeatureConvexhullCoordinates(feature))

            # Only the values are compared: the keys are feature objects
            # stored at locations which differ between calls, so their
            # expected value is unknown. list() makes the result a real list
            # whether values() returns a list or a view.
            actualFeatureConvexhull.append(
                list(convexhullCoordinates.values()))

            # It is unclear where the class Element comes from, so the type of
            # the key is converted to a string and the strings are compared.
            firstKey = list(convexhullCoordinates.keys())[0]
            self.assertEqual(str(type(firstKey)), '<type \'Element\'>')

        self.assertListEqual(expectedFeatureConvexhull,
                             actualFeatureConvexhull)
예제 #3
0
	def test_getFeatureOverlap(self):
		"""Check that getOverlap reports the expected overlap for the features in featurexmlTestFile_1.featureXML."""
		expectedOverlap = 43

		featureXML = parseFeatureXML.Reader(testFolder+'featurexmlTestFile_1.featureXML')   # make a reader instance
		featureDict = {}
		# the loop yields the convexhull coordinates one feature at a time, so they are first collected
		# in one big dictionary before being given to getOverlap
		for feature in featureXML.getSimpleFeatureInfo():
			# getFeatureConvexhullCoordinates returns a dictionary, so featureDict can be updated with .update()
			featureDict.update(featureFunctions.getFeatureConvexhullCoordinates(feature))
		actualOverlap = featureFunctions.getOverlap(featureDict)

		# assertEqual, not assertTrue: assertTrue(expected, actual) only checks that `expected` is truthy
		# and uses `actual` as the failure message, so it would pass for any overlap value
		self.assertEqual(expectedOverlap, actualOverlap)
예제 #4
0
    def test_getFeatureOverlap(self):
        """Check that getOverlap reports the expected overlap for the features in featurexmlTestFile_1.featureXML."""
        expectedOverlap = 43

        # make a reader instance
        featureXML = parseFeatureXML.Reader(
            testFolder + 'featurexmlTestFile_1.featureXML')

        # The loop yields the convexhull coordinates one feature at a time, so
        # they are first collected in one big dictionary before being given to
        # getOverlap.
        featureDict = {}
        for feature in featureXML.getSimpleFeatureInfo():
            # getFeatureConvexhullCoordinates returns a dictionary, so
            # featureDict can be updated with .update()
            featureDict.update(
                featureFunctions.getFeatureConvexhullCoordinates(feature))
        actualOverlap = featureFunctions.getOverlap(featureDict)

        # assertEqual, not assertTrue: assertTrue(expected, actual) only checks
        # that `expected` is truthy and uses `actual` as the failure message,
        # so it would pass for any overlap value.
        self.assertEqual(expectedOverlap, actualOverlap)
예제 #5
0
def compareCoordinate(mzmlFile, featureFile, writeCSV=False, writeTo="precursorPerFeature.csv"):
    r"""
    Count, for every feature in a featureXML file, the MS/MS precursors that fall inside the convexhull of that feature.

    The precursor scan time and m/z values of the spectra are compared with the retention time and m/z bounds of the convexhull of each feature.
    A precursor is counted for a feature when both values lie strictly between the minimum and maximum of the convexhull. The spectrum
    information can come from a mzml File or a peaks.mzml file.
    If writeCSV is set to true, the precursor count per feature is also written out to a CSV file with a column featureID and a column # of precursors.

    @type mzmlFile: string
    @param mzmlFile: The path of the .mzML file
    @type featureFile: string
    @param featureFile: The path of the .featureXML file
    @type writeCSV: bool
    @param writeCSV: Flag if a CSV file has to be written out of the precursor per feature data (default: false)
    @type writeTo: string
    @param writeTo: The file and path where writeCSV has to be written to, default is precursorPerFeature.csv in the same folder as the script
    @rtype: Dictionary
    @returns: A dictionary with 3 keys: totalPrecursorsInFeatures, averagePrecursorsInFeatures and featPerPrecursorDict. totalPrecursorsInFeatures
    is a numeric value: the total amount of precursors that are in all features, averagePrecursorsInFeatures is a numeric value: the average amount of
    precursors in a feature (0.0 when the feature file contains no features) and featPerPrecursorDict is a dictionary with as key every feature id
    and as value the amount of precursors per feature

    B{Examples}:

    Print the return value:

    >>> print compareCoordinate('example_mzML_file.mzML', 'example_feature_file.featureXML')
    {'totalPrecursorsInFeatures': 2, 'featPerPrecursorDict': {'f_43922326584371237334': 1, 'f_8613715360396561740': 0, 'f_13020522388175237334': 1}, 'averagePrecursorsInFeatures': 0.66666666666666663}

    Write the results to a csv file:

    >>> compareCoordinate(testFolder+'mzmlTestFile.mzML', testFolder+'featurexmlTestFile.featureXML', True, testFolder+'testPrecursorPerFeature.csv') # note the True
    """

    # getting the absolute path of the given mzml file
    mzmlFile = os.path.abspath(mzmlFile)

    # NOTE(review): the FileHandle instance is never used afterwards; the call is kept in case
    # its constructor validates the path -- confirm and remove if it has no effect
    fileHandling.FileHandle(mzmlFile)

    # parsing of mzml file
    msrun = pymzml.run.Reader(mzmlFile)

    # get the retention times and m/z of all precursors in msrun
    retentionTime = mzmlFunctions.getPrecursorRtMz(msrun)

    featureFile = os.path.abspath(featureFile)
    # make an instance of the parseFeatureXML.Reader object, with file as input
    featureXML = parseFeatureXML.Reader(featureFile)

    # featPrecursor will hold the amount of precursors per feature, with id as key and amount of precursors as value
    featPrecursor = {}
    totalPrecursor = 0
    # get all features out of featureXML
    for feature in featureXML.getSimpleFeatureInfo():
        # get the convexhull bounds of this feature and convert them once, not once per precursor
        convexhull = featureFunctions.getFeatureConvexhullCoordinates(feature)[feature]
        rtMin = float(convexhull["rtMin"])
        rtMax = float(convexhull["rtMax"])
        mzMin = float(convexhull["mzMin"])
        mzMax = float(convexhull["mzMax"])

        # count every MS/MS precursor whose retention time (*60 to go from minutes to seconds) and m/z
        # both lie strictly inside the bounds. The rt value is converted to float BEFORE multiplying,
        # so a string value is not repeated 60 times
        precursorPerFeature = sum(
            1
            for mzAndRT in retentionTime
            if rtMin < float(mzAndRT["rt"]) * 60 < rtMax and mzMin < float(mzAndRT["mz"]) < mzMax
        )
        totalPrecursor += precursorPerFeature

        # NOTE(review): the id is read from the reader, not from `feature`; presumably the reader
        # exposes the attributes of the feature it is currently positioned on -- confirm
        featPrecursor[featureXML["id"]] = precursorPerFeature

    # if writeCSV flag is set to True, write out csv file to the absolute path of writeTo (default: precursorPerFeature.csv in the same folder)
    if writeCSV:
        compareDataWriter = output.CompareDataWriter(os.path.abspath(writeTo))
        compareDataWriter.precursorPerFeatureCsvWriter(featPrecursor)

    # calculate the average precursor per feature; a file without features gives 0.0 instead of a ZeroDivisionError
    if featPrecursor:
        averagePrecursFeature = float(totalPrecursor) / len(featPrecursor)
    else:
        averagePrecursFeature = 0.0
    return {
        "totalPrecursorsInFeatures": totalPrecursor,
        "averagePrecursorsInFeatures": averagePrecursFeature,
        "featPerPrecursorDict": featPrecursor,
    }
예제 #6
0
def compareCoordinate(mzmlFile,
                      featureFile,
                      writeCSV=False,
                      writeTo='precursorPerFeature.csv'):
    r"""
    Count, for every feature in a featureXML file, the MS/MS precursors that fall inside the convexhull of that feature.

    The precursor scan time and m/z values of the spectra are compared with the retention time and m/z bounds of the convexhull of each feature.
    A precursor is counted for a feature when both values lie strictly between the minimum and maximum of the convexhull. The spectrum
    information can come from a mzml File or a peaks.mzml file.
    If writeCSV is set to true, the precursor count per feature is also written out to a CSV file with a column featureID and a column # of precursors.

    @type mzmlFile: string
    @param mzmlFile: The path of the .mzML file
    @type featureFile: string
    @param featureFile: The path of the .featureXML file
    @type writeCSV: bool
    @param writeCSV: Flag if a CSV file has to be written out of the precursor per feature data (default: false)
    @type writeTo: string
    @param writeTo: The file and path where writeCSV has to be written to, default is precursorPerFeature.csv in the same folder as the script
    @rtype: Dictionary
    @returns: A dictionary with 3 keys: totalPrecursorsInFeatures, averagePrecursorsInFeatures and featPerPrecursorDict. totalPrecursorsInFeatures
    is a numeric value: the total amount of precursors that are in all features, averagePrecursorsInFeatures is a numeric value: the average amount of
    precursors in a feature (0.0 when the feature file contains no features) and featPerPrecursorDict is a dictionary with as key every feature id
    and as value the amount of precursors per feature

    B{Examples}:

    Print the return value:

    >>> print compareCoordinate('example_mzML_file.mzML', 'example_feature_file.featureXML')
    {'totalPrecursorsInFeatures': 2, 'featPerPrecursorDict': {'f_43922326584371237334': 1, 'f_8613715360396561740': 0, 'f_13020522388175237334': 1}, 'averagePrecursorsInFeatures': 0.66666666666666663}

    Write the results to a csv file:

    >>> compareCoordinate(testFolder+'mzmlTestFile.mzML', testFolder+'featurexmlTestFile.featureXML', True, testFolder+'testPrecursorPerFeature.csv') # note the True
    """

    # getting the absolute path of the given mzml file
    mzmlFile = os.path.abspath(mzmlFile)

    # NOTE(review): the FileHandle instance is never used afterwards; the call
    # is kept in case its constructor validates the path -- confirm and remove
    # if it has no effect
    fileHandling.FileHandle(mzmlFile)

    # parsing of mzml file
    msrun = pymzml.run.Reader(mzmlFile)

    # get the retention times and m/z of all precursors in msrun
    retentionTime = mzmlFunctions.getPrecursorRtMz(msrun)

    featureFile = os.path.abspath(featureFile)
    # make an instance of the parseFeatureXML.Reader object, with file as input
    featureXML = parseFeatureXML.Reader(featureFile)

    # featPrecursor will hold the amount of precursors per feature, with id as key and amount of precursors as value
    featPrecursor = {}
    totalPrecursor = 0
    # get all features out of featureXML
    for feature in featureXML.getSimpleFeatureInfo():
        # get the convexhull bounds of this feature and convert them once,
        # not once per precursor
        featureCoordinates = featureFunctions.getFeatureConvexhullCoordinates(
            feature)
        bounds = featureCoordinates[feature]
        rtMin = float(bounds['rtMin'])
        rtMax = float(bounds['rtMax'])
        mzMin = float(bounds['mzMin'])
        mzMax = float(bounds['mzMax'])

        # set the amount of precursor per feature to 0 at every feature
        precursorPerFeature = 0
        # loop for every feature through every MS/MS precursor coordinate
        for mzAndRT in retentionTime:
            # the retention time is converted to float BEFORE the *60 (minutes
            # to seconds), so a string value is not repeated 60 times
            rtSeconds = float(mzAndRT['rt']) * 60
            # count the precursor when both the retention time and the m/z lie
            # strictly inside the bounds of the convexhull
            if rtMin < rtSeconds < rtMax and mzMin < float(mzAndRT['mz']) < mzMax:
                precursorPerFeature += 1
        totalPrecursor += precursorPerFeature

        # NOTE(review): the id is read from the reader, not from `feature`;
        # presumably the reader exposes the attributes of the feature it is
        # currently positioned on -- confirm
        featPrecursor[featureXML['id']] = precursorPerFeature

    # if writeCSV flag is set to True, write out csv file to the absolute path of writeTo (default: precursorPerFeature.csv in the same folder)
    if writeCSV:
        compareDataWriter = output.CompareDataWriter(os.path.abspath(writeTo))
        compareDataWriter.precursorPerFeatureCsvWriter(featPrecursor)

    # calculate the average precursor per feature; a file without features
    # gives 0.0 instead of a ZeroDivisionError
    if featPrecursor:
        averagePrecursFeature = float(totalPrecursor) / len(featPrecursor)
    else:
        averagePrecursFeature = 0.0
    return {
        'totalPrecursorsInFeatures': totalPrecursor,
        'averagePrecursorsInFeatures': averagePrecursFeature,
        'featPerPrecursorDict': featPrecursor
    }