def test_getPrecursorRtMz(self):
    """
    Check that mzmlFunctions.getPrecursorRtMz returns the expected list of
    precursor m/z and retention time dictionaries, both for a regular mzML
    file and for a peaks.mzML file.
    """
    # The expected values are compared as whole lists, so both the order of
    # the entries and the exact float values matter.
    wantedFromMzml = [
        dict(mz=337.33999999999997, rt=85.166666666666671),
        dict(mz=421.76077270507812, rt=0.82813999999999999),
        dict(mz=462.24014282226562, rt=12.52604),
        dict(mz=335.222412109375, rt=37.834473333333335),
    ]
    wantedFromPeaks = [
        dict(rt=9.5545000000000009, mz=421.758026123047),
        dict(rt=49.688400000000001, mz=421.760772705078),
    ]

    # open both test files before extracting anything from them
    mzmlReader = pymzml.run.Reader(testFolder + 'mzml_test_file_1.mzML')
    peaksReader = pymzml.run.Reader(testFolder + 'peaksMzmlTestfile.peaks.mzML')

    foundInMzml = mzmlFunctions.getPrecursorRtMz(mzmlReader)
    foundInPeaks = mzmlFunctions.getPrecursorRtMz(peaksReader)

    self.assertEqual(wantedFromMzml, foundInMzml)
    self.assertEqual(wantedFromPeaks, foundInPeaks)
def test_getPrecursorRtMz(self):
    """
    Verify the output of mzmlFunctions.getPrecursorRtMz for two inputs: a
    plain mzML test file and a peaks.mzML test file. Each result has to be
    equal to a hard-coded list of {'mz': ..., 'rt': ...} dictionaries.
    """
    # reference result for the plain mzML test file
    referenceMzml = [
        {'mz': 337.33999999999997, 'rt': 85.166666666666671},
        {'mz': 421.76077270507812, 'rt': 0.82813999999999999},
        {'mz': 462.24014282226562, 'rt': 12.52604},
        {'mz': 335.222412109375, 'rt': 37.834473333333335},
    ]
    # reference result for the peaks.mzML test file
    referencePeaks = [
        {'rt': 9.5545000000000009, 'mz': 421.758026123047},
        {'rt': 49.688400000000001, 'mz': 421.760772705078},
    ]

    # both readers are created first, then both results are extracted
    runMzml = pymzml.run.Reader(testFolder + 'mzml_test_file_1.mzML')
    runPeaks = pymzml.run.Reader(testFolder + 'peaksMzmlTestfile.peaks.mzML')
    resultMzml = mzmlFunctions.getPrecursorRtMz(runMzml)
    resultPeaks = mzmlFunctions.getPrecursorRtMz(runPeaks)

    self.assertEqual(referenceMzml, resultMzml)
    self.assertEqual(referencePeaks, resultPeaks)
def compareCoordinate(mzmlFile, featureFile, writeCSV=False, writeTo="precursorPerFeature.csv"):
    r"""
    Compare the precursors scan time and m/z values of a spectrum with all the retention time and m/z values in the convexhull of a feature.
    The spectrum information can come from a mzml File or a peaks.mzml file. It returns a dictionary with 3 keys: totalPrecursorsInFeatures, averagePrecursorsInFeatures
    and featPerPrecursorDict. totalPrecursorsInFeatures is a numeric value: the total amount of precursors that are in all features,
    averagePrecursorsInFeatures is a numeric value: the average amount of precursors in a feature and featPerPrecursorDict is a dictionary
    with as key every feature and as value the amount of precursors per feature.
    A third input is writeCSV. If this is set to true, featPerPrecursorDict is written out to a CSV file (through output.CompareDataWriter).

    A precursor is counted for a feature when its retention time (multiplied by 60) lies strictly between the rtMin and rtMax of the feature
    and its m/z lies strictly between the mzMin and mzMax of the feature. A precursor that lies in several features is counted once for every feature.

    @type mzmlFile: string
    @param mzmlFile: The path of the .mzML file
    @type featureFile: string
    @param featureFile: The path of the .featureXML file
    @type writeCSV: bool
    @param writeCSV: Flag if a CSV file has to be written out of the precursor per feature data (default: false)
    @type writeTo: string
    @param writeTo: The file and path where writeCSV has to be written to, default is precursorPerFeature.csv (resolved with os.path.abspath)
    @rtype: Dictionary
    @returns: A dictionary with 3 keys: totalPrecursorsInFeatures, averagePrecursorsInFeatures and featPerPrecursorDict.
    totalPrecursorsInFeatures is a numeric value: the total amount of precursors that are in all features,
    averagePrecursorsInFeatures is a numeric value: the average amount of precursors in a feature (0.0 if there are no features) and
    featPerPrecursorDict is a dictionary with as key every feature and as value the amount of precursors per feature

    B{Examples}:

    Print the return value:

    >>> print compareCoordinate('example_mzML_file.mzML', 'example_feature_file.featureXML')
    {'totalPrecursorsInFeatures': 2, 'featPerPrecursorDict': {'f_43922326584371237334': 1, 'f_8613715360396561740': 0, 'f_13020522388175237334': 1}, 'averagePrecursorsInFeatures': 0.66666666666666663}

    Write the results to a csv file:

    >>> compareCoordinate(testFolder+'mzmlTestFile.mzML', testFolder+'featurexmlTestFile.featureXML', True, testFolder+'testPrecursorPerFeature.csv') # note the True
    """
    # getting the absolute path of the given mzml file
    mzmlFile = os.path.abspath(mzmlFile)
    # NOTE(review): the resulting handle is never used in this function. The call is kept
    # in case the constructor has side effects (e.g. validating the path) -- confirm.
    fileHandling.FileHandle(mzmlFile)

    # parsing of mzml file
    msrun = pymzml.run.Reader(mzmlFile)

    # get the retention time and m/z of all precursors in msrun. The values are converted once,
    # up front: retention time * 60 (to go from minutes to seconds) and m/z as float.
    precursors = [
        (float(mzAndRT["rt"]) * 60, float(mzAndRT["mz"]))
        for mzAndRT in mzmlFunctions.getPrecursorRtMz(msrun)
    ]

    featureFile = os.path.abspath(featureFile)
    # make an instance of the parseFeatureXML.Reader object, with file as input
    featureXML = parseFeatureXML.Reader(featureFile)

    # featPrecursor will hold the amount of precursors per feature, with id as key and amount of precursors as value
    featPrecursor = {}
    totalPrecursor = 0
    # get all features out of featureXML
    for feature in featureXML.getSimpleFeatureInfo():
        # get the bounding coordinates of the convexhull of this feature
        hull = featureFunctions.getFeatureConvexhullCoordinates(feature)[feature]
        rtMin = float(hull["rtMin"])
        rtMax = float(hull["rtMax"])
        mzMin = float(hull["mzMin"])
        mzMax = float(hull["mzMax"])

        # count every precursor of which the retention time is larger than rtMin and smaller than rtMax
        # and the m/z is larger than mzMin and smaller than mzMax
        precursorPerFeature = sum(
            1 for rtSeconds, mz in precursors if rtMin < rtSeconds < rtMax and mzMin < mz < mzMax
        )
        totalPrecursor += precursorPerFeature

        # The id is read from the reader, not from `feature`. Presumably the reader exposes the
        # attributes of the feature it is currently iterating over -- verify against parseFeatureXML.Reader.
        featPrecursor[featureXML["id"]] = precursorPerFeature

    # if writeCSV flag is set to True, write out csv file to the absolute path of writeTo (default: precursorPerFeature.csv in the same folder)
    if writeCSV:
        compareDataWriter = output.CompareDataWriter(os.path.abspath(writeTo))
        compareDataWriter.precursorPerFeatureCsvWriter(featPrecursor)

    # calculate the average precursor per feature; without any feature the average is defined as 0.0
    # instead of raising a ZeroDivisionError
    if featPrecursor:
        averagePrecursFeature = float(totalPrecursor) / float(len(featPrecursor))
    else:
        averagePrecursFeature = 0.0

    return {
        "totalPrecursorsInFeatures": totalPrecursor,
        "averagePrecursorsInFeatures": averagePrecursFeature,
        "featPerPrecursorDict": featPrecursor,
    }
def compareCoordinate(mzmlFile, featureFile, writeCSV=False, writeTo='precursorPerFeature.csv'):
    r"""
    Compare the precursors scan time and m/z values of a spectrum with all the retention time and m/z values in the convexhull of a feature.
    The spectrum information can come from a mzml File or a peaks.mzml file. It returns a dictionary with 3 keys: totalPrecursorsInFeatures, averagePrecursorsInFeatures
    and featPerPrecursorDict. totalPrecursorsInFeatures is a numeric value: the total amount of precursors that are in all features,
    averagePrecursorsInFeatures is a numeric value: the average amount of precursors in a feature and featPerPrecursorDict is a dictionary
    with as key every feature and as value the amount of precursors per feature.
    A third input is writeCSV. If this is set to true, featPerPrecursorDict is written out to a CSV file (through output.CompareDataWriter).

    A precursor is counted for a feature when its retention time (multiplied by 60) lies strictly between the rtMin and rtMax of the feature
    and its m/z lies strictly between the mzMin and mzMax of the feature. A precursor that lies in several features is counted once for every feature.

    @type mzmlFile: string
    @param mzmlFile: The path of the .mzML file
    @type featureFile: string
    @param featureFile: The path of the .featureXML file
    @type writeCSV: bool
    @param writeCSV: Flag if a CSV file has to be written out of the precursor per feature data (default: false)
    @type writeTo: string
    @param writeTo: The file and path where writeCSV has to be written to, default is precursorPerFeature.csv (resolved with os.path.abspath)
    @rtype: Dictionary
    @returns: A dictionary with 3 keys: totalPrecursorsInFeatures, averagePrecursorsInFeatures and featPerPrecursorDict.
    totalPrecursorsInFeatures is a numeric value: the total amount of precursors that are in all features,
    averagePrecursorsInFeatures is a numeric value: the average amount of precursors in a feature (0.0 if there are no features) and
    featPerPrecursorDict is a dictionary with as key every feature and as value the amount of precursors per feature

    B{Examples}:

    Print the return value:

    >>> print compareCoordinate('example_mzML_file.mzML', 'example_feature_file.featureXML')
    {'totalPrecursorsInFeatures': 2, 'featPerPrecursorDict': {'f_43922326584371237334': 1, 'f_8613715360396561740': 0, 'f_13020522388175237334': 1}, 'averagePrecursorsInFeatures': 0.66666666666666663}

    Write the results to a csv file:

    >>> compareCoordinate(testFolder+'mzmlTestFile.mzML', testFolder+'featurexmlTestFile.featureXML', True, testFolder+'testPrecursorPerFeature.csv') # note the True
    """
    # getting the absolute path of the given mzml file
    mzmlFile = os.path.abspath(mzmlFile)
    # NOTE(review): the resulting handle is never used in this function. The call is kept
    # in case the constructor has side effects (e.g. validating the path) -- confirm.
    fileHandling.FileHandle(mzmlFile)

    # parsing of mzml file
    msrun = pymzml.run.Reader(mzmlFile)

    # get the retention time and m/z of all precursors in msrun. The values are converted once,
    # up front: retention time * 60 (to go from minutes to seconds) and m/z as float.
    precursors = [
        (float(mzAndRT['rt']) * 60, float(mzAndRT['mz']))
        for mzAndRT in mzmlFunctions.getPrecursorRtMz(msrun)
    ]

    featureFile = os.path.abspath(featureFile)
    # make an instance of the parseFeatureXML.Reader object, with file as input
    featureXML = parseFeatureXML.Reader(featureFile)

    # featPrecursor will hold the amount of precursors per feature, with id as key and amount of precursors as value
    featPrecursor = {}
    totalPrecursor = 0
    # get all features out of featureXML
    for feature in featureXML.getSimpleFeatureInfo():
        # get the bounding coordinates of the convexhull of this feature
        hull = featureFunctions.getFeatureConvexhullCoordinates(feature)[feature]
        rtMin = float(hull['rtMin'])
        rtMax = float(hull['rtMax'])
        mzMin = float(hull['mzMin'])
        mzMax = float(hull['mzMax'])

        # count every precursor of which the retention time is larger than rtMin and smaller than rtMax
        # and the m/z is larger than mzMin and smaller than mzMax
        precursorPerFeature = sum(
            1 for rtSeconds, mz in precursors if rtMin < rtSeconds < rtMax and mzMin < mz < mzMax
        )
        totalPrecursor += precursorPerFeature

        # The id is read from the reader, not from `feature`. Presumably the reader exposes the
        # attributes of the feature it is currently iterating over -- verify against parseFeatureXML.Reader.
        featPrecursor[featureXML['id']] = precursorPerFeature

    # if writeCSV flag is set to True, write out csv file to the absolute path of writeTo (default: precursorPerFeature.csv in the same folder)
    if writeCSV:
        compareDataWriter = output.CompareDataWriter(os.path.abspath(writeTo))
        compareDataWriter.precursorPerFeatureCsvWriter(featPrecursor)

    # calculate the average precursor per feature; without any feature the average is defined as 0.0
    # instead of raising a ZeroDivisionError
    if featPrecursor:
        averagePrecursFeature = float(totalPrecursor) / float(len(featPrecursor))
    else:
        averagePrecursFeature = 0.0

    return {
        'totalPrecursorsInFeatures': totalPrecursor,
        'averagePrecursorsInFeatures': averagePrecursFeature,
        'featPerPrecursorDict': featPrecursor
    }