def test_getMappedFeatureIds(self):
    # Build a Map from the two test featureXML files plus the trafoXML of
    # file 2 and check getMappedFeatureIds() against the hard-coded entries.
    firstReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')
    secondReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_2.featureXML')
    trafoPath = testFolder + 'featurexmlTestFile_2.trafoXML'

    # (from, to, feature id) of every expected entry; within one entry the
    # same id is used for both 'from_featureID' and 'to_featureID'.
    expectedRows = (
        (5189.2922399999998, 5109.2922399999998, 'f_13020522388175237334'),
        (5197.2922399999998, 5107.2922399999998, 'f_43922326584371237334'),
        (3969.5872599999998, 4009.5872599999998, 'f_8613715360396561740'),
    )
    expectedList = [
        {
            'from': fromValue,
            'from_featureID': featureId,
            'to': toValue,
            'to_featureID': featureId,
        }
        for fromValue, toValue, featureId in expectedRows
    ]

    mapper = fm.Map(firstReader, secondReader, trafoPath)
    self.assertListEqual(expectedList, mapper.getMappedFeatureIds())
def test_fillFeatureMappingException(self):
    # FillDatabase is constructed with only the connection here, and
    # fillFeatureMapping is then expected to raise a KeyError.
    firstReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')
    secondReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_2.featureXML')
    trafoPath = testFolder + 'featurexmlTestFile_2.trafoXML'
    databaseFile = testDatabasePath + 'test_pyMSA_database.db'

    with database.ConnectSqlite(databaseFile) as sqlCon:
        filler = database.FillDatabase(sqlCon)
        with self.assertRaises(KeyError):
            filler.fillFeatureMapping(firstReader, secondReader, trafoPath)
def test_FeatureMappingException(self):
    # Two failure modes of fm.Map are exercised with the same pair of
    # featureXML readers.
    firstReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')
    secondReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_2.featureXML')

    # Using the trafoXML path of file 1 is expected to raise IOError.
    with self.assertRaises(IOError):
        fm.Map(firstReader, secondReader,
               testFolder + 'featurexmlTestFile_1.trafoXML')

    # Passing the second reader for both inputs is expected to raise
    # RuntimeError.
    with self.assertRaises(RuntimeError):
        fm.Map(secondReader, secondReader,
               testFolder + 'featurexmlTestFile_2.trafoXML')
def test_fillFeatureMapping(self):
    # Smoke test: fill the msrun, the features of both files and finally
    # the feature mapping. There are no assertions; the test only fails
    # when one of the calls raises.
    readers = (
        parseFeatureXML.Reader(testFolder + 'featurexmlTestFile_1.featureXML'),
        parseFeatureXML.Reader(testFolder + 'featurexmlTestFile_2.featureXML'),
    )
    trafoPath = testFolder + 'featurexmlTestFile_2.trafoXML'
    databaseFile = testDatabasePath + 'test_pyMSA_database.db'

    with database.ConnectSqlite(databaseFile) as sqlCon:
        filler = database.FillDatabase(sqlCon, 'test')
        filler.fillMsrun(testFolder + 'mzml_test_file_1.mzML')
        for reader in readers:
            filler.fillFeatures(reader)
        filler.fillFeatureMapping(readers[0], readers[1], trafoPath)
def test_mappedIntensities(self):
    # mappedIntensities() returns two lists; each is compared with its own
    # hard-coded list of intensity strings.
    firstReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')
    secondReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_2.featureXML')
    mapper = fm.Map(firstReader, secondReader,
                    testFolder + 'featurexmlTestFile_2.trafoXML')

    intensities_1, intensities_2 = mapper.mappedIntensities()

    self.assertListEqual(['556384', '234284', '111429'], intensities_1)
    self.assertListEqual(['111329', '524284', '524284'], intensities_2)
def test_linkSpectrumToFeature(self):
    # Fill msrun, features and spectra, link the spectra to the features and
    # inspect the feature_has_MSMS_precursor table afterwards.
    expectedNumResult = 4
    expectedResult = [(1, 2), (1, 2), (1, 4), (1, 4)]

    spectrumReader = pymzml.run.Reader(testFolder + 'mzml_test_file_1.mzML')
    featureReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')

    with database.ConnectSqlite(testDatabasePath + 'test_pyMSA_database.db') as sqlCon:
        filler = database.FillDatabase(sqlCon, 'test')
        filler.fillMsrun(testFolder + 'mzml_test_file_1.mzML')
        filler.fillFeatures(featureReader)
        filler.fillSpectrum(spectrumReader)
        filler.linkSpectrumToFeature()

    self.cursor.execute("SELECT * FROM feature_has_MSMS_precursor")
    actualResult = []
    for row in self.cursor.fetchall():
        # Every fetched row is recorded twice, which is what the expected
        # list above encodes.
        # NOTE(review): confirm that the duplication is intentional.
        actualResult.extend((row, row))

    self.assertEqual(expectedNumResult, len(actualResult))
    self.assertListEqual(expectedResult, actualResult)
def test_getInfoException(self):
    # getInfo() is handed a plain string; a TypeError is expected.
    reader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')
    writer = output.FeatureWriter(reader)

    with self.assertRaises(TypeError):
        # list() drives the iteration to completion so that the whole loop
        # inside getInfo is run.
        list(writer.getInfo('notalist'))
def test_FeatureCsvWriter(self):
    # Let FeatureCsvWriter write a tab separated file for test file 1 and
    # compare the first two lines of that file with the expected values.
    expectedCsvFirstLine = [
        'charge', 'convexhull_xCoor', 'convexhull_yCoor', 'id', 'intensity',
        'overallquality', 'position_dim0', 'position_dim1', 'quality_dim0',
        'quality_dim1', 'spectrum_index', 'spectrum_native_id'
    ]
    expectedCsvSecondLine = [
        '2', '5107.9217', '337.125209180674', 'f_43922326584371237334',
        '556384', '225053', '5107.29224', '337.251104825', '0', '0', '3916',
        '18484'
    ]
    csvPath = testFolder + 'featureCsvTest.csv'
    reader = parseFeatureXML.Reader(testFolder + 'featurexmlTestFile_1.featureXML')

    # to make sure that the test isn't passing when the method doesn't work,
    # but the file already exists
    if os.path.exists(csvPath):
        os.remove(csvPath)

    try:
        # The writer is kept bound to a name so that its lifetime is the same
        # as before; the file is expected to exist once it is constructed.
        featureCsvWriter = output.FeatureCsvWriter(csvPath, reader)

        # Close the handle deterministically instead of leaking it.
        with open(csvPath, 'rb') as csvFile:
            csvReader = csv.reader(csvFile, delimiter='\t')
            actualCsvFirstLine = next(csvReader)
            actualCsvSecondLine = next(csvReader)

        self.assertListEqual(expectedCsvFirstLine, actualCsvFirstLine)
        self.assertListEqual(expectedCsvSecondLine, actualCsvSecondLine)
    finally:
        # Clean up even when an assertion fails, so no stale file is left
        # behind in the test folder.
        if os.path.exists(csvPath):
            os.remove(csvPath)
def test_getFeatures_rtWindow(self):
    # No features are expected from getFeatures_rtWindow(4000, 5000) for
    # test file 1.
    featureXML = parseFeatureXML.Reader(testFolder + 'featurexmlTestFile_1.featureXML')
    featureLocation = getWindow.FeatureLocation(featureXML)

    featureList = list(featureLocation.getFeatures_rtWindow(4000, 5000))

    # assertEqual instead of assertIs: comparing integers by identity only
    # works because of an interpreter implementation detail.
    self.assertEqual(0, len(featureList))
def test_fillFeaturesException(self):
    # FillDatabase is constructed with only the connection here, and
    # fillFeatures is then expected to raise a RuntimeError.
    reader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')
    databaseFile = testDatabasePath + 'test_pyMSA_database.db'

    with database.ConnectSqlite(databaseFile) as sqlCon:
        filler = database.FillDatabase(sqlCon)
        with self.assertRaises(RuntimeError):
            filler.fillFeatures(reader)
def test_mapping(self):
    # mapping() is expected to return one set of strings per key: the mapped
    # and the not mapped values of each of the two featureXML files.
    firstReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')
    secondReader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_2.featureXML')
    mapper = fm.Map(firstReader, secondReader,
                    testFolder + 'featurexmlTestFile_2.trafoXML')

    expectedDict = {
        'featureXML_1_mapped': {'4009.58726', '5107.29224', '5109.29224'},
        'featureXML_1_not_mapped': {'7052.29224'},
        'featureXML_2_mapped': {'3969.58726', '5189.29224', '5197.29224'},
        'featureXML_2_not_mapped': {'5345.29224'},
    }

    self.assertDictEqual(expectedDict, mapper.mapping())
def test_getFeatureConvexhullCoordinatesException(self):
    # A string instead of an element should give a TypeError.
    with self.assertRaises(TypeError):
        featureFunctions.getFeatureConvexhullCoordinates('not element type')

    # The features of this file don't have a convexhull, so the file is not
    # a valid featureXML file: an IOError is expected for each feature.
    invalidReader = parseFeatureXML.Reader(
        testFolder + 'invalidFeatureXML_noconvexhull.featureXML')
    for feature in invalidReader.getSimpleFeatureInfo():
        with self.assertRaises(IOError):
            featureFunctions.getFeatureConvexhullCoordinates(feature)
def test_getFeatureOverlap(self):
    # Collect the convexhull coordinates of every feature of test file 1 and
    # compare the result of getOverlap with the expected value.
    expectedOverlap = 43
    featureXML = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')  # make a reader instance

    # The convexhull coordinates are obtained one feature at a time, so they
    # first have to be merged into one big dictionary before they can be
    # given to getOverlap. getFeatureConvexhullCoordinates returns a
    # dictionary, so featureDict can be updated with .update().
    featureDict = {}
    for feature in featureXML.getSimpleFeatureInfo():
        featureDict.update(
            featureFunctions.getFeatureConvexhullCoordinates(feature))

    actualOverlap = featureFunctions.getOverlap(featureDict)

    # assertEqual, not assertTrue: assertTrue would use actualOverlap as the
    # failure message and pass for any truthy expectedOverlap, so nothing
    # was compared before.
    self.assertEqual(expectedOverlap, actualOverlap)
def test_getFeatureConvexhullCoordinates(self):
    # For every feature of test file 1 the values of the returned dictionary
    # are collected and compared with the expected min/max coordinates.
    expectedFeatureConvexhull = [
        [{'rtMin': '7045.7642', 'rtMax': '7053.4848',
          'mzMin': '336.124751115092', 'mzMax': '338.251376135343'}],
        [{'rtMin': '5105.9217', 'rtMax': '5111.6874',
          'mzMin': '336.124751115092', 'mzMax': '338.251376135343'}],
        [{'rtMin': '4001.7973', 'rtMax': '4017.7105',
          'mzMin': '428.070943557216', 'mzMax': '430.197574989105'}],
        [{'rtMin': '5107.9217', 'rtMax': '5112.6874',
          'mzMin': '337.124751115092', 'mzMax': '339.251376135343'}],
    ]
    reader = parseFeatureXML.Reader(testFolder + 'featurexmlTestFile_1.featureXML')

    collectedValues = []
    for feature in reader.getSimpleFeatureInfo():
        # Only the values are compared, because the features (the keys) are
        # stored at locations which differ between calls.
        hullValues = featureFunctions.getFeatureConvexhullCoordinates(
            feature).values()
        collectedValues.append(hullValues)

        # The origin of the class Element is unknown here, so the type of the
        # key is converted to a string and the strings are compared.
        firstKey = featureFunctions.getFeatureConvexhullCoordinates(
            feature).keys()[0]
        self.assertEqual(str(type(firstKey)), "<type 'Element'>")

    self.assertListEqual(expectedFeatureConvexhull, collectedValues)
def test_getInfo(self):
    # getInfo() yields (feature id, key, info) triples. The same expectations
    # are checked for a list input and for a set input.
    expectedFeatureList = [
        'f_130205234428175237334', 'f_130205234428175237334',
        'f_13020522388175237334', 'f_13020522388175237334',
        'f_8613715360396561740', 'f_8613715360396561740',
        'f_43922326584371237334', 'f_43922326584371237334'
    ]
    expectedKeyList = ['charge', 'intensity'] * 4
    expectedInfoList = [
        '2', '52234', '2', '234284', '2', '111429', '2', '556384'
    ]

    reader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')
    writer = output.FeatureWriter(reader)

    # First run with a list, second run with a set as input.
    for requestedKeys in (['charge', 'intensity'],
                          set(['charge', 'intensity'])):
        featureIds = []
        keys = []
        infos = []
        for featureId, key, info in writer.getInfo(requestedKeys):
            featureIds.append(featureId)
            keys.append(key)
            infos.append(info)

        self.assertListEqual(expectedFeatureList, featureIds)
        self.assertListEqual(expectedKeyList, keys)
        self.assertListEqual(expectedInfoList, infos)
def test_fillFeatures(self):
    # Fill the msrun and the features of test file 1, then look up every
    # feature id in the `feature` table and count the lookups.
    expectedNumResult = 4
    reader = parseFeatureXML.Reader(
        testFolder + 'featurexmlTestFile_1.featureXML')

    with database.ConnectSqlite(testDatabasePath + 'test_pyMSA_database.db') as sqlCon:
        filler = database.FillDatabase(sqlCon, 'test')
        filler.fillMsrun(testFolder + 'mzml_test_file_1.mzML')
        filler.fillFeatures(reader)

    selectFeature = "SELECT * FROM `feature` WHERE feature_id = ?"
    fetchedRows = []
    for _ in reader.getSimpleFeatureInfo():
        # The id is read from the reader itself while it is being iterated.
        self.cursor.execute(selectFeature, (str(reader['id']), ))
        # NOTE(review): if fetchone() returns None for a missing row, as a
        # DB-API cursor does, this length check alone cannot tell whether
        # the rows were really inserted -- confirm.
        fetchedRows.append(self.cursor.fetchone())
    self.connection.commit()

    self.assertEqual(len(fetchedRows), expectedNumResult)
def compareCoordinate(mzmlFile, featureFile, writeCSV=False, writeTo='precursorPerFeature.csv'):
    r"""
    Compare the precursors scan time and m/z values of a spectrum with all the retention time and m/z values in the
    convexhull of a feature. The spectrum information can come from a mzml File or a peaks.mzml file.

    A precursor is counted for a feature when its retention time (multiplied by 60 to go from minutes to seconds)
    lies strictly between rtMin and rtMax and its m/z lies strictly between mzMin and mzMax of the feature's convexhull.

    If writeCSV is set to True, featPerPrecursorDict is written out to a CSV file with
    L{output.CompareDataWriter.precursorPerFeatureCsvWriter}.

    @type mzmlFile: string
    @param mzmlFile: The path of the .mzML file
    @type featureFile: string
    @param featureFile: The path of the .featureXML file
    @type writeCSV: bool
    @param writeCSV: Flag if a CSV file has to be written out of the precursor per feature data (default: false)
    @type writeTo: string
    @param writeTo: The file and path where writeCSV has to be written to, default is precursorPerFeature.csv in the same folder as the script
    @rtype: Dictionary
    @returns: A dictionary with 3 keys: totalPrecursorsInFeatures, averagePrecursorsInFeatures and featPerPrecursorDict.
              totalPrecursorsInFeatures is a numeric value: the total amount of precursors that are in all features,
              averagePrecursorsInFeatures is a numeric value: the average amount of precursors in a feature and
              featPerPrecursorDict is a dictionary with as key every feature id and as value the amount of precursors of that feature
    @raise ZeroDivisionError: if no features are read from featureFile (the average is undefined)

    B{Examples}:

    Print the return value:

    >>> print compareCoordinate('example_mzML_file.mzML', 'example_feature_file.featureXML')
    {'totalPrecursorsInFeatures': 2, 'featPerPrecursorDict': {'f_43922326584371237334': 1, 'f_8613715360396561740': 0, 'f_13020522388175237334': 1}, 'averagePrecursorsInFeatures': 0.66666666666666663}

    Write the results to a csv file:

    >>> compareCoordinate(testFolder+'mzmlTestFile.mzML', testFolder+'featurexmlTestFile.featureXML', True, testFolder+'testPrecursorPerFeature.csv') # note the True
    """
    # getting the absolute path of the given mzml file
    mzmlFile = os.path.abspath(mzmlFile)
    # NOTE(review): the FileHandle instance is not used afterwards; the call is
    # kept because its constructor may validate the path -- confirm.
    fileHandling.FileHandle(mzmlFile)

    # parsing of mzml file
    msrun = pymzml.run.Reader(mzmlFile)

    # Get the retention time and m/z of all precursors in msrun. They are
    # converted to floats once, here, because the list is scanned again for
    # every feature. float() is applied before the *60 (minutes to seconds) in
    # both bounds checks; the original multiplied the raw value in one of them.
    precursors = [(float(precursor['rt']) * 60, float(precursor['mz']))
                  for precursor in mzmlFunctions.getPrecursorRtMz(msrun)]

    # make an instance of the parseFeatureXML.Reader object, with file as input
    featureXML = parseFeatureXML.Reader(os.path.abspath(featureFile))

    # featPrecursor will hold the amount of precursors per feature, with the
    # feature id as key and the amount of precursors as value
    featPrecursor = {}
    totalPrecursor = 0

    # get all features out of featureXML
    for feature in featureXML.getSimpleFeatureInfo():
        # the convexhull bounds of this feature, converted once per feature
        hull = featureFunctions.getFeatureConvexhullCoordinates(feature)[feature]
        rtMin = float(hull['rtMin'])
        rtMax = float(hull['rtMax'])
        mzMin = float(hull['mzMin'])
        mzMax = float(hull['mzMax'])

        # count the MS/MS precursors that lie strictly inside the bounds
        precursorPerFeature = sum(
            1 for rt, mz in precursors
            if rtMin < rt < rtMax and mzMin < mz < mzMax)

        totalPrecursor += precursorPerFeature
        # the id is read from the reader, which exposes the current feature
        featPrecursor[featureXML['id']] = precursorPerFeature

    # if writeCSV flag is set to True, write out csv file to the absolute path
    # of writeTo (default: precursorPerFeature.csv in the same folder)
    if writeCSV:
        compareDataWriter = output.CompareDataWriter(os.path.abspath(writeTo))
        compareDataWriter.precursorPerFeatureCsvWriter(featPrecursor)

    # calculate the average precursor per feature
    averagePrecursFeature = float(totalPrecursor) / float(len(featPrecursor))

    return {
        'totalPrecursorsInFeatures': totalPrecursor,
        'averagePrecursorsInFeatures': averagePrecursFeature,
        'featPerPrecursorDict': featPrecursor
    }
def test_getFeatures_rtWindowException(self):
    # getFeatures_rtWindow should raise a TypeError when one of its two
    # arguments is not a number.
    featureXML = parseFeatureXML.Reader(testFolder + 'featurexmlTestFile_1.featureXML')
    featureLocation = getWindow.FeatureLocation(featureXML)

    # The arguments have to be passed inside the lambda. Before, assertRaises
    # called a zero-argument lambda with two arguments, so the TypeError came
    # from the lambda itself and getFeatures_rtWindow was never called.
    # list() makes sure the whole loop is run in case the result is lazy.
    self.assertRaises(
        TypeError,
        lambda: list(featureLocation.getFeatures_rtWindow('not an int', 1)))
    self.assertRaises(
        TypeError,
        lambda: list(featureLocation.getFeatures_rtWindow(1, 'not an int')))