def test_getAllNestedItems(self):
    """
    Check what elementFunctions.getAllNestedItems returns for the first element that
    cElementTree.iterparse yields from peaksMzmlTestfile.peaks.mzML.

    All dicts returned for that element are merged into one dict, which is compared to the
    expected attributes.
    """
    # every key is listed once: a repeated key in a dict literal silently keeps only its
    # last value, so an extra "'id': ''" entry would never take part in the comparison
    expectedItems = {
        'fullName': 'Proteomics Standards Initiative Mass Spectrometry Ontology',
        'URI': 'http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo',
        'id': 'MS',
    }
    # dict to hold everything that getAllNestedItems returns
    actualItems = {}
    # the with statement closes the test file, also when parsing raises
    with open(testFolder+'peaksMzmlTestfile.peaks.mzML') as elementFile:
        for event, element in cElementTree.iterparse(elementFile):
            for item in elementFunctions.getAllNestedItems(element):
                actualItems.update(item)
            # only doing one to test, break
            break
    self.assertDictEqual(expectedItems, actualItems)
def getSimpleSpectraInfo(self):
    r"""
    Iterator function that yields all the spectrum elements in the file given to Reader().

    For every spectrum element the items of the element itself and of all elements nested in it
    (as returned by elementFunctions.getAllNestedItems) are saved in a dict, self.spectraInfo,
    which is used in the L{Reader.__getitem__} retrieval function:
        - an item with a 'name' is saved under the lower-cased name. Its value is the item's
          'value', or the name itself if the item has no 'value'.
        - an item without a 'name' but with an 'id' is saved as 'scan_id', using the part of the
          id that comes after 'scan='.
    The keys saved for the spectrum that is currently yielded are kept in self.__spectraKeySet.

    After a spectrum has been yielded, the element and the elements directly under it are
    cleared to lower the memory usage, so the information of a spectrum has to be read before
    asking for the next one.

    If you want to get all information exactly as found in the xml file, use
    L{parsePeaksMzML.Reader.getAllSpectraInfo}.

    @rtype: Element
    @return: Iterator of all the elements in the file whose tag, without namespace, is 'spectrum'
    @raise IndexError: A spectrum contains an item without 'name' whose 'id' does not contain 'scan='

    B{Example}:

    Print all the information of all the MS/MS spectra in example_peaks_file.peaks.mzML. Only showing one result:

    >>> peaksMzML = Reader('example_peaks_file.peaks.mzML')    # make a Reader instance
    >>> spectra = peaksMzML.getSimpleSpectraInfo()             # get all the spectra of the Reader instance
    >>> for spectrum in spectra:                               # loop through all the spectra
    ...     if int(peaksMzML['ms level']) == 2:
    ...         for keys in peaksMzML.getKeys():
    ...             print 'key: '+str(keys)+'\tvalue: '+str(peaksMzML[keys])
    ...         print '-'*20
    key: scan_id    value: 1
    key: centroid spectrum    value: centroid spectrum
    key: ms level    value: 2
    key: mass spectrum    value: mass spectrum
    key: positive scan    value: positive scan
    key: base peak m/z    value: 368.750823974609
    key: base peak intensity    value: 37719.2890625
    key: total ion current    value: 110887.0078125
    key: lowest observed m/z    value: 108.770645141602
    key: highest observed m/z    value: 754.29296875
    key: filter string    value: ITMS + c NSI d Full ms2 377.67@cid35.00 [90.00-770.00]
    key: preset scan configuration    value: 4
    key: no combination    value: no combination
    key: scan start time    value: 1158.9672
    key: [thermo trailer extra]monoisotopic m/z:    value: 377.673858642578
    key: scan window lower limit    value: 90
    key: scan window upper limit    value: 770
    key: isolation window target m/z    value: 377.673858642578
    key: isolation window lower offset    value: 1
    key: isolation window upper offset    value: 1
    key: selected ion m/z    value: 377.673858642578
    key: charge state    value: 2
    key: peak intensity    value: 55344.1875
    key: activation energy    value: 0
    key: collision-induced dissociation    value: collision-induced dissociation
    key: collision energy    value: 35
    """
    # looping through all the elements to get the cvParam and userParam of the element
    for element in self.getAllElements():
        # get the spectrum elements. Taking the last part of the split strips a '{namespace}'
        # prefix when there is one and also works for a tag without namespace
        if element.tag.split('}')[-1] == 'spectrum':
            # reset the keyset
            self.__spectraKeySet = []
            # Everything is saved instead of a selection of useful things, because someone might
            # need to know e.g. if it was a positive or a negative scan and it is better practice
            # to have everything in already and deal with what is needed later. So this uses a
            # recursive function to get all items of all elements that are nested in the spectrum.
            # NOTE(review): assumes self.spectraInfo[element] already gives a dict for a new
            # element (e.g. a defaultdict of dicts) - confirm where spectraInfo is created
            for info in elementFunctions.getAllNestedItems(element):
                if 'name' in info:
                    name = info['name'].lower()
                    # some dicts have a name but not a value, in that case the name is also the
                    # value (more informative than just null)
                    self.spectraInfo[element][name] = info.get('value', info['name'])
                    self.__spectraKeySet.append(name)
                # setting the id to a number instead of a big string (there is more in front of
                # scan=). An id without 'scan=' raises an IndexError here
                elif 'id' in info:
                    self.spectraInfo[element]['scan_id'] = info['id'].split('scan=')[1]
                    self.__spectraKeySet.append('scan_id')
            yield element
            # this gets called after every yield statement and clears every element that is under
            # the current element. Because all the nested elements of the current element have
            # already been used and the results saved in self.spectraInfo, they are not necessary
            # anymore and clearing them lowers the memory usage.
            for nestedElement in element:
                nestedElement.clear()
            element.clear()
def getSimpleSpectraInfo(self):
    r"""
    Iterator function that yields all the spectrum elements in the file given to Reader().

    For every spectrum element the items of the element itself and of all elements nested in it
    (as returned by elementFunctions.getAllNestedItems) are saved in a dict, self.spectraInfo,
    which is used in the L{Reader.__getitem__} retrieval function:
        - an item with a 'name' is saved under the lower-cased name. Its value is the item's
          'value', or the name itself if the item has no 'value'.
        - an item without a 'name' but with an 'id' is saved as 'scan_id', using the part of the
          id that comes after 'scan='.
    The keys saved for the spectrum that is currently yielded are kept in self.__spectraKeySet.

    After a spectrum has been yielded, the element and the elements directly under it are
    cleared to lower the memory usage, so the information of a spectrum has to be read before
    asking for the next one.

    If you want to get all information exactly as found in the xml file, use
    L{parsePeaksMzML.Reader.getAllSpectraInfo}.

    @rtype: Element
    @return: Iterator of all the elements in the file whose tag, without namespace, is 'spectrum'
    @raise IndexError: A spectrum contains an item without 'name' whose 'id' does not contain 'scan='

    B{Example}:

    Print all the information of all the MS/MS spectra in example_peaks_file.peaks.mzML. Only showing one result:

    >>> peaksMzML = Reader('example_peaks_file.peaks.mzML')    # make a Reader instance
    >>> spectra = peaksMzML.getSimpleSpectraInfo()             # get all the spectra of the Reader instance
    >>> for spectrum in spectra:                               # loop through all the spectra
    ...     if int(peaksMzML['ms level']) == 2:
    ...         for keys in peaksMzML.getKeys():
    ...             print 'key: '+str(keys)+'\tvalue: '+str(peaksMzML[keys])
    ...         print '-'*20
    key: scan_id    value: 1
    key: centroid spectrum    value: centroid spectrum
    key: ms level    value: 2
    key: mass spectrum    value: mass spectrum
    key: positive scan    value: positive scan
    key: base peak m/z    value: 368.750823974609
    key: base peak intensity    value: 37719.2890625
    key: total ion current    value: 110887.0078125
    key: lowest observed m/z    value: 108.770645141602
    key: highest observed m/z    value: 754.29296875
    key: filter string    value: ITMS + c NSI d Full ms2 377.67@cid35.00 [90.00-770.00]
    key: preset scan configuration    value: 4
    key: no combination    value: no combination
    key: scan start time    value: 1158.9672
    key: [thermo trailer extra]monoisotopic m/z:    value: 377.673858642578
    key: scan window lower limit    value: 90
    key: scan window upper limit    value: 770
    key: isolation window target m/z    value: 377.673858642578
    key: isolation window lower offset    value: 1
    key: isolation window upper offset    value: 1
    key: selected ion m/z    value: 377.673858642578
    key: charge state    value: 2
    key: peak intensity    value: 55344.1875
    key: activation energy    value: 0
    key: collision-induced dissociation    value: collision-induced dissociation
    key: collision energy    value: 35
    """
    # looping through all the elements to get the cvParam and userParam of the element
    for element in self.getAllElements():
        # get the spectrum elements. Taking the last part of the split strips a '{namespace}'
        # prefix when there is one and also works for a tag without namespace
        if element.tag.split('}')[-1] == 'spectrum':
            # reset the keyset
            self.__spectraKeySet = []
            # Everything is saved instead of a selection of useful things, because someone might
            # need to know e.g. if it was a positive or a negative scan and it is better practice
            # to have everything in already and deal with what is needed later. So this uses a
            # recursive function to get all items of all elements that are nested in the spectrum.
            # NOTE(review): assumes self.spectraInfo[element] already gives a dict for a new
            # element (e.g. a defaultdict of dicts) - confirm where spectraInfo is created
            for info in elementFunctions.getAllNestedItems(element):
                if 'name' in info:
                    name = info['name'].lower()
                    # some dicts have a name but not a value, in that case the name is also the
                    # value (more informative than just null)
                    self.spectraInfo[element][name] = info.get('value', info['name'])
                    self.__spectraKeySet.append(name)
                # setting the id to a number instead of a big string (there is more in front of
                # scan=). An id without 'scan=' raises an IndexError here
                elif 'id' in info:
                    self.spectraInfo[element]['scan_id'] = info['id'].split('scan=')[1]
                    self.__spectraKeySet.append('scan_id')
            yield element
            # this gets called after every yield statement and clears every element that is under
            # the current element. Because all the nested elements of the current element have
            # already been used and the results saved in self.spectraInfo, they are not necessary
            # anymore and clearing them lowers the memory usage.
            for nestedElement in element:
                nestedElement.clear()
            element.clear()