Esempio n. 1
0
    def __init__(self, mapped_featureXML_1, mapped_featureXML_2, trafoXML_file):
        """
        Couple every retention time pair of a trafoXML file to the ids of the features the retention times belong to.

        Fills three attributes:
          - self.retentionDict_1 and self.retentionDict_2: per featureXML reader a dictionary with the retention time
            (as string) as key and a dictionary with the keys 'intensity' and 'feature_id' as value.
          - self.trafoXML_list: one dictionary per Pair node of the trafoXML file, with the keys 'from_featureID',
            'to_featureID', 'from' (float) and 'to' (float).

        @type mapped_featureXML_1: pyMS.parseFeatureXML.Reader
        @param mapped_featureXML_1: An instance of L{parseFeatureXML.Reader}. This is one of the files mapped by OpenMS's MapAlignerPoseClustering.
        @type mapped_featureXML_2: pyMS.parseFeatureXML.Reader
        @param mapped_featureXML_2: An instance of L{parseFeatureXML.Reader}. This is one of the files mapped by OpenMS's MapAlignerPoseClustering.
        @type trafoXML_file: string
        @param trafoXML_file: path to the trafoXML file that corresponds to featureXML instance 1 and featureXML instance 2. Has to be the linear file, because the identity file doesn't contain transformations
        @raise IOError: trafoXML is an identity file (need a linear file)
        @raise RuntimeError: FeatureXML and trafo_xml didn't match up (a Pair node has a 'from' or 'to' retention time that is in neither featureXML file)
        """
        # self.retentionDict_1 will contain all the feature info with retention time as key of featureXML file 1
        self.retentionDict_1 = {}
        # self.retentionDict_2 will contain all the feature info with retention time as key of featureXML file 2
        self.retentionDict_2 = {}
        # This is more expensive on memory than searching the features for every pair, but the retention time
        # can be found with O(1). The key is the retention time exactly as the reader returns it, turned into a
        # string (no rounding is done), because it is compared to the 'from' and 'to' strings of the trafoXML file.
        for reader, retentionDict in ((mapped_featureXML_1, self.retentionDict_1),
                                      (mapped_featureXML_2, self.retentionDict_2)):
            # the loop variable itself is not used: the values are read from the reader while it is
            # positioned on the feature that was yielded last
            for feature in reader.getSimpleFeatureInfo():
                retentionDict[str(reader['retention time'])] = {'intensity': reader.getElementInfo()['intensity'],
                                                                'feature_id': reader['id']}

        # to keep the to and from changes in a list of dictionaries with as keys 'from' and 'to' and as values the retention times
        self.trafoXML_list = []

        # marker for "no feature found". A fresh object is used so that it can never be equal to a real feature id
        notFound = object()

        for event, element in cElementTree.iterparse(trafoXML_file):
            if element.tag == 'Transformation':
                if elementFunctions.getItems(element)['name'] == 'identity':
                    raise IOError(trafoXML_file+' is a trafoXML identity file (see the Transformation node). There is no information in the identity file. Use the \'linear\' file as input')
            if element.tag == 'Pair':
                pairItems = elementFunctions.getItems(element)
                fromKey = str(pairItems['from'])
                toKey = str(pairItems['to'])

                # Start every pair without a match. If the ids were kept from the previous pair, a pair that
                # does not match any feature would silently be saved with the ids of the pair before it.
                fromFeatureID = notFound
                toFeatureID = notFound
                # file 2 is checked last, so when both files contain the retention time the id of file 2 is used
                for retentionDict in (self.retentionDict_1, self.retentionDict_2):
                    if fromKey in retentionDict:
                        fromFeatureID = retentionDict[fromKey]['feature_id']
                    if toKey in retentionDict:
                        toFeatureID = retentionDict[toKey]['feature_id']

                unmatched = [side for side, featureID in (('from', fromFeatureID), ('to', toFeatureID))
                             if featureID is notFound]
                if unmatched:
                    raise RuntimeError('Something wrong with the input files. Probably the featureXML files didn\'t match the trafoXML file. Check your input. Actual error raised was: '
                                       + 'no feature found for the \'' + '\' and \''.join(unmatched)
                                       + '\' retention time of the Pair with from=' + fromKey + ' and to=' + toKey)

                self.trafoXML_list.append({'from_featureID': fromFeatureID, 'to_featureID': toFeatureID,
                                           'from': float(pairItems['from']), 'to': float(pairItems['to'])})
Esempio n. 2
0
    def getMapInfo(self):
        """
        Collect the attributes of every 'map' element and store them in self.maplist.

        Every element with the tag 'map' is turned into a dictionary with the keys 'name', 'unique_id', 'label'
        and 'size' (size converted to int). The dictionary is stored in self.maplist at the index given by the
        'id' attribute of the element. Indexes below it that were not in the list yet are filled with an empty dictionary.

        @rtype: list
        @return: self.maplist
        """
        for candidate in self.getAllElements():
            # only the map nodes are of interest
            if candidate.tag != 'map':
                continue

            attributes = elementFunctions.getItems(candidate)
            index = int(attributes['id'])
            details = {
                'name': attributes['name'],
                'unique_id': attributes['unique_id'],
                'label': attributes['label'],
                'size': int(attributes['size']),
            }

            # make the list long enough to be able to assign to self.maplist[index]
            shortage = index + 1 - len(self.maplist)
            if shortage > 0:
                self.maplist.extend([{} for _ in range(shortage)])
            self.maplist[index] = details

        return self.maplist
Esempio n. 3
0
 def test_getItems(self):
     """Check that elementFunctions.getItems returns the attributes of an element parsed from the featureXML test file."""
     expectedItems = {'{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation': 'http://open-ms.sourceforge.net/schemas/FeatureXML_1_4.xsd', 'version': '1.4', 'id': 'fm_2007447552192692304'}

     # use the celementtree iterparse function to loop through the tree. The with statement closes the
     # test file again, also when parsing fails halfway.
     with open(testFolder+'featurexmlTestFile_1.featureXML') as elementFile:
         for event, element in cElementTree.iterparse(elementFile):
             # itemsDict is overwritten every time, so after the loop it holds the items of the element that
             # was parsed last (with the default iterparse events that should be the root node -- confirm)
             itemsDict = elementFunctions.getItems(element)

     # assert that the itemsDict dictionary contains the subset of the expectedItems dict
     self.assertDictContainsSubset(expectedItems, itemsDict)
Esempio n. 4
0
    def getAssignedPeptidesMZandRTvalue(self):
        """
        Iterator function that yields all the assigned peptide m/z and retention time value and the accession number of the
        protein they are assigned to, together with the prot_* and pep_* values found in the file.

        One dictionary is yielded for every pep_scan_title node. The 'mz', 'rt', 'fileroot' and 'scannumber' values are
        taken from the result of self._parseTitle(scan title) and are None if the parsed title does not contain them.
        'pep_num_match' is None if the peptide has no pep_num_match node before its pep_scan_title node.

        @rtype: generator of dict
        @return: A dict per assigned peptide with m/z, RT value, protein description and the other protein and peptide values
        @raise RuntimeError: None of the regular expressions for parsing the scan_title was set

        B{Example:}

        Printing all assigned peptide's m/z value, RT value and protein description:

        >>> mascot = Reader('example_mascot_file.xml')    # make a read instance
        >>> for result in mascot.getAssignedPeptidesMZandRTvalue():
        ...    print result
        """
        if self.scan_re is None and self.file_re is None and self.rt_re is None and self.mz_re is None:
            raise RuntimeError('None of the regular expressions was set to get the scan number or m/z and rt value out of the scan title. You can set them using setScanRE, setFileRE, setRtRE, setMzRE')

        for element in self.getAllElements():
            # get the useful info from the element tag (the part after the namespace)
            elementTag = element.tag.split('}')[-1]
            # the protein and peptide information is nested inside hits>hit>protein>peptide>pep_scan_title
            if elementTag != 'hits':
                continue
            for hit in element:
                for protein in hit:
                    proteinAccession = elementFunctions.getItems(protein)['accession']
                    prot_desc = "No description"
                    for protInfo in protein:
                        protInfoTag = protInfo.tag.split('}')[-1]
                        if protInfoTag == 'prot_desc':
                            # keep the default description if the node is empty
                            if protInfo.text:
                                prot_desc = protInfo.text
                        elif protInfoTag == 'prot_score':
                            prot_score = protInfo.text
                        elif protInfoTag == 'prot_mass':
                            prot_mass = protInfo.text
                        elif protInfoTag == 'prot_matches':
                            prot_matches = protInfo.text
                        elif protInfoTag == 'prot_matches_sig':
                            prot_matches_sig = protInfo.text
                        elif protInfoTag == 'prot_sequences':
                            prot_sequences = protInfo.text
                        elif protInfoTag == 'prot_sequences_sig':
                            prot_sequences_sig = protInfo.text
                        elif protInfoTag == 'peptide':
                            # pep_num_match does not always exist, so give it a default once per peptide.
                            # Resetting it for every node of the peptide would throw the value away again
                            # before the pep_scan_title node is reached.
                            pep_num_match = None
                            for pepInfo in protInfo:
                                pepInfoTag = pepInfo.tag.split('}')[-1]
                                if pepInfoTag == 'pep_exp_mz':
                                    pep_exp_mz = pepInfo.text
                                elif pepInfoTag == 'pep_exp_mr':
                                    pep_exp_mr = pepInfo.text
                                elif pepInfoTag == 'pep_exp_z':
                                    pep_exp_z = pepInfo.text
                                elif pepInfoTag == 'pep_calc_mr':
                                    pep_calc_mr = pepInfo.text
                                elif pepInfoTag == 'pep_delta':
                                    pep_delta = pepInfo.text
                                elif pepInfoTag == 'pep_miss':
                                    pep_miss = pepInfo.text
                                elif pepInfoTag == 'pep_score':
                                    pep_score = pepInfo.text
                                elif pepInfoTag == 'pep_expect':
                                    pep_expect = pepInfo.text
                                elif pepInfoTag == 'pep_res_before':
                                    pep_res_before = pepInfo.text
                                elif pepInfoTag == 'pep_seq':
                                    pep_seq = pepInfo.text
                                elif pepInfoTag == 'pep_res_after':
                                    pep_res_after = pepInfo.text
                                elif pepInfoTag == 'pep_var_mod':
                                    pep_var_mod = pepInfo.text
                                elif pepInfoTag == 'pep_var_mod_pos':
                                    pep_var_mod_pos = pepInfo.text
                                elif pepInfoTag == 'pep_num_match':
                                    pep_num_match = pepInfo.text
                                elif pepInfoTag == 'pep_scan_title':
                                    pep_scan_title = pepInfo.text
                                    ## TODO allow separate RE to parse mz/rt/scan number  values from the title.
                                    titlepar = self._parseTitle(pep_scan_title)
                                    # values that could not be parsed out of the title are None
                                    mz = titlepar.get('mz')
                                    rt = titlepar.get('rt')
                                    fileroot = titlepar.get('file')
                                    scan = titlepar.get('scan')

                                    # the values of the nodes that came before the scan title are used here
                                    yield {'mz': mz, 'rt': rt, 'protAccession': proteinAccession, 'prot_desc': prot_desc,
                                           'prot_score': prot_score, 'prot_mass': prot_mass, 'prot_matches': prot_matches,
                                           'prot_matches_sig': prot_matches_sig, 'prot_sequences': prot_sequences,
                                           'prot_sequences_sig': prot_sequences_sig, 'pep_exp_mz': pep_exp_mz,
                                           'pep_exp_mr': pep_exp_mr, 'pep_exp_z': pep_exp_z, 'pep_calc_mr': pep_calc_mr,
                                           'pep_delta': pep_delta, 'pep_miss': pep_miss, 'pep_score': pep_score,
                                           'pep_expect': pep_expect, 'pep_res_before': pep_res_before, 'pep_seq': pep_seq,
                                           'pep_res_after': pep_res_after, 'pep_var_mod': pep_var_mod, 'pep_var_mod_pos': pep_var_mod_pos,
                                           'pep_num_match': pep_num_match, 'pep_scan_title': pep_scan_title, 'fileroot': fileroot, 'scannumber': scan}
Esempio n. 5
0
    def getSimpleFeatureInfo(self):
        """
        Iterator function that yields all the feature elements in the file given to Reader().
        It saves info from the features in a dict, self.elementInfo, which is used in the L{parseFeatureXML.Reader.__getitem__} retrieval function.
        This function has predefined information like intensity, overallquality, convexhull etc that make for easier browsing, but because of this
        it does not contain all information. If you want to get all information exactly as found in the xml file, use L{parseFeatureXML.Reader.getAllFeatureInfo}.

        The keys saved per feature are 'intensity', 'overallquality', 'userParam', 'convexhull', 'mz', 'retention time',
        'quality', 'charge', 'content' and 'id'. The m/z value is saved as float, the retention time, intensity, overallquality
        and charge are saved as the text found in the file.

        After a feature is yielded, the feature element and its nested elements are cleared to lower the memory usage.

        @rtype: Element
        @return: Iterator of all the elements in the file where element.tag == 'feature'
        @raise RuntimeError: No features in the file, a position with a dim other than 0 or 1, or a convexhull point attribute other than x or y

        B{Example}:

        Printing all the features in a file:

        >>> featureXML = Reader('example_feature_file.featureXML')    # make a reader instance
        >>> allElements = featureXML.getAllElements()    # get all feature elements of the reader instance, you can now iterate over allElements
        >>> features = featureXML.getSimpleFeatureInfo()
        >>> for feature in features:
        ...    print feature
        <Element 'feature' at 0x6184270>
        <Element 'feature' at 0x6184cc0>
        <Element 'feature' at 0x6188630>

        Printing the intensities of all features:

        >>> featureXML = Reader('example_feature_file.featureXML')    # make a reader instance
        >>> allElements = featureXML.getAllElements()    # get all feature elements of the reader instance, you can now iterate over allElements
        >>> features = featureXML.getSimpleFeatureInfo()
        >>> for feature in features:
        ...    print featureXML['intensity']
        6182
        3543
        2134
        """
        # counter for the amount of elements with a feature tag. If it stays 0 at the end of the yielding this function raises a runtime error
        featureCount = 0
        for element in self.getAllElements():
            # Remember the text of the intensity and overallquality elements, they are saved when the feature element
            # itself is handled (presumably getAllElements yields the nested elements before the feature -- confirm)
            if element.tag == 'intensity':
                intensity = element.text
            elif element.tag == 'overallquality':
                overallquality = element.text
            elif element.tag == 'feature':
                featureCount += 1  # keeping track of the amount of features
                # Add all the necessary keys for easy browsing (so the element name) to elementInfo[element]. This is not very generic
                # but it has all the node names and the features should be easily browsable using the __getitem__ implementation
                # This only works as long as the featureXML format stays the same
                featureInfo = self.elementInfo[element]
                featureInfo['intensity'] = intensity
                featureInfo['overallquality'] = overallquality
                featureInfo['userParam'] = []
                featureInfo['convexhull'] = []
                featureInfo['mz'] = 0
                featureInfo['retention time'] = 0
                featureInfo['quality'] = []
                featureInfo['charge'] = 0
                featureInfo['content'] = element.text
                featureInfo['id'] = elementFunctions.getItems(element)['id']

                # Every feature starts with its own list of convexhull points. Without this a feature whose first
                # convexhull does not have nr 0 would add its points to the list of the previous feature.
                convexhullList = []

                # for every element in feature (the rest of the info of feature is already saved in getAllElements())
                for nestedElement in element:
                    nestedTag = nestedElement.tag
                    # the mz and retention time is saved in the position (in a very unhandy way) get it out and put it in elementInfo
                    if nestedTag == 'position':
                        # the dim attribute is either 0 or 1. If it is 0 the text of the element is the retention time,
                        # if it is 1 the text of the element is the mz value
                        dimValue = int(elementFunctions.getItems(nestedElement)['dim'])
                        if dimValue == 0:
                            featureInfo['retention time'] = nestedElement.text
                        elif dimValue == 1:
                            featureInfo['mz'] = float(nestedElement.text)
                        else:
                            raise RuntimeError('Value of dim in getSimpleFeatureInfo should never be other value than 0 or 1')

                    elif nestedTag == 'userParam':
                        # to directly access all properties of the userParam, the name of the property is taken as
                        # dictionary key and the result is taken as value
                        nestedItems = elementFunctions.getItems(nestedElement)
                        userParamDict = {}
                        for key in nestedElement.keys():
                            userParamDict[key] = nestedItems[key]
                        featureInfo['userParam'].append(userParamDict)

                    elif nestedTag == 'quality':
                        # saved as [first (attribute name, attribute value) pair of the element, text of the element]
                        featureInfo['quality'].append([nestedElement.items()[0], nestedElement.text])

                    elif nestedTag == 'charge':
                        featureInfo['charge'] = nestedElement.text

                    # if the tag == convexhull it has more elements
                    elif nestedTag == 'convexhull':
                        # Only remake the list of points if the nr of the convexhull is 0 (this is for featureFinder
                        # version 1.9.0), the points of the convexhulls with a higher nr are added to the same list
                        if int(elementFunctions.getItems(nestedElement)['nr']) == 0:
                            convexhullList = []
                        for pointElement in nestedElement:
                            pointItems = elementFunctions.getItems(pointElement)
                            # a dict that will keep the rt and mz coordinates of one point
                            pointDict = {}
                            for pointKey in pointElement.keys():
                                # because mz and rt is easier, change x and y
                                if pointKey == 'x':
                                    newKey = 'rt'
                                elif pointKey == 'y':
                                    newKey = 'mz'
                                else:
                                    raise RuntimeError('This shouldn\'t happen, pointKey should be either \'x\' or \'y\', not: ' + str(pointKey))
                                pointDict[newKey] = pointItems[pointKey]

                            # for every pointElement add the point to the convexhull
                            convexhullList.append(pointDict)

                        # add the convexhullList to elementInfo
                        featureInfo[nestedTag] = convexhullList

                yield element
                # this gets called after every yield statement and clears every element that is under the current element. Because all the
                # nested elements of the current element have already been used and the results saved in self.elementInfo, they are not
                # necessary anymore and clearing them lowers the memory usage.
                for nestedElement in element:
                    nestedElement.clear()
                element.clear()

        if featureCount == 0:
            raise RuntimeError('There were no features found in self.getAllElements(). Not a valid featureXML file:' + str(self.path))
Esempio n. 6
0
    def getSimpleElementInfo(self):
        """
        Iterator function that yields all the consensusElement elements in the file given to Reader().
        It saves info from the elements in a dict, self.elementInfo, which is used in the L{parseConsensusXML.Reader.__getitem__} retrieval function.
        This function has predefined information like mz, rt, intensity etc that make for easier browsing, but because of this
        it does not contain all information. If you want to get all information exactly as found in the xml file, use L{parseConsensusXML.Reader.getAllElementInfo}.

        The keys saved per consensusElement are 'mz', 'rt', 'intensity' (floats taken from the centroid node, 0 if there is no
        centroid node), 'quality', 'charge', 'id' (attributes of the consensusElement) and 'elements', a list with a dictionary
        {'id': ...} for every element node of the groupedElementList. Ids that do not start with 'f_' get 'f_' put in front of them.

        After an element is yielded, the element and its nested elements are cleared to lower the memory usage.

        @rtype: Element
        @return: Iterator of all the elements in the file where element.tag == 'consensusElement'
        @raise RuntimeError: No consensusElement elements in the file

        B{Example}:

        Printing all the groupedElements in a file:

        >>> consensusXML = Reader('example_consensus_file.consensusXML')    # make a reader instance
        >>> allElements = consensusXML.getAllElements()    # get all feature elements of the reader instance, you can now iterate over allElements
        >>> elements = consensusXML.getSimpleElementInfo()
        >>> for element in elements:
        ...    print element
        <Element 'consensusElement' at 0x6184270>
        <Element 'consensusElement' at 0x6184cc0>
        <Element 'consensusElement' at 0x6188630>

        Printing the intensities of all elements:

        >>> consensusXML = Reader('example_consensus_file.consensusXML')    # make a reader instance
        >>> allElements = consensusXML.getAllElements()    # get all feature elements of the reader instance, you can now iterate over allElements
        >>> elements = consensusXML.getSimpleElementInfo()
        >>> for element in elements:
        ...    print consensusXML['intensity']
        6182
        3543
        2134
        """
        # counter for the amount of elements with a consensusElement tag. If it stays 0 at the end of the yielding this function raises a runtime error
        elementCount = 0

        for element in self.getAllElements():
            if element.tag != 'consensusElement':
                continue

            elementCount += 1  # keeping track of the amount of consensus elements
            # Add all the necessary keys for easy browsing to elementInfo[element]. This is not very generic
            # but the elements should be easily browsable using the __getitem__ implementation
            # This only works as long as the consensusXML format stays the same
            consensusInfo = self.elementInfo[element]
            consensusItems = elementFunctions.getItems(element)
            # defaults, replaced by the values of the centroid node below
            consensusInfo['mz'] = 0
            consensusInfo['rt'] = 0
            consensusInfo['intensity'] = 0

            consensusInfo['quality'] = consensusItems['quality']
            consensusInfo['charge'] = consensusItems['charge']
            consensusInfo['elements'] = []
            consensusInfo['id'] = consensusItems['id']

            # for every element in the consensusElement (the rest of the info is already saved in getAllElements())
            for nestedElement in element:
                if nestedElement.tag == 'centroid':
                    # the position and intensity of the consensus element are attributes of the centroid node
                    centroidItems = elementFunctions.getItems(nestedElement)
                    consensusInfo['rt'] = float(centroidItems['rt'])
                    consensusInfo['mz'] = float(centroidItems['mz'])
                    consensusInfo['intensity'] = float(centroidItems['it'])

                elif nestedElement.tag == 'groupedElementList':
                    # save the id of every element that was grouped into this consensus element
                    for ele in nestedElement:
                        if ele.tag == 'element':
                            groupedID = elementFunctions.getItems(ele)['id']
                            # make sure every saved id starts with 'f_'
                            if groupedID[0:2] != 'f_':
                                groupedID = 'f_%s' % groupedID
                            consensusInfo['elements'].append({'id': groupedID})

            yield element
            # this gets called after every yield statement and clears every element that is under the current element. Because all the
            # nested elements of the current element have already been used and the results saved in self.elementInfo, they are not
            # necessary anymore and clearing them lowers the memory usage.
            for nestedElement in element:
                nestedElement.clear()
            element.clear()

        if elementCount == 0:
            raise RuntimeError('There were no consensus features found in self.getAllElements(). Not a valid featureXML file:'+str(self.path))
Esempio n. 7
0
def getFeatureConvexhullCoordinates(featureElement):
    """
    Get the bounding box of the convexhull of featureElement.

    Returns a dictionary with the feature element as key and, as value, a dictionary with the keys
    mzMax, mzMin, rtMax and rtMin: the maximum and minimum m/z ratio and the maximum and minimum
    retention time of the convexhull. These four values describe a rectangle around the convexhull;
    they do not take into account that feature convexhulls are not perfect rectangles.

    The coordinates are returned as the strings read from the file, but the minimum and maximum are
    selected by numeric value (a plain string comparison would rank '999.5' above '1000.2').

    Every convexhull child writes to the same dictionary key, so when a feature holds several
    convexhull elements the returned bounds are those of the last one.
    NOTE(review): confirm whether the bounds should instead span all convexhulls of the feature.

    @type featureElement: Element
    @param featureElement: A feature element
    @rtype: dictionary
    @return: Dictionary with key the feature and values the coordinates of the 4 corners of the convexhull
    @raises IOError: No convexhulls in the element
    @raises TypeError: featureElement is not of type Element
    @raises KeyError: A convexhull point has attributes but lacks 'x' or 'y'
    @raises ValueError: A convexhull without any points (min() of an empty list) or a coordinate that is not a number

    B{Example}:

    Print the convexhull coordinates of all the features in a file:

    >>> import parseFeatureXML                                                 # to get the features use parseFeatureXML
    >>> featureXML = parseFeatureXML.Reader('example_feature_file.featureXML')   # make a reader instance
    >>> for feature in featureXML.getFeatures():                               # loop through all the features
    ...    print(getFeatureConvexhullCoordinates(feature))                     # print the coordinates of all the feature convexhulls
    {<Element 'feature' at 0x136b9a80>: {'mzMax': '338.251376135343', 'rtMin': '5105.9217', 'rtMax': '5111.6874', 'mzMin': '336.124751115092'}}
    {<Element 'feature' at 0x136bd510>: {'mzMax': '430.197574989105', 'rtMin': '4001.7973', 'rtMax': '4017.7105', 'mzMin': '428.070943557216'}}
    {<Element 'feature' at 0x136bde40>: {'mzMax': '339.251376135343', 'rtMin': '5107.9217', 'rtMax': '5112.6874', 'mzMin': '337.124751115092'}}

    """
    # Compare on the name of the type instead of on str(type(...)): the printed form depends on the
    # interpreter and the ElementTree implementation ("<type 'Element'>" versus "<class '...Element'>").
    if type(featureElement).__name__ != 'Element':
        raise TypeError('featureElement in getFeatureConvexhullCoordinates is not of type Element but of type: '+str(type(featureElement)))

    # the corner coordinates of the feature, keyed on the feature element
    featureCoordinate = {}
    countConvexhull = 0 # the amount of convexhull tags found
    for element in featureElement:
        if element.tag != 'convexhull':
            continue
        # every convexhull gets fresh lists: retention time is the x coordinate, m/z the y coordinate
        retentionTimeList = []
        mzList = []
        for pt in element:
            pointItems = elementFunctions.getItems(pt)
            if pointItems != {}:
                # version 1.8.0 syntax: the coordinates are the attributes x and y of the point.
                # Read both before appending so the two lists can never get out of step.
                try:
                    rtValue = pointItems['x']
                    mzValue = pointItems['y']
                except KeyError:
                    sys.stdout.write('Your featureXML file is not in the format of output from version 1.8.0 or 1.7.0 FeatureFinder')
                    raise
                retentionTimeList.append(rtValue)
                mzList.append(mzValue)
            else:
                # version 1.7.0 syntax: every point holds one child per dimension
                for convex in pt:
                    # dim 0 is retention time, dim 1 is m/z
                    dim = int(elementFunctions.getItems(convex)['dim'])
                    if dim == 0:
                        retentionTimeList.append(convex.text)
                    elif dim == 1:
                        mzList.append(convex.text)
                    else:
                        warnings.warn('dim in convexhull hullpoint is not 0 or 1. Value not used',stacklevel=2)

        # key=float compares numerically while the original strings are what gets stored
        rtMin = min(retentionTimeList, key=float)
        rtMax = max(retentionTimeList, key=float)
        mzMin = min(mzList, key=float)
        mzMax = max(mzList, key=float)

        featureCoordinate[featureElement] = {'rtMin':rtMin, 'rtMax':rtMax, 'mzMin':mzMin,'mzMax':mzMax}
        countConvexhull += 1

    if countConvexhull == 0:
        raise IOError('No convexhulls in the element, check your featureXML file')
    return featureCoordinate
Esempio n. 8
0
    def getSimpleFeatureInfo(self):
        """
        Iterator function that yields all the feature elements in the file given to Reader().
        It saves info from the features in a dict, self.elementInfo, which is used in the L{parseFeatureXML.Reader.__getitem__} retrieval function.
        This function has predefined information like intensity, overallquality, convexhull etc that make for easier browsing, but because of this
        it does not contain all information. If you want to get all information exactly as found in the xml file, use L{parseFeatureXML.Reader.getAllFeatureInfo}.

        The keys written to self.elementInfo[element] are: intensity, overallquality, userParam, convexhull, mz, retention time,
        quality, charge, content and id. 'mz' is stored as a float; 'retention time' is stored as the text found in the file.
        After a feature has been yielded the feature element and its nested elements are cleared to lower the memory usage, so
        the information has to be read from self.elementInfo (or from the element before asking for the next one).

        @rtype: Element
        @return: Iterator of all the elements in the file where element.tag == 'feature'
        @raise RuntimeError: No features in the file, a position with a dim other than 0 or 1, or a convexhull point attribute other than x or y

        B{Example}:

        Printing all the features in a file:

        >>> featureXML = Reader('example_feature_file.featureXML')    # make a reader instance
        >>> allElements = featureXML.getAllElements()    # get all feature elements of the reader instance, you can now iterate over allElements
        >>> features = featureXML.getSimpleFeatureInfo()
        >>> for feature in features:
        ...    print(feature)
        <Element 'feature' at 0x6184270>
        <Element 'feature' at 0x6184cc0>
        <Element 'feature' at 0x6188630>

        Printing the intensities of all features:

        >>> featureXML = Reader('example_feature_file.featureXML')    # make a reader instance
        >>> allElements = featureXML.getAllElements()    # get all feature elements of the reader instance, you can now iterate over allElements
        >>> features = featureXML.getSimpleFeatureInfo()
        >>> for feature in features:
        ...    print(featureXML['intensity'])
        6182
        3543
        2134
        """
        # counter for the amount of elements with a feature tag. If it is still 0 after the loop a RuntimeError is raised
        featureCount = 0
        # Text of the most recently seen intensity / overallquality element. They start as None so that a feature
        # that shows up before either tag was seen stores None instead of failing on an unbound local.
        intensity = None
        overallquality = None
        for element in self.getAllElements():
            if element.tag == 'intensity':
                intensity = element.text
            elif element.tag == 'overallquality':
                overallquality = element.text
            elif element.tag == 'feature':
                featureCount += 1
                # Add all the keys used for easy browsing through __getitem__. This is not very generic and
                # only works as long as the featureXML format stays the same.
                featureInfo = self.elementInfo[element]
                featureInfo['intensity'] = intensity
                featureInfo['overallquality'] = overallquality
                featureInfo['userParam'] = []
                featureInfo['convexhull'] = []
                featureInfo['mz'] = 0
                featureInfo['retention time'] = 0
                featureInfo['quality'] = []
                featureInfo['charge'] = 0
                featureInfo['content'] = element.text
                featureInfo['id'] = elementFunctions.getItems(element)['id']

                # Every feature starts with its own point list. Without this a feature whose first convexhull
                # does not have nr 0 would append to the list that is already stored for the previous feature.
                convexhullList = []

                # for every element in feature (the rest of the info of feature is already saved in getAllElements())
                for nestedElement in element:
                    nestedTag = nestedElement.tag
                    if nestedTag == 'position':
                        # dim 0 holds the retention time, dim 1 holds the m/z value
                        dimValue = int(elementFunctions.getItems(nestedElement)['dim'])
                        if dimValue == 0:
                            featureInfo['retention time'] = nestedElement.text
                        elif dimValue == 1:
                            featureInfo['mz'] = float(nestedElement.text)
                        else:
                            raise RuntimeError('Value of dim in getSimpleFeatureInfo should never be other value than 0 or 1')

                    elif nestedTag == 'userParam':
                        # every attribute of the userParam becomes a key, read from one getItems() result
                        attributes = elementFunctions.getItems(nestedElement)
                        userParamDict = {}
                        for key in nestedElement.keys():
                            userParamDict[key] = attributes[key]
                        featureInfo['userParam'].append(userParamDict)

                    elif nestedTag == 'quality':
                        featureInfo['quality'].append([nestedElement.items()[0],nestedElement.text])

                    elif nestedTag == 'charge':
                        featureInfo['charge'] = nestedElement.text

                    elif nestedTag == 'convexhull':
                        # Only start a new point list when the nr of the convexhull is 0, the following
                        # convexhulls of the feature add to it (this is for featureFinder version 1.9.0)
                        if int(elementFunctions.getItems(nestedElement)['nr']) == 0:
                            convexhullList = []
                        for pointElement in nestedElement:
                            pointItems = elementFunctions.getItems(pointElement)
                            # one point: x is stored under 'rt' and y under 'mz' because that is easier to read
                            pointDict = {}
                            for pointKey in pointElement.keys():
                                if pointKey == 'x':
                                    newKey = 'rt'
                                elif pointKey == 'y':
                                    newKey = 'mz'
                                else:
                                    raise RuntimeError('This shouldn\'t happen, pointKey should be either \'x\' or \'y\', not: '+str(pointKey))
                                pointDict[newKey] = pointItems[pointKey]
                            convexhullList.append(pointDict)

                        featureInfo[nestedTag] = convexhullList

                yield element
                # This runs when the caller asks for the next feature. All nested elements have been used and their
                # results saved in self.elementInfo, so clearing them lowers the memory usage.
                for nestedElement in element:
                    nestedElement.clear()
                element.clear()

        if featureCount == 0:
            raise RuntimeError('There were no features found in self.getAllElements(). Not a valid featureXML file:'+str(self.path))
Esempio n. 9
0
    def getAssignedPeptidesMZandRTvalue(self):
        """
        Iterator function that yields, for every assigned peptide, the m/z and retention time value and the accession number of the
        protein it is assigned to, together with the prot_* and pep_* fields listed below.

        One dict is yielded for every pep_scan_title element, at the moment that element is reached. It holds the keys:
        mz, rt, fileroot and scannumber (taken from the 'mz', 'rt', 'file' and 'scan' entries of self._parseTitle(pep_scan_title),
        None when the entry is missing), protAccession, pep_scan_title, the protein fields prot_desc, prot_score, prot_mass,
        prot_matches, prot_matches_sig, prot_sequences, prot_sequences_sig and the peptide fields pep_exp_mz, pep_exp_mr,
        pep_exp_z, pep_calc_mr, pep_delta, pep_miss, pep_score, pep_expect, pep_res_before, pep_seq, pep_res_after, pep_var_mod,
        pep_var_mod_pos, pep_num_match.

        Protein fields are reset for every protein and peptide fields for every peptide, so a field that is not (yet) present
        is None and never a leftover from an earlier record. pep_num_match keeps the value read from the peptide.

        @rtype: iterator of dict
        @return: A dict for each assigned peptide with m/z, RT value and protein description
        @raise KeyError: A protein element without an 'accession' item

        B{Example:}

        Printing all assigned peptide's m/z value, RT value and protein description:

        >>> mascot = Reader('example_mascot_file.xml')    # make a read instance
        >>> for result in mascot.getAssignedPeptidesMZandRTvalue():
        ...    print(result)
        """
        # tags whose text is copied as-is into the result, under the tag name
        proteinFields = ('prot_desc', 'prot_score', 'prot_mass', 'prot_matches',
                         'prot_matches_sig', 'prot_sequences', 'prot_sequences_sig')
        peptideFields = ('pep_exp_mz', 'pep_exp_mr', 'pep_exp_z', 'pep_calc_mr', 'pep_delta',
                         'pep_miss', 'pep_score', 'pep_expect', 'pep_res_before', 'pep_seq',
                         'pep_res_after', 'pep_var_mod', 'pep_var_mod_pos', 'pep_num_match')

        for element in self.getAllElements():
            # drop the '{namespace}' prefix of the tag
            if element.tag.split('}')[-1] != 'hits':
                continue
            # the protein and peptide information is nested inside hits>hit>protein>peptide>pep_scan_title
            for hit in element:
                for protein in hit:
                    proteinAccession = elementFunctions.getItems(protein)['accession']
                    # every protein starts with all its fields set to None
                    proteinValues = dict.fromkeys(proteinFields)
                    for protInfo in protein:
                        protInfoTag = protInfo.tag.split('}')[-1]
                        if protInfoTag in proteinValues:
                            proteinValues[protInfoTag] = protInfo.text
                        elif protInfoTag == 'peptide':
                            # Reset once per peptide, not once per child element: not every field always exists
                            # (pep_num_match for one) and resetting per child would wipe it before the yield.
                            peptideValues = dict.fromkeys(peptideFields)
                            for pepInfo in protInfo:
                                pepInfoTag = pepInfo.tag.split('}')[-1]
                                if pepInfoTag in peptideValues:
                                    peptideValues[pepInfoTag] = pepInfo.text
                                elif pepInfoTag == 'pep_scan_title':
                                    pep_scan_title = pepInfo.text
                                    ## TODO allow separate RE to parse mz/rt/scan number  values from the title.
                                    titlepar = self._parseTitle(pep_scan_title)
                                    result = {'mz':titlepar.get('mz'), 'rt':titlepar.get('rt'),
                                              'protAccession':proteinAccession,
                                              'pep_scan_title':pep_scan_title,
                                              'fileroot':titlepar.get('file'),
                                              'scannumber':titlepar.get('scan')}
                                    result.update(proteinValues)
                                    result.update(peptideValues)
                                    yield result
Esempio n. 10
0
def getFeatureConvexhullCoordinates(featureElement):
    """
    Get the bounding box of the convexhull of featureElement.

    Returns a dictionary with the feature element as key and, as value, a dictionary with the keys
    mzMax, mzMin, rtMax and rtMin: the maximum and minimum m/z ratio and the maximum and minimum
    retention time of the convexhull. These four values describe a rectangle around the convexhull;
    they do not take into account that feature convexhulls are not perfect rectangles.

    The coordinates are returned as the strings read from the file, but the minimum and maximum are
    selected by numeric value (a plain string comparison would rank '999.5' above '1000.2').

    Every convexhull child writes to the same dictionary key, so when a feature holds several
    convexhull elements the returned bounds are those of the last one.
    NOTE(review): confirm whether the bounds should instead span all convexhulls of the feature.

    @type featureElement: Element
    @param featureElement: A feature element
    @rtype: dictionary
    @return: Dictionary with key the feature and values the coordinates of the 4 corners of the convexhull
    @raises IOError: No convexhulls in the element
    @raises TypeError: featureElement is not of type Element
    @raises KeyError: A convexhull point has attributes but lacks 'x' or 'y'
    @raises ValueError: A convexhull without any points (min() of an empty list) or a coordinate that is not a number

    B{Example}:

    Print the convexhull coordinates of all the features in a file:

    >>> import parseFeatureXML                                                 # to get the features use parseFeatureXML
    >>> featureXML = parseFeatureXML.Reader('example_feature_file.featureXML')   # make a reader instance
    >>> for feature in featureXML.getFeatures():                               # loop through all the features
    ...    print(getFeatureConvexhullCoordinates(feature))                     # print the coordinates of all the feature convexhulls
    {<Element 'feature' at 0x136b9a80>: {'mzMax': '338.251376135343', 'rtMin': '5105.9217', 'rtMax': '5111.6874', 'mzMin': '336.124751115092'}}
    {<Element 'feature' at 0x136bd510>: {'mzMax': '430.197574989105', 'rtMin': '4001.7973', 'rtMax': '4017.7105', 'mzMin': '428.070943557216'}}
    {<Element 'feature' at 0x136bde40>: {'mzMax': '339.251376135343', 'rtMin': '5107.9217', 'rtMax': '5112.6874', 'mzMin': '337.124751115092'}}

    """
    # Compare on the name of the type instead of on str(type(...)): the printed form depends on the
    # interpreter and the ElementTree implementation ("<type 'Element'>" versus "<class '...Element'>").
    if type(featureElement).__name__ != 'Element':
        raise TypeError(
            'featureElement in getFeatureConvexhullCoordinates is not of type Element but of type: '
            + str(type(featureElement)))

    # the corner coordinates of the feature, keyed on the feature element
    featureCoordinate = {}
    countConvexhull = 0  # the amount of convexhull tags found
    for element in featureElement:
        if element.tag != 'convexhull':
            continue
        # every convexhull gets fresh lists: retention time is the x coordinate, m/z the y coordinate
        retentionTimeList = []
        mzList = []
        for pt in element:
            pointItems = elementFunctions.getItems(pt)
            if pointItems != {}:
                # version 1.8.0 syntax: the coordinates are the attributes x and y of the point.
                # Read both before appending so the two lists can never get out of step.
                try:
                    rtValue = pointItems['x']
                    mzValue = pointItems['y']
                except KeyError:
                    sys.stdout.write(
                        'Your featureXML file is not in the format of output from version 1.8.0 or 1.7.0 FeatureFinder'
                    )
                    raise
                retentionTimeList.append(rtValue)
                mzList.append(mzValue)
            else:
                # version 1.7.0 syntax: every point holds one child per dimension
                for convex in pt:
                    # dim 0 is retention time, dim 1 is m/z
                    dim = int(elementFunctions.getItems(convex)['dim'])
                    if dim == 0:
                        retentionTimeList.append(convex.text)
                    elif dim == 1:
                        mzList.append(convex.text)
                    else:
                        warnings.warn(
                            'dim in convexhull hullpoint is not 0 or 1. Value not used',
                            stacklevel=2)

        # key=float compares numerically while the original strings are what gets stored
        rtMin = min(retentionTimeList, key=float)
        rtMax = max(retentionTimeList, key=float)
        mzMin = min(mzList, key=float)
        mzMax = max(mzList, key=float)

        featureCoordinate[featureElement] = {
            'rtMin': rtMin,
            'rtMax': rtMax,
            'mzMin': mzMin,
            'mzMax': mzMax
        }
        countConvexhull += 1

    if countConvexhull == 0:
        raise IOError(
            'No convexhulls in the element, check your featureXML file')
    return featureCoordinate