Python FileHandleの例、fileHandling.FileHandle Pythonの例

コード例 #1

0

ファイルを表示

ファイル: parsePeaksMzML.py プロジェクト: npklein/pyMSA

    def __init__(self, path):
        """
        Set up a Reader for the peaks.mzML file at path.

        The path is stored on the instance and handed to a fileHandling.FileHandle, whose isXML() and isMzML()
        checks are both called before the (still empty) parsing state is created.

        @type path: string
        @param path: The path of the peaks.mzML file
        """
        self.path = path

        # run the XML check and then the mzML check on the file; nothing is caught here,
        # so any exception raised by these checks propagates to the caller
        mzmlHandle = fileHandling.FileHandle(self.path)
        mzmlHandle.isXML()
        mzmlHandle.isMzML()

        # flag that is switched on at construction (it is not read inside this method)
        self.simpleFlag = True
        # the current element; None until something has been read
        self.element = None
        # a list of all the keys that can be used for __getItem__, empty at construction
        self.__spectraKeySet = list()
        # dictionary that will contain all the spectra information; collections.defaultdict(dict)
        # lets unknown keys be added without initialising them first
        self.spectraInfo = collections.defaultdict(dict)

コード例 #2

0

ファイルを表示

ファイル: parseFeatureXML.py プロジェクト: npklein/pyMSA

    def __init__(self, path):
        """
        Set up a Reader for the featureXML file at path.

        The path is stored on the instance and handed to a fileHandling.FileHandle, whose isXML() and
        isFeatureXML() checks are both called before the (still empty) parsing state is created.

        @type path: string
        @param path: The path of the feature XML file
        """
        self.path = path

        # run the XML check and then the featureXML check on the file; nothing is caught here,
        # so any exception raised by these checks propagates to the caller
        featureHandle = fileHandling.FileHandle(self.path)
        featureHandle.isXML()
        featureHandle.isFeatureXML()

        # the current element; None until something has been read
        self.element = None
        # dictionary that will contain all the elements; collections.defaultdict(dict)
        # lets unknown keys be added without initialising them first
        self.elementInfo = collections.defaultdict(dict)
        # a flag to see if simpleFeatureInfo or allFeatureInfo is used. This makes a difference in the __getItem__ function
        self.simpleFlag = True

        # every key that __getitem__ takes, collected in one set
        self.__elementKeySet = set([
            'intensity',
            'overallquality',
            'userParam',
            'convexhull',
            'mz',
            'retention time',
            'quality',
            'charge',
            'content',
            'id',
        ])

コード例 #3

0

ファイルを表示

ファイル: test_fileHandling.py プロジェクト: npklein/pyMSA

 def test_isMascot(self):
     # the test passes when isMascot() on the Mascot test file does not raise
     mascotPath = testFolder + 'test_mascot.xml'
     fileHandling.FileHandle(mascotPath).isMascot()

コード例 #4

0

ファイルを表示

ファイル: test_fileHandling.py プロジェクト: npklein/pyMSA

 def test_isMascotException(self):
     # a featureXML file is handed to isMascot(), which has to answer with an IOError
     wrongFormatPath = testFolder + 'featurexmlTestFile_1.featureXML'
     handle = fileHandling.FileHandle(wrongFormatPath)
     self.assertRaises(IOError, handle.isMascot)

コード例 #5

0

ファイルを表示

ファイル: test_fileHandling.py プロジェクト: npklein/pyMSA

 def test_isFeatureXML_invalidException(self):
     # isFeatureXML() has to raise IOError for the invalid featureXML test file
     noHeaderPath = testFolder + 'invalidFeatureXML_noheader.featureXML'
     handle = fileHandling.FileHandle(noHeaderPath)
     self.assertRaises(IOError, handle.isFeatureXML)

コード例 #6

0

ファイルを表示

ファイル: test_fileHandling.py プロジェクト: npklein/pyMSA

 def test_isMzML(self):
     handle = fileHandling.FileHandle(testFolder + 'mzml_test_file_1.mzML')
     # first call: only has to finish without raising
     handle.isMzML()
     # second call: the check must return None for the mzML test file
     outcome = handle.isMzML()
     self.assertEqual(outcome, None)

コード例 #7

0

ファイルを表示

ファイル: test_fileHandling.py プロジェクト: npklein/pyMSA

 def test_isFeatureXML(self):
     # isFeatureXML() must return None (and therefore not raise) for the featureXML test file
     handle = fileHandling.FileHandle(testFolder + 'featurexmlTestFile_1.featureXML')
     outcome = handle.isFeatureXML()
     self.assertEqual(outcome, None)

コード例 #8

0

ファイルを表示

ファイル: test_fileHandling.py プロジェクト: npklein/pyMSA

 def test_isXML_invalidException(self):
     # isXML() has to raise IOError for the invalid XML test file
     # NOTE(review): unlike the sibling tests this file name starts with '/' - confirm against how testFolder is defined
     brokenXmlPath = testFolder + '/invalidXML.XML'
     handle = fileHandling.FileHandle(brokenXmlPath)
     self.assertRaises(IOError, handle.isXML)

コード例 #9

0

ファイルを表示

ファイル: test_fileHandling.py プロジェクト: npklein/pyMSA

 def test_isXML(self):
     # isXML() must return None (and therefore not raise) for the valid XML test file
     handle = fileHandling.FileHandle(testFolder + 'validXML.XML')
     outcome = handle.isXML()
     self.assertEqual(outcome, None)

コード例 #10

0

ファイルを表示

ファイル: test_fileHandling.py プロジェクト: npklein/pyMSA

 def test_getFile(self):
     # getFile() must hand back exactly the path the FileHandle was created with
     featurePath = testFolder + 'featurexmlTestFile_1.featureXML'
     handle = fileHandling.FileHandle(featurePath)
     self.assertEqual(handle.getFile(), featurePath)

コード例 #11

0

ファイルを表示

def compareCoordinate(mzmlFile,
                      featureFile,
                      writeCSV=False,
                      writeTo='precursorPerFeature.csv'):
    r"""
    Count, for every feature of a featureXML file, how many precursors of an mzML file fall inside that feature.

    The precursor scan time and m/z values of a spectrum are compared with the retention time and m/z bounds of the
    convexhull of every feature. The spectrum information can come from a mzml File or a peaks.mzml file.
    A precursor is counted for a feature when its retention time (multiplied by 60 to go from minutes to seconds)
    lies strictly between the feature's rtMin and rtMax and its m/z lies strictly between the feature's mzMin and mzMax.
    If writeCSV is set to true, the precursors-per-feature dictionary is additionally written out to a CSV file.

    @type mzmlFile: string
    @param mzmlFile: The path of the .mzML file
    @type featureFile: string
    @param featureFile: The path of the .featureXML file
    @type writeCSV: bool
    @param writeCSV: Flag if a CSV file has to be written out of the precursor per feature data (default: false)
    @type writeTo: string
    @param writeTo: The file and path where the CSV has to be written to (made absolute with os.path.abspath), default is precursorPerFeature.csv
    @rtype: Dictionary
    @returns: A dictionary with 3 keys: totalPrecursorsInFeatures, averagePrecursorsInFeatures and featPerPrecursorDict. totalPrecursorsInFeatures
    is a numeric value: the total amount of precursors that are in all features, averagePrecursorsInFeatures is a numeric value: the average amount
    of precursors in a feature (0.0 when the feature file contains no features) and featPerPrecursorDict is a dictionary with as key every
    feature id and as value the amount of precursors of that feature

    B{Examples}:

    Inspect the return value:

    >>> result = compareCoordinate('example_mzML_file.mzML', 'example_feature_file.featureXML')
    >>> result['totalPrecursorsInFeatures']
    2
    >>> result['featPerPrecursorDict']['f_8613715360396561740']
    0

    Write the results to a csv file:

    >>> compareCoordinate(testFolder+'mzmlTestFile.mzML', testFolder+'featurexmlTestFile.featureXML', True, testFolder+'testPrecursorPerFeature.csv') # note the True
    """

    # getting the absolute path of the given mzml file
    mzmlFile = os.path.abspath(mzmlFile)
    # NOTE(review): the handle is never used afterwards; the construction is kept in case the
    # FileHandle constructor validates the path - confirm and remove if it does not
    fileHandling.FileHandle(mzmlFile)
    # parsing of mzml file
    msrun = pymzml.run.Reader(mzmlFile)

    # get the retention times and m/z of all precursors in msrun
    precursors = mzmlFunctions.getPrecursorRtMz(msrun)

    featureFile = os.path.abspath(featureFile)
    # make an instance of the parseFeatureXML.Reader object, with file as input
    featureXML = parseFeatureXML.Reader(featureFile)

    # featPrecursor will hold the amount of precursors per feature, with id as key and amount of precursors as value
    featPrecursor = {}
    totalPrecursor = 0
    # get all features out of featureXML
    for feature in featureXML.getSimpleFeatureInfo():
        # the bounds of a feature do not depend on the precursor, so convert them once per feature
        bounds = featureFunctions.getFeatureConvexhullCoordinates(feature)[feature]
        rtMin = float(bounds['rtMin'])
        rtMax = float(bounds['rtMax'])
        mzMin = float(bounds['mzMin'])
        mzMax = float(bounds['mzMax'])

        # the amount of precursors starts at 0 for every feature
        precursorPerFeature = 0
        for precursor in precursors:
            # convert to float BEFORE multiplying (*60 to go from minutes to seconds), so that a
            # retention time given as a string is scaled numerically instead of being repeated
            rtSeconds = float(precursor['rt']) * 60
            if not rtMin < rtSeconds < rtMax:
                continue
            # the m/z is only looked at for precursors whose retention time already matched
            if mzMin < float(precursor['mz']) < mzMax:
                precursorPerFeature += 1

        totalPrecursor += precursorPerFeature
        # the reader is asked for the id of the feature it is currently positioned on
        featPrecursor[featureXML['id']] = precursorPerFeature

    # if writeCSV flag is set to True, write out csv file to the absolute path of writeTo (default: precursorPerFeature.csv)
    if writeCSV:
        compareDataWriter = output.CompareDataWriter(os.path.abspath(writeTo))
        compareDataWriter.precursorPerFeatureCsvWriter(featPrecursor)

    # calculate the average precursor per feature; without any feature there is nothing to divide by
    if featPrecursor:
        averagePrecursFeature = float(totalPrecursor) / len(featPrecursor)
    else:
        averagePrecursFeature = 0.0
    return {
        'totalPrecursorsInFeatures': totalPrecursor,
        'averagePrecursorsInFeatures': averagePrecursFeature,
        'featPerPrecursorDict': featPrecursor
    }