Ejemplo n.º 1
0
 def __init__(self):
     """Initialize the translator and load the OBO ontologies that ship
     with the package ('psi-ms.obo' and 'unit.obo')."""
     super(DefaultTranslator, self).__init__()
     #The ontology files live in the 'ontologies' folder next to the
     #aux module inside the installed package.
     packageDir = os.path.dirname(aux.__file__)
     for oboFilename in ('psi-ms.obo', 'unit.obo'):
         oboPath = aux.joinpath(packageDir, 'ontologies', oboFilename)
         self.load(oboPath)
Ejemplo n.º 2
0
 def __init__(self):
     """Set up the default translator by importing the bundled
     'psi-ms.obo' and 'unit.obo' ontology files."""
     super(DefaultTranslator, self).__init__()
     #Resolve both ontology file locations relative to the aux module
     baseDir = os.path.dirname(aux.__file__)
     psiPath = aux.joinpath(baseDir, 'ontologies', 'psi-ms.obo')
     unitPath = aux.joinpath(baseDir, 'ontologies', 'unit.obo')
     self.load(psiPath)
     self.load(unitPath)
Ejemplo n.º 3
0
    def load(cls, path, name):
        """Read a ``proteindb`` file from the hard disk and build a new
        instance from its contents.

        :param path: directory containing the ``proteindb`` file
        :param name: filename without the file extension ".proteindb"

        :returns: a new instance with ``proteins``, ``peptides`` and ``info``
            populated from the file

        .. note:: this generates rather large files, which actually take longer
            to import than to newly generate. Maybe saving / loading should be
            limited to the protein database whitout in silico digestion
            information.
        """
        filepath = aux.joinpath(path, name + '.proteindb')
        #containerZip.open() yields bytes; each archive member is wrapped in
        #a TextIOWrapper so that its content is decoded into a str object.
        jsonStrings = dict()
        with zipfile.ZipFile(filepath, 'r', allowZip64=True) as containerZip:
            for entry in ('proteins', 'peptides', 'info'):
                wrapper = io.TextIOWrapper(containerZip.open(entry),
                                           encoding='utf-8')
                jsonStrings[entry] = wrapper.read()
        newInstance = cls()
        newInstance.proteins = json.loads(jsonStrings['proteins'],
                                          object_hook=ProteinSequence.jsonHook)
        newInstance.peptides = json.loads(jsonStrings['peptides'],
                                          object_hook=PeptideSequence.jsonHook)
        newInstance.info.update(json.loads(jsonStrings['info']))
        return newInstance
Ejemplo n.º 4
0
    def load(cls, path, name):
        """Imports the specified ``proteindb`` file from the hard disk.

        :param path: filedirectory of the ``proteindb`` file
        :param name: filename without the file extension ".proteindb"

        :returns: a new instance populated from the imported file

        .. note:: this generates rather large files, which actually take longer
            to import than to newly generate. Maybe saving / loading should be
            limited to the protein database whitout in silico digestion
            information.
        """
        filepath = aux.joinpath(path, name + '.proteindb')
        with zipfile.ZipFile(filepath, 'r', allowZip64=True) as containerZip:

            def _readText(archiveName):
                #Decode the bytes returned by containerZip.open() into a str
                return io.TextIOWrapper(containerZip.open(archiveName),
                                        encoding='utf-8').read()

            proteinsJson = _readText('proteins')
            peptidesJson = _readText('peptides')
            infoJson = _readText('info')
        newInstance = cls()
        newInstance.proteins = json.loads(proteinsJson,
                                          object_hook=ProteinSequence.jsonHook)
        newInstance.peptides = json.loads(peptidesJson,
                                          object_hook=PeptideSequence.jsonHook)
        newInstance.info.update(json.loads(infoJson))
        return newInstance
Ejemplo n.º 5
0
def cleanUpPparse(outputpath, rawfilename, mgf=False):
    """Delete temporary files generated by pparse, including the filetypes
    ".csv", ".ms1", ".ms2", ".xtract",  the files "pParsePlusLog.txt" and
    "pParse.para" and optionally also the ".mgf" file generated by pParse.

    .. warning:
        When the parameter "mgf" is set to "True" all files ending with ".mgf"
        and containing the specified "filename" are deleted. This could
        potentially also affect MGF files not generated by pParse.

    :param outputpath: path to the output directory of pParse
    :param rawfilename: filename of the thermo ".raw" file
    :param mgf: bool, if True the ".mgf" file generated by pParse is also
        removed
    """
    extensions = ['csv', 'ms1', 'ms2', 'xtract']
    filename, fileext = os.path.splitext(os.path.basename(rawfilename))
    additionalFiles = [aux.joinpath(outputpath, 'pParsePlusLog.txt'),
                       aux.joinpath(outputpath, filename+'.pparse.para'),
                       ]

    #Remove the per-raw-file temporary files with known extensions
    for ext in extensions:
        filepath = aux.joinpath(outputpath, '.'.join([filename, ext]))
        if os.path.isfile(filepath):
            print('Removing file: ', filepath)
            os.remove(filepath)
    #Remove the log and parameter files
    for filepath in additionalFiles:
        if os.path.isfile(filepath):
            print('Removing file: ', filepath)
            os.remove(filepath)
    if mgf:
        for _filename in os.listdir(outputpath):
            _basename, _fileext = os.path.splitext(_filename)
            if _fileext.lower() != '.mgf':
                continue
            #Bug fix: the original code referenced an undefined name
            #"basename" here, raising a NameError whenever mgf=True; the
            #intended variable is "filename" (raw file name w/o extension).
            if _basename.find(filename) != -1 and _basename != filename:
                filepath = aux.joinpath(outputpath, _filename)
                print('Removing file: ', filepath)
                os.remove(filepath)
Ejemplo n.º 6
0
    def save(self, path, name, compress=True):
        """Writes the specified specfiles to ``fgic`` files on the hard disk.

        .. note::
            If ``.save()`` is called and no ``fgic`` files are present in the
            specified path new files are generated, otherwise old files are
            replaced.

        :param path: filedirectory to which the ``fgic`` file is written.
        :param name: filename, without file extension
        :param compress: bool, True to use zip file compression
        """
        filepath = aux.joinpath(path, name + '.fgic')
        #Use the safe-replace context so an interrupted write cannot leave a
        #half-written file behind.
        with aux.PartiallySafeReplace() as msr:
            with msr.open(filepath) as openfile:
                self._writeContainer(openfile, compress)
Ejemplo n.º 7
0
    def save(self, path, name, compress=True):
        """Store this container as a ``fgic`` file on the hard disk.

        .. note::
            If ``.save()`` is called and no ``fgic`` file is present in the
            specified path a new file is generated, otherwise the old file is
            replaced.

        :param path: filedirectory to which the ``fgic`` file is written.
        :param name: filename, without file extension
        :param compress: bool, True to use zip file compression
        """
        with aux.PartiallySafeReplace() as safeReplace:
            outputName = name + '.fgic'
            targetPath = aux.joinpath(path, outputName)
            with safeReplace.open(targetPath) as outputFile:
                self._writeContainer(outputFile, compress)
Ejemplo n.º 8
0
    def save(self, path, compress=True):
        """Writes the ``.proteins`` and ``.peptides`` entries to the hard disk
        as a ``proteindb`` file.

        .. note::
            If ``.save()`` is called and no ``proteindb`` file is present in the
            specified path a new files is generated, otherwise the old file is
            replaced.

        :param path: filedirectory to which the ``proteindb`` file is written.
            The output file name is specified by ``self.info['name']``
        :param compress: bool, True to use zip file compression
        """
        filepath = aux.joinpath(path, self.info['name'] + '.proteindb')
        with aux.PartiallySafeReplace() as msr:
            #open in 'w+b' since the container is written as binary data
            with msr.open(filepath, mode='w+b') as openfile:
                self._writeContainer(openfile, compress=compress)
Ejemplo n.º 9
0
    def save(self, path, compress=True):
        """Persist ``self.proteins`` and ``self.peptides`` as a ``proteindb``
        file in the given directory.

        .. note::
            An already existing ``proteindb`` file at the target location is
            replaced; otherwise a new file is created.

        :param path: filedirectory to which the ``proteindb`` file is written.
            The output file name is specified by ``self.info['name']``
        :param compress: bool, True to use zip file compression
        """
        with aux.PartiallySafeReplace() as replaceContext:
            outputFilename = self.info['name'] + '.proteindb'
            outputPath = aux.joinpath(path, outputFilename)
            with replaceContext.open(outputPath, mode='w+b') as outputFile:
                self._writeContainer(outputFile, compress=compress)
Ejemplo n.º 10
0
def writeParams(rawfilepath, outputpath, isolationWindow, coElute=0):
    """Generate and write a pParse parameter file.

    :param rawfilepath: location of the thermo ".raw" file
    :param outputpath: path to the output directory of pParse
    :param isolationWindow: MSn isolation window that was used for the
        aquisition of the specified thermo raw file
    :param coElute: passed through to :func:`generateParams`

    :returns: file path of the pParse parameter file
    """
    basename = os.path.basename(rawfilepath)
    filename, _fileext = os.path.splitext(basename)
    paramPath = aux.joinpath(outputpath, filename + '.pparse.para')
    paramText = generateParams(rawfilepath, outputpath, isolationWindow,
                               coElute)
    #NOTE(review): the file is opened in binary mode, so generateParams is
    #expected to return bytes — confirm against its implementation.
    with open(paramPath, 'wb') as openfile:
        openfile.write(paramText)
    return paramPath
Ejemplo n.º 11
0
    def load(self, path, name):
        """Imports the specified ``fgic`` file from the hard disk.

        :param path: filedirectory from which the ``fgic`` file is read.
        :param name: filename, without file extension
        """
        filepath = aux.joinpath(path, name + '.fgic')
        with zipfile.ZipFile(filepath, 'r') as containerZip:
            #containerZip.read() returns bytes, so wrap each archive member
            #in a TextIOWrapper to decode it into a str object.
            jsonString = io.TextIOWrapper(containerZip.open('data'),
                                          encoding='utf-8').read()
            infoString = io.TextIOWrapper(containerZip.open('info'),
                                          encoding='utf-8').read()
        self.container = json.loads(jsonString, object_hook=Fgi.jsonHook)
        self.info.update(json.loads(infoString))
        #"_matrixTemplate" is stored inside "info" on disk but kept as a
        #separate attribute in memory.
        self._matrixTemplate = self.info.pop('_matrixTemplate')
Ejemplo n.º 12
0
    def load(self, path, name):
        """Read a previously saved ``fgic`` file and restore this container.

        :param path: directory containing the ``fgic`` file
        :param name: filename, without file extension
        """
        filepath = aux.joinpath(path, name + '.fgic')
        decoded = {}
        with zipfile.ZipFile(filepath, 'r') as containerZip:
            for entry in ('data', 'info'):
                #Decode the bytes from the zip archive into a str object
                decoded[entry] = io.TextIOWrapper(
                    containerZip.open(entry), encoding='utf-8').read()
        self.container = json.loads(decoded['data'], object_hook=Fgi.jsonHook)
        self.info.update(json.loads(decoded['info']))
        self._matrixTemplate = self.info['_matrixTemplate']
        del self.info['_matrixTemplate']
Ejemplo n.º 13
0
def writeMzml(specfile, msrunContainer, outputdir, spectrumIds=None,
              chromatogramIds=None, writeIndex=True):
    """Write the spectra and chromatograms of one specfile stored in an
    msrunContainer to an mzML file in "outputdir".

    :param specfile: name of the specfile (msrun file) to export; used as key
        into the msrunContainer and as the output file name
    :param msrunContainer: container providing the run metadata (``rmc``),
        spectrum metadata items (``smic``), spectrum array items (``saic``)
        and chromatogram items (``cic``) for the specfile
    :param spectrumIds: spectrum keys to write; if None, all spectra of the
        specfile are written, sorted by their integer key
    :param chromatogramIds: chromatogram keys to write; if None, all
        chromatograms of the specfile are written
    :param writeIndex: bool, if True the mzML is wrapped in an
        ``indexedmzML`` node with a spectrum/chromatogram index, an index
        offset and a checksum appended
    """
    #TODO: maybe change to use aux.openSafeReplace
    outputFile = io.BytesIO()

    #TODO: perform check that specfile is present in msrunContainer and at least
    #   the metadatanode.
    metadataTree = msrunContainer.rmc[specfile]
    #Generate a list of spectrum ids that should be written to mzML
    if spectrumIds is None and specfile in msrunContainer.smic:
        keyTuple = [(int(key), key) for key in viewkeys(msrunContainer.smic[specfile])]
        spectrumIds = [key for _, key in sorted(keyTuple)]
    spectrumCounts = len(spectrumIds)
    #Generate a list of chromatogram ids that should be written to mzML
    if chromatogramIds is None and specfile in msrunContainer.cic:
        chromatogramIds = [cId for cId in viewkeys(msrunContainer.cic[specfile])]
    chromatogramCounts = len(chromatogramIds)

    #Byte offsets of every spectrum/chromatogram element, collected for the
    #optional index at the end of the file
    spectrumIndexList = list()
    chromatogramIndexList = list()

    #The xmlfile context manager is entered/exited manually because the file
    #must stay open across the whole function and the closing order of the
    #nested nodes matters; "buffered=False" keeps outputFile.tell() accurate
    #for the index offsets.
    xmlFile = ETREE.xmlfile(outputFile, encoding='ISO-8859-1', buffered=False)
    xmlWriter = xmlFile.__enter__()
    xmlWriter.write_declaration()

    nsmap = {None: 'http://psi.hupo.org/ms/mzml',
             'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
             }
    mzmlAttrib = {'{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': \
                    'http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd',
                  'version': '1.1.0', 'id': metadataTree.attrib['id']
                  }

    if writeIndex:
        xmlIndexedMzml = xmlWriter.element('indexedmzML', nsmap=nsmap)
        xmlIndexedMzml.__enter__()
        xmlWriter.write('\n')
    xmlMzml = xmlWriter.element('mzML', mzmlAttrib, nsmap=nsmap)
    xmlMzml.__enter__()
    xmlWriter.write('\n')

    #Copy the metadata nodes; the 'run' node is rebuilt so that its spectrum
    #and chromatogram lists can be regenerated from the container content
    for metadataNode in metadataTree.getchildren():
        if metadataNode.tag != 'run':
            xmlWriter.write(maspy.xml.recCopyElement(metadataNode),
                            pretty_print=True
                            )
        else:
            xmlRun = xmlWriter.element(metadataNode.tag, metadataNode.attrib)
            xmlRun.__enter__()
            xmlWriter.write('\n')
            for runChild in metadataNode.getchildren():
                if runChild.tag == 'spectrumList':
                    specDefaultProcRef = runChild.attrib['defaultDataProcessingRef']
                elif runChild.tag == 'chromatogramList':
                    chromDefaultProcRef = runChild.attrib['defaultDataProcessingRef']
                else:
                    #TODO: maybe recCopy?
                    xmlRun.append(runChild)

            #If any spectra should be written, generate the spectrumList Node.
            if spectrumCounts > 0:
                specListAttribs = {'count': str(spectrumCounts),
                                   'defaultDataProcessingRef': specDefaultProcRef
                                   }
                xmlSpectrumList = xmlWriter.element('spectrumList',
                                                    specListAttribs
                                                    )
                xmlSpectrumList.__enter__()
                xmlWriter.write('\n')

                for index, key in enumerate(spectrumIds):
                    smi = msrunContainer.smic[specfile][key]
                    sai = msrunContainer.saic[specfile][key]
                    #Store the spectrum element offset here
                    spectrumIndexList.append((outputFile.tell(),
                                              smi.attributes['id']
                                              ))

                    xmlSpectrum = xmlSpectrumFromSmi(index, smi, sai)
                    xmlWriter.write(xmlSpectrum, pretty_print=True)

                xmlSpectrumList.__exit__(None, None, None)
                xmlWriter.write('\n')

            #If any chromatograms should be written, generate the
            #chromatogramList Node.
            if chromatogramCounts > 0:
                chromListAttribs = {'count': str(chromatogramCounts),
                                    'defaultDataProcessingRef': chromDefaultProcRef
                                    }
                xmlChromatogramList = xmlWriter.element('chromatogramList',
                                                        chromListAttribs
                                                        )
                xmlChromatogramList.__enter__()
                xmlWriter.write('\n')
                for index, key in enumerate(chromatogramIds):
                    ci = msrunContainer.cic[specfile][key]
                    #Store the chromatogram element offset here
                    chromatogramIndexList.append((outputFile.tell(), ci.id))

                    xmlChromatogram = xmlChromatogramFromCi(index, ci)
                    xmlWriter.write(xmlChromatogram, pretty_print=True)
                xmlChromatogramList.__exit__(None, None, None)
                xmlWriter.write('\n')

            xmlRun.__exit__(None, None, None)
            xmlWriter.write('\n')

    #Close the mzml node
    xmlMzml.__exit__(None, None, None)
    #Optional: write the indexedMzml nodes and close the indexedMzml node
    if writeIndex:
        xmlWriter.write('\n')
        indexListOffset = outputFile.tell()
        _writeMzmlIndexList(xmlWriter, spectrumIndexList, chromatogramIndexList)
        _writeIndexListOffset(xmlWriter, indexListOffset)
        _writeMzmlChecksum(xmlWriter, outputFile)
        xmlIndexedMzml.__exit__(None, None, None)
    #Close the xml file
    xmlFile.__exit__(None, None, None)
    #Write the output mzML file
    filepath = aux.joinpath(outputdir, specfile+'.mzML')
    with open(filepath, 'wb') as openfile:
        openfile.write(outputFile.getvalue())
Ejemplo n.º 14
0
            except ValueError:
                if param[1] != referenceDict[param[0]][0]:
                    raise Exception(param)
            if param[2] != referenceDict[param[0]][1]:
                raise Exception(param)
        if paramType == 'userParam':
            if param[3] != referenceDict[param[0]][2]:
                raise Exception(param)
        if paramType == 'refParamGroup':
            observedRefParamGroups.append(param[1])
    if observedRefParamGroups != referenceDict['refParamGroups']:
        raise Exception(observedRefParamGroups)
    return True


#Locate and parse the XML test fixture shipped with the test data
testfilepath = aux.joinpath(os.path.dirname(aux.__file__), os.pardir, 'tests', 'testdata', 'spectrum.xml')
with io.open(testfilepath, 'r', encoding='utf-8') as openfile:
    root = etree.XML(openfile.read())


#TESTING mzml.smiFromXmlSpectrum(), mzml.extractBinaries()
spectra = list()
for xmlSpectrum in root.getchildren():
    #Build the spectrum metadata item (smi) and the raw binary data array
    #list from each spectrum node, then decode the binary arrays into a Sai
    smi, binaryDataArrayList = maspy.reader.smiFromXmlSpectrum(xmlSpectrum, 'test')
    sai = maspy.core.Sai(smi.id, smi.specfile)
    sai.arrays, sai.arrayInfo = maspy.xml.extractBinaries(binaryDataArrayList,
                                                     smi.attributes['defaultArrayLength'])
    spectra.append({'smi': smi, 'sai': sai})


#TESTING Smi, MzmlScan, MzmlPrecursor, TODO: add MzmlProduct
Ejemplo n.º 15
0
def rtCalibration(fiContainer,
                  allowedRtDev=60,
                  allowedMzDev=2.5,
                  reference=None,
                  specfiles=None,
                  showPlots=False,
                  plotDir=None,
                  minIntensity=1e5):
    """Performs a retention time calibration between :class:`FeatureItem` of
    multiple specfiles.

    For every non-reference specfile, features are matched to reference
    features within "allowedRtDev" / "allowedMzDev" limits, a spline is
    fitted to the observed rt deviations, and each feature's ``rt`` is
    replaced by its corrected value (the original rt is preserved as
    ``obsRt``).

    :ivar fiContainer: Perform alignment on :class:`FeatureItem` in :attr:`FeatureContainer.specfiles`
    :ivar allowedRtDev: maxium retention time difference of two features in two runs to be matched
    :ivar allowedMzDev: maxium relative m/z difference (in ppm) of two features in two runs to be matched
    :ivar showPlots: boolean, True if a plot should be generated which shows to results of the calibration
    :ivar plotDir: if not None and showPlots is True, the plots are saved to
        this location.
    :ivar reference: Can be used to specifically specify a reference specfile
    :ivar specfiles: Limit alignment to those specfiles in the fiContainer
    :ivar minIntensity: consider only features with an intensity above this value
    """
    #TODO: long function, maybe split into subfunctions
    specfiles = [_ for _ in viewkeys(fiContainer.info)
                 ] if specfiles is None else specfiles
    matchCharge = True

    refMzKey = 'mz'
    mzKey = 'mz'

    #Put the reference specfile first so it becomes the alignment target
    if reference is not None:
        if reference in specfiles:
            specfiles = [reference] + list(
                set(specfiles).difference(set([reference])))
        else:
            print(
                'Specified reference specfile not present, using reference: ',
                specfiles[0])

    #Preserve the original (observed) retention time before it is modified
    for featureItem in fiContainer.getItems(specfiles=specfiles):
        if not hasattr(featureItem, 'obsRt'):
            setattr(featureItem, 'obsRt', featureItem.rt)

    referenceArrays = None
    for specfile in specfiles:
        featureArrays = fiContainer.getArrays(
            ['rt', 'charge', 'mz', 'intensity'], specfiles=specfile, sort='rt')
        #Discard low intensity features from the matching
        if minIntensity is not None:
            intensityMask = (featureArrays['intensity'] > minIntensity)
            for key in list(viewkeys(featureArrays)):
                featureArrays[key] = featureArrays[key][intensityMask]

        #The first specfile becomes the reference and is not corrected
        if referenceArrays is None:
            referenceArrays = featureArrays
            if showPlots:
                print('Reference: ' + specfile)
            continue

        rtPosList = list()
        rtDevList = list()
        mzDevRelList = list()
        mzDevAbsList = list()

        for featurePos in range(len(featureArrays[mzKey])):
            currRt = featureArrays['rt'][featurePos]
            currMz = featureArrays[mzKey][featurePos]
            currZ = featureArrays['charge'][featurePos]
            mzLimitUp = currMz * (1 + allowedMzDev * 1E-6)
            mzLimitLow = currMz * (1 - allowedMzDev * 1E-6)
            rtLimitUp = currRt + allowedRtDev
            rtLimitLow = currRt - allowedRtDev

            #Binary search the rt-sorted reference arrays for the rt window
            posL = bisect.bisect_left(referenceArrays['rt'], rtLimitLow)
            posU = bisect.bisect_right(referenceArrays['rt'], rtLimitUp)

            refMask = (referenceArrays[refMzKey][posL:posU] <= mzLimitUp) & (
                referenceArrays[refMzKey][posL:posU] >= mzLimitLow)
            if matchCharge:
                refMask = refMask & (referenceArrays['charge'][posL:posU]
                                     == currZ)

            #Keep only the reference feature with the smallest mz deviation
            #(the "break" exits after the best-ranked match)
            currMzDev = abs(referenceArrays[refMzKey][posL:posU][refMask] -
                            currMz)
            bestHitMask = currMzDev.argsort()
            for refRt, refMz in zip(
                    referenceArrays['rt'][posL:posU][refMask][bestHitMask],
                    referenceArrays[refMzKey][posL:posU][refMask]
                [bestHitMask]):
                rtPosList.append(currRt)
                rtDevList.append(currRt - refRt)
                mzDevRelList.append((1 - currMz / refMz) * 1E6)
                mzDevAbsList.append(currMz - refMz)
                break

        rtPosList = numpy.array(rtPosList)
        rtDevList = numpy.array(rtDevList)

        #NOTE(review): an empty rtPosList (no matched features) makes max()
        #raise here — consider guarding against it.
        splineInitialKnots = int(max(rtPosList) - min(rtPosList))
        dataFit = aux.DataFit(rtDevList, rtPosList)
        dataFit.splineInitialKnots = splineInitialKnots
        dataFit.splineTerminalExpansion = 0.2
        dataFit.processInput(dataAveraging='median', windowSize=10)
        dataFit.generateSplines()

        if showPlots:
            corrDevArr = rtDevList - dataFit.corrArray(rtPosList)
            timePoints = [
                min(rtPosList) + x
                for x in range(int(max(rtPosList) - min(rtPosList)))
            ]
            corrValues = dataFit.corrArray(timePoints)
            fig, ax = plt.subplots(3,
                                   2,
                                   sharex=False,
                                   sharey=False,
                                   figsize=(20, 18))
            fig.suptitle(specfile)
            ax[0][0].hist(rtDevList,
                          bins=100,
                          color='grey',
                          alpha=0.5,
                          label='observed')
            ax[0][0].hist(corrDevArr,
                          bins=100,
                          color='red',
                          alpha=0.5,
                          label='corrected')
            ax[0][0].set_title('Retention time deviation')
            ax[0][0].legend()
            ax[0][0].set_xlim(allowedRtDev * -1, allowedRtDev)
            ax[0][1].hist(mzDevRelList, bins=100, color='grey')
            ax[0][1].set_title('Mz deviation [ppm]')
            ax[1][0].scatter(rtPosList,
                             rtDevList,
                             color='grey',
                             alpha=0.1,
                             label='observed')
            ax[1][0].plot(timePoints,
                          corrValues,
                          color='red',
                          alpha=0.5,
                          label='correction function')
            ax[1][0].set_title('Retention time deviation over time')
            ax[1][0].legend()
            ax[1][0].set_ylim(allowedRtDev * -1, allowedRtDev)
            ax[1][1].scatter(rtPosList, mzDevRelList, color='grey', alpha=0.1)
            ax[1][1].set_title('Mz deviation over time')
            ax[1][1].set_ylim(allowedMzDev * -1, allowedMzDev)
            ax[2][0].scatter(rtPosList, corrDevArr, color='grey', alpha=0.1)
            ax[2][0].set_title('Aligned retention time deviation over time')
            ax[2][0].set_ylim(allowedRtDev * -1, allowedRtDev)
            if plotDir is not None:
                plotloc = aux.joinpath(plotDir, specfile + '.rtAlign.png')
                fig.savefig(plotloc)
            else:
                fig.show()

        #Bug fix: the original requested only ['rt'] here but then read
        #featureArrays['id'] below; request 'id' explicitly so the lookup
        #cannot fail with a KeyError.
        featureArrays = fiContainer.getArrays(['id', 'rt'],
                                              specfiles=specfile,
                                              sort='rt')
        featureArrays['corrRt'] = featureArrays['rt'] - dataFit.corrArray(
            featureArrays['rt'])
        for featureId, corrRt, rt in zip(featureArrays['id'],
                                         featureArrays['corrRt'],
                                         featureArrays['rt']):
            fiContainer.container[specfile][featureId].rt = corrRt


##TODO: Code is deprecated, new classes are currently located in maspy.featuregrouping
#class FeatureGroupItem(object):
#    """Representation of a group of :class:`FeatureItem`.
#
#    :ivar isMatched: False by default, True if any :class:`FeatureItem` in the group are matched.
#    :ivar isAnnotated: False by default, True if any :class:`FeatureItem` in the group are annotated.
#    :ivar siIds: containerId values of matched Si entries
#    :ivar siiIds: containerId values of matched Sii entries
#    :ivar featureIds: containerId values of :class:`FeatureItem` in the feature group
#    :ivar peptide: peptide sequence of best scoring Sii match
#    :ivar sequence: plain amino acid sequence of best scoring Sii match, used to retrieve protein information
#    :ivar score: score of best scoring Sii match
#    :ivar matchMatrix: structured representation of :attr:`FeatureItem.containerId` in the feature group.
#    :ivar intensityMatrix: similar to :attr:`matchMatrix` but contains :attr:`FeatureItem.intensity` values.
#    {chargeState: 2d numpy.array with specfiles as 1st dimension and labelState as 2nd dimension}
#    """
#    def __init__(self):
#        self.isMatched = None
#        self.isAnnotated = None
#        self.siIds = list()
#        self.siiIds = list()
#        self.featureIds = list()
#        self.peptide = None
#        self.sequence = None
#        self.score = None
#        self.matchMatrix = dict()
#        self.intensityMatrix = dict()
#
#
#class FeatureGroupContainer(object):
#    """ItemContainer for peptide feature groups :class`FeatureGroupItem`.
#
#    :ivar container: Storage list of :class:`FeatureGroupItem`
#    :ivar index: Use :attr:`FeatureItem.containerId` to which :class:`FeatureGroupItem` the feature was grouped
#    :ivar labelDescriptor: :class:`maspy.sil.LabelDescriptor` describes the label setup of an experiment
#    :ivar specfiles: List of keywords (filenames) representing files
#    :ivar specfilePositions: {specfile:arrayPosition, ...}
#    arrayPosition represents the array position of a specfile in :attr:`FeatureGroupItem.matchMatrix`
#    """
#    def __init__(self, specfiles, labelDescriptor=None):
#        self.container = dict()
#        self.labelDescriptor = maspy.sil.LabelDescriptor() if labelDescriptor is None else labelDescriptor
#        self._index = 0
#
#        self.info = dict()
#        for position, specfile in enumerate(specfiles):
#            self.info[specfile] = {'matrixPosition': position}
#
#    def getItems(self, specfiles=None, sort=False, reverse=False, selector=lambda fgi: True):
#        """Generator that yields filtered and/or sorted :class:`Si` objects from :instance:`self.sic`
#
#        :param specfiles: filenames of msrun files - if specified return only items from those files
#        :type specfiles: str or [str, str, ...]
#        :param sort: if "sort" is specified the returned list of items is sorted according to the :class:`Si`
#        attribute specified by "sort", if the attribute is not present the item is skipped.
#        :param reverse: boolean to reverse sort order
#        :param selector: a function which is called with each :class:`Si` item and returns
#        True (include item) or False (discard item). If not specified all items are returned
#        """
#        specfiles = [_ for _ in viewkeys(self.info)] if specfiles is None else aux.toList(specfiles)
#        return _getItems(self.container, specfiles, sort, reverse, selector)
#
#    def getArrays(self, report='lfq', attr=None, specfiles=None, sort=False, reverse=False, selector=lambda si: True, defaultValue=None):
#        """Return a condensed array of data selected from :class:`Si` objects of :instance:`self.sic`
#        for fast and convenient data processing.
#
#        :param attr: list of :class:`Si` item attributes that should be added to the returned array.
#        If an attribute is not present the "defaultValue" is added instead. The attributes "id" and "specfile"
#        are always included, in combination they serve as a unique id.
#        :param specfiles: filenames of msrun files - if specified return only items from those files
#        :type specfiles: str or [str, str, ...]
#        :param sort: if "sort" is specified the returned list of items is sorted according to the :class:`Si`
#        attribute specified by "sort", if the attribute is not present the item is skipped.
#        :param reverse: boolean to reverse sort order
#        :param selector: a function which is called with each :class:`Si` item and returns
#        True (include item) or False (discard item). If not specified all items are returned
#
#        return {'attribute1': numpy.array(), 'attribute1': numpy.array(), ...}
#        """
#        attr = attr if attr is not None else []
#        attr = set(['id', 'specfile'] + aux.toList(attr))
#        specfiles = [_ for _ in viewkeys(self.info)] if specfiles is None else aux.toList(specfiles)
#
#        arrays = arrays = dict([(key, []) for key in attr])
#        reportAttributes = list()
#        if report == 'lfq':
#            arrays['charge'] = list()
#            arrays['labelState'] = list()
#            for specfile in self.specfiles:
#                arrays[specfile] = list()
#                reportAttributes.append(specfile)
#        elif report == 'sil':
#            arrays['charge'] = list()
#            arrays['specfile'] = list()
#            for labelState in list(viewkeys(self.labelDescriptor.labels)) + [-1]:
#                labelAttributeName = ' '.join(('label:', str(labelState)))
#                arrays[labelAttributeName] = list()
#                reportAttributes.append(labelAttributeName)
#
#        if report == 'sil':
#            for item in _getItems(self.container, specfiles, sort, reverse, selector):
#                for charge in viewkeys(item.intensityMatrix):
#                    for specfile in specfiles:
#                        specfilePosition = self.info[specfile]['matrixPosition']
#                        for key in attributes:
#                            arrays[key].append(getattr(item, key, None))
#                        arrays['charge'].append(charge)
#                        arrays['specfile'].append(specfile)
#                        for labelState in list(viewkeys(self.labelDescriptor.labels)) + [-1]:
#                            labelAttributeName = ' '.join(('label:', str(labelState)))
#                            arrays[labelAttributeName].append(item.intensityMatrix[charge][specfilePosition, labelState])
#        elif report == 'lfq':
#            for item in _getItems(self.container, specfiles, sort, reverse, selector):
#                for charge in viewkeys(item.intensityMatrix):
#                    for labelState in list(viewkeys(self.labelDescriptor.labels)) + [-1]:
#                        for key in attributes:
#                            arrays[key].append(getattr(item, key, None))
#                        arrays['charge'].append(charge)
#                        arrays['labelState'].append(labelState)
#                        for specfile in specfiles:
#                            specfilePosition = self.info[specfile]['matrixPosition']
#                            arrays[specfile].append(item.intensityMatrix[charge][specfilePosition, labelState])
#        else:
#            raise Exception('report must be either "lfq" or "sil", not '+report)##
#
#        for key in  [_ for _ in viewkeys(arrays)]:
#            if key in reportAttributes:
#                arrays[key] = numpy.array(arrays[key], dtype=numpy.float64)
#            else:
#                arrays[key] = numpy.array(arrays[key])
#        return arrays
Ejemplo n.º 16
0
def writeMzml(specfile,
              msrunContainer,
              outputdir,
              spectrumIds=None,
              chromatogramIds=None,
              writeIndex=True):
    """Write the specified specfile from "msrunContainer" to an mzML file.

    :param specfile: keyword (filename) of the specfile in "msrunContainer"
    :param msrunContainer: container providing the run metadata tree (.rmc),
        spectrum metadata items (.smic), spectrum array items (.saic) and
        chromatogram items (.cic) of "specfile"
    :param outputdir: target directory, the output is written to
        "specfile" + ".mzML"
    :param spectrumIds: ids of the spectra that should be written; if None
        all spectra present for "specfile" are written, sorted by the
        integer value of their id
    :param chromatogramIds: ids of the chromatograms that should be written;
        if None all chromatograms present for "specfile" are written
    :param writeIndex: bool, if True the mzML node is wrapped in an
        "indexedmzML" node and the spectrum / chromatogram index lists, the
        index list offset and a file checksum are appended
    """
    #TODO: maybe change to use aux.openSafeReplace
    #The document is first assembled in memory so that element byte offsets
    #for the optional index lists can be recorded with outputFile.tell()
    outputFile = io.BytesIO()

    #TODO: perform check that specfile is present in msrunContainer and at
    #   least the metadatanode.
    metadataTree = msrunContainer.rmc[specfile]
    #Generate a list of spectrum ids that should be written to mzML
    if spectrumIds is None:
        if specfile in msrunContainer.smic:
            #Sort by the numeric value of the id to preserve scan order
            keyTuple = [(int(key), key)
                        for key in viewkeys(msrunContainer.smic[specfile])]
            spectrumIds = [key for _, key in sorted(keyTuple)]
        else:
            #Bug fix: previously spectrumIds stayed None here and
            #"len(spectrumIds)" below raised a TypeError
            spectrumIds = []
    spectrumCounts = len(spectrumIds)
    #Generate a list of chromatogram ids that should be written to mzML
    if chromatogramIds is None:
        if specfile in msrunContainer.cic:
            chromatogramIds = [
                cId for cId in viewkeys(msrunContainer.cic[specfile])
            ]
        else:
            #Bug fix: same "len(None)" TypeError as for spectrumIds above
            chromatogramIds = []
    chromatogramCounts = len(chromatogramIds)

    #Collect (byte offset, id) of every written spectrum / chromatogram
    #element for the optional index lists
    spectrumIndexList = list()
    chromatogramIndexList = list()

    #The xmlfile context manager is entered and exited manually because a
    #"with" statement would have to span the remainder of the function
    xmlFile = ETREE.xmlfile(outputFile, encoding='ISO-8859-1', buffered=False)
    xmlWriter = xmlFile.__enter__()
    xmlWriter.write_declaration()

    nsmap = {
        None: 'http://psi.hupo.org/ms/mzml',
        'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
    }
    mzmlAttrib = {'{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': \
                    'http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd',
                  'version': '1.1.0', 'id': metadataTree.attrib['id']
                  }

    if writeIndex:
        xmlIndexedMzml = xmlWriter.element('indexedmzML', nsmap=nsmap)
        xmlIndexedMzml.__enter__()
        xmlWriter.write('\n')
    xmlMzml = xmlWriter.element('mzML', mzmlAttrib, nsmap=nsmap)
    xmlMzml.__enter__()
    xmlWriter.write('\n')

    #Copy all metadata nodes; the "run" node is rebuilt so the freshly
    #generated spectrumList / chromatogramList nodes can be inserted
    for metadataNode in metadataTree.getchildren():
        if metadataNode.tag != 'run':
            xmlWriter.write(maspy.xml.recCopyElement(metadataNode),
                            pretty_print=True)
        else:
            xmlRun = xmlWriter.element(metadataNode.tag, metadataNode.attrib)
            xmlRun.__enter__()
            xmlWriter.write('\n')
            for runChild in metadataNode.getchildren():
                if runChild.tag == 'spectrumList':
                    specDefaultProcRef = runChild.attrib[
                        'defaultDataProcessingRef']
                elif runChild.tag == 'chromatogramList':
                    chromDefaultProcRef = runChild.attrib[
                        'defaultDataProcessingRef']
                else:
                    #TODO: maybe recCopy?
                    xmlRun.append(runChild)

            #If any spectra should be written, generate the spectrumList Node.
            #NOTE(review): specDefaultProcRef is only bound when the metadata
            #contained a spectrumList node - confirm this invariant holds
            if spectrumCounts > 0:
                specListAttribs = {
                    'count': str(spectrumCounts),
                    'defaultDataProcessingRef': specDefaultProcRef
                }
                xmlSpectrumList = xmlWriter.element('spectrumList',
                                                    specListAttribs)
                xmlSpectrumList.__enter__()
                xmlWriter.write('\n')

                for index, key in enumerate(spectrumIds):
                    smi = msrunContainer.smic[specfile][key]
                    sai = msrunContainer.saic[specfile][key]
                    #Store the spectrum element offset here
                    spectrumIndexList.append(
                        (outputFile.tell(), smi.attributes['id']))

                    xmlSpectrum = xmlSpectrumFromSmi(index, smi, sai)
                    xmlWriter.write(xmlSpectrum, pretty_print=True)

                xmlSpectrumList.__exit__(None, None, None)
                xmlWriter.write('\n')

            #If any chromatograms should be written, generate the
            #chromatogramList Node.
            if chromatogramCounts > 0:
                chromListAttribs = {
                    'count': str(chromatogramCounts),
                    'defaultDataProcessingRef': chromDefaultProcRef
                }
                xmlChromatogramList = xmlWriter.element(
                    'chromatogramList', chromListAttribs)
                xmlChromatogramList.__enter__()
                xmlWriter.write('\n')
                for index, key in enumerate(chromatogramIds):
                    ci = msrunContainer.cic[specfile][key]
                    #Store the chromatogram element offset here
                    chromatogramIndexList.append((outputFile.tell(), ci.id))

                    xmlChromatogram = xmlChromatogramFromCi(index, ci)
                    xmlWriter.write(xmlChromatogram, pretty_print=True)
                xmlChromatogramList.__exit__(None, None, None)
                xmlWriter.write('\n')

            xmlRun.__exit__(None, None, None)
            xmlWriter.write('\n')

    #Close the mzml node
    xmlMzml.__exit__(None, None, None)
    #Optional: write the indexedMzml nodes and close the indexedMzml node
    if writeIndex:
        xmlWriter.write('\n')
        indexListOffset = outputFile.tell()
        _writeMzmlIndexList(xmlWriter, spectrumIndexList,
                            chromatogramIndexList)
        _writeIndexListOffset(xmlWriter, indexListOffset)
        _writeMzmlChecksum(xmlWriter, outputFile)
        xmlIndexedMzml.__exit__(None, None, None)
    #Close the xml file
    xmlFile.__exit__(None, None, None)
    #Write the assembled in-memory document to disk in one pass
    filepath = aux.joinpath(outputdir, specfile + '.mzML')
    with open(filepath, 'wb') as openfile:
        openfile.write(outputFile.getvalue())
Ejemplo n.º 17
0
 def test_joinpath(self):
     """joinpath should concatenate segments with forward slashes."""
     result = MODULE.joinpath('C:/basedir', 'adir', 'afile.ext')
     self.assertEqual(result, 'C:/basedir/adir/afile.ext')
Ejemplo n.º 18
0
            except ValueError:
                if param[1] != referenceDict[param[0]][0]:
                    raise Exception(param)
            if param[2] != referenceDict[param[0]][1]:
                raise Exception(param)
        if paramType == 'userParam':
            if param[3] != referenceDict[param[0]][2]:
                raise Exception(param)
        if paramType == 'refParamGroup':
            observedRefParamGroups.append(param[1])
    if observedRefParamGroups != referenceDict['refParamGroups']:
        raise Exception(observedRefParamGroups)
    return True


#Load the test spectrum snippet; the file lives in the package's
#"tests/testdata" directory, located relative to the aux module
testfilepath = aux.joinpath(os.path.dirname(aux.__file__), os.pardir, 'tests',
                            'testdata', 'spectrum.xml')
with io.open(testfilepath, 'r', encoding='utf-8') as openfile:
    root = etree.XML(openfile.read())

#TESTING mzml.smiFromXmlSpectrum(), mzml.extractBinaries()
#Build a spectrum metadata item (smi) and spectrum array item (sai) for each
#spectrum node of the test file, using 'test' as the specfile keyword
spectra = list()
for xmlSpectrum in root.getchildren():
    smi, binaryDataArrayList = maspy.reader.smiFromXmlSpectrum(
        xmlSpectrum, 'test')
    sai = maspy.core.Sai(smi.id, smi.specfile)
    #Extract the binary data arrays; "defaultArrayLength" gives the expected
    #number of values per array
    sai.arrays, sai.arrayInfo = maspy.xml.extractBinaries(
        binaryDataArrayList, smi.attributes['defaultArrayLength'])
    spectra.append({'smi': smi, 'sai': sai})

#TESTING Smi, MzmlScan, MzmlPrecursor, TODO: add MzmlProduct
smi = spectra[0]['smi']
Ejemplo n.º 19
0
def rtCalibration(fiContainer, allowedRtDev=60, allowedMzDev=2.5,
                  reference=None, specfiles=None, showPlots=False,
                  plotDir=None, minIntensity=1e5):
    """Performs a retention time calibration between :class:`FeatureItem` of
    multiple specfiles.

    The first specfile (or the one given as "reference") serves as the
    reference run. Features of every other specfile are matched to it by m/z,
    charge and retention time, a spline is fitted to the observed rt
    deviations and each feature's ``rt`` attribute is overwritten with its
    corrected value. The original retention time is preserved as ``obsRt``.

    :param fiContainer: perform alignment on :class:`FeatureItem` in
        :attr:`FeatureContainer.specfiles`
    :param allowedRtDev: maximum retention time difference of two features in
        two runs to be matched
    :param allowedMzDev: maximum relative m/z difference (in ppm) of two
        features in two runs to be matched
    :param reference: can be used to specifically specify a reference specfile
    :param specfiles: limit alignment to those specfiles in the fiContainer
    :param showPlots: boolean, True if a plot should be generated which shows
        the results of the calibration
    :param plotDir: if not None and showPlots is True, the plots are saved to
        this location.
    :param minIntensity: consider only features with an intensity above this
        value
    """
    #TODO: long function, maybe split into subfunctions
    specfiles = [_ for _ in viewkeys(fiContainer.info)] if specfiles is None else specfiles
    matchCharge = True

    refMzKey = 'mz'
    mzKey = 'mz'

    #Move the reference specfile to the front so it is processed first and
    #therefore becomes the reference run
    if reference is not None:
        if reference in specfiles:
            specfiles = [reference] + list(set(specfiles).difference(set([reference])))
        else:
            print('Specified reference specfile not present, using reference: ', specfiles[0])

    #Preserve the original (observed) retention time before it is overwritten
    for featureItem in fiContainer.getItems(specfiles=specfiles):
        if not hasattr(featureItem, 'obsRt'):
            setattr(featureItem, 'obsRt', featureItem.rt)

    referenceArrays = None
    for specfile in specfiles:
        featureArrays = fiContainer.getArrays(['rt', 'charge', 'mz', 'intensity'],
                                              specfiles=specfile, sort='rt'
                                              )
        #Discard low intensity features, whose rt values are less reliable
        if minIntensity is not None:
            intensityMask = (featureArrays['intensity'] > minIntensity)
            for key in list(viewkeys(featureArrays)):
                featureArrays[key] = featureArrays[key][intensityMask]

        #The first processed specfile becomes the reference run
        if referenceArrays is None:
            referenceArrays = featureArrays
            if showPlots:
                print('Reference: '+specfile)
            continue

        rtPosList = list()
        rtDevList = list()
        mzDevRelList = list()
        mzDevAbsList = list()

        for featurePos in range(len(featureArrays[mzKey])):
            currRt = featureArrays['rt'][featurePos]
            currMz = featureArrays[mzKey][featurePos]
            currZ = featureArrays['charge'][featurePos]
            mzLimitUp = currMz*(1+allowedMzDev*1E-6)
            mzLimitLow = currMz*(1-allowedMzDev*1E-6)
            rtLimitUp = currRt+allowedRtDev
            rtLimitLow = currRt-allowedRtDev

            #Binary search restricts candidates to the allowed rt window;
            #valid because the arrays were fetched with sort='rt'
            posL = bisect.bisect_left(referenceArrays['rt'], rtLimitLow)
            posU = bisect.bisect_right(referenceArrays['rt'], rtLimitUp)

            refMask = (referenceArrays[refMzKey][posL:posU] <= mzLimitUp) & (referenceArrays[refMzKey][posL:posU] >= mzLimitLow)
            if matchCharge:
                refMask = refMask & (referenceArrays['charge'][posL:posU] == currZ)

            currMzDev = abs(referenceArrays[refMzKey][posL:posU][refMask] - currMz)
            bestHitMask = currMzDev.argsort()
            #Only the candidate with the smallest absolute m/z deviation is
            #kept, hence the immediate "break" after the first iteration
            for refRt, refMz in zip(referenceArrays['rt'][posL:posU][refMask][bestHitMask],
                                    referenceArrays[refMzKey][posL:posU][refMask][bestHitMask]):
                rtPosList.append(currRt)
                rtDevList.append(currRt - refRt)
                mzDevRelList.append((1 - currMz / refMz)*1E6)
                mzDevAbsList.append(currMz - refMz)
                break

        rtPosList = numpy.array(rtPosList)
        rtDevList = numpy.array(rtDevList)

        #Fit a smoothing spline to rt deviation over rt, using roughly one
        #initial knot per rt unit of the covered range.
        #NOTE(review): raises on an empty rtPosList, i.e. when no features
        #could be matched to the reference - confirm this is intended
        splineInitialKnots = int(max(rtPosList) - min(rtPosList))
        dataFit = aux.DataFit(rtDevList, rtPosList)
        dataFit.splineInitialKnots = splineInitialKnots
        dataFit.splineTerminalExpansion = 0.2
        dataFit.processInput(dataAveraging='median', windowSize=10)
        dataFit.generateSplines()

        #Diagnostic plots: rt / m/z deviations before and after correction
        if showPlots:
            corrDevArr = rtDevList - dataFit.corrArray(rtPosList)
            timePoints = [min(rtPosList) + x for x in range(int(max(rtPosList)-min(rtPosList)))]
            corrValues  = dataFit.corrArray(timePoints)
            fig, ax = plt.subplots(3, 2, sharex=False, sharey=False, figsize=(20, 18))
            fig.suptitle(specfile)
            ax[0][0].hist(rtDevList, bins=100, color='grey', alpha=0.5, label='observed')
            ax[0][0].hist(corrDevArr, bins=100, color='red', alpha=0.5, label='corrected')
            ax[0][0].set_title('Retention time deviation')
            ax[0][0].legend()
            ax[0][0].set_xlim(allowedRtDev*-1, allowedRtDev)
            ax[0][1].hist(mzDevRelList, bins=100, color='grey')
            ax[0][1].set_title('Mz deviation [ppm]')
            ax[1][0].scatter(rtPosList, rtDevList, color='grey', alpha=0.1, label='observed')
            ax[1][0].plot(timePoints,corrValues, color='red', alpha=0.5, label='correction function')
            ax[1][0].set_title('Retention time deviation over time')
            ax[1][0].legend()
            ax[1][0].set_ylim(allowedRtDev*-1, allowedRtDev)
            ax[1][1].scatter(rtPosList, mzDevRelList, color='grey', alpha=0.1)
            ax[1][1].set_title('Mz deviation over time')
            ax[1][1].set_ylim(allowedMzDev*-1, allowedMzDev)
            ax[2][0].scatter(rtPosList, corrDevArr, color='grey', alpha=0.1)
            ax[2][0].set_title('Aligned retention time deviation over time')
            ax[2][0].set_ylim(allowedRtDev*-1, allowedRtDev)
            if plotDir is not None:
                plotloc = aux.joinpath(plotDir, specfile+'.rtAlign.png')
                fig.savefig(plotloc)
            else:
                fig.show()

        #Apply the correction: overwrite each feature's rt attribute with the
        #spline-corrected value
        featureArrays = fiContainer.getArrays(['rt'], specfiles=specfile, sort='rt')
        featureArrays['corrRt'] = featureArrays['rt'] - dataFit.corrArray(featureArrays['rt'])
        for featureId, corrRt, rt in zip(featureArrays['id'], featureArrays['corrRt'], featureArrays['rt']):
            fiContainer.container[specfile][featureId].rt = corrRt


##TODO: Code is deprecated, new classes are currently located in maspy.featuregrouping
#class FeatureGroupItem(object):
#    """Representation of a group of :class:`FeatureItem`.
#
#    :ivar isMatched: False by default, True if any :class:`FeatureItem` in the group are matched.
#    :ivar isAnnotated: False by default, True if any :class:`FeatureItem` in the group are annotated.
#    :ivar siIds: containerId values of matched Si entries
#    :ivar siiIds: containerId values of matched Sii entries
#    :ivar featureIds: containerId values of :class:`FeatureItem` in the feature group
#    :ivar peptide: peptide sequence of best scoring Sii match
#    :ivar sequence: plain amino acid sequence of best scoring Sii match, used to retrieve protein information
#    :ivar score: score of best scoring Sii match
#    :ivar matchMatrix: structured representation of :attr:`FeatureItem.containerId` in the feature group.
#    :ivar intensityMatrix: similar to :attr:`matchMatrix` but contains :attr:`FeatureItem.intensity` values.
#    {chargeState: 2d numpy.array with specfiles as 1st dimension and labelState as 2nd dimension}
#    """
#    def __init__(self):
#        self.isMatched = None
#        self.isAnnotated = None
#        self.siIds = list()
#        self.siiIds = list()
#        self.featureIds = list()
#        self.peptide = None
#        self.sequence = None
#        self.score = None
#        self.matchMatrix = dict()
#        self.intensityMatrix = dict()
#
#
#class FeatureGroupContainer(object):
#    """ItemContainer for peptide feature groups :class`FeatureGroupItem`.
#
#    :ivar container: Storage list of :class:`FeatureGroupItem`
#    :ivar index: Use :attr:`FeatureItem.containerId` to which :class:`FeatureGroupItem` the feature was grouped
#    :ivar labelDescriptor: :class:`maspy.sil.LabelDescriptor` describes the label setup of an experiment
#    :ivar specfiles: List of keywords (filenames) representing files
#    :ivar specfilePositions: {specfile:arrayPosition, ...}
#    arrayPosition represents the array position of a specfile in :attr:`FeatureGroupItem.matchMatrix`
#    """
#    def __init__(self, specfiles, labelDescriptor=None):
#        self.container = dict()
#        self.labelDescriptor = maspy.sil.LabelDescriptor() if labelDescriptor is None else labelDescriptor
#        self._index = 0
#
#        self.info = dict()
#        for position, specfile in enumerate(specfiles):
#            self.info[specfile] = {'matrixPosition': position}
#
#    def getItems(self, specfiles=None, sort=False, reverse=False, selector=lambda fgi: True):
#        """Generator that yields filtered and/or sorted :class:`Si` objects from :instance:`self.sic`
#
#        :param specfiles: filenames of msrun files - if specified return only items from those files
#        :type specfiles: str or [str, str, ...]
#        :param sort: if "sort" is specified the returned list of items is sorted according to the :class:`Si`
#        attribute specified by "sort", if the attribute is not present the item is skipped.
#        :param reverse: boolean to reverse sort order
#        :param selector: a function which is called with each :class:`Si` item and returns
#        True (include item) or False (discard item). If not specified all items are returned
#        """
#        specfiles = [_ for _ in viewkeys(self.info)] if specfiles is None else aux.toList(specfiles)
#        return _getItems(self.container, specfiles, sort, reverse, selector)
#
#    def getArrays(self, report='lfq', attr=None, specfiles=None, sort=False, reverse=False, selector=lambda si: True, defaultValue=None):
#        """Return a condensed array of data selected from :class:`Si` objects of :instance:`self.sic`
#        for fast and convenient data processing.
#
#        :param attr: list of :class:`Si` item attributes that should be added to the returned array.
#        If an attribute is not present the "defaultValue" is added instead. The attributes "id" and "specfile"
#        are always included, in combination they serve as a unique id.
#        :param specfiles: filenames of msrun files - if specified return only items from those files
#        :type specfiles: str or [str, str, ...]
#        :param sort: if "sort" is specified the returned list of items is sorted according to the :class:`Si`
#        attribute specified by "sort", if the attribute is not present the item is skipped.
#        :param reverse: boolean to reverse sort order
#        :param selector: a function which is called with each :class:`Si` item and returns
#        True (include item) or False (discard item). If not specified all items are returned
#
#        return {'attribute1': numpy.array(), 'attribute1': numpy.array(), ...}
#        """
#        attr = attr if attr is not None else []
#        attr = set(['id', 'specfile'] + aux.toList(attr))
#        specfiles = [_ for _ in viewkeys(self.info)] if specfiles is None else aux.toList(specfiles)
#
#        arrays = arrays = dict([(key, []) for key in attr])
#        reportAttributes = list()
#        if report == 'lfq':
#            arrays['charge'] = list()
#            arrays['labelState'] = list()
#            for specfile in self.specfiles:
#                arrays[specfile] = list()
#                reportAttributes.append(specfile)
#        elif report == 'sil':
#            arrays['charge'] = list()
#            arrays['specfile'] = list()
#            for labelState in list(viewkeys(self.labelDescriptor.labels)) + [-1]:
#                labelAttributeName = ' '.join(('label:', str(labelState)))
#                arrays[labelAttributeName] = list()
#                reportAttributes.append(labelAttributeName)
#
#        if report == 'sil':
#            for item in _getItems(self.container, specfiles, sort, reverse, selector):
#                for charge in viewkeys(item.intensityMatrix):
#                    for specfile in specfiles:
#                        specfilePosition = self.info[specfile]['matrixPosition']
#                        for key in attributes:
#                            arrays[key].append(getattr(item, key, None))
#                        arrays['charge'].append(charge)
#                        arrays['specfile'].append(specfile)
#                        for labelState in list(viewkeys(self.labelDescriptor.labels)) + [-1]:
#                            labelAttributeName = ' '.join(('label:', str(labelState)))
#                            arrays[labelAttributeName].append(item.intensityMatrix[charge][specfilePosition, labelState])
#        elif report == 'lfq':
#            for item in _getItems(self.container, specfiles, sort, reverse, selector):
#                for charge in viewkeys(item.intensityMatrix):
#                    for labelState in list(viewkeys(self.labelDescriptor.labels)) + [-1]:
#                        for key in attributes:
#                            arrays[key].append(getattr(item, key, None))
#                        arrays['charge'].append(charge)
#                        arrays['labelState'].append(labelState)
#                        for specfile in specfiles:
#                            specfilePosition = self.info[specfile]['matrixPosition']
#                            arrays[specfile].append(item.intensityMatrix[charge][specfilePosition, labelState])
#        else:
#            raise Exception('report must be either "lfq" or "sil", not '+report)##
#
#        for key in  [_ for _ in viewkeys(arrays)]:
#            if key in reportAttributes:
#                arrays[key] = numpy.array(arrays[key], dtype=numpy.float64)
#            else:
#                arrays[key] = numpy.array(arrays[key])
#        return arrays
Ejemplo n.º 20
0
 def test_joinpath(self):
     """joinpath should produce a forward-slash separated path."""
     expected = 'C:/basedir/adir/afile.ext'
     self.assertEqual(MODULE.joinpath('C:/basedir', 'adir', 'afile.ext'),
                      expected)