Example #1
0
File: mgf.py Project: hdinkel/isob
    def export(self, hcdonly=0):
        """
        @brief creates an mgf file for the MS/MS spectra in the hdf5 file
        @param hcdonly <integer>: flag to switch output to only HCD spectra bypassing the normal export filters
        @raise ExHa.MGFprocessingError: when the "MS Run Time (min)" parameter is missing from the hdf5 file or a
               spectrum has an unrecognised 'use' value; the id of the spectrum being processed (if any) is
               added to the exception context before it is re-raised

        The .mgf file is written next to the hdf5 file (same stem, '.mgf' suffix).  Every ion written to the
        .mgf file is also appended to the freshly re-created '/rawdata/deconvions' table.
        """
        if hcdonly:
            remove = ['CID']
            filters = ['none']
        else:
            remove = self.remove
            filters = self.usefilts

        hdf = self.hdf5
        mgfFile = hdf.filePath.parent.joinpath(hdf.filePath.stem + '.mgf')

        # id of the spectrum currently being processed; 0 means "none yet" and
        # is used by the error handler to decide whether to add context
        spec = 0

        # the context manager guarantees the output handle is closed on both
        # the normal and the error path
        with open(str(mgfFile), 'w') as mgfOut:
            mgfOut.write('#Removed spectra = %s, filtering = %s\n' %
                         (remove, filters))

            try:
                # read parameters from hdf5 file
                hdf.appendOpen()
                headers = hdf.readTable('/rawdata/msmsheader')
                runTimeEntry = hdf.getDataEqual('/rawdata/parameters',
                                                'parameter',
                                                'MS Run Time (min)')
                if len(runTimeEntry) == 0:
                    raise ExHa.MGFprocessingError(
                        'MGF Error: Could not find "MS Run Time (min)" parameter in HDF5 file.'
                    )
                runtime = runTimeEntry[0]['value']
                units = self.readUnitsOK()

                # add new table for the deconvoluted spectrum data
                hdf.removeTable('/rawdata/deconvions')
                hdf.createTable('rawdata', 'deconvions', 'DeconvIons')

                # find all the frag methods to be used in identification
                ident = [frag['order'] for frag in units[1]
                         if 'I' in frag['use']]

                logger.log.info('Reading %d spectra from %s' %
                                (len(headers), hdf.filePath.name))
                deconv = 'deconv' in filters

                pBar = progBar.ProgressBar(widgets=progBar.name_widgets,
                                           maxval=len(headers),
                                           name='Create .mgf').start()
                for idx, h in enumerate(headers):
                    if hcdonly:
                        if h['fragmeth'] != 'HCD':
                            continue
                    elif h['order'] not in ident:
                        continue
                    pBar.update(idx)

                    # get spectrum data
                    spec = h['spec_id']
                    spectrum = hdf.getDataEqual('/rawdata/ions', 'spec_id',
                                                spec)
                    if deconv:
                        # need extra column for charge information
                        spectrum = self.addChargeColumn(spectrum)

                    data = hdf.getDataGeneral(
                        '/rawdata/specparams',
                        '(spec_id == %i) & (parameter == "%s")' %
                        (spec, 'setmass1'))
                    setmass = data[0]['value']
                    data = hdf.getDataGeneral(
                        '/rawdata/specparams',
                        '(spec_id == %i) & (parameter == "%s")' %
                        (spec, 'frag1'))
                    frag = data[0]['value']
                    try:
                        self.maxint = max(spectrum['inten'])
                    except Exception:
                        # best effort: e.g. a spectrum without ions has no
                        # maximum; unlike a bare except this does not swallow
                        # KeyboardInterrupt / SystemExit
                        self.maxint = 0

                    # construct title values list
                    rt = '%.3f' % h['rt']
                    use = units[1][h['order'] - 1]['use']
                    pretitle = ''
                    if use == 'IQ' or (use == 'I' and h['quan_spec'] == 0):
                        # spec is both ID and Quan, or an ID spec without
                        # quant data: use its own spec_id as the msms ID
                        titles = ['msmsid:F%06d' % h['spec_id']]
                    elif use in ('I', 'Q'):
                        # title refers to the quan spec, the ID spec is
                        # recorded in a comment line ahead of BEGIN IONS
                        titles = ['msmsid:F%06d' % h['quan_spec']]
                        pretitle = '#CID=F%06d\n' % h['id_spec']
                    else:
                        # previously this fell through and either hit an
                        # unbound 'titles' or reused the previous spectrum's
                        # list, silently writing a wrong TITLE line
                        raise ExHa.MGFprocessingError(
                            'MGF Error: Unrecognised use value "%s" for fragmentation order %s.'
                            % (use, h['order']))

                    titles.append('rt:' + rt)
                    titles.append('survey:S%06d' % h['survey_spec'])
                    titles.append('parent:' + setmass)
                    titles.append('AnalTime:' + runtime)
                    titles.append('Activation:' + frag.upper())

                    titleline = 'TITLE=%s\n' % ','.join(titles)

                    # precursor m/z: first positive value of precmz,
                    # precmz_surv, otherwise fall back to monomz
                    if h['precmz'] > 0:
                        pepmass = h['precmz']
                    elif h['precmz_surv'] > 0:
                        pepmass = h['precmz_surv']
                    else:
                        pepmass = h['monomz']

                    if pepmass == 0:
                        # no usable precursor mass: spectrum is not exported
                        continue

                    for filt in filters:
                        # entries of self.filters that are falsy are skipped,
                        # as are spectra with 5 or fewer points
                        if len(spectrum) > 5 and self.filters[filt]:
                            spectrum = self.filters[filt](h, spectrum)

                    # filter for mascot interference
                    ionList = []
                    if len(spectrum) > 2:
                        mgfOut.write(pretitle)
                        mgfOut.write('BEGIN IONS\n')
                        mgfOut.write(titleline)
                        mgfOut.write('PEPMASS=%f\n' % pepmass)
                        mgfOut.write('CHARGE=%d+\n' % h['charge'])
                        for pt in spectrum:
                            if pt['inten'] == 0:
                                continue
                            ion = dict(spec_id=pt['spec_id'],
                                       mz=pt['mz'],
                                       inten=pt['inten'])
                            if deconv:
                                # deconvoluted peaks carry a charge column
                                mgfOut.write(
                                    '%f  %f  %s\n' %
                                    (pt['mz'], pt['inten'], pt['charge']))
                                ion['charge'] = pt['charge']
                            else:
                                mgfOut.write('%f  %f\n' %
                                             (pt['mz'], pt['inten']))
                            ionList.append(ion)
                        mgfOut.write('END IONS\n\n')
                    if len(ionList) > 0:
                        hdf.appendRows('/rawdata/deconvions', ionList)

                pBar.finish()

            except ExHa.MGFprocessingError as czEx:
                if spec:
                    ExHa.addContext(
                        czEx, 'Raised whist processing spectrum %i' % spec)
                raise
Example #2
0
File: mgf.py Project: hdinkel/isob
        for f in dataDir.glob(fileFilter):
            if not f.is_file():
                # skip any directories
                continue

            # if f.name[:4] in ['6528', '1814', '2032']: continue
            mgf = mgftools(f)
            logger.log.info('Filename:     %s' % f.name)
            if hcdOnly:
                logger.log.info('Export HCD data only')
            else:
                logger.log.info('Using filters: %s' % str(mgf.usefilts))
            rtn = mgf.export(hcdOnly)
            mgf.close()
        if f == 0:
            raise ExHa.MGFprocessingError('no files found for: %s' %
                                          str(dataDir / fileFilter))

    except ExHa.UsageError as useEx:
        ExHa.reformatException(useEx)
        logger.log.info(useEx.context)
    except Exception, genEx:
        ExHa.reformatException(genEx)
        if f:
            ExHa.exportError2File(genEx, f.parent.joinpath(f.stem + '.error'))
        else:
            ExHa.exportError2File(genEx, dataDir.joinpath('errors.error'))
        logger.log.info(ExHa.multiLineRepr(genEx))

    logger.log.info('finished')