def export(self, hcdonly=0):
    """
    @brief creates an mgf file for the MS/MS spectra in the hdf5 file

    The .mgf file is written beside the HDF5 file (same stem, '.mgf' suffix).  Every ion
    line written to the .mgf file is also appended to the '/rawdata/deconvions' table,
    which is removed and re-created at the start of each call.

    @param hcdonly <integer>: flag to switch output to only HCD spectra bypassing the normal export filters
    @return None
    @raise ExHa.MGFprocessingError: when the 'MS Run Time (min)' parameter is missing from
           '/rawdata/parameters'; any MGFprocessingError raised after the first spectrum has
           been selected gets the current spec_id added as context before being re-raised
    """
    if hcdonly:
        remove = ['CID']
        filters = ['none']
    else:
        remove = self.remove
        filters = self.usefilts

    hdf = self.hdf5
    mgfFile = hdf.filePath.parent.joinpath(hdf.filePath.stem + '.mgf')

    # spec_id of the spectrum being processed; 0 means no spectrum has been selected yet
    spec = 0

    # the context manager closes the .mgf handle on normal exit and when an error propagates
    with open(str(mgfFile), 'w') as mgfOut:
        mgfOut.write('#Removed spectra = %s, filtering = %s\n' % (remove, filters))

        try:
            # read parameters from hdf5 file
            hdf.appendOpen()
            headers = hdf.readTable('/rawdata/msmsheader')
            runTimeEntry = hdf.getDataEqual('/rawdata/parameters', 'parameter', 'MS Run Time (min)')
            if len(runTimeEntry) == 0:
                raise ExHa.MGFprocessingError(
                    'MGF Error: Could not find "MS Run Time (min)" parameter in HDF5 file.'
                )
            runtime = runTimeEntry[0]['value']
            units = self.readUnitsOK()

            # add new table for the deconvoluted spectrum data
            hdf.removeTable('/rawdata/deconvions')
            hdf.createTable('rawdata', 'deconvions', 'DeconvIons')

            # find all the frag methods to be used in identification
            ident = [frag['order'] for frag in units[1] if 'I' in frag['use']]

            logger.log.info('Reading %d spectra from %s' % (len(headers), hdf.filePath.name))

            # loop invariant: when set, ion lines and stored rows carry a charge value
            deconv = 'deconv' in filters

            pBar = progBar.ProgressBar(widgets=progBar.name_widgets, maxval=len(headers),
                                       name='Create .mgf').start()

            for idx, h in enumerate(headers):
                # HCD-only mode selects on fragmentation method, otherwise on the ident orders
                if hcdonly:
                    if h['fragmeth'] != 'HCD':
                        continue
                elif h['order'] not in ident:
                    continue
                pBar.update(idx)

                # get spectrum data
                spec = h['spec_id']
                spectrum = hdf.getDataEqual('/rawdata/ions', 'spec_id', spec)
                if deconv:
                    # need extra column for charge information
                    spectrum = self.addChargeColumn(spectrum)

                data = hdf.getDataGeneral('/rawdata/specparams',
                                          '(spec_id == %i) & (parameter == "%s")' % (spec, 'setmass1'))
                setmass = data[0]['value']
                data = hdf.getDataGeneral('/rawdata/specparams',
                                          '(spec_id == %i) & (parameter == "%s")' % (spec, 'frag1'))
                frag = data[0]['value']

                # best effort: fall back to 0 when no maximum can be taken (e.g. no ions),
                # without swallowing KeyboardInterrupt / SystemExit as a bare except would
                try:
                    self.maxint = max(spectrum['inten'])
                except Exception:
                    self.maxint = 0

                # construct title values list
                rt = '%.3f' % h['rt']
                use = units[1][h['order'] - 1]['use']
                pretitle = ''
                # NOTE(review): a 'use' value other than 'IQ', 'I' or 'Q' leaves titles unset
                # (or stale from the previous spectrum) - confirm these are the only values
                if use == 'IQ':
                    # spec is both ID and Quan so use normal msms ID
                    titles = ['msmsid:F%06d' % h['spec_id']]
                elif use == 'I':
                    if h['quan_spec'] == 0:
                        # no quant data so use spec_id
                        titles = ['msmsid:F%06d' % h['spec_id']]
                    else:
                        # spec is only for ident, find the quan spec
                        titles = ['msmsid:F%06d' % h['quan_spec']]
                        pretitle = '#CID=F%06d\n' % h['id_spec']
                elif use == 'Q':
                    titles = ['msmsid:F%06d' % h['quan_spec']]
                    pretitle = '#CID=F%06d\n' % h['id_spec']

                titles.append('rt:' + rt)
                titles.append('survey:S%06d' % h['survey_spec'])
                titles.append('parent:' + setmass)
                titles.append('AnalTime:' + runtime)
                titles.append('Activation:' + frag.upper())
                titleline = 'TITLE=%s\n' % ','.join(titles)

                # precursor m/z preference: precmz, then precmz_surv, then monomz
                if h['precmz'] > 0:
                    pepmass = h['precmz']
                elif h['precmz_surv'] > 0:
                    pepmass = h['precmz_surv']
                else:
                    pepmass = h['monomz']

                # no usable precursor m/z: nothing is exported for this spectrum
                if pepmass == 0:
                    continue

                # each filter is only applied while more than 5 ions remain and only when
                # the entry registered under its name is truthy
                for filt in filters:
                    if len(spectrum) > 5 and self.filters[filt]:
                        spectrum = self.filters[filt](h, spectrum)

                # filter for mascot interference
                ionList = []
                if len(spectrum) > 2:
                    mgfOut.write(pretitle)
                    mgfOut.write('BEGIN IONS\n')
                    mgfOut.write(titleline)
                    mgfOut.write('PEPMASS=%f\n' % pepmass)
                    mgfOut.write('CHARGE=%d+\n' % h['charge'])

                    for pt in spectrum:
                        # zero intensity ions are neither written nor stored
                        if pt['inten'] == 0:
                            continue

                        ion = dict(spec_id=pt['spec_id'], mz=pt['mz'], inten=pt['inten'])
                        if deconv:
                            mgfOut.write('%f %f %s\n' % (pt['mz'], pt['inten'], pt['charge']))
                            ion['charge'] = pt['charge']
                        else:
                            mgfOut.write('%f %f\n' % (pt['mz'], pt['inten']))
                        ionList.append(ion)

                    mgfOut.write('END IONS\n\n')

                if len(ionList) > 0:
                    hdf.appendRows('/rawdata/deconvions', ionList)

            pBar.finish()

        except ExHa.MGFprocessingError as czEx:
            # only add spectrum context once a spectrum has actually been selected
            if spec:
                ExHa.addContext(czEx, 'Raised whist processing spectrum %i' % spec)
            raise
for f in dataDir.glob(fileFilter): if not f.is_file(): # skip any directories continue # if f.name[:4] in ['6528', '1814', '2032']: continue mgf = mgftools(f) logger.log.info('Filename: %s' % f.name) if hcdOnly: logger.log.info('Export HCD data only') else: logger.log.info('Using filters: %s' % str(mgf.usefilts)) rtn = mgf.export(hcdOnly) mgf.close() if f == 0: raise ExHa.MGFprocessingError('no files found for: %s' % str(dataDir / fileFilter)) except ExHa.UsageError as useEx: ExHa.reformatException(useEx) logger.log.info(useEx.context) except Exception, genEx: ExHa.reformatException(genEx) if f: ExHa.exportError2File(genEx, f.parent.joinpath(f.stem + '.error')) else: ExHa.exportError2File(genEx, dataDir.joinpath('errors.error')) logger.log.info(ExHa.multiLineRepr(genEx)) logger.log.info('finished')