Ejemplo n.º 1
0
        for isotopelabel_id, data in allsumionratiodata.iteritems():
            allfractionbgratios[isotopelabel_id] = np.median(data)
        # second normalization so that the bg-ratios all add to 1
        for isotopelabel_id, data in allfractionbgratios.iteritems():
            allfractionbgratios[isotopelabel_id] = data / sum(allfractionbgratios.values())
        logger.log.debug(('allfractionbgratios are %s' % str(allfractionbgratios)))
        for corrects2iquantob in corrects2iquantoblist:
            # perform correction for each of the analyzed .hdf5 files.
            s2icorrecteddata = corrects2iquantob.performS2Icorrection(allfractionbgratios)
            corrects2iquantob.hdf5corrects2iquant.updates2ivalues(s2icorrecteddata)
            hdf5corrects2iquant.close()

    except ExHa.czException as czEx:
        ExHa.reformatException(czEx)
        ExHa.addContext(czEx, 'Error during corrects2iquant run')
        ExHa.exportError2File(czEx, cfg.parameters['runtime']['datadir'] / Path('errors.error'))
        if logger:
            logger.log.warning(ExHa.oneLineRepr(czEx))
        else:
            print ExHa.multiLineRepr(czEx)

    except Exception as genEx:

        ExHa.reformatException(genEx)
        ExHa.addContext(genEx, 'Error during corrects2iquant run')
        ExHa.exportError2File(genEx, cfg.parameters['runtime']['datadir'] / 'errors.error')
        if logger:
            logger.log.warning(ExHa.oneLineRepr(genEx))
        else:
            print ExHa.multiLineRepr(genEx)
Ejemplo n.º 2
0
    def updateHDF5(self):
        """
        @brief controls the updating of the data to the hdf5 results file

        Collates spectrum, peptide and quantification data from every
        analyzed .hdf5 file, writes them to the results file via the
        module-level hdf5results object, then records FDR, protein-set and
        summary statistics.

        @return finalMessage <string>: constructed from the protein data this is the RESULT stored in the DB
        """
        pep2unique = self.pep2unique
        # baseContext/context carry a human-readable breadcrumb that is
        # attached to any exception raised below
        baseContext = 'updateHDF5: '
        context = 'updateHDF5'
        try:
            # find the peptide sequences that are being imported
            usedPeps = self.setsManager.findUsedPeptides()
            logger.log.info('there are %s usedPeps' % len(usedPeps))

            context = baseContext + 'Retrieving sample IDs'

            # one sample_id per analyzed file
            sample_ids = range(1, len(self.hdfFiles) + 1)
            # create proteinset and proteinhit data
            starting_protein_group_no = 1
            self.setsManager.setProteinGroupNo(starting_protein_group_no)

            logger.log.info('adding protein group data to HDF5')

            logger.log.debug(str(self.hdfFiles.keys()))
            # ids run sequentially across ALL files
            spectrum_id = 0
            peptide_id = 0
            hdfFileList = self.hdfFiles.keys()
            hdfFileList.sort()

            for key in hdfFileList:
                baseContext += '%s: ' % key
                logger.log.log(
                    logger.PROCESS,
                    'Integrating Spectrum, Peptide & Quantification data from %s'
                    % key)
                # collect fileData
                hdf = self.hdfFiles[key]
                hdfObj = hdf.hdfObject

                # set the current sample_id from the list of IDs extracted from the DB
                # NOTE(review): pop() takes ids from the END of the list, so
                # the alphabetically sorted files receive descending sample
                # ids -- confirm this ordering is intended.
                current_sample_id = sample_ids.pop()

                hdf.acquired_spectra, hdf.mascot_matched_spectra, numIsotopes, runTime = hdfObj.getNumbers(
                )

                # read the Mascot data
                context = baseContext + 'Reading Mascot data'
                tmp = hdfObj.readImporterData(usedPeps, hdf)
                peptides = tmp[0]
                queryDict = tmp[1]
                headerArray = tmp[2]
                quanArray = tmp[3]

                hdf.spectra_in_qc_proteins = len(peptides)

                logger.log.debug('getting spectrum_ids')
                context = baseContext + 'Retrieving spectrum IDs'

                acqTime, hdf.idAct, hdf.quanAct = hdfObj.getTimeAndActivation()
                # create blank lists to hold data for writing to hdf5 file
                spectrum_list = []
                peptide_list = []
                quant_list = []
                logger.log.info('collating spectrum, peptide & quant data')
                pBar = progBar.ProgressBar(widgets=progBar.name_widgets,
                                           maxval=len(queryDict),
                                           name='collate data').start()
                for idx, q in enumerate(queryDict):
                    # loop round all the required spectra
                    pBar.nextPrimary()
                    context = baseContext + 'query %i: Setting spectrum data' % q
                    # extract a spectrum_id from the list
                    spectrum_id += 1
                    query = queryDict[q]
                    spec = int(query['spec_id'])
                    context = baseContext + 'spectrum %i: Updating DB with spectrum data' % spec
                    # add spectrum data to spectrum_list
                    header = self.filterArrayEqual(headerArray, 'spec_id',
                                                   spec)
                    spectrum_list.append(
                        self.makeSpectrumDict(spectrum_id, current_sample_id,
                                              query, acqTime, header))

                    # find the appropriate peptides
                    pepList = peptides[q]
                    logger.log.debug('there are %s in peplist %s' %
                                     (len(pepList), str(pepList)))
                    quantFound = 0

                    # this list will hold all peptides returned from makePeptideDictList and then filter
                    # those non-rank1 equivalents based on the score of the rank 1 peptide
                    tmplist = []
                    for pep in pepList:
                        # find the sets that the peptide belongs to and add to the peptide_list
                        sets = self.setsManager.peptide2set[pep['peptide']]
                        context = baseContext + 'spectrum %i: Creating peptide data entries for hdf5' % spec
                        tmp, qf = self.makePeptideDictList(
                            spectrum_id, pep, query, sets, hdf, pep2unique)
                        tmplist.extend(tmp)
                        peptide_list += tmp
                        quantFound += qf

                    # only keep rank1 equivalent peptides (based on score)
                    # NOTE(review): tmplist[0] raises IndexError if pepList is
                    # empty -- presumably readImporterData guarantees at least
                    # one peptide per returned query; confirm.
                    tmplist.sort(key=lambda x: x['rank'])
                    toprankscore = tmplist[0]['score']
                    tmplist = [
                        x for x in tmplist if x['score'] == toprankscore
                    ]

                    if quantMethID and quantFound:
                        # extract quantification data for the spectrum
                        context = baseContext + 'spectrum %i: Creating quantitation data entries for DB' % spec
                        newquant, deltas = self.makeQuantDictLists(
                            spectrum_id, spec, tmplist, header, quanArray, hdf)

                        quant_list += newquant

                        if quantSource == 'ms2':
                            context = baseContext + 'spectrum %i: Adding reporter ion delta data' % spec
                            hdf.addReporterDeltas(deltas)
                pBar.finish()

                # calculate statistics
                context = baseContext + 'Calculating statistics'
                hdf.calcReporterStats()
                context = baseContext + 'Calculating delta m/z for fragment ions'

                context = baseContext + 'Updating sample table (%i)' % current_sample_id
                sample_data = hdf.getSampleDataDict(current_sample_id, key,
                                                    runTime)

                hdf5results.writeSample(sample_data)

                self.importData.combineStatistics(hdf)

                # write data to HDF5
                context = baseContext + 'Updating spectrum table'
                logger.log.info('updating HDF5 with spectrum data')
                hdf5results.writeSpectrum(spectrum_list)

                if quantMethID:
                    context = baseContext + 'Updating specquant table'
                    logger.log.info('updating HDF5 with quant data')
                    hdf5results.writeSpecQuant(quant_list)

                context = baseContext + 'Retrieving peptide IDs'
                logger.log.info('updating HDF5 with peptide data')
                # assign sequential peptide ids across all files
                for pepdata in peptide_list:
                    pepdata['peptide_id'] = peptide_id
                    peptide_id += 1

                context = baseContext + 'Updating peptide table'
                hdf5results.writePeptide(peptide_list)
            hdf5results.createIndexes()

            logger.log.info('finalising HDF5 entries')
            hdf5results.writeFDRdata(self.importData.score2fdr, 'peptide')
            hdf5results.writeFDRdata(self.importData.proteinscore2fdr,
                                     'protein')

            topScoringProteinInfo = self.setsManager.addPeptideSetDBdata(
                hdf5results, self.importData.proteinscore2fdr)
            runtimedata = self.importData.getSummaryStatisticsDict()

            hdf5results.writeStatistics(runtimedata)

            # build the human-readable RESULT string stored in the DB
            finalMessage = 'queries matched: %i / %s (%.1f%%) ' % (
                runtimedata['spectra_in_qc_proteins'],
                runtimedata['mascot_matched_spectra'],
                (runtimedata['spectra_in_qc_proteins'] /
                 float(runtimedata['mascot_matched_spectra'])) * 100)
            finalMessage += 'spectra quantified: %i top hit %s (%s) ' % (
                runtimedata['quantified_spectra'], '', '')
            finalMessage += 'with total score %f and %i matched peptides (hook AND non hook)' % \
                            (topScoringProteinInfo[0], topScoringProteinInfo[2])

            baseContext = 'updateHDF5: '
            context = baseContext + 'Finalising HDF5 entries'
        # CONSISTENCY FIX: use the 'except ... as ...' form used by every
        # other handler in this file (the comma form is Python-2-only syntax).
        except Exception as genEx:
            # make sure that there aren't any permanent changes
            ExHa.addContext(genEx, context)
            finalMessage = 'Error: %s' % ExHa.oneLineRepr(genEx)
            raise
Ejemplo n.º 3
0
    ret = cfg.evaluateCommandLineArgs(sys.argv)

    try:
        # normalise the ppm/mda tolerance parameters before processing
        cfg.scalePpmMda()
        dataDir = cfg.parameters['runtime']['datadir']

        # create the log directory/file before any work starts
        logParam = cfg.parameters['logging']
        logPath = Path(dataDir.joinpath(logParam['logdir']))
        if not logPath.exists():
            logPath.mkdir(parents=True)
        logFile = logPath.joinpath(logParam['logfile'])

        logger = Logger(logFile, logParam['loglevel'], logParam['screenlevel'],
                        False)
        logger.setMascotParserLogs()

        # run the actual job with the prepared config and logger
        jobcontrol(cfg, logger)

    except ExHa.UsageError as useEx:
        # usage errors only get reported to the console
        ExHa.reformatException(useEx)
        print useEx.context
    except Exception as genEx:
        # any other failure: persist an .error file next to the data
        ExHa.reformatException(genEx)
        errorFile = Path(cfg.parameters['runtime']['hdf5file']).stem + '.error'
        ExHa.exportError2File(
            genEx, cfg.parameters['runtime']['datadir'].joinpath(errorFile))
        # NOTE(review): 'logs' is never assigned in this excerpt (the logger
        # created above is bound to 'logger'); unless 'logs' is a module-level
        # name defined elsewhere, this handler raises NameError -- confirm.
        if logs:
            logs.datlog.warning(ExHa.oneLineRepr(genEx))
        else:
            print ExHa.multiLineRepr(genEx)
Ejemplo n.º 4
0
                        importer.importData.mascot_matched_spectra)
        # post-import summary counts
        logger.log.info('Total Spectra Matched = %6i' %
                        importer.importData.spectra_in_qc_proteins)
        logger.log.info('Total Spectra w Quant = %6i' %
                        importer.importData.quantified_spectra)
        logger.log.info('Total Spectra All Rep = %6i' %
                        importer.importData.numSpectraAllReporters)

        # stop the stopwatch and log elapsed times
        times = sw.stop()
        logger.log.info(sw.format())

        hdf5results.close()

    except ExHa.UsageError as useEx:
        # usage errors are only logged, not exported
        ExHa.reformatException(useEx)
        logger.log.warning(ExHa.oneLineRepr(useEx))
    except Exception as genEx:
        # error
        if logger:
            logger.log.warning(ExHa.oneLineRepr(genEx))
            print ExHa.multiLineRepr(genEx)
        else:
            print ExHa.multiLineRepr(genEx)
        # pick the error-file name from the result file when config is present
        if cfg:
            ExHa.exportError2File(genEx,
                                  dataDir.joinpath(resultfile.stem + '.error'))
        else:
            ExHa.exportError2File(genEx, dataDir.joinpath('errors.error'))

        # abort the process, echoing the one-line error as the exit message
        sys.exit(ExHa.oneLineRepr(genEx))