예제 #1
0
    def performS2Icorrection(self, correctionfactors):
        """
        @brief performs the s2i correction on the isotope corrected data (self.isotopecorrecteddata). The corrected
        values are stored in self.s2icorrecteddata and returned; spectra without an s2i value are copied unchanged.
        @param correctionfactors <dict>: keyed by isotopelabel_id, normalized calculated factors giving the amount of
        interfering signal from other reporter signals to be removed depending on s2i of ms1
        @return mys2icorrecteddata <dict>: spectrum_id -> {isotopelabel_id: s2i corrected value}
        """
        spectrum2s2i = self.spectrumid2s2i
        log = self.cfg.log
        log.info('we have %s s2i records' % len(spectrum2s2i))
        mys2icorrecteddata = {}

        pBar = progBar.ProgressBar(widgets=progBar.name_widgets, maxval=len(self.isotopecorrecteddata),
                                   name='S2I correcting').start()

        for spectrum_id, data in self.isotopecorrecteddata.iteritems():
            # maxval counts every spectrum, so the bar has to advance for every spectrum,
            # not only for those that have an s2i value
            pBar.nextPrimary()
            log.debug('spectrum id is %s' % spectrum_id)
            corrected = {}
            # perform correction only if there is actually an S2I value for that spectrum.
            if spectrum_id in spectrum2s2i:
                s2ivalue = round(spectrum2s2i[spectrum_id], 3)
                log.debug('spectrum_id for s2i correction %s, s2i value %s ' % (spectrum_id, s2ivalue))

                # both values are constant for the spectrum: calculate them once instead of once per label
                interfering = 1 - s2ivalue
                totalsignal = sum(data.values())

                for isotopelabel_id, val in data.iteritems():
                    s2icorr = val - (interfering * correctionfactors[isotopelabel_id] * totalsignal)
                    log.debug('calculated s2i corrected value %f from original %f for %s ' %
                              (s2icorr, val, isotopelabel_id))
                    if s2icorr < 0:  # no signal can be less than zero
                        s2icorr = 0
                    corrected[isotopelabel_id] = s2icorr
            else:
                log.debug('no spectrum_id (%s) for s2i correction ' % spectrum_id)
                for isotopelabel_id, val in data.iteritems():
                    corrected[isotopelabel_id] = val

            # spectra without any label data get no entry in the result
            if corrected:
                mys2icorrecteddata[spectrum_id] = corrected
        pBar.finish()
        self.s2icorrecteddata = mys2icorrecteddata
        log.info('done performS2Icorrection')
        return mys2icorrecteddata
예제 #2
0
파일: mgf.py 프로젝트: hdinkel/isob
    def export(self, hcdonly=0):
        """
        @brief creates an mgf file for the MS/MS spectra in the hdf5 file. The ions written to the mgf file are
        also appended to a freshly created /rawdata/deconvions table.
        @param hcdonly <integer>: flag to switch output to only HCD spectra bypassing the normal export filters
        @raise ExHa.MGFprocessingError: when the 'MS Run Time (min)' parameter is missing; errors of this type
        raised while processing get the current spectrum id added as context and are re-raised
        """
        if hcdonly:
            remove = ['CID']
            filters = ['none']
        else:
            remove = self.remove
            filters = self.usefilts

        hdf = self.hdf5
        mgfFile = hdf.filePath.parent.joinpath(hdf.filePath.stem + '.mgf')
        spec = 0

        mgfOut = open(str(mgfFile), 'w')
        try:
            mgfOut.write('#Removed spectra = %s, filtering = %s\n' %
                         (remove, filters))

            # read parameters from hdf5 file
            hdf.appendOpen()
            headers = hdf.readTable('/rawdata/msmsheader')
            runTimeEntry = hdf.getDataEqual('/rawdata/parameters', 'parameter',
                                            'MS Run Time (min)')
            # explicit length test: presumably an array, whose truth value would be ambiguous
            if len(runTimeEntry) == 0:
                raise ExHa.MGFprocessingError(
                    'MGF Error: Could not find "MS Run Time (min)" parameter in HDF5 file.'
                )
            runtime = runTimeEntry[0]['value']
            units = self.readUnitsOK()

            # add new table for the deconvoluted spectrum data
            hdf.removeTable('/rawdata/deconvions')
            hdf.createTable('rawdata', 'deconvions', 'DeconvIons')

            # find all the frag methods to be used in identification
            ident = [frag['order'] for frag in units[1] if 'I' in frag['use']]

            logger.log.info('Reading %d spectra from %s' %
                            (len(headers), hdf.filePath.name))
            deconv = 'deconv' in filters

            pBar = progBar.ProgressBar(widgets=progBar.name_widgets,
                                       maxval=len(headers),
                                       name='Create .mgf').start()
            for idx, h in enumerate(headers):
                if hcdonly:
                    if h['fragmeth'] != 'HCD':
                        continue
                elif h['order'] not in ident:
                    continue
                pBar.update(idx)

                # get spectrum data
                spec = h['spec_id']
                spectrum = hdf.getDataEqual('/rawdata/ions', 'spec_id', spec)
                if deconv:
                    # need extra column for charge information
                    spectrum = self.addChargeColumn(spectrum)

                data = hdf.getDataGeneral(
                    '/rawdata/specparams',
                    '(spec_id == %i) & (parameter == "%s")' %
                    (spec, 'setmass1'))
                setmass = data[0]['value']
                data = hdf.getDataGeneral(
                    '/rawdata/specparams',
                    '(spec_id == %i) & (parameter == "%s")' % (spec, 'frag1'))
                frag = data[0]['value']
                try:
                    self.maxint = max(spectrum['inten'])
                except Exception:
                    # best effort as before (e.g. a spectrum without ions), but no longer
                    # swallows KeyboardInterrupt / SystemExit like the former bare except
                    self.maxint = 0

                # construct title values list
                # NOTE(review): titles is only assigned for use values 'IQ', 'I' and 'Q' - confirm
                # that no other value can reach this point
                rt = '%.3f' % h['rt']
                use = units[1][h['order'] - 1]['use']
                pretitle = ''
                if use == 'IQ':
                    # spec is both ID and Quan so use normal msms ID
                    titles = ['msmsid:F%06d' % h['spec_id']]
                elif use == 'I':
                    if h['quan_spec'] == 0:
                        # no quant data so use spec_id
                        titles = ['msmsid:F%06d' % h['spec_id']]
                    else:
                        # spec is only for ident find the quan spec
                        titles = ['msmsid:F%06d' % h['quan_spec']]
                        pretitle = '#CID=F%06d\n' % h['id_spec']
                elif use == 'Q':
                    titles = ['msmsid:F%06d' % h['quan_spec']]
                    pretitle = '#CID=F%06d\n' % h['id_spec']

                titles.append('rt:' + rt)
                titles.append('survey:S%06d' % h['survey_spec'])
                titles.append('parent:' + setmass)
                titles.append('AnalTime:' + runtime)
                titles.append('Activation:' + frag.upper())

                titleline = 'TITLE=%s\n' % ','.join(titles)

                # use the first precursor m/z that is available
                if h['precmz'] > 0:
                    pepmass = h['precmz']
                elif h['precmz_surv'] > 0:
                    pepmass = h['precmz_surv']
                else:
                    pepmass = h['monomz']

                if pepmass == 0:
                    continue

                for filt in filters:
                    if len(spectrum) > 5 and self.filters[filt]:
                        spectrum = self.filters[filt](h, spectrum)

                # spectra with fewer than 3 ions are not exported
                ionList = []
                if len(spectrum) > 2:
                    mgfOut.write(pretitle)
                    mgfOut.write('BEGIN IONS\n')
                    mgfOut.write(titleline)
                    mgfOut.write('PEPMASS=%f\n' % pepmass)
                    mgfOut.write('CHARGE=%d+\n' % h['charge'])
                    for pt in spectrum:
                        if pt['inten'] == 0:
                            continue
                        ion = dict(spec_id=pt['spec_id'],
                                   mz=pt['mz'],
                                   inten=pt['inten'])
                        if deconv:
                            # deconvoluted ions carry their charge as a third column
                            mgfOut.write('%f  %f  %s\n' %
                                         (pt['mz'], pt['inten'], pt['charge']))
                            ion['charge'] = pt['charge']
                        else:
                            mgfOut.write('%f  %f\n' % (pt['mz'], pt['inten']))
                        ionList.append(ion)
                    mgfOut.write('END IONS\n\n')
                if len(ionList) > 0:
                    hdf.appendRows('/rawdata/deconvions', ionList)

            pBar.finish()

        except ExHa.MGFprocessingError as czEx:
            if spec:
                ExHa.addContext(czEx,
                                'Raised whist processing spectrum %i' % spec)
            raise
        finally:
            # the mgf file was previously never closed, neither on success nor on error
            mgfOut.close()
예제 #3
0
    def updateHDF5(self):
        """
        @brief controls the updating of the data to the hdf5 results file: for every hdf5 file the spectrum,
        peptide and quantification data are collated and written, afterwards the FDR data, the peptide set data
        and the summary statistics are written.

        @return finalMessage <string>: summary of the matched queries, the quantified spectra and the top scoring
        protein
        @raise Exception: any exception gets the current processing context added (ExHa.addContext) and is
        re-raised
        """
        pep2unique = self.pep2unique
        baseContext = 'updateHDF5: '
        context = 'updateHDF5'
        try:
            # find the peptide sequences that are being imported
            usedPeps = self.setsManager.findUsedPeptides()
            logger.log.info('there are %s usedPeps' % len(usedPeps))

            context = baseContext + 'Retrieving sample IDs'

            sample_ids = list(range(1, len(self.hdfFiles) + 1))
            # create proteinset and proteinhit data
            starting_protein_group_no = 1
            self.setsManager.setProteinGroupNo(starting_protein_group_no)

            logger.log.info('adding protein group data to HDF5')

            logger.log.debug(str(self.hdfFiles.keys()))
            spectrum_id = 0
            peptide_id = 0

            for key in sorted(self.hdfFiles.keys()):
                # context prefix of this file only: appending to baseContext itself made the
                # names of all previously processed files pile up in later error contexts
                fileContext = baseContext + '%s: ' % key
                logger.log.log(
                    logger.PROCESS,
                    'Integrating Spectrum, Peptide & Quantification data from %s'
                    % key)
                # collect fileData
                hdf = self.hdfFiles[key]
                hdfObj = hdf.hdfObject

                # take the next sample_id (ids are handed out from the end of the list)
                current_sample_id = sample_ids.pop()

                hdf.acquired_spectra, hdf.mascot_matched_spectra, numIsotopes, runTime = hdfObj.getNumbers()

                # read the Mascot data
                context = fileContext + 'Reading Mascot data'
                peptides, queryDict, headerArray, quanArray = hdfObj.readImporterData(usedPeps, hdf)[:4]

                hdf.spectra_in_qc_proteins = len(peptides)

                logger.log.debug('getting spectrum_ids')
                context = fileContext + 'Retrieving spectrum IDs'

                acqTime, hdf.idAct, hdf.quanAct = hdfObj.getTimeAndActivation()
                # create blank lists to hold data for writing to hdf5 file
                spectrum_list = []
                peptide_list = []
                quant_list = []
                logger.log.info('collating spectrum, peptide & quant data')
                pBar = progBar.ProgressBar(widgets=progBar.name_widgets,
                                           maxval=len(queryDict),
                                           name='collate data').start()
                for q in queryDict:
                    # loop round all the required spectra
                    pBar.nextPrimary()
                    context = fileContext + 'query %i: Setting spectrum data' % q
                    # spectrum ids are consecutive over all files
                    spectrum_id += 1
                    query = queryDict[q]
                    spec = int(query['spec_id'])
                    context = fileContext + 'spectrum %i: Updating DB with spectrum data' % spec
                    # add spectrum data to spectrum_list
                    header = self.filterArrayEqual(headerArray, 'spec_id',
                                                   spec)
                    spectrum_list.append(
                        self.makeSpectrumDict(spectrum_id, current_sample_id,
                                              query, acqTime, header))

                    # find the appropriate peptides
                    pepList = peptides[q]
                    logger.log.debug('there are %s in peplist %s' %
                                     (len(pepList), str(pepList)))
                    quantFound = 0

                    # this list will hold all peptides returned from makePeptideDictList and then filter
                    # those non-rank1 equivalents based on the score of the rank 1 peptide
                    tmplist = []
                    for pep in pepList:
                        # find the sets that the peptide belongs to and add to the peptide_list
                        sets = self.setsManager.peptide2set[pep['peptide']]
                        context = fileContext + 'spectrum %i: Creating peptide data entries for hdf5' % spec
                        tmp, qf = self.makePeptideDictList(
                            spectrum_id, pep, query, sets, hdf, pep2unique)
                        tmplist.extend(tmp)
                        peptide_list += tmp
                        quantFound += qf

                    # only keep rank1 equivalent peptides (based on score)
                    tmplist.sort(key=lambda x: x['rank'])
                    toprankscore = tmplist[0]['score']
                    tmplist = [
                        x for x in tmplist if x['score'] == toprankscore
                    ]

                    if quantMethID and quantFound:
                        # extract quantification data for the spectrum
                        context = fileContext + 'spectrum %i: Creating quantitation data entries for DB' % spec
                        newquant, deltas = self.makeQuantDictLists(
                            spectrum_id, spec, tmplist, header, quanArray, hdf)

                        quant_list += newquant

                        if quantSource == 'ms2':
                            context = fileContext + 'spectrum %i: Adding reporter ion delta data' % spec
                            hdf.addReporterDeltas(deltas)
                pBar.finish()

                # calculate statistics
                context = fileContext + 'Calculating statistics'
                hdf.calcReporterStats()

                context = fileContext + 'Updating sample table (%i)' % current_sample_id
                sample_data = hdf.getSampleDataDict(current_sample_id, key,
                                                    runTime)

                hdf5results.writeSample(sample_data)

                self.importData.combineStatistics(hdf)

                # write data to HDF5
                context = fileContext + 'Updating spectrum table'
                logger.log.info('updating HDF5 with spectrum data')
                hdf5results.writeSpectrum(spectrum_list)

                if quantMethID:
                    context = fileContext + 'Updating specquant table'
                    logger.log.info('updating HDF5 with quant data')
                    hdf5results.writeSpecQuant(quant_list)

                context = fileContext + 'Retrieving peptide IDs'
                logger.log.info('updating HDF5 with peptide data')
                # peptide ids are consecutive over all files
                for pepdata in peptide_list:
                    pepdata['peptide_id'] = peptide_id
                    peptide_id += 1

                context = fileContext + 'Updating peptide table'
                hdf5results.writePeptide(peptide_list)

            # set before the finalising work so that errors raised there are reported with this
            # context and not with the context of the last processed file
            context = baseContext + 'Finalising HDF5 entries'
            hdf5results.createIndexes()

            logger.log.info('finalising HDF5 entries')
            hdf5results.writeFDRdata(self.importData.score2fdr, 'peptide')
            hdf5results.writeFDRdata(self.importData.proteinscore2fdr,
                                     'protein')

            topScoringProteinInfo = self.setsManager.addPeptideSetDBdata(
                hdf5results, self.importData.proteinscore2fdr)
            runtimedata = self.importData.getSummaryStatisticsDict()

            hdf5results.writeStatistics(runtimedata)

            # avoid a ZeroDivisionError when no spectrum was matched at all
            matchedSpectra = float(runtimedata['mascot_matched_spectra'])
            if matchedSpectra:
                matchedPercent = (runtimedata['spectra_in_qc_proteins'] / matchedSpectra) * 100
            else:
                matchedPercent = 0.0

            finalMessage = 'queries matched: %i / %s (%.1f%%) ' % (
                runtimedata['spectra_in_qc_proteins'],
                runtimedata['mascot_matched_spectra'],
                matchedPercent)
            finalMessage += 'spectra quantified: %i top hit %s (%s) ' % (
                runtimedata['quantified_spectra'], '', '')
            finalMessage += 'with total score %f and %i matched peptides (hook AND non hook)' % \
                            (topScoringProteinInfo[0], topScoringProteinInfo[2])

            # the message was documented as return value but never handed back to the caller
            return finalMessage
        except Exception as genEx:
            # report where the processing failed and pass the error on
            ExHa.addContext(genEx, context)
            raise
예제 #4
0
def _writePeptideRows(f_out, rows, template, usedIsotopes):
    """
    @brief writes the peptide rows of one protein group to the output file, ordered by seq_start
    @param f_out <file>: open output file
    @param rows <list>: dictionaries holding the values for the template plus the signal per isotope label
    @param template <string>: format string for the fixed columns
    @param usedIsotopes <list>: isotope label ids, one signal column each; 'NA' is written for a missing label
    """
    for row in sorted(rows, key=lambda r: r['seq_start']):
        fields = [template % row]
        for iso in usedIsotopes:
            try:
                fields.append('%f' % row[iso])
            except KeyError:
                fields.append('NA')
        f_out.write('\t'.join(fields) + '\n')


def collectPeptideData(hdfObject, sample2source):
    """
    @brief writes a tab separated text file with one line per peptide, combining the peptide table with the
    protein, spectrum, FDR and quantification data of the results hdf5 file. The file name is derived from the
    hdf5 file path via renameFile(..., '_peptides').
    @param hdfObject: object whose hdf attribute gives access to the results hdf5 file
    @param sample2source <dict>: source file for each sample_id
    """
    logger.log.info('generating Peptide based output')
    hdf = hdfObject.hdf
    outputFile = renameFile(hdf.filePath, '_peptides')

    # extract required protein data: protein ids per protein group
    logger.log.info('loading protein data')
    proteinhit = hdf.readTable('/proteinhit')
    proteins = {}
    for prot in proteinhit:
        proteins.setdefault(prot['protein_group_no'], []).append(prot['protein_id'])
    # sort once per group instead of after every single append
    for protein_ids in proteins.values():
        protein_ids.sort()

    proteinhit = None

    # extract required spectrum data
    logger.log.info('loading spectrum data')
    spectra = hdf.readTable('/spectrum')
    specs = {}
    for sp in spectra:
        source_file = sample2source[sp['sample_id']]
        specs[sp['spectrum_id']] = dict(source_file=source_file,
                                        msms_id=sp['msms_id'],
                                        charge_state=sp['charge_state'],
                                        precursor_mz=sp['precursor_mz'],
                                        peak_intensity=sp['peak_intensity'],
                                        s2i=sp['s2i'],
                                        p2t=sp['p2t'])

    # extract quantification data
    logger.log.info('loading quantification data')
    specquant = hdf.readTable('/specquant')
    quant = {}
    usedIsotopes = set()
    for sq in specquant:
        spectrum_id = sq['spectrum_id']
        if spectrum_id not in quant:
            quant[spectrum_id] = dict(
                in_quantification_of_protein=sq['in_quantification_of_protein']
            )
        quant[spectrum_id][sq['isotopelabel_id']] = sq['quant_allcorrected']
        usedIsotopes.add(sq['isotopelabel_id'])
    usedIsotopes = sorted(usedIsotopes)

    outString = ('protein_group_no\tprotein_id\tsequence\tmodifications\tmw'
                 '\tprecursor_mz\tcharge_state\tppm_error\tscore\tfdr_at_score\trank\tmsms_id\tsource_file'
                 '\tpeak_intensity\ts2i\tp2t\tis_unique\tin_quantification_of_protein\tin_protein_inference'
                 '\tseq_start\tseq_end')
    if usedIsotopes:
        isotope_data = dict((str(i), i) for i in usedIsotopes)
        try:
            method = quantHandler.QuantMethods().getMethodByIsotope(usedIsotopes[0])
            for label_id, data in method['quantmasses'].iteritems():
                isotope_data[label_id] = data[0]['name']
        except Exception:
            print('error getting label name data, just using names present in .hdf5 file')
        for iso in usedIsotopes:
            # the fallback entries are keyed by str(id): a plain isotope_data[iso] lookup raised
            # a KeyError whenever the label names could not be loaded
            label_name = isotope_data.get(iso, isotope_data.get(str(iso), iso))
            outString += '\tsig_%s' % label_name

    out_string_template = ('%(protein_group_no)i\t%(proteins)s\t%(sequence)s\t%(modifications)s\t%(mw)f\t'
                           '%(precursor_mz)f\t%(charge_state)i\t%(ppm_error)f\t%(score).0f\t%(fdr_at_score).3f\t'
                           '%(rank)i\t%(msms_id)i\t%(source_file)s\t%(peak_intensity)f\t%(s2i)f\t%(p2t)f\t'
                           '%(is_unique)i\t%(in_quantification_of_protein)i\t'
                           '%(in_protein_inference)f\t%(seq_start)s\t%(seq_end)s')

    # open text file output; the with block closes the file even if an error occurs
    with open(str(outputFile), 'w') as f_out:
        f_out.write(outString + '\n')
        # integrate other data with peptide data and output to text file
        logger.log.info('loading peptide data')
        fdr_data = dict((x['score'], x['global_fdr'])
                        for x in hdf.readTable('/fdrdata')
                        if x['data_type'] == 'peptide')
        peptidetable = hdf.getTable('/peptide')  # get reference to peptide table on disk
        current_pepgroupid = None
        tmplist = []
        pBar = progBar.ProgressBar(widgets=progBar.name_widgets,
                                   maxval=len(peptidetable),
                                   name='load peptides').start()
        for idx, p in enumerate(peptidetable.itersorted('protein_group_no')):
            pBar.update(idx)
            if current_pepgroupid != p['protein_group_no']:
                # a new protein group starts: write out the collected rows of the previous one
                _writePeptideRows(f_out, tmplist, out_string_template, usedIsotopes)
                tmplist = []
            tmplist.append(preparePeptideData(p, proteins, fdr_data, specs, quant))
            current_pepgroupid = p['protein_group_no']
        pBar.finish()
        # not to forget the last protein groups data in tmplist
        _writePeptideRows(f_out, tmplist, out_string_template, usedIsotopes)
    return
예제 #5
0
    def performBootstrapQuant(self, protein_group_nos, reference):
        """
        @brief loads the spec quant data and the peptide data of the protein sets, then calculates for every
        protein group and every isotope label the fold change against the reference label with makeBootstrap
        @param protein_group_nos list of protein group ids
        @param reference isotopelabel id of the label the fold changes are calculated against
        @return <dict>: protein_group_no -> {isotopelabel_id: [summed signal, number of quant values, number of
        distinct peptide entries, result]}; result is (1, 0, 0) for a reference with signal, (0, -1, -1) when
        only the reference has signal, (-1, -1, -1) when the reference has no signal, otherwise the value
        returned by makeBootstrap
        """
        log = self.cfg.log
        quantTable = self.hdf5quantprot.getAllProteinDatafromSpecQuant()
        peptidesBySet = self.hdf5quantprot.getPeptideDataforSets()
        labels = sorted(set(row['isotopelabel_id'] for row in quantTable))
        results = {}  # keeps fold change result,sum ion area, isotopelabel

        pBar = progBar.ProgressBar(widgets=progBar.name_widgets,
                                   maxval=len(protein_group_nos),
                                   name='bootstrap quant').start()
        for position, groupNo in enumerate(protein_group_nos):
            pBar.update(position)
            log.debug('starting for proteingroup %s' % groupNo)

            # quant rows and peptide data of this protein group
            groupData = quantTable[quantTable['protein_group_no'] == groupNo]
            groupPeptides = peptidesBySet[groupNo]
            spectrumIds = groupData['spectrum_id']
            qupm = len(set(groupPeptides[sid] for sid in spectrumIds))
            totalEvents = len(set(spectrumIds))

            refValues = groupData[groupData['isotopelabel_id'] == reference]['quant_allcorrected']
            refSum = refValues.sum()
            refCount = len(refValues)
            # number of zeros needed to pad the reference values up to the number of quantified spectra
            missingRef = totalEvents - refCount

            # without any reference signal no fold change can be calculated
            refResult = (1, 0, 0) if refSum else (-1, -1, -1)
            perLabel = {reference: [refSum, refCount, qupm, refResult]}

            for label in labels:
                log.debug('assessing isotopelabel %s' % label)
                if label == reference:
                    continue

                labelValues = groupData[groupData['isotopelabel_id'] == label]['quant_allcorrected']
                labelSum = labelValues.sum()
                qssm = len(labelValues)
                missingLabel = totalEvents - qssm
                log.debug('length ref %s, length query %s' % (refCount, qssm))

                if not refValues.any():
                    outcome = (-1, -1, -1)
                elif labelValues.any():
                    # NOTE(review): bare cfg (not self.cfg) is used here as in the original - confirm
                    # that a module level cfg exists
                    minSpectra = cfg.parameters['general']['minquantspectra']
                    outcome = self.makeBootstrap(labelValues.tolist() + [0] * missingLabel,
                                                 refValues.tolist() + [0] * missingRef,
                                                 minSpectra)
                    log.debug('bootstrap result for %s : %s ' % (groupNo, outcome))
                else:
                    log.debug('there are no valid query values for %s: FC will be zero, as reference is present'
                              % groupNo)
                    outcome = (0, -1, -1)

                perLabel[label] = [labelSum, qssm, qupm, outcome]
            results[groupNo] = perLabel
        pBar.finish()
        return results
예제 #6
0
    def performSimpleSumQuant(self, protein_group_nos, reference):
        """
        @brief perform simple sum ratio calculation using valid quant data from all spectra: for every protein
        group the summed signal of each isotope label is divided by the summed signal of the reference label
        @param protein_group_nos list of protein group ids
        @param reference isotopelabel id of the label the ratios are calculated against
        @return protein2quantdata <dict>: protein_group_no -> {isotopelabel_id: [summed signal, number of quant
        values, number of distinct peptide entries, result]}; result is (1, 0, 0) for a reference with signal,
        (ratio, -1, -1) when label and reference have signal, (0, -1, -1) when only the reference has signal and
        (-1, -1, -1) when the reference has no signal
        """
        all_proteins_quantdata = self.hdf5quantprot.getAllProteinDatafromSpecQuant()
        peptidedatafromsets = self.hdf5quantprot.getPeptideDataforSets()
        protein2quantdata = {}  # keeps fold change result,sum ion area, isotopelabel
        usedIsotopes = sorted(set(x['isotopelabel_id'] for x in all_proteins_quantdata))
        # the label was a copy of the one used by the bootstrap method
        pBar = progBar.ProgressBar(widgets=progBar.name_widgets,
                                   maxval=len(protein_group_nos),
                                   name='simple sum quant').start()

        # scan through each protein group and perform quantification on quant values
        for idx, protein_group_no in enumerate(protein_group_nos):
            pBar.update(idx)
            datadict = {}
            self.cfg.log.debug('starting for proteingroup %s' %
                               protein_group_no)
            protlocation = all_proteins_quantdata[
                'protein_group_no'] == protein_group_no
            data = all_proteins_quantdata[protlocation]
            peptidedataforset = peptidedatafromsets[protein_group_no]

            quantuniquepeps = set(peptidedataforset[spectrum_id]
                                  for spectrum_id in data['spectrum_id'])

            refdata = data[data['isotopelabel_id'] ==
                           reference]['quant_allcorrected']
            sumrefdata = refdata.sum()
            qupm = len(quantuniquepeps)
            refspectraquantified = len(refdata)
            # without reference signal no ratio can be calculated: all results of this protein group
            # are (-1, -1, -1). The former extra loop for this case stopped after its first label and
            # could only replace the reference entry by zeros; every other label is set below anyway.
            if sumrefdata:
                refresult = (1, 0, 0)
            else:
                refresult = (-1, -1, -1)
            datadict[reference] = [
                sumrefdata, refspectraquantified, qupm, refresult
            ]

            for isotopelabel_id in usedIsotopes:
                self.cfg.log.debug('assessing isotopelabel %s' %
                                   isotopelabel_id)
                if isotopelabel_id == reference:
                    continue

                queryvalues = data[data['isotopelabel_id'] ==
                                   isotopelabel_id]['quant_allcorrected']
                sumqueryvalues = queryvalues.sum()
                qssm = len(queryvalues)

                self.cfg.log.debug('length ref %s, length query %s' %
                                   (len(refdata), len(queryvalues)))
                if queryvalues.any() and refdata.any():
                    ratio_result = sumqueryvalues / sumrefdata
                    result = (ratio_result, -1, -1)
                    self.cfg.log.debug('simple ratio result for %s : %s ' %
                                       (protein_group_no, result))
                elif refdata.any():
                    self.cfg.log.debug(
                        'there are no valid query values for %s: FC will be zero, as reference'
                        ' is present' % protein_group_no)
                    result = (0, -1, -1)
                else:
                    result = (-1, -1, -1)
                datadict[isotopelabel_id] = [
                    sumqueryvalues, qssm, qupm, result
                ]
            protein2quantdata[protein_group_no] = datadict
        pBar.finish()
        return protein2quantdata