Exemple #1
0
 def get_psms(self):
     """Set up the output header and the normalized-ratio PSM source.

     Copies ``self.oldheader`` into ``self.header``, resolves the
     denominator columns and stores whatever
     ``prep.get_normalized_ratios`` returns on ``self.psms``.

     Denominator columns come from ``self.denomcols`` (each entry is
     passed through ``self.number_to_headerfield``) or, failing that,
     from matching every pattern in ``self.denompatterns`` against the
     old header.

     Raises:
         RuntimeError: if both ``self.denomcols`` and
             ``self.denompatterns`` are ``None``.
     """
     self.header = self.oldheader[:]
     if self.denomcols is not None:
         denomcols = [self.number_to_headerfield(colnr, self.oldheader)
                      for colnr in self.denomcols]
     elif self.denompatterns is not None:
         # Union of the columns matched by each pattern.
         denomcols = set()
         for denompattern in self.denompatterns:
             denomcols.update(
                 tsv.get_columns_by_pattern(self.oldheader, denompattern))
     else:
         raise RuntimeError('Must define either denominator column numbers '
                            'or regex pattterns to find them')
     quantcols = tsv.get_columns_by_pattern(self.oldheader,
                                            self.quantcolpattern)
     # False signals "no median PSM table" to the ratio calculation.
     medianheader = (tsv.get_tsv_header(self.medianpsms)
                     if self.medianpsms is not None else False)
     self.psms = prep.get_normalized_ratios(
         self.fn, self.oldheader, quantcols, denomcols, self.minint,
         self.medianpsms, medianheader)
Exemple #2
0
 def set_features(self):
     """Set ``self.header`` and ``self.psms`` for isobaric summarization.

     Denominator columns are taken from ``self.denomcols`` or from
     ``self.denompatterns``; when neither is given they stay ``False``,
     which is only accepted if ``self.mediansweep`` or
     ``self.medianintensity`` is set.

     The output header depends on ``self.featcol``: when it is truthy
     the header is the feature column followed by the quant columns,
     their PSM-count fields and ``HEADER_NO_FULLQ_PSMS``; otherwise it
     is the old header plus one ``ratio_<col>`` field per quant column.

     Raises:
         RuntimeError: if no denominators are defined and neither
             median sweep nor median intensity is requested.
     """
     if self.denomcols is not None:
         denomcols = [self.number_to_headerfield(colnr, self.oldheader)
                      for colnr in self.denomcols]
     elif self.denompatterns is not None:
         denomcols = set()
         for denompattern in self.denompatterns:
             denomcols.update(tsvreader.get_columns_by_pattern(
                 self.oldheader, denompattern))
     elif not (self.mediansweep or self.medianintensity):
         raise RuntimeError('Must define either denominator column numbers '
                            'or regex pattterns to find them')
     else:
         denomcols = False

     quantcols = tsvreader.get_columns_by_pattern(self.oldheader,
                                                  self.quantcolpattern)

     if self.mednorm_factors:
         mnhead = tsvreader.get_tsv_header(self.mednorm_factors)
         mn_factors = tsvreader.generate_split_tsv_lines(
             self.mednorm_factors, mnhead)
     else:
         mn_factors = False

     nopsms = [isosummarize.get_no_psms_field(quantcol)
               for quantcol in quantcols]

     if self.featcol:
         # NOTE(review): presumably rewrites self.featcol from a column
         # number to its header name -- confirm against the helper.
         self.get_column_header_for_number(['featcol'], self.oldheader)
         summary_tail = [HEADER_NO_FULLQ_PSMS]
         self.header = [self.featcol] + quantcols + nopsms + summary_tail
     else:
         ratiofields = ['ratio_{}'.format(quantcol) for quantcol in quantcols]
         self.header = self.oldheader + ratiofields

     self.psms = isosummarize.get_isobaric_ratios(
         self.fn, self.oldheader, quantcols, denomcols,
         self.mediansweep, self.medianintensity, self.median_or_avg,
         self.minint, False, False, self.featcol,
         False, False, False,
         self.logisoquant, self.mediannormalize, mn_factors,
         self.keepnapsms)
Exemple #3
0
 def get_psms(self):
     """Set ``self.header`` and ``self.psms`` for isobaric ratio output.

     Denominator columns are resolved from ``self.denomcols`` or, if
     those are ``None``, from ``self.denompatterns``. The output header
     then depends on ``self.proteincol`` and ``self.targettable``:

     * both set: header of the target table + quant columns + PSM-count
       fields;
     * only ``proteincol`` set: accession field + quant columns +
       PSM-count fields;
     * neither set: old header + one ``ratio_<col>`` field per quant
       column.

     Raises:
         RuntimeError: if both ``self.denomcols`` and
             ``self.denompatterns`` are ``None``.
     """
     if self.denomcols is not None:
         denomcols = [self.number_to_headerfield(colnr, self.oldheader)
                      for colnr in self.denomcols]
     elif self.denompatterns is not None:
         denomcols = set()
         for denompattern in self.denompatterns:
             denomcols.update(
                 tsv.get_columns_by_pattern(self.oldheader, denompattern))
     else:
         raise RuntimeError('Must define either denominator column numbers '
                            'or regex pattterns to find them')

     quantcols = tsv.get_columns_by_pattern(self.oldheader,
                                            self.quantcolpattern)
     # NOTE(review): presumably rewrites self.proteincol from a column
     # number to its header name -- confirm against the helper.
     self.get_column_header_for_number(['proteincol'], self.oldheader)
     nopsms = [prep.get_no_psms_field(quantcol) for quantcol in quantcols]

     if self.proteincol:
         if self.targettable:
             targetheader = tsv.get_tsv_header(self.targettable)
             self.header = targetheader + quantcols + nopsms
         else:
             accession = [prottabledata.HEADER_ACCESSION]
             self.header = accession + quantcols + nopsms
     elif not self.targettable:
         ratiofields = ['ratio_{}'.format(quantcol) for quantcol in quantcols]
         self.header = self.oldheader + ratiofields
     # NOTE(review): a target table without a protein column leaves
     # self.header unassigned here -- confirm that is intended.

     self.psms = prep.get_isobaric_ratios(
         self.fn, self.oldheader, quantcols, denomcols, self.minint,
         self.targettable, self.proteincol, self.normalize,
         self.normalizeratios)
Exemple #4
0
 def get_psms(self):
     """Resolve columns, pick an output header and create ``self.psms``.

     Steps, in order:

     1. Find the denominator columns from ``self.denomcols`` (column
        numbers mapped through ``self.number_to_headerfield``) or from
        the regex patterns in ``self.denompatterns``.
     2. Find the quant columns matching ``self.quantcolpattern``.
     3. Choose ``self.header`` from the combination of
        ``self.proteincol`` and ``self.targettable``.
     4. Store the result of ``prep.get_isobaric_ratios`` on
        ``self.psms``.

     Raises:
         RuntimeError: if neither denominator column numbers nor
             denominator patterns are defined.
     """
     have_colnrs = self.denomcols is not None
     if not have_colnrs and self.denompatterns is None:
         raise RuntimeError('Must define either denominator column numbers '
                            'or regex pattterns to find them')
     if have_colnrs:
         denomcols = [
             self.number_to_headerfield(colnr, self.oldheader)
             for colnr in self.denomcols
         ]
     else:
         matched_per_pattern = [
             tsv.get_columns_by_pattern(self.oldheader, denompattern)
             for denompattern in self.denompatterns
         ]
         denomcols = {
             col for matched in matched_per_pattern for col in matched
         }

     quantcols = tsv.get_columns_by_pattern(self.oldheader,
                                            self.quantcolpattern)
     # NOTE(review): presumably turns self.proteincol from a column
     # number into a header field name -- confirm against the helper.
     self.get_column_header_for_number(['proteincol'], self.oldheader)
     nopsms = [prep.get_no_psms_field(quantcol) for quantcol in quantcols]

     with_protein = bool(self.proteincol)
     with_target = bool(self.targettable)
     if with_protein and with_target:
         self.header = (tsv.get_tsv_header(self.targettable)
                        + quantcols + nopsms)
     elif with_protein:
         self.header = ([prottabledata.HEADER_ACCESSION]
                        + quantcols + nopsms)
     elif not with_target:
         self.header = self.oldheader + [
             'ratio_{}'.format(quantcol) for quantcol in quantcols
         ]
     # NOTE(review): with a target table but no protein column the
     # header is left untouched here -- confirm that is intended.

     self.psms = prep.get_isobaric_ratios(
         self.fn, self.oldheader, quantcols, denomcols, self.minint,
         self.targettable, self.proteincol, self.normalize,
         self.normalizeratios)
Exemple #5
0
 def get_quant(self, theader, features):
     """Attach precursor and/or isobaric quantification to ``features``.

     Args:
         theader: header used to split the lines of ``self.fn`` when
             precursor quantification is requested.
         features: the feature source to decorate; it is passed on to
             the quantification helpers and their result is returned.

     Returns:
         ``features`` as returned by the last quantification helper that
         ran, or unchanged when neither ``self.precursor`` nor
         ``self.quantcolpattern`` is set.

     Side effects:
         Extends ``self.header`` with the fields that are added. Prints
         a message and exits the process with status 1 when the
         isobaric options are inconsistent.
     """
     if self.precursor:
         tpeps = tsvreader.generate_split_tsv_lines(self.fn, theader)
         self.header.append(prottabledata.HEADER_AREA)
         features = proteins.add_ms1_quant_from_top3_mzidtsv(
             features, tpeps, self.headeraccfield, self.fixedfeatcol)

     if not self.quantcolpattern:
         return features

     psmheader = tsvreader.get_tsv_header(self.psmfile)
     if self.denomcols is not None:
         denomcols = [self.number_to_headerfield(colnr, psmheader)
                      for colnr in self.denomcols]
     elif self.denompatterns is not None:
         denomcols = set()
         for denompattern in self.denompatterns:
             denomcols.update(
                 tsvreader.get_columns_by_pattern(psmheader, denompattern))
     else:
         # No denominators: only valid with median sweep or median
         # intensity reporting.
         denomcols = False
         if not (self.mediansweep or self.medianintensity):
             print(
                 'Must define either denominator column numbers '
                 'or regex pattterns to find them, or use median sweep, or '
                 'report median intensities.')
             sys.exit(1)
         if self.medianintensity and self.mediannormalize:
             print(
                 'Cannot do median-centering on intensity values, exiting')
             sys.exit(1)

     quantcols = tsvreader.get_columns_by_pattern(
         psmheader, self.quantcolpattern)

     if self.mednorm_factors:
         mnhead = tsvreader.get_tsv_header(self.mednorm_factors)
         mn_factors = tsvreader.generate_split_tsv_lines(
             self.mednorm_factors, mnhead)
     else:
         mn_factors = False

     nopsms = [isosummarize.get_no_psms_field(quantcol)
               for quantcol in quantcols]
     fullq_field = [prottabledata.HEADER_NO_FULLQ_PSMS]
     self.header = self.header + quantcols + nopsms + fullq_field

     return isosummarize.get_isobaric_ratios(
         self.psmfile, psmheader, quantcols, denomcols,
         self.mediansweep, self.medianintensity, self.median_or_avg,
         self.minint, features, self.headeraccfield, self.fixedfeatcol,
         False, False, False, self.logisoquant, self.mediannormalize,
         mn_factors, self.keepnapsms)
Exemple #6
0
 def get_psms(self):
     """Create ``self.psms`` as normalized ratios over the input table.

     ``self.header`` becomes a copy of ``self.oldheader``. Denominator
     columns are taken from ``self.denomcols`` when given, otherwise
     from the columns matching any of ``self.denompatterns``. When
     ``self.medianpsms`` is set its header is read and passed along;
     otherwise ``False`` is passed in its place.

     Raises:
         RuntimeError: if neither denominator column numbers nor
             denominator patterns are defined.
     """
     self.header = self.oldheader[:]

     have_colnrs = self.denomcols is not None
     if not have_colnrs and self.denompatterns is None:
         raise RuntimeError('Must define either denominator column numbers '
                            'or regex pattterns to find them')
     if have_colnrs:
         denomcols = [self.number_to_headerfield(colnr, self.oldheader)
                      for colnr in self.denomcols]
     else:
         matched_per_pattern = [
             tsv.get_columns_by_pattern(self.oldheader, denompattern)
             for denompattern in self.denompatterns]
         denomcols = {col for matched in matched_per_pattern
                      for col in matched}

     quantcols = tsv.get_columns_by_pattern(self.oldheader,
                                            self.quantcolpattern)

     medianheader = False
     if self.medianpsms is not None:
         medianheader = tsv.get_tsv_header(self.medianpsms)

     self.psms = prep.get_normalized_ratios(
         self.fn, self.oldheader, quantcols, denomcols,
         self.minint, self.medianpsms, medianheader)
Exemple #7
0
def get_psm2pep_header(oldheader, isobq_pattern=False, precurqfield=False):
    """Derive a peptide-table header from a PSM-table header.

    Args:
        oldheader: list of PSM-table header fields; it is copied, not
            modified.
        isobq_pattern: when truthy, every column matching this pattern
            is removed from the header.
        precurqfield: when truthy, fields equal to this name are
            replaced by ``peptabledata.HEADER_AREA``.

    Returns:
        A new list of header fields with the linked-PSMs field inserted
        directly before the peptide field, and with every field that
        appears in the map from ``switch_psm_to_peptable_fields``
        replaced by its mapped name.
    """
    header = oldheader[:]
    if isobq_pattern:
        for isocol in tsv.get_columns_by_pattern(header, isobq_pattern):
            # Drops the first occurrence only, one per matched column.
            header.remove(isocol)
    if precurqfield:
        header = [peptabledata.HEADER_AREA if field == precurqfield
                  else field for field in header]
    linked_psms = peptabledata.HEADER_LINKED_PSMS
    header.insert(header.index(mzidtsvdata.HEADER_PEPTIDE), linked_psms)
    switch_map = switch_psm_to_peptable_fields(header)
    pepheader = []
    for field in header:
        if field in switch_map:
            field = switch_map[field]
        pepheader.append(field)
    return pepheader
Exemple #8
0
def get_psm2pep_header(oldheader, isobq_pattern=False, precurqfield=False):
    """Translate a PSM-table header into the matching peptide-table header.

    The input list is copied first, so ``oldheader`` is left untouched.

    Args:
        oldheader: list of header fields of the PSM table.
        isobq_pattern: optional pattern; columns it matches are dropped.
        precurqfield: optional field name; occurrences are renamed to
            ``peptabledata.HEADER_AREA``.

    Returns:
        The new header as a list: matching columns removed, the
        precursor field renamed, ``peptabledata.HEADER_LINKED_PSMS``
        placed just before ``mzidtsvdata.HEADER_PEPTIDE``, and fields
        renamed according to ``switch_psm_to_peptable_fields``.
    """
    fields = oldheader[:]

    if isobq_pattern:
        matched = tsv.get_columns_by_pattern(fields, isobq_pattern)
        for colname in matched:
            fields.remove(colname)

    if precurqfield:
        renamed = []
        for colname in fields:
            if colname == precurqfield:
                renamed.append(peptabledata.HEADER_AREA)
            else:
                renamed.append(colname)
        fields = renamed

    extra_fields = [peptabledata.HEADER_LINKED_PSMS]
    split_at = fields.index(mzidtsvdata.HEADER_PEPTIDE)
    before, after = fields[:split_at], fields[split_at:]
    fields = before + extra_fields + after

    switch_map = switch_psm_to_peptable_fields(fields)
    return [
        switch_map[colname] if colname in switch_map else colname
        for colname in fields
    ]
Exemple #9
0
 def set_features(self):
     """Build the peptide-table header and feature source from a PSM table.

     Sets ``self.header`` and ``self.features``. In order:

     1. Derive the peptide header from ``self.oldheader``: the spectra
        file field and any isobaric columns are dropped, the precursor
        quant column (if configured) is renamed, the peptide and
        linked-PSMs fields are moved to the front, and PSM field names
        are swapped for their peptide-table counterparts.
     2. Create the peptide source with
        ``psmtopeptable.generate_peptides``.
     3. If a quant column pattern is set together with at least one way
        to summarize (denominator columns/patterns, median sweep or
        median intensity), extend the header and wrap the peptides with
        ``isosummarize.get_isobaric_ratios``.
     4. If ``self.modelqvals`` is set, insert the modeled q-value field
        after the q-value field and wrap the peptides with
        ``psmtopeptable.recalculate_qvals_linear_model``.

     Prints a message and exits with status 1 if a total proteome table
     is passed whose first header field cannot be paired with an
     accession field present in the peptide header.
     """
     qpat = self.quantcolpattern or '[a-z]+[0-9]+plex_'
     header = [field for field in self.oldheader
               if field != psmh.HEADER_SPECFILE]
     try:
         isocols = tsvreader.get_columns_by_pattern(header, qpat)
     except RuntimeError:
         # Best effort: a RuntimeError here is ignored and no isobaric
         # columns are stripped (presumably it means nothing matched).
         isocols = []
     for isocol in isocols:
         header.remove(isocol)
     if self.precurquantcol:
         header = [peph.HEADER_AREA if field == self.precurquantcol
                   else field for field in header]
     leading = [peph.HEADER_PEPTIDE, peph.HEADER_LINKED_PSMS]
     header = leading + [field for field in header
                         if field != psmh.HEADER_PEPTIDE]
     switch_map = dict(zip(
         [psmh.HEADER_PEPTIDE, psmh.HEADER_PROTEIN, psmh.HEADER_PEPTIDE_Q],
         [peph.HEADER_PEPTIDE, peph.HEADER_PROTEINS, peph.HEADER_QVAL]))
     self.header = [switch_map.get(field, field) for field in header]
     peptides = psmtopeptable.generate_peptides(
         self.fn, self.oldheader, switch_map, self.scorecol,
         self.precurquantcol, self.spectracol)

     # Isobaric quant is only reported when there is a way to summarize it.
     if self.quantcolpattern and any((self.denomcols, self.denompatterns,
                                      self.mediansweep,
                                      self.medianintensity)):
         if self.denomcols is not None:
             denomcols = [self.number_to_headerfield(colnr, self.oldheader)
                          for colnr in self.denomcols]
         elif self.denompatterns is not None:
             denomcols = set()
             for denompattern in self.denompatterns:
                 denomcols.update(tsvreader.get_columns_by_pattern(
                     self.oldheader, denompattern))
         else:
             denomcols = False
         quantcols = tsvreader.get_columns_by_pattern(self.oldheader,
                                                      self.quantcolpattern)

         totalproteome, tpacc, tp_pepacc = False, False, False
         if self.totalprotfn:
             pep_tp_accs = [psmh.HEADER_MASTER_PROT, psmh.HEADER_SYMBOL,
                            psmh.HEADER_GENE, peph.HEADER_PROTEINS]
             totalphead = tsvreader.get_tsv_header(self.totalprotfn)
             # tpacc / tp_pepacc keep the first matching pair after the
             # loop; they are passed on to the ratio calculation below.
             for tpacc, tp_pepacc in zip(proth.TPROT_HEADER_ACCS,
                                         pep_tp_accs):
                 if totalphead[0] == tpacc and tp_pepacc in self.header:
                     break
             else:
                 print('Could not find correct header field name in the total '
                       'proteome table passed. '
                       'Should be one of {}'.format(proth.TPROT_HEADER_ACCS))
                 sys.exit(1)
             totalproteome = tsvreader.generate_split_tsv_lines(
                 self.totalprotfn, totalphead)

         if self.mednorm_factors:
             mnhead = tsvreader.get_tsv_header(self.mednorm_factors)
             mn_factors = tsvreader.generate_split_tsv_lines(
                 self.mednorm_factors, mnhead)
         else:
             mn_factors = False

         nopsms = [isosummarize.get_no_psms_field(quantcol)
                   for quantcol in quantcols]
         fullq_field = [proth.HEADER_NO_FULLQ_PSMS]
         self.header = self.header + quantcols + nopsms + fullq_field
         peptides = isosummarize.get_isobaric_ratios(
             self.fn, self.oldheader, quantcols, denomcols,
             self.mediansweep, self.medianintensity, self.median_or_avg,
             self.minint, peptides, self.header[0], psmh.HEADER_PEPTIDE,
             totalproteome, tpacc, tp_pepacc, self.logisoquant,
             self.mediannormalize, mn_factors, self.keepnapsms)

     if self.modelqvals:
         # The modeled q-value field goes directly after the q-value field.
         qix = self.header.index(peph.HEADER_QVAL) + 1
         before, after = self.header[:qix], self.header[qix:]
         self.header = before + [peph.HEADER_QVAL_MODELED] + after
         scorecol = tsvreader.get_cols_in_file(self.scorecolpattern,
                                               self.oldheader, True)
         peptides = psmtopeptable.recalculate_qvals_linear_model(
             peptides, scorecol, self.qvalthreshold, self.minpeptidenr)
     self.features = peptides