Ejemplo n.º 1
0
def get_colmap(fns, pattern, single_col=False, antipattern=False):
    """Map table file basenames to the header columns matching a pattern.

    The header of every file in ``fns`` is read and searched for
    ``pattern``. When ``antipattern`` is truthy, columns which also match
    that pattern are dropped from the result. A RuntimeError raised by
    either lookup is treated as "no columns found" for that lookup.

    Returns a dict of {file basename: matching columns}, or False as soon
    as a file is found for which no columns remain.
    """
    def find_cols(colpattern, header):
        # The reader signals "columns are not in this file" with a
        # RuntimeError; that counts as an empty match here
        try:
            return tsvreader.get_cols_in_file(colpattern, header, single_col)
        except RuntimeError:
            return []

    colmap = {}
    for fn in fns:
        header = tsvreader.get_tsv_header(fn)
        basefn = os.path.basename(fn)
        matched = find_cols(pattern, header)
        if antipattern:
            excluded = find_cols(antipattern, header)
            matched = [col for col in matched if col not in excluded]
        if not matched:
            # One file without usable columns invalidates the whole map
            return False
        colmap[basefn] = matched
    return colmap
Ejemplo n.º 2
0
 def initialize_input(self):
     """Run the parent initialization, then set up the quant table input.

     Reads the header of ``self.quantfile`` and uses it to resolve
     ``self.quantfields`` (columns matching ``self.quantcolpattern``) and
     ``self.quantacc`` (looked up with ``single_col=True`` from
     ``self.quantacccolpattern``). ``self.quantpeptides`` is set to the
     result of ``reader.generate_tsv_proteins`` for the quant file.
     """
     super().initialize_input()
     header = reader.get_tsv_header(self.quantfile)
     self.quantfields = reader.get_cols_in_file(self.quantcolpattern, header)
     self.quantacc = reader.get_cols_in_file(
         self.quantacccolpattern, header, single_col=True)
     self.quantpeptides = reader.generate_tsv_proteins(self.quantfile, header)
Ejemplo n.º 3
0
 def initialize_input(self):
     """Run the parent initialization, then prepare quant file features.

     The header of ``self.quantfile`` is read once and reused to find the
     quant columns (``self.quantfields``) and the accession column
     (``self.quantacc``, looked up with ``single_col=True``). The result
     of ``reader.generate_tsv_proteins`` is kept in ``self.quantfeatures``.
     """
     super().initialize_input()
     header = reader.get_tsv_header(self.quantfile)
     find_cols = reader.get_cols_in_file
     self.quantfields = find_cols(self.quantcolpattern, header)
     self.quantacc = find_cols(self.quantacccolpattern, header,
                               single_col=True)
     self.quantfeatures = reader.generate_tsv_proteins(self.quantfile, header)
Ejemplo n.º 4
0
 def initialize_input(self):
     """Run the parent initialization, then resolve peptide table columns.

     Stores the header of ``self.pepfile`` in ``self.pepheader``. If
     ``self.proteincol`` is set it is passed (by attribute name) to
     ``get_column_header_for_number``; otherwise, if ``self.pcolpattern``
     is set, ``self.proteincol`` is looked up by that pattern. The score
     column is always looked up from ``self.scorecolpattern``.
     """
     super().initialize_input()
     self.pepheader = tsvreader.get_tsv_header(self.pepfile)
     find_single = tsvreader.get_cols_in_file
     if self.proteincol:
         self.get_column_header_for_number(['proteincol'], self.pepheader)
     else:
         if self.pcolpattern:
             self.proteincol = find_single(self.pcolpattern, self.pepheader,
                                           True)
     self.scorecol = find_single(self.scorecolpattern, self.pepheader, True)
Ejemplo n.º 5
0
 def initialize_input(self):
     """Read the input header and resolve the columns used later on.

     Sets ``self.oldheader`` from ``self.fn``, calls
     ``get_column_header_for_number`` for 'spectracol', and then looks up
     ``self.scorecol`` and ``self.precurquantcol`` from their patterns.
     """
     self.oldheader = tsvreader.get_tsv_header(self.fn)
     self.get_column_header_for_number(['spectracol'])
     score_pattern = self.scorecolpattern
     self.scorecol = tsvreader.get_cols_in_file(score_pattern, self.oldheader,
                                                True)
     quant_pattern = self.precursorquantcolpattern
     self.precurquantcol = prep.get_quantcols(quant_pattern, self.oldheader,
                                              'precur')
Ejemplo n.º 6
0
def get_colmap(fns, pattern, single_col=False, antipattern=False):
    """Collect, per table file, the header columns that match a pattern.

    For each file in ``fns`` the header is read and searched for
    ``pattern``. If ``antipattern`` is truthy, columns that also match it
    are removed. Files that end up without columns are left out.

    Returns a dict of {file basename: matching columns}.
    """
    def wanted_columns(header):
        matched = tsvreader.get_cols_in_file(pattern, header, single_col)
        if not antipattern:
            return matched
        excluded = tsvreader.get_cols_in_file(antipattern, header, single_col)
        return [col for col in matched if col not in excluded]

    colmap = {}
    for fn in fns:
        header = tsvreader.get_tsv_header(fn)
        basefn = os.path.basename(fn)
        found = wanted_columns(header)
        if found:
            colmap[basefn] = found
    return colmap
Ejemplo n.º 7
0
 def initialize_input(self):
     """Load the header of ``self.fn`` and locate the needed columns.

     After storing the header in ``self.oldheader`` and calling
     ``get_column_header_for_number`` for 'spectracol', the score column
     and the precursor quant column are looked up by their patterns.
     """
     self.oldheader = tsvreader.get_tsv_header(self.fn)
     self.get_column_header_for_number(['spectracol'])
     find_single = tsvreader.get_cols_in_file
     self.scorecol = find_single(self.scorecolpattern, self.oldheader, True)
     self.precurquantcol = prep.get_quantcols(
         self.precursorquantcolpattern, self.oldheader, 'precur')
Ejemplo n.º 8
0
def get_colmap(fns, pattern, single_col=False, antipattern=False):
    """Build a map from file basename to columns matching ``pattern``.

    Every file's header is searched for ``pattern``; when ``antipattern``
    is truthy, columns matching it as well are filtered out. Only files
    that keep at least one column get an entry in the returned dict.
    """
    colmap = {}
    for fn in fns:
        header = tsvreader.get_tsv_header(fn)
        basefn = os.path.basename(fn)
        keep = tsvreader.get_cols_in_file(pattern, header, single_col)
        if antipattern:
            drop = tsvreader.get_cols_in_file(antipattern, header, single_col)
            keep = [col for col in keep if col not in drop]
        if not keep:
            # Nothing left for this file, so it gets no entry
            continue
        colmap[basefn] = keep
    return colmap
Ejemplo n.º 9
0
 def prepare(self):
     """Read the header of ``self.fn`` and resolve the columns to use.

     Sets ``self.oldheader``, calls ``get_column_header_for_number`` for
     'spectracol', and looks up ``self.scorecol`` and
     ``self.precurquantcol`` from their respective patterns.
     """
     self.oldheader = tsvreader.get_tsv_header(self.fn)
     self.get_column_header_for_number(['spectracol'])
     score_pattern = self.scorecolpattern
     self.scorecol = tsvreader.get_cols_in_file(score_pattern, self.oldheader,
                                                True)
     self.precurquantcol = psmtopeptable.get_quantcols(
         self.precursorquantcolpattern, self.oldheader, 'precur')
Ejemplo n.º 10
0
 def get_td_proteins_bestpep(self, theader, dheader):
     """Return (targets, decoys) built by ``generate_bestpep_proteins``.

     ``theader`` and ``dheader`` are the headers of ``self.fn`` and
     ``self.decoyfn``. The score column is looked up in each header from
     ``self.scorecolpattern``. As a side effect ``self.header`` is set to
     the accession field followed by ``prottabledata.PICKED_HEADER``.
     """
     def best_scoring(peptides, scorecol):
         # Same settings are used for the target and the decoy side
         return proteins.generate_bestpep_proteins(
             peptides, scorecol, self.minlogscore, self.headeraccfield,
             self.fixedfeatcol)

     self.header = [self.headeraccfield] + prottabledata.PICKED_HEADER
     find_single = tsvreader.get_cols_in_file
     target_scorecol = find_single(self.scorecolpattern, theader, True)
     decoy_scorecol = find_single(self.scorecolpattern, dheader, True)
     target_peps = tsvreader.generate_split_tsv_lines(self.fn, theader)
     decoy_peps = tsvreader.generate_split_tsv_lines(self.decoyfn, dheader)
     targets = best_scoring(target_peps, target_scorecol)
     decoys = best_scoring(decoy_peps, decoy_scorecol)
     return targets, decoys
Ejemplo n.º 11
0
def get_quantcols(pattern, oldheader, coltype):
    """Look up quantification column(s) in a header list by pattern.

    Returns False when ``pattern`` is None. For ``coltype`` 'precur' the
    reader is asked for a single column matching ``pattern``. Any other
    ``coltype`` yields None.
    """
    if pattern is None:
        return False
    if coltype != 'precur':
        return None
    return reader.get_cols_in_file(pattern, oldheader, single_col=True)
Ejemplo n.º 12
0
 def get_psms(self):
     """Set ``self.header`` and ``self.psms`` using a confidence column.

     The confidence column is found by ``self.confpattern`` if that is
     set, otherwise taken from the header at the 1-based position
     ``self.confcol``. Raises RuntimeError when neither is defined.
     """
     self.header = self.oldheader[:]
     if not (self.confpattern or self.confcol):
         raise RuntimeError('Must define either --confcol or '
                            '--confcolpattern')
     if self.confpattern:
         confkey = tsvreader.get_cols_in_file(self.confpattern, self.header,
                                              True)
     else:
         # confcol is a 1-based column number
         confkey = self.header[int(self.confcol) - 1]
     self.psms = prep.generate_psms(self.fn, self.oldheader, confkey,
                                    self.conflvl, self.lowerbetter)
Ejemplo n.º 13
0
 def set_features(self):
     """Set ``self.header`` and ``self.psms`` filtered on a confidence column.

     The confidence column is found by ``self.confpattern`` if that is
     set, otherwise taken from the header at the 1-based position
     ``self.confcol``. When neither is defined a message is printed and
     the program exits with status 1.
     """
     self.header = self.oldheader[:]
     if not (self.confpattern or self.confcol):
         print('Must define either --confcol or --confcolpattern')
         sys.exit(1)
     if self.confpattern:
         confkey = tsvreader.get_cols_in_file(self.confpattern, self.header,
                                              True)
     else:
         # confcol is a 1-based column number
         confkey = self.header[int(self.confcol) - 1]
     self.psms = filtering.filter_psms_conf(self.oldpsms, confkey,
                                            self.conflvl, self.lowerbetter)
Ejemplo n.º 14
0
 def get_psms(self):
     """Copy the old header and generate ``self.psms`` by confidence.

     Uses ``self.confpattern`` to find the confidence column when it is
     set; otherwise ``self.confcol`` is read as a 1-based column number
     into the header. Raises RuntimeError if neither is defined.
     """
     self.header = self.oldheader[:]
     if self.confpattern:
         confkey = tsvreader.get_cols_in_file(self.confpattern, self.header,
                                              True)
     else:
         if not self.confcol:
             raise RuntimeError('Must define either --confcol or '
                                '--confcolpattern')
         confkey = self.header[int(self.confcol) - 1]
     self.psms = prep.generate_psms(self.fn, self.oldheader, confkey,
                                    self.conflvl, self.lowerbetter)
Ejemplo n.º 15
0
 def initialize_input(self):
     """Read the header of ``self.fn`` and find the score and q-value columns.

     ``self.scorecol`` and ``self.qvalcol`` are each looked up as a single
     column from ``self.scorecolpattern`` and ``self.fdrcolpattern``.
     """
     self.oldheader = tsvreader.get_tsv_header(self.fn)
     find_single = tsvreader.get_cols_in_file
     self.scorecol = find_single(self.scorecolpattern, self.oldheader, True)
     self.qvalcol = find_single(self.fdrcolpattern, self.oldheader, True)
Ejemplo n.º 16
0
 def set_features(self):
     """Build the output header and peptide features from a PSM table.

     Sets ``self.header`` (derived from ``self.oldheader``) and
     ``self.features`` (the peptides produced by
     ``psmtopeptable.generate_peptides``, optionally passed through
     isobaric quant summarization and q-value modeling).

     Calls ``sys.exit(1)`` when a total proteome table is passed whose
     first header field does not pair up with an accession field present
     in ``self.header``.
     """
     # Default pattern is used when no quant column pattern was given
     # (presumably matches isobaric plex column names -- TODO confirm)
     qpat = self.quantcolpattern if self.quantcolpattern else '[a-z]+[0-9]+plex_'
     # The spectra file column is not carried over to the new header
     header = [x for x in self.oldheader if x != psmh.HEADER_SPECFILE]
     try:
         isocols = tsvreader.get_columns_by_pattern(header, qpat)
     except RuntimeError:
         # No columns matched the pattern, nothing to strip from the header
         pass
     else:
         # Remove the matched quant columns; they are re-added further down
         # only if quant summarization is done
         for col in isocols:
             header.pop(header.index(col))
     if self.precurquantcol:
         # Rename the precursor quant column to the peptide table area field
         header = [peph.HEADER_AREA if x == self.precurquantcol
                   else x for x in header]
     # Peptide and linked-PSM fields go first; the PSM peptide field is
     # dropped from its old position
     header = [peph.HEADER_PEPTIDE, peph.HEADER_LINKED_PSMS] + [
             x for x in header if x != psmh.HEADER_PEPTIDE]
     # PSM table field names and the peptide table names replacing them
     switch_map = {old: new for old, new in zip(
         [psmh.HEADER_PEPTIDE, psmh.HEADER_PROTEIN, psmh.HEADER_PEPTIDE_Q],
         [peph.HEADER_PEPTIDE, peph.HEADER_PROTEINS, peph.HEADER_QVAL])}
     self.header = [switch_map[field] if field in switch_map else field
             for field in header]
     peptides = psmtopeptable.generate_peptides(self.fn, self.oldheader,
             switch_map, self.scorecol, self.precurquantcol, self.spectracol)
     # Remove quant data if not specified any way to summarize
     if self.quantcolpattern and any([self.denomcols, self.denompatterns,
             self.mediansweep, self.medianintensity]):
         # Denominator columns come either from column numbers or from
         # patterns; they stay False when neither is given
         denomcols = False
         if self.denomcols is not None:
             denomcols = [self.number_to_headerfield(col, self.oldheader)
                          for col in self.denomcols]
         elif self.denompatterns is not None:
             denomcolnrs = [tsvreader.get_columns_by_pattern(self.oldheader, pattern)
                            for pattern in self.denompatterns]
             # Flatten and deduplicate the columns found by all patterns
             denomcols = set([col for cols in denomcolnrs for col in cols])
         quantcols = tsvreader.get_columns_by_pattern(self.oldheader,
                                                self.quantcolpattern)
         totalproteome, tpacc, tp_pepacc = False, False, False
         if self.totalprotfn:
             pep_tp_accs = [psmh.HEADER_MASTER_PROT, psmh.HEADER_SYMBOL,
                     psmh.HEADER_GENE, peph.HEADER_PROTEINS]
             totalphead = tsvreader.get_tsv_header(self.totalprotfn)
             totalpfield_found = False
             # Find the accession pair where the total proteome table's first
             # field matches and the paired field is in the output header.
             # tpacc and tp_pepacc keep their values from the matching
             # iteration after the break and are used below.
             for tpacc, tp_pepacc in zip(proth.TPROT_HEADER_ACCS, pep_tp_accs):
                 if totalphead[0] == tpacc and tp_pepacc in self.header:
                     totalpfield_found = True
                     break
             if not totalpfield_found:
                 print('Could not find correct header field name in the total '
                         'proteome table passed. '
                         'Should be one of {}'.format(proth.TPROT_HEADER_ACCS))
                 sys.exit(1)
             totalproteome = tsvreader.generate_split_tsv_lines(self.totalprotfn, totalphead)
         mn_factors = False
         if self.mednorm_factors:
             # self.mednorm_factors is used as a file name here
             mnhead = tsvreader.get_tsv_header(self.mednorm_factors)
             mn_factors = tsvreader.generate_split_tsv_lines(self.mednorm_factors, mnhead)
         # One extra field per quant column, plus one trailing field, are
         # appended to the header after the quant columns
         nopsms = [isosummarize.get_no_psms_field(qf) for qf in quantcols]
         self.header = self.header + quantcols + nopsms + [proth.HEADER_NO_FULLQ_PSMS]
         peptides = isosummarize.get_isobaric_ratios(self.fn, self.oldheader, 
                 quantcols, denomcols, self.mediansweep, self.medianintensity,
                 self.median_or_avg, self.minint, peptides, self.header[0],
                 psmh.HEADER_PEPTIDE, totalproteome, tpacc, tp_pepacc,
                 self.logisoquant, self.mediannormalize, mn_factors, self.keepnapsms)
     if self.modelqvals:
         # Insert the modeled q-value field directly after the q-value field
         qix = self.header.index(peph.HEADER_QVAL) + 1
         self.header = self.header[:qix] + [peph.HEADER_QVAL_MODELED] + self.header[qix:]
         scorecol = tsvreader.get_cols_in_file(self.scorecolpattern,
                 self.oldheader, True)
         peptides = psmtopeptable.recalculate_qvals_linear_model(peptides, 
                 scorecol, self.qvalthreshold, self.minpeptidenr)
     self.features = peptides
Ejemplo n.º 17
0
 def initialize_input(self):
     """Run the parent initialization, then look up the score column.

     ``self.scorecol`` is resolved as a single column in
     ``self.oldheader`` from ``self.scorecolpattern``.
     """
     super().initialize_input()
     find_single = reader.get_cols_in_file
     self.scorecol = find_single(self.scorecolpattern, self.oldheader, True)
Ejemplo n.º 18
0
 def initialize_input(self):
     """Load the header of ``self.fn`` and resolve score and FDR columns.

     Sets ``self.oldheader``, then ``self.scorecol`` from
     ``self.scorecolpattern`` and ``self.qvalcol`` from
     ``self.fdrcolpattern``, each looked up as a single column.
     """
     self.oldheader = tsvreader.get_tsv_header(self.fn)
     score_pattern = self.scorecolpattern
     self.scorecol = tsvreader.get_cols_in_file(score_pattern, self.oldheader,
                                                True)
     fdr_pattern = self.fdrcolpattern
     self.qvalcol = tsvreader.get_cols_in_file(fdr_pattern, self.oldheader,
                                               True)
Ejemplo n.º 19
0
 def initialize_input(self):
     """Extend the parent initialization with the score column lookup.

     After the parent has run, ``self.scorecol`` is set to the single
     column of ``self.oldheader`` found via ``self.scorecolpattern``.
     """
     super().initialize_input()
     score_pattern = self.scorecolpattern
     self.scorecol = reader.get_cols_in_file(score_pattern, self.oldheader,
                                             True)