def get_colmap(fns, pattern, single_col=False, antipattern=False):
    """Map table file basenames to the header columns matching a pattern.

    Each file's header is read and searched for ``pattern``. When
    ``antipattern`` is given, columns that also match it are dropped again.

    Returns a dict keyed by file basename. As soon as one file ends up
    without any column, ``False`` is returned instead of the dict.
    """
    def matching_columns(colpattern, header):
        # The reader signals "no such columns in this file" with a
        # RuntimeError; treat that as an empty match.
        try:
            return tsvreader.get_cols_in_file(colpattern, header, single_col)
        except RuntimeError:
            return []

    matches = {}
    for path in fns:
        header = tsvreader.get_tsv_header(path)
        filename = os.path.basename(path)
        wanted = matching_columns(pattern, header)
        if antipattern:
            excluded = matching_columns(antipattern, header)
            wanted = [name for name in wanted if name not in excluded]
        if not wanted:
            return False
        matches[filename] = wanted
    return matches
def initialize_input(self):
    """Run the parent input setup, then prepare the quant table inputs.

    Sets ``quantfields`` (columns found with ``quantcolpattern``),
    ``quantacc`` (looked up with ``quantacccolpattern`` and
    ``single_col=True``) and ``quantpeptides`` (the result of
    ``reader.generate_tsv_proteins`` on the quant file).
    """
    super().initialize_input()
    header = reader.get_tsv_header(self.quantfile)
    self.quantfields = reader.get_cols_in_file(self.quantcolpattern, header)
    self.quantacc = reader.get_cols_in_file(
        self.quantacccolpattern,
        header,
        single_col=True,
    )
    self.quantpeptides = reader.generate_tsv_proteins(self.quantfile, header)
def initialize_input(self):
    """Run the parent input setup, then prepare the quant table inputs.

    After the parent class has initialized its own input, the quant file
    header is read once and used to set:

    * ``quantfields``   - columns found with ``quantcolpattern``
    * ``quantacc``      - column found with ``quantacccolpattern``
                          (requested with ``single_col=True``)
    * ``quantfeatures`` - result of ``reader.generate_tsv_proteins``
    """
    super().initialize_input()
    table_header = reader.get_tsv_header(self.quantfile)
    find_columns = reader.get_cols_in_file
    self.quantfields = find_columns(self.quantcolpattern, table_header)
    self.quantacc = find_columns(self.quantacccolpattern, table_header,
                                 single_col=True)
    self.quantfeatures = reader.generate_tsv_proteins(self.quantfile,
                                                      table_header)
def initialize_input(self):
    """Run the parent input setup, then resolve protein and score columns.

    Reads the peptide file header into ``pepheader``. If ``proteincol`` is
    already set it is passed (by attribute name) to
    ``get_column_header_for_number`` together with that header; otherwise,
    when ``pcolpattern`` is set, ``proteincol`` is looked up by pattern.
    ``scorecol`` is always looked up with ``scorecolpattern``.
    """
    super().initialize_input()
    self.pepheader = tsvreader.get_tsv_header(self.pepfile)
    if not self.proteincol:
        if self.pcolpattern:
            self.proteincol = tsvreader.get_cols_in_file(
                self.pcolpattern, self.pepheader, True)
    else:
        self.get_column_header_for_number(['proteincol'], self.pepheader)
    self.scorecol = tsvreader.get_cols_in_file(
        self.scorecolpattern, self.pepheader, True)
def initialize_input(self):
    """Read the input header and resolve the columns used later on.

    Sets ``oldheader`` from the input file, lets
    ``get_column_header_for_number`` process ``spectracol``, then sets
    ``scorecol`` (by ``scorecolpattern``) and ``precurquantcol`` (via
    ``prep.get_quantcols`` with ``precursorquantcolpattern``).
    """
    self.oldheader = tsvreader.get_tsv_header(self.fn)
    self.get_column_header_for_number(['spectracol'])
    # Read the header attribute only after the call above, as the original
    # code does.
    header = self.oldheader
    self.scorecol = tsvreader.get_cols_in_file(
        self.scorecolpattern, header, True)
    self.precurquantcol = prep.get_quantcols(
        self.precursorquantcolpattern, header, 'precur')
def get_colmap(fns, pattern, single_col=False, antipattern=False):
    """Collect, per table file, the header columns that match a pattern.

    For every file in ``fns`` the header is searched for ``pattern``; if an
    ``antipattern`` is passed, columns matching it are removed from the
    result. Files that end up without columns are left out of the map.

    Returns a dict of {file basename: matching columns}.
    """
    columns_by_file = {}
    for path in fns:
        header = tsvreader.get_tsv_header(path)
        filename = os.path.basename(path)
        selected = tsvreader.get_cols_in_file(pattern, header, single_col)
        if antipattern:
            rejected = tsvreader.get_cols_in_file(antipattern, header,
                                                  single_col)
            selected = [name for name in selected if name not in rejected]
        if not selected:
            continue
        columns_by_file[filename] = selected
    return columns_by_file
def prepare(self):
    """Read the input header and resolve spectra, score and quant columns.

    Stores the file header in ``oldheader``, hands ``spectracol`` to
    ``get_column_header_for_number``, and then fills ``scorecol`` and
    ``precurquantcol`` from their respective column patterns.
    """
    self.oldheader = tsvreader.get_tsv_header(self.fn)
    self.get_column_header_for_number(['spectracol'])
    self.scorecol = tsvreader.get_cols_in_file(
        self.scorecolpattern,
        self.oldheader,
        True,
    )
    self.precurquantcol = psmtopeptable.get_quantcols(
        self.precursorquantcolpattern,
        self.oldheader,
        'precur',
    )
def get_td_proteins_bestpep(self, theader, dheader):
    """Return best-peptide protein results for the target and decoy tables.

    Sets ``self.header`` to the accession field followed by
    ``prottabledata.PICKED_HEADER``. The score column is looked up in each
    header with ``scorecolpattern``; the lines of ``self.fn`` (target) and
    ``self.decoyfn`` (decoy) are then passed to
    ``proteins.generate_bestpep_proteins``.

    Returns a ``(targets, decoys)`` tuple.
    """
    def find_scorecol(header):
        return tsvreader.get_cols_in_file(self.scorecolpattern, header, True)

    def best_peptide_proteins(peptides, scorecol):
        return proteins.generate_bestpep_proteins(
            peptides, scorecol, self.minlogscore, self.headeraccfield,
            self.fixedfeatcol)

    self.header = [self.headeraccfield] + prottabledata.PICKED_HEADER
    # Keep the original call order: both score columns, then both line
    # generators, then targets before decoys.
    target_scorecol = find_scorecol(theader)
    decoy_scorecol = find_scorecol(dheader)
    target_peps = tsvreader.generate_split_tsv_lines(self.fn, theader)
    decoy_peps = tsvreader.generate_split_tsv_lines(self.decoyfn, dheader)
    return (best_peptide_proteins(target_peps, target_scorecol),
            best_peptide_proteins(decoy_peps, decoy_scorecol))
def get_quantcols(pattern, oldheader, coltype):
    """Look up quantification column(s) in a header by pattern.

    Returns ``False`` when no pattern is given. For ``coltype == 'precur'``
    the reader is asked for a single matching column. Any other ``coltype``
    yields ``None``.
    """
    if pattern is None:
        return False
    if coltype != 'precur':
        # Only precursor quant columns are handled here.
        return None
    return reader.get_cols_in_file(pattern, oldheader, single_col=True)
def get_psms(self):
    """Select the confidence column and create the PSM generator.

    ``self.header`` becomes a copy of ``self.oldheader``. The confidence
    column is found by ``confpattern`` when set, otherwise taken from the
    1-based column number ``confcol``. The result of
    ``prep.generate_psms`` is stored in ``self.psms``.

    Raises:
        RuntimeError: if neither ``confpattern`` nor ``confcol`` is set, or
            if ``confcol`` is not a valid 1-based column number for the
            header.
    """
    self.header = self.oldheader[:]
    if self.confpattern:
        confkey = tsvreader.get_cols_in_file(self.confpattern, self.header,
                                             True)
    elif self.confcol:
        colnr = int(self.confcol)
        # Column numbers are 1-based. Without this check 0 or a negative
        # number would silently pick a column counted from the end.
        if not 1 <= colnr <= len(self.header):
            raise RuntimeError(
                '--confcol must be a column number between 1 and {}, '
                'got {}'.format(len(self.header), self.confcol))
        confkey = self.header[colnr - 1]
    else:
        raise RuntimeError('Must define either --confcol or '
                           '--confcolpattern')
    self.psms = prep.generate_psms(self.fn, self.oldheader, confkey,
                                   self.conflvl, self.lowerbetter)
def set_features(self):
    """Select the confidence column and filter the PSMs on it.

    ``self.header`` becomes a copy of ``self.oldheader``. The confidence
    column is found by ``confpattern`` when set, otherwise taken from the
    1-based column number ``confcol``. The result of
    ``filtering.filter_psms_conf`` is stored in ``self.psms``.

    Prints a message and exits with status 1 if neither ``confpattern``
    nor ``confcol`` is set, or if ``confcol`` is not a valid 1-based
    column number for the header.
    """
    self.header = self.oldheader[:]
    if self.confpattern:
        confkey = tsvreader.get_cols_in_file(self.confpattern, self.header,
                                             True)
    elif self.confcol:
        colnr = int(self.confcol)
        # Column numbers are 1-based. Without this check 0 or a negative
        # number would silently pick a column counted from the end.
        if not 1 <= colnr <= len(self.header):
            print('--confcol must be a column number between 1 and {}, '
                  'got {}'.format(len(self.header), self.confcol))
            sys.exit(1)
        confkey = self.header[colnr - 1]
    else:
        print('Must define either --confcol or --confcolpattern')
        sys.exit(1)
    self.psms = filtering.filter_psms_conf(self.oldpsms, confkey,
                                           self.conflvl, self.lowerbetter)
def initialize_input(self):
    """Read the input header and look up the score and q-value columns.

    Sets ``oldheader`` from the input file, ``scorecol`` from
    ``scorecolpattern`` and ``qvalcol`` from ``fdrcolpattern``.
    """
    header = tsvreader.get_tsv_header(self.fn)
    self.oldheader = header
    self.scorecol = tsvreader.get_cols_in_file(
        self.scorecolpattern, header, True)
    self.qvalcol = tsvreader.get_cols_in_file(
        self.fdrcolpattern, header, True)
def set_features(self):
    """Build the peptide table header and the peptide feature generator.

    Steps, in order:

    1. Derive the output header from ``self.oldheader``: drop the spectra
       file column and any isobaric quant columns, rename the precursor
       quant column to the area field, put the peptide and linked-PSM
       fields first, and translate PSM field names to peptide field names.
    2. Create the peptide generator with
       ``psmtopeptable.generate_peptides``.
    3. If a quant column pattern AND at least one summarization option
       (denominator columns/patterns, median sweep, median intensity) are
       given, append quant columns to the header and wrap the peptides in
       ``isosummarize.get_isobaric_ratios``. Otherwise isobaric quant data
       is left out of the output.
    4. If ``modelqvals`` is set, insert the modeled q-value field after
       the q-value field and wrap the peptides in
       ``psmtopeptable.recalculate_qvals_linear_model``.

    Sets ``self.header`` and ``self.features``. Prints a message and exits
    with status 1 when a total proteome table is passed whose first header
    field cannot be paired with a field in the peptide header.
    """
    qpat = self.quantcolpattern if self.quantcolpattern else '[a-z]+[0-9]+plex_'
    header = [x for x in self.oldheader if x != psmh.HEADER_SPECFILE]
    try:
        isocols = tsvreader.get_columns_by_pattern(header, qpat)
    except RuntimeError:
        # NOTE(review): presumably raised when no column matches the
        # pattern; in that case there is nothing to strip from the header.
        pass
    else:
        for col in isocols:
            header.remove(col)
    if self.precurquantcol:
        header = [peph.HEADER_AREA if x == self.precurquantcol else x
                  for x in header]
    header = [peph.HEADER_PEPTIDE, peph.HEADER_LINKED_PSMS] + [
        x for x in header if x != psmh.HEADER_PEPTIDE]
    # PSM table field name -> peptide table field name
    switch_map = dict(zip(
        [psmh.HEADER_PEPTIDE, psmh.HEADER_PROTEIN, psmh.HEADER_PEPTIDE_Q],
        [peph.HEADER_PEPTIDE, peph.HEADER_PROTEINS, peph.HEADER_QVAL]))
    self.header = [switch_map.get(field, field) for field in header]
    peptides = psmtopeptable.generate_peptides(
        self.fn, self.oldheader, switch_map, self.scorecol,
        self.precurquantcol, self.spectracol)
    # Remove quant data if not specified any way to summarize
    if self.quantcolpattern and any([self.denomcols, self.denompatterns,
                                     self.mediansweep,
                                     self.medianintensity]):
        denomcols = False
        if self.denomcols is not None:
            denomcols = [self.number_to_headerfield(col, self.oldheader)
                         for col in self.denomcols]
        elif self.denompatterns is not None:
            denomcolnrs = [
                tsvreader.get_columns_by_pattern(self.oldheader, pattern)
                for pattern in self.denompatterns]
            denomcols = {col for cols in denomcolnrs for col in cols}
        quantcols = tsvreader.get_columns_by_pattern(self.oldheader,
                                                     self.quantcolpattern)
        totalproteome, tpacc, tp_pepacc = False, False, False
        if self.totalprotfn:
            pep_tp_accs = [psmh.HEADER_MASTER_PROT, psmh.HEADER_SYMBOL,
                           psmh.HEADER_GENE, peph.HEADER_PROTEINS]
            totalphead = tsvreader.get_tsv_header(self.totalprotfn)
            totalpfield_found = False
            # After a successful match the loop variables keep the matched
            # pair; they are passed on to get_isobaric_ratios below.
            for tpacc, tp_pepacc in zip(proth.TPROT_HEADER_ACCS,
                                        pep_tp_accs):
                if totalphead[0] == tpacc and tp_pepacc in self.header:
                    totalpfield_found = True
                    break
            if not totalpfield_found:
                print('Could not find correct header field name in the total '
                      'proteome table passed. '
                      'Should be one of {}'.format(proth.TPROT_HEADER_ACCS))
                sys.exit(1)
            totalproteome = tsvreader.generate_split_tsv_lines(
                self.totalprotfn, totalphead)
        mn_factors = False
        if self.mednorm_factors:
            mnhead = tsvreader.get_tsv_header(self.mednorm_factors)
            mn_factors = tsvreader.generate_split_tsv_lines(
                self.mednorm_factors, mnhead)
        nopsms = [isosummarize.get_no_psms_field(qf) for qf in quantcols]
        self.header = (self.header + quantcols + nopsms
                       + [proth.HEADER_NO_FULLQ_PSMS])
        peptides = isosummarize.get_isobaric_ratios(
            self.fn, self.oldheader, quantcols, denomcols,
            self.mediansweep, self.medianintensity, self.median_or_avg,
            self.minint, peptides, self.header[0], psmh.HEADER_PEPTIDE,
            totalproteome, tpacc, tp_pepacc, self.logisoquant,
            self.mediannormalize, mn_factors, self.keepnapsms)
    if self.modelqvals:
        qix = self.header.index(peph.HEADER_QVAL) + 1
        self.header = (self.header[:qix] + [peph.HEADER_QVAL_MODELED]
                       + self.header[qix:])
        scorecol = tsvreader.get_cols_in_file(self.scorecolpattern,
                                              self.oldheader, True)
        peptides = psmtopeptable.recalculate_qvals_linear_model(
            peptides, scorecol, self.qvalthreshold, self.minpeptidenr)
    self.features = peptides
def initialize_input(self):
    """Run the parent input setup, then look up the score column.

    ``scorecol`` is found in ``oldheader`` using ``scorecolpattern``.
    """
    super().initialize_input()
    # Read both attributes only after the parent setup has run.
    pattern = self.scorecolpattern
    header = self.oldheader
    self.scorecol = reader.get_cols_in_file(pattern, header, True)