def get_psms(self):
    """Set the output header and prepare the normalized-ratio PSM source.

    The output header is a copy of the input header. Denominator columns
    are taken from ``self.denomcols`` (each entry passed through
    ``self.number_to_headerfield``) when that is set; otherwise they are
    the header fields matching any regex in ``self.denompatterns``.
    Whatever ``prep.get_normalized_ratios`` returns is stored on
    ``self.psms``.

    Raises:
        RuntimeError: if neither denominator column numbers nor
            denominator patterns are defined.
    """
    self.header = self.oldheader[:]
    if self.denomcols is not None:
        denomcols = [self.number_to_headerfield(colnr, self.oldheader)
                     for colnr in self.denomcols]
    elif self.denompatterns is not None:
        # A set, so a field matched by several patterns is kept only once.
        denomcols = {
            field
            for pattern in self.denompatterns
            for field in tsv.get_columns_by_pattern(self.oldheader, pattern)
        }
    else:
        raise RuntimeError('Must define either denominator column numbers '
                           'or regex pattterns to find them')
    quantcols = tsv.get_columns_by_pattern(self.oldheader,
                                           self.quantcolpattern)
    # Without a median PSM file, False is passed in place of its header.
    medianheader = (tsv.get_tsv_header(self.medianpsms)
                    if self.medianpsms is not None else False)
    self.psms = prep.get_normalized_ratios(
        self.fn, self.oldheader, quantcols, denomcols, self.minint,
        self.medianpsms, medianheader)
def set_features(self):
    """Set the output header and prepare the isobaric-ratio feature source.

    Denominator columns come from ``self.denomcols`` or, failing that,
    from header fields matching ``self.denompatterns``. They stay ``False``
    when neither is given but median sweep or median intensity is
    requested. With ``self.featcol`` set, the header becomes the feature
    column followed by the quant columns, their PSM-count fields and
    ``HEADER_NO_FULLQ_PSMS``; otherwise it is the input header extended
    with one ``ratio_<quantcol>`` field per quant column. The result of
    ``isosummarize.get_isobaric_ratios`` is stored on ``self.psms``.

    Raises:
        RuntimeError: if no denominators are defined and neither median
            sweep nor median intensity is requested.
    """
    denomcols = False
    if self.denomcols is not None:
        denomcols = [self.number_to_headerfield(colnr, self.oldheader)
                     for colnr in self.denomcols]
    elif self.denompatterns is not None:
        # A set, so a field matched by several patterns is kept only once.
        denomcols = {
            field
            for pattern in self.denompatterns
            for field in tsvreader.get_columns_by_pattern(self.oldheader,
                                                          pattern)
        }
    elif not (self.mediansweep or self.medianintensity):
        raise RuntimeError('Must define either denominator column numbers '
                           'or regex pattterns to find them')

    quantcols = tsvreader.get_columns_by_pattern(self.oldheader,
                                                 self.quantcolpattern)

    # Median normalization factors are optional; False means "none given".
    mn_factors = False
    if self.mednorm_factors:
        mn_factors = tsvreader.generate_split_tsv_lines(
            self.mednorm_factors,
            tsvreader.get_tsv_header(self.mednorm_factors))

    nopsms = [isosummarize.get_no_psms_field(quantcol)
              for quantcol in quantcols]
    if self.featcol:
        # Called before self.featcol is read for the header below.
        self.get_column_header_for_number(['featcol'], self.oldheader)
        self.header = ([self.featcol] + quantcols + nopsms
                       + [HEADER_NO_FULLQ_PSMS])
    else:
        self.header = self.oldheader + list(map('ratio_{}'.format, quantcols))

    self.psms = isosummarize.get_isobaric_ratios(
        self.fn, self.oldheader, quantcols, denomcols,
        self.mediansweep, self.medianintensity, self.median_or_avg,
        self.minint, False, False, self.featcol, False, False, False,
        self.logisoquant, self.mediannormalize, mn_factors, self.keepnapsms)
def get_psms(self):
    """Set the output header and prepare the isobaric-ratio PSM source.

    Denominator columns come from ``self.denomcols`` or, failing that,
    from header fields matching ``self.denompatterns``. The output header
    depends on which of ``self.proteincol`` and ``self.targettable`` are
    set:

    * both: the target table header plus quant and PSM-count fields;
    * only the protein column: ``prottabledata.HEADER_ACCESSION`` plus
      quant and PSM-count fields;
    * neither: the input header plus one ``ratio_<quantcol>`` field per
      quant column.

    The result of ``prep.get_isobaric_ratios`` is stored on ``self.psms``.

    Raises:
        RuntimeError: if neither denominator column numbers nor
            denominator patterns are defined.
    """
    if self.denomcols is not None:
        denomcols = [self.number_to_headerfield(colnr, self.oldheader)
                     for colnr in self.denomcols]
    elif self.denompatterns is not None:
        # A set, so a field matched by several patterns is kept only once.
        denomcols = {
            field
            for pattern in self.denompatterns
            for field in tsv.get_columns_by_pattern(self.oldheader, pattern)
        }
    else:
        raise RuntimeError('Must define either denominator column numbers '
                           'or regex pattterns to find them')

    quantcols = tsv.get_columns_by_pattern(self.oldheader,
                                           self.quantcolpattern)
    self.get_column_header_for_number(['proteincol'], self.oldheader)
    nopsms = [prep.get_no_psms_field(quantcol) for quantcol in quantcols]

    if self.targettable:
        if self.proteincol:
            targetheader = tsv.get_tsv_header(self.targettable)
            self.header = targetheader + quantcols + nopsms
        # NOTE(review): with a target table but no protein column,
        # self.header is not assigned here -- confirm that combination
        # is rejected before this method runs.
    elif self.proteincol:
        self.header = [prottabledata.HEADER_ACCESSION] + quantcols + nopsms
    else:
        self.header = self.oldheader + ['ratio_{}'.format(quantcol)
                                        for quantcol in quantcols]

    self.psms = prep.get_isobaric_ratios(
        self.fn, self.oldheader, quantcols, denomcols, self.minint,
        self.targettable, self.proteincol, self.normalize,
        self.normalizeratios)
def get_psms(self):
    """Choose the output header and set up isobaric ratios on PSMs.

    Denominators are resolved first: ``self.denomcols`` entries are passed
    through ``self.number_to_headerfield``; otherwise every header field
    matching one of ``self.denompatterns`` is collected. The header is
    then picked from ``self.proteincol`` / ``self.targettable`` and the
    return value of ``prep.get_isobaric_ratios`` is stored on
    ``self.psms``.

    Raises:
        RuntimeError: if neither denominator column numbers nor
            denominator patterns are defined.
    """
    if self.denomcols is None and self.denompatterns is None:
        raise RuntimeError('Must define either denominator column numbers '
                           'or regex pattterns to find them')
    if self.denomcols is not None:
        denomcols = [self.number_to_headerfield(number, self.oldheader)
                     for number in self.denomcols]
    else:
        # Accumulate into a set: overlapping pattern hits collapse.
        denomcols = set()
        for pattern in self.denompatterns:
            denomcols.update(
                tsv.get_columns_by_pattern(self.oldheader, pattern))

    quantcols = tsv.get_columns_by_pattern(self.oldheader,
                                           self.quantcolpattern)
    self.get_column_header_for_number(['proteincol'], self.oldheader)
    nopsms = [prep.get_no_psms_field(field) for field in quantcols]

    has_protcol = bool(self.proteincol)
    has_target = bool(self.targettable)
    if has_protcol and has_target:
        # Output follows the target table's own header.
        self.header = (tsv.get_tsv_header(self.targettable)
                       + quantcols + nopsms)
    elif has_protcol:
        self.header = [prottabledata.HEADER_ACCESSION] + quantcols + nopsms
    elif not has_target:
        ratiofields = ['ratio_{}'.format(field) for field in quantcols]
        self.header = self.oldheader + ratiofields
    # NOTE(review): a target table without a protein column matches none
    # of the branches, so self.header is left unassigned -- confirm this
    # case cannot occur.

    self.psms = prep.get_isobaric_ratios(
        self.fn, self.oldheader, quantcols, denomcols, self.minint,
        self.targettable, self.proteincol, self.normalize,
        self.normalizeratios)
def get_quant(self, theader, features):
    """Attach precursor and/or isobaric quantification to ``features``.

    Args:
        theader: header passed to ``tsvreader.generate_split_tsv_lines``
            when reading ``self.fn`` for precursor quant.
        features: the feature source to extend; returned unchanged when
            neither ``self.precursor`` nor ``self.quantcolpattern`` is set.

    Returns:
        The feature source after the requested quant steps were applied.

    Side effects:
        Appends ``prottabledata.HEADER_AREA`` to ``self.header`` for
        precursor quant, and rebinds ``self.header`` with the quant
        columns, PSM-count fields and
        ``prottabledata.HEADER_NO_FULLQ_PSMS`` for isobaric quant. Prints
        a message and calls ``sys.exit(1)`` on an invalid option
        combination.
    """
    if self.precursor:
        tpeps = tsvreader.generate_split_tsv_lines(self.fn, theader)
        self.header.append(prottabledata.HEADER_AREA)
        features = proteins.add_ms1_quant_from_top3_mzidtsv(
            features, tpeps, self.headeraccfield, self.fixedfeatcol)

    if not self.quantcolpattern:
        return features

    psmheader = tsvreader.get_tsv_header(self.psmfile)
    denomcols = False
    if self.denomcols is not None:
        denomcols = [self.number_to_headerfield(colnr, psmheader)
                     for colnr in self.denomcols]
    elif self.denompatterns is not None:
        # A set, so a field matched by several patterns is kept only once.
        denomcols = {
            field
            for pattern in self.denompatterns
            for field in tsvreader.get_columns_by_pattern(psmheader, pattern)
        }
    elif not (self.mediansweep or self.medianintensity):
        print('Must define either denominator column numbers '
              'or regex pattterns to find them, or use median sweep, or '
              'report median intensities.')
        sys.exit(1)
    elif self.medianintensity and self.mediannormalize:
        # Only reached when no denominators were given.
        print('Cannot do median-centering on intensity values, exiting')
        sys.exit(1)

    quantcols = tsvreader.get_columns_by_pattern(psmheader,
                                                 self.quantcolpattern)

    # Median normalization factors are optional; False means "none given".
    mn_factors = False
    if self.mednorm_factors:
        mn_factors = tsvreader.generate_split_tsv_lines(
            self.mednorm_factors,
            tsvreader.get_tsv_header(self.mednorm_factors))

    nopsms = [isosummarize.get_no_psms_field(quantcol)
              for quantcol in quantcols]
    # Rebinds to a new list rather than extending the existing one in place.
    self.header = (self.header + quantcols + nopsms
                   + [prottabledata.HEADER_NO_FULLQ_PSMS])
    return isosummarize.get_isobaric_ratios(
        self.psmfile, psmheader, quantcols, denomcols,
        self.mediansweep, self.medianintensity, self.median_or_avg,
        self.minint, features, self.headeraccfield, self.fixedfeatcol,
        False, False, False, self.logisoquant, self.mediannormalize,
        mn_factors, self.keepnapsms)
def get_psms(self):
    """Copy the input header and set up normalized PSM ratios.

    ``self.header`` becomes a copy of ``self.oldheader``. Denominator
    columns are resolved from ``self.denomcols`` (via
    ``self.number_to_headerfield``) or, when that is ``None``, from the
    header fields matching any of ``self.denompatterns``. The return value
    of ``prep.get_normalized_ratios`` is stored on ``self.psms``.

    Raises:
        RuntimeError: if both ``self.denomcols`` and
            ``self.denompatterns`` are ``None``. The header copy has
            already been made at that point.
    """
    self.header = self.oldheader[:]
    if self.denomcols is None and self.denompatterns is None:
        raise RuntimeError('Must define either denominator column numbers '
                           'or regex pattterns to find them')

    if self.denomcols is not None:
        denomcols = [self.number_to_headerfield(number, self.oldheader)
                     for number in self.denomcols]
    else:
        # Accumulate into a set: overlapping pattern hits collapse.
        denomcols = set()
        for pattern in self.denompatterns:
            denomcols.update(
                tsv.get_columns_by_pattern(self.oldheader, pattern))

    quantcols = tsv.get_columns_by_pattern(self.oldheader,
                                           self.quantcolpattern)

    # Stays False unless a median PSM file was supplied.
    medianheader = False
    if self.medianpsms is not None:
        medianheader = tsv.get_tsv_header(self.medianpsms)

    self.psms = prep.get_normalized_ratios(
        self.fn, self.oldheader, quantcols, denomcols, self.minint,
        self.medianpsms, medianheader)
def get_psm2pep_header(oldheader, isobq_pattern=False, precurqfield=False):
    """Derive a peptide-table header from a PSM-table header.

    Args:
        oldheader: the PSM table header; it is copied, not modified.
        isobq_pattern: when truthy, every field returned by
            ``tsv.get_columns_by_pattern`` for this pattern is removed.
        precurqfield: when truthy, fields equal to it are replaced by
            ``peptabledata.HEADER_AREA``.

    Returns:
        A new list in which ``peptabledata.HEADER_LINKED_PSMS`` has been
        inserted directly before ``mzidtsvdata.HEADER_PEPTIDE`` and fields
        are renamed according to ``switch_psm_to_peptable_fields``.

    Raises:
        ValueError: if ``mzidtsvdata.HEADER_PEPTIDE`` (or a matched
            isobaric column) is not present in the header.
    """
    header = oldheader[:]
    if isobq_pattern:
        for isocol in tsv.get_columns_by_pattern(header, isobq_pattern):
            del header[header.index(isocol)]
    if precurqfield:
        header = [peptabledata.HEADER_AREA if field == precurqfield else field
                  for field in header]
    linked_psms = [peptabledata.HEADER_LINKED_PSMS]
    peptide_ix = header.index(mzidtsvdata.HEADER_PEPTIDE)
    # Empty-slice assignment inserts in front of the peptide field.
    header[peptide_ix:peptide_ix] = linked_psms
    switch_map = switch_psm_to_peptable_fields(header)
    return [switch_map.get(field, field) for field in header]
def get_psm2pep_header(oldheader, isobq_pattern=False, precurqfield=False):
    """Translate a PSM-table header into a peptide-table header.

    Works on a copy of ``oldheader``. Steps, in order:

    1. If ``isobq_pattern`` is truthy, drop each field that
       ``tsv.get_columns_by_pattern`` reports for it.
    2. If ``precurqfield`` is truthy, rename that field to
       ``peptabledata.HEADER_AREA``.
    3. Place ``peptabledata.HEADER_LINKED_PSMS`` directly in front of
       ``mzidtsvdata.HEADER_PEPTIDE``.
    4. Rename every field that has an entry in the mapping returned by
       ``switch_psm_to_peptable_fields``.

    Returns:
        The resulting header as a new list.

    Raises:
        ValueError: if a field that must be located is missing.
    """
    fields = oldheader[:]

    if isobq_pattern:
        isocols = tsv.get_columns_by_pattern(fields, isobq_pattern)
        for isocol in isocols:
            fields.pop(fields.index(isocol))

    if precurqfield:
        swapped = []
        for name in fields:
            if name == precurqfield:
                swapped.append(peptabledata.HEADER_AREA)
            else:
                swapped.append(name)
        fields = swapped

    extra = [peptabledata.HEADER_LINKED_PSMS]
    split_at = fields.index(mzidtsvdata.HEADER_PEPTIDE)
    fields = fields[:split_at] + extra + fields[split_at:]

    switch_map = switch_psm_to_peptable_fields(fields)
    outheader = []
    for name in fields:
        if name in switch_map:
            outheader.append(switch_map[name])
        else:
            outheader.append(name)
    return outheader
def set_features(self):
    """Build the peptide-table header and the peptide feature source.

    The header is derived from ``self.oldheader``:
    ``psmh.HEADER_SPECFILE`` and any isobaric quant columns are removed,
    the precursor quant column (if set) is renamed to
    ``peph.HEADER_AREA``, the peptide and linked-PSMs fields are moved to
    the front, and PSM field names are mapped to their peptide-table
    names. Peptides are generated by ``psmtopeptable.generate_peptides``.

    If a quant column pattern is given together with a way to summarize
    (denominator columns or patterns, median sweep, or median intensity),
    isobaric quant columns are appended to the header and the peptides are
    passed through ``isosummarize.get_isobaric_ratios``. If
    ``self.modelqvals`` is set, a modeled q-value field is inserted after
    the q-value field and q-values are recalculated.

    The final peptide source is stored on ``self.features``. Prints a
    message and calls ``sys.exit(1)`` when a total proteome table is
    passed whose first header field matches no known accession field.
    """
    # With no pattern given, fall back to a default regex so isobaric
    # columns can still be stripped from the header.
    qpat = self.quantcolpattern or '[a-z]+[0-9]+plex_'
    header = [field for field in self.oldheader
              if field != psmh.HEADER_SPECFILE]
    try:
        isocols = tsvreader.get_columns_by_pattern(header, qpat)
    except RuntimeError:
        # presumably raised when no column matches -- nothing to strip
        isocols = []
    for isocol in isocols:
        del header[header.index(isocol)]

    if self.precurquantcol:
        header = [peph.HEADER_AREA if field == self.precurquantcol else field
                  for field in header]
    header = [peph.HEADER_PEPTIDE, peph.HEADER_LINKED_PSMS] + [
        field for field in header if field != psmh.HEADER_PEPTIDE]

    # PSM-table field names -> peptide-table field names.
    switch_map = {
        psmh.HEADER_PEPTIDE: peph.HEADER_PEPTIDE,
        psmh.HEADER_PROTEIN: peph.HEADER_PROTEINS,
        psmh.HEADER_PEPTIDE_Q: peph.HEADER_QVAL,
    }
    self.header = [switch_map.get(field, field) for field in header]
    peptides = psmtopeptable.generate_peptides(
        self.fn, self.oldheader, switch_map, self.scorecol,
        self.precurquantcol, self.spectracol)

    # Quant data is only added when there is some way to summarize it.
    can_summarize = any((self.denomcols, self.denompatterns,
                         self.mediansweep, self.medianintensity))
    if self.quantcolpattern and can_summarize:
        denomcols = False
        if self.denomcols is not None:
            denomcols = [self.number_to_headerfield(colnr, self.oldheader)
                         for colnr in self.denomcols]
        elif self.denompatterns is not None:
            # A set, so a field matched by several patterns is kept once.
            denomcols = {
                field
                for pattern in self.denompatterns
                for field in tsvreader.get_columns_by_pattern(
                    self.oldheader, pattern)
            }
        quantcols = tsvreader.get_columns_by_pattern(self.oldheader,
                                                     self.quantcolpattern)

        totalproteome, tpacc, tp_pepacc = False, False, False
        if self.totalprotfn:
            pep_tp_accs = [psmh.HEADER_MASTER_PROT, psmh.HEADER_SYMBOL,
                           psmh.HEADER_GENE, peph.HEADER_PROTEINS]
            totalphead = tsvreader.get_tsv_header(self.totalprotfn)
            # Find the accession pair whose table field is the first
            # column of the total proteome table and whose peptide-side
            # field is in the output header; the loop variables keep the
            # matching pair after the break.
            for tpacc, tp_pepacc in zip(proth.TPROT_HEADER_ACCS,
                                        pep_tp_accs):
                if totalphead[0] == tpacc and tp_pepacc in self.header:
                    break
            else:
                print('Could not find correct header field name in the total '
                      'proteome table passed. '
                      'Should be one of {}'.format(proth.TPROT_HEADER_ACCS))
                sys.exit(1)
            totalproteome = tsvreader.generate_split_tsv_lines(
                self.totalprotfn, totalphead)

        # Median normalization factors are optional; False means "none".
        mn_factors = False
        if self.mednorm_factors:
            mn_factors = tsvreader.generate_split_tsv_lines(
                self.mednorm_factors,
                tsvreader.get_tsv_header(self.mednorm_factors))

        nopsms = [isosummarize.get_no_psms_field(quantcol)
                  for quantcol in quantcols]
        self.header = (self.header + quantcols + nopsms
                       + [proth.HEADER_NO_FULLQ_PSMS])
        peptides = isosummarize.get_isobaric_ratios(
            self.fn, self.oldheader, quantcols, denomcols,
            self.mediansweep, self.medianintensity, self.median_or_avg,
            self.minint, peptides, self.header[0], psmh.HEADER_PEPTIDE,
            totalproteome, tpacc, tp_pepacc, self.logisoquant,
            self.mediannormalize, mn_factors, self.keepnapsms)

    if self.modelqvals:
        # The modeled q-value field goes directly after the q-value field.
        qix = self.header.index(peph.HEADER_QVAL) + 1
        self.header = (self.header[:qix] + [peph.HEADER_QVAL_MODELED]
                       + self.header[qix:])
        scorecol = tsvreader.get_cols_in_file(self.scorecolpattern,
                                              self.oldheader, True)
        peptides = psmtopeptable.recalculate_qvals_linear_model(
            peptides, scorecol, self.qvalthreshold, self.minpeptidenr)

    self.features = peptides