def _build_protein_index(self):
    """Build, store and return the per-protein identification index.

    Each entry is a dict with the keys ``protein_name``, ``protein_id``,
    ``identified_glycopeptide_count`` and ``protein``. Only proteins of
    ``self.hypothesis_id`` that have at least one identified glycopeptide
    in ``self.analysis_id`` with ``ms2_score > self.threshold`` are
    listed, ordered by descending identified count.

    Returns
    -------
    list of dict
        The same list that is stored on ``self.protein_index``.
    """
    # Every protein in the hypothesis that owns at least one theoretical
    # glycopeptide (inner join). The count column is selected but only the
    # name and id are carried forward.
    theoretical_rows = (
        self.session.query(
            Protein.name, Protein.id, func.count(Glycopeptide.id))
        .join(Glycopeptide)
        .group_by(Protein.id)
        .filter(Protein.hypothesis_id == self.hypothesis_id)
        .all())

    # Per-protein number of identified glycopeptides passing the score
    # threshold for this analysis.
    matched_rows = (
        self.session.query(
            Protein.name, Protein.id, func.count(IdentifiedGlycopeptide.id))
        .join(Protein.glycopeptides)
        .join(
            IdentifiedGlycopeptide,
            IdentifiedGlycopeptide.structure_id == Glycopeptide.id)
        .group_by(Protein.id)
        .filter(
            IdentifiedGlycopeptide.ms2_score > self.threshold,
            IdentifiedGlycopeptide.analysis_id == self.analysis_id)
        .all())

    entries_by_id = {
        pid: {"protein_name": name, "protein_id": pid}
        for name, pid, _ in theoretical_rows
    }

    matched_entries = []
    for _, pid, n_identified in matched_rows:
        # Raises KeyError if a matched protein is absent from the
        # theoretical listing.
        record = entries_by_id[pid]
        record['identified_glycopeptide_count'] = n_identified
        matched_entries.append(record)

    matched_entries.sort(
        key=lambda record: record["identified_glycopeptide_count"],
        reverse=True)
    self.protein_index = matched_entries

    # Attach the full Protein object to each entry (one lookup per entry).
    for record in self.protein_index:
        record['protein'] = self.session.query(Protein).get(
            record["protein_id"])
    return self.protein_index
def _build_protein_index(self):
    """Assemble the protein listing for this analysis and cache it.

    The result is stored on ``self.protein_index`` and returned. It is a
    list of dicts (``protein_name``, ``protein_id``,
    ``identified_glycopeptide_count``, ``protein``), restricted to
    proteins with at least one identified glycopeptide whose
    ``ms2_score`` exceeds ``self.threshold`` in ``self.analysis_id``,
    sorted so the protein with the most identifications comes first.
    """
    count_theoretical = func.count(Glycopeptide.id)
    theoretical_query = self.session.query(
        Protein.name, Protein.id, count_theoretical)
    theoretical_query = theoretical_query.join(Glycopeptide)
    theoretical_query = theoretical_query.group_by(Protein.id)
    theoretical_query = theoretical_query.filter(
        Protein.hypothesis_id == self.hypothesis_id)
    theoretical = theoretical_query.all()

    count_identified = func.count(IdentifiedGlycopeptide.id)
    matched_query = self.session.query(
        Protein.name, Protein.id, count_identified)
    matched_query = matched_query.join(Glycopeptide)
    matched_query = matched_query.join(
        IdentifiedGlycopeptide,
        IdentifiedGlycopeptide.structure_id == Glycopeptide.id)
    matched_query = matched_query.group_by(Protein.id)
    matched_query = matched_query.filter(
        IdentifiedGlycopeptide.ms2_score > self.threshold,
        IdentifiedGlycopeptide.analysis_id == self.analysis_id)
    matched = matched_query.all()

    # Seed one entry per protein of the hypothesis; the theoretical count
    # (third column) is not stored in the entry.
    by_protein_id = {}
    for row in theoretical:
        by_protein_id[row[1]] = {
            "protein_name": row[0],
            "protein_id": row[1],
        }

    # Only proteins present in the matched result make it into the listing.
    with_matches = []
    for row in matched:
        item = by_protein_id[row[1]]
        item['identified_glycopeptide_count'] = row[2]
        with_matches.append(item)

    def identified_count(item):
        return item["identified_glycopeptide_count"]

    self.protein_index = sorted(
        with_matches, key=identified_count, reverse=True)

    # Resolve the Protein object for each entry by primary key.
    for item in self.protein_index:
        item['protein'] = self.session.query(Protein).get(item["protein_id"])
    return self.protein_index
def __len__(self):
    """Return the size of the database backing this object.

    The value stored under ``'database_size'`` in the hypothesis
    parameters is returned when that key exists. Otherwise the size is
    computed by counting ``self.identity_field`` over ``self.selectable``,
    restricted by ``self._limit_to_hypothesis``.
    """
    try:
        return self.hypothesis.parameters['database_size']
    except KeyError:
        # No stored size; fall through to counting rows.
        pass

    count_stmt = select([func.count(self.identity_field)])
    count_stmt = count_stmt.select_from(self.selectable)
    count_stmt = self._limit_to_hypothesis(count_stmt)
    result = self.session.execute(count_stmt)
    return result.scalar()
def __len__(self):
    """Number of entries in the database for this hypothesis.

    Prefers the ``'database_size'`` value recorded in
    ``self.hypothesis.parameters``. When that key is missing, issues a
    ``COUNT`` over ``self.identity_field`` against ``self.selectable``
    after applying ``self._limit_to_hypothesis``.
    """
    try:
        stored_size = self.hypothesis.parameters['database_size']
    except KeyError:
        # Nothing recorded, so ask the database.
        counting = self._limit_to_hypothesis(
            select([func.count(self.identity_field)]).select_from(
                self.selectable))
        return self.session.execute(counting).scalar()
    else:
        return stored_size
def _count_produced_glycopeptides(self):
    """Count this hypothesis' glycopeptides, log and record the total.

    The total is logged, saved under the ``"database_size"`` parameter
    via ``self.set_parameters``, and returned.
    """
    belongs_to_hypothesis = Glycopeptide.hypothesis_id == self.hypothesis_id
    counting_query = self.query(func.count(Glycopeptide.id))
    n_glycopeptides = counting_query.filter(belongs_to_hypothesis).scalar()

    self.log("Generated %d glycopeptides" % n_glycopeptides)
    self.set_parameters({"database_size": n_glycopeptides})
    return n_glycopeptides
def summarize_glycopeptide_hypothesis(database_connection, hypothesis_identifier):
    """Print the glycopeptide count of each protein in a hypothesis.

    The hypothesis is resolved with ``get_by_name_or_id``. One
    ``"<protein name>: <count>"`` line is echoed per protein, largest
    count first, followed by a ``"Total: <n>"`` line. Proteins without
    any glycopeptide do not appear, because the query uses an inner join.
    """
    session = database_connection.session
    hypothesis = get_by_name_or_id(
        session, GlycopeptideHypothesis, hypothesis_identifier)

    per_protein = (
        session.query(Protein, func.count(Glycopeptide.id))
        .join(Glycopeptide)
        .group_by(Protein.id)
        .filter(Protein.hypothesis_id == hypothesis.id)
        .all())
    # Stable sort, descending by the count column.
    per_protein.sort(key=lambda row: row[1], reverse=True)

    for protein, n_glycopeptides in per_protein:
        click.echo("%s: %d" % (protein.name, n_glycopeptides))

    total = sum(n_glycopeptides for _, n_glycopeptides in per_protein)
    click.echo("Total: %d" % (total,))
def digest_proteins(self):
    """Digest every protein of the hypothesis and log the peptide count.

    A ``ProteinDigestor`` configured from this object's protease,
    modification rules and missed-cleavage limit is handed to a
    ``MultipleProcessProteinDigestor`` using ``self.n_processes``
    processes. After it has run, the number of ``Peptide`` rows for the
    hypothesis is logged.
    """
    protein_digestor = ProteinDigestor(
        self.protease,
        self.constant_modifications,
        self.variable_modifications,
        self.max_missed_cleavages)

    MultipleProcessProteinDigestor(
        self._original_connection,
        self.hypothesis_id,
        self.protein_ids(),
        protein_digestor,
        n_processes=self.n_processes).run()

    peptide_counter = self.query(func.count(Peptide.id))
    peptide_counter = peptide_counter.filter(
        Peptide.hypothesis_id == self.hypothesis_id)
    peptide_total = peptide_counter.scalar()
    self.log("%d Base Peptides Produced" % (peptide_total, ))
def digest_proteins(self):
    """Run the protein digestion step and report how many peptides exist.

    The digestor is built from ``self.protease``, the constant and
    variable modifications, ``self.max_missed_cleavages`` and
    ``self.semispecific``. Digestion is carried out by a
    ``MultipleProcessProteinDigestor`` with ``self.n_processes``
    processes. The resulting ``Peptide`` count for this hypothesis is
    logged.
    """
    digestor_options = dict(semispecific=self.semispecific)
    protein_digestor = ProteinDigestor(
        self.protease,
        self.constant_modifications,
        self.variable_modifications,
        self.max_missed_cleavages,
        **digestor_options)

    digestion = MultipleProcessProteinDigestor(
        self._original_connection,
        self.hypothesis_id,
        self.protein_ids(),
        protein_digestor,
        n_processes=self.n_processes)
    digestion.run()

    in_this_hypothesis = Peptide.hypothesis_id == self.hypothesis_id
    produced = self.query(
        func.count(Peptide.id)).filter(in_this_hypothesis).scalar()
    self.log("%d Base Peptides Produced" % (produced,))
def summarize_glycopeptide_hypothesis(database_connection, hypothesis_identifier):
    """Echo per-protein glycopeptide counts for a hypothesis, then a total.

    ``hypothesis_identifier`` is passed to ``get_by_name_or_id`` to find
    the ``GlycopeptideHypothesis``. Proteins are reported in descending
    order of glycopeptide count. Only proteins that join to at least one
    glycopeptide are reported.
    """
    session = database_connection.session
    hypothesis = get_by_name_or_id(
        session, GlycopeptideHypothesis, hypothesis_identifier)

    glycopeptide_count = func.count(Glycopeptide.id)
    query = session.query(Protein, glycopeptide_count)
    query = query.join(Glycopeptide).group_by(Protein.id)
    query = query.filter(Protein.hypothesis_id == hypothesis.id)

    def by_count(row):
        return row[1]

    running_total = 0
    for row in sorted(query.all(), key=by_count, reverse=True):
        protein, n_glycopeptides = row
        click.echo("%s: %d" % (protein.name, n_glycopeptides))
        running_total += n_glycopeptides
    click.echo("Total: %d" % (running_total, ))
def protein_index(session, hypothesis_id):
    """List the proteins of a hypothesis with their glycopeptide counts.

    Parameters
    ----------
    session
        Database session used for all queries.
    hypothesis_id
        Identifier of the hypothesis whose proteins are listed.

    Returns
    -------
    list of dict
        One dict per protein that has at least one glycopeptide, with the
        keys ``protein_name``, ``protein_id``, ``theoretical_count`` and
        ``protein``, sorted by ``protein_name``.
    """
    rows = (
        session.query(Protein.name, Protein.id, func.count(Glycopeptide.id))
        .join(Glycopeptide)
        .group_by(Protein.id)
        .filter(Protein.hypothesis_id == hypothesis_id)
        .all())

    entries = [
        {
            "protein_name": name,
            "protein_id": pid,
            "theoretical_count": n_theoretical,
        }
        for name, pid, n_theoretical in rows
    ]
    entries.sort(key=lambda entry: entry["protein_name"])

    # Attach the full Protein object to each entry, in sorted order.
    for entry in entries:
        entry['protein'] = session.query(Protein).get(entry["protein_id"])
    return entries
def _count_produced_glycopeptides(self):
    """Count the glycopeptides of this hypothesis and log the total.

    Returns
    -------
    int
        The number of ``Glycopeptide`` rows whose ``hypothesis_id``
        equals ``self.hypothesis_id``. Earlier behaviour returned
        ``None``; the count is now returned so callers can use it
        without repeating the query.
    """
    count = self.query(func.count(Glycopeptide.id)).filter(
        Glycopeptide.hypothesis_id == self.hypothesis_id).scalar()
    self.log("Generated %d glycopeptides" % count)
    return count
def __len__(self):
    """Return the row count for this object's hypothesis.

    Counts ``self.identity_field`` over ``self.selectable`` after the
    statement has been restricted by ``self._limit_to_hypothesis``.
    """
    row_count = func.count(self.identity_field)
    counting = self._limit_to_hypothesis(
        select([row_count]).select_from(self.selectable))
    result = self.session.execute(counting)
    return result.scalar()
def _count_produced_glycopeptides(self):
    """Log, store and return the number of glycopeptides generated.

    Counts ``Glycopeptide`` rows belonging to ``self.hypothesis_id``,
    writes the figure to the log, and records it as the
    ``"database_size"`` parameter through ``self.set_parameters``.
    """
    query = self.query(func.count(Glycopeptide.id))
    query = query.filter(Glycopeptide.hypothesis_id == self.hypothesis_id)
    produced = query.scalar()

    message = "Generated %d glycopeptides" % produced
    self.log(message)

    size_parameter = {"database_size": produced}
    self.set_parameters(size_parameter)
    return produced