def _copy_hypothesis_across_file_boundaries(database_connection, source, hypothesis_name, identifier=None):
    """Resolve a glycan hypothesis in ``source`` and copy it into ``database_connection``.

    ``identifier`` may be an integer hypothesis id or a hypothesis name.
    When source and destination are the same connection no copy is made and
    the resolved id is returned directly; otherwise returns the id of the
    copied hypothesis in the destination database.
    """
    source_handle = DatabaseBoundOperation(source)
    source_hypothesis_id = None
    source_hypothesis_name = None
    try:
        hypothesis_id = int(identifier)
        inst = source_handle.query(GlycanHypothesis).get(hypothesis_id)
        if inst is not None:
            source_hypothesis_id = hypothesis_id
            source_hypothesis_name = inst.name
    except (TypeError, ValueError):
        # int() raises TypeError for None and ValueError for name strings;
        # the original only caught TypeError, so name lookups crashed.
        hypothesis_name = identifier
        inst = source_handle.query(GlycanHypothesis).filter(
            GlycanHypothesis.name == hypothesis_name).first()
        if inst is not None:
            source_hypothesis_id = inst.id
            source_hypothesis_name = inst.name
    if source == database_connection:
        # Same database: nothing to copy.
        return source_hypothesis_id
    mover = GlycanHypothesisCopier(
        database_connection, [(source, source_hypothesis_id)],
        hypothesis_name=source_hypothesis_name)
    mover.run()
    return mover.hypothesis_id
def _copy_hypothesis_across_file_boundaries(database_connection, source, hypothesis_name, identifier=None):
    """Resolve a glycan hypothesis in ``source`` and copy it into ``database_connection``.

    ``identifier`` may be an integer hypothesis id or a hypothesis name.
    When source and destination are the same connection no copy is made and
    the resolved id is returned directly; otherwise returns the id of the
    copied hypothesis in the destination database.
    """
    source_handle = DatabaseBoundOperation(source)
    source_hypothesis_id = None
    source_hypothesis_name = None
    try:
        hypothesis_id = int(identifier)
        inst = source_handle.query(GlycanHypothesis).get(hypothesis_id)
        if inst is not None:
            source_hypothesis_id = hypothesis_id
            source_hypothesis_name = inst.name
    except (TypeError, ValueError):
        # int() raises TypeError for None and ValueError for name strings;
        # the original only caught TypeError, so name lookups crashed.
        hypothesis_name = identifier
        inst = source_handle.query(GlycanHypothesis).filter(
            GlycanHypothesis.name == hypothesis_name).first()
        if inst is not None:
            source_hypothesis_id = inst.id
            source_hypothesis_name = inst.name
    if source == database_connection:
        # Same database: nothing to copy.
        return source_hypothesis_id
    mover = GlycanHypothesisCopier(
        database_connection, [(source, source_hypothesis_id)],
        hypothesis_name=source_hypothesis_name)
    mover.run()
    return mover.hypothesis_id
def _copy_analysis_across_file_boundaries(database_connection, source, hypothesis_name, identifier=None):
    """Extract the glycans of an analysis in ``source`` into a new hypothesis
    in ``database_connection``.

    ``identifier`` may be an integer analysis id or an analysis name. When no
    ``hypothesis_name`` is given, the source analysis name is used. Returns
    the id of the created hypothesis.
    """
    source_handle = DatabaseBoundOperation(source)
    source_analysis_id = None
    source_analysis_name = None
    try:
        hypothesis_id = int(identifier)
        inst = source_handle.query(Analysis).get(hypothesis_id)
        if inst is not None:
            source_analysis_id = hypothesis_id
            source_analysis_name = inst.name
    except (TypeError, ValueError):
        # int() raises TypeError for None and ValueError for name strings;
        # the original only caught TypeError, so name lookups crashed.
        hypothesis_name = identifier
        inst = source_handle.query(Analysis).filter(
            Analysis.name == hypothesis_name).first()
        if inst is not None:
            source_analysis_id = inst.id
            source_analysis_name = inst.name
    if hypothesis_name is None:
        hypothesis_name = source_analysis_name
    mover = GlycanAnalysisHypothesisSerializer(
        source, source_analysis_id, hypothesis_name, database_connection)
    mover.run()
    return mover.hypothesis_id
def mass_search_dispatch(uuid):
    """Dispatch a mass search against the hypothesis record matching *uuid*.

    Tries the glycan hypothesis table first, then the glycopeptide
    hypothesis table; returns an empty JSON payload when nothing matches
    or when any error occurs (logged, never propagated to the client).
    """
    try:
        arguments, state = request_arguments_and_context()
        record = _locate_hypothesis(uuid)
        handle = DatabaseBoundOperation(record.path)
        hypothesis = handle.query(GlycanHypothesis).filter(
            GlycanHypothesis.uuid == record.uuid).first()
        if hypothesis is not None:
            return search_glycan_hypothesis(
                hypothesis.uuid, arguments['mass'], arguments['tolerance'])
        hypothesis = handle.query(GlycopeptideHypothesis).filter(
            GlycopeptideHypothesis.uuid == record.uuid).first()
        if hypothesis is not None:
            return search_glycopeptide_hypothesis(
                hypothesis.uuid, arguments['mass'], arguments['tolerance'])
        return jsonify(*[])
    except Exception as e:
        # Fixed: "except Exception, e" is Python 2-only syntax; the "as"
        # form is valid on Python 2.6+ and Python 3.
        logging.exception("An exception occurred for %r",
                          request.get_json(), exc_info=e)
        return jsonify(*[])
def _copy_analysis_across_file_boundaries(database_connection, source, hypothesis_name, identifier=None):
    """Extract the glycans of an analysis in ``source`` into a new hypothesis
    in ``database_connection``.

    ``identifier`` may be an integer analysis id or an analysis name. When no
    ``hypothesis_name`` is given, the source analysis name is used. Returns
    the id of the created hypothesis.
    """
    source_handle = DatabaseBoundOperation(source)
    source_analysis_id = None
    source_analysis_name = None
    try:
        hypothesis_id = int(identifier)
        inst = source_handle.query(Analysis).get(hypothesis_id)
        if inst is not None:
            source_analysis_id = hypothesis_id
            source_analysis_name = inst.name
    except (TypeError, ValueError):
        # int() raises TypeError for None and ValueError for name strings;
        # the original only caught TypeError, so name lookups crashed.
        hypothesis_name = identifier
        inst = source_handle.query(Analysis).filter(
            Analysis.name == hypothesis_name).first()
        if inst is not None:
            source_analysis_id = inst.id
            source_analysis_name = inst.name
    if hypothesis_name is None:
        hypothesis_name = source_analysis_name
    mover = GlycanAnalysisHypothesisSerializer(
        source, source_analysis_id, hypothesis_name, database_connection)
    mover.run()
    return mover.hypothesis_id
class HypothesisGlycanSourceValidator(GlycanSourceValidatorBase):
    """Validate that ``source_identifier`` resolves to an existing
    :class:`GlycanHypothesis` in the source database."""

    def __init__(self, database_connection, source, source_type, source_identifier=None):
        super(HypothesisGlycanSourceValidator, self).__init__(
            database_connection, source, source_type, source_identifier)
        self.handle = DatabaseBoundOperation(source)

    def validate(self):
        """Return True when the identifier matches a hypothesis by id or name."""
        if self.source_identifier is None:
            click.secho("No value passed through --glycan-source-identifier.", fg='magenta')
            return False
        try:
            hypothesis_id = int(self.source_identifier)
            inst = self.handle.query(GlycanHypothesis).get(hypothesis_id)
            return inst is not None
        except (TypeError, ValueError):
            # int() raises ValueError for name strings, which the original
            # ``except TypeError`` missed. Fixed to look the hypothesis up by
            # the identifier rather than ``self.source`` (the connection
            # string), matching how the copy helpers resolve names.
            hypothesis_name = self.source_identifier
            inst = self.handle.query(GlycanHypothesis).filter(
                GlycanHypothesis.name == hypothesis_name).first()
            return inst is not None
def validate_unique_name(context, database_connection, name, klass):
    """Return *name*, or a uniquified variant when a *klass* row with that
    name already exists in the database."""
    handle = DatabaseBoundOperation(database_connection)
    existing = handle.query(klass).filter(klass.name == name).first()
    if existing is None:
        return name
    return klass.make_unique_name(handle.session, name)
def glycopeptide_mzidentml(database_connection, analysis_identifier, output_path=None,
                           mzml_path=None, embed_protein_sequences=True):
    '''Write identified glycopeptides as mzIdentML file, and associated MSn spectra
    to a paired mzML file if the matched data are available. If an mzML file is written
    it will also contain the extracted ion chromatograms for each glycopeptide with an
    extracted elution profile.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    loader = AnalysisDeserializer(
        database_connection._original_connection, analysis_id=analysis.id)
    click.echo("Loading Identifications")
    # glycopeptides = loader.load_identified_glycopeptides()
    # Fixed: filter on the resolved numeric ``analysis.id``. The original
    # compared against the raw ``analysis_identifier``, which may be a name
    # and would then silently match no rows.
    glycopeptides = loader.query(IdentifiedGlycopeptide).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    with open(output_path, 'wb') as outfile:
        writer = MzIdentMLSerializer(
            outfile, glycopeptides, analysis, loader,
            source_mzml_path=mzml_path,
            embed_protein_sequences=embed_protein_sequences)
        writer.run()
def export_identified_glycans_from_glycopeptides(database_connection, analysis_identifier, output_path):
    """Export the distinct glycan compositions attached to the identified
    glycopeptides of an analysis as an importable glycan CSV."""
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    # Walk composition -> combination -> glycopeptide -> identification.
    glycans = session.query(GlycanComposition).join(
        GlycanCombinationGlycanComposition).join(GlycanCombination).join(
        Glycopeptide,
        Glycopeptide.glycan_combination_id == GlycanCombination.id).join(
        IdentifiedGlycopeptide,
        IdentifiedGlycopeptide.structure_id == Glycopeptide.id).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    if output_path is None:
        output_stream = ctxstream(click.get_binary_stream('stdout'))
    else:
        output_stream = open(output_path, 'wb')
    with output_stream:
        ImportableGlycanHypothesisCSVSerializer(output_stream, glycans).run()
def glycopeptide_hypothesis(database_connection, hypothesis_identifier, output_path, multifasta=False):
    '''Write each theoretical glycopeptide in CSV format
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    hypothesis = get_by_name_or_id(session, GlycopeptideHypothesis, hypothesis_identifier)

    def traverse():
        # Page through the glycopeptides in fixed-size windows, expiring the
        # session between pages, so the table never fully loads into memory.
        window = 100000
        offset = 0
        while True:
            session.expire_all()
            batch = hypothesis.glycopeptides.slice(offset, offset + window).all()
            if len(batch) == 0:
                break
            for record in batch:
                yield record
            offset += window

    if output_path is None:
        output_stream = ctxstream(sys.stdout)
    else:
        output_stream = open(output_path, 'wb')
    with output_stream:
        GlycopeptideHypothesisCSVSerializer(output_stream, traverse()).run()
def from_analysis(context, database_connection, analysis_connection, analysis_identifier, reduction, derivatization, name):
    # Build a glycan hypothesis from a saved analysis: glycan LC-MS analyses
    # convert directly; glycopeptide LC-MS/MS analyses have their glycan
    # compositions extracted. Other analysis types are rejected.
    database_connection = DatabaseBoundOperation(database_connection)
    if name is not None:
        name = validate_glycan_hypothesis_name(
            context, database_connection._original_connection, name)
        click.secho("Building Glycan Hypothesis %s" % name, fg='cyan')
    # Validated for their side effects (rejecting bad CLI values); the
    # normalized results are not used further here.
    reduction = validate_reduction(context, reduction)
    derivatization = validate_derivatization(context, derivatization)
    analysis_connection = DatabaseBoundOperation(analysis_connection)
    analysis = get_by_name_or_id(analysis_connection.session, Analysis, analysis_identifier)
    if analysis.analysis_type == AnalysisTypeEnum.glycan_lc_ms:
        job = GlycanAnalysisHypothesisSerializer(
            analysis_connection._original_connection, analysis.id, name,
            output_connection=database_connection._original_connection)
        job.display_header()
        job.start()
    elif analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        job = GlycopeptideAnalysisGlycanCompositionExtractionHypothesisSerializer(
            analysis_connection._original_connection, analysis.id, name,
            output_connection=database_connection._original_connection)
        job.display_header()
        job.start()
    else:
        click.secho("Analysis Type %r could not be converted" % (
            analysis.analysis_type.name, ), fg='red')
def __init__(self, database_connection, hypothesis_name=None, glycan_hypothesis_id=None):
    # Bind to the database and stage hypothesis state; the hypothesis id and
    # object are resolved lazily elsewhere.
    DatabaseBoundOperation.__init__(self, database_connection)
    self._hypothesis_name = hypothesis_name
    self._hypothesis_id = None   # resolved lazily
    self._hypothesis = None      # resolved lazily
    self._glycan_hypothesis_id = glycan_hypothesis_id
    # Unique tag identifying this serializer run.
    self.uuid = str(uuid4().hex)
    # -1 marks "not yet counted".
    self.total_glycan_combination_count = -1
def __init__(self, connection, hypothesis_id=None, *args, **kwargs):
    # Default to the first hypothesis when none is specified.
    if hypothesis_id is None:
        hypothesis_id = 1
    DatabaseBoundOperation.__init__(self, connection)
    PeptideCollectionBase.__init__(self, *args, **kwargs)
    self.hypothesis_id = hypothesis_id
    # Operations accumulated since the last batch flush.
    self._operation_count = 0
    # Flush every ``batch_size`` operations (default 1000).
    self._batch_size = int(kwargs.get("batch_size", 1000))
def __init__(self, mzid_path, connection, hypothesis_id, include_baseline_peptides=True,
             target_proteins=None, reference_fasta=None, peptide_length_range=(5, 60)):
    # Combine database binding with mzIdentML proteome extraction for the
    # given hypothesis.
    DatabaseBoundOperation.__init__(self, connection)
    MzIdentMLProteomeExtraction.__init__(self, mzid_path, reference_fasta)
    self.hypothesis_id = hypothesis_id
    self.target_proteins = target_proteins
    self.include_baseline_peptides = include_baseline_peptides
    # Guard against an explicit falsy value; fall back to the default range.
    self.peptide_length_range = peptide_length_range or (5, 60)
def validate_unique_name(context, database_connection, name, klass):
    """Check *name* for a collision with an existing *klass* row; return a
    uniquified variant on collision, otherwise the name unchanged."""
    handle = DatabaseBoundOperation(database_connection)
    collision = handle.query(klass).filter(klass.name == name).first()
    return klass.make_unique_name(handle.session, name) if collision is not None else name
def glycan_composition_identification(database_connection, analysis_identifier, output_path=None,
                                      threshold=0, report=False):
    '''Write each glycan chromatogram in CSV format
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    # Only glycan LC-MS analyses carry glycan composition chromatograms.
    if not analysis.analysis_type == AnalysisTypeEnum.glycan_lc_ms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    if output_path is None:
        output_stream = ctxstream(sys.stdout)
    else:
        output_stream = open(output_path, 'wb')
    if report:
        # Render the full HTML-style report instead of a CSV dump.
        with output_stream:
            job = GlycanChromatogramReportCreator(
                database_connection._original_connection,
                analysis_id, output_stream, threshold=threshold)
            job.run()
    else:
        def generate():
            # Stream identified, then unidentified, chromatograms scoring
            # above ``threshold`` in fixed-size pages to bound memory use.
            i = 0
            interval = 100
            query = session.query(GlycanCompositionChromatogram).filter(
                GlycanCompositionChromatogram.analysis_id == analysis_id,
                GlycanCompositionChromatogram.score > threshold)
            while True:
                session.expire_all()
                chunk = query.slice(i, i + interval).all()
                if len(chunk) == 0:
                    break
                for gcs in chunk:
                    yield gcs.convert()
                i += interval
            i = 0
            query = session.query(UnidentifiedChromatogram).filter(
                UnidentifiedChromatogram.analysis_id == analysis_id,
                UnidentifiedChromatogram.score > threshold)
            while True:
                session.expire_all()
                chunk = query.slice(i, i + interval).all()
                if len(chunk) == 0:
                    break
                for gcs in chunk:
                    yield gcs.convert()
                i += interval
        with output_stream:
            job = GlycanLCMSAnalysisCSVSerializer(output_stream, generate())
            job.run()
def __init__(self, database_connection):
    # Identity maps translating source-database primary keys to the ids
    # assigned in the destination database during migration.
    DatabaseBoundOperation.__init__(self, database_connection)
    self.hypothesis_id = None
    self.glycan_hypothesis_id = None
    self._glycan_hypothesis_migrator = None
    self.protein_id_map = dict()
    self.peptide_id_map = dict()
    self.glycan_combination_id_map = dict()
    self.glycopeptide_id_map = dict()
def sql_shell(database_connection, script=None):
    """Open an interactive SQL shell over the database, or — when *script*
    is given — execute it once and write the rows as CSV to stdout."""
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()  # pylint: disable=not-callable
    interpreter = SQLShellInterpreter(session)
    if script is not None:
        rows = session.execute(script)
        interpreter._to_csv(list(rows), sys.stdout)
    else:
        interpreter.cmdloop()
def __init__(self, database_connection, hypothesis_name=None, uuid=None):
    # Generate a fresh run identifier unless one is supplied.
    if uuid is None:
        uuid = str(uuid4().hex)
    DatabaseBoundOperation.__init__(self, database_connection)
    self._hypothesis_name = hypothesis_name
    self._hypothesis_id = None   # resolved lazily
    self._hypothesis = None      # resolved lazily
    self._structure_class_loader = None
    self.uuid = uuid
def __init__(self, database_connection, hypothesis_name=None, glycan_hypothesis_id=None,
             full_cross_product=True):
    # Bind to the database and stage hypothesis state; the hypothesis id and
    # object are resolved lazily elsewhere.
    DatabaseBoundOperation.__init__(self, database_connection)
    self._hypothesis_name = hypothesis_name
    self._hypothesis_id = None   # resolved lazily
    self._hypothesis = None      # resolved lazily
    self._glycan_hypothesis_id = glycan_hypothesis_id
    # Unique tag identifying this serializer run.
    self.uuid = str(uuid4().hex)
    # -1 marks "not yet counted".
    self.total_glycan_combination_count = -1
    # Whether to enumerate the full peptide x glycan cross product.
    self.full_cross_product = full_cross_product
def __init__(self, connection, analysis_name, sample_run, chromatogram_extractor):
    # Serialize an analysis of *sample_run* into *connection*. The concrete
    # analysis serializer and sample migrator are created lazily.
    DatabaseBoundOperation.__init__(self, connection)
    self.sample_run = sample_run
    self.chromatogram_extractor = chromatogram_extractor
    # Requested name; may be adjusted before the analysis record is created.
    self._seed_analysis_name = analysis_name
    self._analysis_serializer = None
    self._sample_migrator = None
def sql_shell(database_connection, script=None):
    """Run an interactive SQL REPL over the database, or execute *script*
    non-interactively and emit the result set as CSV on stdout."""
    operation = DatabaseBoundOperation(database_connection)
    session = operation.session()
    shell = SQLShellInterpreter(session)
    if script is not None:
        shell._to_csv(list(session.execute(script)), sys.stdout)
    else:
        shell.cmdloop()
def __init__(self, database_connection, source, source_type, source_identifier=None):
    # Open a handle to the analysis database that holds the glycan source.
    super(GlycanAnalysisGlycanSourceValidator, self).__init__(
        database_connection, source, source_type, source_identifier)
    self.handle = DatabaseBoundOperation(source)
def __init__(self, database_connection, source, source_type, source_identifier=None):
    # Record the glycan source: a path/connection, the kind of source, and an
    # optional id or name disambiguating it within that source.
    DatabaseBoundOperation.__init__(self, database_connection)
    self.source = source
    self.source_type = source_type
    self.source_identifier = source_identifier
def stream_from_hypotheses(self, connection, hypothesis_id):
    """Yield ``(db_composition, class_names)`` pairs for every glycan
    composition of *hypothesis_id*; ``[None]`` when no classes are set."""
    self.log("Streaming from %s for hypothesis %d" % (connection, hypothesis_id))
    handle = DatabaseBoundOperation(connection)
    session = handle.session()
    query = session.query(DBGlycanComposition).filter(
        DBGlycanComposition.hypothesis_id == hypothesis_id)
    for db_composition in query:
        structure_classes = list(db_composition.structure_classes)
        if len(structure_classes) > 0:
            yield db_composition, [sc.name for sc in db_composition.structure_classes]
        else:
            yield db_composition, [None]
def __init__(self, connection, hypothesis_id, target_proteins=None,
             constant_modifications=None, variable_modifications=None):
    # Normalize the optional modification lists to empty lists so downstream
    # code can iterate them unconditionally.
    if constant_modifications is None:
        constant_modifications = []
    if variable_modifications is None:
        variable_modifications = []
    DatabaseBoundOperation.__init__(self, connection)
    self.hypothesis_id = hypothesis_id
    self.target_proteins = target_proteins
    self.constant_modifications = constant_modifications
    self.variable_modifications = variable_modifications
def __init__(self, connection, hypothesis_id=1, cache_size=DEFAULT_CACHE_SIZE,
             loading_interval=DEFAULT_LOADING_INTERVAL,
             threshold_cache_total_count=DEFAULT_THRESHOLD_CACHE_TOTAL_COUNT,
             model_type=Glycopeptide):
    # Interval-cached structure database over *connection*. Mass intervals
    # are kept in an LRU set of size ``cache_size``; intervals that fail are
    # tracked separately so they are not retried.
    DatabaseBoundOperation.__init__(self, connection)
    self.hypothesis_id = hypothesis_id
    self.model_type = model_type
    self.loading_interval = loading_interval
    self.threshold_cache_total_count = threshold_cache_total_count
    self._intervals = LRUIntervalSet([], cache_size)
    self._ignored_intervals = IntervalSet([])
    # Indexes over the proteins and peptides of this hypothesis.
    self.proteins = ProteinIndex(self.session, self.hypothesis_id)
    self.peptides = PeptideIndex(self.session, self.hypothesis_id)
def __init__(self, input_connection, analysis_id, hypothesis_name, output_connection=None):
    # Read from *input_connection* and write the new hypothesis to
    # *output_connection*, defaulting to the same database.
    if output_connection is None:
        output_connection = input_connection
    self.input_connection = DatabaseBoundOperation(input_connection)
    self.output_connection = DatabaseBoundOperation(output_connection)
    GlycanHypothesisSerializerBase.__init__(self, output_connection, hypothesis_name)
    self.analysis_id = analysis_id
    # Composition keys already emitted, used to skip duplicates.
    self.seen_cache = set()
def glycopeptide_identification(database_connection, analysis_identifier, output_path=None,
                                report=False, mzml_path=None, threshold=0):
    '''Write each distinct identified glycopeptide in CSV format
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    if output_path is None:
        output_stream = ctxstream(sys.stdout)
    else:
        output_stream = open(output_path, 'wb')
    if report:
        # Render the full report; this requires access to the original mzML.
        with output_stream:
            if mzml_path is None:
                # Fall back to the sample path recorded with the analysis.
                mzml_path = analysis.parameters['sample_path']
                if not os.path.exists(mzml_path):
                    raise click.ClickException(
                        ("Sample path {} not found. Pass the path to"
                         " this file as `-m/--mzml-path` for this command.").format(
                            mzml_path))
            GlycopeptideDatabaseSearchReportCreator(
                database_connection._original_connection, analysis_id,
                stream=output_stream, threshold=threshold,
                mzml_path=mzml_path).run()
    else:
        # Map protein id -> name so CSV rows can be labeled by protein.
        query = session.query(Protein.id, Protein.name).join(Protein.glycopeptides).join(
            IdentifiedGlycopeptide).filter(
            IdentifiedGlycopeptide.analysis_id == analysis.id)
        protein_index = dict(query)

        def generate():
            # Page through identifications to bound memory use.
            i = 0
            interval = 100
            query = session.query(IdentifiedGlycopeptide).filter(
                IdentifiedGlycopeptide.analysis_id == analysis_id)
            while True:
                session.expire_all()
                chunk = query.slice(i, i + interval).all()
                if len(chunk) == 0:
                    break
                for glycopeptide in chunk:
                    yield glycopeptide.convert()
                i += interval
        with output_stream:
            job = GlycopeptideLCMSMSAnalysisCSVSerializer(output_stream, generate(), protein_index)
            job.run()
def __init__(self, database_connection, analysis_id, output_path, mzml_path=None):
    # Eagerly load the Analysis record; the scan loader is created later.
    DatabaseBoundOperation.__init__(self, database_connection)
    self.analysis_id = analysis_id
    self.mzml_path = mzml_path
    self.output_path = output_path
    self.analysis = self.session.query(serialize.Analysis).get(self.analysis_id)
    self.scan_loader = None
    # Matplotlib rcParams applied when rendering report figures.
    self._mpl_style = {
        'figure.facecolor': 'white',
        'figure.edgecolor': 'white',
        'font.size': 10,
        'savefig.dpi': 72,
        'figure.subplot.bottom': .125
    }
def glycopeptide_training_mgf(database_connection, analysis_identifier, output_path=None,
                              mzml_path=None, threshold=None):
    """Export annotated MSn spectra of identified glycopeptides from a
    glycopeptide LC-MS/MS analysis as a training MGF file."""
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        output_stream = ctxstream(sys.stdout)
    with output_stream:
        exporter = TrainingMGFExporter.from_analysis(
            handle, analysis.id, output_stream, mzml_path, threshold)
        exporter.run()
def annotate_matched_spectra(database_connection, analysis_identifier, output_path, mzml_path=None):
    """Render annotated spectrum output for every matched spectrum of a
    glycopeptide LC-MS/MS analysis."""
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    if output_path is None:
        # Default to the directory containing the database file.
        output_path = os.path.dirname(handle._original_connection)
    task = SpectrumAnnotatorExport(
        handle._original_connection, analysis.id, output_path, mzml_path)
    task.display_header()
    task.start()
def glycopeptide_chromatogram_records(database_connection, analysis_identifier, output_path,
                                      apex_time_range=None):
    # Export chromatogram proxy records for identified glycopeptides whose
    # apex time falls inside *apex_time_range* (default: unbounded).
    if apex_time_range is None:
        apex_time_range = (0, float('inf'))
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (str(analysis.name),
                                                    str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    if output_path is None:
        fh = click.get_binary_stream('stdout')
    else:
        fh = open(output_path, 'wb')
    idgps = session.query(IdentifiedGlycopeptide).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    n = len(idgps)
    from glycan_profiling.scoring.elution_time_grouping import GlycopeptideChromatogramProxy
    cases = []
    analysis_name = analysis.name
    start_time, stop_time = apex_time_range
    for i, idgp in enumerate(idgps):
        # Progress reporting every 50 records.
        if i % 50 == 0:
            click.echo("%d/%d Records Processed" % (i, n), err=True)
        # Skip records without a chromatogram or with a negative MS1 score.
        if idgp.chromatogram is None:
            continue
        if idgp.ms1_score < 0:
            continue
        obj = GlycopeptideChromatogramProxy.from_obj(
            idgp, ms1_score=idgp.ms1_score, ms2_score=idgp.ms2_score,
            q_value=idgp.q_value, analysis_name=analysis_name,
            mass_shifts=';'.join([m.name for m in idgp.chromatogram.mass_shifts]))
        if obj.apex_time < start_time or obj.apex_time > stop_time:
            continue
        cases.append(obj)
    click.echo("Writing %d Records" % (len(cases), ), err=True)
    with fh:
        GlycopeptideChromatogramProxy.to_csv(cases, csv_stream(fh))
def __init__(self, database_path, analysis_id, mzml_path=None):
    # Bind to the analysis database and eagerly load the Analysis record.
    self.database_connection = DatabaseBoundOperation(database_path)
    self.analysis_id = analysis_id
    # NOTE(review): ``self.session`` is not assigned in this constructor;
    # this assumes the class (or a base) exposes a ``session`` property
    # delegating to ``self.database_connection`` — confirm.
    self.analysis = self.session.query(serialize.Analysis).get(
        self.analysis_id)
    self.mzml_path = mzml_path
    self.scan_loader = None
    self._make_scan_loader()
def __init__(
        self, connection, hypothesis_id=1, cache_size=DEFAULT_CACHE_SIZE,
        loading_interval=DEFAULT_LOADING_INTERVAL,
        threshold_cache_total_count=DEFAULT_THRESHOLD_CACHE_TOTAL_COUNT,
        model_type=Glycopeptide):
    # Interval-cached structure database over *connection*. Mass intervals
    # are kept in an LRU set of size ``cache_size``; intervals that fail are
    # tracked separately so they are not retried.
    DatabaseBoundOperation.__init__(self, connection)
    self.hypothesis_id = hypothesis_id
    self.model_type = model_type
    self.loading_interval = loading_interval
    self.threshold_cache_total_count = threshold_cache_total_count
    self._intervals = LRUIntervalSet([], cache_size)
    self._ignored_intervals = IntervalSet([])
    # Indexes over the proteins and peptides of this hypothesis.
    self.proteins = ProteinIndex(self.session, self.hypothesis_id)
    self.peptides = PeptideIndex(self.session, self.hypothesis_id)
def validate_database_unlocked(database_connection):
    # Probe whether the database accepts writes by staging a throw-away row
    # and rolling it back; OperationalError (e.g. a locked SQLite file)
    # means the database is not writable right now.
    try:
        db = DatabaseBoundOperation(database_connection)
        # NOTE(review): relies on ``db.session`` exposing ``add``/``rollback``
        # directly (scoped_session-style) rather than requiring a call —
        # other blocks in this file call ``session()`` first; confirm.
        db.session.add(GlycanHypothesis(name="_____not_real_do_not_use______"))
        db.session.rollback()
        return True
    except OperationalError:
        return False
class HypothesisGlycanSourceValidator(GlycanSourceValidatorBase):
    """Validate that ``source_identifier`` resolves to an existing
    :class:`GlycanHypothesis` in the source database."""

    def __init__(self, database_connection, source, source_type, source_identifier=None):
        super(HypothesisGlycanSourceValidator, self).__init__(
            database_connection, source, source_type, source_identifier)
        self.handle = DatabaseBoundOperation(source)

    def validate(self):
        """Return True when the identifier matches a hypothesis by id or name."""
        if self.source_identifier is None:
            click.secho("No value passed through --glycan-source-identifier.", fg='magenta')
            return False
        try:
            hypothesis_id = int(self.source_identifier)
            inst = self.handle.query(GlycanHypothesis).get(hypothesis_id)
            return inst is not None
        except (TypeError, ValueError):
            # int() raises ValueError for name strings, which the original
            # ``except TypeError`` missed. Fixed to look the hypothesis up by
            # the identifier rather than ``self.source`` (the connection
            # string), matching how the copy helpers resolve names.
            hypothesis_name = self.source_identifier
            inst = self.handle.query(GlycanHypothesis).filter(
                GlycanHypothesis.name == hypothesis_name).first()
            return inst is not None
def merge_glycan_hypotheses(context, database_connection, hypothesis_specification, name):
    """Merge several glycan hypotheses, possibly spread across database
    files, into a single new hypothesis in *database_connection*."""
    target = DatabaseBoundOperation(database_connection)
    hypothesis_ids = []
    for connection, ident in hypothesis_specification:
        hypothesis = get_by_name_or_id(
            DatabaseBoundOperation(connection), GlycanHypothesis, ident)
        hypothesis_ids.append((connection, hypothesis.id))
    if name is not None:
        name = validate_glycan_hypothesis_name(
            context, target._original_connection, name)
        click.secho("Building Glycan Hypothesis %s" % name, fg='cyan')
    task = GlycanCompositionHypothesisMerger(
        target._original_connection, hypothesis_ids, name)
    task.display_header()
    task.start()
def glycopeptide_mzidentml(database_connection, analysis_identifier, output_path=None,
                           mzml_path=None):
    '''Write identified glycopeptides as mzIdentML file, and associated MSn spectra
    to a paired mzML file if the matched data are available. If an mzML file is written
    it will also contain the extracted ion chromatograms for each glycopeptide with an
    extracted elution profile.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    # Only glycopeptide LC-MS/MS analyses can be serialized to mzIdentML.
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    loader = AnalysisDeserializer(
        database_connection._original_connection, analysis_id=analysis.id)
    glycopeptides = loader.load_identified_glycopeptides()
    with open(output_path, 'wb') as outfile:
        writer = MzIdentMLSerializer(
            outfile, glycopeptides, analysis, loader, source_mzml_path=mzml_path)
        writer.run()
def export_identified_glycans_from_glycopeptides(database_connection, analysis_identifier, output_path):
    """Dump the glycan compositions linked to the identified glycopeptides of
    an analysis as an importable glycan hypothesis CSV."""
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    # Walk composition -> combination -> glycopeptide -> identification.
    glycans = session.query(GlycanComposition).join(
        GlycanCombinationGlycanComposition).join(GlycanCombination).join(
        Glycopeptide,
        Glycopeptide.glycan_combination_id == GlycanCombination.id).join(
        IdentifiedGlycopeptide,
        IdentifiedGlycopeptide.structure_id == Glycopeptide.id).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        output_stream = ctxstream(sys.stdout)
    with output_stream:
        ImportableGlycanHypothesisCSVSerializer(output_stream, glycans).run()
def glycopeptide_spectrum_matches(database_connection, analysis_identifier, output_path=None):
    '''Write each matched glycopeptide spectrum in CSV format
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    # Map protein id -> name so CSV rows can be labeled by protein.
    query = session.query(Protein.id, Protein.name).join(Protein.glycopeptides).join(
        GlycopeptideSpectrumMatch).filter(
        GlycopeptideSpectrumMatch.analysis_id == analysis.id)
    protein_index = dict(query)

    def generate():
        # Stream matches ordered by scan id, in large pages, expiring the
        # session between pages to bound memory use.
        i = 0
        interval = 100000
        query = session.query(GlycopeptideSpectrumMatch).filter(
            GlycopeptideSpectrumMatch.analysis_id == analysis_id).order_by(
            GlycopeptideSpectrumMatch.scan_id)
        while True:
            session.expire_all()
            chunk = query.slice(i, i + interval).all()
            if len(chunk) == 0:
                break
            for glycopeptide in chunk:
                yield glycopeptide.convert()
            i += interval
    if output_path is None:
        output_stream = ctxstream(sys.stdout)
    else:
        output_stream = open(output_path, 'wb')
    with output_stream:
        job = GlycopeptideSpectrumMatchAnalysisCSVSerializer(output_stream, generate(), protein_index)
        job.run()
class GlycopeptideAnalysisGlycanCompositionExtractionHypothesisSerializer(GlycanHypothesisSerializerBase):
    """Build a new glycan hypothesis from the glycan compositions used by the
    identified glycopeptides of a finished glycopeptide analysis."""

    def __init__(self, input_connection, analysis_id, hypothesis_name, output_connection=None):
        # Read from *input_connection* and write the new hypothesis to
        # *output_connection*, defaulting to the same database.
        if output_connection is None:
            output_connection = input_connection
        self.input_connection = DatabaseBoundOperation(input_connection)
        self.output_connection = DatabaseBoundOperation(output_connection)
        GlycanHypothesisSerializerBase.__init__(self, output_connection, hypothesis_name)
        self.analysis_id = analysis_id
        # Composition strings already emitted, used to skip duplicates.
        self.seen_cache = set()

    def get_all_compositions(self):
        # Query every glycan composition participating in a glycan combination
        # used by an identified glycopeptide of this analysis.
        return self.input_connection.query(DBGlycanComposition).join(GlycanCombinationGlycanComposition).join(
            Glycopeptide,
            GlycanCombinationGlycanComposition.c.combination_id == Glycopeptide.glycan_combination_id).join(
            IdentifiedGlycopeptide,
            IdentifiedGlycopeptide.structure_id == Glycopeptide.id).filter(
            IdentifiedGlycopeptide.analysis_id == self.analysis_id)

    def extract_composition(self, db_obj):
        # Re-parse the composition to normalize its string form, then store it
        # (and its structure class links) under the new hypothesis, at most
        # once per distinct composition.
        composition = GlycanComposition.parse(db_obj.composition)
        if str(composition) in self.seen_cache:
            return
        self.seen_cache.add(str(composition))
        mass = composition.mass()
        composition_string = composition.serialize()
        formula_string = formula(composition.total_composition())
        inst = DBGlycanComposition(
            calculated_mass=mass, formula=formula_string,
            composition=composition_string,
            hypothesis_id=self.hypothesis_id)
        self.output_connection.session.add(inst)
        self.output_connection.session.flush()
        for sc in db_obj.structure_classes:
            self.output_connection.session.execute(
                GlycanCompositionToClass.insert(),
                dict(glycan_id=inst.id, class_id=sc.id))
        self.output_connection.session.flush()

    def run(self):
        # Extract every composition, committing once at the end.
        q = self.get_all_compositions()
        for gc in q:
            self.extract_composition(gc)
        self.output_connection.session.commit()
def __init__(self, connection):
    # Identity maps from source-database scan and peak ids to their migrated
    # counterparts; the sample run id is assigned during migration.
    DatabaseBoundOperation.__init__(self, connection)
    self.sample_run_id = None
    self.ms_scan_id_map = dict()
    self.peak_id_map = dict()
def __init__(self, database_connection, hypothesis_id):
    # Bind to the database and record which hypothesis this operation targets.
    DatabaseBoundOperation.__init__(self, database_connection)
    self.hypothesis_id = hypothesis_id
def __init__(self, connection, source_hypothesis_id, target_hypothesis_id, max_size=1):
    # Operation moving records from one hypothesis to another within the same
    # database; ``total_count`` tracks progress.
    DatabaseBoundOperation.__init__(self, connection)
    self.source_hypothesis_id = source_hypothesis_id
    self.target_hypothesis_id = target_hypothesis_id
    self.max_size = max_size
    self.total_count = 0
def __init__(self, connection, hypothesis_id):
    # Eagerly build an in-memory peptide index for the hypothesis.
    DatabaseBoundOperation.__init__(self, connection)
    self.hypothesis_id = hypothesis_id
    self.index = PeptideIndex()
    self.index.populate(self._get_all_peptides())
def __init__(self, database_connection):
    # Identity map from source glycan composition ids to migrated ids; the
    # hypothesis id is assigned when migration begins.
    DatabaseBoundOperation.__init__(self, database_connection)
    self.hypothesis_id = None
    self.glycan_composition_id_map = dict()
    self._structure_class_loader = None