Esempi in Python per get_by_name_or_id (glycan_profiling.cli.validators.get_by_name_or_id)

Esempio n. 1

0

Mostra file

File: export.py Progetto: mobiusklein/glycan_profiling

def glycopeptide_hypothesis(database_connection, hypothesis_identifier, output_path, multifasta=False):
    '''Write each theoretical glycopeptide of a hypothesis in CSV format.

    The hypothesis is resolved with ``get_by_name_or_id`` and its
    ``glycopeptides`` are fed to ``GlycopeptideHypothesisCSVSerializer`` in
    fixed-size slices. Output goes to ``output_path`` (opened in ``'wb'``
    mode), or to ``sys.stdout`` when ``output_path`` is None.

    ``multifasta`` is accepted but is not read anywhere in this function.
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    hypothesis = get_by_name_or_id(session, GlycopeptideHypothesis, hypothesis_identifier)
    page_size = 100000

    def generate():
        # Walk the relationship one slice at a time; the first empty slice ends the stream.
        start = 0
        while True:
            # NOTE(review): expire_all() before each slice presumably keeps the
            # session from holding on to earlier pages -- confirm.
            session.expire_all()
            page = hypothesis.glycopeptides.slice(start, start + page_size).all()
            if len(page) == 0:
                return
            for record in page:
                yield record
            start += page_size

    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')
    with output_stream:
        GlycopeptideHypothesisCSVSerializer(output_stream, generate()).run()

Esempio n. 2

0

Mostra file

File: build_db.py Progetto: BostonUniversityCBMS/glycresoft

def from_analysis(context, database_connection, analysis_connection,
                  analysis_identifier, reduction, derivatization, name):
    '''Build a glycan hypothesis from an existing analysis.

    The analysis is looked up in ``analysis_connection`` with
    ``get_by_name_or_id`` and its ``analysis_type`` selects which serializer
    is constructed, displayed and started. The new hypothesis is written to
    ``database_connection``. Any analysis type other than ``glycan_lc_ms`` or
    ``glycopeptide_lc_msms`` only prints a red message.
    '''
    target = DatabaseBoundOperation(database_connection)
    if name is not None:
        name = validate_glycan_hypothesis_name(
            context, target._original_connection, name)
        click.secho("Building Glycan Hypothesis %s" % name, fg='cyan')
    # The validated values are rebound here but are not passed to the
    # serializers below; the calls are kept for whatever checks they perform.
    reduction = validate_reduction(context, reduction)
    derivatization = validate_derivatization(context, derivatization)

    source = DatabaseBoundOperation(analysis_connection)
    analysis = get_by_name_or_id(source.session, Analysis,
                                 analysis_identifier)

    # Pick the serializer first; both supported types are then driven identically.
    if analysis.analysis_type == AnalysisTypeEnum.glycan_lc_ms:
        serializer_type = GlycanAnalysisHypothesisSerializer
    elif analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        serializer_type = GlycopeptideAnalysisGlycanCompositionExtractionHypothesisSerializer
    else:
        click.secho("Analysis Type %r could not be converted" %
                    (analysis.analysis_type.name, ),
                    fg='red')
        return

    job = serializer_type(
        source._original_connection,
        analysis.id,
        name,
        output_connection=target._original_connection)
    job.display_header()
    job.start()

Esempio n. 3

0

Mostra file

def glycopeptide_mzidentml(database_connection,
                           analysis_identifier,
                           output_path=None,
                           mzml_path=None,
                           embed_protein_sequences=True):
    '''Write identified glycopeptides as mzIdentML file, and associated MSn spectra
    to a paired mzML file if the matched data are available. If an mzML file is written
    it will also contain the extracted ion chromatograms for each glycopeptide with an
    extracted elution profile.

    Parameters:
        database_connection: connection wrapped in ``DatabaseBoundOperation``.
        analysis_identifier: name or id, resolved with ``get_by_name_or_id``.
        output_path: destination file. It is passed straight to ``open``, so
            the ``None`` default is not usable as-is.
        mzml_path: forwarded to the serializer as ``source_mzml_path``.
        embed_protein_sequences: forwarded to the serializer unchanged.

    Raises:
        click.Abort: if the analysis is not of type ``glycopeptide_lc_msms``.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." %
                    (str(analysis.name), str(analysis.analysis_type)),
                    fg='red',
                    err=True)
        raise click.Abort()
    loader = AnalysisDeserializer(database_connection._original_connection,
                                  analysis_id=analysis.id)
    click.echo("Loading Identifications")
    # Filter on the resolved primary key: ``analysis_identifier`` may be the
    # analysis *name*, which would never equal an ``analysis_id`` column value.
    glycopeptides = loader.query(IdentifiedGlycopeptide).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    with open(output_path, 'wb') as outfile:
        writer = MzIdentMLSerializer(
            outfile,
            glycopeptides,
            analysis,
            loader,
            source_mzml_path=mzml_path,
            embed_protein_sequences=embed_protein_sequences)
        writer.run()

Esempio n. 4

0

Mostra file

File: build_db.py Progetto: mobiusklein/glycan_profiling

def from_analysis(context, database_connection, analysis_connection, analysis_identifier,
                  reduction, derivatization, name):
    '''Create a glycan hypothesis out of a stored analysis.

    The analysis is resolved from ``analysis_connection`` with
    ``get_by_name_or_id``. The serializer matching its ``analysis_type`` is
    built, asked to display its header, and started, with
    ``database_connection`` as the output connection. An unmatched analysis
    type only prints a red message.
    '''
    output_handle = DatabaseBoundOperation(database_connection)
    if name is not None:
        name = validate_glycan_hypothesis_name(context, output_handle._original_connection, name)
        click.secho("Building Glycan Hypothesis %s" % name, fg='cyan')
    # Results are rebound but not used further down in this function.
    reduction = validate_reduction(context, reduction)
    derivatization = validate_derivatization(context, derivatization)

    input_handle = DatabaseBoundOperation(analysis_connection)
    analysis = get_by_name_or_id(input_handle.session, Analysis, analysis_identifier)

    # (analysis type, serializer) pairs, tested in the same order as an if/elif chain.
    converters = (
        (AnalysisTypeEnum.glycan_lc_ms,
         GlycanAnalysisHypothesisSerializer),
        (AnalysisTypeEnum.glycopeptide_lc_msms,
         GlycopeptideAnalysisGlycanCompositionExtractionHypothesisSerializer),
    )
    for analysis_type, serializer_type in converters:
        if analysis.analysis_type == analysis_type:
            job = serializer_type(
                input_handle._original_connection, analysis.id, name,
                output_connection=output_handle._original_connection)
            job.display_header()
            job.start()
            return

    click.secho("Analysis Type %r could not be converted" % (
        analysis.analysis_type.name,), fg='red')

Esempio n. 5

0

Mostra file

def export_identified_glycans_from_glycopeptides(database_connection,
                                                 analysis_identifier,
                                                 output_path):
    '''Export the glycan compositions attached to an analysis' identified glycopeptides.

    The analysis must be of type ``glycopeptide_lc_msms``; otherwise a message
    is printed to stderr and ``click.Abort`` is raised. The matching
    ``GlycanComposition`` rows are handed to
    ``ImportableGlycanHypothesisCSVSerializer``, writing to ``output_path`` or
    to the binary stdout stream when ``output_path`` is None.
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    is_glycopeptide_search = analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms
    if not is_glycopeptide_search:
        click.secho("Analysis %r is of type %r." %
                    (str(analysis.name), str(analysis.analysis_type)),
                    fg='red',
                    err=True)
        raise click.Abort()

    # Build the join chain one step at a time:
    # composition -> combination link -> combination -> glycopeptide -> identification.
    query = session.query(GlycanComposition)
    query = query.join(GlycanCombinationGlycanComposition)
    query = query.join(GlycanCombination)
    query = query.join(
        Glycopeptide,
        Glycopeptide.glycan_combination_id == GlycanCombination.id)
    query = query.join(
        IdentifiedGlycopeptide,
        IdentifiedGlycopeptide.structure_id == Glycopeptide.id)
    query = query.filter(IdentifiedGlycopeptide.analysis_id == analysis.id)
    glycans = query.all()

    if output_path is None:
        output_stream = ctxstream(click.get_binary_stream('stdout'))
    else:
        output_stream = open(output_path, 'wb')
    with output_stream:
        ImportableGlycanHypothesisCSVSerializer(output_stream, glycans).run()

Esempio n. 6

0

Mostra file

def glycopeptide_hypothesis(database_connection, hypothesis_identifier, output_path, multifasta=False):
    '''Stream the theoretical glycopeptides of one hypothesis to the CSV serializer.

    ``hypothesis_identifier`` is resolved with ``get_by_name_or_id``. Records
    are read in slices of 100000 and written to ``output_path`` (``'wb'``
    mode) or to ``sys.stdout`` when ``output_path`` is None.

    ``multifasta`` is part of the signature but unused here.
    '''
    bound = DatabaseBoundOperation(database_connection)
    session = bound.session()
    hypothesis = get_by_name_or_id(session, GlycopeptideHypothesis, hypothesis_identifier)

    def fetch_page(offset, size):
        # expire_all() is issued before every slice, as in each paging step.
        session.expire_all()
        return hypothesis.glycopeptides.slice(offset, offset + size).all()

    def generate():
        size = 100000
        offset = 0
        page = fetch_page(offset, size)
        # An empty page marks the end of the relationship.
        while len(page) != 0:
            for item in page:
                yield item
            offset += size
            page = fetch_page(offset, size)

    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        output_stream = ctxstream(sys.stdout)
    with output_stream:
        serializer = GlycopeptideHypothesisCSVSerializer(output_stream, generate())
        serializer.run()

Esempio n. 7

0

Mostra file

File: export.py Progetto: mobiusklein/glycan_profiling

def glycan_composition_identification(database_connection, analysis_identifier, output_path=None,
                                      threshold=0, report=False):
    '''Write each glycan chromatogram in CSV format, or produce a report.

    The analysis must be of type ``glycan_lc_ms``; otherwise a message is
    printed to stderr and ``click.Abort`` is raised.

    With ``report`` set, ``GlycanChromatogramReportCreator`` is run against
    the output stream. Otherwise every ``GlycanCompositionChromatogram`` and
    then every ``UnidentifiedChromatogram`` of the analysis with
    ``score > threshold`` is converted and passed to
    ``GlycanLCMSAnalysisCSVSerializer``.
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycan_lc_ms:
        message = "Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type))
        click.secho(message, fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')

    if report:
        with output_stream:
            GlycanChromatogramReportCreator(
                handle._original_connection,
                analysis_id, output_stream, threshold=threshold).run()
        return

    page_size = 100

    def paginate(query):
        # Yield the converted rows of ``query`` one slice at a time until a slice is empty.
        offset = 0
        while True:
            session.expire_all()
            page = query.slice(offset, offset + page_size).all()
            if len(page) == 0:
                break
            for row in page:
                yield row.convert()
            offset += page_size

    def generate():
        # Identified chromatograms first, unidentified ones afterwards.
        identified = session.query(GlycanCompositionChromatogram).filter(
            GlycanCompositionChromatogram.analysis_id == analysis_id,
            GlycanCompositionChromatogram.score > threshold)
        for converted in paginate(identified):
            yield converted

        unidentified = session.query(UnidentifiedChromatogram).filter(
            UnidentifiedChromatogram.analysis_id == analysis_id,
            UnidentifiedChromatogram.score > threshold)
        for converted in paginate(unidentified):
            yield converted

    with output_stream:
        GlycanLCMSAnalysisCSVSerializer(output_stream, generate()).run()

Esempio n. 8

0

Mostra file

def glycan_composition_identification(database_connection, analysis_identifier, output_path=None,
                                      threshold=0, report=False):
    '''Export the glycan chromatograms of an analysis as CSV or as a report.

    Aborts with ``click.Abort`` (after a message on stderr) unless the
    analysis is of type ``glycan_lc_ms``. ``report`` switches between
    ``GlycanChromatogramReportCreator`` and the CSV serializer; ``threshold``
    is forwarded to the report creator or used as a strict lower bound on
    ``score`` in the CSV queries.
    '''
    bound = DatabaseBoundOperation(database_connection)
    session = bound.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    type_matches = analysis.analysis_type == AnalysisTypeEnum.glycan_lc_ms
    if not type_matches:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        output_stream = ctxstream(sys.stdout)

    def generate():
        step = 100
        # The two chromatogram tables are paged through with the same filter, in this order.
        for model in (GlycanCompositionChromatogram, UnidentifiedChromatogram):
            query = session.query(model).filter(
                model.analysis_id == analysis_id,
                model.score > threshold)
            position = 0
            while True:
                session.expire_all()
                batch = query.slice(position, position + step).all()
                if len(batch) == 0:
                    break
                for chromatogram in batch:
                    yield chromatogram.convert()
                position += step

    with output_stream:
        if report:
            job = GlycanChromatogramReportCreator(
                bound._original_connection,
                analysis_id, output_stream, threshold=threshold)
        else:
            job = GlycanLCMSAnalysisCSVSerializer(output_stream, generate())
        job.run()

Esempio n. 9

0

Mostra file

File: export.py Progetto: mobiusklein/glycan_profiling

def glycopeptide_identification(database_connection, analysis_identifier, output_path=None,
                                report=False, mzml_path=None, threshold=0):
    '''Write each distinct identified glycopeptide in CSV format, or produce a report.

    The analysis must be of type ``glycopeptide_lc_msms``; otherwise a message
    is printed to stderr and ``click.Abort`` is raised.

    With ``report`` set, ``GlycopeptideDatabaseSearchReportCreator`` is run.
    If ``mzml_path`` is None it is taken from
    ``analysis.parameters['sample_path']`` and must exist on disk, else
    ``click.ClickException`` is raised. ``threshold`` is only forwarded to the
    report creator; the CSV branch does not read it.
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        message = "Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type))
        click.secho(message, fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')

    if report:
        with output_stream:
            if mzml_path is None:
                # Fall back to the sample path recorded with the analysis.
                mzml_path = analysis.parameters['sample_path']
                if not os.path.exists(mzml_path):
                    template = ("Sample path {} not found. Pass the path to"
                                " this file as `-m/--mzml-path` for this command.")
                    raise click.ClickException(template.format(mzml_path))
            creator = GlycopeptideDatabaseSearchReportCreator(
                handle._original_connection, analysis_id,
                stream=output_stream, threshold=threshold,
                mzml_path=mzml_path)
            creator.run()
        return

    # (protein id, protein name) pairs for proteins with identifications in this analysis.
    protein_rows = session.query(Protein.id, Protein.name).join(Protein.glycopeptides).join(
        IdentifiedGlycopeptide).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id)
    protein_index = dict(protein_rows)

    def generate():
        offset = 0
        page_size = 100
        identified = session.query(IdentifiedGlycopeptide).filter(
            IdentifiedGlycopeptide.analysis_id == analysis_id)
        while True:
            session.expire_all()
            page = identified.slice(offset, offset + page_size).all()
            if len(page) == 0:
                return
            for entry in page:
                yield entry.convert()
            offset += page_size

    with output_stream:
        GlycopeptideLCMSMSAnalysisCSVSerializer(output_stream, generate(), protein_index).run()

Esempio n. 10

0

Mostra file

def glycopeptide_identification(database_connection, analysis_identifier, output_path=None,
                                report=False, mzml_path=None, threshold=0):
    '''Export the identified glycopeptides of an analysis as CSV or as a report.

    Aborts with ``click.Abort`` (after a message on stderr) unless the
    analysis is of type ``glycopeptide_lc_msms``. In report mode a missing
    ``mzml_path`` is replaced by ``analysis.parameters['sample_path']``, and
    ``click.ClickException`` is raised if that file does not exist. In CSV
    mode the identifications are converted page by page and serialized
    together with an id-to-name protein index.
    '''
    bound = DatabaseBoundOperation(database_connection)
    session = bound.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    type_matches = analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms
    if not type_matches:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        output_stream = ctxstream(sys.stdout)

    def write_report():
        # ``mzml_path`` is resolved into a local so the enclosing argument stays untouched.
        sample_path = mzml_path
        if sample_path is None:
            sample_path = analysis.parameters['sample_path']
            if not os.path.exists(sample_path):
                raise click.ClickException(
                    ("Sample path {} not found. Pass the path to"
                     " this file as `-m/--mzml-path` for this command.").format(
                        sample_path))
        GlycopeptideDatabaseSearchReportCreator(
            bound._original_connection, analysis_id,
            stream=output_stream, threshold=threshold,
            mzml_path=sample_path).run()

    def generate():
        position = 0
        step = 100
        query = session.query(IdentifiedGlycopeptide).filter(
            IdentifiedGlycopeptide.analysis_id == analysis_id)
        while True:
            session.expire_all()
            batch = query.slice(position, position + step).all()
            if len(batch) == 0:
                break
            for identification in batch:
                yield identification.convert()
            position += step

    if report:
        with output_stream:
            write_report()
    else:
        protein_query = session.query(Protein.id, Protein.name).join(Protein.glycopeptides).join(
            IdentifiedGlycopeptide).filter(
            IdentifiedGlycopeptide.analysis_id == analysis.id)
        protein_index = dict(protein_query)
        with output_stream:
            job = GlycopeptideLCMSMSAnalysisCSVSerializer(output_stream, generate(), protein_index)
            job.run()

Esempio n. 11

0

Mostra file

File: build_db.py Progetto: mobiusklein/glycan_profiling

def merge_glycan_hypotheses(context, database_connection, hypothesis_specification, name):
    '''Merge several glycan hypotheses into a new one in ``database_connection``.

    ``hypothesis_specification`` is iterated as ``(connection, identifier)``
    pairs. Each identifier is resolved with ``get_by_name_or_id`` against its
    own connection, and the resulting ``(connection, hypothesis id)`` pairs
    are given to ``GlycanCompositionHypothesisMerger``.
    '''
    target = DatabaseBoundOperation(database_connection)
    hypothesis_ids = [
        (connection,
         get_by_name_or_id(DatabaseBoundOperation(connection), GlycanHypothesis, ident).id)
        for connection, ident in hypothesis_specification
    ]

    if name is not None:
        name = validate_glycan_hypothesis_name(context, target._original_connection, name)
        click.secho("Building Glycan Hypothesis %s" % name, fg='cyan')

    merger = GlycanCompositionHypothesisMerger(
        target._original_connection, hypothesis_ids, name)
    merger.display_header()
    merger.start()

Esempio n. 12

0

Mostra file

File: export.py Progetto: mobiusklein/glycan_profiling

def glycopeptide_training_mgf(database_connection, analysis_identifier, output_path=None,
                              mzml_path=None, threshold=None):
    '''Run ``TrainingMGFExporter`` for a glycopeptide analysis.

    The analysis must be of type ``glycopeptide_lc_msms``; otherwise a message
    is printed to stderr and ``click.Abort`` is raised. Output goes to
    ``output_path`` (``'wb'`` mode) or to ``sys.stdout`` when it is None.
    ``mzml_path`` and ``threshold`` are forwarded to the exporter unchanged.
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    is_glycopeptide_search = analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms
    if not is_glycopeptide_search:
        message = "Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type))
        click.secho(message, fg='red', err=True)
        raise click.Abort()
    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')
    with output_stream:
        # The exporter receives the DatabaseBoundOperation wrapper, not the raw connection.
        exporter = TrainingMGFExporter.from_analysis(
            handle, analysis.id, output_stream, mzml_path, threshold)
        exporter.run()

Esempio n. 13

0

Mostra file

File: export.py Progetto: mobiusklein/glycan_profiling

def glycan_hypothesis(database_connection, hypothesis_identifier, output_path=None, importable=False):
    '''Write each theoretical glycan composition in CSV format.

    ``importable`` selects ``ImportableGlycanHypothesisCSVSerializer`` instead
    of ``GlycanHypothesisCSVSerializer``. Output goes to ``output_path``
    (``'wb'`` mode) or to ``sys.stdout`` when it is None.
    '''
    handle = DatabaseBoundOperation(database_connection)
    hypothesis = get_by_name_or_id(handle, GlycanHypothesis, hypothesis_identifier)
    serializer_type = (
        ImportableGlycanHypothesisCSVSerializer if importable
        else GlycanHypothesisCSVSerializer)
    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')
    with output_stream:
        serializer_type(output_stream, hypothesis.glycans).run()

Esempio n. 14

0

Mostra file

File: export.py Progetto: mobiusklein/glycan_profiling

def annotate_matched_spectra(database_connection, analysis_identifier, output_path, mzml_path=None):
    '''Run ``SpectrumAnnotatorExport`` for a glycopeptide analysis.

    The analysis must be of type ``glycopeptide_lc_msms``; otherwise a message
    is printed to stderr and ``click.Abort`` is raised. When ``output_path``
    is None, the directory name of the original connection value is used.
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    is_glycopeptide_search = analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms
    if not is_glycopeptide_search:
        message = "Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type))
        click.secho(message, fg='red', err=True)
        raise click.Abort()

    source = handle._original_connection
    if output_path is None:
        # NOTE(review): this treats the original connection as a filesystem path -- confirm.
        output_path = os.path.dirname(source)

    exporter = SpectrumAnnotatorExport(
        handle._original_connection, analysis.id, output_path,
        mzml_path)
    exporter.display_header()
    exporter.start()

Esempio n. 15

0

Mostra file

def glycan_hypothesis(database_connection, hypothesis_identifier, output_path=None, importable=False):
    '''Serialize the glycans of one hypothesis as CSV.

    The hypothesis is resolved with ``get_by_name_or_id``. The serializer
    class depends on ``importable``. The destination is ``output_path``
    (opened ``'wb'``) or ``sys.stdout`` when no path is given.
    '''
    bound = DatabaseBoundOperation(database_connection)
    hypothesis = get_by_name_or_id(bound, GlycanHypothesis, hypothesis_identifier)

    task_type = GlycanHypothesisCSVSerializer
    if importable:
        task_type = ImportableGlycanHypothesisCSVSerializer

    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        output_stream = ctxstream(sys.stdout)

    with output_stream:
        task = task_type(output_stream, hypothesis.glycans)
        task.run()

Esempio n. 16

0

Mostra file

def glycopeptide_training_mgf(database_connection, analysis_identifier, output_path=None,
                              mzml_path=None, threshold=None):
    '''Export a glycopeptide analysis through ``TrainingMGFExporter``.

    Aborts with ``click.Abort`` (after a message on stderr) unless the
    analysis is of type ``glycopeptide_lc_msms``. The exporter writes to
    ``output_path`` (opened ``'wb'``) or to ``sys.stdout`` when no path is
    given.
    '''
    bound = DatabaseBoundOperation(database_connection)
    session = bound.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    type_matches = analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms
    if not type_matches:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()

    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        output_stream = ctxstream(sys.stdout)

    with output_stream:
        task = TrainingMGFExporter.from_analysis(
            bound, analysis.id, output_stream, mzml_path, threshold)
        task.run()

Esempio n. 17

0

Mostra file

def glycopeptide_chromatogram_records(database_connection,
                                      analysis_identifier,
                                      output_path,
                                      apex_time_range=None):
    '''Write chromatogram records for an analysis' identified glycopeptides as CSV.

    Parameters:
        database_connection: connection wrapped in ``DatabaseBoundOperation``.
        analysis_identifier: name or id, resolved with ``get_by_name_or_id``.
        output_path: destination file (opened ``'wb'``); when None the binary
            stdout stream is used.
        apex_time_range: ``(start, stop)`` pair; records whose ``apex_time``
            lies outside it are skipped. Defaults to ``(0, inf)``.

    Identifications without a chromatogram or with a negative ``ms1_score``
    are skipped. Progress messages are written to stderr.

    Raises:
        click.Abort: if the analysis is not of type ``glycopeptide_lc_msms``.
    '''
    if apex_time_range is None:
        apex_time_range = (0, float('inf'))
    start_time, stop_time = apex_time_range
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." %
                    (str(analysis.name), str(analysis.analysis_type)),
                    fg='red',
                    err=True)
        raise click.Abort()
    # Imported before the output is opened so an import failure cannot leave an open handle behind.
    from glycan_profiling.scoring.elution_time_grouping import GlycopeptideChromatogramProxy
    if output_path is None:
        fh = click.get_binary_stream('stdout')
    else:
        fh = open(output_path, 'wb')
    # The handle is owned by the ``with`` block for the whole export, so it is
    # closed even when the query or the record conversion raises.
    with fh:
        idgps = session.query(IdentifiedGlycopeptide).filter(
            IdentifiedGlycopeptide.analysis_id == analysis.id).all()
        n = len(idgps)
        cases = []
        analysis_name = analysis.name
        for i, idgp in enumerate(idgps):
            if i % 50 == 0:
                click.echo("%d/%d Records Processed" % (i, n), err=True)
            if idgp.chromatogram is None:
                continue
            if idgp.ms1_score < 0:
                continue
            obj = GlycopeptideChromatogramProxy.from_obj(
                idgp,
                ms1_score=idgp.ms1_score,
                ms2_score=idgp.ms2_score,
                q_value=idgp.q_value,
                analysis_name=analysis_name,
                mass_shifts=';'.join(
                    [m.name for m in idgp.chromatogram.mass_shifts]))
            if obj.apex_time < start_time or obj.apex_time > stop_time:
                continue
            cases.append(obj)
        click.echo("Writing %d Records" % (len(cases), ), err=True)
        GlycopeptideChromatogramProxy.to_csv(cases, csv_stream(fh))

Esempio n. 18

0

Mostra file

File: build_db.py Progetto: BostonUniversityCBMS/glycresoft

def merge_glycan_hypotheses(context, database_connection,
                            hypothesis_specification, name):
    '''Combine the listed glycan hypotheses into one stored in ``database_connection``.

    Every ``(connection, identifier)`` pair from ``hypothesis_specification``
    is resolved to a hypothesis id with ``get_by_name_or_id``. The collected
    pairs, the output connection and the (validated) name are passed to
    ``GlycanCompositionHypothesisMerger``, which is displayed and started.
    '''
    output_handle = DatabaseBoundOperation(database_connection)

    def resolve(connection, ident):
        # Each source hypothesis is looked up through its own connection wrapper.
        found = get_by_name_or_id(DatabaseBoundOperation(connection),
                                  GlycanHypothesis, ident)
        return connection, found.id

    hypothesis_ids = []
    for source_connection, source_ident in hypothesis_specification:
        hypothesis_ids.append(resolve(source_connection, source_ident))

    if name is not None:
        name = validate_glycan_hypothesis_name(
            context, output_handle._original_connection, name)
        click.secho("Building Glycan Hypothesis %s" % name, fg='cyan')

    job = GlycanCompositionHypothesisMerger(
        output_handle._original_connection, hypothesis_ids, name)
    job.display_header()
    job.start()

Esempio n. 19

0

Mostra file

File: build_db.py Progetto: mobiusklein/glycan_profiling

def glycan_network(context, database_connection, hypothesis_identifier, edge_strategy, output_path):
    '''Build a composition graph for a glycan hypothesis and hand it to ``GraphWriter``.

    Only the ``'manhattan'`` edge strategy is handled; any other value raises
    ``click.ClickException``. That check happens after the output stream has
    been opened and the glycans have been loaded. Output goes to
    ``output_path`` (``'wb'`` mode) or to ``sys.stdout`` when it is None.
    '''
    conn = DatabaseBoundOperation(database_connection)
    hypothesis = get_by_name_or_id(conn, GlycanHypothesis, hypothesis_identifier)
    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')
    with output_stream:
        # The structure database is given the raw connection argument, not the wrapper.
        structure_db = GlycanCompositionDiskBackedStructureDatabase(
            database_connection, hypothesis.id)
        graph = CompositionGraph(list(structure_db))
        is_manhattan = edge_strategy == 'manhattan'
        if not is_manhattan:
            raise click.ClickException(
                "Could not find edge strategy %r" % (edge_strategy,))
        graph.create_edges(1)
        GraphWriter(graph, output_stream)

Esempio n. 20

0

Mostra file

def glycopeptide_spectrum_matches(database_connection,
                                  analysis_identifier,
                                  output_path=None):
    '''Write each matched glycopeptide spectrum in CSV format.

    The analysis must be of type ``glycopeptide_lc_msms``; otherwise a message
    is printed to stderr and ``click.Abort`` is raised. Spectrum matches are
    read ordered by ``scan_id`` in slices of 100000, converted, and serialized
    together with an id-to-name protein index. Output goes to ``output_path``
    (``'wb'`` mode) or to the binary stdout stream when it is None.
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    is_glycopeptide_search = analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms
    if not is_glycopeptide_search:
        click.secho("Analysis %r is of type %r." %
                    (str(analysis.name), str(analysis.analysis_type)),
                    fg='red',
                    err=True)
        raise click.Abort()
    analysis_id = analysis.id

    protein_rows = session.query(Protein.id, Protein.name)
    protein_rows = protein_rows.join(Protein.glycopeptides)
    protein_rows = protein_rows.join(GlycopeptideSpectrumMatch)
    protein_rows = protein_rows.filter(
        GlycopeptideSpectrumMatch.analysis_id == analysis.id)
    protein_index = dict(protein_rows)

    page_size = 100000

    def generate():
        offset = 0
        matches = session.query(GlycopeptideSpectrumMatch).filter(
            GlycopeptideSpectrumMatch.analysis_id == analysis_id).order_by(
                GlycopeptideSpectrumMatch.scan_id)
        while True:
            session.expire_all()
            page = matches.slice(offset, offset + page_size).all()
            if len(page) == 0:
                return
            for match in page:
                yield match.convert()
            offset += page_size

    if output_path is None:
        output_stream = ctxstream(click.get_binary_stream('stdout'))
    else:
        output_stream = open(output_path, 'wb')
    with output_stream:
        GlycopeptideSpectrumMatchAnalysisCSVSerializer(
            output_stream, generate(), protein_index).run()

Esempio n. 21

0

Mostra file

File: build_db.py Progetto: BostonUniversityCBMS/glycresoft

def glycan_network(context, database_connection, hypothesis_identifier,
                   edge_strategy, output_path):
    '''Write the composition graph of a glycan hypothesis.

    The hypothesis is resolved with ``get_by_name_or_id``, its glycans are
    loaded into a ``CompositionGraph``, edges are created for the
    ``'manhattan'`` strategy, and the graph is passed to ``GraphWriter``
    together with the output stream. Any other strategy raises
    ``click.ClickException`` from inside the open-stream block.
    '''
    conn = DatabaseBoundOperation(database_connection)
    hypothesis = get_by_name_or_id(conn, GlycanHypothesis,
                                   hypothesis_identifier)

    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        output_stream = ctxstream(sys.stdout)

    with output_stream:
        glycans = list(GlycanCompositionDiskBackedStructureDatabase(
            database_connection, hypothesis.id))
        network = CompositionGraph(glycans)
        if edge_strategy == 'manhattan':
            network.create_edges(1)
        else:
            message = "Could not find edge strategy %r" % (edge_strategy, )
            raise click.ClickException(message)
        GraphWriter(network, output_stream)

Esempio n. 22

0

Mostra file

def annotate_matched_spectra(database_connection,
                             analysis_identifier,
                             output_path,
                             mzml_path=None):
    '''Start a ``SpectrumAnnotatorExport`` task for a glycopeptide analysis.

    Aborts with ``click.Abort`` (after a message on stderr) unless the
    analysis is of type ``glycopeptide_lc_msms``. A None ``output_path`` is
    replaced by ``os.path.dirname`` of the original connection value.
    '''
    bound = DatabaseBoundOperation(database_connection)
    session = bound.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    type_matches = analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms
    if not type_matches:
        click.secho("Analysis %r is of type %r." %
                    (str(analysis.name), str(analysis.analysis_type)),
                    fg='red',
                    err=True)
        raise click.Abort()

    destination = output_path
    if destination is None:
        # NOTE(review): assumes the original connection is a filesystem path -- confirm.
        destination = os.path.dirname(bound._original_connection)

    job = SpectrumAnnotatorExport(bound._original_connection,
                                  analysis.id, destination, mzml_path)
    job.display_header()
    job.start()

Esempio n. 23

0

Mostra file

File: export.py Progetto: mobiusklein/glycan_profiling

def glycopeptide_mzidentml(database_connection, analysis_identifier, output_path=None,
                           mzml_path=None):
    '''Write identified glycopeptides as mzIdentML file, and associated MSn spectra
    to a paired mzML file if the matched data are available. If an mzML file is written
    it will also contain the extracted ion chromatograms for each glycopeptide with an
    extracted elution profile.

    The analysis must be of type ``glycopeptide_lc_msms``; otherwise a message
    is printed to stderr and ``click.Abort`` is raised. ``output_path`` is
    passed straight to ``open``.
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    is_glycopeptide_search = analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms
    if not is_glycopeptide_search:
        message = "Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type))
        click.secho(message, fg='red', err=True)
        raise click.Abort()

    loader = AnalysisDeserializer(
        handle._original_connection, analysis_id=analysis.id)
    identified = loader.load_identified_glycopeptides()
    with open(output_path, 'wb') as outfile:
        MzIdentMLSerializer(
            outfile, identified, analysis, loader, source_mzml_path=mzml_path).run()

Esempio n. 24

0

Mostra file

File: export.py Progetto: mobiusklein/glycan_profiling

def export_identified_glycans_from_glycopeptides(database_connection, analysis_identifier, output_path):
    '''Export the glycan compositions attached to an analysis' identified glycopeptides.

    Parameters:
        database_connection: connection wrapped in ``DatabaseBoundOperation``.
        analysis_identifier: name or id, resolved with ``get_by_name_or_id``.
        output_path: destination file (opened ``'wb'``); when None the binary
            stdout stream is used.

    Raises:
        click.Abort: if the analysis is not of type ``glycopeptide_lc_msms``.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    glycans = session.query(GlycanComposition).join(
        GlycanCombinationGlycanComposition).join(GlycanCombination).join(
        Glycopeptide,
        Glycopeptide.glycan_combination_id == GlycanCombination.id).join(
        IdentifiedGlycopeptide,
        IdentifiedGlycopeptide.structure_id == Glycopeptide.id).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    if output_path is None:
        # The file branch is opened in binary mode, so stdout must be the
        # binary stream as well; text-mode ``sys.stdout`` would make the
        # serializer see two different stream types.
        output_stream = ctxstream(click.get_binary_stream('stdout'))
    else:
        output_stream = open(output_path, 'wb')
    with output_stream:
        job = ImportableGlycanHypothesisCSVSerializer(output_stream, glycans)
        job.run()

Esempio n. 25

0

Mostra file

File: export.py Progetto: mobiusklein/glycan_profiling

def glycopeptide_spectrum_matches(database_connection, analysis_identifier, output_path=None):
    '''Write each matched glycopeptide spectrum in CSV format

    Parameters:
        database_connection: connection wrapped in ``DatabaseBoundOperation``.
        analysis_identifier: name or id, resolved with ``get_by_name_or_id``.
        output_path: destination file (opened ``'wb'``); when None the binary
            stdout stream is used.

    Raises:
        click.Abort: if the analysis is not of type ``glycopeptide_lc_msms``.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    # (protein id, protein name) pairs for proteins with spectrum matches in this analysis.
    query = session.query(Protein.id, Protein.name).join(Protein.glycopeptides).join(
        GlycopeptideSpectrumMatch).filter(
        GlycopeptideSpectrumMatch.analysis_id == analysis.id)
    protein_index = dict(query)

    def generate():
        # Page through the matches ordered by scan id until a slice comes back empty.
        i = 0
        interval = 100000
        query = session.query(GlycopeptideSpectrumMatch).filter(
            GlycopeptideSpectrumMatch.analysis_id == analysis_id).order_by(
            GlycopeptideSpectrumMatch.scan_id)
        while True:
            session.expire_all()
            chunk = query.slice(i, i + interval).all()
            if not chunk:
                break
            for glycopeptide in chunk:
                yield glycopeptide.convert()
            i += interval

    if output_path is None:
        # The file branch is opened in binary mode, so stdout must be the
        # binary stream as well; text-mode ``sys.stdout`` would make the
        # serializer see two different stream types.
        output_stream = ctxstream(click.get_binary_stream('stdout'))
    else:
        output_stream = open(output_path, 'wb')
    with output_stream:
        job = GlycopeptideSpectrumMatchAnalysisCSVSerializer(output_stream, generate(), protein_index)
        job.run()