def _copy_hypothesis_across_file_boundaries(database_connection,
                                            source,
                                            hypothesis_name,
                                            identifier=None):
    """Resolve a glycan hypothesis in ``source`` by id or name and, when the
    source differs from ``database_connection``, copy it into the target.

    Returns the hypothesis id in the target database (or in ``source`` when
    the two connections are the same).
    """
    source_handle = DatabaseBoundOperation(source)
    source_hypothesis_id = None
    source_hypothesis_name = None

    try:
        # ``int(None)`` raises TypeError and ``int("a-name")`` raises
        # ValueError; both mean "fall back to a lookup by name". The original
        # only caught TypeError, so a name string crashed here.
        hypothesis_id = int(identifier)
        inst = source_handle.query(GlycanHypothesis).get(hypothesis_id)
        if inst is not None:
            source_hypothesis_id = hypothesis_id
            source_hypothesis_name = inst.name

    except (TypeError, ValueError):
        hypothesis_name = identifier
        inst = source_handle.query(GlycanHypothesis).filter(
            GlycanHypothesis.name == hypothesis_name).first()
        if inst is not None:
            source_hypothesis_id = inst.id
            source_hypothesis_name = inst.name

    if source == database_connection:
        # Already in the target database; nothing to copy.
        return source_hypothesis_id

    mover = GlycanHypothesisCopier(database_connection,
                                   [(source, source_hypothesis_id)],
                                   hypothesis_name=source_hypothesis_name)
    mover.run()
    return mover.hypothesis_id
Esempio n. 2
0
def _copy_hypothesis_across_file_boundaries(database_connection, source, hypothesis_name,
                                            identifier=None):
    """Resolve a glycan hypothesis in ``source`` by id or name and, when the
    source differs from ``database_connection``, copy it into the target.

    Returns the hypothesis id in the target database (or in ``source`` when
    the two connections are the same).
    """
    source_handle = DatabaseBoundOperation(source)
    source_hypothesis_id = None
    source_hypothesis_name = None

    try:
        # ``int(None)`` raises TypeError and ``int("a-name")`` raises
        # ValueError; both mean "fall back to a lookup by name". The original
        # only caught TypeError, so a name string crashed here.
        hypothesis_id = int(identifier)
        inst = source_handle.query(GlycanHypothesis).get(hypothesis_id)
        if inst is not None:
            source_hypothesis_id = hypothesis_id
            source_hypothesis_name = inst.name

    except (TypeError, ValueError):
        hypothesis_name = identifier
        inst = source_handle.query(GlycanHypothesis).filter(
            GlycanHypothesis.name == hypothesis_name).first()
        if inst is not None:
            source_hypothesis_id = inst.id
            source_hypothesis_name = inst.name

    if source == database_connection:
        # Already in the target database; nothing to copy.
        return source_hypothesis_id

    mover = GlycanHypothesisCopier(
        database_connection, [(source, source_hypothesis_id)],
        hypothesis_name=source_hypothesis_name)
    mover.run()
    return mover.hypothesis_id
def _copy_analysis_across_file_boundaries(database_connection,
                                          source,
                                          hypothesis_name,
                                          identifier=None):
    """Resolve an analysis in ``source`` by id or name and serialize its
    glycans into ``database_connection`` as a new hypothesis.

    Returns the id of the newly written hypothesis.
    """
    source_handle = DatabaseBoundOperation(source)
    source_analysis_id = None
    source_analysis_name = None
    try:
        # ``int(None)`` raises TypeError and ``int("a-name")`` raises
        # ValueError; both mean "fall back to a lookup by name". The original
        # only caught TypeError, so a name string crashed here.
        hypothesis_id = int(identifier)
        inst = source_handle.query(Analysis).get(hypothesis_id)
        if inst is not None:
            source_analysis_id = hypothesis_id
            source_analysis_name = inst.name

    except (TypeError, ValueError):
        hypothesis_name = identifier
        inst = source_handle.query(Analysis).filter(
            Analysis.name == hypothesis_name).first()
        if inst is not None:
            source_analysis_id = inst.id
            source_analysis_name = inst.name
    if hypothesis_name is None:
        # Default the new hypothesis's name to the source analysis name.
        hypothesis_name = source_analysis_name
    mover = GlycanAnalysisHypothesisSerializer(source, source_analysis_id,
                                               hypothesis_name,
                                               database_connection)
    mover.run()
    return mover.hypothesis_id
def mass_search_dispatch(uuid):
    """Search the hypothesis record identified by ``uuid`` for entries
    matching the posted mass/tolerance, trying glycan hypotheses first and
    glycopeptide hypotheses second.

    Returns a JSON response; any failure is logged and answered with an
    empty JSON body rather than propagated.
    """
    try:
        arguments, state = request_arguments_and_context()
        record = _locate_hypothesis(uuid)
        handle = DatabaseBoundOperation(record.path)
        hypothesis = handle.query(GlycanHypothesis).filter(
            GlycanHypothesis.uuid == record.uuid).first()

        if hypothesis is not None:
            return search_glycan_hypothesis(hypothesis.uuid, arguments['mass'],
                                            arguments['tolerance'])

        hypothesis = handle.query(GlycopeptideHypothesis).filter(
            GlycopeptideHypothesis.uuid == record.uuid).first()
        if hypothesis is not None:
            return search_glycopeptide_hypothesis(hypothesis.uuid,
                                                  arguments['mass'],
                                                  arguments['tolerance'])

        return jsonify(*[])
    # Fixed: `except Exception, e:` is Python-2-only syntax and a SyntaxError
    # on Python 3; `as e` works on both.
    except Exception as e:
        logging.exception("An exception occurred for %r",
                          request.get_json(),
                          exc_info=e)
        return jsonify(*[])
Esempio n. 5
0
def _copy_analysis_across_file_boundaries(database_connection, source, hypothesis_name,
                                          identifier=None):
    """Resolve an analysis in ``source`` by id or name and serialize its
    glycans into ``database_connection`` as a new hypothesis.

    Returns the id of the newly written hypothesis.
    """
    source_handle = DatabaseBoundOperation(source)
    source_analysis_id = None
    source_analysis_name = None
    try:
        # ``int(None)`` raises TypeError and ``int("a-name")`` raises
        # ValueError; both mean "fall back to a lookup by name". The original
        # only caught TypeError, so a name string crashed here.
        hypothesis_id = int(identifier)
        inst = source_handle.query(Analysis).get(hypothesis_id)
        if inst is not None:
            source_analysis_id = hypothesis_id
            source_analysis_name = inst.name

    except (TypeError, ValueError):
        hypothesis_name = identifier
        inst = source_handle.query(Analysis).filter(
            Analysis.name == hypothesis_name).first()
        if inst is not None:
            source_analysis_id = inst.id
            source_analysis_name = inst.name
    if hypothesis_name is None:
        # Default the new hypothesis's name to the source analysis name.
        hypothesis_name = source_analysis_name
    mover = GlycanAnalysisHypothesisSerializer(
        source, source_analysis_id, hypothesis_name,
        database_connection)
    mover.run()
    return mover.hypothesis_id
Esempio n. 6
0
class HypothesisGlycanSourceValidator(GlycanSourceValidatorBase):
    """Validate that a glycan source identifier resolves to an existing
    :class:`GlycanHypothesis` in the source database."""

    def __init__(self,
                 database_connection,
                 source,
                 source_type,
                 source_identifier=None):
        super(HypothesisGlycanSourceValidator,
              self).__init__(database_connection, source, source_type,
                             source_identifier)
        self.handle = DatabaseBoundOperation(source)

    def validate(self):
        """Return True when the identifier matches a hypothesis by id or name."""
        if self.source_identifier is None:
            click.secho("No value passed through --glycan-source-identifier.",
                        fg='magenta')
            return False
        try:
            # ``int("a-name")`` raises ValueError (TypeError for non-string,
            # non-numeric objects); either means "treat the identifier as a
            # name". The original only caught TypeError.
            hypothesis_id = int(self.source_identifier)
            inst = self.handle.query(GlycanHypothesis).get(hypothesis_id)
            return inst is not None
        except (TypeError, ValueError):
            # Look up by the identifier itself; the original compared against
            # self.source (the connection/path), which could never match a
            # hypothesis name. Mirrors _copy_hypothesis_across_file_boundaries.
            hypothesis_name = self.source_identifier
            inst = self.handle.query(GlycanHypothesis).filter(
                GlycanHypothesis.name == hypothesis_name).first()
            return inst is not None
Esempio n. 7
0
def validate_unique_name(context, database_connection, name, klass):
    """Return ``name`` unchanged if no ``klass`` row already uses it,
    otherwise ask ``klass`` for a uniquified variant of the name."""
    handle = DatabaseBoundOperation(database_connection)
    existing = handle.query(klass).filter(klass.name == name).first()
    if existing is None:
        return name
    return klass.make_unique_name(handle.session, name)
Esempio n. 8
0
def glycopeptide_mzidentml(database_connection,
                           analysis_identifier,
                           output_path=None,
                           mzml_path=None,
                           embed_protein_sequences=True):
    '''Write identified glycopeptides as mzIdentML file, and associated MSn spectra
    to a paired mzML file if the matched data are available. If an mzML file is written
    it will also contain the extracted ion chromatograms for each glycopeptide with an
    extracted elution profile.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." %
                    (str(analysis.name), str(analysis.analysis_type)),
                    fg='red',
                    err=True)
        raise click.Abort()
    loader = AnalysisDeserializer(database_connection._original_connection,
                                  analysis_id=analysis.id)
    click.echo("Loading Identifications")
    # Fixed: filter on the resolved primary key rather than the raw
    # ``analysis_identifier`` -- get_by_name_or_id accepts a *name* too, and a
    # name here would silently match no glycopeptides.
    glycopeptides = loader.query(IdentifiedGlycopeptide).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    with open(output_path, 'wb') as outfile:
        writer = MzIdentMLSerializer(
            outfile,
            glycopeptides,
            analysis,
            loader,
            source_mzml_path=mzml_path,
            embed_protein_sequences=embed_protein_sequences)
        writer.run()
Esempio n. 9
0
def export_identified_glycans_from_glycopeptides(database_connection,
                                                 analysis_identifier,
                                                 output_path):
    """Export the glycan compositions attached to the identified
    glycopeptides of an analysis as an importable glycan hypothesis CSV."""
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." %
                    (str(analysis.name), str(analysis.analysis_type)),
                    fg='red',
                    err=True)
        raise click.Abort()
    # Walk composition -> combination -> glycopeptide -> identification to
    # collect only compositions actually used by this analysis.
    glycans = session.query(GlycanComposition).join(
        GlycanCombinationGlycanComposition).join(GlycanCombination).join(
            Glycopeptide,
            Glycopeptide.glycan_combination_id == GlycanCombination.id).join(
                IdentifiedGlycopeptide,
                IdentifiedGlycopeptide.structure_id == Glycopeptide.id).filter(
                    IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    if output_path is not None:
        stream = open(output_path, 'wb')
    else:
        stream = ctxstream(click.get_binary_stream('stdout'))
    with stream:
        ImportableGlycanHypothesisCSVSerializer(stream, glycans).run()
Esempio n. 10
0
def glycopeptide_hypothesis(database_connection, hypothesis_identifier, output_path, multifasta=False):
    '''Write each theoretical glycopeptide in CSV format
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    hypothesis = get_by_name_or_id(session, GlycopeptideHypothesis, hypothesis_identifier)

    def generate():
        # Page through the glycopeptide collection, expiring the session
        # between pages to keep memory bounded.
        page_size = 100000
        offset = 0
        while True:
            session.expire_all()
            page = hypothesis.glycopeptides.slice(offset, offset + page_size).all()
            if not page:
                break
            for glycopeptide in page:
                yield glycopeptide
            offset += page_size

    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        # NOTE(review): sys.stdout is a text stream while files are opened in
        # binary mode; presumably the serializer tolerates both -- confirm.
        output_stream = ctxstream(sys.stdout)
    with output_stream:
        GlycopeptideHypothesisCSVSerializer(output_stream, generate()).run()
Esempio n. 11
0
def from_analysis(context, database_connection, analysis_connection,
                  analysis_identifier, reduction, derivatization, name):
    """Build a glycan hypothesis from the glycans of a prior analysis,
    dispatching on the analysis type (glycan LC-MS or glycopeptide LC-MS/MS)."""
    database_connection = DatabaseBoundOperation(database_connection)
    if name is not None:
        name = validate_glycan_hypothesis_name(
            context, database_connection._original_connection, name)
        click.secho("Building Glycan Hypothesis %s" % name, fg='cyan')
    reduction = validate_reduction(context, reduction)
    derivatization = validate_derivatization(context, derivatization)

    analysis_connection = DatabaseBoundOperation(analysis_connection)
    analysis = get_by_name_or_id(analysis_connection.session, Analysis,
                                 analysis_identifier)
    analysis_type = analysis.analysis_type
    # Choose the serializer implementation for this analysis type.
    if analysis_type == AnalysisTypeEnum.glycan_lc_ms:
        serializer_type = GlycanAnalysisHypothesisSerializer
    elif analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        serializer_type = GlycopeptideAnalysisGlycanCompositionExtractionHypothesisSerializer
    else:
        click.secho("Analysis Type %r could not be converted" %
                    (analysis_type.name, ),
                    fg='red')
        return
    job = serializer_type(
        analysis_connection._original_connection,
        analysis.id,
        name,
        output_connection=database_connection._original_connection)
    job.display_header()
    job.start()
Esempio n. 12
0
def glycopeptide_hypothesis(database_connection, hypothesis_identifier, output_path, multifasta=False):
    '''Write each theoretical glycopeptide in CSV format
    '''
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    hypothesis = get_by_name_or_id(session, GlycopeptideHypothesis, hypothesis_identifier)

    def generate():
        # Page through the glycopeptide collection, expiring the session
        # between pages to keep memory bounded.
        page_size = 100000
        offset = 0
        while True:
            session.expire_all()
            page = hypothesis.glycopeptides.slice(offset, offset + page_size).all()
            if not page:
                break
            for glycopeptide in page:
                yield glycopeptide
            offset += page_size

    if output_path is not None:
        output_stream = open(output_path, 'wb')
    else:
        # NOTE(review): sys.stdout is a text stream while files are opened in
        # binary mode; presumably the serializer tolerates both -- confirm.
        output_stream = ctxstream(sys.stdout)
    with output_stream:
        GlycopeptideHypothesisCSVSerializer(output_stream, generate()).run()
Esempio n. 13
0
 def __init__(self, database_connection, hypothesis_name=None, glycan_hypothesis_id=None):
     """Bind to the database and record the requested hypothesis name and the
     id of the glycan hypothesis to use; tags this instance with a fresh UUID.
     """
     DatabaseBoundOperation.__init__(self, database_connection)
     self._hypothesis_name = hypothesis_name
     # Placeholders until the hypothesis is resolved elsewhere.
     self._hypothesis_id = None
     self._hypothesis = None
     self._glycan_hypothesis_id = glycan_hypothesis_id
     self.uuid = str(uuid4().hex)
     # -1 marks "not yet counted".
     self.total_glycan_combination_count = -1
 def __init__(self, connection, hypothesis_id=None, *args, **kwargs):
     """Bind to ``connection``; ``hypothesis_id`` defaults to 1 when omitted.

     A ``batch_size`` keyword (default 1000) may be supplied via ``kwargs``.
     """
     if hypothesis_id is None:
         hypothesis_id = 1
     DatabaseBoundOperation.__init__(self, connection)
     PeptideCollectionBase.__init__(self, *args, **kwargs)
     self.hypothesis_id = hypothesis_id
     # Counts operations performed; presumably used to trigger flushes at
     # _batch_size boundaries elsewhere -- confirm against the class body.
     self._operation_count = 0
     self._batch_size = int(kwargs.get("batch_size", 1000))
 def __init__(self, mzid_path, connection, hypothesis_id, include_baseline_peptides=True,
              target_proteins=None, reference_fasta=None, peptide_length_range=(5, 60)):
     """Bind to the database and the mzIdentML proteome source.

     ``peptide_length_range`` falls back to (5, 60) when a falsy value
     (e.g. ``None``) is passed explicitly.
     """
     DatabaseBoundOperation.__init__(self, connection)
     MzIdentMLProteomeExtraction.__init__(self, mzid_path, reference_fasta)
     self.hypothesis_id = hypothesis_id
     self.target_proteins = target_proteins
     self.include_baseline_peptides = include_baseline_peptides
     self.peptide_length_range = peptide_length_range or (5, 60)
Esempio n. 16
0
 def __init__(self, connection, hypothesis_id=None, *args, **kwargs):
     """Bind to ``connection``; ``hypothesis_id`` defaults to 1 when omitted.

     A ``batch_size`` keyword (default 1000) may be supplied via ``kwargs``.
     """
     if hypothesis_id is None:
         hypothesis_id = 1
     DatabaseBoundOperation.__init__(self, connection)
     PeptideCollectionBase.__init__(self, *args, **kwargs)
     self.hypothesis_id = hypothesis_id
     # Counts operations performed; presumably used to trigger flushes at
     # _batch_size boundaries elsewhere -- confirm against the class body.
     self._operation_count = 0
     self._batch_size = int(kwargs.get("batch_size", 1000))
Esempio n. 17
0
 def __init__(self, mzid_path, connection, hypothesis_id, include_baseline_peptides=True,
              target_proteins=None, reference_fasta=None, peptide_length_range=(5, 60)):
     """Bind to the database and the mzIdentML proteome source.

     ``peptide_length_range`` falls back to (5, 60) when a falsy value
     (e.g. ``None``) is passed explicitly.
     """
     DatabaseBoundOperation.__init__(self, connection)
     MzIdentMLProteomeExtraction.__init__(self, mzid_path, reference_fasta)
     self.hypothesis_id = hypothesis_id
     self.target_proteins = target_proteins
     self.include_baseline_peptides = include_baseline_peptides
     self.peptide_length_range = peptide_length_range or (5, 60)
Esempio n. 18
0
def validate_unique_name(context, database_connection, name, klass):
    """Return ``name`` if it is free for ``klass``; otherwise delegate to
    ``klass.make_unique_name`` to produce a non-colliding variant."""
    handle = DatabaseBoundOperation(database_connection)
    taken = handle.query(klass).filter(klass.name == name).first() is not None
    if taken:
        return klass.make_unique_name(handle.session, name)
    return name
Esempio n. 19
0
def glycan_composition_identification(database_connection, analysis_identifier, output_path=None,
                                      threshold=0, report=False):
    '''Write each glycan chromatogram in CSV format

    When ``report`` is set, a report is rendered via
    GlycanChromatogramReportCreator instead; otherwise both identified and
    unidentified chromatograms above ``threshold`` are streamed as CSV.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycan_lc_ms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    if output_path is None:
        output_stream = ctxstream(sys.stdout)
    else:
        output_stream = open(output_path, 'wb')

    if report:
        with output_stream:
            job = GlycanChromatogramReportCreator(
                database_connection._original_connection,
                analysis_id, output_stream, threshold=threshold)
            job.run()
    else:
        def converted_chunks(query, interval=100):
            # Shared pagination helper (the original duplicated this loop
            # verbatim for both queries): slice the query in fixed-size
            # pages, expiring the session between pages to bound memory.
            i = 0
            while True:
                session.expire_all()
                chunk = query.slice(i, i + interval).all()
                if len(chunk) == 0:
                    break
                for gcs in chunk:
                    yield gcs.convert()
                i += interval

        def generate():
            # Identified glycan composition chromatograms first ...
            identified = session.query(GlycanCompositionChromatogram).filter(
                GlycanCompositionChromatogram.analysis_id == analysis_id,
                GlycanCompositionChromatogram.score > threshold)
            for item in converted_chunks(identified):
                yield item

            # ... then unidentified chromatograms above the same threshold.
            unidentified = session.query(UnidentifiedChromatogram).filter(
                UnidentifiedChromatogram.analysis_id == analysis_id,
                UnidentifiedChromatogram.score > threshold)
            for item in converted_chunks(unidentified):
                yield item

        with output_stream:
            job = GlycanLCMSAnalysisCSVSerializer(output_stream, generate())
            job.run()
Esempio n. 20
0
 def __init__(self, database_connection):
     """Bind to the target database and initialize empty id maps.

     The maps are populated elsewhere; presumably they translate source-side
     ids to destination-side ids during migration -- confirm against the
     class body.
     """
     DatabaseBoundOperation.__init__(self, database_connection)
     self.hypothesis_id = None
     self.glycan_hypothesis_id = None
     self._glycan_hypothesis_migrator = None
     self.protein_id_map = dict()
     self.peptide_id_map = dict()
     self.glycan_combination_id_map = dict()
     self.glycopeptide_id_map = dict()
0
def sql_shell(database_connection, script=None):
    """Open an interactive SQL shell over the database, or, when ``script``
    is given, execute it once and dump the rows as CSV to stdout."""
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()  # pylint: disable=not-callable
    interpreter = SQLShellInterpreter(session)
    if script is not None:
        rows = session.execute(script)
        interpreter._to_csv(list(rows), sys.stdout)
    else:
        interpreter.cmdloop()
 def __init__(self, database_connection, hypothesis_name=None, uuid=None):
     """Bind to the database; generate a fresh UUID when none is supplied."""
     if uuid is None:
         uuid = str(uuid4().hex)
     DatabaseBoundOperation.__init__(self, database_connection)
     self._hypothesis_name = hypothesis_name
     # Placeholders until the hypothesis is resolved elsewhere.
     self._hypothesis_id = None
     self._hypothesis = None
     self._structure_class_loader = None
     self.uuid = uuid
Esempio n. 23
0
def glycan_composition_identification(database_connection, analysis_identifier, output_path=None,
                                      threshold=0, report=False):
    '''Write each glycan chromatogram in CSV format

    When ``report`` is set, a report is rendered via
    GlycanChromatogramReportCreator instead; otherwise both identified and
    unidentified chromatograms above ``threshold`` are streamed as CSV.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycan_lc_ms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    if output_path is None:
        output_stream = ctxstream(sys.stdout)
    else:
        output_stream = open(output_path, 'wb')

    if report:
        with output_stream:
            job = GlycanChromatogramReportCreator(
                database_connection._original_connection,
                analysis_id, output_stream, threshold=threshold)
            job.run()
    else:
        def converted_chunks(query, interval=100):
            # Shared pagination helper (the original duplicated this loop
            # verbatim for both queries): slice the query in fixed-size
            # pages, expiring the session between pages to bound memory.
            i = 0
            while True:
                session.expire_all()
                chunk = query.slice(i, i + interval).all()
                if len(chunk) == 0:
                    break
                for gcs in chunk:
                    yield gcs.convert()
                i += interval

        def generate():
            # Identified glycan composition chromatograms first ...
            identified = session.query(GlycanCompositionChromatogram).filter(
                GlycanCompositionChromatogram.analysis_id == analysis_id,
                GlycanCompositionChromatogram.score > threshold)
            for item in converted_chunks(identified):
                yield item

            # ... then unidentified chromatograms above the same threshold.
            unidentified = session.query(UnidentifiedChromatogram).filter(
                UnidentifiedChromatogram.analysis_id == analysis_id,
                UnidentifiedChromatogram.score > threshold)
            for item in converted_chunks(unidentified):
                yield item

        with output_stream:
            job = GlycanLCMSAnalysisCSVSerializer(output_stream, generate())
            job.run()
Esempio n. 24
0
 def __init__(self, database_connection, hypothesis_name=None, glycan_hypothesis_id=None, full_cross_product=True):
     """Bind to the database and record the requested hypothesis name, glycan
     hypothesis id, and the ``full_cross_product`` flag; tags this instance
     with a fresh UUID.
     """
     DatabaseBoundOperation.__init__(self, database_connection)
     self._hypothesis_name = hypothesis_name
     # Placeholders until the hypothesis is resolved elsewhere.
     self._hypothesis_id = None
     self._hypothesis = None
     self._glycan_hypothesis_id = glycan_hypothesis_id
     self.uuid = str(uuid4().hex)
     # -1 marks "not yet counted".
     self.total_glycan_combination_count = -1
     self.full_cross_product = full_cross_product
Esempio n. 25
0
 def __init__(self, database_connection):
     """Bind to the target database and initialize empty id maps.

     The maps are populated elsewhere; presumably they translate source-side
     ids to destination-side ids during migration -- confirm against the
     class body.
     """
     DatabaseBoundOperation.__init__(self, database_connection)
     self.hypothesis_id = None
     self.glycan_hypothesis_id = None
     self._glycan_hypothesis_migrator = None
     self.protein_id_map = dict()
     self.peptide_id_map = dict()
     self.glycan_combination_id_map = dict()
     self.glycopeptide_id_map = dict()
Esempio n. 26
0
 def __init__(self, database_connection, hypothesis_name=None, uuid=None):
     """Bind to the database; generate a fresh UUID when none is supplied."""
     if uuid is None:
         uuid = str(uuid4().hex)
     DatabaseBoundOperation.__init__(self, database_connection)
     self._hypothesis_name = hypothesis_name
     # Placeholders until the hypothesis is resolved elsewhere.
     self._hypothesis_id = None
     self._hypothesis = None
     self._structure_class_loader = None
     self.uuid = uuid
    def __init__(self, connection, analysis_name, sample_run, chromatogram_extractor):
        """Bind to the database and stash the inputs; the analysis serializer
        and sample migrator start as None and are created elsewhere."""
        DatabaseBoundOperation.__init__(self, connection)

        self.sample_run = sample_run
        self.chromatogram_extractor = chromatogram_extractor

        # The requested analysis name; helpers below are built lazily elsewhere.
        self._seed_analysis_name = analysis_name
        self._analysis_serializer = None
        self._sample_migrator = None
Esempio n. 28
0
def sql_shell(database_connection, script=None):
    """Open an interactive SQL shell over the database, or, when ``script``
    is given, execute it once and dump the rows as CSV to stdout."""
    handle = DatabaseBoundOperation(database_connection)
    session = handle.session()
    interpreter = SQLShellInterpreter(session)
    if script is not None:
        rows = session.execute(script)
        interpreter._to_csv(list(rows), sys.stdout)
    else:
        interpreter.cmdloop()
Esempio n. 29
0
 def __init__(self,
              database_connection,
              source,
              source_type,
              source_identifier=None):
     """Forward construction to the base validator and open a database
     handle on ``source`` for lookups."""
     super(GlycanAnalysisGlycanSourceValidator,
           self).__init__(database_connection, source, source_type,
                          source_identifier)
     self.handle = DatabaseBoundOperation(source)
Esempio n. 30
0
 def __init__(self,
              database_connection,
              source,
              source_type,
              source_identifier=None):
     """Bind to the database and record the glycan source: its location,
     its type, and an optional identifier selecting an item within it."""
     DatabaseBoundOperation.__init__(self, database_connection)
     self.source = source
     self.source_type = source_type
     self.source_identifier = source_identifier
Esempio n. 31
0
    def __init__(self, connection, analysis_name, sample_run, chromatogram_extractor):
        """Bind to the database and stash the inputs; the analysis serializer
        and sample migrator start as None and are created elsewhere."""
        DatabaseBoundOperation.__init__(self, connection)

        self.sample_run = sample_run
        self.chromatogram_extractor = chromatogram_extractor

        # The requested analysis name; helpers below are built lazily elsewhere.
        self._seed_analysis_name = analysis_name
        self._analysis_serializer = None
        self._sample_migrator = None
 def stream_from_hypotheses(self, connection, hypothesis_id):
     """Yield ``(composition, structure_class_names)`` pairs for every glycan
     composition of ``hypothesis_id`` in ``connection``; compositions without
     structure classes yield ``[None]`` as the class list."""
     self.log("Streaming from %s for hypothesis %d" % (connection, hypothesis_id))
     handle = DatabaseBoundOperation(connection)
     session = handle.session()
     query = session.query(DBGlycanComposition).filter(
         DBGlycanComposition.hypothesis_id == hypothesis_id)
     for db_composition in query:
         names = [sc.name for sc in db_composition.structure_classes]
         if names:
             yield db_composition, names
         else:
             yield db_composition, [None]
Esempio n. 33
0
 def __init__(self, connection, hypothesis_id, target_proteins=None,
              constant_modifications=None, variable_modifications=None):
     """Bind to the database for ``hypothesis_id`` and record the target
     proteins and modification lists.

     Modification lists default to fresh empty lists per call, avoiding the
     shared-mutable-default pitfall.
     """
     if constant_modifications is None:
         constant_modifications = []
     if variable_modifications is None:
         variable_modifications = []
     DatabaseBoundOperation.__init__(self, connection)
     self.hypothesis_id = hypothesis_id
     self.target_proteins = target_proteins
     self.constant_modifications = constant_modifications
     self.variable_modifications = variable_modifications
 def stream_from_hypotheses(self, connection, hypothesis_id):
     """Yield ``(composition, structure_class_names)`` pairs for every glycan
     composition of ``hypothesis_id`` in ``connection``; compositions without
     structure classes yield ``[None]`` as the class list."""
     self.log("Streaming from %s for hypothesis %d" % (connection, hypothesis_id))
     handle = DatabaseBoundOperation(connection)
     session = handle.session()
     query = session.query(DBGlycanComposition).filter(
         DBGlycanComposition.hypothesis_id == hypothesis_id)
     for db_composition in query:
         names = [sc.name for sc in db_composition.structure_classes]
         if names:
             yield db_composition, names
         else:
             yield db_composition, [None]
 def __init__(self, connection, hypothesis_id=1, cache_size=DEFAULT_CACHE_SIZE,
              loading_interval=DEFAULT_LOADING_INTERVAL,
              threshold_cache_total_count=DEFAULT_THRESHOLD_CACHE_TOTAL_COUNT,
              model_type=Glycopeptide):
     """Bind to the database and set up interval caches plus protein and
     peptide indexes for ``hypothesis_id``."""
     DatabaseBoundOperation.__init__(self, connection)
     self.hypothesis_id = hypothesis_id
     self.model_type = model_type
     self.loading_interval = loading_interval
     self.threshold_cache_total_count = threshold_cache_total_count
     # Interval caches start empty; exact semantics are defined by
     # LRUIntervalSet/IntervalSet elsewhere in the project.
     self._intervals = LRUIntervalSet([], cache_size)
     self._ignored_intervals = IntervalSet([])
     self.proteins = ProteinIndex(self.session, self.hypothesis_id)
     self.peptides = PeptideIndex(self.session, self.hypothesis_id)
Esempio n. 36
0
 def __init__(self,
              input_connection,
              analysis_id,
              hypothesis_name,
              output_connection=None):
     """Open handles on the input and output databases (output defaults to
     the input connection) and initialize the base serializer."""
     if output_connection is None:
         output_connection = input_connection
     self.input_connection = DatabaseBoundOperation(input_connection)
     self.output_connection = DatabaseBoundOperation(output_connection)
     GlycanHypothesisSerializerBase.__init__(self, output_connection,
                                             hypothesis_name)
     self.analysis_id = analysis_id
     # Starts empty; presumably used to deduplicate compositions during
     # serialization -- confirm against the class body.
     self.seen_cache = set()
Esempio n. 37
0
def glycopeptide_identification(database_connection, analysis_identifier, output_path=None,
                                report=False, mzml_path=None, threshold=0):
    '''Write each distinct identified glycopeptide in CSV format.

    When ``report`` is set, delegate to GlycopeptideDatabaseSearchReportCreator
    instead; that path requires the original mzML file, taken from the
    analysis parameters when ``mzml_path`` is not supplied. Output goes to
    ``output_path`` or stdout.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    # Only glycopeptide LC-MS/MS analyses can be exported here.
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    if output_path is None:
        # NOTE(review): sys.stdout is a text stream while files are opened in
        # binary mode; presumably the consumers tolerate both -- confirm.
        output_stream = ctxstream(sys.stdout)
    else:
        output_stream = open(output_path, 'wb')
    if report:
        with output_stream:
            if mzml_path is None:
                # Fall back to the sample path recorded with the analysis.
                mzml_path = analysis.parameters['sample_path']
                if not os.path.exists(mzml_path):
                    raise click.ClickException(
                        ("Sample path {} not found. Pass the path to"
                         " this file as `-m/--mzml-path` for this command.").format(
                            mzml_path))
            GlycopeptideDatabaseSearchReportCreator(
                database_connection._original_connection, analysis_id,
                stream=output_stream, threshold=threshold,
                mzml_path=mzml_path).run()
    else:
        # Map protein id -> protein name for proteins with identified
        # glycopeptides in this analysis.
        query = session.query(Protein.id, Protein.name).join(Protein.glycopeptides).join(
            IdentifiedGlycopeptide).filter(
            IdentifiedGlycopeptide.analysis_id == analysis.id)
        protein_index = dict(query)

        def generate():
            # Page through identifications in fixed-size slices, expiring
            # the session between pages to bound memory use.
            i = 0
            interval = 100
            query = session.query(IdentifiedGlycopeptide).filter(
                IdentifiedGlycopeptide.analysis_id == analysis_id)
            while True:
                session.expire_all()
                chunk = query.slice(i, i + interval).all()
                if len(chunk) == 0:
                    break
                for glycopeptide in chunk:
                    yield glycopeptide.convert()
                i += interval
        with output_stream:
            job = GlycopeptideLCMSMSAnalysisCSVSerializer(output_stream, generate(), protein_index)
            job.run()
Esempio n. 38
0
def glycopeptide_identification(database_connection, analysis_identifier, output_path=None,
                                report=False, mzml_path=None, threshold=0):
    '''Write each distinct identified glycopeptide in CSV format, or produce a
    database-search report when ``report`` is set.
    '''
    handle = DatabaseBoundOperation(database_connection)
    db_session = handle.session()
    analysis = get_by_name_or_id(db_session, Analysis, analysis_identifier)
    # Only glycopeptide LC-MS/MS analyses can be exported by this command.
    if analysis.analysis_type != AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    # Default to stdout, wrapped so it can serve as a context manager.
    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')
    if report:
        with output_stream:
            if mzml_path is None:
                # Fall back to the sample path recorded with the analysis.
                mzml_path = analysis.parameters['sample_path']
                if not os.path.exists(mzml_path):
                    raise click.ClickException(
                        ("Sample path {} not found. Pass the path to"
                         " this file as `-m/--mzml-path` for this command.").format(
                            mzml_path))
            GlycopeptideDatabaseSearchReportCreator(
                handle._original_connection, analysis_id,
                stream=output_stream, threshold=threshold,
                mzml_path=mzml_path).run()
    else:
        # Map protein id -> name for proteins with identified glycopeptides.
        protein_index = dict(
            db_session.query(Protein.id, Protein.name).join(
                Protein.glycopeptides).join(IdentifiedGlycopeptide).filter(
                IdentifiedGlycopeptide.analysis_id == analysis_id))

        def iter_converted():
            # Page through matches to bound memory; expire_all drops the
            # session identity map between pages.
            page_size = 100
            offset = 0
            base_query = db_session.query(IdentifiedGlycopeptide).filter(
                IdentifiedGlycopeptide.analysis_id == analysis_id)
            while True:
                db_session.expire_all()
                batch = base_query.slice(offset, offset + page_size).all()
                if not batch:
                    break
                for match in batch:
                    yield match.convert()
                offset += page_size

        with output_stream:
            GlycopeptideLCMSMSAnalysisCSVSerializer(
                output_stream, iter_converted(), protein_index).run()
# ---- Example 39 (scraped-page separator) ----
 def __init__(self, database_connection, analysis_id, output_path, mzml_path=None):
     # Bind this operation to the database, then eagerly load the Analysis row.
     DatabaseBoundOperation.__init__(self, database_connection)
     self.analysis_id = analysis_id
     # Optional path to the raw mzML data backing the analysis -- used for
     # spectrum rendering when available.
     self.mzml_path = mzml_path
     self.output_path = output_path
     # `self.session` is provided by DatabaseBoundOperation.__init__ above.
     self.analysis = self.session.query(serialize.Analysis).get(self.analysis_id)
     # Created lazily elsewhere; presumably by a scan-loader setup step -- TODO confirm.
     self.scan_loader = None
     # matplotlib rcParams applied when rendering figures for the report.
     self._mpl_style = {
         'figure.facecolor': 'white',
         'figure.edgecolor': 'white',
         'font.size': 10,
         'savefig.dpi': 72,
         'figure.subplot.bottom': .125
     }
# ---- Example 40 (scraped-page separator) ----
 def __init__(self, connection, hypothesis_id,
              target_proteins=None,
              constant_modifications=None,
              variable_modifications=None):
     """Bind to *connection* and record the search configuration.

     The modification lists default to fresh empty lists so callers may
     omit them without sharing mutable state.
     """
     DatabaseBoundOperation.__init__(self, connection)
     self.hypothesis_id = hypothesis_id
     self.target_proteins = target_proteins
     # Substitute fresh lists for None to avoid mutable default arguments.
     self.constant_modifications = (
         [] if constant_modifications is None else constant_modifications)
     self.variable_modifications = (
         [] if variable_modifications is None else variable_modifications)
# ---- Example 41 (scraped-page separator) ----
def glycopeptide_training_mgf(database_connection, analysis_identifier, output_path=None,
                              mzml_path=None, threshold=None):
    """Export matched spectra for an analysis in MGF form for model training."""
    handle = DatabaseBoundOperation(database_connection)
    db_session = handle.session()
    analysis = get_by_name_or_id(db_session, Analysis, analysis_identifier)
    # This export only makes sense for glycopeptide LC-MS/MS analyses.
    if analysis.analysis_type != AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    # Default to stdout, wrapped so it supports the context-manager protocol.
    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')
    with output_stream:
        TrainingMGFExporter.from_analysis(
            handle, analysis.id, output_stream, mzml_path, threshold).run()
# ---- Example 42 (scraped-page separator) ----
def glycopeptide_training_mgf(database_connection, analysis_identifier, output_path=None,
                              mzml_path=None, threshold=None):
    """Export matched spectra for an analysis in MGF form for model training."""
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    # This export only makes sense for glycopeptide LC-MS/MS analyses.
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    # Default to stdout, wrapped so it supports the context-manager protocol.
    if output_path is None:
        output_stream = ctxstream(sys.stdout)
    else:
        output_stream = open(output_path, 'wb')
    with output_stream:
        TrainingMGFExporter.from_analysis(
            database_connection, analysis.id, output_stream, mzml_path, threshold).run()
# ---- Example 43 (scraped-page separator) ----
def annotate_matched_spectra(database_connection, analysis_identifier, output_path, mzml_path=None):
    """Run the spectrum annotation export task for one analysis."""
    handle = DatabaseBoundOperation(database_connection)
    analysis = get_by_name_or_id(handle.session(), Analysis, analysis_identifier)
    # Spectrum annotation applies only to glycopeptide LC-MS/MS analyses.
    if analysis.analysis_type != AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    # Default the output location to the directory holding the database file.
    if output_path is None:
        output_path = os.path.dirname(handle._original_connection)

    exporter = SpectrumAnnotatorExport(
        handle._original_connection, analysis.id, output_path,
        mzml_path)
    exporter.display_header()
    exporter.start()
# ---- Example 44 (scraped-page separator) ----
def glycopeptide_chromatogram_records(database_connection,
                                      analysis_identifier,
                                      output_path,
                                      apex_time_range=None):
    """Dump chromatogram summaries for identified glycopeptides as CSV.

    Records lacking a chromatogram, with a negative MS1 score, or with an
    apex elution time outside ``apex_time_range`` are skipped.
    """
    if apex_time_range is None:
        # Accept every apex time by default.
        apex_time_range = (0, float('inf'))
    start_time, stop_time = apex_time_range
    handle = DatabaseBoundOperation(database_connection)
    db_session = handle.session()  # pylint: disable=not-callable
    analysis = get_by_name_or_id(db_session, Analysis, analysis_identifier)
    if analysis.analysis_type != AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." %
                    (str(analysis.name), str(analysis.analysis_type)),
                    fg='red',
                    err=True)
        raise click.Abort()
    out_handle = click.get_binary_stream('stdout') if output_path is None else open(output_path, 'wb')
    records = db_session.query(IdentifiedGlycopeptide).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    total = len(records)
    # Imported here rather than at module scope in the original; preserved as-is.
    from glycan_profiling.scoring.elution_time_grouping import GlycopeptideChromatogramProxy
    analysis_name = analysis.name
    kept = []
    for index, record in enumerate(records):
        if index % 50 == 0:
            click.echo("%d/%d Records Processed" % (index, total), err=True)
        if record.chromatogram is None:
            continue
        if record.ms1_score < 0:
            continue
        proxy = GlycopeptideChromatogramProxy.from_obj(
            record,
            ms1_score=record.ms1_score,
            ms2_score=record.ms2_score,
            q_value=record.q_value,
            analysis_name=analysis_name,
            mass_shifts=';'.join(
                [m.name for m in record.chromatogram.mass_shifts]))
        # Keep only records whose apex falls inside the requested window.
        if proxy.apex_time < start_time or proxy.apex_time > stop_time:
            continue
        kept.append(proxy)
    click.echo("Writing %d Records" % (len(kept), ), err=True)
    with out_handle:
        GlycopeptideChromatogramProxy.to_csv(kept, csv_stream(out_handle))
# ---- Example 45 (scraped-page separator) ----
 def __init__(self, database_path, analysis_id, mzml_path=None):
     # Unlike sibling classes, this one holds the handle as an attribute
     # rather than subclassing DatabaseBoundOperation.
     self.database_connection = DatabaseBoundOperation(database_path)
     self.analysis_id = analysis_id
     # NOTE(review): `self.session` presumably delegates to
     # `self.database_connection` via a property on this class -- TODO confirm.
     self.analysis = self.session.query(serialize.Analysis).get(
         self.analysis_id)
     # Optional raw data path; consumed by _make_scan_loader below.
     self.mzml_path = mzml_path
     self.scan_loader = None
     self._make_scan_loader()
 def __init__(self, input_connection, analysis_id, hypothesis_name, output_connection=None):
     # When no separate output database is given, write back into the input database.
     if output_connection is None:
         output_connection = input_connection
     self.input_connection = DatabaseBoundOperation(input_connection)
     self.output_connection = DatabaseBoundOperation(output_connection)
     GlycanHypothesisSerializerBase.__init__(self, output_connection, hypothesis_name)
     self.analysis_id = analysis_id
     # Composition strings already processed, used to skip duplicates.
     self.seen_cache = set()
# ---- Example 47 (scraped-page separator) ----
 def __init__(
         self,
         connection,
         hypothesis_id=1,
         cache_size=DEFAULT_CACHE_SIZE,
         loading_interval=DEFAULT_LOADING_INTERVAL,
         threshold_cache_total_count=DEFAULT_THRESHOLD_CACHE_TOTAL_COUNT,
         model_type=Glycopeptide):
     """Bind to *connection* and set up interval caches and indices for
     records of *model_type* belonging to *hypothesis_id*."""
     DatabaseBoundOperation.__init__(self, connection)
     self.hypothesis_id = hypothesis_id
     self.model_type = model_type
     self.loading_interval = loading_interval
     self.threshold_cache_total_count = threshold_cache_total_count
     # LRU-evicting set of cached mass intervals, bounded by cache_size.
     self._intervals = LRUIntervalSet([], cache_size)
     # Intervals known to be not worth caching -- TODO confirm semantics.
     self._ignored_intervals = IntervalSet([])
     # Indices require `self.session`, provided by DatabaseBoundOperation above.
     self.proteins = ProteinIndex(self.session, self.hypothesis_id)
     self.peptides = PeptideIndex(self.session, self.hypothesis_id)
# ---- Example 48 (scraped-page separator) ----
def validate_database_unlocked(database_connection):
    """Return True if the database behind *database_connection* accepts writes.

    Attempts a throw-away INSERT inside the current transaction and rolls it
    back, so no real data is ever persisted. Returns False when the backend
    reports the database is locked (or otherwise unwritable).
    """
    try:
        db = DatabaseBoundOperation(database_connection)
        db.session.add(GlycanHypothesis(name="_____not_real_do_not_use______"))
        # Flush so the INSERT is actually issued: `add` alone keeps the row
        # pending and `rollback` never emits SQL, so without this flush a
        # locked database could never raise OperationalError here.
        db.session.flush()
        db.session.rollback()
        return True
    except OperationalError:
        return False
# ---- Example 49 (scraped-page separator) ----
class HypothesisGlycanSourceValidator(GlycanSourceValidatorBase):
    """Validate that ``source_identifier`` names a GlycanHypothesis that
    actually exists in the source database."""

    def __init__(self, database_connection, source, source_type, source_identifier=None):
        super(HypothesisGlycanSourceValidator, self).__init__(
            database_connection, source, source_type, source_identifier)
        # Open a handle on the *source* database being validated.
        self.handle = DatabaseBoundOperation(source)

    def validate(self):
        """Return True when the identifier resolves, by id or by name.

        The identifier may be a numeric hypothesis id or a hypothesis name.
        """
        if self.source_identifier is None:
            click.secho("No value passed through --glycan-source-identifier.", fg='magenta')
            return False
        try:
            hypothesis_id = int(self.source_identifier)
            inst = self.handle.query(GlycanHypothesis).get(hypothesis_id)
            return inst is not None
        except (TypeError, ValueError):
            # Bug fixes: int() raises ValueError (not TypeError) on a
            # non-numeric string, so the fallback branch was unreachable; and
            # the name lookup used `self.source` (the database path) instead
            # of the identifier, so a name match could never succeed. Compare
            # _copy_hypothesis_across_file_boundaries, which uses the
            # identifier for the name lookup.
            hypothesis_name = self.source_identifier
            inst = self.handle.query(GlycanHypothesis).filter(
                GlycanHypothesis.name == hypothesis_name).first()
            return inst is not None
# ---- Example 50 (scraped-page separator) ----
def merge_glycan_hypotheses(context, database_connection,
                            hypothesis_specification, name):
    """Merge several glycan hypotheses into one new hypothesis.

    Each entry of ``hypothesis_specification`` is a (connection, identifier)
    pair; identifiers may be names or ids.
    """
    handle = DatabaseBoundOperation(database_connection)
    # Resolve each (connection, identifier) pair to a concrete hypothesis id.
    hypothesis_ids = [
        (connection,
         get_by_name_or_id(DatabaseBoundOperation(connection),
                           GlycanHypothesis, ident).id)
        for connection, ident in hypothesis_specification]

    if name is not None:
        # Ensure the chosen name is unique/valid in the target database.
        name = validate_glycan_hypothesis_name(
            context, handle._original_connection, name)
        click.secho("Building Glycan Hypothesis %s" % name, fg='cyan')

    merger = GlycanCompositionHypothesisMerger(
        handle._original_connection, hypothesis_ids, name)
    merger.display_header()
    merger.start()
# ---- Example 51 (scraped-page separator) ----
def glycopeptide_mzidentml(database_connection, analysis_identifier, output_path=None,
                           mzml_path=None):
    '''Write identified glycopeptides as mzIdentML file, and associated MSn spectra
    to a paired mzML file if the matched data are available. If an mzML file is written
    it will also contain the extracted ion chromatograms for each glycopeptide with an
    extracted elution profile.
    '''
    database_connection = DatabaseBoundOperation(database_connection)
    session = database_connection.session()
    analysis = get_by_name_or_id(session, Analysis, analysis_identifier)
    if not analysis.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    if output_path is None:
        # Bug fix: the None default previously fell through to
        # `open(None, 'wb')` and crashed with an unhelpful TypeError.
        raise click.ClickException(
            "An output path is required to write an mzIdentML file.")
    loader = AnalysisDeserializer(
        database_connection._original_connection, analysis_id=analysis.id)
    glycopeptides = loader.load_identified_glycopeptides()
    with open(output_path, 'wb') as outfile:
        writer = MzIdentMLSerializer(
            outfile, glycopeptides, analysis, loader, source_mzml_path=mzml_path)
        writer.run()
# ---- Example 52 (scraped-page separator) ----
def export_identified_glycans_from_glycopeptides(database_connection, analysis_identifier, output_path):
    """Write the glycan compositions attached to identified glycopeptides of an
    analysis in an importable hypothesis CSV form."""
    handle = DatabaseBoundOperation(database_connection)
    db_session = handle.session()
    analysis = get_by_name_or_id(db_session, Analysis, analysis_identifier)
    if analysis.analysis_type != AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    # Walk from compositions through glycan combinations to the glycopeptides
    # that were actually identified in this analysis.
    glycans = db_session.query(GlycanComposition).join(
        GlycanCombinationGlycanComposition).join(GlycanCombination).join(
        Glycopeptide,
        Glycopeptide.glycan_combination_id == GlycanCombination.id).join(
        IdentifiedGlycopeptide,
        IdentifiedGlycopeptide.structure_id == Glycopeptide.id).filter(
        IdentifiedGlycopeptide.analysis_id == analysis.id).all()
    # Default to stdout, wrapped so it supports the context-manager protocol.
    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')
    with output_stream:
        ImportableGlycanHypothesisCSVSerializer(output_stream, glycans).run()
# ---- Example 53 (scraped-page separator) ----
def glycopeptide_spectrum_matches(database_connection, analysis_identifier, output_path=None):
    '''Write each matched glycopeptide spectrum in CSV format
    '''
    handle = DatabaseBoundOperation(database_connection)
    db_session = handle.session()
    analysis = get_by_name_or_id(db_session, Analysis, analysis_identifier)
    if analysis.analysis_type != AnalysisTypeEnum.glycopeptide_lc_msms:
        click.secho("Analysis %r is of type %r." % (
            str(analysis.name), str(analysis.analysis_type)), fg='red', err=True)
        raise click.Abort()
    analysis_id = analysis.id
    # Map protein id -> name for proteins with spectrum matches in this analysis.
    protein_index = dict(
        db_session.query(Protein.id, Protein.name).join(
            Protein.glycopeptides).join(GlycopeptideSpectrumMatch).filter(
            GlycopeptideSpectrumMatch.analysis_id == analysis_id))

    def iter_converted():
        # Page through matches ordered by scan id, expiring the session
        # between pages to bound memory use.
        page_size = 100000
        offset = 0
        base_query = db_session.query(GlycopeptideSpectrumMatch).filter(
            GlycopeptideSpectrumMatch.analysis_id == analysis_id).order_by(
            GlycopeptideSpectrumMatch.scan_id)
        while True:
            db_session.expire_all()
            batch = base_query.slice(offset, offset + page_size).all()
            if not batch:
                break
            for match in batch:
                yield match.convert()
            offset += page_size

    # Default to stdout, wrapped so it supports the context-manager protocol.
    output_stream = ctxstream(sys.stdout) if output_path is None else open(output_path, 'wb')
    with output_stream:
        GlycopeptideSpectrumMatchAnalysisCSVSerializer(
            output_stream, iter_converted(), protein_index).run()
class GlycopeptideAnalysisGlycanCompositionExtractionHypothesisSerializer(GlycanHypothesisSerializerBase):
    """Build a glycan hypothesis from the glycan compositions attached to
    glycopeptides identified in a prior analysis, copying each distinct
    composition (and its structure-class links) into the output database.
    """

    def __init__(self, input_connection, analysis_id, hypothesis_name, output_connection=None):
        # When no separate output database is given, write back into the input database.
        if output_connection is None:
            output_connection = input_connection
        self.input_connection = DatabaseBoundOperation(input_connection)
        self.output_connection = DatabaseBoundOperation(output_connection)
        GlycanHypothesisSerializerBase.__init__(self, output_connection, hypothesis_name)
        self.analysis_id = analysis_id
        # Composition strings already copied; used to skip duplicates.
        self.seen_cache = set()

    def get_all_compositions(self):
        # All compositions linked (via glycan combinations) to glycopeptides
        # identified in the source analysis.
        return self.input_connection.query(DBGlycanComposition).join(GlycanCombinationGlycanComposition).join(
            Glycopeptide,
            GlycanCombinationGlycanComposition.c.combination_id == Glycopeptide.glycan_combination_id).join(
            IdentifiedGlycopeptide, IdentifiedGlycopeptide.structure_id == Glycopeptide.id).filter(
            IdentifiedGlycopeptide.analysis_id == self.analysis_id)

    def extract_composition(self, db_obj):
        """Copy one glycan composition into the output hypothesis, skipping
        compositions that were already seen."""
        composition = GlycanComposition.parse(db_obj.composition)
        if str(composition) in self.seen_cache:
            return
        self.seen_cache.add(str(composition))
        mass = composition.mass()
        composition_string = composition.serialize()
        formula_string = formula(composition.total_composition())
        inst = DBGlycanComposition(
            calculated_mass=mass, formula=formula_string,
            composition=composition_string,
            hypothesis_id=self.hypothesis_id)
        self.output_connection.session.add(inst)
        # Flush so `inst.id` is assigned before linking structure classes below.
        self.output_connection.session.flush()
        for sc in db_obj.structure_classes:
            self.output_connection.session.execute(
                GlycanCompositionToClass.insert(), dict(glycan_id=inst.id, class_id=sc.id))
        self.output_connection.session.flush()

    def run(self):
        """Copy every distinct observed composition, then commit once."""
        q = self.get_all_compositions()
        for gc in q:
            self.extract_composition(gc)
        self.output_connection.session.commit()
 def __init__(self, connection):
     DatabaseBoundOperation.__init__(self, connection)
     # No sample run bound at construction time; set later during serialization.
     self.sample_run_id = None
     # NOTE(review): presumably map source scan/peak identifiers to their
     # serialized database ids -- TODO confirm against callers.
     self.ms_scan_id_map = dict()
     self.peak_id_map = dict()
# ---- Example 56 (scraped-page separator) ----
 def __init__(self, database_connection, hypothesis_id):
     # Bind to the database and remember which hypothesis to operate on.
     DatabaseBoundOperation.__init__(self, database_connection)
     self.hypothesis_id = hypothesis_id
 def __init__(self, connection, source_hypothesis_id, target_hypothesis_id, max_size=1):
     DatabaseBoundOperation.__init__(self, connection)
     # Records are read from the source hypothesis and written to the target.
     self.source_hypothesis_id = source_hypothesis_id
     self.target_hypothesis_id = target_hypothesis_id
     # NOTE(review): presumably the maximum combination size -- TODO confirm.
     self.max_size = max_size
     # Running tally, starts at zero.
     self.total_count = 0
 def __init__(self, connection, hypothesis_id):
     DatabaseBoundOperation.__init__(self, connection)
     self.hypothesis_id = hypothesis_id
     # Eagerly load every peptide for this hypothesis into an in-memory index.
     self.index = PeptideIndex()
     self.index.populate(self._get_all_peptides())
# ---- Example 59 (scraped-page separator) ----
 def __init__(self, database_connection):
     DatabaseBoundOperation.__init__(self, database_connection)
     # Set once a hypothesis is created or selected later in the workflow.
     self.hypothesis_id = None
     # NOTE(review): presumably maps glycan composition keys to database
     # ids -- TODO confirm against the populating code.
     self.glycan_composition_id_map = dict()
     self._structure_class_loader = None