Ejemplo n.º 1
0
def glycopeptide_mzid(context, mzid_file, database_connection, name, occupied_glycosites, target_protein,
                      target_protein_re, processes, glycan_source, glycan_source_type, glycan_source_identifier,
                      reference_fasta, peptide_length_range=(5, 60)):
    '''Constructs a glycopeptide hypothesis from a MzIdentML file of proteins and a
    collection of glycans.
    '''
    # NOTE(review): the docstring is kept verbatim on purpose. The decorators are not
    # visible here, but if this function is registered as a click command the docstring
    # doubles as the command's help text, so parameter notes live in comments instead.
    #
    # context                  -- passed through to the validate_* helpers.
    # mzid_file                -- first positional argument of the serializer.
    # database_connection      -- where the glycan and glycopeptide hypotheses are built.
    # name                     -- optional hypothesis name; validated only when not None.
    # occupied_glycosites      -- forwarded as ``max_glycosylation_events``.
    # target_protein(_re)      -- resolved by validate_mzid_proteins into ``target_proteins``.
    # processes                -- requested worker count, capped at the local CPU count.
    # glycan_source*           -- validated, then handed to the builder selected by
    #                             ``glycan_source_type``.
    # reference_fasta          -- forwarded unchanged to the serializer.
    # peptide_length_range     -- forwarded unchanged to the serializer.
    #
    # Returns ``builder.hypothesis_id`` once the build has been started.
    proteins = validate_mzid_proteins(
        context, mzid_file, target_protein, target_protein_re)
    validate_glycan_source(context, database_connection,
                           glycan_source, glycan_source_type,
                           glycan_source_identifier)

    # Never ask for more workers than this machine reports CPUs.
    processes = min(multiprocessing.cpu_count(), processes)

    if name is not None:
        name = validate_glycopeptide_hypothesis_name(
            context, database_connection, name)
        click.secho("Building Glycopeptide Hypothesis %s" % name, fg='cyan')

    # The glycan hypothesis is built first; its id ties the glycopeptide build to it.
    glycan_hypothesis_id = _glycan_hypothesis_builders[
        glycan_source_type](database_connection, glycan_source, name, glycan_source_identifier)

    builder = MultipleProcessMzIdentMLGlycopeptideHypothesisSerializer(
        mzid_file, database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        hypothesis_name=name,
        target_proteins=proteins,
        max_glycosylation_events=occupied_glycosites,
        reference_fasta=reference_fasta,
        # This argument used to be accepted and then dropped, so the caller's
        # choice had no effect on the build. Forward it explicitly.
        peptide_length_range=peptide_length_range,
        n_processes=processes)
    builder.display_header()
    builder.start()
    return builder.hypothesis_id
Ejemplo n.º 2
0
def glycopeptide_mzid(context, mzid_file, database_connection, name,
                      occupied_glycosites, target_protein, target_protein_re,
                      processes, glycan_source, glycan_source_type,
                      glycan_source_identifier, reference_fasta):
    '''Constructs a glycopeptide hypothesis from a MzIdentML file of proteins and a
    collection of glycans.
    '''
    # NOTE(review): docstring left verbatim; if this is a click command (decorators are
    # not visible here) it is also the help text shown to users.

    # Step 1: validate the inputs before any build work starts.
    target_proteins = validate_mzid_proteins(
        context, mzid_file, target_protein, target_protein_re)
    validate_glycan_source(
        context, database_connection, glycan_source, glycan_source_type,
        glycan_source_identifier)

    # Step 2: cap the worker count at the number of CPUs reported locally.
    worker_count = min(multiprocessing.cpu_count(), processes)

    # Step 3: a missing name is passed through untouched; a given one is validated
    # and announced on the console.
    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            context, database_connection, hypothesis_name)
        click.secho("Building Glycopeptide Hypothesis %s" % hypothesis_name,
                    fg='cyan')

    # Step 4: build the glycan hypothesis with the builder registered for this
    # source type.
    build_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = build_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name,
        glycan_source_identifier)

    # Step 5: configure and run the glycopeptide serializer.
    serializer_options = dict(
        glycan_hypothesis_id=glycan_hypothesis_id,
        hypothesis_name=hypothesis_name,
        target_proteins=target_proteins,
        max_glycosylation_events=occupied_glycosites,
        reference_fasta=reference_fasta,
        n_processes=worker_count,
    )
    serializer = MultipleProcessMzIdentMLGlycopeptideHypothesisSerializer(
        mzid_file, database_connection, **serializer_options)
    serializer.display_header()
    serializer.start()
    return serializer.hypothesis_id
Ejemplo n.º 3
0
def glycopeptide_fa(context, fasta_file, database_connection, enzyme, missed_cleavages, occupied_glycosites, name,
                    constant_modification, variable_modification, processes, glycan_source, glycan_source_type,
                    glycan_source_identifier=None, semispecific_digest=False, reverse=False, dry_run=False,
                    peptide_length_range=(5, 60), not_full_crossproduct=False):
    '''Constructs a glycopeptide hypothesis from a FASTA file of proteins and a
    collection of glycans.
    '''
    # NOTE(review): the docstring is kept verbatim on purpose. The decorators are not
    # visible here, but if this function is registered as a click command the docstring
    # doubles as the command's help text, so parameter notes live in comments instead.
    #
    # enzyme                   -- forwarded as ``protease``.
    # missed_cleavages         -- forwarded as ``max_missed_cleavages``.
    # occupied_glycosites      -- forwarded as ``max_glycosylation_events``.
    # constant/variable_modification
    #                          -- validated together, then resolved through a
    #                             RestrictedModificationTable before being forwarded.
    # processes                -- requested worker count, capped at the local CPU count.
    # semispecific_digest      -- forwarded as ``semispecific``.
    # reverse / dry_run        -- select the serializer class; ``reverse`` wins if both
    #                             are set.
    # peptide_length_range     -- forwarded unchanged to the serializer.
    # not_full_crossproduct    -- inverted and forwarded as ``full_cross_product``.
    #
    # Returns ``builder.hypothesis_id`` once the build has been started.
    if reverse:
        task_type = ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer
        click.secho("Using ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer", fg='yellow')
    elif dry_run:
        task_type = NonSavingMultipleProcessFastaGlycopeptideHypothesisSerializer
        click.secho("Using NonSavingMultipleProcessFastaGlycopeptideHypothesisSerializer", fg='yellow')
    else:
        task_type = MultipleProcessFastaGlycopeptideHypothesisSerializer

    validate_modifications(
        context, constant_modification + variable_modification)
    validate_glycan_source(context, database_connection,
                           glycan_source, glycan_source_type,
                           glycan_source_identifier)

    # Never ask for more workers than this machine reports CPUs.
    processes = min(multiprocessing.cpu_count(), processes)

    if name is not None:
        name = validate_glycopeptide_hypothesis_name(
            context, database_connection, name)
        click.secho("Building Glycopeptide Hypothesis %s" % name, fg='cyan')

    # Swap each modification specifier for the entry the table holds for it.
    mt = RestrictedModificationTable(
        None, constant_modification, variable_modification)
    constant_modification = [mt[c] for c in constant_modification]
    variable_modification = [mt[c] for c in variable_modification]

    # The glycan hypothesis is built first; its id ties the glycopeptide build to it.
    glycan_hypothesis_id = _glycan_hypothesis_builders[
        glycan_source_type](database_connection, glycan_source, name, glycan_source_identifier)

    builder = task_type(
        fasta_file, database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        protease=enzyme,
        constant_modifications=constant_modification,
        variable_modifications=variable_modification,
        max_missed_cleavages=missed_cleavages,
        max_glycosylation_events=occupied_glycosites,
        hypothesis_name=name,
        # This argument used to be accepted and then dropped, so the caller's
        # choice had no effect on the build. Forward it explicitly.
        peptide_length_range=peptide_length_range,
        semispecific=semispecific_digest,
        n_processes=processes,
        full_cross_product=not not_full_crossproduct)
    builder.display_header()
    builder.start()
    return builder.hypothesis_id
def mzid_glycopeptide(database_connection, mzid_file, name, occupied_glycosites, target_protein,
                      processes, glycan_source, glycan_source_type, glycan_source_identifier,
                      channel):
    '''Build a glycopeptide hypothesis from a MzIdentML file and report it on ``channel``.

    The glycan source is validated, the glycan hypothesis is built, and the
    glycopeptide serializer is run. The stored hypothesis record whose uuid
    matches the new hypothesis is then stamped with ``channel.user.id`` and sent
    as a "new-hypothesis" message. If no record matches, a diagnostic message
    listing the records is sent instead. Any exception raised while building or
    reporting is forwarded through ``channel.abort`` as a traceback message.
    '''
    # The validation helpers take a context object; this entry point has none.
    no_context = None
    target_proteins = validate_mzid_proteins(
        no_context, mzid_file, target_protein, [])
    try:
        validate_glycan_source(
            no_context, database_connection, glycan_source,
            glycan_source_type, glycan_source_identifier)
    except Abort:
        channel.abort("Could not validate the glycan source, %s, %s" % (glycan_source, glycan_source_type))

    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            no_context, database_connection, hypothesis_name)

    # Pick the glycan-hypothesis builder registered for this source type.
    build_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = build_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name,
        glycan_source_identifier)

    serializer = MultipleProcessMzIdentMLGlycopeptideHypothesisSerializer(
        mzid_file, database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        hypothesis_name=hypothesis_name,
        target_proteins=target_proteins,
        max_glycosylation_events=occupied_glycosites,
        n_processes=processes)
    try:
        serializer.start()
        record_set = project_hypothesis.HypothesisRecordSet(database_connection)

        # First stored record that belongs to the hypothesis just built, if any.
        matched = next(
            (entry for entry in record_set
             if entry.uuid == serializer.hypothesis.uuid),
            None)
        if matched is None:
            channel.send(Message("Something went wrong (%r)" % (list(record_set),)))
        else:
            owned = matched._replace(user_id=channel.user.id)
            channel.send(Message(owned.to_json(), "new-hypothesis"))
    except Exception:
        channel.abort(Message.traceback())
def fasta_glycopeptide(database_connection, fasta_file, enzyme,
                       missed_cleavages, occupied_glycosites, name,
                       constant_modification, variable_modification, processes,
                       glycan_source, glycan_source_type,
                       glycan_source_identifier, peptide_length_range,
                       semispecific_digest, generate_full_crossproduct,
                       generate_reverse_decoys, channel):
    '''Build a glycopeptide hypothesis from a FASTA file and report it on ``channel``.

    Modifications and the glycan source are validated first; validation failures
    are reported through ``channel.abort``. A target serializer is then
    configured. When ``generate_reverse_decoys`` is truthy, a reversing
    serializer is also configured against a sibling database whose name is
    derived from ``database_connection`` by inserting ``.decoy`` before the last
    dot-separated suffix.

    After the build(s) run, the stored record whose uuid matches the new
    hypothesis is stamped with ``channel.user.id`` and the cross-product option,
    linked to the decoy record when there is one, and sent as a
    "new-hypothesis" message. If no record matches, a diagnostic listing the
    records is sent. Any exception raised during build or reporting is sent as a
    traceback message via ``channel.send``.

    Parameters forwarded under another name: ``enzyme`` -> ``protease``,
    ``missed_cleavages`` -> ``max_missed_cleavages``, ``occupied_glycosites`` ->
    ``max_glycosylation_events``, ``semispecific_digest`` -> ``semispecific``,
    ``generate_full_crossproduct`` -> ``full_cross_product``.
    '''
    # The validation helpers take a context object; this entry point has none.
    context = None
    try:
        validate_modifications(context,
                               constant_modification + variable_modification)
    except Exception:
        channel.abort(
            "Could not validate the modification specification, Constant: %s, Variable: %s"
            % (constant_modification, variable_modification))
    try:
        validate_glycan_source(context, database_connection, glycan_source,
                               glycan_source_type, glycan_source_identifier)
    except Abort:
        channel.abort("Could not validate the glycan source, %s, %s" %
                      (glycan_source, glycan_source_type))

    if name is not None:
        name = validate_glycopeptide_hypothesis_name(context,
                                                     database_connection, name)

    # Swap each modification specifier for the entry the table holds for it.
    mt = RestrictedModificationTable(None, constant_modification,
                                     variable_modification)
    constant_modification = [mt[c] for c in constant_modification]
    variable_modification = [mt[c] for c in variable_modification]

    glycan_hypothesis_id = _glycan_hypothesis_builders[glycan_source_type](
        database_connection, glycan_source, name, glycan_source_identifier)

    builder = MultipleProcessFastaGlycopeptideHypothesisSerializer(
        fasta_file,
        database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        protease=enzyme,
        constant_modifications=constant_modification,
        variable_modifications=variable_modification,
        max_missed_cleavages=missed_cleavages,
        max_glycosylation_events=occupied_glycosites,
        hypothesis_name=name,
        peptide_length_range=peptide_length_range,
        semispecific=semispecific_digest,
        n_processes=processes,
        full_cross_product=generate_full_crossproduct)

    decoy_builder = None
    decoy_database_connection = None
    if generate_reverse_decoys:
        # "<prefix>.<ext>" becomes "<prefix>.decoy.<ext>".
        # NOTE(review): this unpacking requires at least one "." in
        # database_connection.
        prefix, ext = database_connection.rsplit(".", 1)
        decoy_database_connection = "%s.decoy.%s" % (prefix, ext)

        try:
            validate_glycan_source(context, decoy_database_connection,
                                   glycan_source, glycan_source_type,
                                   glycan_source_identifier)
        except Abort:
            channel.abort("Could not validate the glycan source, %s, %s" %
                          (glycan_source, glycan_source_type))

        # The decoy database gets its own glycan hypothesis.
        decoy_glycan_hypothesis_id = _glycan_hypothesis_builders[
            glycan_source_type](decoy_database_connection, glycan_source, name,
                                glycan_source_identifier)

        decoy_builder = ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer(
            fasta_file,
            decoy_database_connection,
            glycan_hypothesis_id=decoy_glycan_hypothesis_id,
            protease=enzyme,
            constant_modifications=constant_modification,
            variable_modifications=variable_modification,
            max_missed_cleavages=missed_cleavages,
            max_glycosylation_events=occupied_glycosites,
            hypothesis_name=name,
            peptide_length_range=peptide_length_range,
            semispecific=semispecific_digest,
            n_processes=processes,
            full_cross_product=generate_full_crossproduct)

    try:
        builder.start()
        record = project_hypothesis.HypothesisRecordSet(database_connection)
        hypothesis_record = None
        decoy_hypothesis_record = None

        if decoy_builder:
            decoy_builder.start()
            decoy_record = project_hypothesis.HypothesisRecordSet(
                decoy_database_connection)
            for item in decoy_record:
                if item.uuid == decoy_builder.hypothesis.uuid:
                    decoy_hypothesis_record = item._replace(
                        user_id=channel.user.id,
                        options={
                            'full_crossproduct': generate_full_crossproduct
                        })
                    break
            else:
                # Report the record set that was searched. This used to dump the
                # target record set, which hid what the decoy database held.
                channel.send(
                    Message("Something went wrong (%r)" % (list(decoy_record), )))
                channel.abort("Could not extract decoy hypothesis")

        for item in record:
            if item.uuid == builder.hypothesis.uuid:
                hypothesis_record = item
                hypothesis_record = hypothesis_record._replace(
                    user_id=channel.user.id,
                    options={'full_crossproduct': generate_full_crossproduct})
                if decoy_hypothesis_record is not None:
                    hypothesis_record = hypothesis_record._replace(
                        decoy_hypothesis=decoy_hypothesis_record)
                channel.send(
                    Message(hypothesis_record.to_json(), "new-hypothesis"))
                break
        else:
            channel.send(
                Message("Something went wrong (%r)" % (list(record), )))
    except Exception:
        channel.send(Message.traceback())
def mzid_glycopeptide(database_connection, mzid_file, name,
                      occupied_glycosites, target_protein, processes,
                      glycan_source, glycan_source_type,
                      glycan_source_identifier, peptide_length_range,
                      generate_full_crossproduct, generate_reverse_decoys,
                      channel):
    '''Build a glycopeptide hypothesis from a MzIdentML file and report it on ``channel``.

    The glycan source is validated, the glycan hypothesis is built, and the
    glycopeptide serializer is run. The stored record whose uuid matches the new
    hypothesis is stamped with ``channel.user.id`` and the cross-product option
    and sent as a "new-hypothesis" message. If no record matches, a diagnostic
    listing the records is sent. Any exception raised while building or
    reporting is forwarded through ``channel.abort`` as a traceback message.

    Reverse decoys are not implemented for MzIdentML input: when
    ``generate_reverse_decoys`` is truthy the decoy glycan source is validated
    and ``channel.abort`` is then called with an explanatory message.
    '''
    # The validation helpers take a context object; this entry point has none.
    no_context = None
    target_proteins = validate_mzid_proteins(
        no_context, mzid_file, target_protein, [])
    try:
        validate_glycan_source(
            no_context, database_connection, glycan_source,
            glycan_source_type, glycan_source_identifier)
    except Abort:
        channel.abort("Could not validate the glycan source, %s, %s" %
                      (glycan_source, glycan_source_type))

    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            no_context, database_connection, hypothesis_name)

    # Pick the glycan-hypothesis builder registered for this source type.
    build_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = build_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name,
        glycan_source_identifier)

    serializer = MultipleProcessMzIdentMLGlycopeptideHypothesisSerializer(
        mzid_file,
        database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        hypothesis_name=hypothesis_name,
        target_proteins=target_proteins,
        max_glycosylation_events=occupied_glycosites,
        peptide_length_range=peptide_length_range,
        n_processes=processes,
        full_cross_product=generate_full_crossproduct)

    decoy_serializer = None
    decoy_database_connection = None
    if generate_reverse_decoys:
        # TODO Implement reversing mzIdentML database build: build a glycan
        # hypothesis in the decoy database and assign a reversing serializer to
        # ``decoy_serializer``. Until then this branch only validates and aborts.
        stem, extension = database_connection.rsplit(".", 1)
        decoy_database_connection = "%s.decoy.%s" % (stem, extension)
        try:
            validate_glycan_source(
                no_context, decoy_database_connection, glycan_source,
                glycan_source_type, glycan_source_identifier)
        except Abort:
            channel.abort("Could not validate the glycan source, %s, %s" %
                          (glycan_source, glycan_source_type))
        channel.abort("Cannot build a reversed mzIdentML hypothesis")

    try:
        serializer.start()
        record_set = project_hypothesis.HypothesisRecordSet(database_connection)
        decoy_entry = None

        # ``decoy_serializer`` is never assigned above, so this branch is
        # currently inert. It is kept so the reporting path is ready once the
        # TODO is resolved.
        if decoy_serializer:
            decoy_serializer.start()
            decoy_record_set = project_hypothesis.HypothesisRecordSet(
                decoy_database_connection)
            decoy_match = next(
                (entry for entry in decoy_record_set
                 if entry.uuid == decoy_serializer.hypothesis.uuid),
                None)
            if decoy_match is not None:
                decoy_entry = decoy_match._replace(
                    user_id=channel.user.id,
                    options={'full_cross_product': generate_full_crossproduct})

        # First stored record that belongs to the hypothesis just built, if any.
        matched = next(
            (entry for entry in record_set
             if entry.uuid == serializer.hypothesis.uuid),
            None)
        if matched is None:
            channel.send(
                Message("Something went wrong (%r)" % (list(record_set), )))
        else:
            reported = matched._replace(
                user_id=channel.user.id,
                options={'full_cross_product': generate_full_crossproduct})
            if decoy_entry is not None:
                reported = reported._replace(decoy_hypothesis=decoy_entry)
            channel.send(Message(reported.to_json(), "new-hypothesis"))
    except Exception:
        channel.abort(Message.traceback())
Ejemplo n.º 7
0
def glycopeptide_fa(context, fasta_file, database_connection, enzyme,
                    missed_cleavages, occupied_glycosites, name,
                    constant_modification, variable_modification, processes,
                    glycan_source, glycan_source_type,
                    glycan_source_identifier=None, reverse=False,
                    dry_run=False):
    '''Constructs a glycopeptide hypothesis from a FASTA file of proteins and a
    collection of glycans.
    '''
    # NOTE(review): docstring left verbatim; if this is a click command (decorators are
    # not visible here) it is also the help text shown to users.

    # Choose the serializer class. ``reverse`` takes precedence over ``dry_run``;
    # the two non-default choices are announced on the console.
    if not (reverse or dry_run):
        serializer_type = MultipleProcessFastaGlycopeptideHypothesisSerializer
    elif reverse:
        serializer_type = ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer
        click.secho(
            "Using ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer",
            fg='yellow')
    else:
        serializer_type = NonSavingMultipleProcessFastaGlycopeptideHypothesisSerializer
        click.secho(
            "Using NonSavingMultipleProcessFastaGlycopeptideHypothesisSerializer",
            fg='yellow')

    # Validate the inputs before any build work starts.
    validate_modifications(
        context, constant_modification + variable_modification)
    validate_glycan_source(
        context, database_connection, glycan_source, glycan_source_type,
        glycan_source_identifier)

    # Cap the worker count at the number of CPUs reported locally.
    worker_count = min(multiprocessing.cpu_count(), processes)

    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            context, database_connection, hypothesis_name)
        click.secho("Building Glycopeptide Hypothesis %s" % hypothesis_name,
                    fg='cyan')

    # Swap each modification specifier for the entry the table holds for it.
    modification_table = RestrictedModificationTable(
        None, constant_modification, variable_modification)
    constant_rules = [modification_table[spec]
                      for spec in constant_modification]
    variable_rules = [modification_table[spec]
                      for spec in variable_modification]

    # Build the glycan hypothesis with the builder registered for this source type.
    build_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = build_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name,
        glycan_source_identifier)

    serializer_options = dict(
        glycan_hypothesis_id=glycan_hypothesis_id,
        protease=enzyme,
        constant_modifications=constant_rules,
        variable_modifications=variable_rules,
        max_missed_cleavages=missed_cleavages,
        max_glycosylation_events=occupied_glycosites,
        hypothesis_name=hypothesis_name,
        n_processes=worker_count,
    )
    serializer = serializer_type(
        fasta_file, database_connection, **serializer_options)
    serializer.display_header()
    serializer.start()
    return serializer.hypothesis_id
def fasta_glycopeptide(database_connection, fasta_file, enzyme,
                       missed_cleavages, occupied_glycosites, name,
                       constant_modification, variable_modification, processes,
                       glycan_source, glycan_source_type,
                       glycan_source_identifier, channel):
    '''Build a glycopeptide hypothesis from a FASTA file and report it on ``channel``.

    Modifications and the glycan source are validated first; validation failures
    are reported through ``channel.abort``. The glycan hypothesis is then built
    and the FASTA serializer is run. The stored record whose uuid matches the
    new hypothesis is stamped with ``channel.user.id`` and sent as a
    "new-hypothesis" message. If no record matches, a diagnostic listing the
    records is sent. Any exception raised while building or reporting is sent as
    a traceback message via ``channel.send``.
    '''
    # The validation helpers take a context object; this entry point has none.
    no_context = None
    try:
        validate_modifications(
            no_context, constant_modification + variable_modification)
    except Exception:
        channel.abort(
            "Could not validate the modification specification, Constant: %s, Variable: %s"
            % (constant_modification, variable_modification))
    try:
        validate_glycan_source(
            no_context, database_connection, glycan_source,
            glycan_source_type, glycan_source_identifier)
    except Abort:
        channel.abort("Could not validate the glycan source, %s, %s" %
                      (glycan_source, glycan_source_type))

    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            no_context, database_connection, hypothesis_name)

    # Swap each modification specifier for the entry the table holds for it.
    modification_table = RestrictedModificationTable(
        None, constant_modification, variable_modification)
    constant_rules = [modification_table[spec]
                      for spec in constant_modification]
    variable_rules = [modification_table[spec]
                      for spec in variable_modification]

    # Pick the glycan-hypothesis builder registered for this source type.
    build_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = build_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name,
        glycan_source_identifier)

    serializer = MultipleProcessFastaGlycopeptideHypothesisSerializer(
        fasta_file,
        database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        protease=enzyme,
        constant_modifications=constant_rules,
        variable_modifications=variable_rules,
        max_missed_cleavages=missed_cleavages,
        max_glycosylation_events=occupied_glycosites,
        hypothesis_name=hypothesis_name,
        n_processes=processes)

    try:
        serializer.start()
        record_set = project_hypothesis.HypothesisRecordSet(database_connection)

        # First stored record that belongs to the hypothesis just built, if any.
        matched = next(
            (entry for entry in record_set
             if entry.uuid == serializer.hypothesis.uuid),
            None)
        if matched is None:
            channel.send(
                Message("Something went wrong (%r)" % (list(record_set), )))
        else:
            owned = matched._replace(user_id=channel.user.id)
            channel.send(Message(owned.to_json(), "new-hypothesis"))
    except Exception:
        channel.send(Message.traceback())