def glycopeptide_mzid(context, mzid_file, database_connection, name, occupied_glycosites, target_protein,
                      target_protein_re, processes, glycan_source, glycan_source_type,
                      glycan_source_identifier, reference_fasta, peptide_length_range=(5, 60)):
    '''Constructs a glycopeptide hypothesis from a MzIdentML file of proteins and a collection of glycans.
    '''
    # NOTE(review): the docstring is deliberately left as its one-line summary. This looks
    # like a CLI entry point (it takes ``context`` and reports through ``click``), where
    # the docstring may be surfaced as help text -- confirm before expanding it.
    #
    # Flow: validate the protein selection and the glycan source, cap the worker count,
    # optionally validate the hypothesis name, build the glycan hypothesis, then run the
    # mzIdentML serializer. Returns ``builder.hypothesis_id``.

    proteins = validate_mzid_proteins(
        context, mzid_file, target_protein, target_protein_re)
    validate_glycan_source(
        context, database_connection, glycan_source, glycan_source_type,
        glycan_source_identifier)

    # Never request more workers than the machine reports CPUs.
    processes = min(multiprocessing.cpu_count(), processes)

    if name is not None:
        # The validator's return value replaces the requested name.
        name = validate_glycopeptide_hypothesis_name(
            context, database_connection, name)
        click.secho("Building Glycopeptide Hypothesis %s" % name, fg='cyan')

    # The builder is selected by glycan source type from a module-level registry.
    build_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = build_glycan_hypothesis(
        database_connection, glycan_source, name, glycan_source_identifier)

    builder = MultipleProcessMzIdentMLGlycopeptideHypothesisSerializer(
        mzid_file, database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        hypothesis_name=name,
        target_proteins=proteins,
        max_glycosylation_events=occupied_glycosites,
        reference_fasta=reference_fasta,
        # Previously accepted by this function but silently dropped; forward it so the
        # caller's requested peptide length bounds actually reach the serializer.
        peptide_length_range=peptide_length_range,
        n_processes=processes)
    builder.display_header()
    builder.start()
    return builder.hypothesis_id
def glycopeptide_mzid(context, mzid_file, database_connection, name, occupied_glycosites, target_protein,
                      target_protein_re, processes, glycan_source, glycan_source_type,
                      glycan_source_identifier, reference_fasta):
    '''Constructs a glycopeptide hypothesis from a MzIdentML file of proteins and a collection of glycans.
    '''
    # NOTE(review): docstring kept to its original one-line summary; this appears to be a
    # CLI entry point whose docstring may be shown as help text -- confirm before expanding.

    # Step 1: validate inputs. Proteins are resolved from the explicit selection and the
    # regular-expression selection; the glycan source is only checked here.
    selected_proteins = validate_mzid_proteins(context, mzid_file, target_protein, target_protein_re)
    validate_glycan_source(
        context, database_connection, glycan_source, glycan_source_type, glycan_source_identifier)

    # Step 2: cap the worker count at the number of CPUs reported by the machine.
    worker_count = min(multiprocessing.cpu_count(), processes)

    # Step 3: a supplied name is passed through the validator, whose result is used from
    # here on; with no name, nothing is validated or announced.
    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            context, database_connection, hypothesis_name)
        click.secho("Building Glycopeptide Hypothesis %s" % hypothesis_name, fg='cyan')

    # Step 4: build the glycan hypothesis with the builder registered for this source type.
    make_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = make_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name, glycan_source_identifier)

    # Step 5: configure and run the serializer, then hand back the new hypothesis id.
    serializer_options = dict(
        glycan_hypothesis_id=glycan_hypothesis_id,
        hypothesis_name=hypothesis_name,
        target_proteins=selected_proteins,
        max_glycosylation_events=occupied_glycosites,
        reference_fasta=reference_fasta,
        n_processes=worker_count,
    )
    serializer = MultipleProcessMzIdentMLGlycopeptideHypothesisSerializer(
        mzid_file, database_connection, **serializer_options)
    serializer.display_header()
    serializer.start()
    return serializer.hypothesis_id
def glycopeptide_fa(context, fasta_file, database_connection, enzyme, missed_cleavages, occupied_glycosites,
                    name, constant_modification, variable_modification, processes, glycan_source,
                    glycan_source_type, glycan_source_identifier=None, semispecific_digest=False,
                    reverse=False, dry_run=False, peptide_length_range=(5, 60),
                    not_full_crossproduct=False):
    '''Constructs a glycopeptide hypothesis from a FASTA file of proteins and a collection of glycans.
    '''
    # NOTE(review): the docstring is deliberately left as its one-line summary. This looks
    # like a CLI entry point (it takes ``context`` and reports through ``click``), where
    # the docstring may be surfaced as help text -- confirm before expanding it.
    #
    # Flow: pick the serializer class, validate modifications and the glycan source, cap
    # the worker count, optionally validate the name, resolve modification rules, build
    # the glycan hypothesis, then run the serializer. Returns ``builder.hypothesis_id``.

    # ``reverse`` takes precedence over ``dry_run`` when both are set.
    if reverse:
        task_type = ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer
        click.secho("Using ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer", fg='yellow')
    elif dry_run:
        task_type = NonSavingMultipleProcessFastaGlycopeptideHypothesisSerializer
        click.secho("Using NonSavingMultipleProcessFastaGlycopeptideHypothesisSerializer", fg='yellow')
    else:
        task_type = MultipleProcessFastaGlycopeptideHypothesisSerializer

    validate_modifications(
        context, constant_modification + variable_modification)
    validate_glycan_source(
        context, database_connection, glycan_source, glycan_source_type,
        glycan_source_identifier)

    # Never request more workers than the machine reports CPUs.
    processes = min(multiprocessing.cpu_count(), processes)

    if name is not None:
        # The validator's return value replaces the requested name.
        name = validate_glycopeptide_hypothesis_name(
            context, database_connection, name)
        click.secho("Building Glycopeptide Hypothesis %s" % name, fg='cyan')

    # Look each modification specification up in a table restricted to the requested ones.
    mt = RestrictedModificationTable(
        None, constant_modification, variable_modification)
    constant_modification = [mt[c] for c in constant_modification]
    variable_modification = [mt[c] for c in variable_modification]

    build_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = build_glycan_hypothesis(
        database_connection, glycan_source, name, glycan_source_identifier)

    builder = task_type(
        fasta_file, database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        protease=enzyme,
        constant_modifications=constant_modification,
        variable_modifications=variable_modification,
        max_missed_cleavages=missed_cleavages,
        max_glycosylation_events=occupied_glycosites,
        hypothesis_name=name,
        # Previously accepted by this function but silently dropped; forward it so the
        # caller's requested peptide length bounds actually reach the serializer.
        peptide_length_range=peptide_length_range,
        semispecific=semispecific_digest,
        n_processes=processes,
        # The flag is phrased negatively at this interface; the serializer takes the
        # positive form.
        full_cross_product=not not_full_crossproduct)
    builder.display_header()
    builder.start()
    return builder.hypothesis_id
def mzid_glycopeptide(database_connection, mzid_file, name, occupied_glycosites, target_protein,
                      processes, glycan_source, glycan_source_type, glycan_source_identifier,
                      channel):
    '''Build a glycopeptide hypothesis from an mzIdentML file and report the result on ``channel``.

    The inputs are validated, a glycan hypothesis is built from the glycan source, and the
    mzIdentML serializer is run. Afterwards the hypothesis records of ``database_connection``
    are searched for the one whose ``uuid`` matches the freshly built hypothesis; that
    record, stamped with ``channel.user.id``, is sent as a "new-hypothesis" message. If no
    record matches, a "Something went wrong" message listing the records is sent instead.

    Validation failures of the glycan source and any exception raised while building are
    reported through ``channel.abort``. Whether ``channel.abort`` stops execution is not
    visible from this function.
    '''
    # There is no CLI context when running as a task.
    context = None
    selected_proteins = validate_mzid_proteins(context, mzid_file, target_protein, [])

    try:
        validate_glycan_source(
            context, database_connection, glycan_source, glycan_source_type,
            glycan_source_identifier)
    except Abort:
        channel.abort(
            "Could not validate the glycan source, %s, %s" % (glycan_source, glycan_source_type))

    # A supplied name is replaced by whatever the validator returns.
    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            context, database_connection, hypothesis_name)

    make_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = make_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name, glycan_source_identifier)

    serializer = MultipleProcessMzIdentMLGlycopeptideHypothesisSerializer(
        mzid_file,
        database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        hypothesis_name=hypothesis_name,
        target_proteins=selected_proteins,
        max_glycosylation_events=occupied_glycosites,
        n_processes=processes)

    try:
        serializer.start()
        records = project_hypothesis.HypothesisRecordSet(database_connection)

        # Locate the record describing the hypothesis that was just written.
        matched = None
        for candidate in records:
            if candidate.uuid == serializer.hypothesis.uuid:
                matched = candidate
                break

        if matched is None:
            channel.send(Message("Something went wrong (%r)" % (list(records),)))
        else:
            owned_record = matched._replace(user_id=channel.user.id)
            channel.send(Message(owned_record.to_json(), "new-hypothesis"))
    except Exception:
        # Top-level task boundary: forward the traceback rather than letting it escape.
        channel.abort(Message.traceback())
def fasta_glycopeptide(database_connection, fasta_file, enzyme, missed_cleavages, occupied_glycosites, name,
                       constant_modification, variable_modification, processes, glycan_source,
                       glycan_source_type, glycan_source_identifier, peptide_length_range,
                       semispecific_digest, generate_full_crossproduct, generate_reverse_decoys, channel):
    '''Build a glycopeptide hypothesis from a FASTA file and report the result on ``channel``.

    The modifications and glycan source are validated, the modification specifications are
    resolved through a ``RestrictedModificationTable``, and a target hypothesis is built in
    ``database_connection``. When ``generate_reverse_decoys`` is true, a second hypothesis
    is built with the reversing serializer in a sibling database whose path is derived by
    inserting ``.decoy`` before the final extension of ``database_connection``.

    After building, the record whose ``uuid`` matches the new hypothesis is stamped with
    ``channel.user.id`` and the ``full_crossproduct`` option, linked to the decoy record if
    there is one, and sent as a "new-hypothesis" message. If no record matches, a
    "Something went wrong" message is sent instead.

    Validation problems are reported through ``channel.abort``; exceptions raised while
    building are sent as a traceback message. Whether ``channel.abort`` stops execution is
    not visible from this function.
    '''
    # There is no CLI context when running as a task.
    context = None
    try:
        validate_modifications(context, constant_modification + variable_modification)
    except Exception:
        channel.abort(
            "Could not validate the modification specification, Constant: %s, Variable: %s" % (
                constant_modification, variable_modification))
    try:
        validate_glycan_source(
            context, database_connection, glycan_source, glycan_source_type,
            glycan_source_identifier)
    except Abort:
        channel.abort(
            "Could not validate the glycan source, %s, %s" % (glycan_source, glycan_source_type))

    if name is not None:
        # The validator's return value replaces the requested name.
        name = validate_glycopeptide_hypothesis_name(context, database_connection, name)

    # Look each modification specification up in a table restricted to the requested ones.
    mt = RestrictedModificationTable(None, constant_modification, variable_modification)
    constant_modification = [mt[c] for c in constant_modification]
    variable_modification = [mt[c] for c in variable_modification]

    build_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = build_glycan_hypothesis(
        database_connection, glycan_source, name, glycan_source_identifier)

    builder = MultipleProcessFastaGlycopeptideHypothesisSerializer(
        fasta_file, database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        protease=enzyme,
        constant_modifications=constant_modification,
        variable_modifications=variable_modification,
        max_missed_cleavages=missed_cleavages,
        max_glycosylation_events=occupied_glycosites,
        hypothesis_name=name,
        peptide_length_range=peptide_length_range,
        semispecific=semispecific_digest,
        n_processes=processes,
        full_cross_product=generate_full_crossproduct)

    decoy_builder = None
    decoy_database_connection = None
    if generate_reverse_decoys:
        # NOTE(review): ``rsplit`` raises ValueError when the path contains no "." at all;
        # that case is not handled here.
        prefix, ext = database_connection.rsplit(".", 1)
        decoy_database_connection = "%s.decoy.%s" % (prefix, ext)
        try:
            validate_glycan_source(
                context, decoy_database_connection, glycan_source, glycan_source_type,
                glycan_source_identifier)
        except Abort:
            channel.abort(
                "Could not validate the glycan source, %s, %s" % (glycan_source, glycan_source_type))
        # The decoy database gets its own glycan hypothesis, built from the same source.
        decoy_glycan_hypothesis_id = build_glycan_hypothesis(
            decoy_database_connection, glycan_source, name, glycan_source_identifier)
        decoy_builder = ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer(
            fasta_file, decoy_database_connection,
            glycan_hypothesis_id=decoy_glycan_hypothesis_id,
            protease=enzyme,
            constant_modifications=constant_modification,
            variable_modifications=variable_modification,
            max_missed_cleavages=missed_cleavages,
            max_glycosylation_events=occupied_glycosites,
            hypothesis_name=name,
            peptide_length_range=peptide_length_range,
            semispecific=semispecific_digest,
            n_processes=processes,
            full_cross_product=generate_full_crossproduct)

    try:
        builder.start()
        record = project_hypothesis.HypothesisRecordSet(database_connection)
        hypothesis_record = None
        decoy_hypothesis_record = None

        if decoy_builder:
            decoy_builder.start()
            decoy_record = project_hypothesis.HypothesisRecordSet(decoy_database_connection)
            for item in decoy_record:
                if item.uuid == decoy_builder.hypothesis.uuid:
                    decoy_hypothesis_record = item._replace(
                        user_id=channel.user.id,
                        options={'full_crossproduct': generate_full_crossproduct})
                    break
            else:
                # Fix: report the decoy record set that was actually searched. The original
                # listed the target ``record`` set here, which says nothing about why the
                # decoy lookup failed.
                channel.send(
                    Message("Something went wrong (%r)" % (list(decoy_record), )))
                channel.abort("Could not extract decoy hypothesis")

        for item in record:
            if item.uuid == builder.hypothesis.uuid:
                hypothesis_record = item._replace(
                    user_id=channel.user.id,
                    options={'full_crossproduct': generate_full_crossproduct})
                if decoy_hypothesis_record is not None:
                    hypothesis_record = hypothesis_record._replace(
                        decoy_hypothesis=decoy_hypothesis_record)
                channel.send(
                    Message(hypothesis_record.to_json(), "new-hypothesis"))
                break
        else:
            # ``for``/``else``: reached only when no record matched the new hypothesis.
            channel.send(
                Message("Something went wrong (%r)" % (list(record), )))
    except Exception:
        # Top-level task boundary: forward the traceback rather than letting it escape.
        channel.send(Message.traceback())
def mzid_glycopeptide(database_connection, mzid_file, name, occupied_glycosites, target_protein,
                      processes, glycan_source, glycan_source_type, glycan_source_identifier,
                      peptide_length_range, generate_full_crossproduct, generate_reverse_decoys,
                      channel):
    '''Build a glycopeptide hypothesis from an mzIdentML file and report the result on ``channel``.

    The inputs are validated, a glycan hypothesis is built from the glycan source, and the
    mzIdentML serializer is run against ``database_connection``. The record whose ``uuid``
    matches the new hypothesis is stamped with ``channel.user.id`` and the
    ``full_cross_product`` option and sent as a "new-hypothesis" message; if no record
    matches, a "Something went wrong" message is sent instead.

    Reverse decoys are not implemented for mzIdentML input: when ``generate_reverse_decoys``
    is true the decoy database path is derived and its glycan source validated, then
    ``channel.abort`` is called with "Cannot build a reversed mzIdentML hypothesis". No
    decoy builder is ever created, so the decoy handling further down never runs.

    Whether ``channel.abort`` stops execution is not visible from this function.
    '''
    # There is no CLI context when running as a task.
    context = None
    selected_proteins = validate_mzid_proteins(context, mzid_file, target_protein, [])

    try:
        validate_glycan_source(
            context, database_connection, glycan_source, glycan_source_type,
            glycan_source_identifier)
    except Abort:
        channel.abort(
            "Could not validate the glycan source, %s, %s" % (glycan_source, glycan_source_type))

    # A supplied name is replaced by whatever the validator returns.
    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            context, database_connection, hypothesis_name)

    make_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = make_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name, glycan_source_identifier)

    serializer = MultipleProcessMzIdentMLGlycopeptideHypothesisSerializer(
        mzid_file,
        database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        hypothesis_name=hypothesis_name,
        target_proteins=selected_proteins,
        max_glycosylation_events=occupied_glycosites,
        peptide_length_range=peptide_length_range,
        n_processes=processes,
        full_cross_product=generate_full_crossproduct)

    decoy_builder = None
    decoy_database_connection = None
    if generate_reverse_decoys:
        # TODO Implement reversing mzIdentML database build
        # NOTE(review): ``rsplit`` raises ValueError when the path contains no "." at all.
        stem, extension = database_connection.rsplit(".", 1)
        decoy_database_connection = "%s.decoy.%s" % (stem, extension)
        try:
            validate_glycan_source(
                context, decoy_database_connection, glycan_source, glycan_source_type,
                glycan_source_identifier)
        except Abort:
            channel.abort(
                "Could not validate the glycan source, %s, %s" % (glycan_source, glycan_source_type))
        channel.abort("Cannot build a reversed mzIdentML hypothesis")
        # Sketch of the intended decoy build, kept disabled until a reversing mzIdentML
        # serializer exists:
        # decoy_glycan_hypothesis_id = _glycan_hypothesis_builders[
        #     glycan_source_type](decoy_database_connection, glycan_source,
        #                         name, glycan_source_identifier)
        # decoy_builder = ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer(
        #     mzid_file, decoy_database_connection,
        #     glycan_hypothesis_id=decoy_glycan_hypothesis_id,
        #     max_glycosylation_events=occupied_glycosites,
        #     hypothesis_name="Reverse " + name,
        #     peptide_length_range=peptide_length_range,
        #     n_processes=processes,
        #     full_cross_product=full_cross_product)

    try:
        serializer.start()
        records = project_hypothesis.HypothesisRecordSet(database_connection)
        decoy_hypothesis_record = None

        # Unreachable today because ``decoy_builder`` is never assigned above; kept so the
        # flow is ready once decoy building is implemented.
        if decoy_builder:
            decoy_builder.start()
            decoy_records = project_hypothesis.HypothesisRecordSet(decoy_database_connection)
            for candidate in decoy_records:
                if candidate.uuid == decoy_builder.hypothesis.uuid:
                    decoy_hypothesis_record = candidate._replace(
                        user_id=channel.user.id,
                        options={'full_cross_product': generate_full_crossproduct})
                    break

        # Locate the record describing the hypothesis that was just written.
        matched = None
        for candidate in records:
            if candidate.uuid == serializer.hypothesis.uuid:
                matched = candidate
                break

        if matched is None:
            channel.send(
                Message("Something went wrong (%r)" % (list(records), )))
        else:
            hypothesis_record = matched._replace(
                user_id=channel.user.id,
                options={'full_cross_product': generate_full_crossproduct})
            if decoy_hypothesis_record is not None:
                hypothesis_record = hypothesis_record._replace(
                    decoy_hypothesis=decoy_hypothesis_record)
            channel.send(
                Message(hypothesis_record.to_json(), "new-hypothesis"))
    except Exception:
        # Top-level task boundary: forward the traceback rather than letting it escape.
        channel.abort(Message.traceback())
def glycopeptide_fa(context, fasta_file, database_connection, enzyme, missed_cleavages, occupied_glycosites,
                    name, constant_modification, variable_modification, processes, glycan_source,
                    glycan_source_type, glycan_source_identifier=None, reverse=False, dry_run=False):
    '''Constructs a glycopeptide hypothesis from a FASTA file of proteins and a collection of glycans.
    '''
    # NOTE(review): docstring kept to its original one-line summary; this appears to be a
    # CLI entry point whose docstring may be shown as help text -- confirm before expanding.

    # Step 1: choose the serializer class. ``reverse`` wins over ``dry_run`` when both
    # are set; the two non-default choices are announced on the console.
    if reverse:
        serializer_type = ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer
        click.secho(
            "Using ReversingMultipleProcessFastaGlycopeptideHypothesisSerializer", fg='yellow')
    elif dry_run:
        serializer_type = NonSavingMultipleProcessFastaGlycopeptideHypothesisSerializer
        click.secho(
            "Using NonSavingMultipleProcessFastaGlycopeptideHypothesisSerializer", fg='yellow')
    else:
        serializer_type = MultipleProcessFastaGlycopeptideHypothesisSerializer

    # Step 2: validate the modification specifications and the glycan source.
    validate_modifications(context, constant_modification + variable_modification)
    validate_glycan_source(
        context, database_connection, glycan_source, glycan_source_type, glycan_source_identifier)

    # Step 3: cap the worker count at the number of CPUs reported by the machine.
    worker_count = min(multiprocessing.cpu_count(), processes)

    # Step 4: a supplied name is passed through the validator, whose result is used from
    # here on; with no name, nothing is validated or announced.
    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            context, database_connection, hypothesis_name)
        click.secho("Building Glycopeptide Hypothesis %s" % hypothesis_name, fg='cyan')

    # Step 5: resolve every modification specification through a table restricted to
    # the requested modifications.
    modification_table = RestrictedModificationTable(
        None, constant_modification, variable_modification)
    constant_rules = [modification_table[spec] for spec in constant_modification]
    variable_rules = [modification_table[spec] for spec in variable_modification]

    # Step 6: build the glycan hypothesis with the builder registered for this source type.
    make_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = make_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name, glycan_source_identifier)

    # Step 7: configure and run the serializer, then hand back the new hypothesis id.
    serializer_options = dict(
        glycan_hypothesis_id=glycan_hypothesis_id,
        protease=enzyme,
        constant_modifications=constant_rules,
        variable_modifications=variable_rules,
        max_missed_cleavages=missed_cleavages,
        max_glycosylation_events=occupied_glycosites,
        hypothesis_name=hypothesis_name,
        n_processes=worker_count,
    )
    serializer = serializer_type(fasta_file, database_connection, **serializer_options)
    serializer.display_header()
    serializer.start()
    return serializer.hypothesis_id
def fasta_glycopeptide(database_connection, fasta_file, enzyme, missed_cleavages, occupied_glycosites, name,
                       constant_modification, variable_modification, processes, glycan_source,
                       glycan_source_type, glycan_source_identifier, channel):
    '''Build a glycopeptide hypothesis from a FASTA file and report the result on ``channel``.

    The modifications and glycan source are validated, the modification specifications are
    resolved through a ``RestrictedModificationTable``, a glycan hypothesis is built, and
    the FASTA serializer is run against ``database_connection``. The record whose ``uuid``
    matches the new hypothesis is stamped with ``channel.user.id`` and sent as a
    "new-hypothesis" message; if no record matches, a "Something went wrong" message
    listing the records is sent instead.

    Validation problems are reported through ``channel.abort``; exceptions raised while
    building are sent as a traceback message. Whether ``channel.abort`` stops execution is
    not visible from this function.
    '''
    # There is no CLI context when running as a task.
    context = None

    try:
        validate_modifications(context, constant_modification + variable_modification)
    except Exception:
        channel.abort(
            "Could not validate the modification specification, Constant: %s, Variable: %s" % (
                constant_modification, variable_modification))

    try:
        validate_glycan_source(
            context, database_connection, glycan_source, glycan_source_type,
            glycan_source_identifier)
    except Abort:
        channel.abort(
            "Could not validate the glycan source, %s, %s" % (glycan_source, glycan_source_type))

    # A supplied name is replaced by whatever the validator returns.
    hypothesis_name = name
    if hypothesis_name is not None:
        hypothesis_name = validate_glycopeptide_hypothesis_name(
            context, database_connection, hypothesis_name)

    # Resolve every modification specification through a table restricted to the
    # requested modifications.
    modification_table = RestrictedModificationTable(
        None, constant_modification, variable_modification)
    constant_rules = [modification_table[spec] for spec in constant_modification]
    variable_rules = [modification_table[spec] for spec in variable_modification]

    make_glycan_hypothesis = _glycan_hypothesis_builders[glycan_source_type]
    glycan_hypothesis_id = make_glycan_hypothesis(
        database_connection, glycan_source, hypothesis_name, glycan_source_identifier)

    serializer = MultipleProcessFastaGlycopeptideHypothesisSerializer(
        fasta_file,
        database_connection,
        glycan_hypothesis_id=glycan_hypothesis_id,
        protease=enzyme,
        constant_modifications=constant_rules,
        variable_modifications=variable_rules,
        max_missed_cleavages=missed_cleavages,
        max_glycosylation_events=occupied_glycosites,
        hypothesis_name=hypothesis_name,
        n_processes=processes)

    try:
        serializer.start()
        records = project_hypothesis.HypothesisRecordSet(database_connection)

        # Locate the record describing the hypothesis that was just written.
        matched = None
        for candidate in records:
            if candidate.uuid == serializer.hypothesis.uuid:
                matched = candidate
                break

        if matched is None:
            channel.send(
                Message("Something went wrong (%r)" % (list(records), )))
        else:
            owned_record = matched._replace(user_id=channel.user.id)
            channel.send(
                Message(owned_record.to_json(), "new-hypothesis"))
    except Exception:
        # Top-level task boundary: forward the traceback rather than letting it escape.
        channel.send(Message.traceback())