import os
import re

from multiprocessing import cpu_count

from flask import Response, abort, current_app, g, jsonify, request
from werkzeug.utils import secure_filename

# Task classes, validators, and project helpers (AnalyzeGlycopeptideSequenceTask,
# PreprocessMSTask, make_unique_name, touch_file, safepath, intify, Message,
# and friends) are assumed to be imported from elsewhere in this package;
# their import paths are not shown in this excerpt.


def run_search_post():
    """Schedule a glycopeptide sequence analysis for each selected sample."""
    data = request.values
    # Tolerances above 1e-4 are interpreted as PPM and scaled down to a
    # unitless fractional error.
    matching_tolerance = float(data.get("ms1-tolerance", 10))
    if matching_tolerance > 1e-4:
        matching_tolerance *= 1e-6
    grouping_tolerance = float(data.get("peak-grouping-tolerance", 15))
    if grouping_tolerance > 1e-4:
        grouping_tolerance *= 1e-6
    ms2_matching_tolerance = float(data.get("ms2-tolerance", 20))
    if ms2_matching_tolerance > 1e-4:
        ms2_matching_tolerance *= 1e-6
    psm_fdr_threshold = float(data.get("q-value-threshold", 0.05))

    hypothesis_uuid = data.get("hypothesis_choice")
    hypothesis_record = g.manager.hypothesis_manager.get(hypothesis_uuid)
    hypothesis_name = hypothesis_record.name
    sample_records = list(
        map(g.manager.sample_manager.get, data.getlist("samples")))
    minimum_oxonium_threshold = float(
        data.get("minimum-oxonium-threshold", 0.05))
    workload_size = int(data.get("batch-size", 1000))

    for sample_record in sample_records:
        sample_name = sample_record.name
        job_number = g.manager.get_next_job_number()
        name_prefix = "%s at %s (%d)" % (hypothesis_name, sample_name, job_number)
        cleaned_prefix = re.sub(r"[\s()]", "_", name_prefix)
        name_template = g.manager.get_results_path(
            secure_filename(cleaned_prefix) + "_%s.analysis.db")
        storage_path = make_unique_name(name_template)
        task = AnalyzeGlycopeptideSequenceTask(
            hypothesis_record.path, sample_record.path, hypothesis_record.id,
            storage_path, name_prefix,
            grouping_error_tolerance=grouping_tolerance,
            mass_error_tolerance=matching_tolerance,
            msn_mass_error_tolerance=ms2_matching_tolerance,
            psm_fdr_threshold=psm_fdr_threshold,
            minimum_oxonium_threshold=minimum_oxonium_threshold,
            workload_size=workload_size,
            job_name_part=job_number)
        g.add_task(task)
        current_app.logger.info("Scheduled %r", task)
    return Response("Tasks Scheduled")
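# The PPM-normalization branches above recur in every search handler. A
# minimal helper sketch capturing the convention (hypothetical; the handlers
# in this module do not call it): values greater than 1e-4 are taken to be
# in PPM and scaled down to a unitless fractional error.
def as_fractional_tolerance(value, default):
    """Interpret a form value as a fractional mass error tolerance.

    Values above 1e-4 are assumed to be given in PPM and are scaled by
    1e-6; smaller values are passed through unchanged.
    """
    tolerance = float(value) if value is not None else float(default)
    if tolerance > 1e-4:
        tolerance *= 1e-6
    return tolerance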
def run_search_post():
    """Schedule a glycan composition analysis for each selected sample.

    (Several handlers in this excerpt share a name; they are presumably
    registered under different routes or blueprints elsewhere.)
    """
    data = request.values
    # The client submits parallel mass shift name/count lists with a trailing
    # blank template row, which is discarded here.
    mass_shift_data = list(zip(data.getlist('mass_shift_name'),
                               data.getlist('mass_shift_max_count')))
    mass_shift_data = mass_shift_data[:-1]
    mass_shift_data = [(name, int(count)) for name, count in mass_shift_data]

    matching_tolerance = float(data.get("mass-matching-tolerance", 10))
    if matching_tolerance > 1e-4:
        matching_tolerance *= 1e-6
    grouping_tolerance = float(data.get("peak-grouping-tolerance", 15))
    if grouping_tolerance > 1e-4:
        grouping_tolerance *= 1e-6

    hypothesis_uuid = data.get("hypothesis_choice")
    hypothesis_record = g.manager.hypothesis_manager.get(hypothesis_uuid)
    hypothesis_name = hypothesis_record.name
    sample_records = list(
        map(g.manager.sample_manager.get, data.getlist("samples")))
    minimum_mass = float(data.get("minimum-mass", 500.))

    for sample_record in sample_records:
        sample_name = sample_record.name
        job_number = g.manager.get_next_job_number()
        name_prefix = "%s at %s (%d)" % (hypothesis_name, sample_name, job_number)
        cleaned_prefix = re.sub(r"[\s()]", "_", name_prefix)
        name_template = g.manager.get_results_path(
            secure_filename(cleaned_prefix) + "_%s.analysis.db")
        storage_path = make_unique_name(name_template)
        task = AnalyzeGlycanCompositionTask(
            hypothesis_record.path, sample_record.path, hypothesis_record.id,
            storage_path, name_prefix, mass_shift_data,
            grouping_tolerance, matching_tolerance,
            minimum_mass=minimum_mass,
            callback=lambda: 0,
            job_name_part=job_number)
        g.add_task(task)
    return Response("Tasks Scheduled")
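# A hedged sketch of the mass shift form convention assumed above: the client
# submits parallel ``mass_shift_name``/``mass_shift_max_count`` lists whose
# final entry is a blank template row, hence the ``[:-1]`` slice. This helper
# is illustrative only and is not referenced by the handlers here.
def parse_mass_shifts(data):
    """Pair mass shift names with integer max counts from a ``MultiDict``,
    discarding the trailing template row."""
    pairs = list(zip(data.getlist('mass_shift_name'),
                     data.getlist('mass_shift_max_count')))[:-1]
    return [(name, int(count)) for name, count in pairs]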
def post_add_sample():
    """Handle an uploaded sample file.

    Returns
    -------
    Response
    """
    sample_name = request.values['sample-name']
    if sample_name == "":
        sample_name = request.files['observed-ions-file'].filename
    # If no sample name could be constructed at this point and we are not
    # running a native client, stop now.
    if sample_name == "" and not g.has_native_client:
        current_app.logger.info(
            "No sample name could be extracted. %r", request.values)
        return abort(400)

    # If we are running in the native client, the program has different
    # information about where to read the file from. Normal browsers cannot
    # access the full path of an uploaded file, but Electron can. It
    # intercepts the file upload and sends the file's native path instead.
    # Since the native client runs on the local file system, we can read from
    # that path directly without first copying the sample file to the
    # application server's file system.
    if g.has_native_client:
        native_path = request.values.get("observed-ions-file-path")
        if sample_name == "":
            sample_name = os.path.splitext(os.path.basename(native_path))[0]
        if sample_name == "":
            current_app.logger.info(
                "No sample name could be extracted. %r", request.values)
            abort(400)
        path = native_path
        sample_name = g.manager.make_unique_sample_name(sample_name)
        secure_name = secure_filename(sample_name)
        current_app.logger.info(
            "Preparing to run with native path: %r, %r, %r",
            path, sample_name, secure_name)
    else:
        file_name = request.files['observed-ions-file'].filename
        sample_name = g.manager.make_unique_sample_name(sample_name)
        secure_name = secure_filename(file_name)
        path = g.manager.get_temp_path(secure_name)
        request.files['observed-ions-file'].save(path)

    storage_path = g.manager.get_sample_path(
        re.sub(r"[\s()]", "_", secure_name) + '-%s.mzML')
    storage_path = make_unique_name(storage_path)
    touch_file(storage_path)

    # Construct the task with a callback to add the processed sample to the
    # set of project samples.
    start_time = float(request.values['start-time'])
    end_time = float(request.values['end-time'])
    extract_only_tandem_envelopes = bool(
        request.values.get("msms-features-only", False))

    # A custom averagine formula, when provided, overrides the prefab choice.
    prefab_averagine = request.values['ms1-averagine']
    prefab_msn_averagine = request.values['msn-averagine']
    custom_ms1_averagine_formula = request.values['ms1-averagine-custom']
    custom_msn_averagine_formula = request.values['msn-averagine-custom']
    if custom_ms1_averagine_formula:
        averagine = custom_ms1_averagine_formula
    else:
        averagine = prefab_averagine
    if custom_msn_averagine_formula:
        msn_averagine = custom_msn_averagine_formula
    else:
        msn_averagine = prefab_msn_averagine

    ms1_score_threshold = float(request.values['ms1-minimum-isotopic-score'])
    msn_score_threshold = float(request.values['msn-minimum-isotopic-score'])
    missed_peaks = int(request.values['missed-peaks'])
    msn_missed_peaks = int(request.values['msn-missed-peaks'])
    maximum_charge_state = int(request.values['maximum-charge-state'])
    ms1_background_reduction = float(request.values.get(
        'ms1-background-reduction', 5.))
    msn_background_reduction = float(request.values.get(
        'msn-background-reduction', 0.))

    # Cap the worker pool at the number of available CPUs.
    n_workers = g.manager.configuration.get("preprocessor_worker_count", 6)
    if cpu_count() < n_workers:
        n_workers = cpu_count()

    task = PreprocessMSTask(
        path, g.manager.connection_bridge, averagine, start_time, end_time,
        maximum_charge_state, sample_name, msn_averagine,
        ms1_score_threshold, msn_score_threshold, missed_peaks,
        msn_missed_peaks, n_processes=n_workers, storage_path=storage_path,
        extract_only_tandem_envelopes=extract_only_tandem_envelopes,
        ms1_background_reduction=ms1_background_reduction,
        msn_background_reduction=msn_background_reduction,
        callback=lambda: 0)
    g.add_task(task)
    return Response("Task Scheduled")
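# Example request for post_add_sample (a sketch; the URL this view is mounted
# at is defined by routing code outside this excerpt, and the field values
# below are illustrative):
#
#   curl -X POST http://localhost:8000/add_sample \
#       -F "observed-ions-file=@run1.mzML" \
#       -F "sample-name=run1" -F "start-time=0" -F "end-time=120" \
#       -F "ms1-averagine=glycan" -F "msn-averagine=glycan" \
#       -F "ms1-averagine-custom=" -F "msn-averagine-custom=" \
#       -F "ms1-minimum-isotopic-score=35" -F "msn-minimum-isotopic-score=10" \
#       -F "missed-peaks=3" -F "msn-missed-peaks=1" \
#       -F "maximum-charge-state=8"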
def build_glycopeptide_search_space_post():
    values = request.values
    # The client joins multiple modification rules with ";;;" to work around
    # Werkzeug mis-parsing the multivalue form encoding; split them back
    # apart and drop blanks.
    constant_modifications = values.get("constant_modifications").split(";;;")
    variable_modifications = values.get("variable_modifications").split(";;;")
    constant_modifications = [
        const_mod for const_mod in constant_modifications if const_mod]
    variable_modifications = [
        var_mod for var_mod in variable_modifications if var_mod]

    enzyme = values.getlist("enzyme")
    if len(enzyme) == 1:
        enzyme = enzyme[0]
    custom_enzyme = values.get("custom-protease")
    if custom_enzyme:
        if enzyme:
            if isinstance(enzyme, list):
                enzyme.append(custom_enzyme)
            else:
                enzyme = [enzyme, custom_enzyme]
        else:
            enzyme = custom_enzyme

    hypothesis_name = values.get("hypothesis_name")
    hypothesis_name = g.manager.make_unique_hypothesis_name(hypothesis_name)

    generate_reverse_decoys = values.get("generate-reverse-decoys") == 'on'
    generate_full_crossproduct = values.get(
        "generate-full-crossproduct") == 'on'

    secure_name = secure_filename(
        hypothesis_name if hypothesis_name is not None
        else "glycopeptide_database")
    storage_path = safepath(
        g.manager.get_hypothesis_path(re.sub(r"[\s()]", "_", secure_name)) +
        '_glycopeptide_%s.database')
    storage_path = make_unique_name(storage_path)
    touch_file(storage_path)

    protein_list = request.files["protein-list-file"]
    protein_list_type = values.get("proteomics-file-type")
    glycan_file = request.files.get("glycan-definition-file")
    glycan_database = values.get("glycan-database-source")
    glycan_file_type = values.get("glycans-file-format")

    glycan_options = {}

    max_missed_cleavages = intify(values.get("missed_cleavages"))
    maximum_glycosylation_sites = intify(
        values.get("max_glycosylation_sites", 1))

    secure_protein_list = safepath(
        g.manager.get_temp_path(secure_filename(protein_list.filename)))
    protein_list.save(secure_protein_list)

    peptide_min_length = intify(values.get('peptide_min_length', 4), 4)
    peptide_max_length = intify(values.get('peptide_max_length', 60), 60)
    semispecific_digest = values.get('semispecific-digest') == 'on'
    non_specific_digest = values.get("non-specific-digest") == 'on'
    # The non-specific digest mode overrides all other digest parameters.
    if non_specific_digest:
        enzyme = "."
        max_missed_cleavages = peptide_max_length
        semispecific_digest = False

    # When no glycan database is selected, fall back to a user-provided
    # glycan definition file.
    if glycan_database == "" or glycan_database is None:
        glycan_file_type = "text"
        glycan_options["glycan_source_type"] = glycan_file_type
        secure_glycan_file = g.manager.get_temp_path(
            secure_filename(glycan_file.filename))
        glycan_file.save(secure_glycan_file)
        glycan_options["glycomics_source"] = secure_glycan_file
        glycan_options["glycan_source_identifier"] = None
    else:
        option_type, option_id = glycan_database.split(",", 1)
        record = g.manager.hypothesis_manager.get(option_id)
        glycan_options["glycan_source_identifier"] = record.id
        if option_type == "Hypothesis":
            option_type = "hypothesis"
            glycan_options["glycomics_source"] = record.path
        elif option_type == "Analysis":
            option_type = "analysis"
            glycan_options["glycomics_source"] = record.path
        glycan_options["glycan_source_type"] = option_type

    n_workers = g.manager.configuration.get("database_build_worker_count", 4)

    if protein_list_type == "fasta":
        task = BuildGlycopeptideHypothesisFasta(
            storage_path, fasta_file=secure_protein_list, enzyme=enzyme,
            missed_cleavages=max_missed_cleavages,
            occupied_glycosites=maximum_glycosylation_sites,
            name=hypothesis_name,
            constant_modification=constant_modifications,
            variable_modification=variable_modifications,
            processes=n_workers,
            glycan_source=glycan_options["glycomics_source"],
            glycan_source_type=glycan_options["glycan_source_type"],
            glycan_source_identifier=glycan_options["glycan_source_identifier"],
            peptide_length_range=(peptide_min_length, peptide_max_length),
            semispecific_digest=semispecific_digest,
            generate_reverse_decoys=generate_reverse_decoys,
            generate_full_crossproduct=generate_full_crossproduct)
        g.add_task(task)
    elif protein_list_type == 'mzIdentML':
        protein_names = values.get("protein_names").split(",")
        task = BuildGlycopeptideHypothesisMzId(
            storage_path, secure_protein_list, name=hypothesis_name,
            occupied_glycosites=maximum_glycosylation_sites,
            target_protein=protein_names,
            processes=n_workers,
            glycan_source=glycan_options['glycomics_source'],
            glycan_source_type=glycan_options['glycan_source_type'],
            glycan_source_identifier=glycan_options["glycan_source_identifier"],
            peptide_length_range=(peptide_min_length, peptide_max_length),
            generate_reverse_decoys=generate_reverse_decoys,
            generate_full_crossproduct=generate_full_crossproduct)
        g.add_task(task)
    else:
        abort(400)
    return Response("Task Scheduled")
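# ``intify`` is used above with an optional default. The real helper is
# imported from elsewhere in the package; its assumed semantics are roughly:
#
#   def intify(value, default=None):
#       """Coerce a form value to ``int``, returning ``default`` when the
#       value is missing or unparseable."""
#       try:
#           return int(value)
#       except (TypeError, ValueError):
#           return default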
def schedule_error_dummy_task():
    task = DummyTask(throw=True)
    g.add_task(task)
    return jsonify(task_id=task.id)
def schedule_dummy_task():
    task = DummyTask()
    g.add_task(task)
    return jsonify(task_id=task.id)
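# Usage sketch for the dummy-task endpoints (the route paths and the status
# endpoint are assumptions; routing is defined outside this excerpt):
#
#   resp = client.post("/internal/schedule_dummy_task")
#   task_id = resp.get_json()["task_id"]
#   # ...poll the task-status endpoint with task_id to observe completion...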
def run_search_post():
    """Schedule a full glycopeptide database search for each selected sample."""
    data = request.values
    # Tolerances above 1e-4 are interpreted as PPM and scaled down to a
    # unitless fractional error.
    matching_tolerance = float(data.get("ms1-tolerance", 10))
    if matching_tolerance > 1e-4:
        matching_tolerance *= 1e-6
    grouping_tolerance = float(data.get("peak-grouping-tolerance", 15))
    if grouping_tolerance > 1e-4:
        grouping_tolerance *= 1e-6
    ms2_matching_tolerance = float(data.get("ms2-tolerance", 20))
    if ms2_matching_tolerance > 1e-4:
        ms2_matching_tolerance *= 1e-6
    psm_fdr_threshold = float(data.get("q-value-threshold", 0.05))

    # HTML checkboxes submit the literal string 'on' when ticked.
    use_peptide_mass_filter = data.get("peptide-mass-filter") == 'on'
    permute_decoy_glycan_fragments = data.get(
        "permute-decoy-glycan-fragments") == 'on'
    include_rare_signature_ions = data.get(
        "include-rare-signature-ions") == 'on'
    model_retention_time = data.get("model-retention-time") == 'on'

    msn_scoring_model_name = data.get("msn-scoring-model")
    tandem_scoring_model, msn_scoring_options = \
        validate_glycopeptide_tandem_scoring_function(
            None, msn_scoring_model_name)
    if msn_scoring_options:
        current_app.logger.warning(
            "Requested scoring model %s has unused options %s",
            msn_scoring_model_name, msn_scoring_options)

    hypothesis_uuid = data.get("hypothesis_choice")
    hypothesis_record = g.manager.hypothesis_manager.get(hypothesis_uuid)
    hypothesis_name = hypothesis_record.name
    decoy_hypothesis_record = hypothesis_record.decoy_hypothesis
    decoy_hypothesis_path = None
    decoy_hypothesis_id = None
    if decoy_hypothesis_record is not None:
        decoy_hypothesis_path = decoy_hypothesis_record.path
        decoy_hypothesis_id = decoy_hypothesis_record.id

    # With a decoy hypothesis present, use the multipart strategy unless the
    # hypothesis is a full cross-product, in which case fall back to the
    # classic search compared against the decoys.
    search_strategy = GlycopeptideSearchStrategyEnum.classic
    if decoy_hypothesis_record and not hypothesis_record.is_full_crossproduct:
        search_strategy = GlycopeptideSearchStrategyEnum.multipart
    elif decoy_hypothesis_record:
        search_strategy = GlycopeptideSearchStrategyEnum.classic_comparison

    sample_records = list(
        map(g.manager.sample_manager.get, data.getlist("samples")))
    minimum_oxonium_threshold = float(
        data.get("minimum-oxonium-threshold", 0.05))
    workload_size = int(data.get("batch-size", 1000))

    # Parallel name/count lists with a trailing blank template row.
    mass_shift_data = list(zip(data.getlist('mass_shift_name'),
                               data.getlist('mass_shift_max_count')))
    mass_shift_data = mass_shift_data[:-1]
    mass_shift_data = [(name, int(count)) for name, count in mass_shift_data]

    for sample_record in sample_records:
        sample_name = sample_record.name
        job_number = g.manager.get_next_job_number()
        name_prefix = "%s at %s (%d)" % (hypothesis_name, sample_name, job_number)
        cleaned_prefix = re.sub(r"[\s()]", "_", name_prefix)
        name_template = g.manager.get_results_path(
            secure_filename(cleaned_prefix) + "_%s.analysis.db")
        storage_path = make_unique_name(name_template)
        task = AnalyzeGlycopeptideSequenceTask(
            hypothesis_record.path, sample_record.path, hypothesis_record.id,
            storage_path, name_prefix,
            grouping_error_tolerance=grouping_tolerance,
            mass_error_tolerance=matching_tolerance,
            msn_mass_error_tolerance=ms2_matching_tolerance,
            psm_fdr_threshold=psm_fdr_threshold,
            minimum_oxonium_threshold=minimum_oxonium_threshold,
            workload_size=workload_size,
            use_peptide_mass_filter=use_peptide_mass_filter,
            mass_shifts=mass_shift_data,
            permute_decoy_glycan_fragments=permute_decoy_glycan_fragments,
            job_name_part=job_number,
            include_rare_signature_ions=include_rare_signature_ions,
            model_retention_time=model_retention_time,
            search_strategy=search_strategy,
            decoy_database_connection=decoy_hypothesis_path,
            decoy_hypothesis_id=decoy_hypothesis_id,
            tandem_scoring_model=tandem_scoring_model)
        g.add_task(task)
    return Response("Tasks Scheduled")
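# The strategy selection above, restated as a standalone function for
# clarity (a hypothetical helper mirroring the branch logic; not called by
# run_search_post):
def choose_search_strategy(hypothesis_record):
    """Pick the search strategy implied by the hypothesis' decoy pairing."""
    if hypothesis_record.decoy_hypothesis is None:
        # No paired decoy database: plain classic search.
        return GlycopeptideSearchStrategyEnum.classic
    if not hypothesis_record.is_full_crossproduct:
        # Partial cross-product databases require the multipart strategy.
        return GlycopeptideSearchStrategyEnum.multipart
    # Full cross-product with a decoy: classic search compared against decoys.
    return GlycopeptideSearchStrategyEnum.classic_comparison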
def build_glycan_search_space_process():
    data = request.values
    custom_reduction_type = data.get("custom-reduction-type")
    custom_derivatization_type = data.get("custom-derivatization-type")
    has_custom_reduction = custom_reduction_type != ""
    has_custom_derivatization = custom_derivatization_type != ""
    reduction_type = data.get("reduction-type")
    derivatization_type = data.get("derivatization-type")

    hypothesis_name = data.get("hypothesis-name")
    hypothesis_name = g.manager.make_unique_hypothesis_name(hypothesis_name)
    secure_name = secure_filename(
        hypothesis_name if hypothesis_name is not None else "glycan_database")
    storage_path = g.manager.get_hypothesis_path(
        re.sub(r"[\s()]", "_", secure_name)) + '_glycan_%s.database'
    storage_path = make_unique_name(storage_path)
    touch_file(storage_path)

    if reduction_type in ("", "native"):
        reduction_type = None
    if derivatization_type in ("", "native"):
        derivatization_type = None
    try:
        reduction_type = validate_reduction(None, reduction_type)
    except Exception:
        g.manager.add_message(Message(
            "Could not validate reduction type %r" % reduction_type, 'update'))
        return Response("Task Not Scheduled")
    try:
        derivatization_type = validate_derivatization(None, derivatization_type)
    except Exception:
        g.manager.add_message(Message(
            "Could not validate derivatization type %r" % derivatization_type,
            'update'))
        return Response("Task Not Scheduled")

    selected_method = data.get("selected-method", 'combinatorial')

    # Construct the argument set for a BuildCombinatorialGlycanHypothesis
    # task. This builds an in-memory buffer holding the user's specified
    # rules in place of a rules file on disk.
    if selected_method == "combinatorial":
        # Each rule table column arrives as a parallel list with a trailing
        # blank template row.
        comb_monosaccharide_name = data.getlist('monosaccharide_name')[:-1]
        comb_lower_bound = list(map(
            intify, data.getlist('monosaccharide_lower_bound')[:-1]))
        comb_upper_bound = list(map(
            intify, data.getlist('monosaccharide_upper_bound')[:-1]))
        comb_monosaccharide_name, comb_lower_bound, comb_upper_bound = \
            remove_empty_rows(
                comb_monosaccharide_name, comb_lower_bound, comb_upper_bound)

        constraint_lhs = data.getlist("left_hand_side")[:-1]
        constraint_op = data.getlist("operator")[:-1]
        constraint_rhs = data.getlist("right_hand_side")[:-1]
        constraints = list(zip(*remove_empty_rows(
            constraint_lhs, constraint_op, constraint_rhs)))

        rules = list(zip(
            comb_monosaccharide_name, comb_lower_bound, comb_upper_bound))
        # File-like object to pass to the task in place of a path to a
        # rules file.
        rules_buffer = _serialize_rules_to_buffer(
            rules, constraints, "generated")

        task = BuildCombinatorialGlycanHypothesis(
            rules_buffer, storage_path,
            reduction=custom_reduction_type
            if has_custom_reduction else reduction_type,
            derivatization=custom_derivatization_type
            if has_custom_derivatization else derivatization_type,
            name=hypothesis_name,
            callback=lambda: 0,
            user=g.user)
        g.add_task(task)
    # Construct the argument set for a BuildTextFileGlycanHypothesis task.
    elif selected_method == "text-file":
        glycan_list_file = request.files["glycan-list-file"]
        secure_glycan_list_file = g.manager.get_temp_path(
            secure_filename(glycan_list_file.filename))
        glycan_list_file.save(secure_glycan_list_file)
        task = BuildTextFileGlycanHypothesis(
            secure_glycan_list_file, storage_path,
            reduction=custom_reduction_type
            if has_custom_reduction else reduction_type,
            derivatization=custom_derivatization_type
            if has_custom_derivatization else derivatization_type,
            name=hypothesis_name,
            callback=lambda: 0,
            user=g.user)
        g.add_task(task)
    elif selected_method == "pregenerated":  # Not yet implemented
        g.manager.add_message(Message(
            "This method is not enabled at this time", 'update'))
        return Response("Task Not Scheduled")
    elif selected_method == "merge-hypotheses":
        id_1 = data.get("merged-hypothesis-1", 0)
        id_2 = data.get("merged-hypothesis-2", 0)
        if id_1 == 0 or id_2 == 0 or id_1 == id_2:
            g.add_message(Message(
                "Two different hypotheses must be selected to merge."))
            return Response("Task Not Scheduled")
        rec_1 = g.manager.hypothesis_manager.get(id_1)
        rec_2 = g.manager.hypothesis_manager.get(id_2)
        task = MergeGlycanHypotheses(
            g.manager.connection_bridge,
            [(rec_1.path, rec_1.id), (rec_2.path, rec_2.id)],
            name=hypothesis_name, callback=lambda: 0, user=g.user)
        g.add_task(task)
    else:
        g.add_message(Message(
            "This method is not recognized: \"%s\"" % (selected_method,),
            'update'))
        return Response("Task Not Scheduled")
    return Response("Task Scheduled")
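# ``remove_empty_rows`` filters parallel form columns above. It is imported
# from elsewhere in the package; the assumed behavior is roughly:
#
#   def remove_empty_rows(*columns):
#       """Drop rows that contain a blank cell, returning the surviving
#       columns in their original order."""
#       rows = [row for row in zip(*columns)
#               if all(cell not in ("", None) for cell in row)]
#       return tuple(list(column) for column in zip(*rows)) if rows \
#           else tuple([] for _ in columns)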
def build_glycopeptide_search_space_post():
    values = request.values
    # The client joins multiple modification rules with ";;;" to work around
    # Werkzeug mis-parsing the multivalue form encoding; split them back
    # apart and drop blanks.
    constant_modifications = values.get("constant_modifications").split(";;;")
    variable_modifications = values.get("variable_modifications").split(";;;")
    constant_modifications = [
        const_mod for const_mod in constant_modifications if const_mod]
    variable_modifications = [
        var_mod for var_mod in variable_modifications if var_mod]

    enzyme = values.getlist("enzyme")
    if len(enzyme) == 1:
        enzyme = enzyme[0]

    hypothesis_name = values.get("hypothesis_name")
    hypothesis_name = g.manager.make_unique_hypothesis_name(hypothesis_name)
    secure_name = secure_filename(
        hypothesis_name if hypothesis_name is not None
        else "glycopeptide_database")
    storage_path = g.manager.get_hypothesis_path(
        re.sub(r"[\s()]", "_", secure_name)) + '_glycopeptide_%s.database'
    storage_path = make_unique_name(storage_path)
    touch_file(storage_path)

    protein_list = request.files["protein-list-file"]
    protein_list_type = values.get("proteomics-file-type")
    glycan_file = request.files.get("glycan-definition-file")
    glycan_database = values.get("glycan-database-source")
    glycan_file_type = values.get("glycans-file-format")

    glycan_options = {}

    max_missed_cleavages = intify(values.get("missed_cleavages"))
    maximum_glycosylation_sites = intify(
        values.get("max_glycosylation_sites", 1))

    secure_protein_list = g.manager.get_temp_path(
        secure_filename(protein_list.filename))
    protein_list.save(secure_protein_list)

    # When no glycan database is selected, fall back to a user-provided
    # glycan definition file.
    if glycan_database == "" or glycan_database is None:
        glycan_file_type = "text"
        glycan_options["glycan_source_type"] = glycan_file_type
        secure_glycan_file = g.manager.get_temp_path(
            secure_filename(glycan_file.filename))
        glycan_file.save(secure_glycan_file)
        glycan_options["glycomics_source"] = secure_glycan_file
        glycan_options["glycan_source_identifier"] = None
    else:
        option_type, option_id = glycan_database.split(",", 1)
        record = g.manager.hypothesis_manager.get(option_id)
        glycan_options["glycan_source_identifier"] = record.id
        if option_type == "Hypothesis":
            option_type = "hypothesis"
            glycan_options["glycomics_source"] = record.path
        elif option_type == "Analysis":
            option_type = "analysis"
            glycan_options["glycomics_source"] = record.path
        glycan_options["glycan_source_type"] = option_type

    n_workers = g.manager.configuration.get("database_build_worker_count", 4)

    if protein_list_type == "fasta":
        task = BuildGlycopeptideHypothesisFasta(
            storage_path, fasta_file=secure_protein_list, enzyme=enzyme,
            missed_cleavages=max_missed_cleavages,
            occupied_glycosites=maximum_glycosylation_sites,
            name=hypothesis_name,
            constant_modification=constant_modifications,
            variable_modification=variable_modifications,
            processes=n_workers,
            glycan_source=glycan_options["glycomics_source"],
            glycan_source_type=glycan_options["glycan_source_type"],
            glycan_source_identifier=glycan_options["glycan_source_identifier"])
        g.add_task(task)
    elif protein_list_type == 'mzIdentML':
        protein_names = values.get("protein_names").split(",")
        task = BuildGlycopeptideHypothesisMzId(
            storage_path, secure_protein_list, name=hypothesis_name,
            occupied_glycosites=maximum_glycosylation_sites,
            target_protein=protein_names,
            processes=n_workers,
            glycan_source=glycan_options['glycomics_source'],
            glycan_source_type=glycan_options['glycan_source_type'],
            glycan_source_identifier=glycan_options["glycan_source_identifier"])
        g.add_task(task)
    else:
        abort(400)
    return Response("Task Scheduled")
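# End-to-end usage sketch with Flask's test client (route path, form fields,
# and file names are illustrative assumptions; the application factory and
# route bindings live outside this excerpt):
#
#   with app.test_client() as client:
#       resp = client.post(
#           "/build_glycopeptide_search_space",
#           data={
#               "hypothesis_name": "demo",
#               "constant_modifications": "Carbamidomethyl (C);;;",
#               "variable_modifications": "",
#               "enzyme": "trypsin",
#               "proteomics-file-type": "fasta",
#               "missed_cleavages": "2",
#               "glycan-database-source": "",
#               "glycans-file-format": "text",
#               "protein-list-file": (open("proteins.fa", "rb"), "proteins.fa"),
#               "glycan-definition-file": (
#                   open("glycans.txt", "rb"), "glycans.txt"),
#           },
#           content_type="multipart/form-data")
#       assert resp.data == b"Task Scheduled"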