    def test_profiler(self):
        db_file = self._make_hypothesis()
        output_file = self.setup_tempfile("")
        task = MzMLGlycanChromatogramAnalyzer(
            db_file, 1, agp_glycomics_mzml, output_file,
            analysis_name="test-analysis",
            scoring_model=GeneralScorer)
        task.start()
        self.assertTrue(os.path.exists(output_file))
        ads = AnalysisDeserializer(output_file)
        gcs = ads.load_glycan_composition_chromatograms()
        self.clear_file(db_file)
        # 'spacing_fit': 0.96367957815527916, 'isotopic_fit': 0.99366937970680247,
        # 'line_score': 0.99780414736388745, 'charge_count': 0.9365769766604084
        self.confirm_score(gcs, "{Fuc:1; Hex:7; HexNAc:6; Neu5Ac:4}", 17.1458)
        # 'spacing_fit': 0.96123524755239487, 'isotopic_fit': 0.97935840584492162,
        # 'line_score': 0.99562733579066764, 'charge_count': 0.7368321292716115
        self.confirm_score(gcs, "{Hex:8; HexNAc:7; Neu5Ac:3}", 13.5279)
        # 'spacing_fit': 0.94565181061625481, 'isotopic_fit': 0.99074210231338733,
        # 'line_score': 0.98925755528448378, 'charge_count': 0.999773289306269
        self.confirm_score(gcs, "{Hex:7; HexNAc:6; Neu5Ac:4}", 20.4438)
        # 'spacing_fit': 0.95567017048597336, 'isotopic_fit': 0.98274665306540443,
        # 'line_score': 0.99706887771172914, 'charge_count': 0.7604540961453831
        self.confirm_score(gcs, "{Fuc:2; Hex:6; HexNAc:5; Neu5Ac:3}", 14.0977)

        ads.close()
        self.clear_file(output_file)
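The test above builds a glycan hypothesis database, runs MzMLGlycanChromatogramAnalyzer against the agp_glycomics_mzml fixture, deserializes the result, and checks a handful of chromatogram scores through a confirm_score helper defined on the test base class. That helper is not shown in this listing; the following is a minimal sketch of what it plausibly does, assuming each chromatogram exposes its glycan composition and a score attribute.

    # Hedged sketch of the confirm_score helper used in the tests above.
    # The attribute names (glycan_composition, score) are assumptions based on
    # how the chromatograms are used; the real base-class helper may differ.
    def confirm_score(self, chromatograms, composition_key, expected_score):
        matches = [c for c in chromatograms
                   if str(c.glycan_composition) == composition_key]
        self.assertTrue(matches, "No chromatogram found for %s" % composition_key)
        self.assertAlmostEqual(matches[0].score, expected_score, 2)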
Code example #2
    def test_profiler(self):
        db_file = self._make_hypothesis()
        output_file = self.setup_tempfile("")
        task = MzMLGlycanChromatogramAnalyzer(db_file,
                                              1,
                                              agp_glycomics_mzml,
                                              output_file,
                                              analysis_name="test-analysis",
                                              scoring_model=GeneralScorer)
        task.start()
        self.assertTrue(os.path.exists(output_file))
        ads = AnalysisDeserializer(output_file)
        gcs = ads.load_glycan_composition_chromatograms()
        self.assertEqual(len(gcs), 23)
        self.clear_file(db_file)
        # 'spacing_fit': 0.96367957815527916, 'isotopic_fit': 0.99366937970680247,
        # 'line_score': 0.99780414736388745, 'charge_count': 0.9365769766604084
        self.confirm_score(gcs, "{Fuc:1; Hex:7; HexNAc:6; Neu5Ac:4}", 17.1458)
        # 'spacing_fit': 0.96123524755239487, 'isotopic_fit': 0.97935840584492162,
        # 'line_score': 0.99562733579066764, 'charge_count': 0.7368321292716115
        self.confirm_score(gcs, "{Hex:8; HexNAc:7; Neu5Ac:3}", 13.5279)
        # 'spacing_fit': 0.94565181061625481, 'isotopic_fit': 0.99074210231338733,
        # 'line_score': 0.98925755528448378, 'charge_count': 0.999773289306269
        self.confirm_score(gcs, "{Hex:7; HexNAc:6; Neu5Ac:4}", 20.4438)
        # 'spacing_fit': 0.95567017048597336, 'isotopic_fit': 0.98274665306540443,
        # 'line_score': 0.99706887771172914, 'charge_count': 0.7604540961453831
        self.confirm_score(gcs, "{Fuc:2; Hex:6; HexNAc:5; Neu5Ac:3}", 14.0977)

        ads.close()
        self.clear_file(output_file)
Code example #3
    def test_smoothing_profiler(self):
        db_file = self._make_hypothesis()
        output_file = self.setup_tempfile("")
        task = MzMLGlycanChromatogramAnalyzer(db_file,
                                              1,
                                              agp_glycomics_mzml,
                                              output_file,
                                              regularize="grid",
                                              analysis_name="test-analysis",
                                              scoring_model=GeneralScorer)
        task.start()
        # import cProfile
        # prof = cProfile.Profile()
        # prof.runcall(task.start)
        # prof.print_stats()
        # prof.dump_stats('smooth_profile.pstats')
        self.assertTrue(os.path.exists(output_file))
        ads = AnalysisDeserializer(output_file, analysis_id=1)
        gcs = ads.load_glycan_composition_chromatograms()
        self.assertEqual(len(gcs), 23)
        self.confirm_score(gcs, "{Fuc:1; Hex:7; HexNAc:6; Neu5Ac:4}", 16.1425)
        self.confirm_score(gcs, "{Hex:8; HexNAc:7; Neu5Ac:3}", 8.8510)
        self.confirm_score(gcs, "{Hex:7; HexNAc:6; Neu5Ac:4}", 16.6722)
        network_params = ads.analysis.parameters['network_parameters']
        tau = [
            0.0, 12.173488161057854, 16.042106463675424, 0.0,
            22.061954223206591, 0.0, 13.928596053020485, 0.0,
            9.4348332520855713, 0.0, 0.0, 0.0, 0.0, 0.0
        ]
        for a, b in zip(tau, network_params.tau):
            self.assertAlmostEqual(a, b, 3)
        ads.close()

        self.clear_file(output_file)
        task = MzMLGlycanChromatogramAnalyzer(
            db_file,
            1,
            agp_glycomics_mzml,
            output_file,
            regularize=0.2,
            regularization_model=network_params,
            analysis_name="test-analysis",
            scoring_model=GeneralScorer)
        task.start()
        ads = AnalysisDeserializer(output_file, analysis_id=1)
        gcs = ads.load_glycan_composition_chromatograms()
        self.assertEqual(len(gcs), 23)
        self.confirm_score(gcs, "{Fuc:1; Hex:7; HexNAc:6; Neu5Ac:4}", 16.7795)
        self.confirm_score(gcs, "{Hex:8; HexNAc:7; Neu5Ac:3}", 10.6734)
        self.confirm_score(gcs, "{Hex:7; HexNAc:6; Neu5Ac:4}", 18.3360)
        self.confirm_score(gcs, "{Fuc:2; Hex:6; HexNAc:5; Neu5Ac:3}", 15.9628)
        network_params = ads.analysis.parameters['network_parameters']
        for a, b in zip(tau, network_params.tau):
            self.assertAlmostEqual(a, b, 3)
        ads.close()
        self.clear_file(output_file)
        self.clear_file(db_file)
    def test_profiler(self):
        db_file = self._make_hypothesis()
        output_file = self.setup_tempfile("")
        task = MzMLGlycanChromatogramAnalyzer(
            db_file, 1, agp_glycomics_mzml, output_file,
            analysis_name="test-analysis",
            scoring_model=GeneralScorer)
        task.start()
        self.assertTrue(os.path.exists(output_file))
        ads = AnalysisDeserializer(output_file)
        gcs = ads.load_glycan_composition_chromatograms()
        self.clear_file(db_file)

        self.confirm_score(gcs, "{Fuc:1; Hex:7; HexNAc:6; Neu5Ac:4}", 16.97)
        self.confirm_score(gcs, "{Hex:8; HexNAc:7; Neu5Ac:3}", 13.47)
        self.confirm_score(gcs, "{Hex:7; HexNAc:6; Neu5Ac:4}", 20.44)
        self.confirm_score(gcs, "{Fuc:2; Hex:6; HexNAc:5; Neu5Ac:3}", 14.11)

        ads.close()
        self.clear_file(output_file)
    def test_smoothing_profiler(self):
        db_file = self._make_hypothesis()
        output_file = self.setup_tempfile("")
        task = MzMLGlycanChromatogramAnalyzer(
            db_file, 1, agp_glycomics_mzml, output_file,
            regularize="grid",
            analysis_name="test-analysis",
            scoring_model=GeneralScorer)
        task.start()
        # import cProfile
        # prof = cProfile.Profile()
        # prof.runcall(task.start)
        # prof.print_stats()
        # prof.dump_stats('smooth_profile.pstats')
        self.assertTrue(os.path.exists(output_file))
        ads = AnalysisDeserializer(output_file, analysis_id=1)
        gcs = ads.load_glycan_composition_chromatograms()

        self.confirm_score(gcs, "{Fuc:1; Hex:7; HexNAc:6; Neu5Ac:4}", 16.1425)
        self.confirm_score(gcs, "{Hex:8; HexNAc:7; Neu5Ac:3}", 8.8510)
        self.confirm_score(gcs, "{Hex:7; HexNAc:6; Neu5Ac:4}", 16.6722)
        network_params = ads.analysis.parameters['network_parameters']
        tau = [0., 11.77485721, 15.99541137, 0.,
               21.96431573, 0., 13.94378948, 0.,
               9.32841618, 0., 0., 0.,
               0., 0.]
        for a, b in zip(tau, network_params.tau):
            self.assertAlmostEqual(a, b, 3)
        ads.close()

        self.clear_file(output_file)
        task = MzMLGlycanChromatogramAnalyzer(
            db_file, 1, agp_glycomics_mzml, output_file,
            regularize=0.2,
            regularization_model=network_params,
            analysis_name="test-analysis",
            scoring_model=GeneralScorer)
        task.start()
        ads = AnalysisDeserializer(output_file, analysis_id=1)
        gcs = ads.load_glycan_composition_chromatograms()
        self.confirm_score(gcs, "{Fuc:1; Hex:7; HexNAc:6; Neu5Ac:4}", 16.7795)
        self.confirm_score(gcs, "{Hex:8; HexNAc:7; Neu5Ac:3}", 10.6734)
        self.confirm_score(gcs, "{Hex:7; HexNAc:6; Neu5Ac:4}", 18.3360)
        self.confirm_score(gcs, "{Fuc:2; Hex:6; HexNAc:5; Neu5Ac:3}", 15.9628)
        network_params = ads.analysis.parameters['network_parameters']
        for a, b in zip(tau, network_params.tau):
            self.assertAlmostEqual(a, b, 3)
        ads.close()
        self.clear_file(output_file)
        self.clear_file(db_file)
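The smoothing tests above fit network regularization parameters on a first pass with regularize="grid", check the fitted tau vector, and then reuse the fitted network_params as a fixed regularization_model with regularize=0.2 on a second pass. A minimal sketch of persisting such a solution between runs, using the same dump and GridPointSolution.load calls that appear in the CLI code further down this listing; the file name is illustrative only.

    # Sketch: write the fitted parameters out, then read them back for a later
    # analysis run that passes them as regularization_model.
    with open("regularization-parameters.txt", "w") as fp:
        network_params.dump(fp)
    with open("regularization-parameters.txt", "r") as fp:
        reloaded_model = GridPointSolution.load(fp)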
Code example #6
File: analyze.py (project: mstim/glycresoft)
def search_glycan(context,
                  database_connection,
                  sample_path,
                  hypothesis_identifier,
                  analysis_name,
                  mass_shifts,
                  grouping_error_tolerance=1.5e-5,
                  mass_error_tolerance=1e-5,
                  minimum_mass=500.,
                  scoring_model=None,
                  regularize=None,
                  regularization_model_path=None,
                  network_path=None,
                  output_path=None,
                  scoring_model_features=None,
                  delta_rt=0.5,
                  export=None,
                  interact=False,
                  require_msms_signature=0.0,
                  msn_mass_error_tolerance=2e-5,
                  mass_shift_combination_limit=None,
                  processes=4):
    """Identify glycan compositions from preprocessed LC-MS data, stored in mzML
    format.
    """
    if output_path is None and not interact:
        output_path = make_analysis_output_path("glycan")
    if scoring_model is None:
        scoring_model = GeneralScorer

    if mass_shift_combination_limit is None:
        mass_shift_combination_limit = 8

    if scoring_model_features:
        for feature in scoring_model_features:
            scoring_model.add_feature(validate_ms1_feature_name(feature))

    if regularization_model_path is not None:
        with open(regularization_model_path, 'r') as mod_file:
            regularization_model = GridPointSolution.load(mod_file)
    else:
        regularization_model = None

    if network_path is not None:
        with open(network_path, 'r') as netfile:
            network = GraphReader(netfile).network
    else:
        network = None

    database_connection = DatabaseBoundOperation(database_connection)
    ms_data = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = ms_data.sample_run

    try:
        hypothesis = get_by_name_or_id(database_connection, GlycanHypothesis,
                                       hypothesis_identifier)
    except Exception:
        click.secho("Could not locate a Glycan Hypothesis with identifier %r" %
                    hypothesis_identifier,
                    fg='yellow')
        raise click.Abort()

    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, hypothesis.name)

    analysis_name = validate_analysis_name(context,
                                           database_connection.session,
                                           analysis_name)

    mass_shifts = [
        validate_mass_shift(mass_shift, multiplicity)
        for mass_shift, multiplicity in mass_shifts
    ]
    expanded = []
    expanded = MzMLGlycanChromatogramAnalyzer.expand_mass_shifts(
        dict(mass_shifts), limit=mass_shift_combination_limit)
    mass_shifts = expanded

    click.secho("Preparing analysis of %s by %s" %
                (sample_run.name, hypothesis.name),
                fg='cyan')

    analyzer = MzMLGlycanChromatogramAnalyzer(
        database_connection._original_connection,
        hypothesis.id,
        sample_path=sample_path,
        output_path=output_path,
        mass_shifts=mass_shifts,
        mass_error_tolerance=mass_error_tolerance,
        msn_mass_error_tolerance=msn_mass_error_tolerance,
        grouping_error_tolerance=grouping_error_tolerance,
        scoring_model=scoring_model,
        minimum_mass=minimum_mass,
        regularize=regularize,
        regularization_model=regularization_model,
        network=network,
        analysis_name=analysis_name,
        delta_rt=delta_rt,
        require_msms_signature=require_msms_signature,
        n_processes=processes)
    analyzer.display_header()
    analyzer.start()
    if interact:
        try:
            import IPython
            click.secho(fmt_msg("Beginning Interactive Session..."), fg='cyan')
            IPython.embed()
        except ImportError:
            click.secho(fmt_msg("Interactive Session Not Supported"), fg='red')
    if export:
        for export_type in set(export):
            click.echo(fmt_msg("Handling Export: %s" % (export_type, )))
            if export_type == 'csv':
                from glycan_profiling.cli.export import glycan_composition_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycan-chromatograms.csv" % (base, )
                context.invoke(glycan_composition_identification,
                               database_connection=output_path,
                               analysis_identifier=analyzer.analysis_id,
                               output_path=export_path)
            elif export_type == 'html':
                from glycan_profiling.cli.export import glycan_composition_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-report.html" % (base, )
                context.invoke(glycan_composition_identification,
                               database_connection=output_path,
                               analysis_identifier=analyzer.analysis_id,
                               output_path=export_path,
                               report=True)
            elif export_type == 'glycan-list':
                from glycan_profiling.cli.export import glycan_hypothesis
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycan-chromatograms.csv" % (base, )
                context.invoke(glycan_hypothesis,
                               database_connection=output_path,
                               hypothesis_identifier=analyzer.hypothesis_id,
                               output_path=export_path,
                               importable=True)
            elif export_type == "model":
                base = os.path.splitext(output_path)[0]
                export_path = "%s-regularization-parameters.txt" % (base, )
                params = analyzer.analysis.parameters.get("network_parameters")
                if params is None:
                    click.secho(
                        "No parameters were fitted, skipping \"model\"",
                        fg='red')
                else:
                    with open(export_path, 'w') as fp:
                        params.dump(fp)
            else:
                click.secho("Unrecognized Export: %s" % (export_type, ),
                            fg='yellow')
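The search_glycan command above is essentially a thin CLI wrapper around MzMLGlycanChromatogramAnalyzer: it validates the hypothesis, analysis name, and mass shifts, constructs the analyzer, then dispatches optional exports. A stripped-down programmatic equivalent, mirroring the test code earlier in this listing (the file paths are placeholders, not real fixtures):

    # Sketch: run the same analysis directly, without the CLI layer.
    task = MzMLGlycanChromatogramAnalyzer(
        "hypothesis.db", 1, "sample.preprocessed.mzML", "analysis.db",
        analysis_name="example-analysis",
        scoring_model=GeneralScorer)
    task.start()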
Code example #7
File: analyze.py (project: mstim/glycresoft)
def search_glycopeptide_multipart(context,
                                  database_connection,
                                  decoy_database_connection,
                                  sample_path,
                                  target_hypothesis_identifier=1,
                                  decoy_hypothesis_identifier=1,
                                  analysis_name=None,
                                  output_path=None,
                                  grouping_error_tolerance=1.5e-5,
                                  mass_error_tolerance=1e-5,
                                  msn_mass_error_tolerance=2e-5,
                                  psm_fdr_threshold=0.05,
                                  peak_shape_scoring_model=None,
                                  tandem_scoring_model=None,
                                  glycan_score_threshold=1.0,
                                  memory_database_index=False,
                                  save_intermediate_results=None,
                                  processes=4,
                                  workload_size=500,
                                  mass_shifts=None,
                                  export=None,
                                  maximum_mass=float('inf'),
                                  isotope_probing_range=3,
                                  fdr_estimation_strategy=None,
                                  glycoproteome_smoothing_model=None,
                                  mapping_processes=1,
                                  durable_fucose=False):
    if output_path is None:
        output_path = make_analysis_output_path("glycopeptide")
    if fdr_estimation_strategy is None:
        fdr_estimation_strategy = GlycopeptideFDREstimationStrategy.multipart_gamma_gaussian_mixture
    else:
        fdr_estimation_strategy = GlycopeptideFDREstimationStrategy[
            fdr_estimation_strategy]
    if tandem_scoring_model is None:
        tandem_scoring_model = "log_intensity"
    database_connection = DatabaseBoundOperation(database_connection)
    decoy_database_connection = DatabaseBoundOperation(
        decoy_database_connection)
    ms_data = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = ms_data.sample_run

    try:
        target_hypothesis = get_by_name_or_id(database_connection,
                                              GlycopeptideHypothesis,
                                              target_hypothesis_identifier)
    except Exception:
        click.secho(
            "Could not locate Target Glycopeptide Hypothesis with identifier %r"
            % target_hypothesis_identifier,
            fg='yellow')
        raise click.Abort()

    try:
        decoy_hypothesis = get_by_name_or_id(decoy_database_connection,
                                             GlycopeptideHypothesis,
                                             decoy_hypothesis_identifier)
    except Exception:
        click.secho(
            "Could not locate Decoy Glycopeptide Hypothesis with identifier %r"
            % decoy_hypothesis_identifier,
            fg='yellow')
        raise click.Abort()

    tandem_scoring_model = validate_glycopeptide_tandem_scoring_function(
        context, tandem_scoring_model)

    mass_shifts = [
        validate_mass_shift(mass_shift, multiplicity)
        for mass_shift, multiplicity in mass_shifts
    ]
    expanded = []
    expanded = MzMLGlycanChromatogramAnalyzer.expand_mass_shifts(
        dict(mass_shifts), crossproduct=False)
    mass_shifts = expanded

    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, target_hypothesis.name)

    analysis_name = validate_analysis_name(context,
                                           database_connection.session,
                                           analysis_name)

    click.secho("Preparing analysis of %s by %s" %
                (sample_run.name, target_hypothesis.name),
                fg='cyan')
    analyzer = MultipartGlycopeptideLCMSMSAnalyzer(
        database_connection._original_connection,
        decoy_database_connection._original_connection,
        target_hypothesis.id,
        decoy_hypothesis.id,
        sample_path,
        output_path,
        analysis_name=analysis_name,
        grouping_error_tolerance=grouping_error_tolerance,
        mass_error_tolerance=mass_error_tolerance,
        msn_mass_error_tolerance=msn_mass_error_tolerance,
        psm_fdr_threshold=psm_fdr_threshold,
        tandem_scoring_model=tandem_scoring_model,
        glycan_score_threshold=glycan_score_threshold,
        mass_shifts=mass_shifts,
        n_processes=processes,
        spectrum_batch_size=workload_size,
        maximum_mass=maximum_mass,
        probing_range_for_missing_precursors=isotope_probing_range,
        use_memory_database=memory_database_index,
        fdr_estimation_strategy=fdr_estimation_strategy,
        glycosylation_site_models_path=glycoproteome_smoothing_model,
        fragile_fucose=not durable_fucose)
    analyzer.display_header()
    result = analyzer.start()
    gps, unassigned, target_decoy_set = result[:3]
    if save_intermediate_results is not None:
        analyzer.log("Saving Intermediate Results")
        with open(save_intermediate_results, 'wb') as handle:
            pickle.dump((target_decoy_set, gps), handle)
    del gps
    del unassigned
    del target_decoy_set
    if export:
        for export_type in set(export):
            click.echo(fmt_msg("Handling Export: %s" % (export_type, )))
            if export_type == 'csv':
                from glycan_profiling.cli.export import glycopeptide_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycopeptides.csv" % (base, )
                context.invoke(glycopeptide_identification,
                               database_connection=output_path,
                               analysis_identifier=analyzer.analysis_id,
                               output_path=export_path)
            elif export_type == 'psm-csv':
                from glycan_profiling.cli.export import glycopeptide_spectrum_matches
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycopeptide-spectrum-matches.csv" % (base, )
                context.invoke(glycopeptide_spectrum_matches,
                               database_connection=output_path,
                               analysis_identifier=analyzer.analysis_id,
                               output_path=export_path)
            elif export_type == 'html':
                from glycan_profiling.cli.export import glycopeptide_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-report.html" % (base, )
                context.invoke(glycopeptide_identification,
                               database_connection=output_path,
                               analysis_identifier=analyzer.analysis_id,
                               output_path=export_path,
                               report=True)
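When save_intermediate_results is given, the multipart search above pickles a (target_decoy_set, gps) tuple before discarding those objects. A minimal sketch of reading that file back for offline inspection; the file name is a placeholder.

    # Sketch: reload the intermediate results written by the search above.
    import pickle

    with open("intermediate_results.pkl", "rb") as handle:
        target_decoy_set, gps = pickle.load(handle)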
Code example #8
File: analyze.py (project: mstim/glycresoft)
def search_glycopeptide(context,
                        database_connection,
                        sample_path,
                        hypothesis_identifier,
                        analysis_name,
                        output_path=None,
                        grouping_error_tolerance=1.5e-5,
                        mass_error_tolerance=1e-5,
                        msn_mass_error_tolerance=2e-5,
                        psm_fdr_threshold=0.05,
                        peak_shape_scoring_model=None,
                        tandem_scoring_model=None,
                        oxonium_threshold=0.15,
                        save_intermediate_results=None,
                        processes=4,
                        workload_size=500,
                        mass_shifts=None,
                        export=None,
                        use_peptide_mass_filter=False,
                        maximum_mass=float('inf'),
                        decoy_database_connection=None,
                        fdr_correction='auto',
                        isotope_probing_range=3,
                        permute_decoy_glycan_fragments=False):
    """Identify glycopeptide sequences from processed LC-MS/MS data
    """
    if output_path is None:
        output_path = make_analysis_output_path("glycopeptide")
    if tandem_scoring_model is None:
        tandem_scoring_model = CoverageWeightedBinomialScorer
    database_connection = DatabaseBoundOperation(database_connection)
    ms_data = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = ms_data.sample_run

    try:
        hypothesis = get_by_name_or_id(database_connection,
                                       GlycopeptideHypothesis,
                                       hypothesis_identifier)
    except Exception:
        click.secho(
            "Could not locate a Glycopeptide Hypothesis with identifier %r" %
            hypothesis_identifier,
            fg='yellow')
        raise click.Abort()

    tandem_scoring_model = validate_glycopeptide_tandem_scoring_function(
        context, tandem_scoring_model)

    mass_shifts = [
        validate_mass_shift(mass_shift, multiplicity)
        for mass_shift, multiplicity in mass_shifts
    ]
    expanded = []
    expanded = MzMLGlycanChromatogramAnalyzer.expand_mass_shifts(
        dict(mass_shifts), crossproduct=False)
    mass_shifts = expanded

    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, hypothesis.name)

    analysis_name = validate_analysis_name(context,
                                           database_connection.session,
                                           analysis_name)

    click.secho("Preparing analysis of %s by %s" %
                (sample_run.name, hypothesis.name),
                fg='cyan')

    if decoy_database_connection is None:
        analyzer = MzMLGlycopeptideLCMSMSAnalyzer(
            database_connection._original_connection,
            sample_path=sample_path,
            hypothesis_id=hypothesis.id,
            analysis_name=analysis_name,
            output_path=output_path,
            grouping_error_tolerance=grouping_error_tolerance,
            mass_error_tolerance=mass_error_tolerance,
            msn_mass_error_tolerance=msn_mass_error_tolerance,
            psm_fdr_threshold=psm_fdr_threshold,
            peak_shape_scoring_model=peak_shape_scoring_model,
            tandem_scoring_model=tandem_scoring_model,
            oxonium_threshold=oxonium_threshold,
            n_processes=processes,
            spectrum_batch_size=workload_size,
            mass_shifts=mass_shifts,
            use_peptide_mass_filter=use_peptide_mass_filter,
            maximum_mass=maximum_mass,
            probing_range_for_missing_precursors=isotope_probing_range,
            permute_decoy_glycans=permute_decoy_glycan_fragments)
    else:
        analyzer = MzMLComparisonGlycopeptideLCMSMSAnalyzer(
            database_connection._original_connection,
            decoy_database_connection,
            sample_path=sample_path,
            hypothesis_id=hypothesis.id,
            analysis_name=analysis_name,
            output_path=output_path,
            grouping_error_tolerance=grouping_error_tolerance,
            mass_error_tolerance=mass_error_tolerance,
            msn_mass_error_tolerance=msn_mass_error_tolerance,
            psm_fdr_threshold=psm_fdr_threshold,
            peak_shape_scoring_model=peak_shape_scoring_model,
            tandem_scoring_model=tandem_scoring_model,
            oxonium_threshold=oxonium_threshold,
            n_processes=processes,
            spectrum_batch_size=workload_size,
            mass_shifts=mass_shifts,
            use_peptide_mass_filter=use_peptide_mass_filter,
            maximum_mass=maximum_mass,
            use_decoy_correction_threshold=fdr_correction,
            probing_range_for_missing_precursors=isotope_probing_range,
            permute_decoy_glycans=permute_decoy_glycan_fragments)
    analyzer.display_header()
    result = analyzer.start()
    gps, unassigned, target_decoy_set = result[:3]
    if save_intermediate_results is not None:
        analyzer.log("Saving Intermediate Results")
        with open(save_intermediate_results, 'wb') as handle:
            pickle.dump((target_decoy_set, gps), handle)
    del gps
    del unassigned
    del target_decoy_set
    if export:
        for export_type in set(export):
            click.echo(fmt_msg("Handling Export: %s" % (export_type, )))
            if export_type == 'csv':
                from glycan_profiling.cli.export import glycopeptide_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycopeptides.csv" % (base, )
                context.invoke(glycopeptide_identification,
                               database_connection=output_path,
                               analysis_identifier=analyzer.analysis_id,
                               output_path=export_path)
            elif export_type == 'psm-csv':
                from glycan_profiling.cli.export import glycopeptide_spectrum_matches
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycopeptide-spectrum-matches.csv" % (base, )
                context.invoke(glycopeptide_spectrum_matches,
                               database_connection=output_path,
                               analysis_identifier=analyzer.analysis_id,
                               output_path=export_path)
            elif export_type == 'html':
                from glycan_profiling.cli.export import glycopeptide_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-report.html" % (base, )
                context.invoke(glycopeptide_identification,
                               database_connection=output_path,
                               analysis_identifier=analyzer.analysis_id,
                               output_path=export_path,
                               report=True)
Code example #9
def analyze_glycan_composition(database_connection,
                               sample_path,
                               hypothesis_identifier,
                               output_path,
                               analysis_name,
                               mass_shifts,
                               grouping_error_tolerance=1.5e-5,
                               mass_error_tolerance=1e-5,
                               scoring_model=None,
                               minimum_mass=500.,
                               smoothing_factor=None,
                               regularization_model=None,
                               combinatorial_mass_shift_limit=8,
                               channel=None,
                               **kwargs):
    if scoring_model is None:
        scoring_model = GeneralScorer

    database_connection = DatabaseBoundOperation(database_connection)

    if not os.path.exists(sample_path):
        channel.send(
            Message("Could not locate sample %r" % sample_path, "error"))
        return

    reader = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = reader.sample_run

    try:
        hypothesis = get_by_name_or_id(database_connection, GlycanHypothesis,
                                       hypothesis_identifier)
    except Exception:
        channel.send(
            Message("Could not locate hypothesis %r" % hypothesis_identifier,
                    "error"))
        return

    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, hypothesis.name)
    analysis_name = validate_analysis_name(None, database_connection.session,
                                           analysis_name)

    try:
        mass_shift_out = []
        for mass_shift, multiplicity in mass_shifts:
            mass_shift_out.append(validate_mass_shift(mass_shift,
                                                      multiplicity))
        expanded = []
        expanded = MzMLGlycanChromatogramAnalyzer.expand_mass_shifts(
            dict(mass_shift_out), limit=combinatorial_mass_shift_limit)
        mass_shifts = expanded
    except Abort:
        channel.send(Message.traceback())
        return

    mass_shifts = expanded

    try:
        analyzer = MzMLGlycanChromatogramAnalyzer(
            database_connection._original_connection,
            hypothesis.id,
            sample_path=sample_path,
            output_path=output_path,
            mass_shifts=mass_shifts,
            mass_error_tolerance=mass_error_tolerance,
            grouping_error_tolerance=grouping_error_tolerance,
            scoring_model=scoring_model,
            analysis_name=analysis_name,
            minimum_mass=minimum_mass)
        analyzer.start()
        analysis = analyzer.analysis
        record = project_analysis.AnalysisRecord(
            name=analysis.name,
            id=analysis.id,
            uuid=analysis.uuid,
            path=output_path,
            analysis_type=analysis.analysis_type,
            hypothesis_uuid=analysis.hypothesis.uuid,
            hypothesis_name=analysis.hypothesis.name,
            sample_name=analysis.parameters['sample_name'],
            user_id=channel.user.id)
        channel.send(Message(record.to_json(), 'new-analysis'))
    except Exception:
        channel.send(Message.traceback())
        channel.abort("An error occurred during analysis.")
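Unlike the CLI commands, analyze_glycan_composition above reports progress and errors back over a channel object (the web/worker variant of the same workflow). The sketch below stubs only the members the function actually touches (send, abort, and user.id), which is useful when exercising it outside the web UI; this is not the real channel API.

    # Hedged stub of the channel interface used above, for local testing only.
    class _StubUser(object):
        id = 0

    class _StubChannel(object):
        user = _StubUser()

        def send(self, message):
            print(message)

        def abort(self, reason):
            raise RuntimeError(reason)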
Code example #10
def search_glycan(context, database_connection, sample_path,
                  hypothesis_identifier,
                  analysis_name, mass_shifts, grouping_error_tolerance=1.5e-5,
                  mass_error_tolerance=1e-5, minimum_mass=500.,
                  scoring_model=None, regularize=None, regularization_model_path=None,
                  network_path=None,
                  output_path=None, scoring_model_features=None,
                  delta_rt=0.5, export=None, interact=False,
                  require_msms_signature=0.0, msn_mass_error_tolerance=2e-5,
                  mass_shift_combination_limit=None,
                  processes=4):
    """Identify glycan compositions from preprocessed LC-MS data, stored in mzML
    format.
    """
    if output_path is None and not interact:
        output_path = make_analysis_output_path("glycan")
    if scoring_model is None:
        scoring_model = GeneralScorer

    if mass_shift_combination_limit is None:
        mass_shift_combination_limit = 8

    if scoring_model_features:
        for feature in scoring_model_features:
            scoring_model.add_feature(validate_ms1_feature_name(feature))

    if regularization_model_path is not None:
        with open(regularization_model_path, 'r') as mod_file:
            regularization_model = GridPointSolution.load(mod_file)
    else:
        regularization_model = None

    if network_path is not None:
        with open(network_path, 'r') as netfile:
            network = GraphReader(netfile).network
    else:
        network = None

    database_connection = DatabaseBoundOperation(database_connection)
    ms_data = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = ms_data.sample_run

    try:
        hypothesis = get_by_name_or_id(
            database_connection, GlycanHypothesis, hypothesis_identifier)
    except Exception:
        click.secho("Could not locate a Glycan Hypothesis with identifier %r" %
                    hypothesis_identifier, fg='yellow')
        raise click.Abort()

    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, hypothesis.name)

    analysis_name = validate_analysis_name(
        context, database_connection.session, analysis_name)

    mass_shifts = [validate_mass_shift(mass_shift, multiplicity)
                   for mass_shift, multiplicity in mass_shifts]
    expanded = []
    expanded = MzMLGlycanChromatogramAnalyzer.expand_mass_shifts(dict(mass_shifts), limit=mass_shift_combination_limit)
    mass_shifts = expanded

    click.secho("Preparing analysis of %s by %s" %
                (sample_run.name, hypothesis.name), fg='cyan')

    analyzer = MzMLGlycanChromatogramAnalyzer(
        database_connection._original_connection, hypothesis.id,
        sample_path=sample_path, output_path=output_path, mass_shifts=mass_shifts,
        mass_error_tolerance=mass_error_tolerance,
        msn_mass_error_tolerance=msn_mass_error_tolerance,
        grouping_error_tolerance=grouping_error_tolerance,
        scoring_model=scoring_model,
        minimum_mass=minimum_mass,
        regularize=regularize,
        regularization_model=regularization_model,
        network=network,
        analysis_name=analysis_name,
        delta_rt=delta_rt,
        require_msms_signature=require_msms_signature,
        n_processes=processes)
    analyzer.display_header()
    analyzer.start()
    if interact:
        try:
            import IPython
            click.secho(fmt_msg("Beginning Interactive Session..."), fg='cyan')
            IPython.embed()
        except ImportError:
            click.secho(fmt_msg("Interactive Session Not Supported"), fg='red')
    if export:
        for export_type in set(export):
            click.echo(fmt_msg("Handling Export: %s" % (export_type,)))
            if export_type == 'csv':
                from glycan_profiling.cli.export import glycan_composition_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycan-chromatograms.csv" % (base,)
                context.invoke(
                    glycan_composition_identification,
                    database_connection=output_path,
                    analysis_identifier=analyzer.analysis_id,
                    output_path=export_path)
            elif export_type == 'html':
                from glycan_profiling.cli.export import glycan_composition_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-report.html" % (base,)
                context.invoke(
                    glycan_composition_identification,
                    database_connection=output_path,
                    analysis_identifier=analyzer.analysis_id,
                    output_path=export_path,
                    report=True)
            elif export_type == 'glycan-list':
                from glycan_profiling.cli.export import glycan_hypothesis
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycan-chromatograms.csv" % (base,)
                context.invoke(
                    glycan_hypothesis,
                    database_connection=output_path,
                    hypothesis_identifier=analyzer.hypothesis_id,
                    output_path=export_path,
                    importable=True)
            elif export_type == "model":
                base = os.path.splitext(output_path)[0]
                export_path = "%s-regularization-parameters.txt" % (base,)
                params = analyzer.analysis.parameters.get("network_parameters")
                if params is None:
                    click.secho("No parameters were fitted, skipping \"model\"", fg='red')
                else:
                    with open(export_path, 'w') as fp:
                        params.dump(fp)
            else:
                click.secho("Unrecognized Export: %s" % (export_type,), fg='yellow')
Code example #11
def search_glycopeptide_multipart(context, database_connection, decoy_database_connection, sample_path,
                                  target_hypothesis_identifier=1, decoy_hypothesis_identifier=1,
                                  analysis_name=None, output_path=None, grouping_error_tolerance=1.5e-5,
                                  mass_error_tolerance=1e-5, msn_mass_error_tolerance=2e-5, psm_fdr_threshold=0.05,
                                  peak_shape_scoring_model=None, tandem_scoring_model=None, glycan_score_threshold=1.0,
                                  memory_database_index=False, save_intermediate_results=None, processes=4,
                                  workload_size=500, mass_shifts=None, export=None, maximum_mass=float('inf'),
                                  isotope_probing_range=3):
    if output_path is None:
        output_path = make_analysis_output_path("glycopeptide")
    if tandem_scoring_model is None:
        tandem_scoring_model = "log_intensity"
    database_connection = DatabaseBoundOperation(database_connection)
    decoy_database_connection = DatabaseBoundOperation(decoy_database_connection)
    ms_data = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = ms_data.sample_run

    try:
        target_hypothesis = get_by_name_or_id(
            database_connection, GlycopeptideHypothesis, target_hypothesis_identifier)
    except Exception:
        click.secho("Could not locate Target Glycopeptide Hypothesis with identifier %r" %
                    target_hypothesis_identifier, fg='yellow')
        raise click.Abort()

    try:
        decoy_hypothesis = get_by_name_or_id(
            decoy_database_connection, GlycopeptideHypothesis, decoy_hypothesis_identifier)
    except Exception:
        click.secho("Could not locate Decoy Glycopeptide Hypothesis with identifier %r" %
                    decoy_hypothesis_identifier, fg='yellow')
        raise click.Abort()

    tandem_scoring_model = validate_glycopeptide_tandem_scoring_function(
        context, tandem_scoring_model)

    mass_shifts = [validate_mass_shift(mass_shift, multiplicity)
                   for mass_shift, multiplicity in mass_shifts]
    expanded = []
    expanded = MzMLGlycanChromatogramAnalyzer.expand_mass_shifts(
        dict(mass_shifts), crossproduct=False)
    mass_shifts = expanded

    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, target_hypothesis.name)

    analysis_name = validate_analysis_name(
        context, database_connection.session, analysis_name)

    click.secho("Preparing analysis of %s by %s" % (
        sample_run.name, target_hypothesis.name), fg='cyan')
    analyzer = MultipartGlycopeptideLCMSMSAnalyzer(
        database_connection._original_connection,
        decoy_database_connection._original_connection,
        target_hypothesis.id,
        decoy_hypothesis.id,
        sample_path,
        output_path,
        analysis_name=analysis_name,
        grouping_error_tolerance=grouping_error_tolerance,
        mass_error_tolerance=mass_error_tolerance,
        msn_mass_error_tolerance=msn_mass_error_tolerance,
        psm_fdr_threshold=psm_fdr_threshold,
        tandem_scoring_model=tandem_scoring_model,
        glycan_score_threshold=glycan_score_threshold,
        mass_shifts=mass_shifts,
        n_processes=processes,
        spectrum_batch_size=workload_size,
        maximum_mass=maximum_mass,
        probing_range_for_missing_precursors=isotope_probing_range,
        use_memory_database=memory_database_index)
    analyzer.display_header()
    gps, unassigned, target_decoy_set = analyzer.start()
    if save_intermediate_results is not None:
        analyzer.log("Saving Intermediate Results")
        with open(save_intermediate_results, 'wb') as handle:
            pickle.dump((target_decoy_set, gps), handle)
    if export:
        for export_type in set(export):
            click.echo(fmt_msg("Handling Export: %s" % (export_type,)))
            if export_type == 'csv':
                from glycan_profiling.cli.export import glycopeptide_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycopeptides.csv" % (base,)
                context.invoke(
                    glycopeptide_identification,
                    database_connection=output_path,
                    analysis_identifier=analyzer.analysis_id,
                    output_path=export_path)
            elif export_type == 'psm-csv':
                from glycan_profiling.cli.export import glycopeptide_spectrum_matches
                base = os.path.splitext(output_path)[0]
                export_path = "%s-glycopeptide-spectrum-matches.csv" % (base,)
                context.invoke(
                    glycopeptide_spectrum_matches,
                    database_connection=output_path,
                    analysis_identifier=analyzer.analysis_id,
                    output_path=export_path)
            elif export_type == 'html':
                from glycan_profiling.cli.export import glycopeptide_identification
                base = os.path.splitext(output_path)[0]
                export_path = "%s-report.html" % (base,)
                context.invoke(
                    glycopeptide_identification,
                    database_connection=output_path,
                    analysis_identifier=analyzer.analysis_id,
                    output_path=export_path,
                    report=True)
Code example #12
def analyze_glycopeptide_sequences(
        database_connection,
        sample_path,
        hypothesis_identifier,
        output_path,
        analysis_name,
        grouping_error_tolerance=1.5e-5,
        mass_error_tolerance=1e-5,
        msn_mass_error_tolerance=2e-5,
        psm_fdr_threshold=0.05,
        peak_shape_scoring_model=None,
        minimum_oxonium_threshold=0.05,
        workload_size=1000,
        use_peptide_mass_filter=True,
        mass_shifts=None,
        permute_decoy_glycan_fragments=False,
        include_rare_signature_ions=False,
        model_retention_time=False,
        search_strategy=GlycopeptideSearchStrategyEnum.classic,
        decoy_database_connection=None,
        decoy_hypothesis_id=None,
        tandem_scoring_model=None,
        channel=None,
        **kwargs):
    if peak_shape_scoring_model is None:
        peak_shape_scoring_model = GeneralScorer.clone()
        peak_shape_scoring_model.add_feature(get_feature("null_charge"))

    database_connection = DatabaseBoundOperation(database_connection)
    if decoy_database_connection:
        decoy_database_connection = DatabaseBoundOperation(
            decoy_database_connection)

    if not os.path.exists(sample_path):
        channel.send(
            Message("Could not locate sample %r" % sample_path, "error"))
        return

    reader = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = reader.sample_run

    try:
        hypothesis = get_by_name_or_id(database_connection,
                                       GlycopeptideHypothesis,
                                       hypothesis_identifier)
    except Exception:
        channel.send(
            Message("Could not locate hypothesis %r" % hypothesis_identifier,
                    "error"))
        channel.abort("An error occurred during analysis.")

    if decoy_database_connection:
        try:
            decoy_hypothesis = get_by_name_or_id(decoy_database_connection,
                                                 GlycopeptideHypothesis,
                                                 decoy_hypothesis_id)
        except Exception:
            channel.send(
                Message("Could not locate hypothesis %r" % decoy_hypothesis_id,
                        "error"))
            channel.abort("An error occurred during analysis.")

    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, hypothesis.name)
    analysis_name = validate_analysis_name(None, database_connection.session,
                                           analysis_name)

    try:
        mass_shift_out = []
        for mass_shift, multiplicity in mass_shifts:
            mass_shift_out.append(validate_mass_shift(mass_shift,
                                                      multiplicity))
        expanded = []
        expanded = MzMLGlycanChromatogramAnalyzer.expand_mass_shifts(
            dict(mass_shift_out), crossproduct=False)
        mass_shifts = expanded
    except Abort:
        channel.send(Message.traceback())
        return

    try:
        if search_strategy == GlycopeptideSearchStrategyEnum.classic:
            analyzer = MzMLGlycopeptideLCMSMSAnalyzer(
                database_connection._original_connection,
                hypothesis.id,
                sample_path,
                output_path=output_path,
                analysis_name=analysis_name,
                grouping_error_tolerance=grouping_error_tolerance,
                mass_error_tolerance=mass_error_tolerance,
                msn_mass_error_tolerance=msn_mass_error_tolerance,
                psm_fdr_threshold=psm_fdr_threshold,
                peak_shape_scoring_model=peak_shape_scoring_model,
                oxonium_threshold=minimum_oxonium_threshold,
                spectrum_batch_size=workload_size,
                use_peptide_mass_filter=use_peptide_mass_filter,
                mass_shifts=mass_shifts,
                permute_decoy_glycans=permute_decoy_glycan_fragments,
                rare_signatures=include_rare_signature_ions,
                model_retention_time=model_retention_time,
                tandem_scoring_model=tandem_scoring_model)
        elif search_strategy == GlycopeptideSearchStrategyEnum.classic_comparison:
            analyzer = MzMLComparisonGlycopeptideLCMSMSAnalyzer(
                database_connection._original_connection,
                decoy_database_connection._original_connection,
                hypothesis.id,
                sample_path,
                output_path=output_path,
                analysis_name=analysis_name,
                grouping_error_tolerance=grouping_error_tolerance,
                mass_error_tolerance=mass_error_tolerance,
                msn_mass_error_tolerance=msn_mass_error_tolerance,
                psm_fdr_threshold=psm_fdr_threshold,
                peak_shape_scoring_model=peak_shape_scoring_model,
                oxonium_threshold=minimum_oxonium_threshold,
                spectrum_batch_size=workload_size,
                use_peptide_mass_filter=use_peptide_mass_filter,
                mass_shifts=mass_shifts,
                permute_decoy_glycans=permute_decoy_glycan_fragments,
                rare_signatures=include_rare_signature_ions,
                model_retention_time=model_retention_time,
                tandem_scoring_model=tandem_scoring_model)
        elif search_strategy == GlycopeptideSearchStrategyEnum.multipart:
            analyzer = MultipartGlycopeptideLCMSMSAnalyzer(
                database_connection._original_connection,
                decoy_database_connection._original_connection,
                hypothesis.id,
                decoy_hypothesis.id,
                sample_path,
                output_path=output_path,
                analysis_name=analysis_name,
                grouping_error_tolerance=grouping_error_tolerance,
                mass_error_tolerance=mass_error_tolerance,
                msn_mass_error_tolerance=msn_mass_error_tolerance,
                psm_fdr_threshold=psm_fdr_threshold,
                peak_shape_scoring_model=peak_shape_scoring_model,
                spectrum_batch_size=workload_size,
                mass_shifts=mass_shifts,
                rare_signatures=include_rare_signature_ions,
                model_retention_time=model_retention_time,
                tandem_scoring_model=tandem_scoring_model)
        _ = analyzer.start()

        analysis = analyzer.analysis
        if analysis is not None:
            record = project_analysis.AnalysisRecord(
                name=analysis.name,
                id=analysis.id,
                uuid=analysis.uuid,
                path=output_path,
                analysis_type=analysis.analysis_type,
                hypothesis_uuid=analysis.hypothesis.uuid,
                hypothesis_name=analysis.hypothesis.name,
                sample_name=analysis.parameters['sample_name'],
                user_id=channel.user.id)
            channel.send(Message(record.to_json(), 'new-analysis'))
        else:
            channel.send(
                Message("No glycopeptides were identified for \"%s\"" %
                        (analysis_name, )))

    except Exception:
        channel.send(Message.traceback())
        channel.abort("An error occurred during analysis.")
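In analyze_glycopeptide_sequences above, GlycopeptideSearchStrategyEnum decides which analyzer class is constructed. The branching reduces to a simple mapping; only the three members and analyzer classes that appear in the code above are assumed here.

    # Sketch of the strategy dispatch performed by the branches above.
    ANALYZER_BY_STRATEGY = {
        GlycopeptideSearchStrategyEnum.classic: MzMLGlycopeptideLCMSMSAnalyzer,
        GlycopeptideSearchStrategyEnum.classic_comparison:
            MzMLComparisonGlycopeptideLCMSMSAnalyzer,
        GlycopeptideSearchStrategyEnum.multipart: MultipartGlycopeptideLCMSMSAnalyzer,
    }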