예제 #1
0
파일: builder.py 프로젝트: mstim/glycresoft
    def from_paths(cls,
                   analysis_paths_and_ids,
                   glycopeptide_hypothesis_path,
                   glycopeptide_hypothesis_id,
                   glycan_hypothesis_path,
                   glycan_hypothesis_id,
                   unobserved_penalty_scale=None,
                   lambda_limit=0.2,
                   require_multiple_observations=True,
                   observation_aggregator=None,
                   output_path=None,
                   n_threads=4,
                   q_value_threshold=0.05):
        """Construct an instance of ``cls`` from database locations.

        Opens the glycopeptide and glycan composition hypothesis databases
        from their ``(path, hypothesis id)`` pairs, creates one
        ``serialize.AnalysisDeserializer`` per entry of
        ``analysis_paths_and_ids`` and passes everything to ``cls(...)``.

        ``analysis_paths_and_ids`` is an iterable of two-item entries; the
        first item is given to ``AnalysisDeserializer`` positionally and the
        second as ``analysis_id``. Every remaining keyword argument is
        forwarded to ``cls`` unchanged under the same name.

        Returns the newly constructed instance.
        """
        glycopeptide_database = GlycopeptideDiskBackedStructureDatabase(
            glycopeptide_hypothesis_path, glycopeptide_hypothesis_id)
        glycan_database = GlycanCompositionDiskBackedStructureDatabase(
            glycan_hypothesis_path, glycan_hypothesis_id)

        # One deserializer per (connection, analysis id) entry, in input order.
        analyses = []
        for connection, analysis_id in analysis_paths_and_ids:
            analyses.append(
                serialize.AnalysisDeserializer(
                    connection, analysis_id=analysis_id))

        # Options handed through to the constructor verbatim.
        options = {
            "unobserved_penalty_scale": unobserved_penalty_scale,
            "lambda_limit": lambda_limit,
            "require_multiple_observations": require_multiple_observations,
            "observation_aggregator": observation_aggregator,
            "output_path": output_path,
            "n_threads": n_threads,
            "q_value_threshold": q_value_threshold,
        }
        return cls(analyses, glycopeptide_database, glycan_database, **options)
예제 #2
0
    def make_template_stream(self):
        """Return the stream produced by rendering "overview.templ".

        The template receives the analysis read through a
        ``serialize.AnalysisDeserializer`` for ``self.analysis_id``, that
        analysis' ``hypothesis`` and ``sample_run``, ``self.protein_index``,
        the result of ``self.iterglycoproteins()`` and this object itself as
        ``renderer``.
        """
        overview_template = self.env.get_template("overview.templ")

        deserializer = serialize.AnalysisDeserializer(
            self.database_connection._original_connection,
            analysis_id=self.analysis_id)

        analysis_hypothesis = deserializer.analysis.hypothesis
        analysis_sample_run = deserializer.analysis.sample_run

        # Template context, passed to the template as keyword arguments.
        context = {
            "analysis": deserializer.analysis,
            "hypothesis": analysis_hypothesis,
            "sample_run": analysis_sample_run,
            "protein_index": self.protein_index,
            "glycoprotein_iterator": self.iterglycoproteins(),
            "renderer": self,
        }
        return overview_template.stream(**context)
예제 #3
0
    def make_template_stream(self):
        """Return the stream produced by rendering "overview.templ".

        Emits a status update when ``self.use_dynamic_display_mode`` is
        truthy, then renders the template with the analysis read through a
        ``serialize.AnalysisDeserializer`` for ``self.analysis_id``, that
        analysis' ``hypothesis`` and ``sample_run``, ``self.protein_index``,
        the result of ``self.iterglycoproteins()``, this object as
        ``renderer`` and the ``use_dynamic_display_mode`` flag.
        """
        overview_template = self.env.get_template("overview.templ")

        deserializer = serialize.AnalysisDeserializer(
            self.database_connection._original_connection,
            analysis_id=self.analysis_id)

        analysis_hypothesis = deserializer.analysis.hypothesis
        analysis_sample_run = deserializer.analysis.sample_run

        if self.use_dynamic_display_mode:
            self.status_update("Using dynamic display mode")

        # Template context, passed to the template as keyword arguments.
        context = {
            "analysis": deserializer.analysis,
            "hypothesis": analysis_hypothesis,
            "sample_run": analysis_sample_run,
            "protein_index": self.protein_index,
            "glycoprotein_iterator": self.iterglycoproteins(),
            "renderer": self,
            "use_dynamic_display_mode": self.use_dynamic_display_mode,
        }
        return overview_template.stream(**context)
예제 #4
0
    def make_template_stream(self):
        """Build the template stream for the glycan chromatogram report.

        Loads the glycan composition chromatograms and the unidentified
        chromatograms belonging to ``self.analysis_id`` and stores them on
        ``self.glycan_chromatograms`` and ``self.unidentified_chromatograms``.
        It then draws the LC-MS and abundance summary plots and renders them
        into "overview.templ".

        Returns the stream of the rendered "overview.templ" template, or the
        stream of a minimal placeholder HTML page when no glycan composition
        chromatograms were loaded.
        """
        template_obj = self.env.get_template("overview.templ")

        ads = serialize.AnalysisDeserializer(
            self.database_connection._original_connection,
            analysis_id=self.analysis_id)

        self.glycan_chromatograms = gcs = (
            ads.load_glycan_composition_chromatograms())
        # Unidentified chromatograms are fetched with a direct query rather
        # than through ads.load_unidentified_chromatograms().
        self.unidentified_chromatograms = und = ChromatogramFilter(
            ads.query(serialize.UnidentifiedChromatogram).filter(
                serialize.UnidentifiedChromatogram.analysis_id ==
                self.analysis_id).all())

        if len(gcs) == 0:
            # Nothing to plot: return a placeholder page instead of the report.
            self.log(
                "No glycan compositions were identified. Skipping report building"
            )
            templ = Template('''
                <html>
                <style>
                body {
                    font-family: sans-serif;
                }
                </style>
                <body>
                    <h3>No glycan compositions were identified</h3>
                </body>
                </html>
                ''')
            return templ.stream()

        # Build a real list rather than passing ``filter(...)``: on Python 3
        # ``filter`` yields a one-shot iterator, which would be exhausted
        # after the builder's first pass over it. On Python 2 this is the
        # same list ``filter`` returned.
        passing_chromatograms = [
            x for x in gcs + und if x.score > self.threshold
        ]
        summary_plot = summaries.GlycanChromatographySummaryGraphBuilder(
            passing_chromatograms)
        # NOTE(review): min_score is fixed at 5 here while the pre-filter
        # above uses self.threshold -- confirm this is intended.
        lcms_plot, composition_abundance_plot = summary_plot.draw(min_score=5)

        try:
            lcms_plot.ax.legend_.set_visible(False)
        except AttributeError:
            # The legend may not have been created
            pass
        lcms_plot.ax.set_title("Glycan Composition\nLC-MS Aggregated EICs",
                               fontsize=24)

        # Double the size of the LC-MS figure in both dimensions.
        fig = lcms_plot.ax.figure
        fig.set_figwidth(fig.get_figwidth() * 2.)
        fig.set_figheight(fig.get_figheight() * 2.)

        composition_abundance_plot.ax.set_title(
            "Glycan Composition\nTotal Abundances", fontsize=24)
        # Re-apply the existing x label so only its font size changes.
        composition_abundance_plot.ax.set_xlabel(
            composition_abundance_plot.ax.get_xlabel(), fontsize=14)

        def resolve_key(key):
            """Look ``key`` up among the glycan chromatograms first, then
            fall back to the unidentified chromatograms."""
            match = gcs.find_key(key)
            if match is None:
                match = und.find_key(key)
            return match

        template_stream = (template_obj.stream(
            analysis=ads.analysis,
            lcms_plot=svguri_plot(lcms_plot.ax,
                                  bbox_inches='tight',
                                  patchless=True,
                                  svg_width="100%"),
            composition_abundance_plot=svguri_plot(
                composition_abundance_plot.ax,
                bbox_inches='tight',
                patchless=True,
                svg_width="100%"),
            glycan_chromatograms=gcs,
            unidentified_chromatograms=und,
            resolve_key=resolve_key))
        return template_stream
예제 #5
0
import glypy
from glypy.composition.composition_transform import strip_derivatization
import numpy as np
import pandas as pd
from sklearn.metrics import auc as sk_auc, roc_curve, roc_auc_score, precision_recall_curve

from matplotlib import pyplot as plt, rcParams
rcParams['figure.figsize'] = 8, 6
Formate = chromatogram_tree.Formate
Ammonium = chromatogram_tree.Ammonium
Unmodified = chromatogram_tree.Unmodified


# Summary plots for the native (fit-prior) analysis results.
# print() is called with a single argument so the output is the same under
# both Python 2 and Python 3.
print("Native Plots")
native_path = "analysis/results/sulfated/phil-82-native-fit-prior.db"
ads = serialize.AnalysisDeserializer(native_path)

und = trace.ChromatogramFilter(
    ads.query(serialize.UnidentifiedChromatogram).all())
gcs = trace.ChromatogramFilter(
    ads.query(serialize.GlycanCompositionChromatogram).all())

# Select the chromatograms to plot once, as a real list: len() needs a sized
# container and on Python 3 ``filter`` would return a one-shot iterator.
selected_chromatograms = [
    x for x in gcs + und if x.score > 5 and not x.used_as_adduct
]
print(len(selected_chromatograms))

summary_plot = summaries.GlycanChromatographySummaryGraphBuilder(
    selected_chromatograms)
lcms_plot, composition_abundance_plot = summary_plot.draw(min_score=5)
try:
    lcms_plot.ax.legend_.set_visible(False)
except AttributeError:
    # The legend may not have been created
    pass

lcms_plot.ax.set_xlabel("Retention Time (Min)", fontsize=14)
lcms_plot.ax.set_ylabel("Relative Abundance", fontsize=14)
from matplotlib import pyplot as plt, rcParams
rcParams['figure.figsize'] = 8, 6
Formate = chromatogram_tree.Formate
Ammonium = chromatogram_tree.Ammonium
Unmodified = chromatogram_tree.Unmodified

# Dataset label -> path of the analysis result database to load.
files = {
    "combinatorial_unregularized": "./analysis/results/igg-native-unregularized.db",
    "combinatorial_partial": "./analysis/results/igg-native-fit-prior.db",
    "combinatorial_grid": "./analysis/results/igg-native-grid.db",
}

# One deserializer per result database, stored under the same label.
deserializers = {}
for dataset_label, result_path in files.items():
    deserializers[dataset_label] = serialize.AnalysisDeserializer(result_path)

print("Loading Datasets")
# Wrap each dataset's glycan composition chromatogram query in a
# ChromatogramFilter.
gcs_map = {}
for dataset_label, dataset_reader in deserializers.items():
    chromatogram_query = dataset_reader.query(
        serialize.GlycanCompositionChromatogram)
    gcs_map[dataset_label] = trace.ChromatogramFilter(chromatogram_query)

# Gather every distinct chromatogram key seen in any dataset.
all_keys = set()
for group, members in gcs_map.items():
    for member in members:
        all_keys.add(member.key)
true_positives = set(
import numpy as np
import pandas as pd
from sklearn.metrics import auc as sk_auc, roc_curve, roc_auc_score, precision_recall_curve

from matplotlib import pyplot as plt, rcParams
rcParams['figure.figsize'] = 8, 6
Formate = chromatogram_tree.Formate
Ammonium = chromatogram_tree.Ammonium
Unmodified = chromatogram_tree.Unmodified

import variable_writer

# Glycan composition hypothesis database, opened with hypothesis id 1.
db = database.GlycanCompositionDiskBackedStructureDatabase(
    "./analysis/hypothesis/reduced-permethylated.db", 1)


# Readers for the three serum result databases.
ads = serialize.AnalysisDeserializer(
    "./analysis/results/serum-rp-unregularized.db")
ads2 = serialize.AnalysisDeserializer(
    "./analysis/results/serum-rp-prior.db")
ads3 = serialize.AnalysisDeserializer(
    "./analysis/results/serum-rp-grid.db")


# Read every row of the match table into memory before the file is closed.
with open("./analysis/multiglycan-match.csv", "rb") as f:
    reader = csv.reader(f)
    obs = list(reader)
true_positives, false_positives = [], []

for row in obs:
    gc = glypy.glycan_composition.HashableGlycanComposition.parse(row[0])
    status = row[1]
    try:
        remark = row[2]
    except IndexError:
예제 #8
0
# Glycan composition hypothesis database, opened with hypothesis id 1.
db = database.GlycanCompositionDiskBackedStructureDatabase(
    "./analysis/hypothesis/native.db", 1)

# Dataset label -> path of the analysis result database to load.
files = {
    "combinatorial_unregularized":
    "./analysis/results/sulfated/phil-bs-native-unregularized.db",
    "combinatorial_partial":
    "./analysis/results/sulfated/phil-bs-native-fit-prior.db",
    "combinatorial_grid":
    "./analysis/results/sulfated/phil-bs-native-grid.db",
    "glyspace_unregularized":
    "./analysis/test/phil-bs-humanish-unregularized.db",
    "glyspace_partial":
    "./analysis/test/phil-bs-humanish-prior.db",
    "glyspace_grid":
    "./analysis/test/phil-bs-humanish-grid.db",
    "krambeck_unregularized":
    "./analysis/test/phil-bs-krambeck-unregularized.db",
    "krambeck_partial":
    "./analysis/test/phil-bs-krambeck-prior.db",
    "krambeck_grid":
    "./analysis/test/phil-bs-krambeck-grid.db"
}

# One deserializer per result database, stored under the same label.
deserializers = {}
for dataset_label, result_path in files.items():
    deserializers[dataset_label] = serialize.AnalysisDeserializer(result_path)

print("Loading Datasets")
# Wrap each dataset's glycan composition chromatogram query in a
# ChromatogramFilter.
gcs_map = {}
for dataset_label, dataset_reader in deserializers.items():
    chromatogram_query = dataset_reader.query(
        serialize.GlycanCompositionChromatogram)
    gcs_map[dataset_label] = chromatogram_tree.ChromatogramFilter(
        chromatogram_query)

# Gather every distinct chromatogram key seen in any dataset.
all_keys = set()
for group, members in gcs_map.items():
    for member in members:
        all_keys.add(member.key)
true_positives = set([
    gc.glycan_composition for gc in gcs_map["combinatorial_partial"] + gcs_map[
        "krambeck_grid"