Beispiel #1
0
 def convert(self, structure_text):
     """Return the glycan composition for a GlycoCT structure string.

     Results are memoized in ``self.cache`` keyed by ``structure_text``.
     On a cache miss the text is loaded with ``glycoct.loads``, turned
     into a composition, stripped of stem, configuration and position
     information, and wrapped in a ``HashableGlycanComposition`` before
     being stored and returned.
     """
     if structure_text not in self.cache:
         parsed = glycoct.loads(structure_text)
         # Work on the thawed form so the drop_* calls can edit it in place.
         working = HashableGlycanComposition.from_glycan(parsed).thaw()
         working.drop_stems()
         working.drop_configurations()
         working.drop_positions()
         simplified = HashableGlycanComposition(working)
         self.cache[structure_text] = simplified
         return simplified
     return self.cache[structure_text]
Beispiel #2
0
def parse_glycan_composition(string):
    """Parse ``string`` into a ``HashableGlycanComposition``, with memoization.

    The module-level ``glycan_composition_cache`` is consulted first; on a
    miss the parsed composition is stored there under ``string`` and
    returned. Repeated calls with the same text return the same object.
    """
    try:
        composition = glycan_composition_cache[string]
    except KeyError:
        composition = HashableGlycanComposition.parse(string)
        glycan_composition_cache[string] = composition
    return composition
Beispiel #3
0
def to_decoy_glycan(string):
    """Parse ``string`` into a composition tagged with a ``"#decoy#"`` entry.

    The module-level ``decoy_glycan_cache`` is consulted first. On a miss
    the text is parsed, the ``"#decoy#"`` key is set to 2 on the result,
    and the tagged composition is cached under ``string`` and returned.
    """
    try:
        decoy = decoy_glycan_cache[string]
    except KeyError:
        decoy = HashableGlycanComposition.parse(string)
        # Tag the composition before it becomes visible through the cache.
        decoy["#decoy#"] = 2
        decoy_glycan_cache[string] = decoy
    return decoy
 def convert(self, structure_text):
     """Convert GlycoCT text to a simplified, hashable glycan composition.

     ``self.cache`` is checked first and a stored result is returned
     unchanged. Otherwise the structure is loaded via ``glycoct.loads``,
     its composition has stems, configurations and positions dropped,
     and the resulting ``HashableGlycanComposition`` is cached under
     ``structure_text`` and returned.
     """
     if structure_text not in self.cache:
         glycan = glycoct.loads(structure_text)
         # The drop_* methods are applied to the thawed composition.
         editable = HashableGlycanComposition.from_glycan(glycan).thaw()
         editable.drop_stems()
         editable.drop_configurations()
         editable.drop_positions()
         result = HashableGlycanComposition(editable)
         self.cache[structure_text] = result
         return result
     return self.cache[structure_text]
Beispiel #5
0
    def _from_csv(cls, row):
        """Build an instance of ``cls`` from one CSV row mapping.

        The ``weighted_neutral_mass``, ``apex_time``, ``total_signal`` and
        ``glycan_composition`` entries are popped from ``row`` (so ``row``
        is modified in place) and passed positionally; the first three are
        converted to ``float`` and the last is parsed into a
        ``HashableGlycanComposition``. Every remaining entry is forwarded
        as a keyword argument after a best-effort numeric conversion.
        """
        def _coerce(raw):
            # Prefer int, then float; fall back to the untouched value.
            for caster in (int, float):
                try:
                    return caster(raw)
                except (ValueError, TypeError):
                    continue
            return raw

        mass = float(row.pop("weighted_neutral_mass"))
        apex_time = float(row.pop("apex_time"))
        total_signal = float(row.pop("total_signal"))
        composition_text = row.pop("glycan_composition")
        composition = HashableGlycanComposition.parse(composition_text)
        extras = {}
        for key, raw in row.items():
            extras[key] = _coerce(raw)
        return cls(mass, apex_time, total_signal, composition, **extras)
Beispiel #6
0
def validate_glycan_text(path):
    """Print a summary of the glycan compositions listed in a text file.

    Each entry produced by ``TextFileGlycanCompositionLoader`` is counted
    and its first field parsed as a glycan composition. The distinct
    residues are printed with their masses, and residues whose mass is
    exactly zero are listed again in yellow as unresolved. The second
    field of each entry is collected into a set but not reported.
    """
    from glycan_profiling.database.builder.glycan.glycan_source import TextFileGlycanCompositionLoader
    with open(path, 'r') as handle:
        entries = TextFileGlycanCompositionLoader(handle)
        total = 0
        class_labels = set()
        seen_residues = set()
        massless = set()
        for total, entry in enumerate(entries, 1):
            class_labels.update(entry[1])
            parsed = HashableGlycanComposition.parse(entry[0])
            for component in parsed.keys():
                if component.mass() == 0:
                    massless.add(component)
                seen_residues.add(component)
        click.secho("%d glycan compositions" % (total,))
        click.secho("Residues:")
        for component in seen_residues:
            click.secho("\t%s - %f" % (str(component), component.mass()))
        if massless:
            warning_style = dict(fg='yellow')
            click.secho("Unresolved Residues:", **warning_style)
            click.secho("\n".join(map(str, massless)), **warning_style)
Beispiel #7
0
def validate_glycan_text(path):
    """Report the residues found in a glycan composition text file.

    The file at ``path`` is read through ``TextFileGlycanCompositionLoader``.
    For every entry the first field is parsed into a composition and its
    residues are gathered; the entry count, each residue with its mass,
    and (in yellow) any residues whose mass equals zero are printed with
    ``click.secho``. The second field of each entry is accumulated into a
    set that is not printed.
    """
    from glycan_profiling.database.builder.glycan.glycan_source import TextFileGlycanCompositionLoader
    with open(path, 'r') as handle:
        records = TextFileGlycanCompositionLoader(handle)
        record_count = 0
        class_labels = set()
        all_residues = set()
        zero_mass = set()
        for record_count, record in enumerate(records, 1):
            class_labels.update(record[1])
            composition = HashableGlycanComposition.parse(record[0])
            for unit in composition.keys():
                if unit.mass() == 0:
                    zero_mass.add(unit)
                all_residues.add(unit)
        click.secho("%d glycan compositions" % (record_count, ))
        click.secho("Residues:")
        for unit in all_residues:
            click.secho("\t%s - %f" % (str(unit), unit.mass()))
        if zero_mass:
            highlight = dict(fg='yellow')
            click.secho("Unresolved Residues:", **highlight)
            click.secho("\n".join(map(str, zero_mass)), **highlight)
import glypy
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt
import matplotlib_venn
from glypy.structure.glycan_composition import HashableGlycanComposition

# Load three tab-delimited composition lists. Only the first column of each
# line is parsed; duplicates collapse because the results are stored in sets.
with open("combinatorial-glycans.txt") as combfile:
    combinatorial = {
        HashableGlycanComposition.parse(line.split("\t")[0])
        for line in combfile
    }

with open("krambeck_glycan_compositions.txt") as krambeck_file:
    krambeck = {
        HashableGlycanComposition.parse(line.split("\t")[0])
        for line in krambeck_file
    }

with open("glyspace_glycan_compositions.txt") as glyspace_file:
    glyspace = {
        HashableGlycanComposition.parse(line.split("\t")[0])
        for line in glyspace_file
    }

# Number of distinct compositions in each source.
c, k, g = len(combinatorial), len(krambeck), len(glyspace)
# Substituents passed to detatch_monosaccharide_substituents for every
# composition built below.
substituents_to_detatch = [glypy.Substituent("sulfate"), glypy.Substituent("phosphate")]

# Reduce each structure to a composition with configurations, stems and
# positions dropped, then collect the distinct hashable results.
compositions = set()
for i, structure in enumerate(structures):
    if not i % 100:
        progress = (i, len(compositions))
        print("Converted %d glycan structures. %d glycan compositions." % progress)
    glycan_comp = GlycanComposition.from_glycan(structure)
    glycan_comp.drop_configurations()
    glycan_comp.drop_stems()
    glycan_comp.drop_positions()
    glycan_comp = detatch_monosaccharide_substituents(glycan_comp, substituents_to_detatch)
    try:
        compositions.add(HashableGlycanComposition(glycan_comp))
    except ValueError as ex:
        # A composition that cannot be wrapped is reported and skipped.
        print(ex)


# Only compositions built entirely from these component names are kept.
valid_components = {"Hex", "HexNAc", "Neu5Ac", "Fuc", "@sulfate"}

filtered_compositions = set()
for i, composition in enumerate(compositions):
    if not i % 100:
        progress = (i, len(filtered_compositions))
        print("%d glycan compositions filtered. %d glycan compositions accepted." % progress)
    components = {str(key) for key in composition.keys()}
    if components <= valid_components:
        filtered_compositions.add(composition)
import csv
from glypy.structure.glycan_composition import HashableGlycanComposition

# Convert the rows of Default_Combination_V2.csv into a tab-delimited list of
# glycan compositions, one per line, each labelled "N-Linked".
# The first four columns are read as HexNAc, Hex, Fuc and NeuAc counts.
# The output is opened in text mode because formatted str values are written.
with open("./Default_Combination_V2.csv") as fh, open(
        "krambeck_glycan_compositions.txt", 'w') as out:
    reader = csv.reader(fh)
    # skip header
    next(reader)
    # Keeps the final report well-defined when there are no data rows.
    i = -1
    for i, row in enumerate(reader):
        if i % 100 == 0:
            print("%d glycan compositions processed" % (i, ))
        # csv.reader yields every cell as text; convert to int so the
        # zero-count filter below compares numbers rather than strings.
        counts = dict(
            HexNAc=int(row[0]), Hex=int(row[1]),
            Fuc=int(row[2]), NeuAc=int(row[3]))
        gc = HashableGlycanComposition({
            k: v
            for k, v in counts.items()
            if v > 0
        })
        out.write("%s\tN-Linked\n" % (gc, ))
# i is the index of the last row, so the number of rows handled is i + 1.
print("%d glycan compositions parsed" % (i + 1, ))
Beispiel #11
0
 def shift_glycan_composition(self, delta):
     """Return a copy of this object with ``delta`` subtracted from its glycan composition.

     The copy is created with ``from_obj`` on this object's class; its
     ``glycan_composition`` is replaced by a new
     ``HashableGlycanComposition`` built from the current one minus
     ``delta``. ``self`` is not reassigned.
     """
     shifted = self.__class__.from_obj(self)
     base_composition = HashableGlycanComposition(self.glycan_composition)
     shifted.glycan_composition = base_composition - delta
     return shifted
 def _from_dict(cls, d):
     """Construct an instance of ``cls`` from a dictionary of fields.

     ``d['composition']`` is parsed into a ``HashableGlycanComposition``
     and each entry of ``d['glycan_types']`` is looked up in
     ``GlycanTypes``. Both replacements are written back into ``d``
     itself (the caller's dictionary is modified) before it is expanded
     into keyword arguments for ``cls``.
     """
     composition_text = d['composition']
     d['composition'] = HashableGlycanComposition.parse(composition_text)
     type_names = d['glycan_types']
     d['glycan_types'] = [GlycanTypes[name] for name in type_names]
     return cls(**d)
Beispiel #13
0
 def _from_dict(cls, d):
     """Rebuild an instance of ``cls`` from its dictionary form.

     The ``'composition'`` entry is replaced by the parsed
     ``HashableGlycanComposition`` and the ``'glycan_types'`` entry by
     the matching ``GlycanTypes`` members. ``d`` is updated in place and
     then passed to ``cls`` as keyword arguments.
     """
     raw_composition = d['composition']
     d['composition'] = HashableGlycanComposition.parse(raw_composition)
     raw_types = d['glycan_types']
     d['glycan_types'] = [GlycanTypes[label] for label in raw_types]
     return cls(**d)