def convert(self, structure_text):
    """Turn GlycoCT text into a hashable glycan composition, with memoization.

    Results are stored in ``self.cache`` keyed by the exact input text, so a
    repeated call with the same text returns the previously built object
    without parsing again.

    Parameters
    ----------
    structure_text : str
        Text handed to ``glycoct.loads``.

    Returns
    -------
    HashableGlycanComposition
        The composition of the parsed structure after its stems,
        configurations and positions have been dropped.
    """
    cache = self.cache
    if structure_text in cache:
        return cache[structure_text]

    parsed = glycoct.loads(structure_text)
    # Work on the thawed copy so the drop_* calls below can modify it.
    working = HashableGlycanComposition.from_glycan(parsed).thaw()
    working.drop_stems()
    working.drop_configurations()
    working.drop_positions()

    result = HashableGlycanComposition(working)
    cache[structure_text] = result
    return result
def parse_glycan_composition(string):
    """Parse a glycan composition string, reusing earlier results.

    Looks ``string`` up in the module-level ``glycan_composition_cache``
    first; on a miss the text is parsed with
    ``HashableGlycanComposition.parse`` and the result is stored under
    ``string`` before being returned.
    """
    try:
        composition = glycan_composition_cache[string]
    except KeyError:
        composition = HashableGlycanComposition.parse(string)
        glycan_composition_cache[string] = composition
    return composition
def to_decoy_glycan(string):
    """Parse a glycan composition string and tag it with a ``"#decoy#"`` entry.

    Results are memoized in the module-level ``decoy_glycan_cache`` keyed by
    ``string``. On a miss the text is parsed with
    ``HashableGlycanComposition.parse``, its ``"#decoy#"`` entry is set to 2,
    and the tagged composition is cached and returned.
    """
    try:
        decoy = decoy_glycan_cache[string]
    except KeyError:
        decoy = HashableGlycanComposition.parse(string)
        # NOTE(review): the count of 2 is taken as-is from the original code;
        # its meaning is not visible from this function.
        decoy["#decoy#"] = 2
        decoy_glycan_cache[string] = decoy
    return decoy
def _from_csv(cls, row):
    """Build a ``cls`` instance from one CSV record supplied as a mapping.

    ``row`` is consumed: the ``weighted_neutral_mass``, ``apex_time``,
    ``total_signal`` and ``glycan_composition`` entries are popped off it.
    The first three are converted with ``float``, the last is parsed with
    ``HashableGlycanComposition.parse``. Every remaining entry is forwarded
    to ``cls`` as a keyword argument after a best-effort numeric conversion.

    Raises
    ------
    KeyError
        If one of the four required columns is missing from ``row``.
    """
    def _coerce(raw):
        # Prefer int, then float; anything that is neither is passed through.
        for caster in (int, float):
            try:
                return caster(raw)
            except (ValueError, TypeError):
                pass
        return raw

    # The generator is consumed left to right, so the columns are popped in
    # the order they are listed here.
    mass, apex_time, total_signal = (
        float(row.pop(column))
        for column in ("weighted_neutral_mass", "apex_time", "total_signal")
    )
    composition = HashableGlycanComposition.parse(row.pop("glycan_composition"))

    extras = {}
    for key, raw_value in row.items():
        extras[key] = _coerce(raw_value)
    return cls(mass, apex_time, total_signal, composition, **extras)
def validate_glycan_text(path):
    """Print a summary of the glycan compositions listed in a text file.

    The file at ``path`` is read through ``TextFileGlycanCompositionLoader``.
    For every record the first field is parsed as a glycan composition and
    its residues are collected. The report printed with ``click.secho``
    contains the number of records, each distinct residue with its mass,
    and, highlighted in yellow, any residue whose mass is exactly zero.
    """
    from glycan_profiling.database.builder.glycan.glycan_source import (
        TextFileGlycanCompositionLoader)

    composition_count = 0
    glycan_classes = set()
    seen_residues = set()
    massless_residues = set()

    with open(path, 'r') as handle:
        for record in TextFileGlycanCompositionLoader(handle):
            composition_count += 1
            # Gathered for parity with the original code; not reported below.
            glycan_classes.update(record[1])
            parsed = HashableGlycanComposition.parse(record[0])
            for residue in parsed.keys():
                if residue.mass() == 0:
                    massless_residues.add(residue)
                seen_residues.add(residue)

    click.secho("%d glycan compositions" % (composition_count,))
    click.secho("Residues:")
    for residue in seen_residues:
        click.secho("\t%s - %f" % (str(residue), residue.mass()))

    if massless_residues:
        click.secho("Unresolved Residues:", fg='yellow')
        click.secho(
            "\n".join(str(residue) for residue in massless_residues),
            fg='yellow')
def validate_glycan_text(path):
    """Report which residues appear in a glycan composition text file.

    Each record produced by ``TextFileGlycanCompositionLoader`` for the file
    at ``path`` has its first field parsed into a glycan composition. The
    function then writes to the console, via ``click.secho``, the record
    count, every distinct residue together with its mass, and a yellow
    "Unresolved Residues" section for residues whose mass is exactly zero.
    """
    from glycan_profiling.database.builder.glycan.glycan_source import (
        TextFileGlycanCompositionLoader)

    with open(path, 'r') as handle:
        entries = TextFileGlycanCompositionLoader(handle)
        total = 0
        glycan_classes, all_residues, zero_mass = set(), set(), set()
        for total, entry in enumerate(entries, 1):
            # Second field is accumulated but never reported, as before.
            glycan_classes.update(entry[1])
            for residue in HashableGlycanComposition.parse(entry[0]).keys():
                if residue.mass() == 0:
                    zero_mass.add(residue)
                all_residues.add(residue)

        click.secho("%d glycan compositions" % (total, ))
        click.secho("Residues:")
        for residue in all_residues:
            click.secho("\t%s - %f" % (str(residue), residue.mass()))

        if zero_mass:
            unresolved_names = [str(r) for r in zero_mass]
            click.secho("Unresolved Residues:", fg='yellow')
            click.secho("\n".join(unresolved_names), fg='yellow')
import glypy
import matplotlib
# The backend is selected before pyplot is imported, as in the original.
matplotlib.use("agg")
from matplotlib import pyplot as plt
import matplotlib_venn
from glypy.structure.glycan_composition import HashableGlycanComposition

# For each input file, the text before the first tab on every line is parsed
# as a glycan composition and collected into a set (duplicates collapse).
combinatorial = set()
with open("combinatorial-glycans.txt") as combfile:
    for line in combfile:
        combinatorial.add(
            HashableGlycanComposition.parse(line.partition("\t")[0]))

krambeck = set()
with open("krambeck_glycan_compositions.txt") as krambeck_file:
    for line in krambeck_file:
        krambeck.add(
            HashableGlycanComposition.parse(line.partition("\t")[0]))

glyspace = set()
with open("glyspace_glycan_compositions.txt") as glyspace_file:
    for line in glyspace_file:
        glyspace.add(
            HashableGlycanComposition.parse(line.partition("\t")[0]))

# Number of distinct compositions found in each source.
c, k, g = len(combinatorial), len(krambeck), len(glyspace)
# Substituents handed to detatch_monosaccharide_substituents for every
# composition built below.
substituents_to_detatch = [
    glypy.Substituent("sulfate"),
    glypy.Substituent("phosphate"),
]

# Pass 1: reduce every structure to a composition with configurations, stems
# and positions dropped, and collect the distinct results.
compositions = set()
for i, structure in enumerate(structures):
    if not i % 100:
        print("Converted %d glycan structures. %d glycan compositions." % (i, len(compositions)))
    glycan_comp = GlycanComposition.from_glycan(structure)
    glycan_comp.drop_configurations()
    glycan_comp.drop_stems()
    glycan_comp.drop_positions()
    glycan_comp = detatch_monosaccharide_substituents(
        glycan_comp, substituents_to_detatch)
    try:
        hashable = HashableGlycanComposition(glycan_comp)
        compositions.add(hashable)
    except ValueError as ex:
        # Report the failure and move on to the next structure.
        print(ex)

# Pass 2: keep only compositions built entirely from the accepted components.
valid_components = {"Hex", "HexNAc", "Neu5Ac", "Fuc", "@sulfate"}
filtered_compositions = set()
for i, composition in enumerate(compositions):
    if not i % 100:
        print("%d glycan compositions filtered. %d glycan compositions accepted." % (i, len(filtered_compositions)))
    components = set(map(str, composition.keys()))
    if components <= valid_components:
        filtered_compositions.add(composition)
import csv

from glypy.structure.glycan_composition import HashableGlycanComposition

# Convert the count table in Default_Combination_V2.csv into one glycan
# composition per line, each tagged "N-Linked".
#
# Fixes relative to the previous version:
#   * the output is opened in text mode, because str is written to it
#     (binary mode raises TypeError on Python 3);
#   * counts are converted to int before the "> 0" filter, because csv yields
#     strings and str-vs-int comparison fails on Python 3 and is always true
#     on Python 2, so zero counts were never dropped;
#   * the final summary reports the number of rows written instead of the
#     last zero-based index, and no longer fails when there are no data rows;
#   * an empty input file or blank rows no longer raise.
with open("./Default_Combination_V2.csv") as fh, open(
        "krambeck_glycan_compositions.txt", 'w') as out:
    reader = csv.reader(fh)
    # Skip the header; the default keeps an empty file from raising
    # StopIteration.
    next(reader, None)
    parsed = 0
    for i, row in enumerate(reader):
        if i % 100 == 0:
            print("%d glycan compositions processed" % (i, ))
        if not row:
            # csv.reader yields an empty list for a blank line.
            continue
        # Columns 0-3 hold the HexNAc, Hex, Fuc and NeuAc counts.
        counts = dict(
            HexNAc=int(row[0]),
            Hex=int(row[1]),
            Fuc=int(row[2]),
            NeuAc=int(row[3]))
        gc = HashableGlycanComposition({
            residue: count
            for residue, count in counts.items() if count > 0
        })
        out.write("%s\tN-Linked\n" % (gc, ))
        parsed += 1
    print("%d glycan compositions parsed" % (parsed, ))
def shift_glycan_composition(self, delta):
    """Return a copy of this object whose glycan composition is reduced by ``delta``.

    The copy is created with ``from_obj`` on this object's class. Its
    ``glycan_composition`` is a new ``HashableGlycanComposition`` built from
    the current composition with ``delta`` subtracted; ``self`` is not
    reassigned.
    """
    duplicate = self.__class__.from_obj(self)
    shifted = HashableGlycanComposition(self.glycan_composition) - delta
    duplicate.glycan_composition = shifted
    return duplicate
def _from_dict(cls, d):
    """Rebuild a ``cls`` instance from its dictionary form.

    ``d`` is modified in place: its ``'composition'`` text is replaced by
    the parsed ``HashableGlycanComposition`` and each entry of
    ``'glycan_types'`` is replaced by the matching ``GlycanTypes`` member.
    The updated mapping is then expanded into keyword arguments for ``cls``.
    """
    composition_text = d['composition']
    d['composition'] = HashableGlycanComposition.parse(composition_text)

    resolved_types = []
    for type_name in d['glycan_types']:
        resolved_types.append(GlycanTypes[type_name])
    d['glycan_types'] = resolved_types

    return cls(**d)