def __init__(self):
    """Set the default rendering options and create the formatter."""
    # Output settings.
    self._format = "png"
    self._scale = 1.0

    # Drawing settings.
    self._notation = "cfg"
    self._display = "normalinfo"
    self._orientation = "RL"
    self._redend = True

    # GlycoCTFormat instance kept on the object for later use.
    self.fmt = GlycoCTFormat()
def __init__(self):
    """Set the default rendering options, flags, and create the formatter."""
    # Output settings.
    self._format = "png"
    self._scale = 1.0
    self._opaque = True

    # Drawing settings.
    self._notation = "snfg"
    self._display = "normalinfo"
    self._orientation = "RL"
    self._redend = True

    # Behavioural flags, both off by default.
    self._force = False
    self._verbose = False

    # GlycoCTFormat instance kept on the object for later use.
    self.fmt = GlycoCTFormat()
class GlycanImage(object):
    """Collects image rendering options and writes glycan images.

    Each option is exposed as a combined getter/setter method: called with
    no argument it returns the current value, called with an argument it
    stores the new value (and returns None).
    """

    # Public methods that are not option accessors and therefore must not
    # be reachable through set().
    _NOT_OPTIONS = ("set", "writeImage")

    def __init__(self):
        self._scale = 1.0
        self._orientation = "RL"
        self._display = "normalinfo"
        self._notation = "cfg"
        self._redend = True
        self._format = "png"
        self.fmt = GlycoCTFormat()

    def scale(self, value=None):
        """Get the scale, or set it (the value is coerced with float())."""
        if value is None:
            return self._scale
        self._scale = float(value)

    def reducing_end(self, value=None):
        """Get the reducing-end flag, or set it (coerced with bool())."""
        if value is None:
            return self._redend
        self._redend = bool(value)

    def notation(self, value=None):
        """Get or set the notation option."""
        if value is None:
            return self._notation
        self._notation = value

    def format(self, value=None):
        """Get or set the output format option."""
        if value is None:
            return self._format
        self._format = value

    def orientation(self, value=None):
        """Get or set the orientation option."""
        if value is None:
            return self._orientation
        self._orientation = value

    def display(self, value=None):
        """Get or set the display option."""
        if value is None:
            return self._display
        self._display = value

    def set(self, key, value):
        """Set the option named *key* by calling its accessor with *value*.

        Raises:
            KeyError: if *key* does not name an option accessor. Private
                attributes, plain data attributes (e.g. ``fmt``) and the
                non-option methods are rejected here as well, instead of
                failing later with a TypeError when they are called.
        """
        accessor = None
        if not key.startswith("_") and key not in self._NOT_OPTIONS:
            accessor = getattr(self, key, None)
        if not callable(accessor):
            raise KeyError(key)
        accessor(value)

    def writeImage(self, glycan, filename):
        """Render *glycan* to *filename* using the current options.

        *glycan* may already be a string; anything else is converted with
        the formatter's toStr(). Returns whatever the GlycoCT2Image
        callable returns.
        """
        glystr = glycan
        if not isinstance(glystr, basestring):
            glystr = self.fmt.toStr(glycan)
        imageWriter = GlycoCT2Image(glystr, filename,
                                    format=self._format,
                                    scale=self._scale,
                                    redend=self._redend,
                                    orient=self._orientation,
                                    display=self._display,
                                    notation=self._notation)
        return imageWriter()
class GWBFormat(object):
    """Converts glycans to the sequence string produced by GWBFormatter."""

    def __init__(self):
        self.fmt = GlycoCTFormat()

    def toStr(self, glycan):
        """Return the GWB sequence for *glycan*, or None on failure.

        *glycan* may already be a string; anything else is first converted
        with the GlycoCT formatter's toStr(). The sequence is taken from
        the last line of the writer's output. None is returned when that
        line looks like an error report, or when the writer produced no
        output at all.
        """
        glystr = glycan
        if not isinstance(glystr, basestring):
            glystr = self.fmt.toStr(glycan)
        writer = GWBFormatter(glystr)
        lines = writer().splitlines()
        if not lines:
            # Empty output used to raise IndexError on lines[-1]; report it
            # the same way as any other conversion failure.
            return None
        seq = lines[-1].strip()
        if "Exception" in seq or "org.glycoinfo" in seq:
            return None
        return seq
subshow(acc1, acc2, "g1 <= g2", subsumption.leq(g1, g2)) if __name__ == "__main__": from collections import defaultdict from manipulation import Topology, Composition # from GlyTouCan import GlyTouCan from GlycanResource import GlyTouCan gtc = GlyTouCan(usecache=False) from GlycanFormatter import GlycoCTFormat, WURCS20Format, GlycanParseError glycoct_format = GlycoCTFormat() wurcs_format = WURCS20Format() geq = GlycanEqual() gtopoeq = GlycanTopoEqual() gcompeq = GlycanCompEqual() subsumption = GlycanSubsumption() topology = Topology() acc1 = sys.argv[1] acc2 = sys.argv[2] g1 = gtc.getGlycan(acc1) g2 = gtc.getGlycan(acc2) tg2 = topology(g2) verbose = True
def __init__(self, filename):
    """Remember the archive path and derive the database name from it.

    The name is the final path component of *filename* with its last
    '.'-separated suffix removed; that suffix must equal ``self.extn``.
    """
    self.filename = filename
    stem, extn = filename.rsplit('.', 1)
    self.name = stem
    self.name = os.path.basename(stem)
    assert extn == self.extn
    self.fmt = GlycoCTFormat()
class GlycoCTDatabase:
    """Read-only access to a zip archive of GlycoCT glycan records.

    A record with accession ``A`` is stored in the archive member
    ``A[len(prefix):] + '.txt'``. Optional sibling members with the same
    stem supply extra GlyRecord keyword arguments: ``.att`` (CSV rows of
    ``key,value,...``) and ``.png`` (passed through as ``image``).
    """

    prefix = ""
    extn = "gct"
    source = ""

    def __init__(self, filename):
        """Remember the archive path and derive the database name from it."""
        self.filename = filename
        self.name, extn = filename.rsplit('.', 1)
        self.name = os.path.split(self.name)[1]
        assert extn == self.extn
        self.fmt = GlycoCTFormat()

    def getraw(self, accession):
        """Return the raw GlycoCT text for *accession*, or None if absent."""
        # The context manager closes the archive on every path, including
        # the early "not found" return that previously leaked the handle.
        with zipfile.ZipFile(self.filename, "r") as zf:
            fn = accession[len(self.prefix):] + '.txt'
            try:
                zf.getinfo(fn)
            except KeyError:
                return None
            return self._getraw(zf, fn)

    def _getraw(self, zf, name):
        """Read member *name* from the open archive *zf*."""
        return zf.read(name)

    def _get(self, zf, name):
        """Build a GlyRecord from member *name* of the open archive *zf*.

        Returns None when the GlycoCT text cannot be parsed. Any other
        failure while reading or parsing is reported on stderr with a
        traceback and terminates the process with exit status 1.
        """
        try:
            glystr = self._getraw(zf, name)
            g = self.fmt.toGlycan(glystr)
        except GlycoCTParseError:
            return None
        except Exception:
            # Narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are no longer converted into exit status 1.
            sys.stderr.write("Problem with GlycoCT file " + name + "\n")
            traceback.print_exc()
            sys.exit(1)

        stem = name.rsplit('.', 1)[0]
        kwargs = {}
        try:
            attstr = zf.read(stem + '.att')
        except KeyError:
            # No attribute member for this record.
            pass
        else:
            for r in csv.reader(StringIO(attstr)):
                if not r:
                    # csv yields [] for blank lines; nothing to record.
                    continue
                kwargs[r[0]] = copy.copy(r[1:])
        try:
            kwargs['image'] = zf.read(stem + '.png')
        except KeyError:
            # No image member for this record.
            pass
        gr = GlyRecord(source=self.source if self.source else self.name,
                       accession=self.prefix + stem,
                       glycan=g,
                       name=self.name,
                       **kwargs)
        return gr

    def get(self, accession):
        """Return the GlyRecord for *accession*, or None if absent."""
        with zipfile.ZipFile(self.filename, "r") as zf:
            fn = accession[len(self.prefix):] + '.txt'
            try:
                zf.getinfo(fn)
            except KeyError:
                return None
            return self._get(zf, fn)

    def __iter__(self):
        return self.next()

    def next(self):
        """Generate a GlyRecord for every parseable '.txt' member."""
        # The archive is closed when the generator finishes or is closed
        # early, not only when it is run to exhaustion.
        with zipfile.ZipFile(self.filename, "r") as zf:
            for name in zf.namelist():
                if not name.endswith('.txt'):
                    continue
                gr = self._get(zf, name)
                if gr:
                    yield gr
m = re.search(r'([ab]1?)?-?(\d(-?))?([RS][pP]?\d+|MDPLys)?$', glystr) if m != None: x = len(m.group(0)) glystr = glystr[:-x] try: g = self.fmt.toGlycan(glystr) except IUPACLinearParseError: traceback.print_exc() continue print >> sys.stderr, ">>CFG%03d" % acc yield GlyRecord(source="CFGArray", accession="CFG%03d" % acc, glycan=g, name=self.name) if __name__ == '__main__': import sys from GlycanFormatter import IUPACLinearFormat fmt = IUPACLinearFormat() fmt1 = GlycoCTFormat() gdb = CFGArrayDatabase(sys.argv[1]) for r in gdb: lc = fmt.toStr(r.glycan) print r.accession, lc # print fmt1.toStr(r.glycan) # print r.glycan print r.glycan sys.stdout.flush()
def __init__(self):
    """Create the GlycoCTFormat instance stored on ``self.fmt``."""
    formatter = GlycoCTFormat()
    self.fmt = formatter
# setenv GLYTOUCAN "~/projects/GlyGen/GlyTouCan/current" # # egrep -w '(GlyTouCanAccession|Saccharide)' $GLYTOUCAN/comp.txt | \ # fgrep -f $GLYTOUCAN/humanbycomp.txt | \ # python27 manipulation.py $GLYTOUCAN/wurcs.zip $GLYTOUCAN/glycoct.zip # # egrep -w '(GlyTouCanAccession|Saccharide)' $GLYTOUCAN/comp.txt | \ # fgrep -f $GLYTOUCAN/mousebycomp.txt | \ # python27 manipulation.py $GLYTOUCAN/wurcs.zip $GLYTOUCAN/glycoct.zip # # egrep -w '(GlyTouCanAccession|Saccharide)' $GLYTOUCAN/comp.txt | \ # python27 manipulation.py $GLYTOUCAN/wurcs.zip $GLYTOUCAN/glycoct.zip # wurcs_parser = WURCS20Format() glycoct_parser = GlycoCTFormat() topology = Topology() composition = Composition() basecomposition = BaseComposition() level = LevelSniffer() zf = zipfile.ZipFile(sys.argv[1]) zf1 = zipfile.ZipFile(sys.argv[2]) reader = csv.DictReader(sys.stdin,dialect='excel-tab') j = 0 for i,d in enumerate(reader): acc = d['GlyTouCanAccession'] typ = d['GlyTouCanType']
def glycoct(self):
    """Return this object converted by a GlycoCTFormat's toStr().

    The formatter is created on first use and stored in
    ``self.glycoctformat`` so later calls reuse it.
    """
    from GlycanFormatter import GlycoCTFormat
    formatter = self.glycoctformat
    if not formatter:
        formatter = GlycoCTFormat()
        self.glycoctformat = formatter
    return formatter.toStr(self)
class Glycan:
    """A glycan structure: an optional root node plus "undetermined" roots.

    Nodes and links are project objects (monosaccharides, substituents,
    linkages) reached from ``root()`` and from the undetermined roots.
    Undetermined roots are kept as a list of equivalence classes (sets)
    built by set_undetermined().
    """

    # Shared symbol tables and lazily created formatters.
    iupacSym = IUPACSym()
    lcSym = LinCodeSym()
    glycoctformat = None
    glycamformat = None

    def __init__(self,root=None):
        self.set_root(root)
        self._undetermined = None
        self._bions = None
        self._yions = None

    def root(self):
        return self._root

    def set_root(self, r):
        self._root = r

    def set_ids(self):
        """Number every node (including substituents) 1, 2, ... in all_nodes() order."""
        for i,m in enumerate(self.all_nodes(subst=True)):
            m.set_id(i+1)

    def unset_ids(self):
        """Clear the id of every node (including substituents)."""
        for m in self.all_nodes(subst=True):
            m.unset_id()

    def set_undetermined(self, und):
        """Store the undetermined roots, grouped into equivalence classes.

        Two roots are placed in the same class when undetroot_equals()
        (without id mapping) says they match. None or an empty collection
        clears the undetermined state.
        """
        if und == None or len(und) == 0:
            self._undetermined = None
            return
        u = list(und)
        ueq = defaultdict(set)
        placed = set()
        for i in range(len(u)):
            if i in placed:
                continue
            placed.add(i)
            ueq[i].add(u[i])
            for j in range(i+1,len(u)):
                if j in placed:
                    continue
                if not self.undetroot_equals(u[i],u[j],mapids=False):
                    continue
                ueq[i].add(u[j])
                placed.add(j)
        # Classes whose (arbitrary) first member is a monosaccharide sort first.
        self._undetermined = sorted(ueq.values(),key=lambda ec: 1*(iter(ec).next()).is_monosaccharide(),reverse=True)

    def undetermined(self):
        """True when undetermined roots are present."""
        return self._undetermined != None

    def undetermined_roots(self):
        """Yield every undetermined root, class by class."""
        if self._undetermined != None:
            for ec in self._undetermined:
                for r in ec:
                    yield r

    def undetermined_root_reprs(self):
        """Yield (representative, class size) for each equivalence class."""
        if self._undetermined != None:
            for ec in self._undetermined:
                for r in ec:
                    yield (r,len(ec))
                    break

    def unconnected_roots(self):
        """Yield the undetermined roots that are not currently connected."""
        for r in self.undetermined_roots():
            if not r.connected():
                yield r

    def isolated_nodes(self):
        """Yield the unconnected undetermined roots that have no parent links."""
        for r in self.unconnected_roots():
            if len(r.parent_links()) == 0:
                yield r

    def isolated_node_reprs(self):
        """Yield (representative, count) of isolated nodes per equivalence class."""
        # NOTE(review): nesting reconstructed from whitespace-collapsed
        # source; as laid out, a class with no isolated node yields (None, 0)
        # - confirm against the original file.
        if self._undetermined != None:
            for ec in self._undetermined:
                count = 0
                repr = None
                for r in ec:
                    if not r.connected() and len(r.parent_links()) == 0:
                        count += 1
                        if not repr:
                            repr = r
                yield (repr,count)

    def has_root(self):
        return (self._root != None)

    def fully_determined(self):
        """True when there are no undetermined roots and every node and link is determined.

        The root only needs to satisfy root_partially_determined(); all
        other nodes and all links must satisfy fully_determined().
        """
        if self.undetermined():
            return False
        for m in self.all_nodes(subst=True):
            if m == self.root():
                if not m.root_partially_determined():
                    return False
            else:
                if not m.fully_determined():
                    return False
        for l in self.all_links(subst=True):
            if not l.fully_determined():
                return False
        return True

    # NOTE: a disabled (commented-out) prototype of add_instantiation() and
    # auto_instantiations() used to live here. It enumerated one parent link
    # per undetermined child and pruned combinations that put too many links
    # on a parent or reused a parent position. See version control for it.

    def set_instantiation(self,inst):
        """Apply the instantiation *inst* (a collection of undetermined links).

        Undetermined links found in *inst* are marked instantiated, and each
        undetermined root is flagged connected iff it is the child of such a
        link reached from the root.
        """
        # NOTE(review): nesting of the two statements after "if l in inst"
        # is reconstructed from whitespace-collapsed source - confirm.
        conn = set()
        todo = []
        if self.root():
            todo.append(self.root())
        while len(todo) > 0:
            m = todo.pop(0)
            for l in m.links(False):
                if l.undetermined():
                    if l in inst:
                        l.set_instantiated(True)
                        conn.add(l.child())
                todo.insert(0,l.child())
        for ur in self.undetermined_roots():
            ur.set_connected(ur in conn)
        return

    def instantiations(self):
        """Yield self once per instantiation of the unconnected undetermined roots.

        The same Glycan object is yielded each time, mutated in place by
        set_instantiation(); with no undetermined roots it is yielded once.
        """
        if not self._undetermined:
            yield self
            return
        plsets = []
        for ur in self.undetermined_roots():
            if not ur.connected():
                plsets.append(ur.parent_links())
        for inst in combinatorics.product(*plsets,accumulator=combinatorics.set_accumulator):
            self.set_instantiation(inst)
            yield self
        return

    def instantiate(self):
        """Apply the first instantiation (if any) and return self."""
        if not self._undetermined:
            return self
        for g in self.instantiations():
            break
        return self

    def uninstantiate(self):
        """Apply the empty instantiation and return self."""
        if not self._undetermined:
            return self
        self.set_instantiation(set())
        return self

    def instantiation_count(self):
        """Product of the number of parent links of every undetermined root."""
        total = 1
        for ur in self.undetermined_roots():
            total *= len(ur.parent_links())
        return total

    def dfsvisit(self,f,m=None,subst=False):
        """Pre-order depth-first traversal calling f on each node.

        With m omitted the traversal starts at the root and then at each
        unconnected root. With subst true, f is also called on each node's
        substituents.
        """
        if m == None:
            # NOTE(review): if self.root() is None this call re-enters the
            # m == None branch again - confirm callers never hit that case.
            self.dfsvisit(f,self.root(),subst)
            for r in self.unconnected_roots():
                self.dfsvisit(f,r,subst)
        else:
            f(m)
            if subst:
                for s in m.substituents():
                    f(s)
            for c in m.children():
                self.dfsvisit(f,c,subst)

    def dfsvisit_post(self,f,m=None,subst=False):
        """Post-order variant of dfsvisit(): f(m) is called after m's children."""
        if m == None:
            self.dfsvisit_post(f,self.root(),subst)
            for r in self.unconnected_roots():
                self.dfsvisit_post(f,r,subst)
        else:
            if subst:
                for s in m.substituents():
                    f(s)
            for c in m.children():
                self.dfsvisit_post(f,c,subst)
            f(m)

    class SubtreeCompositionVisit:
        """Post-order visitor that stores subtree compositions on each node.

        Writes m._elemental_composition when comp is given and
        m._symbol_composition when sym is given; relies on the children
        having been visited first.
        """
        def __init__(self,sym=None,comp=None):
            self.sym = sym
            self.comp = comp

        def visit(self,m):
            if self.comp:
                eltcomp = m.composition(self.comp)
                for c in m.children():
                    eltcomp.add(c._elemental_composition)
                m._elemental_composition = eltcomp
            if self.sym:
                symcomp = Composition()
                symcomp[self.sym.toStr(m)] = 1
                for c in m.children():
                    symcomp.add(c._symbol_composition)
                m._symbol_composition = symcomp

    class ElementalCompositionVisit:
        """Visitor that accumulates the composition of every visited node."""
        def __init__(self,comp):
            self.table = comp
            self.eltcomp = Composition()

        def visit(self,m):
            self.eltcomp.add(m.composition(self.table))

    def subtree_composition(self,m,sym_table=None,comp_table=None):
        """Annotate the subtree at m (default: root) with subtree compositions.

        Only valid when the glycan has no undetermined roots (asserted).
        """
        assert not self.undetermined()
        if m == None:
            m = self.root()
        scv = Glycan.SubtreeCompositionVisit(sym=sym_table,comp=comp_table)
        self.dfsvisit_post(scv.visit,m)

    def elemental_composition(self,comp_table):
        """Sum m.composition(comp_table) over all_nodes(undet_subst=True)."""
        eltcomp = Composition()
        for m in self.all_nodes(undet_subst=True):
            ec = m.composition(comp_table)
            eltcomp.add(ec)
        return eltcomp

    def byions(self,force=False):
        """Return (bions, yions): one (symbol comp, elemental comp, link) per link.

        For each link the first list holds the child subtree compositions
        and the second holds the root compositions minus that subtree.
        Subtree compositions are (re)computed when missing or when force.
        """
        bions = []
        yions = []
        r = self.root()
        if force or (not hasattr(r,'_symbol_composition') or not hasattr(r,'_elemental_composition')):
            self.subtree_composition(r,sym_table=iupacSym,comp_table=ctable)
        for l in self.all_links():
            # yi,bi = self.split_clone(l)
            c = l.child()
            bions.append((c._symbol_composition,c._elemental_composition,l))
            symcomp = copy.copy(r._symbol_composition)
            symcomp.sub(c._symbol_composition)
            eltcomp = copy.copy(r._elemental_composition)
            eltcomp.sub(c._elemental_composition)
            yions.append((symcomp,eltcomp,l))
        return bions,yions

    def composition(self,force=False):
        """Return (symbol composition, elemental composition) of the whole tree."""
        r = self.root()
        if force or (not hasattr(r,'_symbol_composition') or not hasattr(r,'_elemental_composition')):
            self.subtree_composition(r,sym_table=iupacSym,comp_table=ctable)
        return r._symbol_composition,r._elemental_composition

    def native_elemental_composition(self):
        return self.elemental_composition(ctable)

    def permethylated_elemental_composition(self):
        return self.elemental_composition(pctable)

    def underivitized_molecular_weight(self,adduct='H2O'):
        """Mass of the native elemental composition plus the adduct's mass."""
        return self.native_elemental_composition().mass(elmt) + \
               Composition.fromstr(adduct).mass(elmt)

    def permethylated_molecular_weight(self,adduct='C2H6O'):
        """Mass of the permethylated elemental composition plus the adduct's mass."""
        return self.permethylated_elemental_composition().mass(elmt) + \
               Composition.fromstr(adduct).mass(elmt)

    def fragments(self,r=None,force=False):
        """Recursively yield fragment tuples for the subtree at r (default: root).

        Each tuple is (symbol comp, elemental comp, flag, count). The flag
        is True for fragments that include r; fragments taken from below r
        are re-yielded with the flag False.
        NOTE(review): the fourth field appears to count cleavages - confirm.
        """
        atroot = False
        if r == None:
            r = self.root()
            atroot = True
        if force or (not hasattr(r,'_symbol_composition') or not hasattr(r,'_elemental_composition')):
            self.subtree_composition(r,sym_table=iupacSym,comp_table=ctable)
        links = r.links()
        nlink = len(links)

        if nlink == 0:
            # self
            fr = (copy.copy(r._symbol_composition),\
                  copy.copy(r._elemental_composition),True,0)
            yield fr
            return

        # Per child link: fragstore0 holds the child's flag-True fragments,
        # fragstore1 its flag-False ones.
        fragstore0 = []
        fragstore1 = []
        for l in links:
            fragstore0.append([])
            fragstore1.append([])
            for fr in self.fragments(l.child()):
                if fr[2]:
                    fragstore0[-1].append(fr)
                else:
                    fragstore1[-1].append(fr)
            # Empty-composition entry: the whole child subtree is removed.
            fragstore0[-1].append((Composition(),Composition(),True,1))

        for i,prd in enumerate(product(*fragstore0)):
            symcomp = copy.copy(r._symbol_composition)
            eltcomp = copy.copy(r._elemental_composition)
            cl = 0
            for l,fr in zip(links,prd):
                # determine the amount to subtract
                symcomp1 = copy.copy(l.child()._symbol_composition)
                symcomp1.sub(fr[0])
                symcomp.sub(symcomp1)
                eltcomp1 = copy.copy(l.child()._elemental_composition)
                eltcomp1.sub(fr[1])
                eltcomp.sub(eltcomp1)
                cl += fr[3]
            fr = (symcomp,eltcomp,True,cl)
            yield fr

        for i in range(nlink):
            # [:-1] skips the empty-composition entry appended above.
            for fr in fragstore0[i][:-1]:
                fr1 = (fr[0],fr[1],False,fr[3]+1)
                yield fr1
            for fr in fragstore1[i]:
                fr1 = (fr[0],fr[1],False,fr[3])
                yield fr1

    def subtree_nodes(self,root,subst=False):
        """Yield each node reachable from root once (and substituents if subst)."""
        # NOTE(review): nesting under "if m not in seen" is reconstructed
        # from whitespace-collapsed source - confirm.
        todo = [root]
        seen = set()
        while len(todo) > 0:
            m = todo.pop(0)
            if m not in seen:
                seen.add(m)
                yield m
            if subst:
                for s in m.substituents():
                    if s not in seen:
                        seen.add(s)
                        yield s
            for c in reversed(m.children()):
                todo.insert(0,c)

    def all_nodes(self,subst=False,undet_subst=False):
        """Yield the nodes under the root and under the unconnected roots.

        Unconnected roots that are not monosaccharides are only used as
        starting points when subst or undet_subst is true.
        """
        todo = []
        if self.root():
            todo.append(self.root())
        for ur in self.unconnected_roots():
            if (subst or undet_subst) or ur.is_monosaccharide():
                todo.append(ur)
        for root in todo:
            for m in self.subtree_nodes(root,subst):
                yield m

    # Symbols counted by iupac_composition().
    iupac_composition_syms = ['Man','Gal','Glc','Xyl','Fuc','ManNAc','GlcNAc','GalNAc','NeuAc','NeuGc','Hex','HexNAc','dHex','Pent','Sia','GlcA','GalA','IdoA','ManA','HexA','GlcN','GalN','ManN','HexN']
    iupac_aldi_composition_syms = ['Man+aldi','Gal+aldi','Glc+aldi','Fuc+aldi','ManNAc+aldi','GlcNAc+aldi','GalNAc+aldi','Hex+aldi','HexNAc+aldi','dHex+aldi']
    subst_composition_syms = ['S','P','Me','aldi']

    def iupac_composition(self, floating_substituents=True, aggregate_basecomposition=True, redend_only=False):
        """Count nodes by IUPAC symbol and return the counts as a Composition.

        With floating_substituents, a symbol such as 'A+b' is split on '+'
        and each part counted separately; otherwise the '+aldi' symbols are
        accepted whole. Unrecognised nodes are counted under 'Xxx' or 'X'.
        'Count' is the total over iupac_composition_syms plus 'Xxx'. With
        aggregate_basecomposition the generic classes (Hex, HexNAc, ...)
        are replaced by sums over their members. With redend_only only the
        root node is counted.
        """
        validsyms = self.iupac_composition_syms + self.subst_composition_syms
        if not floating_substituents:
            validsyms += self.iupac_aldi_composition_syms

        c = Composition()
        for sym in (validsyms + ['Xxx','X']):
            c[sym] = 0
        if not redend_only:
            nodeiterable = self.all_nodes(undet_subst=True)
        else:
            if self.has_root():
                nodeiterable = [ self.root() ]
            else:
                nodeiterable = []
        for m in nodeiterable:
            try:
                sym = iupacSym.toStr(m)
            except KeyError:
                if isinstance(m,Monosaccharide):
                    c['Xxx'] += 1
                else:
                    c['X'] += 1
                continue

            if floating_substituents:
                syms = [ s.strip() for s in sym.split('+') ]
            else:
                syms = [sym]

            if syms[0] not in validsyms:
                if isinstance(m,Monosaccharide):
                    syms[0] = 'Xxx'
                else:
                    syms[0] = 'X'

            for i in range(1,len(syms)):
                if syms[i] not in self.subst_composition_syms:
                    syms[i] = 'X'

            if syms[0] == 'Xxx' or 'X' in syms:
                c['Xxx'] += 1
                continue

            # NOTE(review): 'X' in syms above already covers syms[0] == 'X',
            # so this branch looks unreachable - confirm intent.
            if syms[0] == 'X':
                c['X'] += 1
                continue

            for sym in syms:
                c[sym] += 1

        c['Count'] = sum(map(c.__getitem__,self.iupac_composition_syms + ['Xxx']))
        if aggregate_basecomposition:
            c['Hex'] = sum(map(c.__getitem__,('Man','Gal','Glc','Hex')))
            c['Hex+aldi'] = sum(map(c.__getitem__,('Man+aldi','Gal+aldi','Glc+aldi','Hex+aldi')))
            c['HexNAc'] = sum(map(c.__getitem__,('GalNAc','GlcNAc','ManNAc','HexNAc')))
            c['HexNAc+aldi'] = sum(map(c.__getitem__,('GalNAc+aldi','GlcNAc+aldi','ManNAc+aldi','HexNAc+aldi')))
            c['dHex'] = sum(map(c.__getitem__,('Fuc','dHex')))
            c['dHex+aldi'] = sum(map(c.__getitem__,('Fuc+aldi','dHex+aldi')))
            c['Pent'] = sum(map(c.__getitem__,('Xyl','Pent')))
            c['Sia'] = sum(map(c.__getitem__,('NeuAc','NeuGc','Sia')))
            c['HexA'] = sum(map(c.__getitem__,('GlcA','GalA','IdoA','ManA','HexA')))
            c['HexN'] = sum(map(c.__getitem__,('GlcN','GalN','ManN','HexN')))
        return c

    def iupac_redend(self, floating_substituents=True, aggregate_basecomposition=True):
        """Return the non-substituent symbols with a positive count for the root node."""
        comp = self.iupac_composition(floating_substituents=floating_substituents,
                                      aggregate_basecomposition=aggregate_basecomposition,
                                      redend_only=True)
        return [ key for key in comp if comp[key] > 0 and key not in self.subst_composition_syms and key != "Count"]

    def glycoct(self):
        """Return this glycan as formatted by a lazily created GlycoCTFormat."""
        from GlycanFormatter import GlycoCTFormat
        if not self.glycoctformat:
            self.glycoctformat = GlycoCTFormat()
        return self.glycoctformat.toStr(self)

    def glycam(self):
        """Return this glycan as formatted by a lazily created IUPACGlycamFormat."""
        from GlycanFormatter import IUPACGlycamFormat
        if not self.glycamformat:
            self.glycamformat = IUPACGlycamFormat()
        return self.glycamformat.toStr(self)

    def subtree_links(self,root,subst=False,uninstantiated=False):
        """Yield the links (and substituent links if subst) of nodes under root."""
        for m in self.subtree_nodes(root):
            if subst:
                for sl in m.substituent_links():
                    yield sl
            for l in m.links(instantiated_only=(not uninstantiated)):
                yield l

    def all_links(self,subst=False,uninstantiated=False):
        """Yield the links (and substituent links if subst) of all_nodes()."""
        for m in self.all_nodes():
            if subst:
                for sl in m.substituent_links():
                    yield sl
            for l in m.links(instantiated_only=(not uninstantiated)):
                yield l

    def clone(self):
        """Return a deep copy, rebuilding the undetermined roots on the copy.

        Side effect: set_ids() is called on self first.
        """
        self.set_ids()
        if self.root():
            g = Glycan(self.root().deepclone())
        else:
            g = Glycan()
        newurs = set()
        # Undetermined roots with parents are found through the cloned links.
        for l in g.all_links(uninstantiated=True):
            if l.undetermined():
                newurs.add(l.child())
        # Parentless undetermined roots are not reachable and are cloned here.
        for ur in self.undetermined_roots():
            if len(ur.parent_links()) == 0:
                newurs.add(ur.deepclone())
        g.set_undetermined(newurs)
        return g

    def clone_with_identified_link(self,link):
        """Deep-copy a fully rooted glycan; return (copy, the copy's counterpart of link)."""
        assert not self.undetermined()
        r,l = self.root().deepclone(identified_link=link)
        return Glycan(r),l

    def split_clone(self,link):
        """Clone, then cut the clone at link; return (root part, detached child part)."""
        g,l = self.clone_with_identified_link(link)
        f = Glycan(l.child())
        l.parent().del_link(l)
        l.child().del_parent_link(l)
        return g,f

    def equals(self,g):

        # Three cases, at least, sigh.
        # 1) no root => compositions, no edges => "trivial" graph
        #    isomorphism based on enumeration of potential node matchings
        # 2) tree (not undetermined) => subtree-based
        # 3) rooted DAG (due to undetermined nodes) => graph
        #    isomorphism
        # Cases 1 & 3 are implemented by the same code

        # ids in both instances are reset in equal. If returns True,
        # then the ids of each monosaccharide in each glycan will match
        # their counterpart.

        self.set_ids()
        g.unset_ids()

        if self.has_root() and g.has_root():
            if not self.undetermined() and not g.undetermined():
                # print >>sys.stderr, "Tree comparison"
                g.unset_ids()
                # both are trees, use subtree_equals()
                return self.root().subtree_equals(g.root(),mapids=True)
            else:
                # print >>sys.stderr, "Tree comparison shortcut"
                # At least one side has undetermined roots: compare the
                # rooted trees first as a cheap rejection test.
                if not self.root().subtree_equals(g.root(),mapids=False):
                    return False

        # print >>sys.stderr, "Graph isomorphism comparison"
        g.set_ids()

        nodeset1 = list(self.all_nodes(subst=False))
        nodeset2 = list(g.all_nodes(subst=False))

        if len(nodeset1) != len(nodeset2):
            return False

        # Links as (parent id, link tuple, child id) triples.
        linkset1 = set()
        for l in self.all_links(uninstantiated=True):
            linkset1.add((l.parent().id(),l.astuple(),l.child().id()))

        linkset2 = set()
        for l in g.all_links(uninstantiated=True):
            linkset2.add((l.parent().id(),l.astuple(),l.child().id()))

        if len(linkset1) != len(linkset2):
            return False

        # print >>sys.stderr, " ".join(map(lambda i: "%2s"%(i.id(),),nodeset1))
        # print >>sys.stderr, " ".join(map(lambda i: "%2s"%(i.id(),),nodeset2))

        iters = 0
        for ii,jj in iterecmatchings(nodeset1, nodeset2, self.monosaccharide_match):
            iters += 1
            matching = dict(zip(map(lambda m: m.id(),ii),map(lambda m: m.id(),jj)))
            # print >>sys.stderr, " ".join(map(lambda i: "%2s"%(i.id(),),ii))
            # print >>sys.stderr, " ".join(map(lambda i: "%2s"%(i.id(),),jj))
            # Relabel self's links through the candidate matching and compare.
            linkset3 = set()
            for f,l,t in linkset1:
                linkset3.add((matching[f],l,matching[t]))
            if linkset3 == linkset2:
                for mi,mj in zip(ii,jj):
                    mj.set_id(mi.id())
                # print >>sys.stderr, "%d iterations to find an isomorphism"%(iters,)
                return True

        return False

    @staticmethod
    def monosaccharide_match(a,b):
        """Node compatibility test used by equals().

        Requires a.equals(b), equal numbers of parent links and of child
        links (instantiated-only and all), and at least one matching
        between the child links in which links and children are equal.
        """
        # print a
        # print b
        if not a.equals(b):
            return False
        # parent_links_match = False
        # for ii,jj in itermatchings(a.parent_links(),b.parent_links(),
        #                            lambda i,j: i.equals(j) and i.parent().equals(j.parent())):
        #     parent_links_match = True
        #     break
        # if not parent_links_match:
        #     return False
        if len(a.parent_links()) != len(b.parent_links()):
            return False
        if len(a.links(instantiated_only=True)) != len(b.links(instantiated_only=True)):
            return False
        if len(a.links(instantiated_only=False)) != len(b.links(instantiated_only=False)):
            return False
        child_links_match = False
        for ii,jj in itermatchings(a.links(instantiated_only=False),b.links(instantiated_only=False),
                                   lambda i,j: i.equals(j) and i.child().equals(j.child())):
            child_links_match = True
            break
        return child_links_match

    @staticmethod
    def undetroot_equals(a,b,mapids=True):
        """True when the subtrees at a and b are equal and they share the same parents.

        Parents are compared by (link tuple, parent id), so parent ids must
        already be set (asserted).
        """
        if not a.subtree_equals(b,mapids=mapids):
            return False
        assert(None not in set(l.parent().id() for l in a.parent_links()))
        assert(None not in set(l.parent().id() for l in b.parent_links()))
        uipars = set((l.astuple(),l.parent().id()) for l in a.parent_links())
        ujpars = set((l.astuple(),l.parent().id()) for l in b.parent_links())
        if not (uipars == ujpars):
            return False
        return True

    def str(self,node=None,prefix="",codeprefix="",monofmt=lcSym):
        """Return a multi-line text drawing of the subtree at node (default: root).

        Only instantiated links are followed; at most three children per
        node are supported (asserted).
        """
        if node == None:
            node = self.root()
        code = monofmt.toStr(node)
        s = codeprefix + code
        kidlinks = sorted(filter(lambda l: l.instantiated(),node.links()),key=lambda l: Linkage.posstr(l.parent_pos()),reverse=True)
        kids = list(map(Linkage.child,kidlinks))
        n = len(kids)
        assert n in (0,1,2,3)
        if n == 0:
            return prefix + s
        if n == 1:
            return self.str(kids[0],prefix=prefix,codeprefix=(s + ' - '),monofmt=monofmt)
        if n == 2:
            return self.str(kids[0],prefix + ' '*len(s)+" ",monofmt=monofmt) + '\n' + \
                   prefix + s + ' + ' + '\n' + \
                   self.str(kids[1],prefix + ' '*len(s)+" ",monofmt=monofmt)
        if n == 3:
            return self.str(kids[0],prefix + ' '*len(s)+" ",monofmt=monofmt) + '\n' + \
                   self.str(kids[1],prefix,codeprefix = s + ' + ',monofmt=monofmt) + '\n' + \
                   self.str(kids[2],prefix + ' '*len(s)+" ",monofmt=monofmt)

    def __str__(self):
        return self.str()

    def dump(self, m=None, level=0, branch='', monofmt=iupacSym):
        """Print an indented outline of the subtree at m (default: root).

        Single-child chains are accumulated on one line; a node with
        several children is printed and its children dumped one level
        deeper.
        """
        if m == None:
            m = self.root()

        br = branch + " " + monofmt.toStr(m)
        child_list = []
        for link in m.links():
            child_list.append(link.child())
        if len(child_list) == 0:
            print(' '*level + br)
        elif len(child_list) > 1:
            print(' '*level + br)
            level += 1
            for c in child_list:
                self.dump(c,level, '', monofmt)
        elif len(child_list) == 1:
            self.dump(child_list[0],level,br,monofmt)
def __init__(self):
    """Create the formatter and monosaccharide factory, then run the base initialiser."""
    # Both helpers are assigned before the base-class __init__ runs, as in
    # the original statement order.
    self.fmt, self.mf = GlycoCTFormat(), MonoFactory()
    super(GlycanFactory, self).__init__()
class GlycanFactory(ReferenceTable):
    """Reference table of named glycans, plus Oxford-notation construction."""

    def __init__(self):
        # Assigned before the base initialiser runs, as parseSection()
        # needs self.fmt.
        # NOTE(review): presumably ReferenceTable.__init__ calls
        # parseSection() - confirm.
        self.fmt = GlycoCTFormat()
        self.mf = MonoFactory()
        super(GlycanFactory, self).__init__()

    def new(self, key):
        """Return a fresh clone of the glycan stored under *key*."""
        return self[key].clone()

    def parseSection(self, name, kv):
        """Parse one table section into ``(alias, glycan)`` pairs.

        The 'GlycoCT' value is re-joined one whitespace-separated token
        per line before parsing. The section name and every ';'-separated
        entry of the optional 'Aliases' value map to the same glycan.
        """
        aliases = [name]
        g = self.fmt.toGlycan('\n'.join(kv['GlycoCT'].split()))
        aliases.extend(map(str.strip, kv.get('Aliases', '').split(';')))
        return [(a, g) for a in aliases]

    def add_mono(self, parent, name, parent_pos,
                 child_pos=1, anomer=Anomer.beta,
                 parent_type=Linkage.oxygenPreserved,
                 child_type=Linkage.oxygenLost):
        """Create monosaccharide *name*, attach it under *parent*, return it."""
        m = self.mf.new(name)
        m.set_anomer(anomer)
        parent.add_child(m,
                         parent_pos=parent_pos,
                         child_pos=child_pos,
                         parent_type=parent_type,
                         child_type=child_type)
        return m

    def oxford2Glycan(self, name):
        """Build a glycan from an Oxford-style name.

        Names already present in the table are simply cloned. Otherwise
        the name is read left to right as: optional 'F' (start from 'FM3'
        instead of 'M3'), 'A<n>' antennae, optional 'B' (a GlcNAc at
        position 4 of the first mannose), optional 'F<n>' antennary
        fucoses, optional 'G<n>' galactoses, optional 'S<n>' Neu5Ac
        residues. Parsing stops successfully wherever the name ends after
        a complete component.

        Raises:
            AssertionError: if a component letter is not the expected one
                or a count exceeds the number of available attachment
                sites.
        """
        if name in self:
            return self.new(name)

        p = 0
        if name[p] == 'F':
            g = self.new('FM3')
            p += 1
        else:
            g = self.new('M3')

        # Locate the branching mannose and its 3- and 6-linked mannoses.
        r = g.root()
        glcnac2 = [m for m in r.children()
                   if m.compatible(self.mf['GlcNAc'])][0]
        man1 = glcnac2.children()[0]
        man16 = [l.child() for l in man1.links() if l.parent_pos() == 6][0]
        man13 = [l.child() for l in man1.links() if l.parent_pos() == 3][0]

        assert name[p] == 'A'
        nant = int(name[p + 1])
        # 1-based list of antenna GlcNAc residues; index 0 is a placeholder.
        ant = [None]
        if nant in (1, 2, 3, 4):
            ant.append(self.add_mono(man13, 'GlcNAc', parent_pos=2))
        if nant in (2, 3, 4):
            ant.append(self.add_mono(man16, 'GlcNAc', parent_pos=2))
        if nant in (3, 4):
            ant.append(self.add_mono(man13, 'GlcNAc', parent_pos=4))
        if nant in (4, ):
            ant.append(self.add_mono(man16, 'GlcNAc', parent_pos=6))
        p += 2
        if p >= len(name):
            return g

        if name[p] == 'B':
            self.add_mono(man1, 'GlcNAc', 4)
            # Advance past the 'B'. This used to be "name[p] += 1", which
            # raises TypeError because name is a string.
            p += 1
            if p >= len(name):
                return g

        if name[p] == 'F':
            nfuc = int(name[p + 1])
            assert (nfuc <= nant)
            for fi in range(1, nfuc + 1):
                self.add_mono(ant[fi], 'Fuc', parent_pos=6,
                              anomer=Anomer.alpha)
            p += 2
            if p >= len(name):
                return g

        assert (name[p] == 'G')
        ngal = int(name[p + 1])
        # 1-based list of galactose residues; index 0 is a placeholder.
        gal = [None]
        assert (ngal <= nant)
        for gi in range(1, ngal + 1):
            gal.append(self.add_mono(ant[gi], 'Gal', parent_pos=4))
        p += 2
        if p >= len(name):
            return g

        assert (name[p] == 'S')
        nsia = int(name[p + 1])
        assert (nsia <= ngal)
        for si in range(1, nsia + 1):
            self.add_mono(gal[si], 'Neu5Ac', parent_pos=6, child_pos=2,
                          anomer=Anomer.alpha)

        return g
import sys import xml import urllib import urllib2 import copy import xml.etree.ElementTree as ET from GlyTouCan import * from GlycanFormatter import GlycoCTFormat GlycoctParser = GlycoCTFormat() gtc = GlyTouCan() class MonosaccharideDB: page_cache_by_id = {} page_cache_by_glycoct = {} def __init__(self): pass def encode(self, d): return urllib.urlencode(d) def get_document_by_glycoct(self, basetype, substlist): apiurl = "http://www.monosaccharidedb.org/display_monosaccharide.action?" param = {"scheme": "glycoct", "name": basetype, "output": "xml"} apiurl += urllib.urlencode(param)