コード例 #1
0
 def __init__(self):
     """Create the GlycoCT formatter and set every rendering option to its default."""
     # Formatter used to serialise glycan objects.
     self.fmt = GlycoCTFormat()
     # Output options.
     self._format = "png"
     self._scale = 1.0
     # Drawing options.
     self._notation = "cfg"
     self._display = "normalinfo"
     self._orientation = "RL"
     self._redend = True
コード例 #2
0
    def __init__(self):
        """Set every rendering option to its default and create the GlycoCT formatter.

        All lines are indented with spaces only; the original mixed a tab into
        the body, which Python 3 rejects with TabError.
        """
        self._scale = 1.0
        self._orientation = "RL"
        self._display = "normalinfo"
        self._notation = "snfg"
        self._redend = True
        self._format = "png"
        self._opaque = True
        self._force = False
        self._verbose = False
        self.fmt = GlycoCTFormat()
コード例 #3
0
class GlycanImage(object):
    """Holds image rendering options and writes a glycan to an image file.

    Every option is exposed through a combined getter/setter method: called
    without an argument (or with ``None``) it returns the current value,
    called with a value it stores that value and returns ``None``.
    """

    def __init__(self):
        self._scale = 1.0
        self._orientation = "RL"
        self._display = "normalinfo"
        self._notation = "cfg"
        self._redend = True
        self._format = "png"
        self.fmt = GlycoCTFormat()

    def scale(self, value=None):
        """Get the scale, or set it (the value is converted with float())."""
        if value is None:
            return self._scale
        self._scale = float(value)

    def reducing_end(self, value=None):
        """Get the reducing-end flag, or set it (the value is converted with bool())."""
        if value is None:
            return self._redend
        self._redend = bool(value)

    def notation(self, value=None):
        """Get the notation, or set it (stored as given)."""
        if value is None:
            return self._notation
        self._notation = value

    def format(self, value=None):
        """Get the image format, or set it (stored as given)."""
        if value is None:
            return self._format
        self._format = value

    def orientation(self, value=None):
        """Get the orientation, or set it (stored as given)."""
        if value is None:
            return self._orientation
        self._orientation = value

    def display(self, value=None):
        """Get the display mode, or set it (stored as given)."""
        if value is None:
            return self._display
        self._display = value

    def set(self, key, value):
        """Set the option named *key* by calling the accessor method of that name.

        Raises:
            KeyError: if *key* does not name a public option accessor. Private
                attributes (such as ``_scale``) and the non-option methods
                ``set`` and ``writeImage`` are rejected here instead of
                failing later with a TypeError.
        """
        accessor = getattr(self, key, None)
        if (key.startswith("_") or key in ("set", "writeImage")
                or not callable(accessor)):
            raise KeyError(key)
        accessor(value)

    def writeImage(self, glycan, filename):
        """Render *glycan* to *filename* with the current options.

        *glycan* may be a string, which is passed through unchanged, or any
        other object, which is first converted with ``self.fmt.toStr``.
        Returns whatever calling the GlycoCT2Image instance returns.
        """
        glystr = glycan
        if not isinstance(glystr, basestring):
            glystr = self.fmt.toStr(glycan)
        imageWriter = GlycoCT2Image(glystr,
                                    filename,
                                    format=self._format,
                                    scale=self._scale,
                                    redend=self._redend,
                                    orient=self._orientation,
                                    display=self._display,
                                    notation=self._notation)
        return imageWriter()
コード例 #4
0
ファイル: GWBFormat.py プロジェクト: glygen-glycan-data/PyGly
class GWBFormat(object):
    """Converts a glycan to the sequence string produced by GWBFormatter."""

    def __init__(self):
        self.fmt = GlycoCTFormat()

    def toStr(self, glycan):
        """Return the last output line of GWBFormatter for *glycan*, or None.

        *glycan* may be a string, which is used as is, or any other object,
        which is first converted with ``self.fmt.toStr``. None is returned
        when the formatter produces no output at all, or when its last line
        contains "Exception" or "org.glycoinfo".
        """
        glystr = glycan
        if not isinstance(glystr, basestring):
            glystr = self.fmt.toStr(glycan)
        writer = GWBFormatter(glystr)
        lines = writer().splitlines()
        if not lines:
            # Empty output: report failure the same way as an error line,
            # rather than raising IndexError on lines[-1].
            return None
        seq = lines[-1].strip()
        if "Exception" in seq or "org.glycoinfo" in seq:
            return None
        return seq
コード例 #5
0
    subshow(acc1, acc2, "g1 <= g2", subsumption.leq(g1, g2))


# Command-line driver: loads two glycans by accession (argv[1], argv[2]) and
# sets up the comparison helpers used by the rest of the script.
if __name__ == "__main__":

    from collections import defaultdict

    from manipulation import Topology, Composition

    # from GlyTouCan import GlyTouCan
    from GlycanResource import GlyTouCan
    # Resource used below to fetch glycans by accession; caching is disabled.
    gtc = GlyTouCan(usecache=False)

    from GlycanFormatter import GlycoCTFormat, WURCS20Format, GlycanParseError
    glycoct_format = GlycoCTFormat()
    wurcs_format = WURCS20Format()

    # Comparison helpers; these classes are defined elsewhere in this module.
    geq = GlycanEqual()
    gtopoeq = GlycanTopoEqual()
    gcompeq = GlycanCompEqual()
    subsumption = GlycanSubsumption()
    topology = Topology()

    # The two accessions to compare come from the command line.
    acc1 = sys.argv[1]
    acc2 = sys.argv[2]
    g1 = gtc.getGlycan(acc1)
    g2 = gtc.getGlycan(acc2)
    # Result of applying the Topology manipulation to the second glycan.
    tg2 = topology(g2)

    verbose = True
コード例 #6
0
 def __init__(self, filename):
     """Remember *filename*, derive the database name from it and create the formatter.

     The name is the file's base name without directory and without the final
     extension; the extension must equal ``self.extn``.
     """
     self.filename = filename
     stem, extn = filename.rsplit('.', 1)
     self.name = os.path.split(stem)[1]
     assert extn == self.extn
     self.fmt = GlycoCTFormat()
コード例 #7
0
class GlycoCTDatabase:
    """Read-only access to GlycoCT records stored as '<id>.txt' members of a zip file.

    Class attributes:
        prefix: string stripped from the front of an accession to obtain the
            member name, and prepended to member names to form accessions.
        extn: required extension of the database file name.
        source: source label for records; the database name is used when empty.
    """
    prefix = ""
    extn = "gct"
    source = ""

    def __init__(self, filename):
        self.filename = filename
        self.name, extn = filename.rsplit('.', 1)
        self.name = os.path.split(self.name)[1]
        assert extn == self.extn
        self.fmt = GlycoCTFormat()

    def getraw(self, accession):
        """Return the raw content of the record for *accession*, or None if absent."""
        fn = accession[len(self.prefix):] + '.txt'
        # The with-block closes the archive on every path, including the
        # early return for a missing member.
        with zipfile.ZipFile(self.filename, "r") as zf:
            try:
                zf.getinfo(fn)
            except KeyError:
                return None
            return self._getraw(zf, fn)

    def _getraw(self, zf, name):
        """Read member *name* from the open archive *zf*."""
        return zf.read(name)

    def _get(self, zf, name):
        """Build a GlyRecord for member *name* of the open archive *zf*.

        Returns None when the GlycoCT text cannot be parsed
        (GlycoCTParseError). Any other error while reading or parsing is
        reported on stderr with a traceback and terminates the process with
        exit status 1. Optional '<id>.att' (CSV rows: key followed by values)
        and '<id>.png' members are passed to the record as extra keyword
        arguments.
        """
        try:
            glystr = self._getraw(zf, name)
            g = self.fmt.toGlycan(glystr)
        except GlycoCTParseError:
            return None
        except Exception:
            # KeyboardInterrupt and SystemExit are deliberately not caught here.
            sys.stderr.write("Problem with GlycoCT file " + name + "\n")
            traceback.print_exc()
            sys.exit(1)
        stem = name.rsplit('.', 1)[0]
        kwargs = {}
        try:
            attstr = zf.read(stem + '.att')
            for r in csv.reader(StringIO(attstr)):
                kwargs[r[0]] = copy.copy(r[1:])
        except KeyError:
            # No attribute file for this record.
            pass
        try:
            kwargs['image'] = zf.read(stem + '.png')
        except KeyError:
            # No image for this record.
            pass
        gr = GlyRecord(source=self.source if self.source else self.name,
                       accession=self.prefix + stem,
                       glycan=g,
                       name=self.name,
                       **kwargs)
        return gr

    def get(self, accession):
        """Return the GlyRecord for *accession*, or None if it is absent or unparsable."""
        fn = accession[len(self.prefix):] + '.txt'
        with zipfile.ZipFile(self.filename, "r") as zf:
            try:
                zf.getinfo(fn)
            except KeyError:
                return None
            return self._get(zf, fn)

    def __iter__(self):
        return self.next()

    def next(self):
        """Generate a GlyRecord for every parsable '.txt' member of the archive."""
        # The with-block also closes the archive when the consumer stops
        # iterating early and the generator is closed.
        with zipfile.ZipFile(self.filename, "r") as zf:
            for name in zf.namelist():
                if not name.endswith('.txt'):
                    continue
                gr = self._get(zf, name)
                if gr:
                    yield gr
コード例 #8
0
            m = re.search(r'([ab]1?)?-?(\d(-?))?([RS][pP]?\d+|MDPLys)?$',
                          glystr)
            if m != None:
                x = len(m.group(0))
                glystr = glystr[:-x]
            try:
                g = self.fmt.toGlycan(glystr)
            except IUPACLinearParseError:
                traceback.print_exc()
                continue
            print >> sys.stderr, ">>CFG%03d" % acc
            yield GlyRecord(source="CFGArray",
                            accession="CFG%03d" % acc,
                            glycan=g,
                            name=self.name)


# Command-line driver: print the accession, the IUPAC linear string and the
# glycan itself for every record of the CFG array database named in argv[1].
if __name__ == '__main__':
    import sys
    from GlycanFormatter import IUPACLinearFormat
    fmt = IUPACLinearFormat()
    fmt1 = GlycoCTFormat()
    gdb = CFGArrayDatabase(sys.argv[1])
    for r in gdb:
        lc = fmt.toStr(r.glycan)
        # Single-argument print calls produce the same output under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (r.accession, lc))
        print(r.glycan)
        sys.stdout.flush()
コード例 #9
0
ファイル: GWBFormat.py プロジェクト: glygen-glycan-data/PyGly
 def __init__(self):
     """Create the GlycoCT formatter stored on the instance as ``fmt``."""
     self.fmt = GlycoCTFormat()
コード例 #10
0
    # setenv GLYTOUCAN "~/projects/GlyGen/GlyTouCan/current"
    #
    # egrep -w '(GlyTouCanAccession|Saccharide)' $GLYTOUCAN/comp.txt | \
    #     fgrep -f $GLYTOUCAN/humanbycomp.txt | \
    #     python27 manipulation.py $GLYTOUCAN/wurcs.zip $GLYTOUCAN/glycoct.zip 
    #
    # egrep -w '(GlyTouCanAccession|Saccharide)' $GLYTOUCAN/comp.txt | \
    #     fgrep -f $GLYTOUCAN/mousebycomp.txt | \
    #     python27 manipulation.py $GLYTOUCAN/wurcs.zip $GLYTOUCAN/glycoct.zip 
    #
    # egrep -w '(GlyTouCanAccession|Saccharide)' $GLYTOUCAN/comp.txt | \
    #     python27 manipulation.py $GLYTOUCAN/wurcs.zip $GLYTOUCAN/glycoct.zip 
    #

    wurcs_parser = WURCS20Format()
    glycoct_parser = GlycoCTFormat()

    topology = Topology()
    composition = Composition()
    basecomposition = BaseComposition()
    level = LevelSniffer()
    
    zf = zipfile.ZipFile(sys.argv[1])
    zf1 = zipfile.ZipFile(sys.argv[2])
    
    reader = csv.DictReader(sys.stdin,dialect='excel-tab')

    j = 0
    for i,d in enumerate(reader):
        acc = d['GlyTouCanAccession']
        typ = d['GlyTouCanType']
コード例 #11
0
 def glycoct(self):
     """Return this glycan formatted by GlycoCTFormat.toStr().

     The formatter is created on first use and then stored on the instance.
     """
     from GlycanFormatter import GlycoCTFormat
     formatter = self.glycoctformat
     if not formatter:
         formatter = GlycoCTFormat()
         self.glycoctformat = formatter
     return formatter.toStr(self)
コード例 #12
0
class Glycan:

    iupacSym = IUPACSym()
    lcSym = LinCodeSym()
    glycoctformat = None
    glycamformat = None

    def __init__(self,root=None):
        """Create a glycan with the given root node (None for a rootless glycan)."""
        self.set_root(root)
        # No undetermined roots recorded yet; _bions/_yions start out as None.
        self._undetermined = self._bions = self._yions = None

    def root(self):
        """Return the root node of this glycan (None if no root has been set)."""
        return self._root

    def set_root(self, r):
        """Make *r* the root node of this glycan."""
        self._root = r

    def set_ids(self):
        """Number all nodes (substituents included) 1, 2, 3, ... in all_nodes() order."""
        for number,node in enumerate(self.all_nodes(subst=True),1):
            node.set_id(number)

    def unset_ids(self):
        """Clear the id of every node, substituents included."""
        for node in self.all_nodes(subst=True):
            node.unset_id()

    def set_undetermined(self, und):
        """Record the undetermined roots of this glycan, grouped into equivalence classes.

        Two roots share a class when undetroot_equals(a, b, mapids=False)
        holds. The classes are stored as a list of sets, ordered so that
        classes whose sampled member is a monosaccharide come first. Passing
        None or an empty collection clears the undetermined state.
        """
        if und is None or len(und) == 0:
            self._undetermined = None
            return
        roots = list(und)
        classes = defaultdict(set)
        placed = set()
        for i,first in enumerate(roots):
            if i in placed:
                continue
            # roots[i] starts a new class; collect every later, unplaced equal root.
            placed.add(i)
            classes[i].add(first)
            for j in range(i+1,len(roots)):
                if j in placed:
                    continue
                if not self.undetroot_equals(first,roots[j],mapids=False):
                    continue
                classes[i].add(roots[j])
                placed.add(j)
        # next(iter(ec)) works on Python 2.6+ and 3; iter(ec).next() exists on Python 2 only.
        self._undetermined = sorted(classes.values(),
                                    key=lambda ec: 1*next(iter(ec)).is_monosaccharide(),
                                    reverse=True)

    def undetermined(self):
        """Return True if undetermined roots are currently recorded (see set_undetermined)."""
        return self._undetermined != None

    def undetermined_roots(self):
        """Generate every undetermined root, one equivalence class after another."""
        if self._undetermined == None:
            return
        for eqclass in self._undetermined:
            for node in eqclass:
                yield node
    
    def undetermined_root_reprs(self):
        """Generate (representative, class size) for each equivalence class of undetermined roots."""
        if self._undetermined == None:
            return
        for eqclass in self._undetermined:
            members = iter(eqclass)
            try:
                representative = next(members)
            except StopIteration:
                # An empty class contributes nothing.
                continue
            yield (representative,len(eqclass))

    def unconnected_roots(self):
        """Generate the undetermined roots whose connected() is false."""
        for node in self.undetermined_roots():
            if node.connected():
                continue
            yield node

    def isolated_nodes(self):
        """Generate the unconnected roots that have no parent links at all."""
        for node in self.unconnected_roots():
            if len(node.parent_links()) != 0:
                continue
            yield node

    def isolated_node_reprs(self):
        """Generate (first isolated node, isolated count) per equivalence class.

        A node counts as isolated when it is not connected and has no parent
        links. One pair is produced for every class, so a class without
        isolated nodes yields (None, 0).
        """
        if self._undetermined == None:
            return
        for eqclass in self._undetermined:
            first = None
            isolated = 0
            for node in eqclass:
                if node.connected() or len(node.parent_links()) != 0:
                    continue
                isolated += 1
                if not first:
                    first = node
            yield (first,isolated)

    def has_root(self):
        """Return True if this glycan has a root node (the stored root is not None)."""
        return (self._root != None)

    def fully_determined(self):
        """Return True if the glycan has no undetermined part and every node and link is determined.

        The root only has to satisfy root_partially_determined(); every other
        node and every link (substituents included) must satisfy
        fully_determined().
        """
        if self.undetermined():
            return False
        for node in self.all_nodes(subst=True):
            if node == self.root():
                node_ok = node.root_partially_determined()
            else:
                node_ok = node.fully_determined()
            if not node_ok:
                return False
        return all(link.fully_determined() for link in self.all_links(subst=True))

##     def add_instantiation(self, inst):
##         if self._instantiations == None:
##             self._instantiations = []
##         self._instantiations.append(inst)

##     maxlinks = {'Fuc': 0, 'NeuAc': 0, 'NeuGc': 0, 'Xyl': 0}
##     def auto_instantiations(self):
##         undetsets = defaultdict(set)
##         todo = [self.root()]
##         while len(todo) > 0:
##             m = todo.pop(0)
##             for l in m.substituent_links(False):
##                 if l.undetermined():
##                     undetsets[l.child()].add(l)
##             for l in m.links(False):
##                 if l.undetermined():
##                     undetsets[l.child()].add(l)
##                 todo.insert(0,l.child())
##         # Pick one from each child-set
##         for inst in product(*(undetsets.values())):
##             # Potentially, eliminate infeasible combinations of
##             # instantiated edges, too many on a parent, bond already
##             # used, etc.
##             counts = defaultdict(int)
##             counts1 = defaultdict(int)
##             for l in inst:
##                 if l.parent_pos():
##                     counts[(l.parent(),l.parent_pos())] += 1
##                 counts1[l.parent()] += 1
##             for p in counts1:
##                 for l in p.links():
##                     if l.undetermined():
##                         continue
##                     if l.parent_pos():
##                         counts[(l.parent(),l.parent_pos())] += 1
##                     counts1[l.parent()] += 1
##             coremannose = set()
##             for m in self.root().children():
##                 for m1 in m.children():
##                     try:
##                         if iupacSym.toStr(m1) == 'Man':
##                             coremannose.add(m1)
##                     except KeyError:
##                         pass
##             # print counts
##             bad = False
##             for m,c in counts1.items():
##                 try:
##                     sym = iupacSym.toStr(m)
##                 except KeyError:
##                     sym = None
##                 if m in coremannose:
##                     # Probably N-glycan core Manose
##                     if c > 3:
##                         bad = True
##                         break
##                 elif c > self.maxlinks.get(sym,2):
##                     bad = True
##                     break
##             if bad:
##                 continue                
##             bad = False
##             for m,c in counts.items():
##                 if c > 1:
##                     bad = True
##                     break
##             if bad:
##                 continue                
##             # print counts,counts1
##             self.add_instantiation(inst)

    def set_instantiation(self,inst):
        """Make *inst* the current choice of instantiated undetermined links.

        Every link reachable from the root is visited. An undetermined link is
        marked instantiated exactly when it is a member of *inst*; links left
        over from an earlier instantiation are therefore reset, which the
        original code did not do. Each undetermined root is then marked
        connected iff one of the chosen links leads to it.
        """
        conn = set()
        todo = []
        if self.root():
            todo.append(self.root())
        while len(todo) > 0:
            # Popping and pushing at the end gives the same visiting order as
            # the original pop(0)/insert(0) at the front.
            m = todo.pop()
            for l in m.links(False):
                if l.undetermined():
                    chosen = l in inst
                    l.set_instantiated(chosen)
                    if chosen:
                        conn.add(l.child())
                todo.append(l.child())
        for ur in self.undetermined_roots():
            ur.set_connected(ur in conn)
        return

    def instantiations(self):
        """Generate this glycan once per instantiation of its undetermined links.

        The same object (self) is yielded every time, after set_instantiation()
        has applied the next combination of one parent link per unconnected
        undetermined root. A glycan without undetermined roots is yielded once,
        unchanged.
        """
        if not self._undetermined:
            yield self
            return
        plsets = [ur.parent_links()
                  for ur in self.undetermined_roots()
                  if not ur.connected()]
        for inst in combinatorics.product(*plsets,accumulator=combinatorics.set_accumulator):
            self.set_instantiation(inst)
            yield self

    def instantiate(self):
        """Apply the first instantiation produced by instantiations(); return self."""
        if self._undetermined:
            # Advancing the generator to its first yield applies that instantiation.
            next(self.instantiations(),None)
        return self

    def uninstantiate(self):
        """Apply the empty instantiation (no undetermined link chosen); return self."""
        if self._undetermined:
            self.set_instantiation(set())
        return self
        
    def instantiation_count(self):
        """Return the product of the parent-link counts of all undetermined roots (1 if none)."""
        count = 1
        for root in self.undetermined_roots():
            choices = len(root.parent_links())
            count = count * choices
        return count

    def dfsvisit(self,f,m=None,subst=False):
        """Call *f* on nodes in depth-first pre-order.

        With m=None the traversal starts at the root (if there is one) and
        then at each unconnected root. Otherwise it covers the subtree below
        *m*: f(m) first, then, if *subst* is true, f on each substituent of m,
        then the children recursively.
        """
        if m is None:
            root = self.root()
            # A rootless glycan has nothing to visit here; recursing with
            # m=None again would never terminate.
            if root is not None:
                self.dfsvisit(f,root,subst)
            for r in self.unconnected_roots():
                self.dfsvisit(f,r,subst)
            return
        f(m)
        if subst:
            for s in m.substituents():
                f(s)
        for c in m.children():
            self.dfsvisit(f,c,subst)

    def dfsvisit_post(self,f,m=None,subst=False):
        """Call *f* on nodes in depth-first post-order.

        With m=None the traversal starts at the root (if there is one) and
        then at each unconnected root. Otherwise it covers the subtree below
        *m*: if *subst* is true, f on each substituent of m first, then the
        children recursively, and f(m) last.
        """
        if m is None:
            root = self.root()
            # A rootless glycan has nothing to visit here; recursing with
            # m=None again would never terminate.
            if root is not None:
                self.dfsvisit_post(f,root,subst)
            for r in self.unconnected_roots():
                self.dfsvisit_post(f,r,subst)
            return
        if subst:
            for s in m.substituents():
                f(s)
        for c in m.children():
            self.dfsvisit_post(f,c,subst)
        f(m)

    class SubtreeCompositionVisit:
        """Visitor that stores cumulative subtree compositions on each visited node.

        It must be applied in post-order, because visit() reads the
        compositions already stored on the node's children.
        """
        def __init__(self,sym=None,comp=None):
            # sym: symbol table with toStr(node); comp: table passed to node.composition().
            self.sym = sym
            self.comp = comp

        def visit(self,m):
            """Store _elemental_composition and/or _symbol_composition on node *m*."""
            if self.comp:
                elemental = m.composition(self.comp)
                for child in m.children():
                    elemental.add(child._elemental_composition)
                m._elemental_composition = elemental

            if self.sym:
                symbols = Composition()
                symbols[self.sym.toStr(m)] = 1
                for child in m.children():
                    symbols.add(child._symbol_composition)
                m._symbol_composition = symbols

    class ElementalCompositionVisit:
        """Visitor that accumulates the composition of every visited node in ``eltcomp``."""
        def __init__(self,comp):
            # Table handed to node.composition(); the running total starts empty.
            self.table = comp
            self.eltcomp = Composition()

        def visit(self,m):
            """Add the composition of node *m* to the running total."""
            contribution = m.composition(self.table)
            self.eltcomp.add(contribution)

    def subtree_composition(self,m,sym_table=None,comp_table=None):
        """Annotate the nodes below *m* (the root if m is None) with subtree compositions.

        Results are stored on the nodes by SubtreeCompositionVisit. The glycan
        must not have undetermined roots.
        """
        assert not self.undetermined()
        start = self.root() if m == None else m
        visitor = Glycan.SubtreeCompositionVisit(sym=sym_table,comp=comp_table)
        self.dfsvisit_post(visitor.visit,start)

    def elemental_composition(self,comp_table):
        """Return the sum of node.composition(comp_table) over all_nodes(undet_subst=True)."""
        total = Composition()
        for node in self.all_nodes(undet_subst=True):
            total.add(node.composition(comp_table))
        return total

    def byions(self,force=False):
        """Return (bions, yions), two lists with one entry per link of the glycan.

        Each entry is (symbol composition, elemental composition, link). The
        b-ion entry holds the compositions of the subtree below the link; the
        y-ion entry holds the root's compositions minus that subtree. Subtree
        compositions are computed first when missing on the root or when
        *force* is true.
        """
        r = self.root()
        if force or not (hasattr(r,'_symbol_composition') and hasattr(r,'_elemental_composition')):
            self.subtree_composition(r,sym_table=iupacSym,comp_table=ctable)
        bions = []
        yions = []
        for link in self.all_links():
            below = link.child()
            bions.append((below._symbol_composition,below._elemental_composition,link))
            # The remainder is a copy of the whole-glycan composition minus the subtree.
            rest_sym = copy.copy(r._symbol_composition)
            rest_sym.sub(below._symbol_composition)
            rest_elt = copy.copy(r._elemental_composition)
            rest_elt.sub(below._elemental_composition)
            yions.append((rest_sym,rest_elt,link))
        return bions,yions

    def composition(self,force=False):
        """Return (symbol composition, elemental composition) stored on the root.

        Subtree compositions are computed first when either is missing on the
        root or when *force* is true.
        """
        r = self.root()
        if force or not (hasattr(r,'_symbol_composition') and hasattr(r,'_elemental_composition')):
            self.subtree_composition(r,sym_table=iupacSym,comp_table=ctable)
        return r._symbol_composition,r._elemental_composition

    def native_elemental_composition(self):
        """Return elemental_composition() computed with the ``ctable`` composition table."""
        return self.elemental_composition(ctable)
    
    def permethylated_elemental_composition(self):
        """Return elemental_composition() computed with the ``pctable`` composition table."""
        return self.elemental_composition(pctable)

    def underivitized_molecular_weight(self,adduct='H2O'):
        """Return the mass of the native elemental composition plus the mass of *adduct*.

        *adduct* is a formula string parsed with Composition.fromstr(); both
        masses are computed with ``elmt``.
        """
        glycan_mass = self.native_elemental_composition().mass(elmt)
        adduct_mass = Composition.fromstr(adduct).mass(elmt)
        return glycan_mass + adduct_mass

    def permethylated_molecular_weight(self,adduct='C2H6O'):
        """Return the mass of the permethylated elemental composition plus the mass of *adduct*.

        *adduct* is a formula string parsed with Composition.fromstr(); both
        masses are computed with ``elmt``.
        """
        glycan_mass = self.permethylated_elemental_composition().mass(elmt)
        adduct_mass = Composition.fromstr(adduct).mass(elmt)
        return glycan_mass + adduct_mass
    
    def fragments(self,r=None,force=False):
        """Generate fragment descriptions for the subtree rooted at *r* (default: the root).

        Each item is a 4-tuple (symbol composition, elemental composition,
        flag, count). The flag is True for fragments that contain node *r*
        and False for fragments taken from below it.
        NOTE(review): the count looks like the number of cleaved links - confirm.

        When called without *r*, subtree compositions are computed first if
        they are missing on the root or if *force* is true. Recursive calls
        rely on those stored compositions.
        """
        atroot = False
        if r == None:
            r = self.root()
            atroot = True
            if force or (not hasattr(r,'_symbol_composition') or not hasattr(r,'_elemental_composition')):
                self.subtree_composition(r,sym_table=iupacSym,comp_table=ctable)
        links = r.links()
        nlink = len(links)

        if nlink == 0:
            # Leaf: the only fragment is the node itself.
            fr = (copy.copy(r._symbol_composition),\
                  copy.copy(r._elemental_composition),True,0)
            yield fr
            return
        
        # Per child link: fragstore0 collects the child's fragments whose flag
        # is True, fragstore1 those whose flag is False.
        fragstore0 = []
        fragstore1 = []
        for l in links:
            fragstore0.append([])
            fragstore1.append([])
            for fr in self.fragments(l.child()):
                if fr[2]:
                    fragstore0[-1].append(fr)
                else:
                    fragstore1[-1].append(fr)
            # Extra last option with empty compositions and count 1: nothing
            # of this child's subtree is kept.
            fragstore0[-1].append((Composition(),Composition(),True,1))

        # Fragments containing r: pick one option per child and combine them.
        for i,prd in enumerate(product(*fragstore0)):
            symcomp = copy.copy(r._symbol_composition)
            eltcomp = copy.copy(r._elemental_composition)
            cl = 0
            for l,fr in zip(links,prd):
                # determine the amount to subtract: the child's whole subtree
                # minus the part kept by the chosen option
                symcomp1 = copy.copy(l.child()._symbol_composition)
                symcomp1.sub(fr[0])
                symcomp.sub(symcomp1)
                eltcomp1 = copy.copy(l.child()._elemental_composition)
                eltcomp1.sub(fr[1])
                eltcomp.sub(eltcomp1)
                cl += fr[3]
            fr = (symcomp,eltcomp,True,cl)
            yield fr

        # Fragments not containing r.
        for i in range(nlink):
            # Child fragments that contained the child (the added empty option
            # is skipped by [:-1]) are re-issued with flag False and count + 1.
            for fr in fragstore0[i][:-1]:
                fr1 = (fr[0],fr[1],False,fr[3]+1)
                yield fr1
            # Fragments from deeper in the subtree are passed through unchanged.
            for fr in fragstore1[i]:
                fr1 = (fr[0],fr[1],False,fr[3])
                yield fr1

    def subtree_nodes(self,root,subst=False):
        """Generate each node reachable from *root* once, depth-first, children in order.

        With *subst* true, the substituents of a node are generated right
        after the node itself.
        """
        seen = set()
        # Stack with its top at the end of the list.
        stack = [root]
        while stack:
            node = stack.pop()
            if node not in seen:
                seen.add(node)
                yield node
            if subst:
                for s in node.substituents():
                    if s not in seen:
                        seen.add(s)
                        yield s
            # Push children in reverse so that the first child is visited next.
            stack.extend(reversed(node.children()))

    def all_nodes(self,subst=False,undet_subst=False):
        """Generate the nodes below the root and below the unconnected roots.

        An unconnected root that is not a monosaccharide is used as a starting
        point only when *subst* or *undet_subst* is true. *subst* is also
        passed to subtree_nodes() to include substituents.
        """
        starts = []
        if self.root():
            starts.append(self.root())
        any_root = subst or undet_subst
        starts.extend(ur for ur in self.unconnected_roots()
                      if any_root or ur.is_monosaccharide())
        for start in starts:
            for node in self.subtree_nodes(start,subst):
                yield node

    iupac_composition_syms = ['Man','Gal','Glc','Xyl','Fuc','ManNAc','GlcNAc','GalNAc','NeuAc','NeuGc','Hex','HexNAc','dHex','Pent','Sia','GlcA','GalA','IdoA','ManA','HexA','GlcN','GalN','ManN','HexN']
    iupac_aldi_composition_syms = ['Man+aldi','Gal+aldi','Glc+aldi','Fuc+aldi','ManNAc+aldi','GlcNAc+aldi','GalNAc+aldi','Hex+aldi','HexNAc+aldi','dHex+aldi']
    subst_composition_syms = ['S','P','Me','aldi']

    def iupac_composition(self, floating_substituents=True, 
                                aggregate_basecomposition=True, 
                                redend_only=False):
        """Count the nodes of the glycan by IUPAC symbol.

        Args:
            floating_substituents: if true, a symbol such as 'A+B' is split at
                '+' and each part is counted separately; otherwise the symbol
                is counted as a whole and the '+aldi' symbols are also
                accepted as valid.
            aggregate_basecomposition: if true, the generic entries ('Hex',
                'HexNAc', 'dHex', 'Pent', 'Sia', 'HexA', 'HexN' and the
                '+aldi' variants) are replaced by sums over their members.
            redend_only: if true, only the root node (if any) is counted.

        Returns:
            A Composition with one count per symbol, 'Xxx' and 'X' for
            unrecognised nodes, and 'Count' for the total of all
            monosaccharide symbols plus 'Xxx'.

        This body is indented with spaces only; the original mixed tabs and
        spaces, which Python 3 rejects with TabError.
        """
        validsyms = self.iupac_composition_syms + self.subst_composition_syms
        if not floating_substituents:
            validsyms += self.iupac_aldi_composition_syms

        c = Composition()
        for sym in (validsyms + ['Xxx','X']):
            c[sym] = 0
        if not redend_only:
            nodeiterable = self.all_nodes(undet_subst=True)
        else:
            if self.has_root():
                nodeiterable = [ self.root() ]
            else:
                nodeiterable = []
        for m in nodeiterable:

            try:
                sym = iupacSym.toStr(m)
            except KeyError:
                # No symbol at all for this node.
                if isinstance(m,Monosaccharide):        
                    c['Xxx'] += 1
                else:
                    c['X'] += 1
                continue

            if floating_substituents:
                syms = [ s.strip() for s in sym.split('+') ]
            else:
                syms = [sym]

            if syms[0] not in validsyms:
                if isinstance(m,Monosaccharide):
                    syms[0] = 'Xxx'
                else:
                    syms[0] = 'X'
            
            for i in range(1,len(syms)):
                if syms[i] not in self.subst_composition_syms:
                    syms[i] = 'X'

            if syms[0] == 'Xxx' or 'X' in syms:
                c['Xxx'] += 1
                continue

            # NOTE(review): this branch cannot be reached, because
            # syms[0] == 'X' already satisfies "'X' in syms" above, so such
            # nodes are counted as 'Xxx'. Confirm which count is intended.
            if syms[0] == 'X':
                c['X'] += 1
                continue

            for sym in syms:
                c[sym] += 1

        c['Count'] = sum(map(c.__getitem__,self.iupac_composition_syms + ['Xxx']))
        if aggregate_basecomposition:
            c['Hex'] = sum(map(c.__getitem__,('Man','Gal','Glc','Hex')))
            c['Hex+aldi'] = sum(map(c.__getitem__,('Man+aldi','Gal+aldi','Glc+aldi','Hex+aldi')))
            c['HexNAc'] = sum(map(c.__getitem__,('GalNAc','GlcNAc','ManNAc','HexNAc')))
            c['HexNAc+aldi'] = sum(map(c.__getitem__,('GalNAc+aldi','GlcNAc+aldi','ManNAc+aldi','HexNAc+aldi')))
            c['dHex'] = sum(map(c.__getitem__,('Fuc','dHex')))
            c['dHex+aldi'] = sum(map(c.__getitem__,('Fuc+aldi','dHex+aldi')))
            c['Pent'] = sum(map(c.__getitem__,('Xyl','Pent')))
            c['Sia'] = sum(map(c.__getitem__,('NeuAc','NeuGc','Sia')))
            c['HexA'] = sum(map(c.__getitem__,('GlcA','GalA','IdoA','ManA','HexA')))
            c['HexN'] = sum(map(c.__getitem__,('GlcN','GalN','ManN','HexN')))
 
        return c

    def iupac_redend(self, floating_substituents=True, aggregate_basecomposition=True):
        """Return the symbols with a positive count in the root-only IUPAC composition.

        Substituent symbols and the 'Count' entry are left out. Both arguments
        are passed on to iupac_composition(). This body is indented with
        spaces only; the original used tabs, which clash with the
        space-indented class on Python 3.
        """
        comp = self.iupac_composition(floating_substituents=floating_substituents,
                                      aggregate_basecomposition=aggregate_basecomposition,
                                      redend_only=True)
        return [ key for key in comp if comp[key] > 0 and key not in self.subst_composition_syms and key != "Count"]

    def glycoct(self):
        """Return this glycan formatted by GlycoCTFormat.toStr().

        The formatter is created on first use and then stored on the instance.
        """
        from GlycanFormatter import GlycoCTFormat
        formatter = self.glycoctformat
        if not formatter:
            formatter = GlycoCTFormat()
            self.glycoctformat = formatter
        return formatter.toStr(self)

    def glycam(self):
        """Return this glycan formatted by IUPACGlycamFormat.toStr().

        The formatter is created on first use and then stored on the instance.
        """
        from GlycanFormatter import IUPACGlycamFormat
        formatter = self.glycamformat
        if not formatter:
            formatter = IUPACGlycamFormat()
            self.glycamformat = formatter
        return formatter.toStr(self)

    def subtree_links(self,root,subst=False,uninstantiated=False):
        """Generate the links of every node in the subtree below *root*.

        Substituent links come first for each node when *subst* is true.
        Uninstantiated links are included only when *uninstantiated* is true.
        """
        only_instantiated = (not uninstantiated)
        for node in self.subtree_nodes(root):
            if subst:
                for slink in node.substituent_links():
                    yield slink
            for link in node.links(instantiated_only=only_instantiated):
                yield link

    def all_links(self,subst=False,uninstantiated=False):
        """Generate the links of every node returned by all_nodes().

        Substituent links come first for each node when *subst* is true.
        Uninstantiated links are included only when *uninstantiated* is true.
        """
        only_instantiated = (not uninstantiated)
        for node in self.all_nodes():
            if subst:
                for slink in node.substituent_links():
                    yield slink
            for link in node.links(instantiated_only=only_instantiated):
                yield link

    def clone(self):
        """Return a deep copy of this glycan, undetermined roots included.

        Ids are (re)assigned on this glycan before copying. The copy's
        undetermined roots are the children of its undetermined links, plus
        deep clones of this glycan's undetermined roots that have no parent
        links.
        """
        self.set_ids()
        if self.root():
            copy_ = Glycan(self.root().deepclone())
        else:
            copy_ = Glycan()
        new_roots = set(link.child()
                        for link in copy_.all_links(uninstantiated=True)
                        if link.undetermined())
        new_roots.update(ur.deepclone()
                         for ur in self.undetermined_roots()
                         if len(ur.parent_links()) == 0)
        copy_.set_undetermined(new_roots)
        return copy_

    def clone_with_identified_link(self,link):
        """Return (clone, link'), where link' is what deepclone() reports for *link* in the clone.

        The glycan must not have undetermined roots.
        """
        assert not self.undetermined()
        cloned_root,cloned_link = self.root().deepclone(identified_link=link)
        return Glycan(cloned_root),cloned_link

    def split_clone(self,link):
        """Clone the glycan and cut the clone at *link*.

        Returns (g, f): g is the clone with the link removed, f is a new
        Glycan rooted at the child of the removed link.
        """
        remainder,cut = self.clone_with_identified_link(link)
        detached = cut.child()
        fragment = Glycan(detached)
        # Remove the link from both of its end points.
        cut.parent().del_link(cut)
        detached.del_parent_link(cut)
        return remainder,fragment

    def equals(self,g):
        """Return True if this glycan and *g* have the same structure.

        Side effect: node ids are reassigned on both glycans. When True is
        returned, matching nodes of the two glycans carry the same id.
        """

        # Three cases, at least, sigh.

        # 1) no root => compositions, no edges => "trivial" graph
        # isomorphism based on enumeration of potential node matchings

        # 2) tree (not undetermined) => subtree-based

        # 3) rooted DAG (due to undetermined nodes) => graph
        # isomorphism

        # Cases 1 & 3 are implemented by the same code

        # ids in both instances are reset in equal. If returns True,
        # then the ids of each monosaccharide in each glycan will match
        # their counterpart.

        self.set_ids()        
        g.unset_ids()

        if self.has_root() and g.has_root():

            if not self.undetermined() and not g.undetermined():

                # print >>sys.stderr, "Tree comparison"
                g.unset_ids()
                # both are trees, use subtree_equals()
                return self.root().subtree_equals(g.root(),mapids=True)

            else:

                # print >>sys.stderr, "Tree comparison shortcut"
                # at least one glycan has undetermined roots: compare the
                # rooted parts first and stop early if they already differ
                if not self.root().subtree_equals(g.root(),mapids=False):
                    return False

        # print >>sys.stderr, "Graph isomorphism comparison"

        g.set_ids()

        nodeset1 = list(self.all_nodes(subst=False))
        nodeset2 = list(g.all_nodes(subst=False))

        if len(nodeset1) != len(nodeset2):
            return False

        # Each link is represented as (parent id, link tuple, child id).
        linkset1 = set()
        for l in self.all_links(uninstantiated=True):
            linkset1.add((l.parent().id(),l.astuple(),l.child().id()))

        linkset2 = set()
        for l in g.all_links(uninstantiated=True):
            linkset2.add((l.parent().id(),l.astuple(),l.child().id()))

        if len(linkset1) != len(linkset2):
            return False

        # print >>sys.stderr, " ".join(map(lambda i: "%2s"%(i.id(),),nodeset1))
        # print >>sys.stderr, " ".join(map(lambda i: "%2s"%(i.id(),),nodeset2))

        # Try candidate node matchings until one maps this glycan's links
        # exactly onto g's links.
        iters = 0
        for ii,jj in iterecmatchings(nodeset1, nodeset2,
                                     self.monosaccharide_match):

            iters += 1
            matching = dict(zip(map(lambda m: m.id(),ii),map(lambda m: m.id(),jj)))
            # print >>sys.stderr, " ".join(map(lambda i: "%2s"%(i.id(),),ii))
            # print >>sys.stderr, " ".join(map(lambda i: "%2s"%(i.id(),),jj))
            # Rewrite this glycan's links in terms of g's node ids.
            linkset3 = set()
            for f,l,t in linkset1:
                linkset3.add((matching[f],l,matching[t]))

            if linkset3 == linkset2:
                # Isomorphism found: copy this glycan's ids onto g's matched nodes.
                for mi,mj in zip(ii,jj):
                    mj.set_id(mi.id())
                # print >>sys.stderr, "%d iterations to find an isomorphism"%(iters,)
                return True

        return False

    @staticmethod
    def monosaccharide_match(a,b):
        """Return True if nodes *a* and *b* may correspond in a node matching.

        The nodes must be equal, have the same number of parent links, the
        same number of instantiated links and of links overall, and their
        child links must admit at least one matching in which paired links
        and their children are equal. Parent links are compared by count only.
        """
        if not a.equals(b):
            return False
        if len(a.parent_links()) != len(b.parent_links()):
            return False
        for only_instantiated in (True,False):
            if len(a.links(instantiated_only=only_instantiated)) != len(b.links(instantiated_only=only_instantiated)):
                return False

        def link_pair_match(i,j):
            return i.equals(j) and i.child().equals(j.child())

        # One matching of the child links is enough.
        for _ii,_jj in itermatchings(a.links(instantiated_only=False),b.links(instantiated_only=False),
                                     link_pair_match):
            return True
        return False

    @staticmethod
    def undetroot_equals(a,b,mapids=True):
        """Return True if undetermined roots *a* and *b* are interchangeable.

        Their subtrees must be equal (subtree_equals) and their parent links
        must form the same set of (link tuple, parent id) pairs. The parents
        of all parent links are expected to have ids assigned.
        """
        if not a.subtree_equals(b,mapids=mapids):
            return False
        assert(None not in set(l.parent().id() for l in a.parent_links()))
        assert(None not in set(l.parent().id() for l in b.parent_links()))

        def attachments(node):
            return set((l.astuple(),l.parent().id()) for l in node.parent_links())

        return attachments(a) == attachments(b)

    def str(self,node=None,prefix="",codeprefix="",monofmt=lcSym):
        """Return a multi-line text drawing of the subtree at *node* (default: the root).

        Only instantiated links are followed, and a node may have at most
        three of them. Children are ordered by Linkage.posstr() of their
        parent position, descending. *prefix* is the indentation of the
        current line and *codeprefix* the text already placed before this
        node's symbol on that line.
        """
        if node == None:
            node = self.root()
        s = codeprefix + monofmt.toStr(node)
        kidlinks = [l for l in node.links() if l.instantiated()]
        kidlinks.sort(key=lambda l: Linkage.posstr(l.parent_pos()),reverse=True)
        kids = [Linkage.child(l) for l in kidlinks]
        n = len(kids)
        assert n in (0,1,2,3)
        if n == 0:
            return prefix + s
        if n == 1:
            return self.str(kids[0],prefix=prefix,codeprefix=(s + ' - '),monofmt=monofmt)
        # Branch lines are indented past this node's text and the connector.
        indent = prefix + ' '*len(s)+"   "
        if n == 2:
            upper = self.str(kids[0],indent,monofmt=monofmt)
            lower = self.str(kids[1],indent,monofmt=monofmt)
            return upper + '\n' + prefix + s + ' + ' + '\n' + lower
        if n == 3:
            upper = self.str(kids[0],indent,monofmt=monofmt)
            middle = self.str(kids[1],prefix,codeprefix = s + ' + ',monofmt=monofmt)
            lower = self.str(kids[2],indent,monofmt=monofmt)
            return upper + '\n' + middle + '\n' + lower

    def __str__(self):
        """Return the text drawing produced by str() with its default arguments."""
        return self.str()

    def dump(self, m=None, level=0, branch='', monofmt=iupacSym):
        """Print an indented outline of the subtree at *m* (default: the root).

        A chain of single-child nodes is collected on one line. A line is
        printed at a leaf or at a branching node, and the children of a
        branching node are printed one indentation level deeper.
        """
        if m == None:
            m = self.root()

        br = branch + " " + monofmt.toStr(m)
        kids = [link.child() for link in m.links()]

        if len(kids) == 1:
            # Single child: keep extending the current line.
            self.dump(kids[0],level,br,monofmt)
            return

        # Leaf or branching node: emit the collected line, then any children.
        print('    '*level + br)
        for c in kids:
            self.dump(c,level+1, '', monofmt)
コード例 #13
0
ファイル: GlycanFactory.py プロジェクト: tylerjstewart/PyGly
 def __init__(self):
     """Create the GlycoCT formatter and monosaccharide factory, then run the base initialiser."""
     # fmt and mf are assigned before the base-class initialiser runs.
     # NOTE(review): presumably the base initialiser calls parseSection(),
     # which needs self.fmt - confirm before reordering.
     self.fmt = GlycoCTFormat()
     self.mf = MonoFactory()
     super(GlycanFactory, self).__init__()
コード例 #14
0
ファイル: GlycanFactory.py プロジェクト: tylerjstewart/PyGly
class GlycanFactory(ReferenceTable):
    """Table of named reference glycans that can also build glycans from Oxford-style names.

    Entries are parsed from GlycoCT text by ``parseSection``; ``new`` hands
    out clones so the stored glycans are never modified by callers.
    """

    def __init__(self):
        # NOTE(review): fmt and mf are assigned before the base-class
        # __init__ runs; presumably that initialiser calls parseSection,
        # which needs self.fmt -- confirm before reordering.
        self.fmt = GlycoCTFormat()
        self.mf = MonoFactory()
        super(GlycanFactory, self).__init__()

    def new(self, key):
        """Return a clone of the glycan stored under ``key``."""
        return self[key].clone()

    def parseSection(self, name, kv):
        """Turn one table section into ``(key, glycan)`` pairs.

        Parameters:
            name: section name; always the first key returned.
            kv: mapping with a required ``'GlycoCT'`` entry and an optional
                ``'Aliases'`` entry holding ``;``-separated extra names.

        Returns:
            A list of ``(alias, glycan)`` tuples, all sharing one glycan
            object.

        Raises:
            KeyError: if ``kv`` has no ``'GlycoCT'`` entry.
        """
        # The GlycoCT text is split on any whitespace and re-joined one
        # token per line before parsing.
        g = self.fmt.toGlycan('\n'.join(kv['GlycoCT'].split()))
        aliases = [name]
        for alias in kv.get('Aliases', '').split(';'):
            alias = alias.strip()
            # A missing 'Aliases' entry or a trailing ';' yields an empty
            # piece; registering '' as a key would make `'' in self` true.
            if alias:
                aliases.append(alias)
        return [(a, g) for a in aliases]

    def add_mono(self,
                 parent,
                 name,
                 parent_pos,
                 child_pos=1,
                 anomer=Anomer.beta,
                 parent_type=Linkage.oxygenPreserved,
                 child_type=Linkage.oxygenLost):
        """Create monosaccharide ``name`` and attach it as a child of ``parent``.

        Parameters:
            parent: node that receives the new child.
            name: key passed to the monosaccharide factory.
            parent_pos: linkage position on the parent.
            child_pos: linkage position on the child (default 1).
            anomer: anomer set on the new monosaccharide.
            parent_type, child_type: linkage types forwarded to
                ``parent.add_child``.

        Returns:
            The newly created monosaccharide.
        """
        m = self.mf.new(name)
        m.set_anomer(anomer)
        parent.add_child(m,
                         parent_pos=parent_pos,
                         child_pos=child_pos,
                         parent_type=parent_type,
                         child_type=child_type)
        return m

    def oxford2Glycan(self, name):
        """Build a glycan from an Oxford-style name such as ``'FA2BG2S1'``.

        If ``name`` is already a key of this table a clone of that entry is
        returned.  Otherwise the name is read left to right:

        * optional leading ``F``: start from the ``'FM3'`` entry instead of
          ``'M3'``;
        * ``A<n>``: for n in 1..4 add n GlcNAc antennae (3-arm position 2,
          6-arm position 2, 3-arm position 4, 6-arm position 6, in that
          order);
        * optional ``B``: add a GlcNAc at position 4 of the mannose under
          the second core GlcNAc;
        * optional ``F<n>``: add an alpha Fuc at position 6 of the first n
          antennae;
        * ``G<n>``: add a Gal at position 4 of the first n antennae;
        * ``S<n>``: add an alpha Neu5Ac (child position 2, parent position
          6) to the first n Gal residues.

        Parsing stops, returning the glycan built so far, as soon as the
        name is exhausted.

        Raises:
            AssertionError: if an expected letter is missing or a count
                exceeds the number of residues it attaches to.
            IndexError: if the name is empty or ends before a count digit,
                or the template lacks the expected core residues.
            ValueError: if a count is not a digit.
        """
        if name in self:
            return self.new(name)
        p = 0
        if name[p] == 'F':
            g = self.new('FM3')
            p += 1
        else:
            g = self.new('M3')
        r = g.root()
        # List comprehension rather than filter(...)[0]: filter() returns a
        # non-subscriptable iterator on Python 3.
        glcnac2 = [m for m in r.children()
                   if m.compatible(self.mf['GlcNAc'])][0]
        man1 = glcnac2.children()[0]
        man16 = [l.child() for l in man1.links() if l.parent_pos() == 6][0]
        man13 = [l.child() for l in man1.links() if l.parent_pos() == 3][0]

        assert name[p] == 'A', "expected 'A' in Oxford name %r" % (name,)
        nant = int(name[p + 1])
        # ant/gal are 1-based: index 0 is a placeholder.
        ant = [None]
        antenna_sites = ((man13, 2), (man16, 2), (man13, 4), (man16, 6))
        if nant in (1, 2, 3, 4):
            for arm, pos in antenna_sites[:nant]:
                ant.append(self.add_mono(arm, 'GlcNAc', parent_pos=pos))
        p += 2
        if p >= len(name):
            return g

        if name[p] == 'B':
            self.add_mono(man1, 'GlcNAc', 4)
            # Advance the cursor; the original tried to assign into the
            # (immutable) name string here and raised TypeError.
            p += 1
            if p >= len(name):
                return g

        if name[p] == 'F':
            nfuc = int(name[p + 1])
            assert nfuc <= nant, "more fucoses than antennae in %r" % (name,)
            for fi in range(1, nfuc + 1):
                self.add_mono(ant[fi],
                              'Fuc',
                              parent_pos=6,
                              anomer=Anomer.alpha)
            p += 2
            if p >= len(name):
                return g

        assert name[p] == 'G', "expected 'G' in Oxford name %r" % (name,)
        ngal = int(name[p + 1])
        assert ngal <= nant, "more galactoses than antennae in %r" % (name,)
        gal = [None]
        for gi in range(1, ngal + 1):
            gal.append(self.add_mono(ant[gi], 'Gal', parent_pos=4))
        p += 2
        if p >= len(name):
            return g

        assert name[p] == 'S', "expected 'S' in Oxford name %r" % (name,)
        nsia = int(name[p + 1])
        assert nsia <= ngal, "more sialic acids than galactoses in %r" % (name,)
        for si in range(1, nsia + 1):
            self.add_mono(gal[si],
                          'Neu5Ac',
                          parent_pos=6,
                          child_pos=2,
                          anomer=Anomer.alpha)
        return g
コード例 #15
0
import sys
import xml
import urllib
import urllib2
import copy
import xml.etree.ElementTree as ET
from GlyTouCan import *
from GlycanFormatter import GlycoCTFormat

# Module-level GlycoCT formatter instance, created once when this module is imported.
GlycoctParser = GlycoCTFormat()
# Module-level GlyTouCan instance, created once when this module is imported.
gtc = GlyTouCan()

class MonosaccharideDB:
    
    page_cache_by_id = {}
    page_cache_by_glycoct = {}

    def __init__(self):
        """Set up no per-instance state; the page caches are class-level attributes."""
        pass
    
    def encode(self, d):
        """Return ``d`` URL-encoded as a query string via ``urllib.urlencode``."""
        # NOTE(review): urllib.urlencode is the Python 2 location of this
        # function (Python 3 moved it to urllib.parse.urlencode).
        return urllib.urlencode(d)
    
    def get_document_by_glycoct(self, basetype, substlist):
        apiurl = "http://www.monosaccharidedb.org/display_monosaccharide.action?"
        
        param = {"scheme": "glycoct",
                 "name": basetype,
                 "output": "xml"}
        apiurl += urllib.urlencode(param)