print >>sys.stderr, "Skip: Unsupported circular glycan" # dodge this case... continue except UnsupportedMonoError, e: print >>sys.stderr, "Skip:",e.message continue except GlycanParseError, e: print >>sys.stderr, "Skip:",e.message continue if not gtopo or not gcomp: continue try: g3 = glycoct_parser.toGlycan(zf1.read(acc+'.txt')) except (KeyError, GlycoCTParseError): g3 = None if acc in ('G24172ZD',): # this is a (1+1) two monosaccharide case in which the # GlyTouCan topology has (-1+1). Other (1+1) two # monosaccharide caes have GlyTouCan topology with (1+1) - # and this second decision seems to be more common. # Can't figure out how to decide whicoh one to do. print >>sys.stderr, "Skip: (1+1) two monosaccharide issue" continue if False and acc in ('G64632PP',): # the toplogy G83908ZR breaks my equality algorithm when compared to the # topology generated from G64632PP - multiple valid id
class GlycoCTDatabase:
    """Read-only access to a zip archive of GlycoCT glycan descriptions.

    Each glycan lives in the archive as ``<id>.txt``.  Optional members
    with the same stem add keyword arguments to the resulting GlyRecord:
    ``<id>.att`` (CSV rows; first column is the key, the remaining
    columns become the value list) and ``<id>.png`` (stored under the
    ``image`` key).

    Class attributes intended to be overridden by subclasses:
      prefix -- string stripped from the front of an accession to get the
                archive member stem, and prepended again on the way out.
      extn   -- required extension of the archive filename.
      source -- value for GlyRecord's ``source``; when empty, the archive's
                base name is used instead.
    """

    prefix = ""
    extn = "gct"
    source = ""

    def __init__(self, filename):
        """Remember the archive path and derive the database name from it.

        ``filename`` must contain a '.' and end in ``self.extn``.
        """
        self.filename = filename
        self.name, extn = filename.rsplit('.', 1)
        self.name = os.path.split(self.name)[1]
        assert extn == self.extn
        self.fmt = GlycoCTFormat()

    def _txtname(self, accession):
        """Return the archive member name holding ``accession``."""
        return accession[len(self.prefix):] + '.txt'

    def _lookup(self, accession, reader):
        """Open the archive, apply ``reader(zf, member)`` and close it again.

        Returns None when the archive has no member for ``accession``.
        The archive is closed on every path, including the missing-member
        case and when ``reader`` raises.
        """
        zf = zipfile.ZipFile(self.filename, "r")
        try:
            fn = self._txtname(accession)
            try:
                zf.getinfo(fn)
            except KeyError:
                return None
            return reader(zf, fn)
        finally:
            zf.close()

    def getraw(self, accession):
        """Return the raw GlycoCT text for ``accession``, or None if absent."""
        return self._lookup(accession, self._getraw)

    def _getraw(self, zf, name):
        """Read member ``name`` from the open archive ``zf``."""
        return zf.read(name)

    def _get(self, zf, name):
        """Build a GlyRecord from member ``name`` of the open archive ``zf``.

        Returns None when the GlycoCT text fails to parse.  Any other
        error while reading or parsing is reported on stderr with a
        traceback and terminates the process with exit status 1.
        """
        try:
            glystr = self._getraw(zf, name)
            g = self.fmt.toGlycan(glystr)
        except GlycoCTParseError:
            return None
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit propagate untouched.
            sys.stderr.write("Problem with GlycoCT file " + name + "\n")
            traceback.print_exc()
            sys.exit(1)

        stem = name.rsplit('.', 1)[0]
        kwargs = {}

        # Optional attribute file: a missing member is not an error.
        try:
            attstr = zf.read(stem + '.att')
        except KeyError:
            pass
        else:
            for row in csv.reader(StringIO(attstr)):
                if not row:
                    # csv.reader yields [] for blank lines; nothing to record.
                    continue
                kwargs[row[0]] = row[1:]

        # Optional image: likewise silently skipped when absent.
        try:
            kwargs['image'] = zf.read(stem + '.png')
        except KeyError:
            pass

        return GlyRecord(source=self.source or self.name,
                         accession=self.prefix + stem,
                         glycan=g,
                         name=self.name,
                         **kwargs)

    def get(self, accession):
        """Return the GlyRecord for ``accession``.

        Returns None when the accession is absent or its text cannot be
        parsed.
        """
        return self._lookup(accession, self._get)

    def __iter__(self):
        return self.next()

    def next(self):
        """Generate a GlyRecord for every parseable ``.txt`` member.

        The archive is closed when the generator is exhausted, closed
        early, or garbage collected.
        """
        zf = zipfile.ZipFile(self.filename, "r")
        try:
            for name in zf.namelist():
                if not name.endswith('.txt'):
                    continue
                gr = self._get(zf, name)
                if gr:
                    yield gr
        finally:
            zf.close()
class GlycanFactory(ReferenceTable):
    """Reference table of named glycans, plus a builder for Oxford names.

    Lookups by key (``self[key]``, ``key in self``) come from the
    ReferenceTable base class; ``new`` hands out independent clones.
    """

    def __init__(self):
        # Set up the parser and monosaccharide factory before the base
        # class initializer runs.
        # NOTE(review): presumably the base initializer calls parseSection,
        # which needs self.fmt -- confirm against ReferenceTable.
        self.fmt = GlycoCTFormat()
        self.mf = MonoFactory()
        super(GlycanFactory, self).__init__()

    def new(self, key):
        """Return a fresh clone of the glycan registered under ``key``."""
        return self[key].clone()

    def parseSection(self, name, kv):
        """Turn one section into ``(alias, glycan)`` pairs.

        ``kv['GlycoCT']`` is split on whitespace and re-joined with
        newlines before parsing.  The section name and every ';'-separated
        entry of the optional ``kv['Aliases']`` all map to the same glycan
        object.  Note that a missing or empty ``Aliases`` value still
        contributes one empty-string alias.
        """
        g = self.fmt.toGlycan('\n'.join(kv['GlycoCT'].split()))
        aliases = [name]
        aliases.extend(a.strip() for a in kv.get('Aliases', '').split(';'))
        return [(a, g) for a in aliases]

    def add_mono(self, parent, name, parent_pos, child_pos=1,
                 anomer=Anomer.beta,
                 parent_type=Linkage.oxygenPreserved,
                 child_type=Linkage.oxygenLost):
        """Create monosaccharide ``name`` and attach it under ``parent``.

        The new monosaccharide gets the given anomer and is linked from
        ``parent_pos`` on the parent to ``child_pos`` on itself.  Returns
        the new monosaccharide.
        """
        m = self.mf.new(name)
        m.set_anomer(anomer)
        parent.add_child(m,
                         parent_pos=parent_pos,
                         child_pos=child_pos,
                         parent_type=parent_type,
                         child_type=child_type)
        return m

    def oxford2Glycan(self, name):
        """Build a glycan from an Oxford-style name such as ``FA2BG2S1``.

        If ``name`` is already a key of this table, a clone of that entry
        is returned.  Otherwise the name is read left to right as::

            [F] A<n> [B] [F<n>] [G<n> [S<n>]]

        F (leading) -- start from the 'FM3' template instead of 'M3'
        A<n>        -- add n antennae (GlcNAc), for n in 1..4
        B           -- add a GlcNAc at position 4 of the first mannose
        F<n>        -- add Fuc at position 6 of the first n antennae
        G<n>        -- add Gal at position 4 of the first n antennae
        S<n>        -- add Neu5Ac at position 6 of the first n Gal

        Every count is a single digit.  Malformed names raise
        AssertionError (unexpected letter or count too large), or
        ValueError/IndexError from the digit parsing.
        """
        if name in self:
            return self.new(name)

        p = 0
        if name[p] == 'F':
            g = self.new('FM3')
            p += 1
        else:
            g = self.new('M3')

        # Locate the two branch mannoses of the template: root -> GlcNAc
        # child -> first mannose -> children linked at positions 6 and 3.
        r = g.root()
        glcnac = self.mf['GlcNAc']
        glcnac2 = [m for m in r.children() if m.compatible(glcnac)][0]
        man1 = glcnac2.children()[0]
        man16 = [l.child() for l in man1.links() if l.parent_pos() == 6][0]
        man13 = [l.child() for l in man1.links() if l.parent_pos() == 3][0]

        assert name[p] == 'A'
        nant = int(name[p + 1])
        # Attachment site of antenna 1, 2, 3, 4 in that order.
        antenna_sites = ((man13, 2), (man16, 2), (man13, 4), (man16, 6))
        # 1-based: ant[i] is the GlcNAc that starts antenna i.
        ant = [None]
        if nant in (1, 2, 3, 4):
            for parent, pos in antenna_sites[:nant]:
                ant.append(self.add_mono(parent, 'GlcNAc', parent_pos=pos))
        p += 2
        if p >= len(name):
            return g

        if name[p] == 'B':
            self.add_mono(man1, 'GlcNAc', 4)
            # Advance past the 'B'.  (This used to be ``name[p] += 1``,
            # which raises TypeError because ``name`` is a string.)
            p += 1
        if p >= len(name):
            return g

        if name[p] == 'F':
            nfuc = int(name[p + 1])
            assert (nfuc <= nant)
            for fi in range(1, nfuc + 1):
                self.add_mono(ant[fi], 'Fuc', parent_pos=6,
                              anomer=Anomer.alpha)
            p += 2
        if p >= len(name):
            return g

        assert (name[p] == 'G')
        ngal = int(name[p + 1])
        assert (ngal <= nant)
        # 1-based, parallel to ``ant``: gal[i] caps antenna i.
        gal = [None]
        for gi in range(1, ngal + 1):
            gal.append(self.add_mono(ant[gi], 'Gal', parent_pos=4))
        p += 2
        if p >= len(name):
            return g

        assert (name[p] == 'S')
        nsia = int(name[p + 1])
        assert (nsia <= ngal)
        for si in range(1, nsia + 1):
            self.add_mono(gal[si], 'Neu5Ac', parent_pos=6, child_pos=2,
                          anomer=Anomer.alpha)
        return g