print >>sys.stderr, "Skip: Unsupported circular glycan"
            # dodge this case...
            continue
        except UnsupportedMonoError, e:
            print >>sys.stderr, "Skip:",e.message
            continue
        except GlycanParseError, e:
            print >>sys.stderr, "Skip:",e.message
            continue


        if not gtopo or not gcomp:
	    continue

        try:
	    g3 = glycoct_parser.toGlycan(zf1.read(acc+'.txt'))
	except (KeyError, GlycoCTParseError):
	    g3 = None

        if acc in ('G24172ZD',):
            # this is a (1+1) two monosaccharide case in which the
            # GlyTouCan topology has (-1+1).  Other (1+1) two
            # monosaccharide cases have GlyTouCan topology with (1+1) -
            # and this second decision seems to be more common.
            # Can't figure out how to decide which one to do.
	    print >>sys.stderr, "Skip: (1+1) two monosaccharide issue"
            continue

        if False and acc in ('G64632PP',):
	    # the topology G83908ZR breaks my equality algorithm when compared to the
            # topology generated from G64632PP - multiple valid id
class GlycoCTDatabase:
    """Read-only access to a zip archive of GlycoCT records.

    A record with archive id ``<id>`` is stored as the member ``<id>.txt``
    (the GlycoCT text).  Two optional companion members are picked up when
    present: ``<id>.att`` (CSV rows whose first cell is an attribute name and
    whose remaining cells are its values) and ``<id>.png`` (raw image bytes).

    The archive is opened for each call and is always closed again, also when
    the requested member is missing or when reading/parsing raises.
    """

    # Accession prefix: stripped from accessions passed to get()/getraw()
    # and prepended to the archive id on records that are returned.
    prefix = ""
    # Extension the archive filename must carry (asserted in __init__).
    extn = "gct"
    # Source label for returned records; when empty the archive's base
    # name (without directory and extension) is used instead.
    source = ""

    def __init__(self, filename):
        """Remember the archive path and derive the database name from it.

        ``filename`` must contain a ``.`` and end in ``.<extn>``; otherwise
        the unpacking raises ValueError or the assertion fails.
        """
        self.filename = filename
        self.name, extn = filename.rsplit('.', 1)
        self.name = os.path.split(self.name)[1]
        assert extn == self.extn
        self.fmt = GlycoCTFormat()

    def getraw(self, accession):
        """Return the unparsed ``.txt`` member for ``accession``.

        Returns None when the archive has no such member.
        """
        fn = accession[len(self.prefix):] + '.txt'
        zf = zipfile.ZipFile(self.filename, "r")
        try:
            try:
                zf.getinfo(fn)
            except KeyError:
                return None
            return self._getraw(zf, fn)
        finally:
            # Runs on every exit path, including the early "missing" return.
            zf.close()

    def _getraw(self, zf, name):
        """Read member ``name`` from the already opened archive ``zf``."""
        return zf.read(name)

    def _get(self, zf, name):
        """Build a GlyRecord from member ``name`` of the open archive ``zf``.

        Returns None when the text raises GlycoCTParseError.  Any other
        exception is reported on stderr with a traceback and terminates the
        process with exit status 1.
        """
        try:
            glystr = self._getraw(zf, name)
            g = self.fmt.toGlycan(glystr)
        except GlycoCTParseError:
            return None
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit propagate instead of being reported as a
            # problem with this file.
            sys.stderr.write("Problem with GlycoCT file " + name + "\n")
            traceback.print_exc()
            sys.exit(1)

        stem = name.rsplit('.', 1)[0]
        kwargs = {}

        # Optional attribute sheet.
        try:
            attstr = zf.read(stem + '.att')
        except KeyError:
            pass
        else:
            for r in csv.reader(StringIO(attstr)):
                if not r:
                    # csv.reader yields [] for a blank line; nothing to key on.
                    continue
                kwargs[r[0]] = r[1:]

        # Optional image.
        try:
            kwargs['image'] = zf.read(stem + '.png')
        except KeyError:
            pass

        return GlyRecord(source=self.source if self.source else self.name,
                         accession=self.prefix + stem,
                         glycan=g,
                         name=self.name,
                         **kwargs)

    def get(self, accession):
        """Return the GlyRecord for ``accession``.

        Returns None when the archive has no matching ``.txt`` member or
        when its content fails to parse (see ``_get``).
        """
        fn = accession[len(self.prefix):] + '.txt'
        zf = zipfile.ZipFile(self.filename, "r")
        try:
            try:
                zf.getinfo(fn)
            except KeyError:
                return None
            return self._get(zf, fn)
        finally:
            zf.close()

    def __iter__(self):
        """Iterate over the records of the archive (see ``next``)."""
        return self.next()

    def next(self):
        """Generate a GlyRecord for every ``.txt`` member that yields one.

        Members for which ``_get`` returns a false value are skipped.  The
        archive is closed when the generator finishes, is closed early, or
        is garbage collected.
        """
        zf = zipfile.ZipFile(self.filename, "r")
        try:
            for name in zf.namelist():
                if not name.endswith('.txt'):
                    continue
                gr = self._get(zf, name)
                if gr:
                    yield gr
        finally:
            zf.close()
# Example #3
# 0
class GlycanFactory(ReferenceTable):
    """Table of named reference glycans plus a builder for short N-glycan names.

    Entries are looked up with ``self[key]`` / ``key in self`` (provided by
    ReferenceTable) and handed out as clones, so callers may modify what they
    receive.  ``oxford2Glycan`` additionally constructs glycans that are not
    in the table from names such as ``FA2G2S1``.
    """

    def __init__(self):
        # NOTE(review): the helpers are created before the base initialiser
        # runs, presumably because it calls parseSection() - confirm against
        # ReferenceTable.
        self.fmt = GlycoCTFormat()
        self.mf = MonoFactory()
        super(GlycanFactory, self).__init__()

    def new(self, key):
        """Return a fresh clone of the table entry stored under ``key``."""
        return self[key].clone()

    def parseSection(self, name, kv):
        """Turn one section into ``(alias, glycan)`` pairs.

        ``kv['GlycoCT']`` holds the GlycoCT text with arbitrary whitespace
        between its lines; it is re-joined with newlines before parsing.
        ``kv['Aliases']`` (optional) is a ``;``-separated list of additional
        names.  Empty aliases - from a missing value or a stray ``;`` - are
        dropped, so the glycan is never registered under the empty string.
        All returned pairs share the same glycan object.
        """
        g = self.fmt.toGlycan('\n'.join(kv['GlycoCT'].split()))
        aliases = [name]
        for alias in kv.get('Aliases', '').split(';'):
            alias = alias.strip()
            if alias:
                aliases.append(alias)
        return [(a, g) for a in aliases]

    def add_mono(self,
                 parent,
                 name,
                 parent_pos,
                 child_pos=1,
                 anomer=Anomer.beta,
                 parent_type=Linkage.oxygenPreserved,
                 child_type=Linkage.oxygenLost):
        """Create monosaccharide ``name`` and attach it as a child of ``parent``.

        The new monosaccharide comes from the MonoFactory, gets ``anomer``
        set and is linked with the given positions and linkage types.
        Returns the new monosaccharide.
        """
        m = self.mf.new(name)
        m.set_anomer(anomer)
        parent.add_child(m,
                         parent_pos=parent_pos,
                         child_pos=child_pos,
                         parent_type=parent_type,
                         child_type=child_type)
        return m

    def oxford2Glycan(self, name):
        """Build the glycan described by the short (Oxford-style) name ``name``.

        A name that is already in the table is returned as a clone of that
        entry.  Otherwise the name is read left to right as::

            [F] A<n> [B] [F<n>] G<n> S<n>

        * leading ``F``: start from the ``FM3`` entry instead of ``M3``
        * ``A<n>``: add n GlcNAc antennae (n = 1..4) to the two branch
          mannoses of the core
        * ``B``: add a GlcNAc on position 4 of the core mannose
        * ``F<n>``: add a Fuc to each of the first n antennae
        * ``G<n>``: add a Gal to each of the first n antennae
        * ``S<n>``: add a Neu5Ac to each of the first n Gal residues

        The name may stop after any complete component.  Malformed names
        fail with AssertionError, IndexError or ValueError.
        """
        if name in self:
            return self.new(name)

        p = 0
        if name[p] == 'F':
            g = self.new('FM3')
            p += 1
        else:
            g = self.new('M3')

        # Locate the core: root -> GlcNAc child -> its first child -> the
        # children linked at positions 6 and 3.
        r = g.root()
        glcnac = self.mf['GlcNAc']
        # A list comprehension (not filter()) so that indexing works no
        # matter whether filter() returns a list or an iterator.
        glcnac2 = [m for m in r.children() if m.compatible(glcnac)][0]
        man1 = glcnac2.children()[0]
        man16 = [l.child() for l in man1.links() if l.parent_pos() == 6][0]
        man13 = [l.child() for l in man1.links() if l.parent_pos() == 3][0]

        assert name[p] == 'A'
        nant = int(name[p + 1])
        # (branch mannose, position) of antenna 1, 2, 3 and 4, in that order.
        antenna_sites = [(man13, 2), (man16, 2), (man13, 4), (man16, 6)]
        # ant[i] is antenna number i; index 0 is a placeholder.
        ant = [None]
        # Counts above four add no antennae at all.
        if nant <= len(antenna_sites):
            for arm, pos in antenna_sites[:nant]:
                ant.append(self.add_mono(arm, 'GlcNAc', parent_pos=pos))
        p += 2
        if p >= len(name):
            return g

        if name[p] == 'B':
            self.add_mono(man1, 'GlcNAc', 4)
            # Advance past the 'B'.  (This used to be ``name[p] += 1``, which
            # raises TypeError because strings are immutable.)
            p += 1
            if p >= len(name):
                return g

        if name[p] == 'F':
            nfuc = int(name[p + 1])
            assert (nfuc <= nant)
            for fi in range(1, nfuc + 1):
                self.add_mono(ant[fi],
                              'Fuc',
                              parent_pos=6,
                              anomer=Anomer.alpha)
            p += 2
            if p >= len(name):
                return g

        assert (name[p] == 'G')
        ngal = int(name[p + 1])
        assert (ngal <= nant)
        # gal[i] is the Gal on antenna i; index 0 is a placeholder.
        gal = [None]
        for gi in range(1, ngal + 1):
            gal.append(self.add_mono(ant[gi], 'Gal', parent_pos=4))
        p += 2
        if p >= len(name):
            return g

        assert (name[p] == 'S')
        nsia = int(name[p + 1])
        assert (nsia <= ngal)
        for si in range(1, nsia + 1):
            self.add_mono(gal[si],
                          'Neu5Ac',
                          parent_pos=6,
                          child_pos=2,
                          anomer=Anomer.alpha)
        return g