class GlyIUPACFilter:
    """Pass-through filter that tags each record with its IUPAC string.

    Wraps an iterable of records (``glydb``). While iterating, every record's
    ``glycan`` attribute is rendered with ``IUPACLinearFormat.toStr`` and the
    result is stored on the record under the ``'IUPAC'`` key. Every record is
    yielded, whether or not the annotation succeeded.
    """

    def __init__(self, glydb):
        """Store the wrapped record source and build the formatter."""
        self.glydb = glydb
        self.fmt = IUPACLinearFormat()

    def __iter__(self):
        """Return a fresh generator over the annotated records."""
        return self.next()

    def next(self):
        """Generate the records of ``glydb``, adding ``'IUPAC'`` when possible."""
        for record in self.glydb:
            try:
                record['IUPAC'] = self.fmt.toStr(record.glycan)
            except KeyError:
                # Best effort: a KeyError while rendering or storing the
                # string leaves the record without an 'IUPAC' entry.
                pass
            yield record
class LinearIUPACDatabase:
    """Read glycans from a text file of IUPAC linear strings, one per line.

    Each data line is an accession, whitespace, and the glycan string. The
    first line of the file is skipped as a header, as are lines that start
    with '#'. Only the first line seen for a given accession is used.

    Class attributes:
      extn   -- file extension the filename must carry.
      source -- value passed as ``source`` to every ``GlyRecord``.
      prefix -- string prepended to every accession.

    NOTE(review): ``next`` calls ``self.acc(...)``, which is not defined in
    this class; presumably subclasses supply it -- confirm.
    """
    extn = 'iupac'
    source = ""
    prefix = ""

    def __init__(self, filename):
        """Remember ``filename`` and derive ``self.name`` from it.

        ``self.name`` is the part of ``filename`` before its last '.'; the
        part after it must equal ``self.extn`` (checked with ``assert``).
        """
        self.filename = filename
        self.name, extn = filename.rsplit('.', 1)
        assert extn == self.extn
        self.fmt = IUPACLinearFormat()

    def __iter__(self):
        """Return a fresh generator over the records in the file."""
        return self.next()

    def preprocess(self, glystr):
        """Hook applied to each glycan string before parsing (identity here)."""
        return glystr

    def next(self):
        """Generate one ``GlyRecord`` per distinct, parseable accession.

        Lines with fewer than two whitespace-separated fields (blank lines,
        or an accession with no glycan string) are skipped. A glycan string
        that raises ``IUPACLinearParseError`` has its traceback printed and
        is skipped.
        """
        seenaccs = set()
        # The with-block closes the file even when the consumer stops
        # iterating early or an exception escapes the loop.
        with open(self.filename) as h:
            for lineno, l in enumerate(h):
                if lineno == 0 or l.startswith('#'):
                    continue
                # A list (not map()) so the fields can be indexed regardless
                # of whether map() returns a list or an iterator.
                sl = [field.strip() for field in l.split(None, 1)]
                if len(sl) < 2:
                    continue
                acc = self.acc(sl[0])
                glystr = self.preprocess(sl[1])
                if acc in seenaccs:
                    continue
                seenaccs.add(acc)
                try:
                    g = self.fmt.toGlycan(glystr)
                except IUPACLinearParseError:
                    traceback.print_exc()
                    continue
                yield GlyRecord(source=self.source,
                                accession=self.prefix + acc,
                                glycan=g,
                                name=self.name)
Exemple #3
0
class CFGArrayDatabase:
    """Read glycans from a CFG array ``.cfg`` text file.

    Each data line is an integer id, whitespace, and a hand-formatted IUPAC
    linear glycan string. The first line of the file is skipped as a header,
    as are lines that start with '#'. Records are emitted with source
    "CFGArray" and accession "CFG" followed by the zero-padded id.
    """

    # Trailing decoration removed before parsing: an optional anomer
    # ([ab] with optional '1'), optional '-', optional digit with optional
    # '-', and an optional spacer token ([RS], optional p/P, digits -- or
    # 'MDPLys'), anchored at the end of the string. Every part is optional,
    # so the pattern also matches the empty string at the end.
    _LINKER_PATTERN = r'([ab]1?)?-?(\d(-?))?([RS][pP]?\d+|MDPLys)?$'

    def __init__(self, filename):
        """Remember ``filename`` and derive ``self.name`` from it.

        ``self.name`` is the part of ``filename`` before its last '.'; the
        part after it must be 'cfg' (checked with ``assert``).
        """
        self.filename = filename
        self.name, extn = filename.rsplit('.', 1)
        assert extn == 'cfg'
        self.fmt = IUPACLinearFormat()

    def __iter__(self):
        """Return a fresh generator over the records in the file."""
        return self.next()

    @staticmethod
    def _strip_linker(glystr):
        """Return ``glystr`` without the trailing text matched by the pattern.

        Slicing at the match start (rather than by negative length) keeps
        the string intact when the match is empty: ``glystr[:-0]`` would
        otherwise evaluate to the empty string.
        """
        m = re.search(CFGArrayDatabase._LINKER_PATTERN, glystr)
        if m is None:
            return glystr
        return glystr[:m.start()]

    def next(self):
        """Generate one ``GlyRecord`` per parseable data line.

        Lines with fewer than two whitespace-separated fields are skipped.
        A glycan string that raises ``IUPACLinearParseError`` has its
        traceback printed and is skipped. The accession of every emitted
        record is also written to standard error, prefixed with '>>'.
        """
        # The with-block closes the file even when the consumer stops
        # iterating early or an exception escapes the loop.
        with open(self.filename) as h:
            for lineno, l in enumerate(h):
                if lineno == 0 or l.startswith('#'):
                    continue
                sl = l.split(None, 1)
                if len(sl) < 2:
                    continue
                acc = int(sl[0])
                # Wow - badly hand formatted CFG IUPAC linear glycans suck...
                glystr = self._strip_linker(sl[1].strip())
                try:
                    g = self.fmt.toGlycan(glystr)
                except IUPACLinearParseError:
                    traceback.print_exc()
                    continue
                sys.stderr.write(">>CFG%03d\n" % acc)
                yield GlyRecord(source="CFGArray",
                                accession="CFG%03d" % acc,
                                glycan=g,
                                name=self.name)
Exemple #4
0
 def __init__(self, filename):
     """Remember the file to read and create an ``IUPACLinearFormat``.

     The part of ``filename`` before its last '.' is stored as
     ``self.name``; the part after it must be 'cfg' (checked with
     ``assert``).
     """
     self.filename = filename
     stem, suffix = filename.rsplit('.', 1)
     self.name = stem
     assert suffix == 'cfg'
     self.fmt = IUPACLinearFormat()
Exemple #5
0
            m = re.search(r'([ab]1?)?-?(\d(-?))?([RS][pP]?\d+|MDPLys)?$',
                          glystr)
            if m != None:
                x = len(m.group(0))
                glystr = glystr[:-x]
            try:
                g = self.fmt.toGlycan(glystr)
            except IUPACLinearParseError:
                traceback.print_exc()
                continue
            print >> sys.stderr, ">>CFG%03d" % acc
            yield GlyRecord(source="CFGArray",
                            accession="CFG%03d" % acc,
                            glycan=g,
                            name=self.name)


if __name__ == '__main__':
    # Command-line driver: read the CFG array file named by the first
    # argument and, for each record, print its accession with the IUPAC
    # linear string followed by the glycan object itself.
    import sys
    from GlycanFormatter import IUPACLinearFormat

    if len(sys.argv) < 2:
        sys.exit("usage: %s <array.cfg>" % sys.argv[0])

    fmt = IUPACLinearFormat()
    gdb = CFGArrayDatabase(sys.argv[1])
    for r in gdb:
        lc = fmt.toStr(r.glycan)
        # Single pre-formatted argument in parentheses: prints the same
        # text whether print is a statement or a function.
        print("%s %s" % (r.accession, lc))
        print(r.glycan)
        # Flush per record so stdout stays ordered with the per-record
        # messages the database writes to stderr.
        sys.stdout.flush()
 def __init__(self, glydb):
     """Create an ``IUPACLinearFormat`` and keep a reference to ``glydb``."""
     self.fmt = IUPACLinearFormat()
     self.glydb = glydb