Esempio n. 1
0
#!/bin/env python2

import sys, time, re
from collections import defaultdict
import csv

from getwiki import GlycanData, Glycan
w = GlycanData()

# Every source name (third column) seen in the input files.
allsources = set()

# gtc2taxid[gtc][(source, sourceid)] -> set of integer taxid values.
gtc2taxid = defaultdict(lambda: defaultdict(set))
for f in sys.argv[1:]:
    # Use a context manager so each input file is closed as soon as it has
    # been read, instead of leaking one handle per command-line argument.
    with open(f) as fh:
        for l in fh:
            sl = l.split()
            if not sl:
                # Tolerate blank lines (e.g. a trailing empty line at EOF).
                continue
            gtc = sl[0]
            taxid = int(sl[1])
            source = sl[2]
            # The fourth whitespace-separated column is optional.
            sourceid = sl[3] if len(sl) > 3 else None
            gtc2taxid[gtc][(source, sourceid)].add(taxid)
            allsources.add(source)

for m in w.iterglycan():
    start = time.time()
    acc = m.get('accession')

    for source in allsources:
        m.delete_annotations(source=source,
Esempio n. 2
0
#!/bin/env python2

import re, sys
from getwiki import GlycanData
from collections import defaultdict

# Column names of the report, in output order; they are joined with tabs
# to form the header line.
headers = [
    "accession",
    "Hex", "HexNAc", "dHex", "NeuAc", "NeuGc", "HexA", "HexN",
    "S", "P", "aldi", "Xxx", "X",
    "Count",
]

w = GlycanData()

print "\t".join(headers)
for acc in w.iterglycanid():
    g = w.get(acc)
    row = defaultdict(lambda: [0, False])
    row['accession'] = (acc, False)
    for ann in g.annotations(type="MonosaccharideCount", source="EdwardsLab"):
        try:
            value = [int(ann.get('value')), False]
        except ValueError:
            value = [int(ann.get('value')[:-1]), True]
        prop = ann.get('property')
        if prop.endswith('Count'):
            prop = prop[:-5]
        row[prop] = value
    # print row
    row['Count'] = row['Monosaccharide']
    if 'Xxx' not in row:
        row['Xxx'] = [0, False]
    for k in row:
Esempio n. 3
0
#!/bin/env python27

import re, sys
from getwiki import GlycanData

w = GlycanData()
for g in w.iterglycanid():
    print g
Esempio n. 4
0
#!/bin/env python27

from getwiki import GlycanData
import sys

# Hand the first command-line argument to GlycanData.loadsite().
w = GlycanData()
site_source = sys.argv[1]
w.loadsite(site_source)
Esempio n. 5
0
#!/bin/env python2

import re, sys
from getwiki import GlycanData

w = GlycanData()
for l in sys.stdin:
    print l.strip()
    try:
        w.delete(l.strip())
    except OSError:
        pass
Esempio n. 6
0
#!/bin/env python2

from getwiki import GlycanData
import sys
# Usage: [--all] TARGET
# With --all the site is dumped without exclusions; otherwise pages in the
# 'Glycan' and 'Annotation' categories are excluded from the dump.
w = GlycanData()
dump_everything = (sys.argv[1] == "--all")
if dump_everything:
    # Drop the flag so that the dump target becomes sys.argv[1].
    del sys.argv[1]
    w.dumpsite(sys.argv[1])
else:
    w.dumpsite(sys.argv[1], exclude_categories=['Glycan', 'Annotation'])
Esempio n. 7
0
#!/bin/env python27

import sys, time, traceback
from collections import defaultdict

from getwiki import GlycanData, Glycan
w = GlycanData()

import findpygly
from pygly.GlycanResource import GlyTouCan


def accessions(args):
    """Yield whitespace-stripped lines from the named files, or from stdin.

    args -- sequence of file names; when it is empty, lines are read from
            ``sys.stdin`` instead.

    Each file is opened in a ``with`` block so its handle is closed as soon
    as the file has been consumed (or the generator is closed), rather than
    being left open until garbage collection.
    """
    if not args:
        for it in sys.stdin:
            yield it.strip()
        return
    for fn in args:
        with open(fn) as fh:
            for it in fh:
                yield it.strip()


gtc = GlyTouCan()

# Index the (acc, label, redend) triples from gtc.allmotifs() by accession.
allmotifs = {}
for acc, label, redend in gtc.allmotifs():
    allmotifs[acc] = {'label': label, 'redend': redend}

current = set()
for gtcacc in accessions(sys.argv[1:]):
    start = time.time()
Esempio n. 8
0
#!/bin/env python2

import sys

from getwiki import GlycanData
w = GlycanData()

# Database name: the first command-line argument when one is given,
# otherwise "glycandatadev".
database = sys.argv[1] if len(sys.argv) >= 2 else "glycandatadev"

# XML prolog, DOCTYPE entity declarations (rdf, rdfs, swivt, glycandata) and
# the opening <rdf:RDF> element with its namespace bindings.
head = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE rdf:RDF[
        <!ENTITY rdf 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
        <!ENTITY rdfs 'http://www.w3.org/2000/01/rdf-schema#'>
        <!ENTITY swivt 'http://semantic-mediawiki.org/swivt/1.0#'>
        <!ENTITY glycandata 'http://glyomics.org/glycandata#'>
]>

<rdf:RDF
        xmlns:rdf="&rdf;"
        xmlns:rdfs="&rdfs;"
        xmlns:swivt="&swivt;"
        xmlns:glycandata="&glycandata;"
        xmlns:skos="http://www.w3.org/2004/02/skos/core#">
"""
# Closing tag for the <rdf:RDF> element opened in ``head``.
tail = """
</rdf:RDF>
"""
glycantmpl = """
Esempio n. 9
0
#!/bin/env python27

import sys, time, traceback

from getwiki import GlycanData
w = GlycanData()


def accessions():
    """Yield the glycan objects this run should process.

    With command-line arguments, each argument is stripped and looked up
    with ``w.get``; lookups that return a false value are skipped.  Without
    arguments, everything from ``w.iterglycan()`` is yielded.
    """
    requested = sys.argv[1:]
    if not requested:
        for g in w.iterglycan():
            yield g
        return
    for arg in requested:
        g = w.get(arg.strip())
        if g:
            yield g


for g in accessions():
    start = time.time()

    glycan = g.getGlycan()

    if not glycan:
        continue

    if not g.has_annotations(
            property='GlycoCT', type='Sequence', source='GlyTouCan'):
        if not g.has_annotations(
                property='GlycoCT', type='Sequence', source='EdwardsLab'):
Esempio n. 10
0
#!/bin/env python2

import sys

from getwiki import GlycanData
w = GlycanData()

def accessions(args):
    """Yield whitespace-stripped lines from the named files, or from stdin.

    args -- sequence of file names; when it is empty, lines are read from
            ``sys.stdin`` instead.

    Files are read inside a ``with`` block so every handle is closed
    deterministically instead of being leaked until garbage collection.
    """
    if not args:
        for it in sys.stdin:
            yield it.strip()
        return
    for fn in args:
        with open(fn) as fh:
            for it in fh:
                yield it.strip()

current_glygen = set(accessions(sys.argv[1:]))

for acc in w.iterglycanid():
    m = w.get(acc)
    if acc in current_glygen:
        m.set_annotation(value=acc,property="GlyGen",source="EdwardsLab",type="CrossReference")
    else:
        m.delete_annotations(property="GlyGen",source="EdwardsLab",type="CrossReference")
    if w.put(m):
        print acc,"updated"
    else:
        print acc,"checked"
Esempio n. 11
0
#!/bin/env python2

import sys
from getwiki import GlycanData

w = GlycanData()

# Select which pages to process from the first command-line argument.
# Each processed page name is echoed to stderr.
if len(sys.argv) > 1:

    if sys.argv[1] == "-":

        # "-": refresh the pages yielded when the 'Glycan' category is excluded.
        for p in w.iterpages(exclude_categories=['Glycan']):
            print >> sys.stderr, p.name
            w.refresh(p)

    elif sys.argv[1] == "stdin":

        # "stdin": each stripped line of standard input is passed to
        # w.refresh() directly (a string here, not a page object).
        for p in map(str.strip, sys.stdin):
            print >> sys.stderr, p
            w.refresh(p)

    else:

        # Anything else is passed to iterpages() as its ``regex`` argument.
        for p in w.iterpages(regex=sys.argv[1]):
            print >> sys.stderr, p.name
            w.refresh(p)

else:

    # No argument: iterate the pages in the 'Glycan' category.
    # NOTE(review): unlike the other branches, no w.refresh(p) call is visible
    # after the print below -- confirm whether the source was cut off here.
    for p in w.iterpages(include_categories=['Glycan']):
        print >> sys.stderr, p.name
Esempio n. 12
0
#!/bin/env python27

import sys
from collections import defaultdict

from getwiki import GlycanData, Glycan
from pygly.GlycanFormatter import GlycoCTFormat

w = GlycanData()
glycoctformat = GlycoCTFormat()

# Two-column, whitespace-separated lookup table read from the file named by
# the first command-line argument: first column -> second column.
monosdb = {}
# The context manager closes the file once the table has been read; the
# original left the handle open for the rest of the run.
with open(sys.argv[1], 'r') as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing empty line at EOF).
            continue
        k, v = line.split()
        monosdb[k] = v

for g in w.iterglycan():
    acc = g.get('accession')
    monodbids = set()
    glycan = g.getGlycan()
    if not glycan:
        continue
    for m in glycan.all_nodes():
        try:
            glycoctsym = glycoctformat.mtoStr(m)
        except KeyError:
            continue
        try:
            monodbids.add(monosdb[glycoctsym])
        except KeyError:
Esempio n. 13
0
#!/bin/env python2

import sys
from operator import itemgetter

from collections import defaultdict

from getwiki import GlycanData, Glycan
w = GlycanData()

from pygly.GNOme import SubsumptionGraph

# The first command-line argument names the data to load into the
# subsumption graph; it is removed from sys.argv so that the remaining
# arguments start at index 1.
gnome = SubsumptionGraph()
gnome_data = sys.argv[1]
gnome.loaddata(gnome_data)
del sys.argv[1]

def iterglycan():
    if len(sys.argv) > 1:
	seen = set()
	for acc in sys.argv[1:]:
	    if acc in seen:
		continue
	    m = w.get(acc)
	    if m:
		seen.add(acc)
		yield m
	    for desc in gnome.descendants(acc):
		if desc in seen:
		    continue
		m = w.get(desc)
		if m:
Esempio n. 14
0
#!/bin/env python27

import sys
from operator import itemgetter

from getwiki import GlycanData, Glycan

from pygly.GlyNLinkedFilter import GlyNLinkedFilter
mnlc = GlyNLinkedFilter(None).test1

w = GlycanData()

motif_rules_data = """
G00026MO	N-linked	
G00028MO	N-linked	high mannose
G00029MO	N-linked	hybrid
G00030MO	N-linked	complex
G00031MO	O-linked	core 1
G00032MO	O-linked	core 1
G00033MO	O-linked	core 2
G00034MO	O-linked	core 2
G00035MO	O-linked	core 3
G00036MO	O-linked	core 3
G00037MO	O-linked	core 4
G00038MO	O-linked	core 4
G00039MO	O-linked	core 5
G00040MO	O-linked	core 5
G00041MO	O-linked	core 6
G00042MO	O-linked	core 6
G00043MO	O-linked	core 7
G00044MO	O-linked	core 7
Esempio n. 15
0
class GlycosphingolipidLacto(MotifClassifier):
    """Motif classifier declaring the ("Glycosphingolipid", "lacto series") class.

    ``_class`` holds the two-part class label and ``_motifs`` the motif
    accession(s) associated with it; how they are applied is decided by
    ``MotifClassifier``, which is defined outside this view.
    """
    _class = ("Glycosphingolipid","lacto series")
    _motifs = ["GGM.001106"]

class GlycosphingolipidNeolacto(MotifClassifier):
    """Motif classifier declaring the ("Glycosphingolipid", "neo-lacto series") class.

    ``_class`` holds the two-part class label and ``_motifs`` the motif
    accession(s) associated with it; how they are applied is decided by
    ``MotifClassifier``, which is defined outside this view.
    """
    _class = ("Glycosphingolipid","neo-lacto series")
    _motifs = ["GGM.001107"]

class GlycosphingolipidGanglio(MotifClassifier):
    """Motif classifier declaring the ("Glycosphingolipid", "ganglio series") class.

    ``_class`` holds the two-part class label and ``_motifs`` the motif
    accession(s) associated with it; how they are applied is decided by
    ``MotifClassifier``, which is defined outside this view.
    """
    _class = ("Glycosphingolipid","ganglio series")
    _motifs = ["GGM.001108"]

class GlycosphingolipidGlobo(MotifClassifier):
    """Motif classifier declaring the ("Glycosphingolipid", "globo series") class.

    ``_class`` holds the two-part class label and ``_motifs`` the motif
    accession(s) associated with it; how they are applied is decided by
    ``MotifClassifier``, which is defined outside this view.
    """
    _class = ("Glycosphingolipid","globo series")
    _motifs = ["GGM.001109"]

class GPIAnchor(MotifClassifier):
    """Motif classifier declaring the ("GPI anchor", "") class.

    ``_class`` holds the two-part class label (the second part is the empty
    string here) and ``_motifs`` the motif accession(s) associated with it;
    how they are applied is decided by ``MotifClassifier``, which is defined
    outside this view.
    """
    _class = ("GPI anchor","")
    _motifs = ["GGM.001030"]

if __name__ == "__main__":

    from getwiki import GlycanData, Glycan
    w = GlycanData()

    classifier = ClassifierEngine()
    for acc in sys.argv[1:]:
	g = w.get(acc)
	for asn in classifier.assign(g):
	    print g.get('accession'),asn[0],asn[1]
Esempio n. 16
0
#!/bin/env python2

import sys, time, traceback
from collections import defaultdict

from getwiki import GlycanData, Glycan
w = GlycanData()

import findpygly
from pygly.GlycanResource import GlyTouCan
from pygly.GlycanResource import GlyCosmos


def accessions(args):
    """Yield whitespace-stripped lines from the named files, or from stdin.

    args -- sequence of file names; when it is empty, lines are read from
            ``sys.stdin`` instead.

    Every file is read inside a ``with`` block, so its handle is released
    when the file is exhausted or the generator is closed rather than being
    leaked for the remainder of the run.
    """
    if not args:
        for it in sys.stdin:
            yield it.strip()
        return
    for fn in args:
        with open(fn) as fh:
            for it in fh:
                yield it.strip()


# Resource clients, both constructed with verbose output and caching
# switched off.
gtc = GlyTouCan(verbose=False, usecache=False)
gco = GlyCosmos(verbose=False, usecache=False)

# Everything gco.allaccessions() yields, collected once into a set.
allgco = set(gco.allaccessions())

# allmotifs = dict()
# for acc,label,redend in gtc.allmotifs():
#     allmotifs[acc] = dict(label=label,redend=redend)