#!/bin/env python2 import sys, time, re from collections import defaultdict import csv from getwiki import GlycanData, Glycan w = GlycanData() allsources = set() gtc2taxid = defaultdict(lambda: defaultdict(set)) for f in sys.argv[1:]: for l in open(f): sl = l.split() gtc = sl[0] taxid = int(sl[1]) source = sl[2] if len(sl) > 3: sourceid = sl[3] else: sourceid = None gtc2taxid[gtc][(source, sourceid)].add(taxid) allsources.add(source) for m in w.iterglycan(): start = time.time() acc = m.get('accession') for source in allsources: m.delete_annotations(source=source,
#!/bin/env python2 import re, sys from getwiki import GlycanData from collections import defaultdict headers = """ accession Hex HexNAc dHex NeuAc NeuGc HexA HexN S P aldi Xxx X Count """.split() w = GlycanData() print "\t".join(headers) for acc in w.iterglycanid(): g = w.get(acc) row = defaultdict(lambda: [0, False]) row['accession'] = (acc, False) for ann in g.annotations(type="MonosaccharideCount", source="EdwardsLab"): try: value = [int(ann.get('value')), False] except ValueError: value = [int(ann.get('value')[:-1]), True] prop = ann.get('property') if prop.endswith('Count'): prop = prop[:-5] row[prop] = value # print row row['Count'] = row['Monosaccharide'] if 'Xxx' not in row: row['Xxx'] = [0, False] for k in row:
#!/bin/env python27
# Print every glycan identifier reported by GlycanData.iterglycanid(),
# one per line, on standard output.
import re
import sys

from getwiki import GlycanData

w = GlycanData()

accession_iter = w.iterglycanid()
for accession in accession_iter:
    # A parenthesised single argument prints exactly what the statement
    # form prints.
    print(accession)
#!/bin/env python27
# Hand the first command-line argument to GlycanData.loadsite().
from getwiki import GlycanData
import sys

w = GlycanData()

# The wiki handle is created before the argument is read, so a missing
# argument still fails only after GlycanData() has been constructed.
site_source = sys.argv[1]
w.loadsite(site_source)
#!/bin/env python2
# Read names from standard input, one per line; echo each stripped name
# and pass it to GlycanData.delete().
import re
import sys

from getwiki import GlycanData

w = GlycanData()

for raw_line in sys.stdin:
    page_name = raw_line.strip()
    print(page_name)
    try:
        w.delete(page_name)
    except OSError:
        # Best effort: an OSError for one name must not stop the
        # remaining names from being processed.
        pass
#!/bin/env python2
# Dump the site via GlycanData.dumpsite().
#
#   script --all TARGET   -> dumpsite(TARGET)
#   script TARGET         -> dumpsite(TARGET, exclude_categories=[...])
from getwiki import GlycanData
import sys

w = GlycanData()

if sys.argv[1] == "--all":
    # Drop the flag so the target moves into position 1.
    del sys.argv[1]
    dump_options = {}
else:
    dump_options = {'exclude_categories': ['Glycan', 'Annotation']}

w.dumpsite(sys.argv[1], **dump_options)
#!/bin/env python27 import sys, time, traceback from collections import defaultdict from getwiki import GlycanData, Glycan w = GlycanData() import findpygly from pygly.GlycanResource import GlyTouCan def accessions(args): if len(args) == 0: for it in sys.stdin: yield it.strip() else: for fn in args: for it in open(fn): yield it.strip() gtc = GlyTouCan() allmotifs = dict() for acc, label, redend in gtc.allmotifs(): allmotifs[acc] = dict(label=label, redend=redend) current = set() for gtcacc in accessions(sys.argv[1:]): start = time.time()
#!/bin/env python2 import sys from getwiki import GlycanData w = GlycanData() if len(sys.argv) >= 2: database = sys.argv[1] else: database = "glycandatadev" head = """<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE rdf:RDF[ <!ENTITY rdf 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'> <!ENTITY rdfs 'http://www.w3.org/2000/01/rdf-schema#'> <!ENTITY swivt 'http://semantic-mediawiki.org/swivt/1.0#'> <!ENTITY glycandata 'http://glyomics.org/glycandata#'> ]> <rdf:RDF xmlns:rdf="&rdf;" xmlns:rdfs="&rdfs;" xmlns:swivt="&swivt;" xmlns:glycandata="&glycandata;" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> """ tail = """ </rdf:RDF> """ glycantmpl = """
#!/bin/env python27 import sys, time, traceback from getwiki import GlycanData w = GlycanData() def accessions(): if len(sys.argv) > 1: for arg in sys.argv[1:]: g = w.get(arg.strip()) if g: yield g else: for g in w.iterglycan(): yield g for g in accessions(): start = time.time() glycan = g.getGlycan() if not glycan: continue if not g.has_annotations( property='GlycoCT', type='Sequence', source='GlyTouCan'): if not g.has_annotations( property='GlycoCT', type='Sequence', source='EdwardsLab'):
#!/bin/env python2
# Reconcile the "GlyGen" CrossReference annotation (source "EdwardsLab") on
# every glycan with a list of accessions read from files or from stdin.
#
# Usage: script [FILE ...]    (reads stdin when no FILE is given)
import sys
from getwiki import GlycanData

w = GlycanData()


def accessions(args):
    """Yield stripped lines naming accessions.

    args is a list of file names; when it is empty, lines are read from
    standard input instead. Every line is yielded with surrounding
    whitespace removed, in input order.
    """
    if len(args) == 0:
        for it in sys.stdin:
            yield it.strip()
    else:
        for fn in args:
            # Close each input file as soon as it has been consumed
            # instead of leaving the handle to the garbage collector.
            with open(fn) as handle:
                for it in handle:
                    yield it.strip()


# Materialise the wanted accessions once so membership tests are O(1).
current_glygen = set(accessions(sys.argv[1:]))

for acc in w.iterglycanid():
    m = w.get(acc)
    if acc in current_glygen:
        m.set_annotation(value=acc, property="GlyGen",
                         source="EdwardsLab", type="CrossReference")
    else:
        m.delete_annotations(property="GlyGen",
                             source="EdwardsLab", type="CrossReference")
    # Report according to the truthiness of w.put(m).
    # NOTE(review): presumably put() returns true only when the stored
    # page actually changed -- confirm against getwiki.
    if w.put(m):
        print("%s updated" % (acc,))
    else:
        print("%s checked" % (acc,))
#!/bin/env python2 import sys from getwiki import GlycanData w = GlycanData() if len(sys.argv) > 1: if sys.argv[1] == "-": for p in w.iterpages(exclude_categories=['Glycan']): print >> sys.stderr, p.name w.refresh(p) elif sys.argv[1] == "stdin": for p in map(str.strip, sys.stdin): print >> sys.stderr, p w.refresh(p) else: for p in w.iterpages(regex=sys.argv[1]): print >> sys.stderr, p.name w.refresh(p) else: for p in w.iterpages(include_categories=['Glycan']): print >> sys.stderr, p.name
#!/bin/env python27 import sys from collections import defaultdict from getwiki import GlycanData, Glycan from pygly.GlycanFormatter import GlycoCTFormat w = GlycanData() glycoctformat = GlycoCTFormat() monosdb = {} f = open(sys.argv[1], 'r') for line in f: k, v = line.split() monosdb[k] = v for g in w.iterglycan(): acc = g.get('accession') monodbids = set() glycan = g.getGlycan() if not glycan: continue for m in glycan.all_nodes(): try: glycoctsym = glycoctformat.mtoStr(m) except KeyError: continue try: monodbids.add(monosdb[glycoctsym]) except KeyError:
#!/bin/env python2 import sys from operator import itemgetter from collections import defaultdict from getwiki import GlycanData, Glycan w = GlycanData() from pygly.GNOme import SubsumptionGraph gnome = SubsumptionGraph() gnome.loaddata(sys.argv[1]) sys.argv.pop(1) def iterglycan(): if len(sys.argv) > 1: seen = set() for acc in sys.argv[1:]: if acc in seen: continue m = w.get(acc) if m: seen.add(acc) yield m for desc in gnome.descendants(acc): if desc in seen: continue m = w.get(desc) if m:
#!/bin/env python27 import sys from operator import itemgetter from getwiki import GlycanData, Glycan from pygly.GlyNLinkedFilter import GlyNLinkedFilter mnlc = GlyNLinkedFilter(None).test1 w = GlycanData() motif_rules_data = """ G00026MO N-linked G00028MO N-linked high mannose G00029MO N-linked hybrid G00030MO N-linked complex G00031MO O-linked core 1 G00032MO O-linked core 1 G00033MO O-linked core 2 G00034MO O-linked core 2 G00035MO O-linked core 3 G00036MO O-linked core 3 G00037MO O-linked core 4 G00038MO O-linked core 4 G00039MO O-linked core 5 G00040MO O-linked core 5 G00041MO O-linked core 6 G00042MO O-linked core 6 G00043MO O-linked core 7 G00044MO O-linked core 7
class GlycosphingolipidLacto(MotifClassifier):
    # Class pair and motif list for the lacto series.
    _class = ("Glycosphingolipid", "lacto series")
    _motifs = ["GGM.001106"]


class GlycosphingolipidNeolacto(MotifClassifier):
    # Class pair and motif list for the neo-lacto series.
    _class = ("Glycosphingolipid", "neo-lacto series")
    _motifs = ["GGM.001107"]


class GlycosphingolipidGanglio(MotifClassifier):
    # Class pair and motif list for the ganglio series.
    _class = ("Glycosphingolipid", "ganglio series")
    _motifs = ["GGM.001108"]


class GlycosphingolipidGlobo(MotifClassifier):
    # Class pair and motif list for the globo series.
    _class = ("Glycosphingolipid", "globo series")
    _motifs = ["GGM.001109"]


class GPIAnchor(MotifClassifier):
    # GPI anchor; the second element of the class pair is the empty string.
    _class = ("GPI anchor", "")
    _motifs = ["GGM.001030"]


if __name__ == "__main__":
    # Command-line driver: print one line per assignment for each accession
    # given on the command line.
    from getwiki import GlycanData, Glycan

    w = GlycanData()
    classifier = ClassifierEngine()

    for acc in sys.argv[1:]:
        g = w.get(acc)
        for assignment in classifier.assign(g):
            # Space-separated fields, matching the comma form of the print
            # statement.
            fields = (g.get('accession'), assignment[0], assignment[1])
            print("%s %s %s" % fields)
#!/bin/env python2 import sys, time, traceback from collections import defaultdict from getwiki import GlycanData, Glycan w = GlycanData() import findpygly from pygly.GlycanResource import GlyTouCan from pygly.GlycanResource import GlyCosmos def accessions(args): if len(args) == 0: for it in sys.stdin: yield it.strip() else: for fn in args: for it in open(fn): yield it.strip() gtc = GlyTouCan(verbose=False, usecache=False) gco = GlyCosmos(verbose=False, usecache=False) allgco = set(gco.allaccessions()) # allmotifs = dict() # for acc,label,redend in gtc.allmotifs(): # allmotifs[acc] = dict(label=label,redend=redend)