#!/bin/env python27
"""Delete duplicate Peptide pages.

Every page in the 'Peptide' category is fetched and reduced to the value of
``Peptide.key(sequence, glycan, mod)``.  The first page seen for a key is
kept; each later page with the same key has its name written to stderr and
is deleted.
"""
from getwiki import GPTWiki, Peptide
import sys, urllib, string
import Bio.SeqIO
from util import peptide_mw, mod_mw

w = GPTWiki()
seen = set()
for p in w.iterpages(include_categories=['Peptide']):
    pep = w.get(p.name)
    pepkey = Peptide.key(pep.get('sequence'),
                         pep.get('glycan', []),
                         pep.get('mod', []))
    if pepkey not in seen:
        # First occurrence of this peptide: remember it and keep the page.
        seen.add(pepkey)
        continue
    # NOTE(review): the script arrived collapsed onto one line; the delete is
    # assumed to belong to the duplicate branch (deleting unconditionally
    # would wipe every peptide page) -- confirm against the original layout.
    sys.stderr.write("%s\n" % (p.name,))
    w.delete(p.name)
#!/bin/env python27
"""Delete every wiki page in the categories named on the command line.

All arguments are validated before anything is deleted, so a typo in a later
argument can no longer leave earlier categories already wiped.  Validation
uses an explicit check rather than ``assert``, which is removed when Python
runs with ``-O``.
"""
import sys

# The only categories this script is willing to bulk-delete.
CATEGORIES = ("Transition", "TransitionGroup", "Peptide", "Protein", "Glycan")


def unknown_categories(cats):
    """Return the entries of *cats* that are not in CATEGORIES, in order."""
    return [c for c in cats if c not in CATEGORIES]


def main(argv):
    """Run the deletion for ``argv[1:]``; return the process exit status."""
    cats = argv[1:]
    bad = unknown_categories(cats)
    if bad:
        sys.stderr.write("Unknown category: %s (expected one of: %s)\n"
                         % (", ".join(bad), ", ".join(CATEGORIES)))
        return 1
    # Imported only after validation so bad arguments are rejected before
    # the wiki client is even loaded.
    from getwiki import GPTWiki
    w = GPTWiki()
    for cat in cats:
        w.deletemany(category=cat, verbose=True)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
import json, urllib, re from getwiki import GPTWiki import numpy as np import getwiki from analysis.fisher import lod, fisher_exact_low, fisher_exact_high from transitionspecificity import specscore w = GPTWiki() trspec = {} labelspec = {} glycanlabelspec = {} smallwindow = 1 largewindow = 6 threshold = 50 nonspeccount = 0 alltrs = set() for tg in w.itertgs(acqtype='DIA'): pepid = tg.get('peptide') pepage = w.get(pepid) z1 = tg.get('z1') spectra = tg.get('spectra') if spectra.find('DIA') == -1: continue try: glycan = re.search('\[H(.)*?\]', pepage.get('name')).group(0)[1:-1] except: continue filename = pepid + '.' + str(z1) + '.50.json'
#!/bin/env python27 import sys from getwiki import GPTWiki w = GPTWiki() if len(sys.argv) > 1: if sys.argv[1] == "-": for p in w.iterpages(exclude_categories=[ 'Transition', 'Peptide', 'Protein', 'TransitionGroup', 'Glycan' ]): print >> sys.stderr, p.name w.refresh(p) elif sys.argv[1] in ('Transition', 'Peptide', 'Protein', 'Glycan', 'TransitionGroup'): for p in w.iterpages(include_categories=sys.argv[1:]): print >> sys.stderr, p.name w.refresh(p) else: for p in w.iterpages(regex=sys.argv[1]): print >> sys.stderr, p.name w.refresh(p) else:
#!/bin/env python2 import sys from collections import defaultdict from getwiki import GPTWiki w=GPTWiki() tla = {'N': 'Asn'} sampleids = set(sys.argv[1:]) prsites = defaultdict(dict) for i,tg in enumerate(w.itertransgroups()): pep = w.get(tg.get('peptide')) # if not pep.get('nrt'): # continue glyid = pep.get('glycan')[0][0] gly = w.get(glyid) name = "" for n in gly.get('name',[]): if 'Fuc' in n: continue if 'HexNAc' in n: name = n break cls = gly.get('class') if len(cls) != 1: continue cls = cls[0]
#!/bin/env python27 from getwiki import GPTWiki, Protein import sys, urllib, string, csv import Bio.SeqIO w = GPTWiki() seen = set() for praccfile in sys.argv[1:]: for pracc in open(praccfile): pracc = pracc.strip() if pracc in seen: continue # print >>sys.stderr, pracc seen.add(pracc) data = urllib.urlopen('http://www.uniprot.org/uniprot/' + pracc + '.xml') for seq_record in Bio.SeqIO.parse(data, 'uniprot-xml'): desc = seq_record.description pracc1 = seq_record.id seq = str(seq_record.seq) gene = seq_record.annotations['gene_name_primary'] sys.stdout.write(seq_record.format('fasta')) break name = gene seqlines = [] for i in range(0, len(seq), 60): seqlines.append(seq[i:i + 60]) seq = "\n".join(seqlines)
std_dev, height, FWHM, area, '\n' ]))) if tgid not in updated: tg.set('prt', adjrt) updated.add(tgid) if w.put(tg): # print tgid pass json_file.close() if len(sys.argv) < 2: print 'please enter the spectra file name(s)' exit(1) spectrafiles = sys.argv[1:] w = GPTWiki() # outfile = open('../data/'+sys.argv[1][:5]+'.fitall.txt','w') outfile = sys.stdout outfile.write('\t'.join( map(str, [ 'TransGroup', 'Spectra', 'PeptideID', 'PrecZ', 'mmu', 'ExpRT', 'AdjRT', 'R_Value', 'Std_Dev', 'Height', 'FWHM', 'Area', '\n' ]))) for tg in w.itertransgroups(): tgid = tg.get('id') pepid = tg.get('peptide') z1 = tg.get('z1')
import matplotlib # Force matplotlib to not use any Xwindows backend. matplotlib.use('Agg') from getwiki import GPTWiki from collections import defaultdict import matplotlib.pyplot as plt import numpy as np from scipy import stats import sys w = GPTWiki() if len(sys.argv) < 2: print 'please enter the spectra file name(s)' exit(1) spectrafiles = sys.argv[1:] pepnrtpairs = defaultdict(list) nrtobsgt0 = {} for tgpage in w.itertransgroups(): spectra = tgpage.get('spectra') if spectra not in spectrafiles: continue tgid = tgpage.get('id') pepid = tgpage.get('peptide') peppage = w.get(pepid) pepnrt = peppage.get('nrt')
#!/bin/env python2
"""Set the 'inst' and 'type' properties on all spectra of one method.

Arguments: <method> <inst> <type>.  Every page yielded by
``iterspec(method=<method>)`` gets both properties set; the id of each page
for which ``put`` reports true is printed.
"""
from getwiki import GPTWiki
import time, sys

if len(sys.argv) < 4:
    # Previously a missing argument surfaced as a bare IndexError.
    sys.stderr.write("usage: %s <method> <inst> <type>\n" % (sys.argv[0],))
    sys.exit(1)
method, inst, acqtype = sys.argv[1:4]

w = GPTWiki()
for sp in w.iterspec(method=method):
    sp.set('inst', inst)
    sp.set('type', acqtype)
    if w.put(sp):
        print(sp.get('id'))
#!/bin/env python2
"""Create ProteinSite pages from the 'prsites' value of peptide alignments.

For every alignment of every peptide, the '|'-separated 'prsites' string is
split into tokens of the form <amino-acid letter><position>.  Each token
becomes a ProteinSite that is stored on the wiki and appended to the
alignment's 'site' list; the peptide page is then stored.  Page names / ids
are printed whenever ``put`` reports true.
"""
from getwiki import GPTWiki, Peptide, ProteinSite
import sys

w = GPTWiki()
for pep in w.iterpep():
    for al in pep.get('alignments', []):
        pr = al.get('protein')
        for prs in al.get('prsites', "").split('|'):
            if not prs:
                # "".split('|') is [''], so an alignment without prsites used
                # to crash on prs[0]; skip empty tokens instead.
                continue
            aa = prs[0]
            pos = int(prs[1:])
            ps = ProteinSite(protein=pr, aa=aa, position=pos)
            if w.put(ps):
                print(ProteinSite.pagename(protein=pr, aa=aa, position=pos))
            al.append('site', ps)
        # The original 'prsites' value is intentionally left in place:
        # al.delete('prsites')
    # NOTE(review): in the one-line form this statement sat behind the inline
    # comment above; it is assumed to run once per peptide -- confirm.
    if w.put(pep):
        print(pep.get('id'))
#!/bin/env python27
"""Write all proteins on the wiki to stdout in FASTA format.

Proteins are ordered by their 'accession' value.  The header line is
">accession description"; the sequence has all whitespace removed and is
emitted LINEWIDTH characters per line.
"""
from getwiki import GPTWiki, Peptide
import sys, urllib, string
import Bio.SeqIO

# Number of residues per output line.
LINEWIDTH = 60

w = GPTWiki()
for pr in sorted(w.iterproteins(), key=lambda pr: pr.get('accession')):
    acc = pr.get('accession')
    desc = pr.get('description')
    # The stored sequence may contain line breaks / spaces; strip them all.
    sequence = "".join(pr.get('sequence').split())
    print(">%s %s" % (acc, desc))
    for start in range(0, len(sequence), LINEWIDTH):
        print(sequence[start:start + LINEWIDTH])
#!/bin/env python27 from getwiki import GPTWiki import sys fromwiki = sys.argv[1].upper() towiki = sys.argv[2].upper() assert fromwiki in ("PROD", "DEV", "TEST") assert towiki in ("PROD", "DEV", "TEST") assert fromwiki != towiki w1 = GPTWiki(smwenv=fromwiki, quiet=True) print >> sys.stderr, "from: %s" % (w1.title(), ) w2 = GPTWiki(smwenv=towiki, quiet=True) print >> sys.stderr, " to: %s" % (w2.title(), ) dummy = raw_input("Enter to proceed, <Ctrl-C> to abort:") currentids = set() for page in w1.iterpages(include_categories=('Transition', 'Peptide', 'TransitionGroup')): id = page.name currentids.add(id) it = w1.get(id) if w2.put(it): print >> sys.stderr, "Pushing %s to %s" % (id, w2.title()) else: print >> sys.stderr, "No change to %s in %s" % (id, w2.title()) for page in w2.iterpages(include_categories=('Transition', 'Peptide',
default=False, help="Upload TG status and peptide nrt to GPTwiki. Default: False.") opts, args = parser.parse_args() if opts.cachefile and os.path.exists(opts.cachefile): data = json.loads(open(opts.cachefile).read()) rows = data['rows'] tgs = data['tgs'] origpepnrt = data['pepnrt'] else: monos = "NHFS" tgs = dict() origpepnrt = dict() rows = [] w = GPTWiki(quiet=True) for tg in w.itertransgroups(): pepid = tg.get('peptide') p = w.get(pepid) pepacc = p.get('id') pepseq = p.get('sequence') pepname = p.get('name') pepnrt = p.get('nrt') if not tg.has('nrt'): continue nrt = float(tg.get('nrt')) glyacc = p.get('glycan')[0][0] g = w.get(glyacc) gsym = g.get('sym') mcnt = {} for mono in monos:
opts, args = parser.parse_args() if opts.cachefile and os.path.exists(opts.cachefile): data = json.loads(open(opts.cachefile).read()) tgrows = data['tgrows'] peprows = data['peprows'] tgs = data['tgs'] origpepnrt = data['pepnrt'] else: monos = "NHFS" tgs = dict() origpepnrt = dict() tgrows = [] peprows = [] pepseen = set() w = GPTWiki(quiet=True) for spec in w.iterspec(acqtype="DDA"): print spec.get("name") for tg in w.itertgs(spectra=spec.get("name")): pepid = tg.get('peptide') p = w.get(pepid) pepacc = p.get('id') pepseq = p.get('sequence') pepname = p.get('name') pepnrt = p.get('nrt') pepnrtobs = int(p.get('nrtobs', 0)) if pepnrtobs == 0: pepnrt = None nrt = tg.get('nrt') if nrt is None and pepnrt is None: continue
#!/bin/env python2
"""Call ``GPTWiki.loadsite`` with the single command-line argument."""
from getwiki import GPTWiki
import sys

if len(sys.argv) < 2:
    # Fail with a usage line instead of an IndexError traceback.
    sys.stderr.write("usage: %s <site>\n" % (sys.argv[0],))
    sys.exit(1)

w = GPTWiki()
w.loadsite(sys.argv[1])
#!/bin/env python27 from getwiki import GPTWiki, Peptide import sys, urllib, string import Bio.SeqIO from collections import defaultdict w = GPTWiki() seenpeps = set() sites = set() prot2site = defaultdict(set) samples = set() glycans = set() glysites = set() site2gly = defaultdict(set) for tg in w.itertransgroups(): tgid = tg.get('id') if tg.get('peptide') in seenpeps: continue pep = w.get(tg.get('peptide')) seenpeps.add(pep.get('id')) gly = pep.get('glycan')[0][0] glycans.add(gly) for al in pep.get('alignments',[]): site = al.get('prsites') prot = al.get('protein') print pep.get('id'),prot,site,gly sites.add((prot,site)) prot2site[prot].add(site) site2gly[(prot,site)].add(gly)
#!/bin/env python27
"""Delete the named spectra pages and their transition groups.

For each spectra name given on the command line, every non-empty item
yielded by ``iterspectgs(name)`` is deleted by its 'id', then the spectra
page itself is deleted.  Each deletion is announced on stderr.
"""
from getwiki import GPTWiki, Peptide
import sys
from collections import defaultdict

w = GPTWiki()
for sp in sys.argv[1:]:
    for tg in w.iterspectgs(sp):
        if not tg:
            continue
        tgid = tg.get('id')
        sys.stderr.write("Delete transition group %s\n" % (tgid,))
        w.delete(tgid)
    # Transition groups go first so the spectra page is removed last.
    sys.stderr.write("Delete spectra %s\n" % (sp,))
    w.delete(sp)
m = re.search(r'\[(.*)\]', lab) kvpairs = re.split(r'([A-Z])', m.group(1)) nmono = 0 for i in range(1, len(kvpairs), 2): if kvpairs[i + 1] == "": nmono += 1 else: nmono += int(kvpairs[i + 1]) return nmono def label2series(lab): return lab[0].lower() w = GPTWiki() peps = defaultdict(lambda: defaultdict(dict)) for sp in w.iterspec(sample=opts.sample, acqtype=opts.acqtype, method=opts.method, inst=opts.inst): print >> sys.stderr, sp.get('name') for i, tg in enumerate(w.itertgs(spectra=sp.get('name'))): pep = w.get(tg.get('peptide')) pepid = pep.get('id') if pep.get('nrt') == None: continue z1 = tg.get('z1') ntrans = len(tg.get('transitions', []))
"""Dump a GlycoCT file for every topology of every glycan on the wiki.

Files are written to OUTDIR as "<glycan accession>.<topology accession>.txt".
The sequence is taken from ``GlyTouCan.getseq(acc, 'glycoct')``; when that
returns nothing it is generated from ``getGlycan(acc).glycoct()``.
"""
from getwiki import GPTWiki, Glycan
import findpygly
from pygly.GlyTouCan import GlyTouCan
import errno
import os, sys, urllib, string
import Bio.SeqIO

OUTDIR = '../glycoct'

gtc = GlyTouCan(usecache=True)
w = GPTWiki()

try:
    os.mkdir(OUTDIR)
except OSError as e:
    # An existing directory is fine; anything else (permissions, missing
    # parent, ...) used to be swallowed and resurface later as a confusing
    # open() failure.
    if e.errno != errno.EEXIST:
        raise

for glypage in sorted(w.iterglycans(), key=lambda g: g.get('accession')):
    acc = glypage.get('accession')
    for tacc in [str(t).strip() for t in glypage.get('topo')]:
        glycoct = gtc.getseq(tacc, 'glycoct')
        if not glycoct:
            glycoct = gtc.getGlycan(tacc).glycoct()
        path = os.path.join(OUTDIR, acc + '.' + tacc + '.txt')
        # 'with' closes the handle even if the write fails.
        with open(path, 'w') as f:
            f.write(glycoct)
        sys.stderr.write("Dump GlycoCT to %s.%s.txt\n" % (acc, tacc))
#!/bin/env python27
"""Delete the named spectra pages and every transition group that uses them.

All pages in the 'TransitionGroup' category are fetched; those whose
'spectra' value equals one of the command-line arguments are deleted, then
the spectra pages themselves are deleted.  Each deletion is announced on
stderr.
"""
from getwiki import GPTWiki, Peptide
import sys
from collections import defaultdict

# Sliced once instead of once per transition-group page.
spectra = sys.argv[1:]

w = GPTWiki()
if spectra:
    # Without arguments nothing can match, so skip fetching every page.
    for tgpage in w.iterpages(include_categories=['TransitionGroup']):
        tg = w.get(tgpage.name)
        if tg.get('spectra') in spectra:
            sys.stderr.write("Delete transition group %s\n" % (tgpage.name,))
            w.delete(tgpage.name)

for sp in spectra:
    sys.stderr.write("Delete spectra %s\n" % (sp,))
    w.delete(sp)
#!/bin/env python27 from getwiki import GPTWiki, Protein import sys, urllib, string, csv, os.path from collections import defaultdict import Bio.SeqIO from util import peptide_mw, mod_mw def asscan(s): t = s.rstrip(')').split('(') return [int(t[0])] + t[1].split(',') w = GPTWiki() spectra2tg = defaultdict(set) for tgpage in w.iterpages(include_categories=['TransitionGroup']): tg = w.get(tgpage.name) spectra = tg.get('spectra') spectra2tg[spectra].add(tg.get('id')) allspectra = set() for transfile in sys.argv[1:]: spectra, sample, method, index, extn = transfile.rsplit('.', 4) spectra = os.path.split(spectra)[1] allspectra.add(spectra) w.addacquisition(name=spectra, method=method, sample=sample) tgroup = defaultdict(dict)
#!/bin/env python27
"""Attach protein alignments from a whitespace-delimited file to peptides.

Argument: <alignfile>.  From each line the script reads field 1 (start,
incremented by one), field 2 (end), field 4 (peptide sequence) and field 12
(protein accession, first character dropped).  Every peptide page whose
'sequence' appears in the file is updated with the list of
(accession, start, end) tuples, and its id is written to stderr when
``put`` reports true.
"""
from getwiki import GPTWiki, Protein
import sys, urllib, string
from collections import defaultdict
import Bio.SeqIO

if len(sys.argv) < 2:
    sys.stderr.write("usage: %s <alignfile>\n" % (sys.argv[0],))
    sys.exit(1)
alignfile = sys.argv[1]

alignments = defaultdict(list)
# 'with' guarantees the file is closed; it was previously left open.
with open(alignfile) as fh:
    for line in fh:
        sl = line.split()
        if not sl:
            # Blank lines used to raise IndexError on sl[1].
            continue
        st = int(sl[1]) + 1
        ed = int(sl[2])
        pep = sl[4]
        pracc = sl[12][1:]
        alignments[pep].append((pracc, st, ed))

w = GPTWiki()
for p in w.iterpeptides():
    seq = p.get('sequence')
    if seq not in alignments:
        continue
    p.update(alignment=alignments[seq])
    # NOTE(review): reconstructed from a one-line script; the put is assumed
    # to apply only to peptides that were just updated -- confirm.
    if w.put(p):
        sys.stderr.write("%s\n" % (p.get('id'),))
#!/bin/env python27 from getwiki import GPTWiki, Protein import getwiki import sys, urllib, string, csv, os.path, json, glob from collections import defaultdict import Bio.SeqIO from util import peptide_mw, mod_mw w = GPTWiki() sample = sys.argv[1] method = sys.argv[2] if ":" in method: method, anfrac = method.split(':') else: anfrac = None tgs = defaultdict(set) allspec = set() lccal = defaultdict(dict) for specfile in sys.argv[3:]: dirname = specfile.rsplit('.', 2)[0] if dirname.endswith('.centroid'): dirname = dirname.rsplit('.', 1)[0] spectra = os.path.split(dirname)[1] allspec.add(spectra) w.addacquisition(name=spectra, method=method, anfrac=anfrac,
#!/bin/env python2 from getwiki import GPTWiki, Protein import sys, urllib, string, csv, os.path from collections import defaultdict import Bio.SeqIO from util import peptide_mw, mod_mw def asscan(s): t = s.rstrip(')').split('(') return [int(t[0])] + t[1].split(',') w = GPTWiki() allspectra = set() spectra2tg = defaultdict(set) for transfile in sys.argv[1:]: spectra, sample, method, index, extn = transfile.rsplit('.', 4) spectra = os.path.split(spectra)[1] w.addacquisition(name=spectra, method=method, sample=sample) allspectra.add(spectra) for tg in w.itertgs(spectra=spectra, all=True): spectra2tg[spectra].add(tg.get('id')) for transfile in sys.argv[1:]: spectra, sample, method, index, extn = transfile.rsplit('.', 4) spectra = os.path.split(spectra)[1] tgroup = defaultdict(dict)
#!/bin/env python2 from getwiki import GPTWiki, Alignment import sys, urllib, string, csv import Bio.SeqIO from util import peptide_mw, mod_mw from operator import itemgetter w = GPTWiki() gmw = dict() gsym = dict() seen = set() for peptidefile in sys.argv[1:]: rest, sample, method, index, extn = peptidefile.rsplit('.', 4) for l in csv.DictReader(open(peptidefile), dialect='excel-tab'): seq = l['PeptideSequence'] glyspec = l['Glycans'] modspec = l['Mods'] praccs = l['ProteinName'] pepid = l.get('PeptideID') if (seq, glyspec, modspec) in seen: continue if '?' in glyspec: continue seen.add((seq, glyspec, modspec)) # print >>sys.stderr, seq,glyspec,modspec
#!/bin/env python27
"""Refresh derived count properties on Glycan and Peptide pages.

Glycans get 'nneuac' set to the count of the letter 'S' in their 'sym'
value; peptides get 'nox' set to the number of "[Ox]" occurrences in their
'name'.  The id of every page for which ``put`` reports true is printed.
"""
import re


def monocount(gsym, mono):
    """Return the count recorded for letter *mono* in composition *gsym*.

    The first occurrence of *mono* followed by digits gives that number; a
    *mono* that appears without digits counts as 1; an absent *mono* is 0.
    """
    m = re.search(mono + r'(\d+)', gsym)
    if m:
        return int(m.group(1))
    if mono in gsym:
        return 1
    return 0


def main():
    """Update every glycan and peptide page on the wiki."""
    # Imported here so monocount() stays importable without the wiki client.
    from getwiki import GPTWiki
    w = GPTWiki()

    # Only the 'S' count is stored; the N/H/F counts the script used to
    # compute alongside it were never read.
    for gly in w.iterglycans():
        gly.set('nneuac', monocount(gly.get('sym'), 'S'))
        if w.put(gly):
            print(gly.get('id'))

    for pep in w.iterpeptides():
        pep.set('nox', pep.get('name').count('[Ox]'))
        if w.put(pep):
            print(pep.get('id'))


if __name__ == "__main__":
    main()
#!/bin/env python2 from getwiki import GPTWiki, Peptide import sys, urllib, string from collections import defaultdict nrtonly = False if len(sys.argv) > 1 and sys.argv[1] == "nrtonly": nrtonly = True w = GPTWiki() seenpeps = set() sites = set() prot2site = defaultdict(set) samples = set() glycans = set() glysites = set() site2gly = defaultdict(set) for sp in w.iterspec(type='DDA'): for tg in w.itertgs(spectra=sp.get('name')): tgid = tg.get('id') if tg.get('peptide') in seenpeps: continue pep = w.get(tg.get('peptide')) if nrtonly and pep.get('nrt') == None: continue seenpeps.add(pep.get('id')) gly = pep.get('glycan')[0][0] glycans.add(gly) for al in pep.get('alignments', []):
#!/bin/env python2 from getwiki import GPTWiki import sys, re w = GPTWiki() # if len(sys.argv) < 2: # print 'please enter the spectra file name regex' # exit(1) for spectrapage in w.iterspec(acqtype="DDA"): # if not re.search(sys.argv[1],spectrapage.get('name')): # continue print spectrapage.get('name') nrtslope = spectrapage.get('nrtslope') nrtintercept = spectrapage.get('nrtintercept') if not nrtslope or not nrtintercept: print "No NRT slope or intercept" continue for tgpage in w.itertgs(spectra=spectrapage.get('name')): tgid = tgpage.get('id') peakrt = tgpage.get('prt') nrt = 0.0 if peakrt != None: nrt = (peakrt - nrtintercept) / nrtslope tgpage.set('nrt', nrt) if w.put(tgpage): print tgid else: tgpage.set('nrt', '')
#!/bin/env python27
"""Call ``GPTWiki.dumpsite`` for the target given on the command line.

Pages in the 'Peptide', 'Transition' and 'TransitionGroup' categories are
excluded from the dump.
"""
from getwiki import GPTWiki
import sys

if len(sys.argv) < 2:
    # Fail with a usage line instead of an IndexError traceback.
    sys.stderr.write("usage: %s <target>\n" % (sys.argv[0],))
    sys.exit(1)

w = GPTWiki()
w.dumpsite(sys.argv[1],
           exclude_categories=['Peptide', 'Transition', 'TransitionGroup'])
#!/bin/env python27 from getwiki import GPTWiki, Peptide import sys, urllib, string import Bio.SeqIO print "\t".join( map(str, [ "Spectra", "Accession", "Peptide", "Site", "Glycan", "TGAccession", "Charge", "PrecursorMZ" ])) seen = set() w = GPTWiki() for tgpage in w.iterpages(include_categories=['TransitionGroup']): tg = w.get(tgpage.name) pep = w.get(tg.get('peptide')) pepid = pep.get('id') z1 = tg.get('z1') mz1 = tg.get('mz1') spectra = tg.get('spectra') if (spectra, pepid, z1) not in seen: pepseq = list(pep.get('sequence')) for deltastr, pos in pep.get('mod', []): aa = pos[0] pos = int(pos[1:]) - 1 if round(deltastr, 3) == 57.021: pepseq[pos] += ":m" elif round(deltastr, 3) in (15.995, 15.996): pepseq[pos] += ":o" pepseq = "".join(pepseq)