# Set-up for a transition-specificity analysis over DIA transition groups:
# create the (initially empty) accumulators and the window/threshold
# parameters, then walk every DIA transition group and open the per-peptide
# JSON file published for its spectra.
# NOTE(review): this fragment ends on an open `try:`; its body lies beyond
# this view.
from analysis.fisher import lod, fisher_exact_low, fisher_exact_high
from transitionspecificity import specscore

w = GPTWiki()
trspec = {}
labelspec = {}
glycanlabelspec = {}
smallwindow = 1
largewindow = 6
threshold = 50
nonspeccount = 0
alltrs = set()
for tg in w.itertgs(acqtype='DIA'):
    pepid = tg.get('peptide')
    pepage = w.get(pepid)
    z1 = tg.get('z1')
    spectra = tg.get('spectra')
    # Ignore transition groups whose spectra name does not contain "DIA".
    if spectra.find('DIA') == -1:
        continue
    # Pull the first bracketed "[H...]" part out of the peptide name and drop
    # the surrounding brackets. Any failure (e.g. no match) skips the group.
    # NOTE(review): the bare except also hides unrelated errors.
    try:
        glycan = re.search('\[H(.)*?\]', pepage.get('name')).group(0)[1:-1]
    except:
        continue
    # File name is "<peptide id>.<z1>.50.json" under the spectra's folder.
    filename = pepid + '.' + str(z1) + '.50.json'
    onlinefile = 'http://edwardslab.bmcb.georgetown.edu/~nedwards/dropbox/pBYmLSkGeq/' + spectra + '/' + filename
    json_file = urllib.urlopen(onlinefile)
    try:
# Fragment from the middle of a per-row loop (the loop header and the `if`
# that owns the leading `continue` are outside this view).
# Visible steps, in order:
#   1. skip rows whose glycan spec contains '?';
#   2. record (seq, glyspec, modspec) in `seen`;
#   3. unless glyspec is "-", parse its comma-separated "pos:acc" items into
#      (acc, residue-letter + pos) pairs, where the residue is seq[pos - 1];
#   4. collect glycan accessions that w.get() cannot resolve; if there are
#      any, warn on stderr and skip the row;
#   5. for each comma-separated protein accession in `praccs` (stripped),
#      warn and skip when w.get() cannot resolve it, otherwise take its
#      'sequence' with all whitespace removed.
# NOTE(review): the statements are collapsed onto one line, so the inline
# "# print ..." debug comment currently swallows everything after it; the
# original line breaks must be restored before this can run.
continue if '?' in glyspec: continue seen.add((seq, glyspec, modspec)) # print >>sys.stderr, seq,glyspec,modspec glycan = [] if glyspec != "-": glycan = map(lambda t: (t[1], seq[int(t[0]) - 1] + str(t[0])), map(lambda s: s.split(':'), glyspec.split(','))) badglys = set() for glyacc in set(map(itemgetter(0), glycan)): if not w.get(glyacc): badglys.add(glyacc) if len(badglys) > 0: print >> sys.stderr, "Warning: Can't resolve glycan accession(s):", ", ".join( sorted(badglys)) continue alignments = [] for pracc in map(str.strip, praccs.split(',')): if not w.get(pracc): print >> sys.stderr, "Warning: Can't resolve protein accession:", pracc continue prseq = "".join(w.get(pracc).get('sequence').split())
nrtonly = True w = GPTWiki() seenpeps = set() sites = set() prot2site = defaultdict(set) samples = set() glycans = set() glysites = set() site2gly = defaultdict(set) for sp in w.iterspec(type='DDA'): for tg in w.itertgs(spectra=sp.get('name')): tgid = tg.get('id') if tg.get('peptide') in seenpeps: continue pep = w.get(tg.get('peptide')) if nrtonly and pep.get('nrt') == None: continue seenpeps.add(pep.get('id')) gly = pep.get('glycan')[0][0] glycans.add(gly) for al in pep.get('alignments', []): site = al.get('prsites') prot = al.get('protein') print pep.get('id'), prot, site, gly sites.add((prot, site)) prot2site[prot].add(site) site2gly[(prot, site)].add(gly) glysites.add((prot, site, gly)) print "Proteins:", len(prot2site)
# Fragment that starts inside the keyword-argument list of a call whose
# opening is outside this view (the nesting of the first statements is
# therefore not known).
# Visible steps, in order:
#   1. finish the call, passing z1, spectra, mz1, nrt, rt, prt, transitions,
#      ntransition=len(trans) and **extras;
#      NOTE(review): `prt=rt` passes rt for prt as well -- confirm intended;
#   2. when `data` has an 'lccalibration' entry and `spectra` is not yet in
#      `lccal`, split it on ":" into two floats and store them as
#      lccal[spectra]['nrtslope'] / ['nrtintercept'];
#   3. remember the transition group id in tgs[spectra];
#   4. for every spectra in `allspec`: set nrtslope/nrtintercept on its page
#      from `lccal`, or delete both properties when there is no calibration,
#      then w.put() the page;
#   5. for transition groups of a spectra (all=True) whose id is not in
#      tgs[spectra], call w.cleartransgroup() and print "Clear <id>" when it
#      returns a true value (presumably inside the loop of step 4 -- confirm).
z1=z1, spectra=spectra, mz1=mz1, nrt=nrt, rt=rt, prt=rt, transitions=trans, ntransition=len(trans), **extras) if 'lccalibration' in data and spectra not in lccal: nrtslope, nrtintercept = map(float, data['lccalibration'].split(":")) lccal[spectra]['nrtslope'] = nrtslope lccal[spectra]['nrtintercept'] = nrtintercept tgs[spectra].add(tg.get('id')) for spectra in allspec: spec = w.get(spectra) if spectra in lccal: spec.set("nrtslope", lccal[spectra]['nrtslope']) spec.set("nrtintercept", lccal[spectra]['nrtintercept']) else: spec.delete("nrtslope") spec.delete("nrtintercept") w.put(spec) for tg in w.itertgs(spectra=spectra, all=True): if tg.get('id') not in tgs[spectra]: if w.cleartransgroup(tg): print "Clear", tg.get("id")
#!/bin/env python2
# Walk every transition group, look up the first glycan of its peptide and
# keep only glycans that carry exactly one class. Sample ids are taken from
# the command line.
import sys
from collections import defaultdict
from getwiki import GPTWiki

w = GPTWiki()

tla = dict(N='Asn')
sampleids = set(sys.argv[1:])
prsites = defaultdict(dict)

for i, tg in enumerate(w.itertransgroups()):
    pep = w.get(tg.get('peptide'))
    # (A filter that skipped peptides without 'nrt' is disabled here.)
    glyid = pep.get('glycan')[0][0]
    gly = w.get(glyid)

    # First glycan name that mentions HexNAc but not Fuc; "" when none does.
    name = ""
    for n in gly.get('name', []):
        if 'Fuc' not in n and 'HexNAc' in n:
            name = n
            break

    # Only glycans with exactly one class are processed further.
    cls = gly.get('class')
    if len(cls) != 1:
        continue
    cls = cls[0]
# Fragment from the middle of a loop over transitions (the loop headers are
# outside this view, so the nesting of the first statements is not known).
# Visible steps, in order:
#   1. print the transition id when `modified` is true;
#   2. create the tgroup entry for (pid, z1, spectra) on first sight, with an
#      empty 'transitions' list plus nrt, rt, prt, mz1, scans and gphash;
#   3. append (transition id, relint) to that entry's 'transitions';
#   4. for every (pid, z1, spectra) key in tgroup: store the transition count
#      as 'ntransition', call w.addtransgroup() with the entry as keyword
#      arguments, print the group id when it reports a modification, and
#      remove that id from spectra2tg[spectra] if present;
#   5. for every spectra in `allspectra`, the ids still left in
#      spectra2tg[spectra] are passed to w.cleartransgroup(); "Clear <id>" is
#      printed when it returns a true value.
if modified: print t.get('id') if (pid, z1, spectra) not in tgroup: tgroup[(pid, z1, spectra)] = dict(transitions=[], nrt=nrt, rt=rt, prt=prt, mz1=mz1, scans=scans, gphash=gphash) tgroup[(pid, z1, spectra)]['transitions'].append((t.get('id'), relint)) for pid, z1, spectra in tgroup: tgroup[(pid, z1, spectra)]['ntransition'] = len( tgroup[(pid, z1, spectra)]['transitions']) tg, mod = w.addtransgroup(peptide=pid, z1=z1, spectra=spectra, **tgroup[(pid, z1, spectra)]) if mod: print tg.get('id') if tg.get('id') in spectra2tg[spectra]: spectra2tg[spectra].remove(tg.get('id')) for spectra in allspectra: for tgid in spectra2tg[spectra]: tg = w.get(tgid) if w.cleartransgroup(tg): print "Clear", tg.get("id")
import sys, urllib, string, csv, os.path
from collections import defaultdict
import Bio.SeqIO
from util import peptide_mw, mod_mw


def asscan(s):
    """Parse a string shaped like "N(a,b,...)" into [int(N), "a", "b", ...].

    Trailing ")" characters are stripped, the text is split on "(", the
    first piece is converted to int and the second piece is split on commas
    (its items stay strings). Input without "(" raises IndexError.
    """
    t = s.rstrip(')').split('(')
    return [int(t[0])] + t[1].split(',')


w = GPTWiki()

# Index the ids of all existing TransitionGroup pages by their spectra.
spectra2tg = defaultdict(set)
for tgpage in w.iterpages(include_categories=['TransitionGroup']):
    tg = w.get(tgpage.name)
    spectra = tg.get('spectra')
    spectra2tg[spectra].add(tg.get('id'))

# Each command-line argument is a transition file; its name is split from the
# right into five dot-separated fields.
allspectra = set()
for transfile in sys.argv[1:]:
    spectra, sample, method, index, extn = transfile.rsplit('.', 4)
    # Keep only the final path component as the spectra name.
    spectra = os.path.split(spectra)[1]
    allspectra.add(spectra)
    w.addacquisition(name=spectra, method=method, sample=sample)
    tgroup = defaultdict(dict)
    # Rows are read as tab-delimited records keyed by the header line.
    # NOTE(review): the file object passed to DictReader is never closed
    # explicitly.
    for l in csv.DictReader(open(transfile), dialect='excel-tab'):
        seq = l['PeptideSequence']
        glyspec = l['Glycans']
        modspec = l['Mods']
def label2series(lab):
    """Return the first character of *lab*, lower-cased."""
    return lab[0].lower()


w = GPTWiki()

# (peptide id, z1) -> transition id -> dict of per-transition fields.
peps = defaultdict(lambda: defaultdict(dict))
for sp in w.iterspec(sample=opts.sample, acqtype=opts.acqtype,
                     method=opts.method, inst=opts.inst):
    # Progress message: name of the spectra being processed.
    print >> sys.stderr, sp.get('name')
    for i, tg in enumerate(w.itertgs(spectra=sp.get('name'))):
        pep = w.get(tg.get('peptide'))
        pepid = pep.get('id')
        # Peptides without an 'nrt' value are skipped. Identity test against
        # the None singleton (was `== None`).
        if pep.get('nrt') is None:
            continue
        z1 = tg.get('z1')
        # Transition groups without transitions are skipped.
        ntrans = len(tg.get('transitions', []))
        if ntrans == 0:
            continue
        # Distinct proteins the peptide aligns to.
        proteins = set()
        for al in pep.get("alignments", []):
            proteins.add(al.get("protein"))
        for trid, trint in tg.get('transitions', []):
            tr = w.get(trid)
            # Fill in the peptide-level fields the first time a transition
            # is seen for this (peptide, charge) pair.
            if trid not in peps[(pepid, z1)]:
                peps[(pepid, z1)][trid]['pepname'] = pep.get('name')
                peps[(pepid, z1)][trid]['prot'] = ";".join(sorted(proteins))
#!/bin/env python27
# Delete Transition pages that no TransitionGroup page refers to.
from getwiki import GPTWiki, Peptide
import sys
from collections import defaultdict

w = GPTWiki()

# Ids of transitions that are listed by some transition group and that
# resolve to a page.
currtrans = set()
for tgpage in w.iterpages(include_categories=['TransitionGroup']):
    tg = w.get(tgpage.name)
    # Default to an empty list so a group without a 'transitions' value is
    # skipped instead of failing the loop with a TypeError on None.
    for t, i in tg.get('transitions', []):
        if w.get(t):
            currtrans.add(t)

# Every Transition page whose id was not collected above is removed; the id
# is reported on stderr first.
for tpage in w.iterpages(include_categories=['Transition']):
    t = w.get(tpage.name)
    tid = t.get('id')
    if tid not in currtrans:
        print >> sys.stderr, "Delete transition " + tid
        w.delete(tid)
# Fragment that starts inside the body of a check whose header is outside
# this view (the usage message and exit(1) are presumably guarded by an
# argument-count test -- confirm).
# Visible steps, in order:
#   1. print the usage message and exit with status 1;
#   2. take the spectra names from the command line;
#   3. for every transition group whose spectra is one of those names, read
#      the peptide's 'nrt' and 'nrtobs' and the group's 'prt';
#   4. when both prt and nrt are present: skip the group if nrtobs equals
#      the string '0', otherwise count it in nrtobsgt0[spectra];
#   5. start the list pepnrtpairs[spectra] with (float(prt), float(nrt))
#      when the spectra has no entry yet (the other branch is beyond this
#      view).
# NOTE(review): `!= None` would conventionally be written `is not None`.
print 'please enter the spectra file name(s)' exit(1) spectrafiles = sys.argv[1:] pepnrtpairs = defaultdict(list) nrtobsgt0 = {} for tgpage in w.itertransgroups(): spectra = tgpage.get('spectra') if spectra not in spectrafiles: continue tgid = tgpage.get('id') pepid = tgpage.get('peptide') peppage = w.get(pepid) pepnrt = peppage.get('nrt') nrtobs = peppage.get('nrtobs') peakrt = tgpage.get('prt') if peakrt != None and pepnrt != None: if nrtobs == '0': continue else: if spectra not in nrtobsgt0: nrtobsgt0[spectra] = 1 else: nrtobsgt0[spectra] += 1 if spectra not in pepnrtpairs: pepnrtpairs[spectra] = [(float(peakrt), float(pepnrt))]
towiki = sys.argv[2].upper()

# Validate the two environment names explicitly. `assert` statements are
# removed when Python runs with -O, which would let a bad or identical pair
# through to the push/delete loops below.
_wikienvs = ("PROD", "DEV", "TEST")
if fromwiki not in _wikienvs:
    raise ValueError("Bad source wiki %r, expected one of %s"
                     % (fromwiki, ", ".join(_wikienvs)))
if towiki not in _wikienvs:
    raise ValueError("Bad destination wiki %r, expected one of %s"
                     % (towiki, ", ".join(_wikienvs)))
if fromwiki == towiki:
    raise ValueError("Source and destination wiki must differ")

w1 = GPTWiki(smwenv=fromwiki, quiet=True)
print >> sys.stderr, "from: %s" % (w1.title(), )
w2 = GPTWiki(smwenv=towiki, quiet=True)
print >> sys.stderr, " to: %s" % (w2.title(), )

# Last chance to abort before anything is written to the destination.
raw_input("Enter to proceed, <Ctrl-C> to abort:")

_categories = ('Transition', 'Peptide', 'TransitionGroup')

# Push every page of the listed categories from the source wiki, remembering
# the page names that exist there.
currentids = set()
for page in w1.iterpages(include_categories=_categories):
    pageid = page.name
    currentids.add(pageid)
    if w2.put(w1.get(pageid)):
        print >> sys.stderr, "Pushing %s to %s" % (pageid, w2.title())
    else:
        print >> sys.stderr, "No change to %s in %s" % (pageid, w2.title())

# Remove destination pages of those categories that the source does not have.
for page in w2.iterpages(include_categories=_categories):
    pageid = page.name
    if pageid not in currentids:
        w2.delete(pageid)
        print >> sys.stderr, "Delete %s from %s" % (pageid, w2.title())
opts, args = parser.parse_args()

# Reuse previously cached results when a cache file was given and exists;
# otherwise rebuild them from the wiki.
# NOTE(review): this fragment ends on an open `if m:`; its body lies beyond
# this view.
if opts.cachefile and os.path.exists(opts.cachefile):
    # The cache is a JSON object with 'rows', 'tgs' and 'pepnrt' entries.
    data = json.loads(open(opts.cachefile).read())
    rows = data['rows']
    tgs = data['tgs']
    origpepnrt = data['pepnrt']
else:
    # Single-letter symbols that are counted in each glycan's 'sym' string.
    monos = "NHFS"
    tgs = dict()
    origpepnrt = dict()
    rows = []
    w = GPTWiki(quiet=True)
    for tg in w.itertransgroups():
        pepid = tg.get('peptide')
        p = w.get(pepid)
        pepacc = p.get('id')
        pepseq = p.get('sequence')
        pepname = p.get('name')
        pepnrt = p.get('nrt')
        # Transition groups without their own 'nrt' value are skipped.
        if not tg.has('nrt'):
            continue
        nrt = float(tg.get('nrt'))
        # First element of the first entry of the peptide's 'glycan' list.
        glyacc = p.get('glycan')[0][0]
        g = w.get(glyacc)
        gsym = g.get('sym')
        # Every symbol starts at 0; the regex looks for the symbol followed
        # by digits in gsym.
        mcnt = {}
        for mono in monos:
            mcnt[mono] = 0
            m = re.search(mono + r'(\d+)', gsym)
            if m:
#!/bin/env python27
# Remove duplicate Peptide pages: the first page seen for each peptide key
# (sequence, glycans, mods) is kept; every later page with the same key is
# reported on stderr and deleted.
from getwiki import GPTWiki, Peptide
import sys, urllib, string
import Bio.SeqIO
from util import peptide_mw, mod_mw

seen = set()
w = GPTWiki()
peps = []

for page in w.iterpages(include_categories=['Peptide']):
    pep = w.get(page.name)
    sequence = pep.get('sequence')
    glycans = pep.get('glycan', [])
    mods = pep.get('mod', [])
    pepkey = Peptide.key(sequence, glycans, mods)
    if pepkey not in seen:
        # First occurrence of this key: remember it and keep the page.
        seen.add(pepkey)
        continue
    # Duplicate: report the page name, then delete the page.
    print >> sys.stderr, page.name
    w.delete(page.name)
#!/bin/env python27
# Tabulate transition groups: print a tab-separated header, then visit every
# TransitionGroup page and build an annotated peptide sequence for each
# (spectra, peptide, charge) combination not yet in `seen`.
from getwiki import GPTWiki, Peptide
import sys, urllib, string
import Bio.SeqIO

headers = [
    "Spectra", "Accession", "Peptide", "Site", "Glycan", "TGAccession",
    "Charge", "PrecursorMZ"
]
print("\t".join(map(str, headers)))

seen = set()
w = GPTWiki()
for tgpage in w.iterpages(include_categories=['TransitionGroup']):
    tg = w.get(tgpage.name)
    pep = w.get(tg.get('peptide'))
    pepid = pep.get('id')
    z1 = tg.get('z1')
    mz1 = tg.get('mz1')
    spectra = tg.get('spectra')
    if (spectra, pepid, z1) not in seen:
        # One list item per residue so a marker can be appended in place.
        pepseq = list(pep.get('sequence'))
        for deltastr, pos in pep.get('mod', []):
            # `pos` is a residue letter followed by a 1-based position.
            aa = pos[0]
            pos = int(pos[1:]) - 1
            # Mass delta rounded to three decimals decides the marker:
            # 57.021 -> ":m", 15.995 or 15.996 -> ":o".
            delta = round(deltastr, 3)
            if delta == 57.021:
                pepseq[pos] += ":m"
            elif delta in (15.995, 15.996):
                pepseq[pos] += ":o"
        pepseq = "".join(pepseq)