from analysis.fisher import lod, fisher_exact_low, fisher_exact_high
from transitionspecificity import specscore

w = GPTWiki()
trspec = {}
labelspec = {}
glycanlabelspec = {}
smallwindow = 1
largewindow = 6
threshold = 50
nonspeccount = 0
alltrs = set()

for tg in w.itertgs(acqtype='DIA'):
    pepid = tg.get('peptide')
    pepage = w.get(pepid)
    z1 = tg.get('z1')
    spectra = tg.get('spectra')

    if spectra.find('DIA') == -1:
        continue

    try:
        glycan = re.search('\[H(.)*?\]', pepage.get('name')).group(0)[1:-1]
    except:
        continue
    filename = pepid + '.' + str(z1) + '.50.json'
    onlinefile = 'http://edwardslab.bmcb.georgetown.edu/~nedwards/dropbox/pBYmLSkGeq/' + spectra + '/' + filename

    json_file = urllib.urlopen(onlinefile)
    try:
Example #2
0
            continue

        if '?' in glyspec:
            continue

        seen.add((seq, glyspec, modspec))
        # print >>sys.stderr, seq,glyspec,modspec

        glycan = []
        if glyspec != "-":
            glycan = map(lambda t: (t[1], seq[int(t[0]) - 1] + str(t[0])),
                         map(lambda s: s.split(':'), glyspec.split(',')))

        badglys = set()
        for glyacc in set(map(itemgetter(0), glycan)):
            if not w.get(glyacc):
                badglys.add(glyacc)

        if len(badglys) > 0:
            print >> sys.stderr, "Warning: Can't resolve glycan accession(s):", ", ".join(
                sorted(badglys))
            continue

        alignments = []
        for pracc in map(str.strip, praccs.split(',')):

            if not w.get(pracc):
                print >> sys.stderr, "Warning: Can't resolve protein accession:", pracc
                continue

            prseq = "".join(w.get(pracc).get('sequence').split())
Example #3
0
    nrtonly = True

w = GPTWiki()
seenpeps = set()
sites = set()
prot2site = defaultdict(set)
samples = set()
glycans = set()
glysites = set()
site2gly = defaultdict(set)
for sp in w.iterspec(type='DDA'):
    for tg in w.itertgs(spectra=sp.get('name')):
        tgid = tg.get('id')
        if tg.get('peptide') in seenpeps:
            continue
        pep = w.get(tg.get('peptide'))
        if nrtonly and pep.get('nrt') == None:
            continue
        seenpeps.add(pep.get('id'))
        gly = pep.get('glycan')[0][0]
        glycans.add(gly)
        for al in pep.get('alignments', []):
            site = al.get('prsites')
            prot = al.get('protein')
            print pep.get('id'), prot, site, gly
            sites.add((prot, site))
            prot2site[prot].add(site)
            site2gly[(prot, site)].add(gly)
            glysites.add((prot, site, gly))

print "Proteins:", len(prot2site)
Example #4
0
                                  z1=z1,
                                  spectra=spectra,
                                  mz1=mz1,
                                  nrt=nrt,
                                  rt=rt,
                                  prt=rt,
                                  transitions=trans,
                                  ntransition=len(trans),
                                  **extras)
        if 'lccalibration' in data and spectra not in lccal:
            nrtslope, nrtintercept = map(float,
                                         data['lccalibration'].split(":"))
            lccal[spectra]['nrtslope'] = nrtslope
            lccal[spectra]['nrtintercept'] = nrtintercept
        tgs[spectra].add(tg.get('id'))

for spectra in allspec:
    spec = w.get(spectra)
    if spectra in lccal:
        spec.set("nrtslope", lccal[spectra]['nrtslope'])
        spec.set("nrtintercept", lccal[spectra]['nrtintercept'])
    else:
        spec.delete("nrtslope")
        spec.delete("nrtintercept")
    w.put(spec)

    for tg in w.itertgs(spectra=spectra, all=True):
        if tg.get('id') not in tgs[spectra]:
            if w.cleartransgroup(tg):
                print "Clear", tg.get("id")
#!/bin/env python2

import sys
from collections import defaultdict
from getwiki import GPTWiki

w=GPTWiki()

tla = {'N': 'Asn'}

sampleids = set(sys.argv[1:])

prsites = defaultdict(dict)

for i,tg in enumerate(w.itertransgroups()):
    pep = w.get(tg.get('peptide'))
    # if not pep.get('nrt'):
    #     continue
    glyid = pep.get('glycan')[0][0]
    gly = w.get(glyid)
    name = ""
    for n in gly.get('name',[]):
	if 'Fuc' in n:
	    continue
	if 'HexNAc' in n:
	    name = n
	    break
    cls = gly.get('class')
    if len(cls) != 1:
	continue
    cls = cls[0]
Example #6
0
        if modified:
            print t.get('id')

        if (pid, z1, spectra) not in tgroup:
            tgroup[(pid, z1, spectra)] = dict(transitions=[],
                                              nrt=nrt,
                                              rt=rt,
                                              prt=prt,
                                              mz1=mz1,
                                              scans=scans,
                                              gphash=gphash)
        tgroup[(pid, z1, spectra)]['transitions'].append((t.get('id'), relint))

    for pid, z1, spectra in tgroup:
        tgroup[(pid, z1, spectra)]['ntransition'] = len(
            tgroup[(pid, z1, spectra)]['transitions'])
        tg, mod = w.addtransgroup(peptide=pid,
                                  z1=z1,
                                  spectra=spectra,
                                  **tgroup[(pid, z1, spectra)])
        if mod:
            print tg.get('id')
        if tg.get('id') in spectra2tg[spectra]:
            spectra2tg[spectra].remove(tg.get('id'))

for spectra in allspectra:
    for tgid in spectra2tg[spectra]:
        tg = w.get(tgid)
        if w.cleartransgroup(tg):
            print "Clear", tg.get("id")
Example #7
0
import sys, urllib, string, csv, os.path
from collections import defaultdict
import Bio.SeqIO
from util import peptide_mw, mod_mw


def asscan(s):
    """Parse a string such as ``"12(a,b)"`` into ``[12, 'a', 'b']``.

    Trailing ``)`` characters are stripped and the remainder is split on
    ``(``. The first piece is converted with ``int``; the second piece is
    split on commas and its items are appended as strings.

    Raises ``ValueError`` if the first piece is not an integer and
    ``IndexError`` if *s* contains no ``(``.
    """
    pieces = s.rstrip(')').split('(')
    number = int(pieces[0])
    items = pieces[1].split(',')
    return [number] + items


w = GPTWiki()

# Map each spectra name to the set of ids of the transition groups that
# reference it.
spectra2tg = defaultdict(set)
for tgpage in w.iterpages(include_categories=['TransitionGroup']):
    tg = w.get(tgpage.name)
    spectra2tg[tg.get('spectra')].add(tg.get('id'))

allspectra = set()

for transfile in sys.argv[1:]:
    spectra, sample, method, index, extn = transfile.rsplit('.', 4)
    spectra = os.path.split(spectra)[1]
    allspectra.add(spectra)
    w.addacquisition(name=spectra, method=method, sample=sample)
    tgroup = defaultdict(dict)
    for l in csv.DictReader(open(transfile), dialect='excel-tab'):
        seq = l['PeptideSequence']
        glyspec = l['Glycans']
        modspec = l['Mods']
def label2series(lab):
    """Return the lower-cased first character of *lab*.

    Raises ``IndexError`` when *lab* is empty.
    """
    first = lab[0]
    return first.lower()


w = GPTWiki()

peps = defaultdict(lambda: defaultdict(dict))

for sp in w.iterspec(sample=opts.sample,
                     acqtype=opts.acqtype,
                     method=opts.method,
                     inst=opts.inst):
    print >> sys.stderr, sp.get('name')
    for i, tg in enumerate(w.itertgs(spectra=sp.get('name'))):
        pep = w.get(tg.get('peptide'))
        pepid = pep.get('id')
        if pep.get('nrt') == None:
            continue
        z1 = tg.get('z1')
        ntrans = len(tg.get('transitions', []))
        if ntrans == 0:
            continue
        proteins = set()
        for al in pep.get("alignments", []):
            proteins.add(al.get("protein"))
        for trid, trint in tg.get('transitions', []):
            tr = w.get(trid)
            if trid not in peps[(pepid, z1)]:
                peps[(pepid, z1)][trid]['pepname'] = pep.get('name')
                peps[(pepid, z1)][trid]['prot'] = ";".join(sorted(proteins))
Example #9
0
#!/bin/env python27

from getwiki import GPTWiki, Peptide
import sys
from collections import defaultdict

w = GPTWiki()
# Collect the ids of all transitions that are listed by some transition
# group and for which w.get() returns a truthy page.
currtrans = set()
for tgpage in w.iterpages(include_categories=['TransitionGroup']):
    tg = w.get(tgpage.name)
    # Pass [] as the default so a group with no 'transitions' value is
    # skipped instead of raising TypeError when the result is iterated.
    for t, _ in tg.get('transitions', []):
        if w.get(t):
            currtrans.add(t)

# Delete every Transition page whose id was not collected above.
for tpage in w.iterpages(include_categories=['Transition']):
    tid = w.get(tpage.name).get('id')
    if tid not in currtrans:
        print >> sys.stderr, "Delete transition " + tid
        w.delete(tid)
Example #10
0
    print 'please enter the spectra file name(s)'
    exit(1)
spectrafiles = sys.argv[1:]

pepnrtpairs = defaultdict(list)

nrtobsgt0 = {}

for tgpage in w.itertransgroups():

    spectra = tgpage.get('spectra')
    if spectra not in spectrafiles:
        continue
    tgid = tgpage.get('id')
    pepid = tgpage.get('peptide')
    peppage = w.get(pepid)
    pepnrt = peppage.get('nrt')
    nrtobs = peppage.get('nrtobs')
    peakrt = tgpage.get('prt')

    if peakrt != None and pepnrt != None:
        if nrtobs == '0':
            continue
        else:
            if spectra not in nrtobsgt0:
                nrtobsgt0[spectra] = 1
            else:
                nrtobsgt0[spectra] += 1

        if spectra not in pepnrtpairs:
            pepnrtpairs[spectra] = [(float(peakrt), float(pepnrt))]
Example #11
0
# Destination environment name, taken from the second command-line argument.
towiki = sys.argv[2].upper()

# Both environment names must be one of the known values and must differ.
# NOTE(review): assert statements are skipped under ``python -O``.
assert fromwiki in ("PROD", "DEV", "TEST")
assert towiki in ("PROD", "DEV", "TEST")
assert fromwiki != towiki

# Open both wikis and report their titles on stderr.
w1 = GPTWiki(smwenv=fromwiki, quiet=True)
print >> sys.stderr, "from: %s" % (w1.title(), )
w2 = GPTWiki(smwenv=towiki, quiet=True)
print >> sys.stderr, "  to: %s" % (w2.title(), )

# Block until the operator presses Enter.
dummy = raw_input("Enter to proceed, <Ctrl-C> to abort:")

# Copy every Transition, Peptide and TransitionGroup page from w1 to w2,
# remembering the page names found in w1.
categories = ('Transition', 'Peptide', 'TransitionGroup')
currentids = set()
for page in w1.iterpages(include_categories=categories):
    pagename = page.name
    currentids.add(pagename)
    if w2.put(w1.get(pagename)):
        status = "Pushing %s to %s"
    else:
        status = "No change to %s in %s"
    print >> sys.stderr, status % (pagename, w2.title())

# Delete from w2 any page in those categories whose name was not seen in w1.
for page in w2.iterpages(include_categories=categories):
    pagename = page.name
    if pagename in currentids:
        continue
    w2.delete(pagename)
    print >> sys.stderr, "Delete %s from %s" % (pagename, w2.title())
Example #12
0
opts, args = parser.parse_args()

if opts.cachefile and os.path.exists(opts.cachefile):
    data = json.loads(open(opts.cachefile).read())
    rows = data['rows']
    tgs = data['tgs']
    origpepnrt = data['pepnrt']
else:
    monos = "NHFS"
    tgs = dict()
    origpepnrt = dict()
    rows = []
    w = GPTWiki(quiet=True)
    for tg in w.itertransgroups():
        pepid = tg.get('peptide')
        p = w.get(pepid)
        pepacc = p.get('id')
        pepseq = p.get('sequence')
        pepname = p.get('name')
        pepnrt = p.get('nrt')
        if not tg.has('nrt'):
            continue
        nrt = float(tg.get('nrt'))
        glyacc = p.get('glycan')[0][0]
        g = w.get(glyacc)
        gsym = g.get('sym')
        mcnt = {}
        for mono in monos:
            mcnt[mono] = 0
            m = re.search(mono + r'(\d+)', gsym)
            if m:
Example #13
0
#!/bin/env python27

from getwiki import GPTWiki, Peptide

import sys, urllib, string
import Bio.SeqIO
from util import peptide_mw, mod_mw

# Delete Peptide pages whose Peptide.key (sequence, glycan, mod) repeats that
# of a page seen earlier in the iteration; the first page per key is kept.
w = GPTWiki()
seen = set()
peps = []
for p in w.iterpages(include_categories=['Peptide']):
    pep = w.get(p.name)
    sequence = pep.get('sequence')
    glycans = pep.get('glycan', [])
    mods = pep.get('mod', [])
    pepkey = Peptide.key(sequence, glycans, mods)
    if pepkey not in seen:
        seen.add(pepkey)
        continue
    # Duplicate: report the page name on stderr, then remove the page.
    print >> sys.stderr, p.name
    w.delete(p.name)
Example #14
0
#!/bin/env python27

from getwiki import GPTWiki, Peptide

import sys, urllib, string
import Bio.SeqIO

print "\t".join(
    map(str, [
        "Spectra", "Accession", "Peptide", "Site", "Glycan", "TGAccession",
        "Charge", "PrecursorMZ"
    ]))
seen = set()
w = GPTWiki()
for tgpage in w.iterpages(include_categories=['TransitionGroup']):
    tg = w.get(tgpage.name)
    pep = w.get(tg.get('peptide'))
    pepid = pep.get('id')
    z1 = tg.get('z1')
    mz1 = tg.get('mz1')
    spectra = tg.get('spectra')
    if (spectra, pepid, z1) not in seen:
        pepseq = list(pep.get('sequence'))
        for deltastr, pos in pep.get('mod', []):
            aa = pos[0]
            pos = int(pos[1:]) - 1
            if round(deltastr, 3) == 57.021:
                pepseq[pos] += ":m"
            elif round(deltastr, 3) in (15.995, 15.996):
                pepseq[pos] += ":o"
        pepseq = "".join(pepseq)