Esempio n. 1
0
#!/bin/env python27

from getwiki import GPTWiki, Peptide

import sys, urllib, string
import Bio.SeqIO
from util import peptide_mw, mod_mw

# Remove Peptide pages whose (sequence, glycan, mod) key repeats that of a
# page met earlier in the iteration; the earlier page is kept.
seen = set()

w = GPTWiki()
peps = []
for p in w.iterpages(include_categories=['Peptide']):
    pep = w.get(p.name)
    sequence = pep.get('sequence')
    glycans = pep.get('glycan', [])
    mods = pep.get('mod', [])
    pepkey = Peptide.key(sequence, glycans, mods)
    if pepkey not in seen:
        # First page with this key: remember it and leave the page alone.
        seen.add(pepkey)
        continue
    # Duplicate key: report the page name on stderr, then delete it.
    print >> sys.stderr, p.name
    w.delete(p.name)
Esempio n. 2
0
#!/bin/env python27

from getwiki import GPTWiki
import sys

# Delete every page of each category named on the command line.
w = GPTWiki()
valid_categories = ("Transition", "TransitionGroup", "Peptide", "Protein",
                    "Glycan")
categories = sys.argv[1:]
# Check all arguments before deleting anything. An assert would be stripped
# under "python -O", and checking inside the delete loop would let earlier
# categories be removed before a later typo is noticed.
badcats = [cat for cat in categories if cat not in valid_categories]
if badcats:
    sys.exit("Unknown category: %s (expected one of: %s)" %
             (", ".join(badcats), ", ".join(valid_categories)))
for cat in categories:
    w.deletemany(category=cat, verbose=True)
import json, urllib, re
from getwiki import GPTWiki
import numpy as np
import getwiki
from analysis.fisher import lod, fisher_exact_low, fisher_exact_high
from transitionspecificity import specscore

w = GPTWiki()
# Module-level accumulators and settings; they are not read in the lines
# visible here.
trspec = {}
labelspec = {}
glycanlabelspec = {}
smallwindow = 1
largewindow = 6
threshold = 50
nonspeccount = 0
alltrs = set()

for tg in w.itertgs(acqtype='DIA'):
    pepid = tg.get('peptide')
    pepage = w.get(pepid)
    z1 = tg.get('z1')
    spectra = tg.get('spectra')

    # Ignore transition groups whose spectra name does not contain "DIA".
    if 'DIA' not in spectra:
        continue

    # Take the first bracketed "[H...]" token of the peptide name, without
    # the brackets. Peptides with no such token (or no name) are skipped.
    # "except Exception" keeps that best-effort skip but no longer swallows
    # KeyboardInterrupt / SystemExit the way a bare "except:" did.
    try:
        glycan = re.search(r'\[H(.)*?\]', pepage.get('name')).group(0)[1:-1]
    except Exception:
        continue
    # File name built from the peptide id and the z1 value.
    filename = pepid + '.' + str(z1) + '.50.json'
Esempio n. 4
0
#!/bin/env python27

import sys
from getwiki import GPTWiki

w = GPTWiki()

# Refresh a set of wiki pages selected by the first command-line argument.
if len(sys.argv) > 1:

    selector = sys.argv[1]
    if selector == "-":
        # "-": every page outside the five listed categories.
        pages = w.iterpages(exclude_categories=[
            'Transition', 'Peptide', 'Protein', 'TransitionGroup', 'Glycan'
        ])
    elif selector in ('Transition', 'Peptide', 'Protein', 'Glycan',
                      'TransitionGroup'):
        # A known category name: all arguments are passed on as the
        # categories to include.
        pages = w.iterpages(include_categories=sys.argv[1:])
    else:
        # Anything else is passed to iterpages as its regex= filter.
        pages = w.iterpages(regex=selector)

    for p in pages:
        print >> sys.stderr, p.name
        w.refresh(p)

else:
Esempio n. 5
0
#!/bin/env python2

import sys
from collections import defaultdict
from getwiki import GPTWiki

w = GPTWiki()

tla = {'N': 'Asn'}

# Sample identifiers given on the command line.
sampleids = set(sys.argv[1:])

prsites = defaultdict(dict)

# Indentation below uses spaces only. The original mixed tabs and spaces,
# which parses only under Python 2's tab-equals-8-columns rule and is
# rejected by "python -tt" and by Python 3.
for i, tg in enumerate(w.itertransgroups()):
    pep = w.get(tg.get('peptide'))
    # if not pep.get('nrt'):
    #     continue
    # First element of the peptide's first glycan entry is used as the
    # glycan page id.
    glyid = pep.get('glycan')[0][0]
    gly = w.get(glyid)
    # Pick the first glycan name that contains "HexNAc" and no "Fuc";
    # stays "" when there is none.
    name = ""
    for n in gly.get('name', []):
        if 'Fuc' in n:
            continue
        if 'HexNAc' in n:
            name = n
            break
    # Only glycans with exactly one class value are processed further.
    cls = gly.get('class')
    if len(cls) != 1:
        continue
    cls = cls[0]
Esempio n. 6
0
#!/bin/env python27

from getwiki import GPTWiki, Protein

import sys, urllib, string, csv
import Bio.SeqIO

w = GPTWiki()

# Each argument is a file of protein accessions, one per line; every
# distinct accession is fetched once.
seen = set()
for praccfile in sys.argv[1:]:
    for pracc in open(praccfile):
        pracc = pracc.strip()
        if pracc in seen:
            continue
        # print >>sys.stderr, pracc
        seen.add(pracc)
        url = 'http://www.uniprot.org/uniprot/' + pracc + '.xml'
        data = urllib.urlopen(url)
        # Only the first record of the response is used; it is also echoed
        # to stdout in FASTA format.
        for seq_record in Bio.SeqIO.parse(data, 'uniprot-xml'):
            desc = seq_record.description
            pracc1 = seq_record.id
            seq = str(seq_record.seq)
            gene = seq_record.annotations['gene_name_primary']
            sys.stdout.write(seq_record.format('fasta'))
            break
        name = gene
        # Re-wrap the sequence at 60 characters per line.
        seqlines = [seq[i:i + 60] for i in range(0, len(seq), 60)]
        seq = "\n".join(seqlines)
Esempio n. 7
0
                        std_dev, height, FWHM, area, '\n'
                    ])))
                if tgid not in updated:
                    tg.set('prt', adjrt)
                    updated.add(tgid)
                    if w.put(tg):
                        # print tgid
                        pass
        json_file.close()

if len(sys.argv) < 2:
    print 'please enter the spectra file name(s)'
    exit(1)

spectrafiles = sys.argv[1:]
w = GPTWiki()

# outfile = open('../data/'+sys.argv[1][:5]+'.fitall.txt','w')
outfile = sys.stdout

outfile.write('\t'.join(
    map(str, [
        'TransGroup', 'Spectra', 'PeptideID', 'PrecZ', 'mmu', 'ExpRT', 'AdjRT',
        'R_Value', 'Std_Dev', 'Height', 'FWHM', 'Area', '\n'
    ])))

for tg in w.itertransgroups():

    tgid = tg.get('id')
    pepid = tg.get('peptide')
    z1 = tg.get('z1')
Esempio n. 8
0
import matplotlib
# Force matplotlib to not use any Xwindows backend.
matplotlib.use('Agg')

from getwiki import GPTWiki
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import sys

w = GPTWiki()

# At least one spectra file name is required on the command line.
if len(sys.argv) < 2:
    print 'please enter the spectra file name(s)'
    exit(1)
spectrafiles = sys.argv[1:]

# Accumulators; they are not filled in the lines visible here.
pepnrtpairs = defaultdict(list)

nrtobsgt0 = {}

# Visit every transition group, keeping only those whose 'spectra' value is
# one of the names given on the command line.
for tgpage in w.itertransgroups():

    spectra = tgpage.get('spectra')
    if spectra not in spectrafiles:
        continue
    tgid = tgpage.get('id')
    pepid = tgpage.get('peptide')
    # Fetch the peptide page to read its 'nrt' value.
    peppage = w.get(pepid)
    pepnrt = peppage.get('nrt')
Esempio n. 9
0
#!/bin/env python2

from getwiki import GPTWiki

import time, sys

w = GPTWiki()

for sp in w.iterspec(method=sys.argv[1]):
    sp.set('inst', sys.argv[2])
    sp.set('type', sys.argv[3])
    if w.put(sp):
        print sp.get('id')
Esempio n. 10
0
#!/bin/env python2

from getwiki import GPTWiki, Peptide, ProteinSite

import sys

w = GPTWiki()
for pep in w.iterpep():
    for al in pep.get('alignments', []):
        pr = al.get('protein')
        prsites = al.get('prsites', "").split('|')
        for prs in prsites:
            aa = prs[0]
            pos = int(prs[1:])
            ps = ProteinSite(protein=pr, aa=aa, position=pos)
            if w.put(ps):
                print ProteinSite.pagename(protein=pr, aa=aa, position=pos)
            al.append('site', ps)
        # al.delete('prsites')
    if w.put(pep):
        print pep.get('id')
Esempio n. 11
0
#!/bin/env python27

from getwiki import GPTWiki, Peptide

import sys, urllib, string
import Bio.SeqIO

w = GPTWiki()
for pr in sorted(w.iterproteins(), key=lambda pr: pr.get('accession')):
    acc = pr.get('accession')
    desc = pr.get('description')
    sequence = "".join(pr.get('sequence').split())
    print ">%s %s" % (acc, desc)
    for i in range(0, len(sequence), 60):
        print sequence[i:i + 60]
Esempio n. 12
0
#!/bin/env python27

from getwiki import GPTWiki
import sys

# Copy Transition, Peptide and TransitionGroup pages from one wiki
# environment to another. Usage: <script> FROM TO, each of PROD/DEV/TEST.
environments = ("PROD", "DEV", "TEST")

if len(sys.argv) < 3:
    sys.exit("usage: %s FROM TO (each one of: %s)" %
             (sys.argv[0], ", ".join(environments)))

fromwiki = sys.argv[1].upper()
towiki = sys.argv[2].upper()

# Explicit checks instead of assert: assertions are removed under
# "python -O", which would let an invalid or identical environment pair
# reach the writes below.
for env in (fromwiki, towiki):
    if env not in environments:
        sys.exit("Unknown environment %s (expected one of: %s)" %
                 (env, ", ".join(environments)))
if fromwiki == towiki:
    sys.exit("Source and destination environments must differ")

w1 = GPTWiki(smwenv=fromwiki, quiet=True)
print >> sys.stderr, "from: %s" % (w1.title(), )
w2 = GPTWiki(smwenv=towiki, quiet=True)
print >> sys.stderr, "  to: %s" % (w2.title(), )

# Last chance to abort before anything is written.
dummy = raw_input("Enter to proceed, <Ctrl-C> to abort:")

# Names of all pages seen in the source wiki.
currentids = set()
for page in w1.iterpages(include_categories=('Transition', 'Peptide',
                                             'TransitionGroup')):
    id = page.name
    currentids.add(id)
    it = w1.get(id)
    # The return value of w2.put() selects which message is logged.
    if w2.put(it):
        print >> sys.stderr, "Pushing %s to %s" % (id, w2.title())
    else:
        print >> sys.stderr, "No change to %s in %s" % (id, w2.title())

for page in w2.iterpages(include_categories=('Transition', 'Peptide',
Esempio n. 13
0
    default=False,
    help="Upload TG status and peptide nrt to GPTwiki. Default: False.")

opts, args = parser.parse_args()

if opts.cachefile and os.path.exists(opts.cachefile):
    # Reuse the JSON cache when one was named and exists. The with-block
    # closes the file handle, which the original one-liner never did.
    with open(opts.cachefile) as cachefh:
        data = json.load(cachefh)
    rows = data['rows']
    tgs = data['tgs']
    origpepnrt = data['pepnrt']
else:
    # No usable cache: read everything from the wiki.
    monos = "NHFS"
    tgs = dict()
    origpepnrt = dict()
    rows = []
    w = GPTWiki(quiet=True)
    for tg in w.itertransgroups():
        pepid = tg.get('peptide')
        p = w.get(pepid)
        pepacc = p.get('id')
        pepseq = p.get('sequence')
        pepname = p.get('name')
        pepnrt = p.get('nrt')
        # Transition groups without an 'nrt' value are skipped.
        if not tg.has('nrt'):
            continue
        nrt = float(tg.get('nrt'))
        # First element of the peptide's first glycan entry is used as the
        # glycan page id.
        glyacc = p.get('glycan')[0][0]
        g = w.get(glyacc)
        gsym = g.get('sym')
        mcnt = {}
        for mono in monos:
Esempio n. 14
0
opts, args = parser.parse_args()

if opts.cachefile and os.path.exists(opts.cachefile):
    data = json.loads(open(opts.cachefile).read())
    tgrows = data['tgrows']
    peprows = data['peprows']
    tgs = data['tgs']
    origpepnrt = data['pepnrt']
else:
    monos = "NHFS"
    tgs = dict()
    origpepnrt = dict()
    tgrows = []
    peprows = []
    pepseen = set()
    w = GPTWiki(quiet=True)
    for spec in w.iterspec(acqtype="DDA"):
        print spec.get("name")
        for tg in w.itertgs(spectra=spec.get("name")):
            pepid = tg.get('peptide')
            p = w.get(pepid)
            pepacc = p.get('id')
            pepseq = p.get('sequence')
            pepname = p.get('name')
            pepnrt = p.get('nrt')
            pepnrtobs = int(p.get('nrtobs', 0))
            if pepnrtobs == 0:
                pepnrt = None
            nrt = tg.get('nrt')
            if nrt is None and pepnrt is None:
                continue
Esempio n. 15
0
#!/bin/env python2

from getwiki import GPTWiki
import sys

# Pass the first command-line argument to GPTWiki.loadsite().
w = GPTWiki()
sitepath = sys.argv[1]
w.loadsite(sitepath)
Esempio n. 16
0
#!/bin/env python27

from getwiki import GPTWiki, Peptide

import sys, urllib, string
import Bio.SeqIO
from collections import defaultdict

w = GPTWiki()
seenpeps = set()
sites = set()
prot2site = defaultdict(set)
samples = set()
glycans = set()
glysites = set()
site2gly = defaultdict(set)
for tg in w.itertransgroups():
    tgid = tg.get('id')
    if tg.get('peptide') in seenpeps:
	continue
    pep = w.get(tg.get('peptide'))
    seenpeps.add(pep.get('id'))
    gly = pep.get('glycan')[0][0]
    glycans.add(gly)
    for al in pep.get('alignments',[]):
	site = al.get('prsites')
	prot = al.get('protein')
	print pep.get('id'),prot,site,gly
	sites.add((prot,site))
	prot2site[prot].add(site)
	site2gly[(prot,site)].add(gly)
Esempio n. 17
0
#!/bin/env python27

from getwiki import GPTWiki, Peptide
import sys
from collections import defaultdict

# For each spectra name on the command line, delete its transition groups
# first and then the spectra page itself.
w = GPTWiki()
for sp in sys.argv[1:]:
    for tg in w.iterspectgs(sp):
        # Falsy items yielded by iterspectgs are ignored.
        if tg:
            tgid = tg.get('id')
            print >> sys.stderr, "Delete transition group", tgid
            w.delete(tgid)
    print >> sys.stderr, "Delete spectra", sp
    w.delete(sp)
    m = re.search(r'\[(.*)\]', lab)
    kvpairs = re.split(r'([A-Z])', m.group(1))
    nmono = 0
    for i in range(1, len(kvpairs), 2):
        if kvpairs[i + 1] == "":
            nmono += 1
        else:
            nmono += int(kvpairs[i + 1])
    return nmono


def label2series(lab):
    """Return the first character of *lab*, lower-cased.

    Raises IndexError when *lab* is empty.
    """
    first = lab[0]
    return first.lower()


w = GPTWiki()

# Three-level nested mapping; it is not filled in the lines visible here.
peps = defaultdict(lambda: defaultdict(dict))

# Visit the spectra selected by the command-line options, then every
# transition group of each one.
for sp in w.iterspec(sample=opts.sample,
                     acqtype=opts.acqtype,
                     method=opts.method,
                     inst=opts.inst):
    print >> sys.stderr, sp.get('name')
    for i, tg in enumerate(w.itertgs(spectra=sp.get('name'))):
        pep = w.get(tg.get('peptide'))
        pepid = pep.get('id')
        # Peptides without an 'nrt' value are skipped (identity test
        # against None, per PEP 8).
        if pep.get('nrt') is None:
            continue
        z1 = tg.get('z1')
        ntrans = len(tg.get('transitions', []))
Esempio n. 19
0
from getwiki import GPTWiki, Glycan

import findpygly
from pygly.GlyTouCan import GlyTouCan

import os, sys, urllib, string
import Bio.SeqIO

# Write one GlycoCT text file per (glycan accession, topology accession)
# pair into ../glycoct.
gtc = GlyTouCan(usecache=True)
w = GPTWiki()

outdir = '../glycoct'
try:
    os.mkdir(outdir)
except OSError:
    # An existing directory is fine. Any other failure (permissions,
    # missing parent) is re-raised instead of surfacing later as a
    # confusing open() error.
    if not os.path.isdir(outdir):
        raise

for gc in sorted(w.iterglycans(), key=lambda gc: gc.get('accession')):
    acc = gc.get('accession')
    topos = [str(t).strip() for t in gc.get('topo')]
    for tacc in topos:
        glycoct = gtc.getseq(tacc, 'glycoct')
        if not glycoct:
            # No stored GlycoCT sequence: build it from the glycan object.
            gly = gtc.getGlycan(tacc)
            glycoct = gly.glycoct()
        # The with-block closes the file even if the write fails.
        with open(outdir + '/' + acc + '.' + tacc + '.txt', 'w') as f:
            f.write(glycoct)
        print >> sys.stderr, "Dump GlycoCT to %s.%s.txt" % (acc, tacc)
Esempio n. 20
0
#!/bin/env python27

from getwiki import GPTWiki, Peptide
import sys
from collections import defaultdict

# Delete every TransitionGroup page whose 'spectra' value was named on the
# command line, then delete the named spectra pages themselves.
w = GPTWiki()
doomed = sys.argv[1:]
for tgpage in w.iterpages(include_categories=['TransitionGroup']):
    if w.get(tgpage.name).get('spectra') not in doomed:
        continue
    print >> sys.stderr, "Delete transition group", tgpage.name
    w.delete(tgpage.name)
for sp in doomed:
    print >> sys.stderr, "Delete spectra", sp
    w.delete(sp)
Esempio n. 21
0
#!/bin/env python27

from getwiki import GPTWiki, Protein

import sys, urllib, string, csv, os.path
from collections import defaultdict
import Bio.SeqIO
from util import peptide_mw, mod_mw


def asscan(s):
    """Parse a string of the form ``N(a,b,...)`` into ``[N, 'a', 'b', ...]``.

    Trailing ')' characters are stripped, the text before the first '(' is
    converted to int, and the text after it is split on ','. Raises
    ValueError if the leading part is not an integer and IndexError if
    there is no '('.
    """
    parts = s.rstrip(')').split('(')
    result = [int(parts[0])]
    result.extend(parts[1].split(','))
    return result


w = GPTWiki()

# Index the ids of the existing TransitionGroup pages by their 'spectra'
# value.
spectra2tg = defaultdict(set)
for tgpage in w.iterpages(include_categories=['TransitionGroup']):
    tg = w.get(tgpage.name)
    spectra = tg.get('spectra')
    spectra2tg[spectra].add(tg.get('id'))

allspectra = set()

# Each argument is split from the right into five dot-separated parts:
# <spectra>.<sample>.<method>.<index>.<extn>
for transfile in sys.argv[1:]:
    spectra, sample, method, index, extn = transfile.rsplit('.', 4)
    # Drop any directory part from the spectra name.
    spectra = os.path.split(spectra)[1]
    allspectra.add(spectra)
    w.addacquisition(name=spectra, method=method, sample=sample)
    tgroup = defaultdict(dict)
Esempio n. 22
0
#!/bin/env python27

from getwiki import GPTWiki, Protein

import sys, urllib, string
from collections import defaultdict
import Bio.SeqIO

# Read a whitespace-delimited alignment file (argv[1]) and store, on every
# peptide whose sequence appears in it, the list of (protein accession,
# start, end) tuples as its 'alignment'.
w = GPTWiki()
alignfile = sys.argv[1]
alignments = defaultdict(list)
# The with-block closes the file; the original never closed it.
with open(alignfile) as alignfh:
    for line in alignfh:
        fields = line.split()
        # Blank lines would otherwise fail on fields[1].
        if not fields:
            continue
        # NOTE(review): the +1 presumably converts a 0-based start to
        # 1-based; confirm against the alignment tool's output format.
        st = int(fields[1]) + 1
        ed = int(fields[2])
        pep = fields[4]
        # The accession is column 12 without its first character.
        pracc = fields[12][1:]
        alignments[pep].append((pracc, st, ed))

for p in w.iterpeptides():
    seq = p.get('sequence')
    if seq in alignments:
        p.update(alignment=alignments[seq])
        if w.put(p):
            print >> sys.stderr, p.get('id')
Esempio n. 23
0
#!/bin/env python27

from getwiki import GPTWiki, Protein
import getwiki

import sys, urllib, string, csv, os.path, json, glob
from collections import defaultdict
import Bio.SeqIO
from util import peptide_mw, mod_mw

w = GPTWiki()

# argv[1] is the sample; argv[2] is the method, optionally written as
# "<method>:<anfrac>".
sample = sys.argv[1]
method = sys.argv[2]
anfrac = None
if ":" in method:
    method, anfrac = method.split(':')

tgs = defaultdict(set)
allspec = set()
lccal = defaultdict(dict)
for specfile in sys.argv[3:]:
    # Strip the last two dot-separated suffixes, then a trailing
    # ".centroid" if present, and keep only the base name.
    dirname = specfile.rsplit('.', 2)[0]
    if dirname.endswith('.centroid'):
        dirname = dirname[:-len('.centroid')]
    spectra = os.path.basename(dirname)
    allspec.add(spectra)
    w.addacquisition(name=spectra,
                     method=method,
                     anfrac=anfrac,
Esempio n. 24
0
#!/bin/env python2

from getwiki import GPTWiki, Protein

import sys, urllib, string, csv, os.path
from collections import defaultdict
import Bio.SeqIO
from util import peptide_mw, mod_mw


def asscan(s):
    """Parse a string of the form ``N(a,b,...)`` into ``[N, 'a', 'b', ...]``.

    Trailing ')' characters are stripped, the text before the first '(' is
    converted to int, and the text after it is split on ','. Raises
    ValueError if the leading part is not an integer and IndexError if
    there is no '('.
    """
    parts = s.rstrip(')').split('(')
    result = [int(parts[0])]
    result.extend(parts[1].split(','))
    return result


w = GPTWiki()

allspectra = set()
spectra2tg = defaultdict(set)
# First pass: register each acquisition and index the ids of the transition
# groups the wiki already has for it. Each argument is split from the right
# into <spectra>.<sample>.<method>.<index>.<extn>.
for transfile in sys.argv[1:]:
    spectra, sample, method, index, extn = transfile.rsplit('.', 4)
    # Drop any directory part from the spectra name.
    spectra = os.path.split(spectra)[1]
    w.addacquisition(name=spectra, method=method, sample=sample)
    allspectra.add(spectra)
    for tg in w.itertgs(spectra=spectra, all=True):
        spectra2tg[spectra].add(tg.get('id'))

# Second pass over the same files, parsed the same way.
for transfile in sys.argv[1:]:
    spectra, sample, method, index, extn = transfile.rsplit('.', 4)
    spectra = os.path.split(spectra)[1]
    tgroup = defaultdict(dict)
Esempio n. 25
0
#!/bin/env python2

from getwiki import GPTWiki, Alignment

import sys, urllib, string, csv
import Bio.SeqIO
from util import peptide_mw, mod_mw
from operator import itemgetter

w = GPTWiki()
gmw = dict()
gsym = dict()
# (sequence, glycans, mods) combinations already handled.
seen = set()
for peptidefile in sys.argv[1:]:
    # File names split from the right into
    # <rest>.<sample>.<method>.<index>.<extn>
    rest, sample, method, index, extn = peptidefile.rsplit('.', 4)
    for l in csv.DictReader(open(peptidefile), dialect='excel-tab'):

        seq = l['PeptideSequence']
        glyspec = l['Glycans']
        modspec = l['Mods']
        praccs = l['ProteinName']
        pepid = l.get('PeptideID')

        # Skip rows already handled, and rows whose glycan spec contains
        # a '?'; the latter are not recorded as seen.
        pepkey = (seq, glyspec, modspec)
        if pepkey in seen or '?' in glyspec:
            continue

        seen.add(pepkey)
        # print >>sys.stderr, seq,glyspec,modspec
Esempio n. 26
0
#!/bin/env python27

from getwiki import GPTWiki
import re

w = GPTWiki()

monos = "NHSF"
for gly in w.iterglycans():
    gsym = gly.get('sym')
    mcnt = {}
    for mono in monos:
        mcnt[mono] = 0
        m = re.search(mono + r'(\d+)', gsym)
        if m:
            mcnt[mono] = int(m.group(1))
        elif mono in gsym:
            mcnt[mono] = 1
    gly.set('nneuac', mcnt['S'])
    if w.put(gly):
        print gly.get('id')

for pep in w.iterpeptides():
    pepname = pep.get('name')
    pep.set('nox', pepname.count('[Ox]'))
    if w.put(pep):
        print pep.get('id')
Esempio n. 27
0
#!/bin/env python2

from getwiki import GPTWiki, Peptide

import sys, urllib, string
from collections import defaultdict

# With "nrtonly" as the first argument, peptides without an 'nrt' value are
# left out.
nrtonly = len(sys.argv) > 1 and sys.argv[1] == "nrtonly"

w = GPTWiki()
# Accumulators for the peptides, sites and glycans met below.
seenpeps = set()
sites = set()
prot2site = defaultdict(set)
samples = set()
glycans = set()
glysites = set()
site2gly = defaultdict(set)
# NOTE(review): other scripts select spectra with iterspec(acqtype="DDA");
# confirm that type= is the intended filter here.
for sp in w.iterspec(type='DDA'):
    for tg in w.itertgs(spectra=sp.get('name')):
        tgid = tg.get('id')
        # Each peptide is handled once, however many groups use it.
        if tg.get('peptide') in seenpeps:
            continue
        pep = w.get(tg.get('peptide'))
        # Identity test against None, per PEP 8.
        if nrtonly and pep.get('nrt') is None:
            continue
        seenpeps.add(pep.get('id'))
        # First element of the peptide's first glycan entry.
        gly = pep.get('glycan')[0][0]
        glycans.add(gly)
        for al in pep.get('alignments', []):
Esempio n. 28
0
#!/bin/env python2
from getwiki import GPTWiki
import sys, re

w = GPTWiki()

# if len(sys.argv) < 2:
#     print 'please enter the spectra file name regex'
#     exit(1)

for spectrapage in w.iterspec(acqtype="DDA"):
    # if not re.search(sys.argv[1],spectrapage.get('name')):
    #   continue
    print spectrapage.get('name')
    nrtslope = spectrapage.get('nrtslope')
    nrtintercept = spectrapage.get('nrtintercept')
    if not nrtslope or not nrtintercept:
        print "No NRT slope or intercept"
        continue
    for tgpage in w.itertgs(spectra=spectrapage.get('name')):
        tgid = tgpage.get('id')
        peakrt = tgpage.get('prt')
        nrt = 0.0

        if peakrt != None:
            nrt = (peakrt - nrtintercept) / nrtslope
            tgpage.set('nrt', nrt)
            if w.put(tgpage):
                print tgid
        else:
            tgpage.set('nrt', '')
Esempio n. 29
0
#!/bin/env python27

from getwiki import GPTWiki
import sys

# Call GPTWiki.dumpsite() with argv[1], excluding the Peptide, Transition
# and TransitionGroup categories.
w = GPTWiki()
skipped = ['Peptide', 'Transition', 'TransitionGroup']
w.dumpsite(sys.argv[1], exclude_categories=skipped)
Esempio n. 30
0
#!/bin/env python27

from getwiki import GPTWiki, Peptide

import sys, urllib, string
import Bio.SeqIO

# Header row of the tab-separated table written to stdout.
print "\t".join(
    map(str, [
        "Spectra", "Accession", "Peptide", "Site", "Glycan", "TGAccession",
        "Charge", "PrecursorMZ"
    ]))
# (spectra, peptide id, z1) combinations tested below; nothing is added to
# the set in the lines visible here.
seen = set()
w = GPTWiki()
for tgpage in w.iterpages(include_categories=['TransitionGroup']):
    tg = w.get(tgpage.name)
    pep = w.get(tg.get('peptide'))
    pepid = pep.get('id')
    z1 = tg.get('z1')
    mz1 = tg.get('mz1')
    spectra = tg.get('spectra')
    if (spectra, pepid, z1) not in seen:
        # Annotate modified residues in a per-residue copy of the sequence.
        pepseq = list(pep.get('sequence'))
        for deltastr, pos in pep.get('mod', []):
            # pos starts as <amino acid><1-based position> and is rebound
            # to a 0-based index into pepseq.
            aa = pos[0]
            pos = int(pos[1:]) - 1
            # NOTE(review): despite its name, deltastr must be numeric for
            # round() to work; confirm the type stored in 'mod'.
            # A delta of 57.021 is tagged ":m"; 15.995 or 15.996 is
            # tagged ":o".
            if round(deltastr, 3) == 57.021:
                pepseq[pos] += ":m"
            elif round(deltastr, 3) in (15.995, 15.996):
                pepseq[pos] += ":o"
        pepseq = "".join(pepseq)