Esempio n. 1
0
def ReadTSV(filename):
    """Load an SNV table and return its header line, chromosomes and rows.

    The reader class is chosen from the case-insensitive file extension
    (``csv``, ``tsv``, ``xls``, ``xlsx`` or ``txt``).  Only the ``txt``
    reader is handed the required headers explicitly.

    Args:
        filename: Path of the SNV file; its extension selects the reader.

    Returns:
        A 3-tuple ``(header_lines, chrom, snvdata)``: ``header_lines`` is a
        one-element list holding the tab-joined headers, ``chrom`` is the
        set of values found in the first (CHROM) column, and ``snvdata`` is
        a list of rows, each a list of values in header order.

    Raises:
        RuntimeError: If the file name has a missing or unsupported
            extension, if a required header is absent, or if the first four
            headers are not CHROM, POS, REF, ALT in that order.
    """
    snvheaders = ["CHROM", "POS", "REF", "ALT"]

    # rpartition never fails to unpack: a name without a dot yields an empty
    # separator, which is mapped to an empty extension and rejected below
    # with the same RuntimeError as any other unsupported extension.
    _, dot, extn = filename.rpartition('.')
    extn = extn.lower() if dot else ''
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    # Read the headers once instead of once per required header and per row.
    headers = snvs.headers()

    for h in snvheaders:
        if h not in headers:
            raise RuntimeError("Required header: %s missing from SNV file %s" %
                               (h, filename))

    # Explicit raise rather than assert, so the check survives ``python -O``.
    if headers[:4] != snvheaders:
        raise RuntimeError("SNV file %s must start with headers: %s" %
                           (filename, " ".join(snvheaders)))

    chrom = set()
    snvdata = []
    for r in snvs:
        ri = [r.get(h) for h in headers]
        chrom.add(ri[0])  # first column is CHROM, enforced above
        snvdata.append(ri)

    return ["\t".join(headers)], chrom, snvdata
Esempio n. 2
0
""".split())

# Accumulators for the SNV input files; the code that fills them is not part
# of this excerpt.
snvdata = {}
# extrasnvheaders = []
# usedsnvheaders = set()
snvchroms = defaultdict(set)
# Open each SNV file named in opt.snvs with the reader matching its extension.
for filename in opt.snvs:

    # NOTE(review): the two-name unpacking raises ValueError when the file
    # name contains no '.'.
    base, extn = filename.rsplit('.', 1)
    # Extension matching is case-insensitive.
    extn = extn.lower()
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        snvs = VCFFile(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        # Only the txt reader is handed the expected headers explicitly.
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    # Every header listed in snvheaders (defined before this excerpt) must be
    # present in the file.
    for h in snvheaders:
        if h not in snvs.headers():
            raise RuntimeError("Required header: %s missing from SNV file %s" %
                               (h, filename))

    for h in snvs.headers():
#!/bin/env python27
# Push the motifs listed in the TSV files given on the command line to the
# GlycoMotif wiki, then delete every GlyGenMotif entry whose accession did
# not appear in any of those files.
#
# Python 2 script (print >>stream syntax).  Indentation uses spaces only:
# mixing tabs and spaces is rejected by `python -tt` and by Python 3.

import sys,traceback

from getwiki import GlycoMotifWiki, GlyGenMotif
w = GlycoMotifWiki()

from dataset import TSVFileTable

# Accessions seen in the input files; everything else is deleted below.
# NOTE(review): with no file arguments this set stays empty, so the cleanup
# loop deletes every GlyGenMotif entry -- confirm that is intended.
current = set()
for fn in sys.argv[1:]:
    rows = TSVFileTable(fn)
    for r in rows:
        # Accessions are normalized to six zero-padded digits.
        accession = "%06d"%(int(r['accession']),)
        prefname = r['name'].strip()
        # Preferred name first, followed by the non-blank alternative names.
        name = [prefname]
        gtc = r['gtc'].strip()
        redend = r['redend'].strip()
        aglycon = r['aglycon'].strip()
        for h in r.keys():
            if h.startswith('altname'):
                altname = r[h].strip()
                if altname:
                    name.append(altname)
        motif = GlyGenMotif(accession=accession,prefname=prefname,name=name,glytoucan=gtc,redend=redend,aglycon=aglycon)
        # Report the accession whenever update() returns a true value
        # (presumably meaning the wiki entry changed -- TODO confirm).
        if w.update(motif):
            print >>sys.stderr, accession
        current.add(accession)

# Remove wiki entries that are no longer present in the input files.
for m in w.itermotif(collection=GlyGenMotif):
    if m.get('accession') not in current:
        print >>sys.stderr, "Deleting:",m.get('pagename')
        w.delete(m.get('pagename'))
Esempio n. 4
0
#!/bin/env python27
# Push the motifs listed in the TSV file given as the first command-line
# argument to the GlycoMotif wiki, then delete every GlyGenMotif entry whose
# accession did not appear in that file.
#
# Python 2 script (print >>stream syntax).  Indentation uses spaces only:
# mixing tabs and spaces is rejected by `python -tt` and by Python 3.

import sys,traceback

from getwiki import GlycoMotifWiki, GlyGenMotif
w = GlycoMotifWiki()

from dataset import TSVFileTable
rows = TSVFileTable(sys.argv[1])

# Accessions seen in the input file; everything else is deleted below.
current = set()
for r in rows:

    # Accessions are normalized to six zero-padded digits.
    accession = "%06d"%(int(r['accession']),)
    name = [r['name'].strip()]
    gtc = r['gtc'].strip()
    redend = r['redend'].strip()
    aglycon = r['aglycon'].strip()
    for h in r.keys():
        if h.startswith('altname'):
            altname = r[h].strip()
            # Blank altname cells must not become empty names on the motif.
            if altname:
                name.append(altname)

    motif = GlyGenMotif(accession=accession,name=name,glytoucan=gtc,redend=redend,aglycon=aglycon)
    # Report the accession whenever update() returns a true value
    # (presumably meaning the wiki entry changed -- TODO confirm).
    if w.update(motif):
        print >>sys.stderr, accession
    current.add(accession)

# Remove wiki entries that are no longer present in the input file.
for m in w.itermotif(collection=GlyGenMotif):
    if m.get('accession') not in current:
        print >>sys.stderr, "Deleting:",m.get('pagename')
        w.delete(m.get('pagename'))