def ReadTSV(filename):
    """Read an SNV table from *filename*.

    The reader class is chosen from the case-insensitive file extension
    (``csv``, ``tsv``, ``xls``, ``xlsx`` or ``txt``).  ``txt`` files are read
    with the required headers supplied explicitly.

    Args:
        filename: Path of the SNV file; its extension selects the reader.

    Returns:
        A 3-tuple ``(header_lines, chrom, snvdata)``:
        ``header_lines`` is a one-element list holding the tab-joined headers,
        ``chrom`` is the set of first-column values seen, and ``snvdata`` is a
        list of rows, each a list of values in header order.

    Raises:
        RuntimeError: If the extension is missing or not supported, or if one
            of the required headers is absent from the file.
        AssertionError: If the first four headers are not exactly
            CHROM, POS, REF, ALT in that order.
    """
    snvheaders = ["CHROM", "POS", "REF", "ALT"]

    # rpartition never fails to unpack: a name without any '.' yields an
    # empty separator, which is treated as "no extension" and rejected below
    # with the same error as any other unsupported extension.
    _, dot, extn = filename.rpartition('.')
    extn = extn.lower() if dot else ''

    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    headers = snvs.headers()
    for h in snvheaders:
        if h not in headers:
            raise RuntimeError(
                "Required header: %s missing from SNV file %s" % (h, filename))
    # The required columns must also come first and in order, because the
    # chromosome is taken from position 0 of every row below.
    assert headers[:4] == snvheaders, \
        "SNV file %s must start with headers: %s" % (filename, " ".join(snvheaders))

    chrom = set()
    snvdata = []
    for r in snvs:
        ri = [r.get(h) for h in headers]
        chrom.add(ri[0])
        snvdata.append(ri)

    return ["\t".join(headers)], chrom, snvdata
""".split()) snvdata = {} # extrasnvheaders = [] # usedsnvheaders = set() snvchroms = defaultdict(set) for filename in opt.snvs: base, extn = filename.rsplit('.', 1) extn = extn.lower() if extn == 'csv': snvs = CSVFileTable(filename=filename) elif extn == 'vcf': snvs = VCFFile(filename=filename) elif extn == 'tsv': snvs = TSVFileTable(filename=filename) elif extn == 'xls': snvs = XLSFileTable(filename=filename) elif extn == 'xlsx': snvs = XLSXFileTable(filename=filename) elif extn == 'txt': snvs = TXTFileTable(filename=filename, headers=snvheaders) else: raise RuntimeError("Unexpected SNV file extension: %s" % filename) for h in snvheaders: if h not in snvs.headers(): raise RuntimeError("Required header: %s missing from SNV file %s" % (h, filename)) for h in snvs.headers():
#!/bin/env python27
#
# Load GlyGenMotif records from the TSV files named on the command line into
# the GlycoMotif wiki, then delete every GlyGenMotif page whose accession did
# not appear in any of those files.
#
# Columns read from each file: accession, name, gtc, redend, aglycon, plus
# any column whose header starts with "altname".

import sys,traceback
from getwiki import GlycoMotifWiki, GlyGenMotif
w = GlycoMotifWiki()

from dataset import TSVFileTable

# Zero-padded accessions seen in the input files; used below to decide
# which existing pages to delete.
current = set()

for path in sys.argv[1:]:
    for row in TSVFileTable(path):
        accession = "%06d" % int(row['accession'])
        prefname = row['name'].strip()
        gtc = row['gtc'].strip()
        redend = row['redend'].strip()
        aglycon = row['aglycon'].strip()

        # Preferred name first, then every non-blank altname* value.
        alternates = [row[h].strip() for h in row.keys() if h.startswith('altname')]
        name = [prefname] + [alt for alt in alternates if alt]

        motif = GlyGenMotif(
            accession=accession,
            prefname=prefname,
            name=name,
            glytoucan=gtc,
            redend=redend,
            aglycon=aglycon,
        )

        # Report the accession whenever update() returns a true value
        # (presumably meaning the wiki page was written -- confirm in getwiki).
        if w.update(motif):
            print >>sys.stderr, accession
        current.add(accession)

# Remove GlyGenMotif pages that none of the input files mentioned.
for page in w.itermotif(collection=GlyGenMotif):
    if page.get('accession') in current:
        continue
    pagename = page.get('pagename')
    print >>sys.stderr, "Deleting:",pagename
    w.delete(pagename)
#!/bin/env python27
#
# Load GlyGenMotif records from the TSV file given as the first command-line
# argument into the GlycoMotif wiki, then delete every GlyGenMotif page whose
# accession did not appear in that file.
#
# Columns read: accession, name, gtc, redend, aglycon, plus any column whose
# header starts with "altname".

import sys,traceback
from getwiki import GlycoMotifWiki, GlyGenMotif
w = GlycoMotifWiki()

from dataset import TSVFileTable

rows = TSVFileTable(sys.argv[1])

# Zero-padded accessions seen in the input file; used below to decide which
# existing pages to delete.
current = set()
for r in rows:
    accession = "%06d"%(int(r['accession']),)
    name = [r['name'].strip()]
    gtc = r['gtc'].strip()
    redend = r['redend'].strip()
    aglycon = r['aglycon'].strip()
    for h in r.keys():
        if h.startswith('altname'):
            altname = r[h].strip()
            # Skip blank altname cells so the name list never contains
            # empty strings.
            if altname:
                name.append(altname)
    motif = GlyGenMotif(accession=accession,name=name,glytoucan=gtc,redend=redend,aglycon=aglycon)
    # Report the accession whenever update() returns a true value
    # (presumably meaning the wiki page was written -- confirm in getwiki).
    if w.update(motif):
        print >>sys.stderr, accession
    current.add(accession)

# Remove GlyGenMotif pages that the input file did not mention.
for m in w.itermotif(collection=GlyGenMotif):
    if m.get('accession') not in current:
        print >>sys.stderr, "Deleting:",m.get('pagename')
        w.delete(m.get('pagename'))