def ReadTSV(filename):
    """Load an SNV table from *filename*.

    The table reader is chosen from the (case-insensitive) file extension:
    ``csv``, ``tsv``, ``xls``, ``xlsx`` or ``txt``.  For ``txt`` files the
    required header names are passed to the reader as its ``headers``.

    Args:
        filename: Path of the SNV file; its extension selects the reader.

    Returns:
        A 3-tuple ``(header, chrom, snvdata)`` where ``header`` is a
        one-element list holding the tab-joined column names, ``chrom`` is
        the set of values found in the first column, and ``snvdata`` is a
        list of rows, each a list of values in header order.

    Raises:
        RuntimeError: If the extension is missing or not recognised, if a
            required header (CHROM, POS, REF, ALT) is absent, or if those
            headers are not the first four columns in that order.
    """
    snvheaders = ["CHROM", "POS", "REF", "ALT"]

    # rpartition never fails to unpack: a name without a dot yields an empty
    # separator, which is mapped to an empty (and therefore unknown) extension
    # so the caller gets the descriptive RuntimeError below.
    _, dot, extn = filename.rpartition('.')
    extn = extn.lower() if dot else ''

    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    # Fetch the header list once instead of once per row.
    headers = snvs.headers()

    for h in snvheaders:
        if h not in headers:
            raise RuntimeError(
                "Required header: %s missing from SNV file %s" % (h, filename))

    # The first column is used as the chromosome below, so the required
    # headers must lead the table.  An explicit raise (rather than assert)
    # keeps this check active under ``python -O``.
    if headers[:4] != snvheaders:
        raise RuntimeError(
            "Required headers %s must be the first columns of SNV file %s"
            % (" ".join(snvheaders), filename))

    chrom = set()
    snvdata = []
    for r in snvs:
        ri = [r.get(h) for h in headers]
        chrom.add(ri[0])
        snvdata.append(ri)

    return ["\t".join(headers)], chrom, snvdata
# Script fragment: for every path in opt.snvs, pick a table reader from the
# lower-cased file extension (csv, vcf, tsv, xls, xlsx, txt), raising
# RuntimeError for any other extension, then raise RuntimeError if any name
# in snvheaders is missing from the reader's headers(). The final loop walks
# the remaining (non-required) headers; its handling of them is commented out.
# NOTE(review): line breaks/indentation were lost in this copy and the loop
# body appears to continue beyond what is visible here - confirm against the
# original file before editing.
# usedsnvheaders = set() snvchroms = defaultdict(set) for filename in opt.snvs: base, extn = filename.rsplit('.', 1) extn = extn.lower() if extn == 'csv': snvs = CSVFileTable(filename=filename) elif extn == 'vcf': snvs = VCFFile(filename=filename) elif extn == 'tsv': snvs = TSVFileTable(filename=filename) elif extn == 'xls': snvs = XLSFileTable(filename=filename) elif extn == 'xlsx': snvs = XLSXFileTable(filename=filename) elif extn == 'txt': snvs = TXTFileTable(filename=filename, headers=snvheaders) else: raise RuntimeError("Unexpected SNV file extension: %s" % filename) for h in snvheaders: if h not in snvs.headers(): raise RuntimeError("Required header: %s missing from SNV file %s" % (h, filename)) for h in snvs.headers(): if h in snvheaders: continue # if h not in extrasnvheaders: # extrasnvheaders.append(h)
# Script fragment, part 1: interpret the string fs either as an inclusive
# "N-M" range, expanded to (n, opts.numberfmt % n) pairs, or as a single
# value; a value that int() rejects is kept as the pair (fs, fs). The pairs
# are appended to replicates.
# Part 2: read rows from XLSXFileTable(args[0], sheet=opts.sheet), skip rows
# with an empty "File Name" or whose "PCC" differs from opts.pcc, and start a
# dict d with the "Folder Name" and the 114- to 117-Biospecimen columns.
# NOTE(review): the regex literal is not a raw string, so '\d' is an invalid
# escape sequence that newer Python versions warn about.
# NOTE(review): fs is bound outside the visible text and the row loop is cut
# off after the FilenameTemplate comments; nesting cannot be confirmed here.
m = re.search('^(\d+)-(\d+)$', fs) if m: f = map(lambda n: (n, opts.numberfmt % n), range(int(m.group(1)), int(m.group(2)) + 1)) else: try: f = [(fs, fs)] f = [(int(fs), opts.numberfmt % int(fs))] except ValueError: pass replicates.extend(f) folders = [] from dataset import XLSXFileTable rows = XLSXFileTable(args[0], sheet=opts.sheet) for r in rows: if not r.get("File Name"): continue if r.get("PCC", "") != opts.pcc: continue # print r d = dict(Folder=r["Folder Name"]) d["114-Biospecimen"] = r["114-Biospecimen"] d["115-Biospecimen"] = r["115-Biospecimen"] d["116-Biospecimen"] = r["116-Biospecimen"] d["117-Biospecimen"] = r["117-Biospecimen"] # JHU filename inconsistencies... # d["FilenameTemplate"] = re.sub(r'0?1\.raw$','%(fraction)s.raw',r["File Name"]) # BI/PNNL filenames... # d["FilenameTemplate"] = r["File Name"].replace('_f01.raw','_f%s.raw')
# Script fragment: create GlycoMotifWiki, GlyTouCan and GlyTouCanCache
# objects, then iterate the rows of the XLSX file named by sys.argv[1].
# For each row the '#' column is parsed as an int (on ValueError the
# traceback is printed and the row is skipped) and the
# 'Trivial Name-Cummings' value is stripped of surrounding whitespace.
# NOTE(review): the fragment is truncated at the dangling "if not name:";
# what happens to unnamed rows is not visible here.
from getwiki import GlycoMotifWiki, CCRCMotif w = GlycoMotifWiki() from pygly.GlyTouCan import GlyTouCan gtc = GlyTouCan() from gtccache import GlyTouCanCache gtccache = GlyTouCanCache() from dataset import XLSXFileTable rows = XLSXFileTable(sys.argv[1]) current = set() for r in rows: try: index = int(r['#']) except ValueError: traceback.print_exc() continue name = r['Trivial Name-Cummings'] if name: name = name.strip() if not name:
# Script fragment: drop sys.argv[1], exit with status 1 when fewer than five
# argv entries remain, then take the directory (trailing os.sep removed),
# study, subproteome and site from argv[1..4]. Reads an .ini file named after
# the script, locates "<d>/<d>_Protocols.xlsx" and "<d>/<d>_Metadata.xlsx"
# via glob (first match), opens the three protocol sheets plus the metadata
# workbook, and begins walking each protocol sheet's rows and stripped header
# names.
# NOTE(review): this looks like Python 2 code - `basestring` does not exist
# on Python 3, and `headers[1:]` slices the result of map(), which is not
# subscriptable there.
# NOTE(review): the fragment is truncated at the dangling "if not val:".
sys.argv.pop(1) if len(sys.argv) < 5: sys.exit(1) d = sys.argv[1].rstrip(os.sep) study = sys.argv[2] subproteome = sys.argv[3] site = sys.argv[4] iniFile = sys.argv[0].rsplit('.', 1)[0] + '.ini' config = ConfigParser() config.read([iniFile]) from dataset import XLSXFileTable prfile = glob.glob("%s/%s_Protocols.xlsx" % (d, d))[0] mdfile = glob.glob("%s/%s_Metadata.xlsx" % (d, d))[0] asp = XLSXFileTable(prfile, sheet='Analytical Sample Protocol') chp = XLSXFileTable(prfile, sheet='Chromatography Protocol') msp = XLSXFileTable(prfile, sheet='Mass Spectrometry Protocol') metadatafile = XLSXFileTable(mdfile) protocols = {} for p, t in zip([asp, chp, msp], ["ASP", "CHP", "MSP"]): headers = map(str.strip, p.headers()) for r in p: field = str(r.get('Name')).strip() for h in headers[1:]: h = h.strip() val = r.get(h) if isinstance(val, basestring): val = val.strip() if not val: