Esempio n. 1
0
def ReadTSV(filename):
    """Load an SNV table and return its header line, chromosomes and rows.

    The reader class is chosen from the case-insensitive file extension
    (csv, tsv, xls, xlsx or txt).  For txt files the required header names
    are handed to the reader through its ``headers`` argument.

    Args:
        filename: Path of the SNV file; its extension selects the reader.

    Returns:
        A 3-tuple ``(header_lines, chrom, snvdata)`` where ``header_lines``
        is a one-element list holding the tab-joined header names,
        ``chrom`` is the set of first-column (CHROM) values seen, and
        ``snvdata`` is a list of rows, each a list of values in header order.

    Raises:
        RuntimeError: If the extension is missing or not supported, if a
            required header is absent, or if the first four headers are not
            exactly CHROM, POS, REF, ALT in that order.
    """
    snvheaders = "CHROM POS REF ALT".split()

    # rpartition never fails to unpack, so a name without any '.' reaches the
    # "unexpected extension" error below instead of an opaque ValueError.
    _, dot, extn = filename.rpartition('.')
    extn = extn.lower() if dot else ''
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    # Fetch the headers once; they are reused for validation, for every row
    # and for the returned header line.
    headers = snvs.headers()
    for h in snvheaders:
        if h not in headers:
            raise RuntimeError("Required header: %s missing from SNV file %s" %
                               (h, filename))

    # Explicit raise rather than assert: asserts are stripped under -O, which
    # would silently accept files whose leading columns are out of order.
    if headers[:4] != snvheaders:
        raise RuntimeError("SNV file %s must start with the columns: %s" %
                           (filename, " ".join(snvheaders)))

    chrom = set()
    snvdata = []
    for r in snvs:
        ri = [r.get(h) for h in headers]
        chrom.add(ri[0])
        snvdata.append(ri)

    return ["\t".join(headers)], chrom, snvdata
Esempio n. 2
0
# usedsnvheaders = set()
# Starts empty; nothing in the visible part of the script adds entries to it.
snvchroms = defaultdict(set)
for filename in opt.snvs:

    # Pick the table reader from the lower-cased file extension.
    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        snvs = VCFFile(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        # Only the txt reader is given the header names explicitly.
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    # Every required header must be present in the file.
    for h in snvheaders:
        if h not in snvs.headers():
            raise RuntimeError("Required header: %s missing from SNV file %s" %
                               (h, filename))

    # This loop currently has no effect: the code that collected the extra
    # (non-required) headers is commented out.
    for h in snvs.headers():
        if h in snvheaders:
            continue
        # if h not in extrasnvheaders:
        #     extrasnvheaders.append(h)

    # NOTE(review): `fs`, `opts` and `replicates` are not defined in the
    # visible part of this script (the loop above reads `opt`, not `opts`) --
    # confirm that the statements below really belong inside this loop.
    #
    # A value of the form "A-B" expands to every integer from A to B
    # inclusive, each paired with its `opts.numberfmt`-formatted label.
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    m = re.search(r'^(\d+)-(\d+)$', fs)
    if m:
        f = map(lambda n: (n, opts.numberfmt % n),
                range(int(m.group(1)),
                      int(m.group(2)) + 1))
    else:
        try:
            # Fall back to the raw value; it is replaced by the integer and
            # its formatted label only when `fs` parses as an int.
            f = [(fs, fs)]
            f = [(int(fs), opts.numberfmt % int(fs))]
        except ValueError:
            pass
    replicates.extend(f)

# Nothing in the visible part of the script appends to this list.
folders = []
from dataset import XLSXFileTable
rows = XLSXFileTable(args[0], sheet=opts.sheet)
for r in rows:
    # Ignore rows that have no file name or whose PCC differs from the
    # one requested on the command line.
    if not r.get("File Name") or r.get("PCC", "") != opts.pcc:
        continue
    # print r
    d = {"Folder": r["Folder Name"]}
    # Copy the four biospecimen columns across unchanged, in this order.
    d.update(
        (column, r[column])
        for column in ("114-Biospecimen", "115-Biospecimen",
                       "116-Biospecimen", "117-Biospecimen")
    )
    # JHU filename inconsistencies...
    # d["FilenameTemplate"] = re.sub(r'0?1\.raw$','%(fraction)s.raw',r["File Name"])
    # BI/PNNL filenames...
    # d["FilenameTemplate"] = r["File Name"].replace('_f01.raw','_f%s.raw')
Esempio n. 4
0
# Script setup: build the helper objects and open the input spreadsheet.
# CCRCMotif is imported here but not referenced in the visible lines.
from getwiki import GlycoMotifWiki, CCRCMotif

# All three helpers below are constructed with their default arguments.
w = GlycoMotifWiki()

from pygly.GlyTouCan import GlyTouCan

gtc = GlyTouCan()

from gtccache import GlyTouCanCache

gtccache = GlyTouCanCache()

from dataset import XLSXFileTable

# The spreadsheet path is the first command-line argument.
rows = XLSXFileTable(sys.argv[1])

# Starts empty; nothing in the visible part of the script adds to it.
current = set()
for r in rows:

    try:
        index = int(r['#'])
    except ValueError:
        traceback.print_exc()
        continue

    name = r['Trivial Name-Cummings']
    if name:
        name = name.strip()

    if not name:
Esempio n. 5
0
    sys.argv.pop(1)
# Four positional arguments are required after the script name; exit with
# status 1 when any of them is missing.
if len(sys.argv) < 5:
    sys.exit(1)
d, study, subproteome, site = sys.argv[1:5]
# Trailing path separators are removed because `d` is also used as the
# prefix of the workbook file names below.
d = d.rstrip(os.sep)

# The configuration file name is the script path with everything after its
# last '.' replaced by 'ini'.
iniFile = sys.argv[0].rsplit('.', 1)[0] + '.ini'
config = ConfigParser()
config.read([iniFile])

from dataset import XLSXFileTable
# The first glob match is used; an IndexError is raised if nothing matches.
prfile = glob.glob("%s/%s_Protocols.xlsx" % (d, d))[0]
mdfile = glob.glob("%s/%s_Metadata.xlsx" % (d, d))[0]
# One table per protocol sheet of the protocols workbook, opened in order.
asp, chp, msp = (
    XLSXFileTable(prfile, sheet=sheet_name)
    for sheet_name in ('Analytical Sample Protocol',
                       'Chromatography Protocol',
                       'Mass Spectrometry Protocol')
)
metadatafile = XLSXFileTable(mdfile)

protocols = {}
for p, t in zip([asp, chp, msp], ["ASP", "CHP", "MSP"]):
    headers = map(str.strip, p.headers())
    for r in p:
        field = str(r.get('Name')).strip()
        for h in headers[1:]:
            h = h.strip()
            val = r.get(h)
            if isinstance(val, basestring):
                val = val.strip()
            if not val: