Esempio n. 1
0
def _xsq_convert_region(filename, sample, region, tags, outname, suffix=None):
    """Write the reads of one region of an XSQ file to a gzipped FASTQ file.

    xsq_convert dispatches this function through ``pool.apply_async``, so it
    opens its own ``XSQFile`` handle rather than sharing one.

    Args:
        filename: path of the XSQ file to read.
        sample: sample name handed to ``XSQFile.fetch_region``.
        region: region identifier handed to ``XSQFile.fetch_region``.
        tags: tags handed to ``XSQFile.fetch_region``.
        outname: path of the gzip-compressed FASTQ file to create.
        suffix: optional text appended to every read name. It defaults to
            None (no suffix) so existing five-argument callers keep working.

    Returns:
        ``region`` unchanged, which lets an ``apply_async`` callback see
        which region finished.
    """
    # An empty tail yields exactly the same record as the no-suffix format.
    tail = suffix if suffix else ''

    out = gzip.open(outname, 'w')
    try:
        xsq = XSQFile(filename)
        try:
            for name, seq, quals in xsq.fetch_region(sample, region, tags):
                # Quality values are written as ASCII characters at offset 33.
                qual_str = ''.join([chr(q + 33) for q in quals])
                out.write('@%s%s\n%s\n+\n%s\n' % (name, tail, seq, qual_str))
        finally:
            xsq.close()
    finally:
        # Close even on failure so the gzip stream is always finalised.
        out.close()
    return region
Esempio n. 2
0
def xsq_list(filename, count=False, minreads=-1, total=False):
    """Print the tags and samples found in an XSQ file to stdout.

    Args:
        filename: path of the XSQ file.
        count: when true, print each sample's read count as well; samples
            whose count is not greater than ``minreads`` are left out.
        minreads: exclusive lower bound on the read count (only consulted
            when ``count`` is true).
        total: when true together with ``count``, print the sum of the
            listed read counts, leaving out the 'Unclassified' sample.

    A KeyboardInterrupt raised while samples are being listed stops the
    listing quietly; the trailing output is still printed.
    """
    # NOTE: every print below uses the parenthesised single-argument form,
    # which behaves exactly like the Python 2 print statement.
    xsq = XSQFile(filename)
    try:
        print('Tags: ')
        for tag in xsq.tags:
            t = xsq.tags[tag]
            if t.is_colorspace:
                print('    %s[cs/%s]' % (tag, t.prefix))
            else:
                print('    %s[nt]' % (tag,))
        print('')
        print('Samples: ')

        acc = 0
        try:
            for sample in xsq.get_samples():
                desc = xsq.get_sample_desc(sample).strip()

                if not count:
                    if desc:
                        print('    %s (%s)' % (sample, desc))
                    else:
                        print('    %s' % (sample, ))
                    continue

                readcount = xsq.get_read_count(sample)
                if readcount <= minreads:
                    continue

                pn = pretty_number(readcount)
                # 'Unclassified' is listed but kept out of the total.
                if sample != 'Unclassified':
                    acc += readcount

                if desc:
                    print('    %s (%s) %s' % (sample, desc, pn))
                else:
                    print('    %s %s' % (sample, pn))
        except KeyboardInterrupt:
            # Deliberate: Ctrl-C just cuts the listing short.
            pass

        if count and total:
            print('')
            print('    Total reads => %s' % pretty_number(acc))

        print('')
    finally:
        # Release the file handle even if a lookup above raised.
        xsq.close()
Esempio n. 3
0
def xsq_convert_all(filename, tags=None, force=False, suffix=None, noz=False, usedesc=False, minreads=0, fsuffix=None, unclassified=False, procs=1, tmpdir=None):
    """Convert each eligible sample of an XSQ file into its own FASTQ file.

    The samples are first scanned (with progress written to stderr) to decide
    which ones to convert; the XSQ file is then closed and ``xsq_convert`` is
    called once per selected sample.

    Args:
        filename: path of the XSQ file.
        tags: passed through to ``xsq_convert``.
        force: convert even when the output file already exists.
        suffix: passed through to ``xsq_convert``.
        noz: write ``.fastq`` instead of ``.fastq.gz``.
        usedesc: name output files after the sample description when one
            is available, falling back to the sample name.
        minreads: samples with fewer reads than this are skipped.
        fsuffix: text inserted between the base name and the extension.
        unclassified: also convert the 'Unclassified' sample.
        procs: passed through to ``xsq_convert``.
        tmpdir: passed through to ``xsq_convert``.
    """
    # Normalise once up front; the value is the same for every sample.
    if not fsuffix:
        fsuffix = ''

    samples = []

    xsq = XSQFile(filename)
    try:
        for sample in xsq.get_samples():
            fname = sample
            if usedesc:
                fname = xsq.get_sample_desc(sample) or sample

            if fname == sample:
                sys.stderr.write('Sample: %s... ' % fname)
            else:
                sys.stderr.write('Sample: (%s) %s... ' % (sample, fname))

            outname = ('%s%s.fastq' if noz else '%s%s.fastq.gz') % (fname, fsuffix)

            # Existing output is left alone unless the caller forces it.
            if not force and os.path.exists(outname):
                sys.stderr.write('\n')
                continue

            if sample == 'Unclassified' and not unclassified:
                sys.stderr.write(' Skipping unclassified\n')
                continue

            count = xsq.get_read_count(sample)
            if count < minreads:
                sys.stderr.write(' Too few reads (%s)\n' % count)
                continue

            samples.append((sample, outname))
            sys.stderr.write('\n')
    finally:
        # Release the file handle even if a lookup above raised.
        xsq.close()

    for sample, outname in samples:
        xsq_convert(filename, sample, tags, suffix, procs=procs, outname=outname, noz=noz, tmpdir=tmpdir)
Esempio n. 4
0
def xsq_info(filename):
    """Dump the RunMetadata node of an XSQ file via ``XSQFile.dump``.

    Args:
        filename: path of the XSQ file.
    """
    xsq = XSQFile(filename)
    try:
        xsq.dump(xsq.hdf.root.RunMetadata)
    finally:
        # Release the file handle even if the dump fails.
        xsq.close()
Esempio n. 5
0
def xsq_convert(filename, sample=None, tags=None, suffix=None, procs=1, outname='-', tmpdir=None, noz=False):
    """Convert one sample of an XSQ file to FASTQ, one worker task per region.

    Each region is written to its own gzipped temp file by
    ``_xsq_convert_region`` running in a process pool. The temp files are
    then passed in order through ``_dump_stream`` into the final output and
    deleted.

    Args:
        filename: path of the XSQ file.
        sample: sample to convert.
        tags: tags handed to each region worker.
        suffix: accepted for interface compatibility.
            NOTE(review): this function does not forward it to the region
            worker; the worker is called with five positional arguments.
        procs: number of worker processes; values below 1 mean one per CPU.
        outname: output path, or '-' to write to stdout.
        tmpdir: directory for temp files (defaults to the current directory).
        noz: write the final file uncompressed instead of gzipped.

    Raises:
        Any exception raised inside a region worker is re-raised here after
        the pool has finished, before merging starts.
        SystemExit: with status 1 if interrupted while waiting for workers.
    """
    sys.stderr.write("Converting: %s\n" % sample)
    if tmpdir is None:
        tmpdir = '.'

    if procs < 1:
        procs = multiprocessing.cpu_count()
    # The pool is created before the XSQ file is opened, as in the original.
    pool = multiprocessing.Pool(procs)

    regions = []
    tmpnames = []
    xsq = XSQFile(filename)
    try:
        for region in xsq.get_regions(sample):
            regions.append(region)
            tmpnames.append(os.path.join(tmpdir, '.tmp.%s.%s.%s.fastq.gz.%s' % (os.path.basename(filename), sample, region, os.getpid())))
    finally:
        xsq.close()

    if ETA:
        callback = Callback(len(regions))
    else:
        callback = None

    # Keep the AsyncResult objects: they are the only place a worker's
    # exception is recorded.
    results = []
    for region, tmpname in zip(regions, tmpnames):
        results.append(pool.apply_async(_xsq_convert_region, (filename, sample, region, tags, tmpname), callback=callback))

    pool.close()
    try:
        pool.join()
    except KeyboardInterrupt:
        pool.terminate()
        sys.exit(1)

    if callback:
        callback.done()

    # All tasks are finished, so get() returns at once or re-raises the
    # worker's exception. Without this a failed region would be merged as
    # if it had succeeded.
    try:
        for result in results:
            result.get()
    except Exception:
        for tmp in tmpnames:
            if os.path.exists(tmp):
                os.unlink(tmp)
        raise

    sys.stderr.write("Merging temp files...\n")
    if ETA:
        callback = Callback(len(regions))
    else:
        callback = None

    # The merged output is built under a temp name and moved into place only
    # once it is complete.
    merged_tmp = os.path.join(tmpdir, '.tmp.%s.%s.%s' % (os.path.basename(outname), sample, os.getpid()))

    if outname == '-':
        out = sys.stdout
    elif noz:
        out = open(merged_tmp, 'w')
    else:
        out = gzip.open(merged_tmp, 'w')

    try:
        for tmp in tmpnames:
            src = gzip.open(tmp)
            try:
                _dump_stream(src, out)
            finally:
                src.close()
            os.unlink(tmp)
            if callback:
                callback()
    finally:
        # Never close stdout; do close a real output file even on failure.
        if out is not sys.stdout:
            out.close()

    if out is not sys.stdout:
        shutil.move(merged_tmp, outname)

    if callback:
        callback.done()