Beispiel #1
0
def main(args):
    """Run the partition step and write out the resulting groups of reads.

    Reads are parsed from ``args.infile`` and handed to ``partition`` along
    with the filtering options found on ``args``. Output takes one of two
    forms:

    * ``args.split`` set: each partition goes to its own file named
      ``<args.split>.cc<N>.augfastq.gz``.
    * otherwise: all reads go to ``args.out``, each read name suffixed with
      `` kvcc=<N>`` to record the partition it came from.

    A one-line summary (number of reads and partitions) is printed to
    ``args.logfile`` at the end.
    """
    split_prefix = args.split
    combined_out = None
    if split_prefix:
        kevlar.mkdirp(split_prefix, trim=True)
    else:
        combined_out = kevlar.open(args.out, 'w')

    infile = kevlar.open(args.infile, 'r')
    reads = kevlar.parse_augmented_fastx(infile)
    components = partition(
        reads,
        strict=args.strict,
        minabund=args.min_abund,
        maxabund=args.max_abund,
        dedup=args.dedup,
        gmlfile=args.gml,
        logstream=args.logfile,
    )

    # Both counters start at zero so the summary is still valid when the
    # partitioner yields nothing.
    component_count = 0
    read_count = 0
    for component_count, component in enumerate(components, 1):
        read_count += len(component)
        if split_prefix:
            # One output file per partition, numbered from 1.
            part_filename = '{:s}.cc{:d}.augfastq.gz'.format(
                split_prefix, component_count
            )
            with kevlar.open(part_filename, 'w') as part_out:
                for record in component:
                    kevlar.print_augmented_fastx(record, part_out)
            continue
        # Single combined output: tag each read with its partition number.
        tag = ' kvcc={:d}'.format(component_count)
        for record in component:
            record.name += tag
            kevlar.print_augmented_fastx(record, combined_out)

    summary = (
        '[kevlar::partition] grouped {:d} reads'
        ' into {:d} connected components'
    ).format(read_count, component_count)
    print(summary, file=args.logfile)
Beispiel #2
0
def test_mkdirp():
    """Check the path returned by ``kevlar.mkdirp`` with and without ``trim``.

    Three cases are exercised beneath a fresh temporary directory:

    * no ``trim``: the path passed in is returned unchanged;
    * ``trim=True`` on a two-level path: the returned path omits the final
      component;
    * ``trim=True`` on a deeply nested path: again only the final component
      is dropped.

    The temporary directory is removed even if an assertion fails, so a
    failing run does not leave stray directories behind.
    """
    tempdir = tempfile.mkdtemp()
    try:
        path1 = os.path.join(tempdir, 'partitions')
        assert kevlar.mkdirp(path1) == path1

        path2 = os.path.join(tempdir, 'partitions2', 'part')
        path2test = os.path.join(tempdir, 'partitions2')
        assert kevlar.mkdirp(path2, trim=True) == path2test

        path3 = os.path.join(
            tempdir, 'partitions3', 'a', 'long', 'path', 'PART'
        )
        path3test = os.path.join(tempdir, 'partitions3', 'a', 'long', 'path')
        assert kevlar.mkdirp(path3, trim=True) == path3test
    finally:
        # mkdtemp() leaves deletion to the caller; do it unconditionally so
        # an AssertionError above cannot leak the directory.
        shutil.rmtree(tempdir)