Example #1
0
def main():
    """Generate the Hi-C QC plot, or map one read-end of a FASTQ file.

    Options come from ``get_options()``.  When ``opts.quality_plot`` is set,
    only the QC plot is produced (a PDF named after the FASTQ file, written
    inside ``opts.output``) and the function returns.  Otherwise the reads are
    mapped with ``full_mapping``, one line per mapped output file is appended
    to ``trace.log`` inside ``opts.output``, and ``opts.tmp`` is deleted.
    """
    # local import: only the final cleanup step needs it
    from subprocess import call

    opts = get_options()

    if opts.quality_plot:
        # computed once so the logged path and the saved path cannot diverge
        plot_path = path.join(opts.output,
                              path.split(opts.fastq)[-1] + '.pdf')
        logging.info('Generating Hi-C QC plot at:\n  ' + plot_path)
        quality_plot(opts.fastq, r_enz=opts.renz,
                     nreads=100000, paired=False,
                     savefig=plot_path)
        return

    logging.info('mapping %s read %s to %s', opts.fastq, opts.read, opts.output)
    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.output, '01_mapped_r' + opts.read),
                            opts.renz, temp_dir=opts.tmp,
                            frag_map=opts.strategy == 'frag', clean=True,
                            windows=opts.windows, get_nread=True)

    # write machine log
    log_lines = [('# MAPPED READ%s PATH\t%d\t' % (opts.read, num)) + out
                 for out, num in outfiles]
    with open(path.join(opts.output, 'trace.log'), "a") as mlog:
        fcntl.flock(mlog, fcntl.LOCK_EX)
        try:
            mlog.write('\n'.join(log_lines) + '\n')
            # the write is buffered: push it to the file while the lock is
            # still held, otherwise it would only land on close, after unlock
            mlog.flush()
        finally:
            fcntl.flock(mlog, fcntl.LOCK_UN)

    # clean
    logging.info('cleaning temporary files')
    # argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to rm verbatim
    call(['rm', '-rf', '--', opts.tmp])
Example #2
0
def run(opts):
    """Generate the Hi-C QC plot, or map one read-end and record the job.

    :param opts: parsed options; validated first with ``check_options``.

    When ``opts.quality_plot`` is set, only the QC plot is written (a PDF
    named after the FASTQ file inside ``opts.workdir``), its two returned
    percentages are logged, and the function returns.  Otherwise the reads
    are mapped with ``full_mapping``, the job is stored with ``save_to_db``
    and one line per mapped output file is appended to ``trace.log`` inside
    ``opts.workdir``.
    """
    check_options(opts)

    launch_time = time.localtime()

    # hash that is passed to the mapper as suffix for its outputs
    param_hash = digest_parameters(opts, get_md5=True)

    if opts.quality_plot:
        # computed once so the logged path and the saved path cannot diverge
        plot_path = path.join(opts.workdir,
                              path.split(opts.fastq)[-1] + '.pdf')
        logging.info('Generating Hi-C QC plot at:\n  ' + plot_path)
        dangling_ends, ligated = quality_plot(opts.fastq, r_enz=opts.renz,
                                              nreads=100000, paired=False,
                                              savefig=plot_path)
        logging.info('  - Dangling-ends (sensu-stricto): %.3f%%', dangling_ends)
        logging.info('  - Ligation sites: %.3f%%', ligated)
        return

    logging.info('mapping %s read %s to %s', opts.fastq, opts.read, opts.workdir)

    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.workdir,
                                      '01_mapped_r%d' % (opts.read)),
                            r_enz=opts.renz, temp_dir=opts.tmp, nthreads=opts.cpus,
                            frag_map=not opts.iterative, clean=not opts.keep_tmp,
                            windows=opts.windows, get_nread=True, skip=opts.skip,
                            suffix=param_hash, **opts.gem_param)

    # adjust line count: each count becomes the previous count minus the
    # number of lines found in the previous output file
    if opts.skip:
        for i, (out, _) in enumerate(outfiles[1:], 1):
            prev_path, prev_count = outfiles[i - 1]
            # context manager so the counted file is closed right away
            with open(prev_path) as prev_file:
                prev_lines = sum(1 for _line in prev_file)
            outfiles[i] = out, prev_count - prev_lines

    finish_time = time.localtime()

    # save all job information to sqlite DB
    save_to_db(opts, outfiles, launch_time, finish_time)

    # write machine log, serialised through a lock file in the workdir
    # NOTE(review): the exists-then-create sequence is not atomic; two jobs
    # can still both pass the wait loop at the same moment.
    lock_path = path.join(opts.workdir, '__lock_log')
    while path.exists(lock_path):
        time.sleep(0.5)
    open(lock_path, 'a').close()
    try:
        with open(path.join(opts.workdir, 'trace.log'), "a") as mlog:
            mlog.write('\n'.join([
                ('# MAPPED READ%s\t%d\t%s' % (opts.read, num, out))
                for out, num in outfiles]) + '\n')
    finally:
        # release lock even when the write fails, otherwise every other job
        # polling for this file would wait forever
        try:
            remove(lock_path)
        except OSError:
            pass
Example #3
0
def run(opts):
    """Generate the Hi-C QC plot, or map one read-end and record the job.

    :param opts: parsed options; validated first with ``check_options``.

    When ``opts.quality_plot`` is set, only the QC plot is written (a PDF
    named after the FASTQ file inside ``opts.workdir``), its two returned
    percentages are logged, and the function returns.  Otherwise the reads
    are mapped with ``full_mapping``, the job is stored with ``save_to_db``,
    one line per mapped output file is appended to ``trace.log`` inside
    ``opts.workdir`` and, unless ``opts.keep_tmp`` is set, ``opts.tmp`` is
    deleted.
    """
    # local import: only the final cleanup step needs it
    from subprocess import call

    check_options(opts)

    launch_time = time.localtime()

    # hash that is passed to the mapper as suffix for its outputs
    param_hash = digest_parameters(opts, get_md5=True)

    if opts.quality_plot:
        # computed once so the logged path and the saved path cannot diverge
        plot_path = path.join(opts.workdir,
                              path.split(opts.fastq)[-1] + '.pdf')
        logging.info('Generating Hi-C QC plot at:\n  ' + plot_path)
        dangling_ends, ligated = quality_plot(opts.fastq, r_enz=opts.renz,
                                              nreads=100000, paired=False,
                                              savefig=plot_path)
        logging.info('  - Dangling-ends (sensu-stricto): %.3f%%', dangling_ends)
        logging.info('  - Ligation sites: %.3f%%', ligated)
        return

    logging.info('mapping %s read %s to %s', opts.fastq, opts.read, opts.workdir)
    # clean must be the opposite of keep_tmp: asking to keep temporary files
    # must not make the mapper remove them
    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.workdir,
                                      '01_mapped_r%d' % (opts.read)),
                            opts.renz, temp_dir=opts.tmp, nthreads=opts.cpus,
                            frag_map=not opts.iterative, clean=not opts.keep_tmp,
                            windows=opts.windows, get_nread=True, skip=opts.skip,
                            suffix=param_hash, **opts.gem_param)

    # adjust line count: each count becomes the previous count minus the
    # number of lines found in the previous output file
    if opts.skip:
        for i, (out, _) in enumerate(outfiles[1:], 1):
            prev_path, prev_count = outfiles[i - 1]
            # context manager so the counted file is closed right away
            with open(prev_path) as prev_file:
                prev_lines = sum(1 for _line in prev_file)
            outfiles[i] = out, prev_count - prev_lines

    finish_time = time.localtime()

    # save all job information to sqlite DB
    save_to_db(opts, outfiles, launch_time, finish_time)

    # write machine log
    with open(path.join(opts.workdir, 'trace.log'), "a") as mlog:
        fcntl.flock(mlog, fcntl.LOCK_EX)
        try:
            mlog.write('\n'.join([
                ('# MAPPED READ%s\t%d\t%s' % (opts.read, num, out))
                for out, num in outfiles]) + '\n')
            # the write is buffered: push it to the file while the lock is
            # still held, otherwise it would only land on close, after unlock
            mlog.flush()
        finally:
            fcntl.flock(mlog, fcntl.LOCK_UN)

    # clean
    if not opts.keep_tmp:
        logging.info('cleaning temporary files')
        # argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed to rm verbatim
        call(['rm', '-rf', '--', opts.tmp])
Example #4
0
def main():
    """Generate the Hi-C QC plot, or map one read-end of a FASTQ file.

    Options come from ``get_options()``.  When ``opts.quality_plot`` is set,
    only the QC plot is produced (a PDF named after the FASTQ file, written
    inside ``opts.output``) and the function returns.  Otherwise the reads are
    mapped with ``full_mapping``, one line per mapped output file is appended
    to ``trace.log`` inside ``opts.output``, and ``opts.tmp`` is deleted.
    """
    # local import: only the final cleanup step needs it
    from subprocess import call

    opts = get_options()

    if opts.quality_plot:
        # computed once so the logged path and the saved path cannot diverge
        plot_path = path.join(opts.output,
                              path.split(opts.fastq)[-1] + '.pdf')
        logging.info('Generating Hi-C QC plot at:\n  ' + plot_path)
        quality_plot(opts.fastq,
                     r_enz=opts.renz,
                     nreads=100000,
                     paired=False,
                     savefig=plot_path)
        return

    logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                 opts.output)
    outfiles = full_mapping(opts.index,
                            opts.fastq,
                            path.join(opts.output, '01_mapped_r' + opts.read),
                            opts.renz,
                            temp_dir=opts.tmp,
                            frag_map=opts.strategy == 'frag',
                            clean=True,
                            windows=opts.windows,
                            get_nread=True)

    # write machine log
    log_lines = [('# MAPPED READ%s PATH\t%d\t' % (opts.read, num)) + out
                 for out, num in outfiles]
    with open(path.join(opts.output, 'trace.log'), "a") as mlog:
        fcntl.flock(mlog, fcntl.LOCK_EX)
        try:
            mlog.write('\n'.join(log_lines) + '\n')
            # the write is buffered: push it to the file while the lock is
            # still held, otherwise it would only land on close, after unlock
            mlog.flush()
        finally:
            fcntl.flock(mlog, fcntl.LOCK_UN)

    # clean
    logging.info('cleaning temporary files')
    # argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to rm verbatim
    call(['rm', '-rf', '--', opts.tmp])
Example #5
0
def run(opts):
    """Produce the Hi-C QC plot, map the reads and record the job.

    :param opts: parsed options; validated first with ``check_options``.

    Steps, in order:

    1. choose a temporary directory (next to ``opts.workdir`` when
       ``opts.tmp`` is empty, inside ``opts.tmp`` otherwise);
    2. write the QC plot as a PNG inside ``opts.workdir`` and log the
       returned percentages; when ``opts.skip_mapping`` is set the job is
       stored with an empty output list and the function returns;
    3. map with ``fast_fragment_mapping`` (when ``opts.fast_fragment``) or
       with ``full_mapping``;
    4. store the job with ``save_to_db`` and append one line per mapped
       output file to ``trace.log`` inside ``opts.workdir``;
    5. delete the temporary directory unless ``opts.keep_tmp`` is set.

    If storing the job or writing the log fails, the traceback is printed
    and the process exits with status 1.
    """
    # local import: only the final cleanup step needs it
    from subprocess import call

    def release_lock(lock_name):
        """Delete a lock file of the working directory if it is there."""
        try:
            remove(path.join(opts.workdir, lock_name))
        except OSError:
            # nothing to release; must not hide the error being handled
            pass

    check_options(opts)

    launch_time = time.localtime()

    # hash that is appended to output file names
    param_hash = digest_parameters(opts, get_md5=True)

    # create tmp directory
    if not opts.tmp:
        temp_dir = opts.workdir + '_tmp_r%d_%s' % (opts.read, param_hash)
    else:
        temp_dir = path.join(opts.tmp,
                             'TADbit_tmp_r%d_%s' % (opts.read, param_hash))

    # QC plot
    fig_path = path.join(
        opts.workdir, '%s_%s_%s.png' % (path.split(opts.fastq)[-1], '-'.join(
            map(str, opts.renz)), param_hash))
    logging.info('Generating Hi-C QC plot')

    dangling_ends, ligated = quality_plot(opts.fastq,
                                          r_enz=opts.renz,
                                          nreads=100000,
                                          paired=False,
                                          savefig=fig_path)
    for renz in dangling_ends:
        logging.info('  - Dangling-ends (sensu-stricto): %.3f%%',
                     dangling_ends[renz])
    for renz in ligated:
        logging.info('  - Ligation sites: %.3f%%', ligated[renz])
    if opts.skip_mapping:
        save_to_db(opts, dangling_ends, ligated, fig_path, [], launch_time,
                   time.localtime())
        return

    # Mapping
    if opts.fast_fragment:
        mkdir(path.join(opts.workdir, '03_filtered_reads'))
        logging.info('parsing genomic sequence')
        try:
            # allows the use of pickle genome to make it faster
            # NOTE(review): presumably `load` is pickle.load; unpickling can
            # execute arbitrary code, so the genome file must be trusted.
            with open(opts.genome[0], 'rb') as genome_handler:
                genome_seq = load(genome_handler)
        except (UnpicklingError, KeyError):
            genome_seq = parse_fasta(opts.genome)

        logging.info('mapping %s and %s to %s', opts.fastq, opts.fastq2,
                     opts.workdir)
        outfiles = fast_fragment_mapping(
            opts.index,
            opts.fastq,
            opts.fastq2,
            opts.renz,
            genome_seq,
            path.join(opts.workdir, '03_filtered_reads',
                      'all_r1-r2_intersection_%s.tsv' % param_hash),
            clean=not opts.keep_tmp,
            get_nread=True,
            mapper_binary=opts.mapper_binary,
            mapper_params=opts.mapper_param,
            suffix=param_hash,
            temp_dir=temp_dir,
            nthreads=opts.cpus)
    else:
        logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                     opts.workdir)
        outfiles = full_mapping(opts.index,
                                opts.fastq,
                                path.join(opts.workdir,
                                          '01_mapped_r%d' % (opts.read)),
                                mapper=opts.mapper,
                                r_enz=opts.renz,
                                temp_dir=temp_dir,
                                nthreads=opts.cpus,
                                frag_map=not opts.iterative,
                                clean=not opts.keep_tmp,
                                windows=opts.windows,
                                get_nread=True,
                                skip=opts.skip,
                                suffix=param_hash,
                                mapper_binary=opts.mapper_binary,
                                mapper_params=opts.mapper_param)

    # adjust line count: each count becomes the previous count minus the
    # number of lines found in the previous output file
    if opts.skip:
        for i, (out, _) in enumerate(outfiles[1:], 1):
            prev_path, prev_count = outfiles[i - 1]
            # context manager so the counted file is closed right away
            with open(prev_path) as prev_file:
                prev_lines = sum(1 for _line in prev_file)
            outfiles[i] = out, prev_count - prev_lines

    finish_time = time.localtime()

    # save all job information to sqlite DB (once, and guarded: the job must
    # not be stored twice)
    try:
        save_to_db(opts, dangling_ends, ligated, fig_path, outfiles,
                   launch_time, finish_time)
    except Exception:
        # release lock
        release_lock('__lock_db')
        print_exc()
        exit(1)

    # write machine log, serialised through a lock file in the workdir
    try:
        lock_log = path.join(opts.workdir, '__lock_log')
        # wait until nobody holds the lock, THEN take it; creating the file
        # inside the wait loop would make the loop spin on our own lock
        while path.exists(lock_log):
            time.sleep(0.5)
        open(lock_log, 'a').close()
        try:
            with open(path.join(opts.workdir, 'trace.log'), "a") as mlog:
                mlog.write('\n'.join([('# MAPPED READ%s\t%d\t%s' %
                                       (opts.read, num, out))
                                      for out, num in outfiles]) + '\n')
        finally:
            # release lock, also when the write failed
            release_lock('__lock_log')
    except Exception:
        # release lock
        # NOTE(review): kept from the original handler; confirm that the DB
        # lock really is the one to drop when the log write fails.
        release_lock('__lock_db')
        print_exc()
        exit(1)

    # clean
    if not opts.keep_tmp:
        logging.info('cleaning temporary files')
        # argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed to rm verbatim
        call(['rm', '-rf', '--', temp_dir])
Example #6
0
from pytadbit.mapping.filter import filter_reads, apply_filter

# Map both read-ends with the strategy selected by `mapper`.
# All inputs (mapper, gem_index_path, fastq, out_map_dir*, r_beg*, r_end*,
# temp_dir*) are defined outside this fragment.
# NOTE(review): both reads are mapped from the same `fastq` path; only the
# begin/end coordinates and the output/temporary directories differ.
if mapper == 1:
    # iterative mapping; every end coordinate is shifted by 2
    print 'read 1'
    outfiles1 = iterative_mapping(gem_index_path, fastq, out_map_dir1,
                                  r_beg1, [e + 2 for e in r_end1],
                                  temp_dir=temp_dir1)
    print 'read 2'
    outfiles2 = iterative_mapping(gem_index_path, fastq, out_map_dir2,
                                  r_beg2, [e + 2 for e in r_end2],
                                  temp_dir=temp_dir2)
    # parser to be used on the outputs of this branch
    parse_thing = parse_sam
elif mapper == 2:
    # full_mapping with frag_map disabled; windows are the (begin, end)
    # pairs built from r_beg*/r_end*; the enzyme is hard-coded to HindIII
    print 'read 1'
    outfiles1 = full_mapping(gem_index_path, fastq, out_map_dir1, 'HindIII',
                             temp_dir=temp_dir1, frag_map=False,
                             windows=(zip(*(r_beg1, r_end1))))
    print 'read 2'
    outfiles2 = full_mapping(gem_index_path, fastq, out_map_dir2, 'HindIII',
                             temp_dir=temp_dir2, frag_map=False,
                             windows=(zip(*(r_beg2, r_end2))))
    # parser to be used on the outputs of this branch
    parse_thing = parse_map
elif mapper == 3:
    # same call as the previous branch but leaving frag_map at its default
    # NOTE(review): this branch does not assign parse_thing in the visible
    # code -- confirm whether that is set further down.
    print 'read 1'
    outfiles1 = full_mapping(gem_index_path, fastq, out_map_dir1, 'HindIII',
                             temp_dir=temp_dir1,
                             windows=(zip(*(r_beg1, r_end1))))
    print 'read 2'
    outfiles2 = full_mapping(gem_index_path, fastq, out_map_dir2, 'HindIII',
                             temp_dir=temp_dir2,
                             windows=(zip(*(r_beg2, r_end2))))
# Turn the textual `frag_map` setting into a boolean and choose the mapping
# windows that go with it.  Any value other than 'True' / 'False' is left
# as it is and `windows` is not assigned by this fragment.
if frag_map == 'True':
    frag_map = True
    # no explicit windows are passed in this mode
    windows = None
elif frag_map == 'False':
    frag_map = False
    # window ends go from 20 up to read_length in steps of 5 ...
    range_stop = range(20, int(read_length) + 1, 5)
    # ... and every window starts at position 1
    range_start = [1] * len(range_stop)
    windows = (zip(*(range_start, range_stop)))

# call mapping function for read1 and read2
for infile in [paired1, paired2]:
    # base name of the input: last path component without ".fastq.gz"
    bname = infile.split("/")[-1].replace(".fastq.gz", "")
    # NOTE(review): `maps` is overwritten on every iteration, so only the
    # result for the last input file is still bound after the loop.
    maps = full_mapping(gem_index_path=gem_index,
                        fastq_path=infile,
                        out_map_dir='%s/%s/' % (MAP_DIR, bname),
                        r_enz=restriction_enzyme,
                        windows=windows,
                        temp_dir='%s/tmp_dir_%s/' % (MAP_DIR, bname),
                        frag_map=frag_map,
                        nthreads=slots)

# ========================================================================================
# Process mapped reads according to restriction enzyme fragments, Merging mapped "read1" and "read2"
# ========================================================================================

# Import python modules/functions
import glob
from pytadbit.parsers.map_parser import parse_map
from pytadbit.parsers.genome_parser import parse_fasta
from pytadbit.mapping import get_intersection

# Load the genome