def metalfox_pipe(config_file, sample_pairs, ref_mnt):
    """Queue metalfox scoring jobs for each sample pair listed in sample_pairs.

    For every pair: looks up the sample's rmdup-sorted bam in swift object
    storage, downloads the bam (and index) plus the '<pair>.out.keep' MuTect
    output, then runs the metalfox tool against them.  All assembled shell
    command strings are dispatched in parallel via job_manager.

    config_file -- config consumed by parse_config (tool path, swift
                   container/object prefix, thread/ram limits)
    sample_pairs -- tab-separated file; column 0 is the pair/analysis name,
                    column 1 is the sample whose bam is scored
    ref_mnt -- mount point prefixed onto the mapability reference path
    """
    (metalfox_tool, cont, obj, map_ref, max_t, ram) = parse_config(config_file)
    # mapability reference lives under the mounted reference volume
    map_ref = ref_mnt + '/' + map_ref
    # source openstack credentials so the swift cli can authenticate
    src_cmd = '. ~/.novarc;'
    deproxy = 'unset http_proxy; unset https_proxy;'
    pairs = open(sample_pairs, 'r')
    job_list = []
    for sn in pairs:
        sn = sn.rstrip('\n')
        info = sn.split('\t')
        sys.stderr.write('Getting bam file name for ' + info[1] + '\n')
        # list objects under the sample's BAM prefix; grep keeps the
        # .rmdup.srt.bam/.bai pair
        get_bam_name = 'swift list ' + cont + ' --prefix ' + obj + '/' + info[1] + '/BAM/' + info[1] \
                       + ' | grep .rmdup.srt.ba* '
        # NOTE(review): output is split on newlines, so bam[0]/bam[1] are
        # assumed to be the bai/bam pair; the trailing newline also leaves an
        # empty final element -- confirm against an actual swift listing
        bam = subprocess.check_output(get_bam_name, shell=True).split('\n')
        dl_bam = 'swift download --skip-identical ' + cont + ' ' + bam[1] + ';swift download --skip-identical ' \
                 + cont + ' ' + bam[0] + ';'
        mut_out = 'ANALYSIS/' + info[0] + '/OUTPUT/' + info[0] + '.out.keep'
        dl_out = 'swift download ' + cont + ' ' + mut_out + ';'
        # .bai/.bam extension not always clear
        if bam[1][-3:] == 'bam':
            run_metal = metalfox_tool + ' -f1 ' + mut_out + ' -f3 ' + bam[1] + ' -m ' + map_ref + ' > ' + info[0] + \
                    '.foxog_scored_added.out;'
        else:
            run_metal = metalfox_tool + ' -f1 ' + mut_out + ' -f3 ' + bam[0] + ' -m ' + map_ref + ' > ' + info[0] + \
                    '.foxog_scored_added.out;'
        # cleanup is built but intentionally disabled below; downloaded files
        # are left in place
        cleanup = 'rm ' + ' '.join((bam[0], bam[1], mut_out)) + ';'
        job_list.append(src_cmd + deproxy + dl_bam + dl_out + run_metal)  # + cleanup)
    pairs.close()
    sys.stderr.write(date_time() + 'Queueing jobs\n')
    job_manager(job_list, max_t)
def run_fastqc(config=None, run_config=None):
    os.chdir(os.path.join(config['root_dir'], run_config['run_name']))
    try:
        os.mkdir(config['qc']['dir'])
    except OSError:
        print 'Warning: {} directory already exists, which is ok.'.format(
                config['qc']['dir'])
    os.chdir(os.path.join(config['root_dir'], run_config['run_name'], 'TEMP'))
    jobs = []
    for fastq in glob.glob('*_sequence.txt.gz'):
        cmd = '{fastqc} -j {java} -o {QC} {fastq}'.format(
            fastqc=config['qc']['fastqc'], java=config['qc']['java'],
            QC=os.path.join(config['root_dir'], run_config['run_name'], config['qc']['dir']),
            fastq=fastq)
        print 'appending cmd: {}'.format(cmd)
        jobs.append(cmd)
    job_manager.job_manager(cmd_list=jobs, threads=multiprocessing.cpu_count(), interval=20)
Exemple #3
0
def oxog_check(config_file, lane_list, ref_mnt):
    """Queue picard CollectOxoGMetrics jobs for every sequencing lane.

    For each (sample, lane) in lane_list: download the lane's rmdup-sorted
    bam (and index) from swift, run CollectOxoGMetrics against the ordered
    reference fasta and interval list under ref_mnt, then delete the bam.

    config_file -- config consumed by parse_config
    lane_list -- tab-separated file: column 0 = sample id,
                 column 2 = comma-space separated lane names
    ref_mnt -- mount point prefixed onto reference fasta/interval paths
    """
    (java, picard, fa_ordered, intervals, cont, obj, max_t, ram) = parse_config(config_file)
    # source openstack credentials so the swift cli can authenticate
    src_cmd = ". /home/ubuntu/.novarc;"
    # split total ram (gb) evenly across the max_t concurrent jobs;
    # NOTE(review): if max_t > ram this floors to 0 -> '-Xmx0g'; confirm
    # configs always keep ram >= max_t
    ram = str(int(ram) / int(max_t))
    job_list = []
    lane_fh = open(lane_list, 'r')
    for sample in lane_fh:
        sample = sample.rstrip('\n')
        info = sample.split('\t')
        lanes = info[2].split(', ')
        bid = info[0]
        for lane in lanes:
            # prefix download grabs both .bam and .bai ('.ba' matches either)
            dl_bam = src_cmd + 'swift download ' + cont + ' --prefix ' + obj + '/' + bid + '/BAM/' + bid \
                     + '_' + lane + '.rmdup.srt.ba;'
            # pdb.set_trace()
            bam = obj + '/' + bid + '/BAM/' + bid + '_' + lane + '.rmdup.srt.bam'
            oxoG = java + ' -Xmx' + ram + 'g -jar ' + picard + ' CollectOxoGMetrics I=' + bam + ' O=' + bid + '_' + \
                   lane + '.oxo_summary.txt R=' + ref_mnt + '/' + fa_ordered + ' INTERVALS=' + ref_mnt + '/' + \
                   intervals + ' 2> ' + bid + '_' + lane + '.log;'
            # bam is removed once metrics are collected to save disk
            del_bam = 'rm ' + obj + '/' + bid + '/BAM/' + bid + '_' + lane + '.rmdup.srt.bam;'
            job_list.append(dl_bam + oxoG + del_bam)
    lane_fh.close()
    # run download/metrics/cleanup commands in parallel, max_t at a time
    job_manager(job_list, max_t)
Exemple #4
0
    # (loop body; the enclosing iteration over fastq roots starts above this
    # chunk -- root, meta, r1, r2, p_mem are bound by it)
    # build one picard FastqToSam command per fastq pair, tagged with
    # read-group metadata derived from the file name
    READ_GROUP_NAME = root
    sname = meta[0]
    LIBRARY_NAME = meta[0]
    PLATFORM_UNIT = meta[4]
    PLATFORM = 'illumina'
    bam = os.path.basename(root) + '_unaligned.bam'
    bam_list.append(bam)
    log_file = READ_GROUP_NAME + '.convert.log'
    picard_cmd = args['JAVA'] + ' -Djava.io.tmpdir=tmp -Xmx' + str(p_mem) + 'G -jar ' + args['PICARD'] \
                 + ' FastqToSam FASTQ=' + r1 + ' FASTQ2=' + r2 + ' OUTPUT=' + bam + ' READ_GROUP_NAME=' \
                 + READ_GROUP_NAME + ' SAMPLE_NAME=' + sname + ' LIBRARY_NAME=' + LIBRARY_NAME + ' PLATFORM_UNIT=' \
                 + PLATFORM_UNIT + ' PLATFORM=' + PLATFORM
    # send stderr+stdout to the log, then remove the source fastqs
    picard_cmd += ' 2> ' + log_file + ' >> ' + log_file + '; rm ' + r1 + ' ' + r2
    cmd_list.append(picard_cmd)

# run all FastqToSam conversions assembled above in parallel
sys.stderr.write(date_time() + 'Queueing jobs for conversion\n')
job_manager(cmd_list, args['THREADS'])
# merge the per-lane unaligned bams into one name-sorted (-n) bam via novosort
novo_cmd = 'mkdir tmp; ' + args['NOVOSORT'] + ' -c ' + args['THREADS'] + ' -m ' + args['MEMORY'] + 'G -n -t tmp ' \
           + ' '.join(bam_list) + ' > ' + sname + '_ualigned_merged.bam'
sys.stderr.write(date_time() + ' Merging unaligned bams with command ' +
                 novo_cmd + '\n')
check = subprocess.call(novo_cmd, shell=True)
if check != 0:
    sys.stderr.write(date_time() + 'Novosort merge failed!\n')
    exit(1)
else:
    sys.stderr.write(date_time() +
                     'Merge complete, deleting individual bams\n')
    # removal intentionally disabled; individual bams are kept on disk
    # despite what the message above says
    #rm_bam = 'rm ' + ' '.join(bam_list)
    #subprocess.call(rm_bam, shell=True)
def send_fastqc_to_server(config=None, run_config=None):
    """Push the run's FastQC html reports to the QC server via rsync.

    Changes into the run's QC directory and rsyncs every *html report to
    <user>@<server>/<run_name>/, running the single command through
    job_manager.
    """
    qc_dir = os.path.join(config['root_dir'], run_config['run_name'], config['qc']['dir'])
    os.chdir(qc_dir)
    # NOTE(review): destination is '<user>@<server>/<run_name>/' with no ':'
    # after the host -- verify config['qc']['server'] supplies it if needed
    dest = config['qc']['user'] + '@{}/{}/'.format(config['qc']['server'],
                                                   run_config['run_name'])
    cmd = 'rsync -av --progress --stats *html ' + dest
    job_manager.job_manager([cmd], threads=1, interval=10)
Exemple #6
0
Options:
-h

Arguments:
<list> fastq list
<th>   num threads

"""
import subprocess
import sys

sys.path.append('/home/ubuntu/TOOLS/Scripts/alignment')
sys.path.append('/home/ubuntu/TOOLS/Scripts/utility')

from docopt import docopt

args = docopt(__doc__)

# <list>: file of fastq paths, one per line; <th>: thread count (string,
# passed straight through to job_manager)
fh = open(args['<list>'])
th = args['<th>']
cmd_list = []
#dir_mk = 'mkdir converted'
#subprocess.call(dir_mk, shell=True)
# build one phred-64 -> phred-33 conversion command per listed fastq
for line in fh:
    line = line.rstrip('\n')
    cmd = '/home/ubuntu/TOOLS/Scripts/utility/fastq_64_to_33.py ' + line
    cmd_list.append(cmd)
from job_manager import job_manager

# run the conversions in parallel, th at a time
job_manager(cmd_list, th)
# create sub_files to process
# Split the input file fn into roughly th equal chunks so each chunk can be
# scored in parallel by 2_calc_mean_variance_bin.py via job_manager.
lc_info = subprocess.check_output('wc -l ' + fn, shell=True)
fh = open(fn, 'r')
lc = lc_info.split()
# lines per chunk, rounded up so no lines are dropped.
# BUGFIX: was math.ciel, which does not exist and raised AttributeError.
line_split = int(math.ceil(float(lc[0]) / float(th)))
cur = 1   # lines written to the current chunk so far
fct = 1   # chunk counter, used to name split files and output prefixes
cur_file = fn + str(fct) + 'split'
out_pre = 'Gene_metrics' + str(fct)
out = open(cur_file, 'w')
job_list = []
cmd = '/home/ubuntu/TOOLS/dropseq/2_calc_mean_variance_bin.py '
# first chunk's job carries an extra ' 0' argument (first-file marker)
job_list.append(cmd + cur_file + ' ' + out_pre + ' 0')
head = next(fh)   # consume (and discard) the header line
# out.write(head)
for line in fh:
    if cur > line_split:
        # current chunk is full: close it and start the next one
        out.close()
        fct += 1
        cur_file = fn + str(fct) + 'split'
        out = open(cur_file, 'w')
        out_pre = 'Gene_metrics' + str(fct)
        job_list.append(cmd + cur_file + ' ' + out_pre)
        cur = 1
        # out.write(head)
    out.write(line)
    cur += 1
out.close()
# run one scoring job per chunk, th at a time
job_manager(job_list, th)
Exemple #8
0
# create sub_files to process
# Split the input file fn into roughly th equal chunks so each chunk can be
# scored in parallel by 2_calc_mean_variance_bin.py via job_manager.
lc_info = subprocess.check_output('wc -l ' + fn, shell=True)
fh = open(fn, 'r')
lc = lc_info.split()
# lines per chunk, rounded up so no lines are dropped.
# BUGFIX: was math.ciel, which does not exist and raised AttributeError.
line_split = int(math.ceil(float(lc[0]) / float(th)))
cur = 1   # lines written to the current chunk so far
fct = 1   # chunk counter, used to name split files and output prefixes
cur_file = fn + str(fct) + 'split'
out_pre = 'Gene_metrics' + str(fct)
out = open(cur_file, 'w')
job_list = []
cmd = '/home/ubuntu/TOOLS/dropseq/2_calc_mean_variance_bin.py '
# first chunk's job carries an extra ' 0' argument (first-file marker)
job_list.append(cmd + cur_file + ' ' + out_pre + ' 0')
head = next(fh)   # consume (and discard) the header line
# out.write(head)
for line in fh:
    if cur > line_split:
        # current chunk is full: close it and start the next one
        out.close()
        fct += 1
        cur_file = fn + str(fct) + 'split'
        out = open(cur_file, 'w')
        out_pre = 'Gene_metrics' + str(fct)
        job_list.append(cmd + cur_file + ' ' + out_pre)
        cur = 1
        # out.write(head)
    out.write(line)
    cur += 1
out.close()
# run one scoring job per chunk, th at a time
job_manager(job_list, th)