Beispiel #1
0
 def set_sample_2type(li, mode, config):
     """Sort sample names into the output groups enabled in the config.

     Every entry of ``li`` is read as ``entry[0]`` (the name that gets
     collected) and ``entry[1]``; an ``entry[1]`` of ``None`` marks a
     sample without a control panel (per the original inline comment,
     entries look like ``(tumor, control-panel)``).

     Returns a dict with the keys "all", "case1", "case2", "case3" and
     "case4".  "all" is filled only when the "all_in_one" option is on,
     "case1"/"case2" only when "separate" is on; "case3" and "case4"
     always stay empty here.
     """
     import genomon_post_analysis.subcode.tools as tools

     _section_in, section_out = tools.get_section(mode)
     keep_unpanel = tools.config_getboolean(config, section_out, "include_unpanel") == True

     with_panel = []
     without_panel = []
     for entry in li:
         if entry[1] == None:
             # Samples lacking a control panel are kept only on request.
             if keep_unpanel:
                 without_panel.append(entry[0])
             continue
         with_panel.append(entry[0])

     sample_dict = {"all": [], "case1": [], "case2": [], "case3": [], "case4": []}

     if tools.config_getboolean(config, section_out, "all_in_one") == True:
         sample_dict["all"] = with_panel + without_panel

     if tools.config_getboolean(config, section_out, "separate") == True:
         sample_dict["case1"] = with_panel
         sample_dict["case2"] = without_panel

     return sample_dict
def call_merge_result(mode, ids_dict, output_dir, genomon_root, config):
    """Merge the per-sample result files of every non-empty sample group.

    For each key of ``ids_dict`` two merged files may be produced inside
    ``output_dir``: one from the unfiltered results (config option
    "output_<key>", only when "include_unfilt" is on) and one from the
    filtered results (config option "output_filt_<key>").  An empty
    configured output name disables the corresponding merge.
    """
    # Parenthesised form prints the same text under Python 2 and 3.
    print("=== [%s] merge result file. ===" % mode)

    import genomon_post_analysis.subcode.merge as subcode_merge
    import merge

    [section_in, section_out] = tools.get_section(mode)
    suffix_u = tools.config_getstr(config, section_in, "suffix")
    suffix_f = tools.config_getstr(config, section_in, "suffix_filt")

    merge_unfilt = tools.config_getboolean(config, section_out, "include_unfilt") == True

    def merge_group(sample_ids, suffix, output_name):
        # Collect one result file per sample, then hand them to the
        # merger that matches the current mode.
        files = [capture.sample_to_result_file(iid, mode, genomon_root, suffix)
                 for iid in sample_ids]
        output_path = output_dir + "/" + output_name

        if mode == "mutation":
            merge.merge_mutaion_for_paplot(files, sample_ids, output_path, config)
        elif mode == "starqc":
            merge.merge_star_qc_for_paplot(files, sample_ids, output_path, config)
        else:
            subcode_merge.merge_result(files, sample_ids, output_path, mode, config)

    for key in ids_dict:
        sample_ids = ids_dict[key]
        if len(sample_ids) == 0:
            continue

        # unfiltered results
        name_unfilt = tools.config_getstr(config, section_out, "output_" + key)
        if name_unfilt != "" and merge_unfilt:
            merge_group(sample_ids, suffix_u, name_unfilt)

        # filtered results
        name_filt = tools.config_getstr(config, section_out, "output_filt_" + key)
        if name_filt != "":
            merge_group(sample_ids, suffix_f, name_filt)
def call_image_capture(mode, ids_dict, output_dir, genomon_root, sample_conf, config):
    """Create the IGV capture batch scripts for every non-empty sample group.

    Below ``<output_dir>/<mode>/<output_dirname_<key>>`` a "capture"
    directory (passed on as the IGV output directory) and a
    "capture_script" directory (one ``<ID>.bat`` per sample plus the
    merged ``capture.bat``) are created.  Per-sample scripts for which
    ``capture.write_capture_bat`` does not return True are removed again.
    """
    # Parenthesised form prints the same text under Python 2 and 3.
    print("=== [%s] create script file, for IGV image capture. ===" % mode)
    import os

    def ensure_dir(path):
        # Create a single directory level unless it is already there.
        if not os.path.exists(path):
            os.mkdir(path)

    def capture_group(sample_list, group_dir):
        ensure_dir(group_dir + "/capture")
        ensure_dir(group_dir + "/capture_script")

        written = []
        for ID in sample_list:
            script = "%s/capture_script/%s.bat" % (group_dir, ID)

            path_options = {
                "output_file": script,
                "output_igv_dir": group_dir + "/capture",
                "bam_dir": "",
                "genomon_root": genomon_root,
            }

            if capture.write_capture_bat(path_options, ID, sample_conf, mode, config) == True:
                written.append(script)
            elif os.path.exists(script):
                # Drop the script of a sample that was not reported as usable.
                os.remove(script)

        capture.merge_capture_bat(written, group_dir + "/capture_script/capture.bat", True)

    # output dirs
    mode_dir = os.path.abspath(output_dir) + "/" + mode
    ensure_dir(mode_dir)

    [section_in, section_out] = tools.get_section(mode)

    for key in ids_dict:
        samples = ids_dict[key]
        if len(samples) == 0:
            continue

        group_dir = mode_dir + "/" + tools.config_getstr(config, section_out, "output_dirname_" + key)
        ensure_dir(group_dir)

        capture_group(samples, group_dir)
def arg_to_file(mode, genomon_root, args, config):
    """Build the sample-group dict from the ``input_file_case1..4`` arguments.

    Each argument is a comma separated list that may be wrapped in quotes;
    only the part before the first ";" is used.  Case 2 is read only when
    "include_unpanel" is on, case 3 only when "include_unpair" is on and
    case 4 only when both are on.

    Returns a dict with the keys "all", "case1" .. "case4"; "all" is
    filled when the "all_in_one" option is on, the case keys when
    "separate" is on.
    """
    def text_to_list(inputs, flg):
        # A disabled case or an empty argument contributes nothing.
        if flg == False or len(inputs) == 0:
            return []

        unquoted = inputs.lstrip("'").lstrip('"').rstrip("'").rstrip('"')
        first_part = unquoted.split(";")[0]
        return [name.strip(" ") for name in first_part.split(",")]

    [section_in, section_out] = tools.get_section(mode)

    def option(name):
        return tools.config_getboolean(config, section_out, name)

    def read_cases():
        # Lists for case1..case4, in that order.
        return [
            text_to_list(args.input_file_case1, True),
            text_to_list(args.input_file_case2, option("include_unpanel")),
            text_to_list(args.input_file_case3, option("include_unpair")),
            text_to_list(args.input_file_case4,
                         option("include_unpair") and option("include_unpanel")),
        ]

    sample_dict = {"all": [], "case1": [], "case2": [], "case3": [], "case4": []}

    if option("all_in_one") == True:
        everything = []
        for names in read_cases():
            everything.extend(names)
        sample_dict["all"] = everything

    if option("separate") == True:
        for case_key, names in zip(("case1", "case2", "case3", "case4"), read_cases()):
            sample_dict[case_key] = names

    return sample_dict
Beispiel #5
0
def write_pickup_script(path_options, ID, sample_conf, mode, config):
    """Write a shell script that extracts the reads around each result row.

    The filtered result file of sample ``ID`` is read; for every data row
    two BED windows of +/- ``pickup_width`` (config section "bam") are
    emitted, one around the start column and one around the end column.
    The generated script builds a sorted, merged BED file from those
    windows and runs samtools view/sort/index on the sample BAM and, when
    ``sample_to_pair`` returns a partner, on the partner BAM as well.

    Args:
        path_options: dict providing "genomon_root", "output_bam_dir",
            "output_log_dir", "output_file", "samtools" and "bedtools".
        ID: sample name.
        sample_conf: passed through to ``sample_to_pair``.
        mode: analysis mode; selects the config sections via
            ``tools.get_section``.
        config: parsed configuration object.

    Returns:
        True when at least one data row was found, otherwise False.  The
        script file is written in either case.

    Raises:
        ValueError: when a configured column name is missing from the
            header row or a position is not an integer.
        IOError/OSError: when the result file cannot be read or the
            script cannot be written.
    """

    cmd_header = """#!/bin/bash
#
#$ -S /bin/bash
#$ -cwd
#$ -e {log}
#$ -o {log}
"""

    cmd_bed1 = """
if [ -e {bed} ]; then
    rm {bed}
fi
"""
    cmd_bed2 = "echo '{chr}\t{start}\t{end}' >> {bed}\n"
    cmd_bed3 = """
{bedtools} sort -i {bed} > {bed}.sort.bed
{bedtools} merge -i {bed}.sort.bed > {bed}.merge.bed
rm {bed} {bed}.sort.bed 
mv {bed}.merge.bed {bed}
    
"""
    cmd_view = """
{samtools} view -h -L {bed} {bam} > {output_bam}.temp.bam
{samtools} sort {output_bam}.temp.bam {output_bam}
{samtools} index {output_bam}.bam
rm {output_bam}.temp.bam
    
"""
    cmd_rm_bed = """
rm {bed}
"""

    import genomon_post_analysis.subcode.tools as tools
    import os

    # options read
    [section_in, section_out] = tools.get_section(mode)
    suffix_f = tools.config_getstr(config, section_in, "suffix_filt")
    data_file = sample_to_result_file(ID, mode, path_options["genomon_root"], suffix_f)

    # output file name
    sample_dir = path_options["output_bam_dir"] + "/" + ID
    if not os.path.exists(sample_dir):
        os.mkdir(sample_dir)

    bam_tumor = sample_to_bam_file(ID, mode, path_options["genomon_root"], tools.config_getstr(config, "bam", "input_bam_suffix"))
    out_tumor_name = sample_dir + "/" + ID

    normal = sample_to_pair(sample_conf, mode, ID)
    bam_normal = ""
    out_normal_name = ""
    if normal is not None:
        bam_normal = sample_to_bam_file(normal, mode, path_options["genomon_root"], tools.config_getstr(config, "bam", "input_bam_suffix"))
        out_normal_name = sample_dir + "/" + normal

    # read config
    width = config.getint("bam", "pickup_width")
    output_bam_suffix = os.path.splitext(config.get("bam", "output_bam_suffix"))[0]
    samtools = path_options["samtools"]
    bedtools = path_options["bedtools"]
    # The separator is stored escaped in the config; turn it into the real character.
    sept = tools.config_getstr(config, section_in, "sept").replace("\\t", "\t").replace("\\n", "\n").replace("\\r", "\r")
    # Loop-invariant, so looked up once instead of once per line.
    comment_prefix = tools.config_getstr(config, section_in, "comment")

    # create command text
    bed_file = out_tumor_name + ".bed"
    bed1_text = cmd_bed1.format(bed = bed_file)
    bed3_text = cmd_bed3.format(bedtools = bedtools, bed = bed_file)

    cmd_text = cmd_view.format(samtools = samtools,
                               bed = bed_file,
                               bam = bam_tumor,
                               output_bam = out_tumor_name + output_bam_suffix)
    if bam_normal != "":
        cmd_text += cmd_view.format(samtools = samtools,
                                    bed = bed_file,
                                    bam = bam_normal,
                                    output_bam = out_normal_name + output_bam_suffix)
    cmd_text += cmd_rm_bed.format(bed = bed_file)

    def bed_entry(chrom, position):
        # One "echo ... >> bed" line for the window around `position`,
        # with the window start clamped at 0.
        center = int(position)
        return cmd_bed2.format(chr = chrom,
                               start = max(center - width, 0),
                               end = center + width,
                               bed = bed_file)

    flush_threshold = 10000   # buffered lines before they are written out
    tmp_file = path_options["output_file"] + ".tmp"
    enable_data = False

    # Both files are closed even when parsing fails part-way through.
    with open(tmp_file, "w") as f_sh:
        # write script 1
        f_sh.write(cmd_header.format(log = path_options["output_log_dir"]))
        f_sh.write(bed1_text)

        # read
        header = []
        pending = []
        with open(data_file) as f_in:
            for line in f_in:
                line = line.rstrip()
                if len(line.replace(sept, "")) == 0:
                    continue

                if line.startswith(comment_prefix):
                    continue

                if not header:
                    # First remaining line is the header: resolve column positions.
                    header = line.split(sept)
                    col_chr1 = header.index(tools.config_getstr(config, section_in, "col_chr1"))
                    col_chr2 = header.index(tools.config_getstr(config, section_in, "col_chr2"))
                    col_start = header.index(tools.config_getstr(config, section_in, "col_start"))
                    col_end = header.index(tools.config_getstr(config, section_in, "col_end"))
                    continue

                data = line.split(sept)
                pending.append(bed_entry(data[col_chr1], data[col_start]))
                pending.append(bed_entry(data[col_chr2], data[col_end]))
                enable_data = True

                if len(pending) > flush_threshold:
                    f_sh.writelines(pending)
                    pending = []

        if pending:
            f_sh.writelines(pending)

        # write script 3
        f_sh.write(bed3_text)
        f_sh.write(cmd_text)

    # Only a completely written script appears under its final name.
    os.rename(tmp_file, path_options["output_file"])

    return enable_data
Beispiel #6
0
def write_capture_bat(path_options, ID, sample_conf, mode, config):
    """Write an IGV batch script that snapshots every result row of a sample.

    The filtered result file of sample ``ID`` is read.  Rows whose two
    breakpoints lie on the same chromosome and closer than half of
    ``capture_width`` (config section "igv") yield one snapshot around
    the start position; all other rows yield two snapshots ("_1" around
    the start, "_2" around the end).  Rows repeating an already seen
    position pair are skipped, and reading stops once ``capture_max``
    distinct rows have been collected.

    Args:
        path_options: dict providing "genomon_root", "output_igv_dir"
            and "output_file".
        ID: sample name.
        sample_conf: passed through to ``sample_to_pair``.
        mode: analysis mode; selects the config sections via
            ``tools.get_section``.
        config: parsed configuration object.

    Returns:
        True when at least one snapshot command was written, else False.
        The batch file is written in either case.

    Raises:
        ValueError: when a configured column name is missing from the
            header row or a position is not an integer.
        IOError/OSError: when the result file cannot be read or the
            batch file cannot be written.
    """

    cmd_header = """
genome hg19

"""
    cmd_new_tumor = """
new
load {tumor_bam}
"""
    cmd_new_normal = """
load {normal_bam}
"""
    cmd_capt = """
goto {chr}:{start}-{end}
snapshot {name}
"""

    import genomon_post_analysis.subcode.tools as tools
    import os

    [section_in, section_out] = tools.get_section(mode)

    # result file
    suffix_f = tools.config_getstr(config, section_in, "suffix_filt")
    data_file = sample_to_result_file(ID, mode, path_options["genomon_root"], suffix_f)

    # use bams
    bam_tumor = sample_to_bam_file(ID, mode, path_options["genomon_root"], tools.config_getstr(config, "bam", "input_bam_suffix"))
    normal = sample_to_pair(sample_conf, mode, ID)
    bam_normal = ""
    if normal is not None:
        bam_normal = sample_to_bam_file(normal, mode, path_options["genomon_root"], tools.config_getstr(config, "bam", "input_bam_suffix"))

    # output file
    out_tumor_name = "%s/%s" % (path_options["output_igv_dir"], ID)

    # options (all loop-invariant, so read once up front)
    width = config.getint("igv", "capture_width")
    capture_max = config.getint("igv", "capture_max")
    # The separator is stored escaped in the config; turn it into the real character.
    sept = tools.config_getstr(config, section_in, "sept").replace("\\t", "\t").replace("\\n", "\n").replace("\\r", "\r")
    comment_prefix = tools.config_getstr(config, section_in, "comment")

    def snapshot(chrom, center, name):
        # goto/snapshot pair for the window around `center`; the window
        # start is clamped at 0 so no negative coordinate is emitted.
        return cmd_capt.format(chr = chrom,
                               start = max(center - width, 0),
                               end = center + width,
                               name = name)

    flush_threshold = 10000   # buffered commands before they are written out
    tmp_file = path_options["output_file"] + ".tmp"
    enable_data = False

    # Both files are closed even when parsing fails part-way through.
    with open(tmp_file, "w") as f:
        # write script 1
        f.write(cmd_header)
        f.write(cmd_new_tumor.format(tumor_bam = bam_tumor))
        if len(bam_normal) > 0:
            f.write(cmd_new_normal.format(normal_bam = bam_normal))

        # read
        header = []
        pending = []
        seen = set()   # snapshot base names already scheduled
        with open(data_file) as f_in:
            for line in f_in:
                if len(seen) >= capture_max:
                    break

                line = line.rstrip()
                if len(line.replace(sept, "")) == 0:
                    continue

                if line.startswith(comment_prefix):
                    continue

                if not header:
                    # First remaining line is the header: resolve column positions.
                    header = line.split(sept)
                    col_chr1 = header.index(tools.config_getstr(config, section_in, "col_chr1"))
                    col_chr2 = header.index(tools.config_getstr(config, section_in, "col_chr2"))
                    col_start = header.index(tools.config_getstr(config, section_in, "col_start"))
                    col_end = header.index(tools.config_getstr(config, section_in, "col_end"))
                    continue

                data = line.split(sept)

                chr1 = data[col_chr1]
                start = int(data[col_start])
                chr2 = data[col_chr2]
                end = int(data[col_end])

                fname = "{0}_{1}_{2}_{3}_{4}".format(out_tumor_name, chr1, start, chr2, end)
                if fname in seen:
                    continue
                seen.add(fname)

                # `//` keeps the integer-division threshold on Python 2 and 3 alike.
                if chr1 == chr2 and (end - start) < (width // 2):
                    # Both ends fit into a single window around the start.
                    pending.append(snapshot(chr1, start, fname + ".png"))
                else:
                    pending.append(snapshot(chr1, start, fname + "_1.png"))
                    pending.append(snapshot(chr2, end, fname + "_2.png"))

                enable_data = True

                if len(pending) > flush_threshold:
                    f.writelines(pending)
                    pending = []

        if pending:
            f.writelines(pending)

    # Only a completely written script appears under its final name.
    os.rename(tmp_file, path_options["output_file"])

    return enable_data
def call_bam_pickup(mode, ids_dict, output_dir, genomon_root, arg_samtools, arg_bedtools, sample_conf, config):
    """Create the BAM pick-up shell scripts for every non-empty sample group.

    Below ``<output_dir>/<mode>/<output_dirname_<key>>`` the directories
    "bam", "log" and "bam_script" are created; "bam_script" receives one
    ``pickup.<ID>.sh`` per sample plus the merged ``pickup.sh``.
    Per-sample scripts for which ``capture.write_pickup_script`` does not
    return True are removed again.  An empty ``arg_samtools`` or
    ``arg_bedtools`` falls back to the path from the "tools" config section.
    """
    # Parenthesised form prints the same text under Python 2 and 3.
    print("=== [%s] create script file, for bam pick up. ===" % mode)
    import os

    def ensure_dir(path):
        # Create a single directory level unless it is already there.
        if not os.path.exists(path):
            os.mkdir(path)

    def pickup_group(sample_list, group_dir, samtools, bedtools):
        bam_dir = group_dir + "/bam"
        log_dir = group_dir + "/log"
        script_dir = group_dir + "/bam_script"
        for path in (bam_dir, log_dir, script_dir):
            ensure_dir(path)

        written = []
        for ID in sample_list:
            script = "%s/pickup.%s.sh" % (script_dir, ID)

            path_options = {
                "output_file": script,
                "output_bam_dir": bam_dir,
                "output_log_dir": log_dir,
                "genomon_root": genomon_root,
                "samtools": samtools,
                "bedtools": bedtools,
            }

            if capture.write_pickup_script(path_options, ID, sample_conf, mode, config) == True:
                written.append(script)
            elif os.path.exists(script):
                # Drop the script of a sample that was not reported as usable.
                os.remove(script)

        capture.merge_pickup_script(written, script_dir + "/pickup.sh")

    # output dirs
    mode_dir = os.path.abspath(output_dir) + "/" + mode
    ensure_dir(mode_dir)

    # tools: an explicit argument wins over the configured path
    samtools = tools.config_getstr(config, "tools", "samtools") if arg_samtools == "" else arg_samtools
    bedtools = tools.config_getstr(config, "tools", "bedtools") if arg_bedtools == "" else arg_bedtools

    [section_in, section_out] = tools.get_section(mode)

    for key in ids_dict:
        samples = ids_dict[key]
        if len(samples) == 0:
            continue

        group_dir = mode_dir + "/" + tools.config_getstr(config, section_out, "output_dirname_" + key)
        ensure_dir(group_dir)

        pickup_group(samples, group_dir, samtools, bedtools)