Beispiel #1
0
def group_list(colmun, mode, name, config):
    '''
    Collect the distinct group names found in a column and a color for each.

    Parameters
    ----------
    colmun : iterable of str : raw cell values of the group column
    mode   : str : config section to read, "mutation" or "ca"
    name   : str : stem of the option names that are read
                   ("sept_<name>", "limited_<name>", "nouse_<name>", "<name>_color")
    config : config object handed to tools.config_getstr

    Return
    ------
    [names, colors] : names is sorted and de-duplicated, colors follows the same order.
    An empty list is returned when mode is neither "mutation" nor "ca".
    '''
    import paplot.color as color

    if mode not in ("mutation", "ca"):
        return []
    option_input = "result_format_%s" % mode

    sept = tools.config_getstr(config, option_input, "sept_%s" % name)
    limited_list = text_to_list(
        tools.config_getstr(config, mode, "limited_%s" % name), ",")
    nouse_list = text_to_list(
        tools.config_getstr(config, mode, "nouse_%s" % name), ",")

    found = set()
    for cell in colmun:
        # An empty separator means the cell holds a single value.
        pieces = [cell] if sept == "" else cell.split(sept)

        for piece in pieces:
            label = piece.strip()
            if label == "":
                continue
            # A non-empty "limited" list acts as a whitelist.
            if len(limited_list) > 0 and not fnmatch_list(label, limited_list):
                continue
            # The "nouse" list acts as a blacklist.
            if fnmatch_list(label, nouse_list):
                continue
            found.add(label)

    funcs = sorted(found)

    # Colors fixed in the config as comma separated "name:color" entries.
    preset = {}
    for entry in tools.config_getstr(config, mode, "%s_color" % name).split(","):
        if len(entry) == 0:
            continue
        cols = text_to_list(entry, ":")
        if len(cols) >= 2:
            preset[cols[0]] = color.name_to_value(cols[1])

    palette = color.create_color_dict(funcs, preset, color.metro_colors)

    # dict to value, in the order of the names
    colors = [palette[key] for key in funcs]

    return [funcs, colors]
Beispiel #2
0
def genes_list(colmun, colmun_f, colmun_id, funcs, id_list, config):
    '''
    Select the genes to display from the gene column.

    A gene is counted once per distinct ID, and only on rows whose group
    value is contained in funcs.

    Parameters
    ----------
    colmun    : list of str : gene column (a cell may hold several genes)
    colmun_f  : list : group column, same row order as colmun
    colmun_id : list : ID column, same row order as colmun
    funcs     : list : group names that are in use
    id_list   : list : all IDs; its length scales the "use_gene_rate" threshold
    config    : config object (read via tools.config_getstr and config.getfloat)

    Return
    ------
    Sorted list of gene names that pass the "limited_gene" / "nouse_gene"
    filters and whose count is not below len(id_list) * use_gene_rate.
    '''
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    sept = tools.config_getstr(config, "result_format_mutation", "sept_gene")

    counts = {}     # gene -> number of distinct IDs counted for it
    owners = {}     # gene -> IDs already counted for it

    for i, cell in enumerate(colmun):
        if colmun_f[i] not in funcs:
            continue

        text = cell.replace('"', '')
        names = [text] if sept == "" else text.split(sept)

        for gene in names:
            if gene == "":
                continue

            seen = owners.setdefault(gene, [])
            if colmun_id[i] in seen:
                # this ID was already counted for this gene
                continue
            seen.append(colmun_id[i])

            counts[gene] = counts.get(gene, 0) + 1

    # gene list
    use_gene_rate = config.getfloat("mutation", "use_gene_rate")
    limited_list = convert.text_to_list(
        tools.config_getstr(config, "mutation", "limited_gene"), ",")
    nouse_list = convert.text_to_list(
        tools.config_getstr(config, "mutation", "nouse_gene"), ",")

    def is_selected(gene):
        # whitelist, blacklist, then the minimum-count threshold
        if len(limited_list) > 0 and convert.fnmatch_list(
                gene, limited_list) is False:
            return False
        if convert.fnmatch_list(gene, nouse_list):
            return False
        return not counts[gene] < float(len(id_list)) * use_gene_rate

    return sorted(gene for gene in counts if is_selected(gene))
Beispiel #3
0
def group_list(colmun, mode, name, config):
    '''
    Collect the distinct group names found in a column and a color for each.

    Parameters
    ----------
    colmun : iterable of str : raw cell values of the group column
    mode   : str : config section to read, "mut" or "sv"
    name   : str : stem of the option names that are read
                   ("sept_<name>", "limited_<name>s", "nouse_<name>s", "<name>_colors")
    config : config object handed to tools.config_getstr

    Return
    ------
    [names, colors] : names is sorted and de-duplicated, colors follows the same order.
    An empty list is returned when mode is neither "mut" nor "sv".
    '''
    import paplot.color as color

    if mode == "sv":
        option_input = "result_format_sv"
    elif mode == "mut":
        option_input = "result_format_mutation"
    else:
        return []

    sept = tools.config_getstr(config, option_input, "sept_%s" % name)
    limited_list = text_to_list(tools.config_getstr(config, mode, "limited_%ss" % name), ",")
    nouse_list = text_to_list(tools.config_getstr(config, mode, "nouse_%ss" % name), ",")

    unique = set()
    for cell in colmun:
        # An empty separator means the cell holds a single value.
        pieces = [cell] if sept == "" else cell.split(sept)

        for piece in pieces:
            label = piece.strip()
            if label == "":
                continue
            # A non-empty "limited" list acts as a whitelist (exact match).
            if len(limited_list) > 0 and label not in limited_list:
                continue
            if label in nouse_list:
                continue
            unique.add(label)

    funcs = sorted(unique)

    # Colors fixed in the config as comma separated "name:color" entries.
    preset = {}
    for entry in tools.config_getstr(config, mode, "%s_colors" % name).split(","):
        if len(entry) == 0:
            continue
        cols = text_to_list(entry, ":")
        if len(cols) >= 2:
            preset[cols[0]] = color.name_to_value(cols[1])

    palette = color.create_color_dict(funcs, preset, color.metro_colors)

    # dict to value, in the order of the names
    colors_n = [palette[key] for key in funcs]

    return [funcs, colors_n]
Beispiel #4
0
def create_index(config, output_dir, output_html, project_name, name, overview = "", sub_text = "", composite = False, remarks = ""):
    '''
    Update the metadata and (re)write <output_dir>/index.html.

    Parameters
    ----------
    config       : config object; "style"/"remarks" is read when remarks is empty
    output_dir   : str : directory that receives index.html
    output_html  : str : graph file name inside <output_dir>/<project_name>;
                         an empty string is treated as "no graph file"
    project_name : str : project sub directory
    name, overview, sub_text, composite : forwarded unchanged to _load_metadata
    remarks      : str : text for the {remarks} placeholder; falls back to the config

    Return
    ------
    None. The page is written to <output_dir>/index.html.
    '''
    import paplot.subcode.tools as tools
    import os

    # An empty file name can never count as an existing graph.
    html_exists = False
    if output_html != "":
        html_exists = os.path.exists(output_dir + "/" + project_name + "/" + output_html)

    json_data = _load_metadata(output_dir, output_html, project_name, name, overview, sub_text, composite, html_exists)

    link_text = _convert_index_item(json_data)

    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/index.html"
    with open(template_path) as f_template:
        html_template = f_template.read()

    if remarks == "":
        remarks = tools.config_getstr(config, "style", "remarks")

    # Render before opening the output so that a formatting error does not
    # leave a truncated index.html behind.
    html_text = html_template.format(
        version = version_text(),
        date = tools.now_string(),
        remarks = remarks,
        link = link_text
    )

    # The context manager closes the file even when the write fails.
    with open(output_dir + "/index.html", "w") as f_html:
        f_html.write(html_text)
Beispiel #5
0
def recreate_index(config, output_dir, remarks = ""):
    '''
    Rebuild <output_dir>/index.html from the metadata returned by _reload_metadata.

    Parameters
    ----------
    config     : config object; "style"/"remarks" is read when remarks is empty
    output_dir : str : directory that receives index.html
    remarks    : str : text for the {remarks} placeholder; falls back to the config

    Return
    ------
    None. The page is written to <output_dir>/index.html.
    '''
    import paplot.subcode.tools as tools
    import os

    json_data = _reload_metadata(output_dir)

    link_text = _convert_index_item(json_data)

    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/index.html"
    with open(template_path) as f_template:
        html_template = f_template.read()

    if remarks == "":
        remarks = tools.config_getstr(config, "style", "remarks")

    # Render before opening the output so that a formatting error does not
    # leave a truncated index.html behind.
    html_text = html_template.format(
        version = version_text(),
        date = tools.now_string(),
        remarks = remarks,
        link = link_text
    )

    # The context manager closes the file even when the write fails.
    with open(output_dir + "/index.html", "w") as f_html:
        f_html.write(html_text)
Beispiel #6
0
def genes_list(colmun, colmun_f, colmun_id, funcs, Ids, config):
    '''
    Select the genes to display from the gene column.

    A gene is counted once per distinct ID, and only on rows whose group
    value is contained in funcs.

    Parameters
    ----------
    colmun    : list of str : gene column (a cell may hold several genes)
    colmun_f  : list : group column, same row order as colmun
    colmun_id : list : ID column, same row order as colmun
    funcs     : list : group names that are in use
    Ids       : list : all IDs; its length scales the "use_gene_rate" threshold
    config    : config object (read via tools.config_getstr and config.getfloat)

    Return
    ------
    Sorted list of gene names that pass the "limited_genes" / "nouse_genes"
    filters and whose count is not below len(Ids) * use_gene_rate.
    '''
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    sept = tools.config_getstr(config, "result_format_mutation", "sept_gene")
    use_gene_rate = config.getfloat("mut", "use_gene_rate")

    limited_list = convert.text_to_list(tools.config_getstr(config, "mut", "limited_genes"), ",")
    nouse_list = convert.text_to_list(tools.config_getstr(config, "mut", "nouse_genes"), ",")

    counts = {}     # gene -> number of distinct IDs counted for it
    owners = {}     # gene -> IDs already counted for it

    for i, cell in enumerate(colmun):
        if colmun_f[i] not in funcs:
            continue

        text = cell.replace('"', '')
        names = [text] if sept == "" else text.split(sept)

        for gene in names:
            if gene == "":
                continue

            seen = owners.setdefault(gene, [])
            if colmun_id[i] in seen:
                # this ID was already counted for this gene
                continue
            seen.append(colmun_id[i])

            counts[gene] = counts.get(gene, 0) + 1

    def is_selected(gene):
        # whitelist (exact match), blacklist, then the minimum-count threshold
        if len(limited_list) > 0 and gene not in limited_list:
            return False
        if gene in nouse_list:
            return False
        return not counts[gene] < float(len(Ids))*use_gene_rate

    # gene list
    return sorted(gene for gene in counts if is_selected(gene))
Beispiel #7
0
def qc_main(args):
    '''
    Entry point of the QC report: merge the inputs, write the graph page, update the index.

    Parameters
    ----------
    args : parsed command line arguments; the attributes read here are
           config_file, input, output_dir, project_name, ellipsis, title,
           overview and remarks.

    Return
    ------
    None. Returns early (after printing a message) when no input file is
    found or when the merged result is empty.
    '''
    import paplot.qc as qc

    # config
    config, _conf_file = tools.load_config(tools.win_to_unix(args.config_file))

    input_list = tools.get_inputlist(tools.win_to_unix(args.input))
    if len(input_list) == 0:
        print("input no file.")
        return

    sec_in, _sec_out = tools.get_section("qc")
    suffix = tools.config_getstr(config, sec_in, "suffix")
    id_list = tools.get_idlist(input_list, suffix)

    # dirs
    output_html_dir = prep.create_dirs(
        tools.win_to_unix(args.output_dir), args.project_name, config)

    # The merged data file keeps the extension of the first input file.
    extension = os.path.splitext(input_list[0])[1]
    output_data = "data_%s%s" % (args.ellipsis, extension)

    positions = merge.merge_result(
        input_list,
        id_list,
        output_html_dir + "/" + output_data,
        "qc",
        config,
        extract=True,
    )
    if positions == {}:
        print("merge.merge_result: input file is invalid.")
        return

    html_name = "graph_%s.html" % args.ellipsis
    params_html = dict(
        dir=output_html_dir,
        data=output_data,
        js="data_%s.js" % args.ellipsis,
        html=html_name,
        project=args.project_name,
        title=args.title,
    )
    qc.output_html(params_html, positions, config)

    prep.create_index(
        config,
        tools.win_to_unix(args.output_dir),
        html_name,
        args.project_name,
        args.title,
        overview=args.overview,
        remarks=args.remarks,
    )
Beispiel #8
0
def main(argv):
    '''
    Command line entry point of the mutation matrix report.

    Parameters
    ----------
    argv : list of str : command line arguments (without the program name)

    Return
    ------
    None. Returns early (after printing a message) when no input file is
    found or when the merged result is empty.
    '''
    import paplot.subcode.tools as tools
    import paplot.subcode.merge as merge
    import paplot.mut as mut
    import paplot.prep as prep
    import argparse

    parser = argparse.ArgumentParser(prog=prog)

    parser.add_argument("--version", action="version", version=tools.version_text())
    parser.add_argument("input", type=str, help="input files path")
    parser.add_argument("output_dir", type=str, help="output file path")
    parser.add_argument("project_name", type=str, help="project name")
    parser.add_argument("--config_file", type=str, default="", help="config file")
    parser.add_argument("--remarks", type=str, default="", help="optional text")

    args = parser.parse_args(argv)

    # config: an empty path is handed to load_config when no file is given
    if len(args.config_file) > 0:
        config_path = tools.win_to_unix(args.config_file)
    else:
        config_path = ""
    config, _conf_file = tools.load_config(config_path)

    # a remark given on the command line overrides the configured one
    if len(args.remarks) > 0:
        tools.config_set(config, "style", "remarks", args.remarks)

    input_list = tools.get_inputlist(args.input)
    if len(input_list) == 0:
        print ("input no file.")
        return

    sec_in, _sec_out = tools.get_section("mutation")
    suffix = tools.config_getstr(config, sec_in, "suffix")
    id_list = tools.get_IDlist(input_list, suffix)

    # dirs
    output_html_dir = prep.create_dirs(tools.win_to_unix(args.output_dir), args.project_name, config)
    merged_csv = output_html_dir + "/data_mut.csv"

    positions = merge.merge_result(input_list, id_list, merged_csv, "mutation", config, extract=True)
    if positions == {}:
        print ("merge.merge_result: input file is invalid.")
        return

    mut.output_html(
        output_html_dir + "/data_mut.csv",
        output_html_dir + "/data_mut.js",
        output_html_dir,
        "graph_mut.html",
        args.project_name,
        positions,
        config,
    )

    prep.create_index(args.output_dir, args.project_name, config)
    
Beispiel #9
0
def create_index(output_dir, project_name, config):
    '''
    Write <output_dir>/index.html with links to the graphs that exist for the project.

    For CA and mutation, a "No Data" line is written instead of a link when
    the graph page is missing but the merged csv file exists.

    Parameters
    ----------
    output_dir   : str : directory that receives index.html
    project_name : str : project sub directory that holds the graph files
    config       : config object; "style"/"remarks" is read for the page

    Return
    ------
    None. The page is written to <output_dir>/index.html.
    '''
    link_qc = """<li><a href="{project}/graph_qc.html" target=_blank>QC graphs</a>......Quality Control of bam.</li>
"""
    link_sv = """<li><a href="{project}/graph_ca.html" target=_blank>CA graphs</a>......Chromosomal Aberration.</li>
"""
    link_mut = """<li><a href="{project}/graph_mut.html" target=_blank>Mutation matrix</a>......Gene-sample mutational profiles.</li>
"""
    link_sv_nodata = """<li>CA graphs......No Data.</li>
"""
    link_mut_nodata = """<li>Mutation matrix......No Data.</li>
"""
    import paplot.subcode.tools as tools
    import os

    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/index.html"
    with open(template_path) as f_template:
        html_template = f_template.read()

    project_dir = output_dir + "/" + project_name

    link_text = ""
    if os.path.exists(project_dir + "/graph_qc.html"):
        link_text += link_qc.format(project = project_name)

    if os.path.exists(project_dir + "/graph_ca.html"):
        link_text += link_sv.format(project = project_name)
    elif os.path.exists(project_dir + "/data_ca.csv"):
        link_text += link_sv_nodata

    if os.path.exists(project_dir + "/graph_mut.html"):
        link_text += link_mut.format(project = project_name)
    elif os.path.exists(project_dir + "/data_mut.csv"):
        link_text += link_mut_nodata

    # Render before opening the output so that a formatting error does not
    # leave a truncated index.html behind.
    html_text = html_template.format(
        project = project_name,
        version = tools.version_text(),
        date = tools.now_string(),
        remarks = tools.config_getstr(config, "style", "remarks"),
        link = link_text
    )

    # The context manager closes the file even when the write fails.
    with open(output_dir + "/index.html", "w") as f_html:
        f_html.write(html_text)
Beispiel #10
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert the merged CA data file to a JavaScript data file.

    Parameters
    ----------
    input_file : str : path of the merged data file
    output_file: str : path of the JavaScript file to write
    positions  : column positions, converted with merge.position_to_dict
    config     : config object (read via tools.config_getstr / config_getint)

    Return
    ------
    On success, a dictionary {"id_list": [...], "group_list": [...], "color": [...]}.
    None when no genome size is available, when the data file cannot be
    loaded, or when it contains no data.
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    import math

    genome_size = load_genome_size(config)

    if len(genome_size) == 0:
        return None

    # One template entry per chromosome; "Chr" is the sequential index.
    genome = ",\n".join(
        genome_size_template.format(Chr=i,
                                    size=entry[1],
                                    color=entry[2],
                                    label=entry[3])
        for i, entry in enumerate(genome_size))

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file,
            header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment"))
    except Exception as e:
        # Format the exception itself: the "message" attribute does not exist
        # on Python 3 exceptions and would raise inside this handler.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # group list
    if "group" in cols_di:
        group_pos = df.name_to_index(cols_di["group"])
        for row in df.data:
            group = row[group_pos]
            row[group_pos] = group.replace(" ", "_")
            if group == "":
                row[group_pos] = "_blank_"

        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]),
                                                "ca", "group", config)
        labels = groups

    else:
        group_pos = None
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    group_text = ",".join(
        group_template.format(name=groups[i],
                              label=labels[i],
                              color=colors_n[i])
        for i in range(len(groups)))

    # ID list (sorted, unique, empty IDs dropped)
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted({row[id_pos] for row in df.data if row[id_pos] != ""})
    id_index = {value: index for index, value in enumerate(Ids)}

    # Everything that is not a mandatory column (or the group) goes to the tooltip.
    option_keys = tools.dict_keys(cols_di)
    option_keys.remove("id")
    option_keys.remove("chr1")
    option_keys.remove("break1")
    option_keys.remove("chr2")
    option_keys.remove("break2")
    if "group" in option_keys:
        option_keys.remove("group")

    # node_size
    node_size_select = tools.config_getint(config, "ca", "selector_split_size",
                                           5000000)

    # The context manager closes the output even when a row fails to convert.
    with open(output_file, "w") as f:

        f.write(js_header
                + js_dataset.format(
                    node_size_detail=calc_node_size(genome_size, 500),
                    node_size_thumb=calc_node_size(genome_size, 250),
                    node_size_select=node_size_select,
                    genome_size=genome,
                    IDs=convert.list_to_text(Ids),
                    group=group_text,
                    tooltip=convert.pyformat_to_jstooltip_text(
                        cols_di, config, "ca", "result_format_ca", "tooltip_format"),
                    link_header=convert.list_to_text(option_keys),
                ))

        # write links
        data_links = []

        f.write(js_links_1)

        chr1_pos = df.name_to_index(cols_di["chr1"])
        break1_pos = df.name_to_index(cols_di["break1"])
        chr2_pos = df.name_to_index(cols_di["chr2"])
        break2_pos = df.name_to_index(cols_di["break2"])
        option_positions = [df.name_to_index(cols_di[key])
                            for key in option_keys if cols_di[key] != ""]

        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # rang > 0: position beyond the chromosome, rang < 0: chromosome not in use
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" %
                      (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" %
                      (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "true" if chr1 == chr2 else "false"

            tooltip_items = [row[pos] for pos in option_positions]

            if "group" in cols_di:
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)

        # integral bar item: per group, breakpoint bin -> IDs
        link = [{} for _ in groups]

        for l in data_links:
            group = l[5]
            if group < 0:
                # The group of this row is not in the group list. Indexing
                # with -1 would silently count it under the last group.
                continue

            bp1 = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}".format(
                Chr=l[1], Pos=int(math.floor(l[2] / node_size_select)))
            bp2 = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}".format(
                Chr=l[3], Pos=int(math.floor(l[4] / node_size_select)))

            # add bp1
            link[group].setdefault(bp1, []).append(l[0])

            # add bp2 (only once when both ends fall into the same bin)
            if bp1 != bp2:
                link[group].setdefault(bp2, []).append(l[0])

        select_item_text = ""
        select_value_text = ""
        select_key_text = ""

        for g in range(len(groups)):
            items = []
            values = []
            keys = []

            for i in link[g]:

                values.append(len(link[g][i]))

                # split key to chr and pos
                parts = i.split(".")[2].split("_")
                keys.append([int(parts[0]), int(parts[1])])

                # delete duplication, then store the IDs as indexes into Ids
                items.append([id_index[t] for t in sorted(set(link[g][i]))])

            select_value_text += "[%s]," % (",".join(map(str, values)).replace(
                " ", ""))
            select_key_text += "[%s]," % (",".join(map(str, keys)).replace(
                " ", ""))
            select_item_text += "[%s]," % (",".join(map(str, items)).replace(
                " ", ""))

        f.write(
            js_selection.format(value=select_value_text,
                                key=select_key_text,
                                item=select_item_text))

        f.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
Beispiel #11
0
def load_genome_size(config):
    '''
    Load the sizes of the chromosomes selected by the "ca"/"use_chrs" option.

    Each comma separated entry of "use_chrs" has the form
    "name[:label[:color]]". Names are compared case-insensitively and
    without a leading "chr".

    Parameters
    ----------
    config : config object (read via tools.config_getpath / config_getstr)

    Return
    ------
    A nested list, one entry per matching row of the genome file:
    [ [ chromosome name in lowercase without "chr",
        size (raised to at least one tenth of the largest size),
        color (default "#BBBBBB"),
        label (the configured one, else the name found in the genome file) ], ... ]
    '''
    path = tools.config_getpath(config, "genome", "path",
                                "../../config/hg19.csv")

    settings = tools.config_getstr(config, "ca",
                                   "use_chrs").replace(" ", "").split(",")
    use_chrs = []
    colors = []
    labels = []

    for setting in settings:
        items = setting.split(":")

        # Lowercase first and strip the prefix afterwards, so that "chrX",
        # "CHRX" and "x" all end up as "x" like the labels of the genome file.
        name = items[0].lower()
        if name[0:3] == "chr":
            name = name[3:]

        use_chrs.append(name)
        labels.append(items[1] if len(items) > 1 else "")
        colors.append(items[2] if len(items) > 2 else "#BBBBBB")

    if len(use_chrs) < 1:
        return []

    with open(path) as f:
        read = f.read()

    formatt = read.replace("\r", "\n").replace(" ", "")

    genome_size = []
    _max = 0
    for row in formatt.split("\n"):
        # comma separated when the row has a comma, tab separated otherwise
        sept = ","
        if row.find(",") < 0:
            sept = "\t"
        items = row.split(sept)

        if len(items) < 2:
            continue

        # skips header rows and anything whose second column is not a size
        if not items[1].isdigit():
            continue

        label = items[0].lower()
        if label[0:3] == "chr":
            label = label[3:]

        if label not in use_chrs:
            continue

        pos = use_chrs.index(label)
        size = int(items[1])

        if _max < size:
            _max = size

        if labels[pos] == "":
            labels[pos] = items[0]

        genome_size.append([label, size, colors[pos], labels[pos]])

    # Raise very small chromosomes to one tenth of the largest one.
    min_size = _max // 10
    for entry in genome_size:
        if entry[1] < min_size:
            entry[1] = min_size

    return genome_size
Beispiel #12
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert the merged mutation data file to a JavaScript data file.

    Parameters
    ----------
    input_file : str : path of the merged data file
    output_file: str : path of the JavaScript file to write
    positions  : column positions, converted with merge.position_to_dict
    config     : config object (read via tools.config_getstr and config.sections)

    Return
    ------
    On success, a dictionary {"subdata": [...]} with one entry
    {"pos", "label", "color", "title"} per sub plot that was written.
    None when the data file cannot be loaded or contains no data.
    '''
    import os
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file,
            header=1,
            sept=tools.config_getstr(config, "result_format_mutation", "sept"),
            comment=tools.config_getstr(config, "result_format_mutation", "comment"))
    except Exception as e:
        # Format the exception itself: the "message" attribute does not exist
        # on Python 3 exceptions and would raise inside this handler.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # func replace: an empty group cell gets a placeholder name
    group_pos = df.name_to_index(cols_di["group"])
    for row in df.data:
        if row[group_pos] == "":
            row[group_pos] = "_blank_"

    [funcs, colors_n] = convert.group_list(df.column(cols_di["group"]), "mutation", "group", config)

    # ID list (sorted, unique, empty IDs dropped)
    id_pos = df.name_to_index(cols_di["id"])
    id_list = sorted({row[id_pos] for row in df.data if row[id_pos] != ""})

    # gene list
    genes = genes_list(df.column(cols_di["gene"]),
                       df.column(cols_di["group"]),
                       df.column(cols_di["id"]),
                       funcs, id_list, config)

    # Everything that is not id / group / gene goes to the tooltip.
    option_keys = tools.dict_keys(cols_di)
    option_keys.remove("id")
    option_keys.remove("group")
    option_keys.remove("gene")

    # The context manager closes the output even when the conversion fails.
    with open(output_file, "w") as f:
        # mutation list
        f.write(js_header)
        f.write(js_mutations_1)

        # Loop invariants: separators and column positions.
        sept_group = tools.config_getstr(config, "result_format_mutation", "sept_group")
        sept_gene = tools.config_getstr(config, "result_format_mutation", "sept_gene")
        gene_pos = df.name_to_index(cols_di["gene"])
        option_positions = [df.name_to_index(cols_di[key])
                            for key in option_keys if cols_di[key] != ""]

        mutations = {}   # id -> func -> gene -> count
        tooltips = {}    # id -> func -> gene -> list of tooltip rows
        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            if iid not in mutations:
                mutations[iid] = {}
                tooltips[iid] = {}

            func_split = convert.text_to_list(row[group_pos], sept_group)
            gene_split = convert.text_to_list(row[gene_pos], sept_gene)
            tooltip_items = [row[pos] for pos in option_positions]

            for func in func_split:
                if func not in mutations[iid]:
                    mutations[iid][func] = {}
                    tooltips[iid][func] = {}

                for gene in gene_split:
                    if gene not in mutations[iid][func]:
                        mutations[iid][func][gene] = 1
                        tooltips[iid][func][gene] = []
                    else:
                        mutations[iid][func][gene] += 1

                    tooltips[iid][func][gene].append(tooltip_items)

        mutations_sum = 0
        for iid in tools.dict_keys(mutations):
            idx_i = convert.value_to_index(id_list, iid, -1)
            for func in tools.dict_keys(mutations[iid]):
                idx_f = convert.value_to_index(funcs, func, -1)
                for gene in tools.dict_keys(mutations[iid][func]):
                    idx_g = convert.value_to_index(genes, gene, -1)

                    # -1 marks an ID / group / gene that is not displayed.
                    if idx_i < 0 or idx_f < 0 or idx_g < 0:
                        continue

                    tooltip_text = "".join(
                        "[" + convert.list_to_text(tips) + "],"
                        for tips in tooltips[iid][func][gene])

                    f.write(mu_mutations_template.format(
                        ID=idx_i,
                        func=idx_f,
                        gene=idx_g,
                        num=mutations[iid][func][gene],
                        tooltip=tooltip_text))

                    mutations_sum += mutations[iid][func][gene]

        f.write(js_mutations_2.format(mutations_sum = mutations_sum))

        # write id, func, gene ... list
        f.write(js_dataset.format(
            Ids = convert.list_to_text(id_list),
            genes = convert.list_to_text(convert.list_prohibition(genes)),
            funcs = convert.list_to_text(convert.list_prohibition(funcs)),
            func_colors_n = convert.list_to_text(colors_n),
            mutation_header = convert.list_to_text(option_keys),
            checker_title = convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_checker_title"),
            checker_partial = convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_checker_partial"),
            gene_title = convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_gene_title"),
            gene_partial = convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_gene_partial"),
            id_title = convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_id_title"),
            id_partial = convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_id_partial"),
        ))

        ##### subdata #####
        f.write(js_subdata_1)
        subdata = []
        counter = 0
        for sec in config.sections():
            # The section name prefix decides the "pos" value of the sub plot.
            if sec.startswith("mutation_subplot_type1_"):
                pos = 1
            elif sec.startswith("mutation_subplot_type2_"):
                pos = 2
            else:
                continue

            ret_val = load_subdata(id_list, sec, config)
            if ret_val is None:
                continue
            [data_text, item, sub_colors, label, title] = ret_val

            name = "sub%d" % (counter)
            counter += 1

            f.write(subdata_template.format(
                name = name,
                title = title,
                type = tools.config_getstr(config, sec, "mode"),
                item = convert.list_to_text(item),
                label = convert.list_to_text(label),
                colors_n = convert.list_to_text(sub_colors),
                data = data_text))

            subdata.append({"pos":pos, "label":label, "color":sub_colors, "title": title})

        f.write(js_subdata_2)

        ##### functions #####
        template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/data_mutation.js"
        with open(template_path) as f_template:
            js_function = f_template.read()
        f.write(js_function)
        f.write(js_footer)

    dataset = {}
    dataset["subdata"] = subdata
    return dataset
Beispiel #13
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert the input file to JSON-like data and write it to a JavaScript file,
    followed by the JavaScript functions that process that data.

    Parameters
    ----------
    input_file : str : The absolute path of formatted data file
    output_file: str : The absolute path of JavaScript file
    positions  : dict: A nested dictionary with "must" and "option" as keys
    config     : configparser.RawConfigParser

    Return
    ------
    On success, return a dictionary: {"id_list": [...], "group_list": [...], "color": [...]}
        id_list   : The values for id column (sorted, without duplicates)
        group_list: The names of groups
        color     : The colors in groups
    Return None when the genome size list is empty, when the input file cannot
    be loaded, or when the input file has no data rows.
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import os
    import math

    # genome_size: a nested list
    # [ [ A chromosome number in lowercase letters,
    #     The size of the 1st element,
    #     The color of the 1st element,
    #     The original name of the 1st element(that is not necessarily lowercase) or a user-defined name, ], ... ]
    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    # genome: dictionary-style string like this
    # {"chr":"00", "size":249250621, "color":"#BBBBBB", "label":"1",},
    # {"chr":"01", "size":243199373, "color":"#BBBBBB", "label":"2",},
    # ...
    # chr  : Sequential number
    # size : Size corresponding to the label
    # color: Color corresponding to the label
    # label: Name corresponding to chromosome
    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size)
    )

    # Create a data frame that has title and data attributions
    # title is a list like ['Break1', 'Break2', 'Chr1', 'Chr2', 'Sample']
    # data is a nested list like [[16019088, 62784483, '14', '12', 'SAMPLE1'], ...]
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment")
        )
    except Exception as e:
        # Format the exception itself: Python 3 exceptions have no ".message" attribute
        print("failure open data %s, %s" % (input_file, e))
        return None
    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # Create groups, labels, and colors_n
    # cols_di: a dictionary that merges must and option values
    #        : ex) {'chr1': 'Chr1', 'break1': 'Break1', 'chr2': 'Chr2', 'break2': 'Break2', 'id': 'Sample'}
    cols_di = merge.position_to_dict(positions)
    has_group = "group" in cols_di
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])  # Index of the group column
        for row in df.data:
            group = row[group_pos]
            # Normalize the group value: no spaces, and a placeholder for empty values
            row[group_pos] = "_blank_" if group == "" else group.replace(" ", "_")
        # groups  : list: group names
        # labels  : list: group names
        # colors_n: list: color values for groups
        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "ca", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]  # purple, green

    # Create group_text that is a dictionary-style string with name, label, color
    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups))
    )

    # Column indexes are the same for every row, so look them up only once
    id_pos = df.name_to_index(cols_di["id"])
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])

    # id_list: Values for "id" column
    #        : Sorted without duplicates
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))
    id_index = dict((value, i) for i, value in enumerate(id_list))  # id value -> index in id_list

    # option_keys: Store the option keys of the positions dictionary
    option_keys = tools.dict_keys(cols_di)  # option_keys: list: sorted keys of cols_di
    option_keys.remove("id")      # option key
    option_keys.remove("chr1")    # must key
    option_keys.remove("break1")  # must key
    option_keys.remove("chr2")    # must key
    option_keys.remove("break2")  # must key
    if "group" in option_keys:
        option_keys.remove("group")  # option key

    # Column indexes of the tooltip values; keys without a column title are skipped
    tooltip_pos = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    # node_size: Size to divide chromosomes
    node_size_select = tools.config_getint(config, "ca", "selector_split_size", 5000000)

    # The context manager closes the output file even when a later step raises
    with open(output_file, "w") as f:

        # Write header and dataset of JavaScript file

        f.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),  # node size for detailed thumbnails
            node_size_thumb=calc_node_size(genome_size, 250),   # node size for rough thumbnails
            node_size_select=node_size_select,                  # node size for bar graph
            genome_size=genome,                 # A dictionary-style string containing keys of "chr", "size", "color", and "label"
            IDs=convert.list_to_text(id_list),  # A comma-separated string of id column values
            group=group_text,                   # A dictionary-style string containing keys of "name", "label", and "color"
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "ca", "result_format_ca", "tooltip_format"),  # A dictionary-style string containing keys of "format" and "keys"
            link_header=convert.list_to_text(option_keys),
        ))

        # Write link of JavaScript file

        f.write(js_links_1)  # Write the leading part

        data_links = []
        for row in df.data:
            iid = row[id_pos]  # iid: the value of "id" column
            # Ignore empty string
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])  # chromosome1
            pos1 = row[break1_pos]     # break point1
            chr2 = str(row[chr2_pos])  # chromosome2
            pos2 = row[break2_pos]     # break point2

            # Check if chr1 and chr2 is in the genome list
            # Check if pos1 and pos2 is in the chr1 length
            # index1 and index2 are indexes of the genome_size for chr1 and chr2
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                # chr1 is undefined: skip silently
                continue
            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                # chr2 is undefined: skip silently
                continue

            # Whether chr1 and chr2 are the same chromosome
            inner_flg = "true" if chr1 == chr2 else "false"

            # Set group_id: -1, 0, 1, index values of groups
            #             : Sequential numbers identifying groups
            if has_group:
                # If the value of group column is in group list, then group_id is the index of the list
                # Others, group_id is -1
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0  # chr1 and chr2 are in the different group
            else:
                group_id = 1  # chr1 and chr2 are in the same group

            # Add an element to data_links
            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            # tooltip_items: Data for tooltip
            #              : Values of the option columns except group, id, and must keys
            tooltip_items = [row[pos] for pos in tooltip_pos]

            # Write link
            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)  # Write the ending part

        # Write integral bar item

        # link: [{bp1: [iid, ...], bp2: [iid, ...]}, {...}, ...]
        #     : Separate elements by group_id
        link = [{} for _ in groups]

        for dl in data_links:
            # dl = [iid, index1, pos1, index2, pos2, group_id]
            # iid     : The value of id title
            # index1/2: The index of genome_size
            # pos1/2  : Break point
            # group_id: Index of groups

            # Chr: The index of genome_size
            # Pos: A break position based on node
            bp1 = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}".format(Chr=dl[1], Pos=int(math.floor(dl[2] / node_size_select)))
            bp2 = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}".format(Chr=dl[3], Pos=int(math.floor(dl[4] / node_size_select)))

            # NOTE(review): group_id is -1 for a row whose group is not in `groups`;
            # link[-1] then addresses the LAST group. Behavior kept as-is -- confirm
            # whether such rows should be excluded from the integral bar instead.
            group_link = link[dl[5]]

            # For bp1
            group_link.setdefault(bp1, []).append(dl[0])  # Append iid

            # For bp2 (only when it falls on a different node than bp1)
            if bp1 != bp2:
                group_link.setdefault(bp2, []).append(dl[0])  # Append iid

        select_value_text = ""
        select_key_text = ""
        select_item_text = ""
        for i in range(len(groups)):
            values = []  # [Number of id, ...]
            keys = []    # [[genome_size index, Break position], ...]
            items = []   # [[id_list index, ...], ...]

            for bp in sorted(link[i].keys()):
                # values element
                # link[i][bp]: list that stores id column values at a break position of a chromosome in a group
                #            : Duplicate values are stored
                values.append(len(link[i][bp]))

                # keys element
                parts = bp.split(".")[2].split("_")  # parts: [Chr, Pos]
                keys.append([int(parts[0]), int(parts[1])])

                # items element: id_list indexes of the distinct ids at this position
                items.append([id_index[t] for t in sorted(set(link[i][bp]))])

            select_value_text += "[%s]," % (",".join(map(str, values)).replace(" ", ""))  # += [1,1,...],
            select_key_text += "[%s]," % (",".join(map(str, keys)).replace(" ", ""))      # += [[0,1],[0,25],...],
            select_item_text += "[%s]," % (",".join(map(str, items)).replace(" ", ""))    # += [[9],[8],...],

        f.write(js_selection.format(
            value=select_value_text,
            key=select_key_text,
            item=select_item_text
        ))

        # Write rest of JavaScript file and footer

        with open(os.path.dirname(os.path.abspath(__file__)) + "/templates/data_ca.js") as f_template:  # ./templates/data_ca.js
            js_function = f_template.read()
        f.write(js_function)
        f.write(js_footer)

    return {"id_list": id_list, "group_list": groups, "color": colors_n}
Beispiel #14
0
def convert_tojs(params, config):
    """
    Convert signature JSON data to a JavaScript data file.

    Parameters
    ----------
    params: dict: "data" is the path of the input JSON file,
                  "dir" is the directory the JavaScript file is written to,
                  "ellipsis" is the prefix of the file name identifier
    config: configuration object read through the paplot tools helpers

    Return
    ------
    None when the JSON file cannot be read or when the substitution list
         length is not a power of 4
    {}   when the data contains no signature
    Otherwise a dictionary with the keys "sig_num", "js", "html" and "intergral"
    """

    import json
    import itertools
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    # data read
    try:
        # The context manager closes the input file even when parsing fails
        with open(params["data"]) as data_file:
            jsonData = json.load(data_file)
    except Exception as e:
        # Format the exception itself: Python 3 exceptions have no ".message" attribute
        print ("failure open data %s, %s" % (params["data"], e))
        return None

    key_Ids = tools.config_getstr(config, "result_format_signature", "key_id")
    key_signature = tools.config_getstr(config, "result_format_signature", "key_signature")
    key_mutations = tools.config_getstr(config, "result_format_signature", "key_mutation")
    key_mutation_count = tools.config_getstr(config, "result_format_signature", "key_mutation_count")

    sig_num = len(jsonData[key_signature])

    if sig_num == 0:
        print ("no data %s" % params["data"])
        return {}

    # signature names
    signature_list = ["Signature %d" % (s + 1) for s in range(sig_num)]

    # each signature colors
    sig_color_list = color.create_color_array(sig_num, color.r_set2)

    # use background?
    use_background = tools.config_getboolean(config, "result_format_signature", "background")
    if use_background:
        signature_list.append("Background ")
        sig_color_list.append(color.r_set2_gray)

    # axis-y max: a negative configured value means "use the largest value in the data"
    sig_y_max = tools.config_getint(config, "signature", "signature_y_max")
    if (sig_y_max < 0):
        for sig in jsonData[key_signature]:
            for sub in sig:
                m = max(sub)
                if sig_y_max < m:
                    sig_y_max = m

    # route list
    # The substitution list length must be 4^N. N is computed with integer
    # arithmetic because a floating point logarithm is not exact for every
    # power of 4 and is undefined for 0.
    sub_num = len(jsonData[key_signature][0][0])
    log = 0
    remainder = sub_num
    while remainder > 1 and remainder % 4 == 0:
        remainder //= 4
        log += 1
    if remainder != 1:
        print ("substitution's list length is invalid (%d, not number 4^N)" % sub_num)
        return None

    route_id = []
    route_list = []
    for p in itertools.product(("A","C","G","T"), repeat = log):
        route_id.append("".join(p))
        route_list.append(p)

    # substruction
    sub_di = [
        {"name":"C > A", "ref":"C", "color":tools.config_getstr(config, "signature", "alt_color_CtoA")},
        {"name":"C > G", "ref":"C", "color":tools.config_getstr(config, "signature", "alt_color_CtoG")},
        {"name":"C > T", "ref":"C", "color":tools.config_getstr(config, "signature", "alt_color_CtoT")},
        {"name":"T > A", "ref":"T", "color":tools.config_getstr(config, "signature", "alt_color_TtoA")},
        {"name":"T > C", "ref":"T", "color":tools.config_getstr(config, "signature", "alt_color_TtoC")},
        {"name":"T > G", "ref":"T", "color":tools.config_getstr(config, "signature", "alt_color_TtoG")},
    ]

    # The reference base is inserted in the middle of each route
    half = log // 2
    substruction = ""
    for sub in sub_di:
        route = [
            "p".join(r[0:half]) + "p" + sub["ref"] + "p" + "p".join(r[half:])
            for r in route_list
        ]
        substruction += js_substruction_template.format(name = sub["name"], color = sub["color"], route = convert.list_to_text(route))

    # Id list
    id_txt = ""
    if key_Ids in jsonData:
        id_txt = convert.list_to_text(jsonData[key_Ids])

    # mutations
    mutations_txt = ""
    if key_mutations in jsonData:
        mutations_txt = "".join("[%d,%d,%f]," % (m[0], m[1], m[2]) for m in jsonData[key_mutations])

    # signature
    dataset_sig = ""
    for sig in jsonData[key_signature]:
        tmp = "".join("[" + ",".join(map(str, sub)) + "]," for sub in sig)
        dataset_sig += ("[" + tmp + "],")

    mutation_count_txt = ""
    if (key_mutation_count != "") and (key_mutation_count in jsonData.keys()):
        mutation_count_txt = "".join("%d," % v for v in jsonData[key_mutation_count])

    # output
    # The background counts as one more signature in the file name identifier
    sig_num_sift = 1 if use_background else 0
    ellipsis = "%s%d" % (params["ellipsis"], (sig_num + sig_num_sift))

    js_file = "data_%s.js" % ellipsis
    html_file = "graph_%s.html" % ellipsis

    keys_di = {"sig":"", "route":"", "id":""}
    # The context manager closes the output file even when formatting raises
    with open(params["dir"] + "/" + js_file, "w") as f:
        f.write(js_header \
            + js_dataset.format(Ids = id_txt, \
                signatures = convert.list_to_text(signature_list), \
                colors = convert.list_to_text(sig_color_list), \
                dataset_sig_max = sig_y_max, \
                mutations = mutations_txt, \
                dataset_sig = dataset_sig, \
                route_id = convert.list_to_text(route_id), \
                substruction = substruction, \
                signature_title = convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_signature_title"), \
                signature_partial = convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_signature_partial"), \
                mutation_title = convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_mutation_title"), \
                mutation_partial = convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_mutation_partial"), \
                mutation_count = mutation_count_txt, \
                )
            + js_function)

    # The integral view needs the id, mutation and mutation count keys to be configured
    integral = True
    if key_Ids == "" or key_mutations == "" or key_mutation_count == "":
        integral = False

    # NOTE(review): the key is spelled "intergral"; it is kept unchanged because
    # callers presumably look it up under this name -- confirm before renaming.
    return {"sig_num": sig_num,
            "js": js_file,
            "html": html_file,
            "intergral": integral,
            }
Beispiel #15
0
def convert_tojs(input_file, output_file, positions, config):
    """
    Convert the merged QC data file to a JavaScript data file.

    Parameters
    ----------
    input_file : str : path of the merged data file
    output_file: str : path of the JavaScript file to write
    positions  : dict: column positions, merged with merge.position_to_dict
    config     : configuration object read through the paplot tools helpers

    Return
    ------
    {"plots": [chart id, ...]} on success
    None when the input file cannot be loaded or has no data rows
    """

    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(input_file, header = 1, \
            sept = tools.config_getstr(config, "merge_format_qc", "sept"), \
            comment = tools.config_getstr(config, "result_format_qc", "comment") \
            )
    except Exception as e:
        # Format the exception itself: Python 3 exceptions have no ".message" attribute
        print ("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print ("no data %s" % input_file)
        return None

    # chart list
    plots_text = ""
    plots_option = []

    # NOTE(review): config_sections is sorted with "qc_chart_brush" moved to the
    # front, but the loop below iterates config.sections() instead, so this
    # ordering is never used. Behavior kept as-is -- confirm which order is intended.
    config_sections = config.sections()
    config_sections.sort()
    if "qc_chart_brush" in config_sections:
        config_sections.remove("qc_chart_brush")
        config_sections.insert(0, "qc_chart_brush")

    for sec in config.sections():
        if not sec.startswith("qc_chart_"):
            continue

        chart_id = sec.replace("qc_chart_", "chart_")

        # name_set is a comma separated list of "label" or "label:color" entries
        stack_id = []
        label = []
        colors_di = {}
        for name_set in tools.config_getstr(config, sec, "name_set").split(","):
            name_set_split = convert.text_to_list(name_set, ":")
            if len(name_set_split) == 0:
                continue

            # Stack ids are numbered in the order of the accepted entries
            stack_id.append("stack" + str(len(stack_id)))
            label.append(name_set_split[0])

            if len(name_set_split) > 1:
                colors_di[name_set_split[0]] = color.name_to_value(name_set_split[1])

        # fill in undefined items
        colors_di = color.create_color_dict(label, colors_di, color.metro_colors)

        # dict to value
        colors_li = [colors_di[key] for key in label]

        plots_text += plot_template.format(
            chart_id = chart_id, \
            title = tools.config_getstr(config, sec, "title"), \
            title_y = tools.config_getstr(config, sec, "title_y"), \
            stack = convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "stack"), \
            stack_id = convert.list_to_text(stack_id), \
            label = convert.list_to_text(label), \
            color = convert.list_to_text(colors_li), \
            tooltip = convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "tooltip_format"), \
            )
        plots_option.append(chart_id)

    # ID list (sorted, without duplicates)
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # header
    # sorted() works on both a list and a Python 3 dict view, unlike keys().sort()
    headers = sorted(cols_di.keys())
    # Column indexes are the same for every row, so look them up only once
    header_pos = [df.name_to_index(cols_di[item]) for item in headers]

    # The context manager closes the output file even when a write raises
    with open(output_file, "w") as f:
        f.write(js_header)
        f.write(js_dataset.format(IDs = convert.list_to_text(Ids), \
                                header = convert.list_to_text(headers), \
                                plots = plots_text))
        f.write(js_data1)

        # values
        for row in df.data:
            if row[id_pos] == "": continue

            values = []
            for pos in header_pos:
                val = row[pos]
                # Exact type checks are kept on purpose: subclasses fall through to str()
                if type(val) is str:
                    values.append("'" + val + "'")
                elif type(val) is float:
                    values.append('%.2f' % val)
                else:
                    values.append(str(val))

            f.write("[" + ",".join(values) + "],")

        f.write(js_data2)
        f.write(js_function)

    return {"plots": plots_option}
Beispiel #16
0
def convert_tojs(input_file, output_file, positions, config):
    """
    Convert the merged mutation data file to a JavaScript data file.

    Parameters
    ----------
    input_file : str : path of the merged data file
    output_file: str : path of the JavaScript file to write
    positions  : dict: column positions, merged with merge.position_to_dict
    config     : configuration object read through the paplot tools helpers

    Return
    ------
    {"func": [...], "color": [...], "subdata": [...]} on success
        func   : function names
        color  : colors of the functions
        subdata: one {"pos", "label", "color", "title"} dictionary per sub plot
    None when the input file cannot be loaded or has no data rows
    """

    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(input_file, header = 1, \
            sept = tools.config_getstr(config, "merge_format_mutation", "sept"), \
            comment = tools.config_getstr(config, "result_format_mutation", "comment") \
            )
    except Exception as e:
        # Format the exception itself: Python 3 exceptions have no ".message" attribute
        print ("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print ("no data %s" % input_file)
        return None

    # Column indexes are the same for every row, so look them up only once
    id_pos = df.name_to_index(cols_di["id"])
    func_pos = df.name_to_index(cols_di["func"])
    gene_pos = df.name_to_index(cols_di["gene"])

    # func replace: no spaces, and a placeholder for empty values
    for row in df.data:
        func = row[func_pos]
        row[func_pos] = "_blank_" if func == "" else func.replace(" ", "_")

    [funcs, colors_n] = convert.group_list(df.column(cols_di["func"]), "mut", "func", config)

    # ID list (sorted, without duplicates)
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    genes = genes_list(df.column(cols_di["gene"]), \
                        df.column(cols_di["func"]), \
                        df.column(cols_di["id"]), \
                        funcs, Ids, config)

    # A sorted list is used because a Python 3 dict view has no remove();
    # the header and the tooltip values below share this same order.
    option_keys = sorted(cols_di.keys())
    option_keys.remove("id")
    option_keys.remove("func")
    option_keys.remove("gene")

    # Column indexes of the tooltip values; keys without a column title are skipped
    tooltip_pos = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    sept_func = tools.config_getstr(config, "result_format_mutation", "sept_func")
    sept_gene = tools.config_getstr(config, "result_format_mutation", "sept_gene")

    dataset = {"func":funcs, "color":colors_n}

    # The context manager closes the output file even when a later step raises
    with open(output_file, "w") as f:
        f.write(js_header \
            + js_dataset.format(Ids = convert.list_to_text(Ids), \
                genes = convert.list_to_text(genes), \
                funcs = convert.list_to_text(funcs), \
                func_colors_n = convert.list_to_text(colors_n), \
                mutation_header = convert.list_to_text(option_keys), \
                checker_title = convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_checker_title"), \
                checker_partial = convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_checker_partial"), \
                gene_title = convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_gene_title"), \
                gene_partial = convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_gene_partial"), \
                id_title = convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_id_title"), \
                id_partial = convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_id_partial"), \
                ))

        # mutation list
        f.write(js_mutations_1)

        # mutations[id][func][gene]: number of rows
        # tooltips[id][func][gene] : tooltip values of each of those rows
        mutations = {}
        tooltips = {}
        for row in df.data:
            iid = row[id_pos]
            if iid == "": continue

            if iid not in mutations:
                mutations[iid] = {}
                tooltips[iid] = {}

            func_split = convert.text_to_list(row[func_pos], sept_func)
            gene_split = convert.text_to_list(row[gene_pos], sept_gene)
            tooltip_items = [row[pos] for pos in tooltip_pos]

            for func in func_split:
                if func not in mutations[iid]:
                    mutations[iid][func] = {}
                    tooltips[iid][func] = {}

                for gene in gene_split:
                    if gene not in mutations[iid][func]:
                        mutations[iid][func][gene] = 1
                        tooltips[iid][func][gene] = []
                    else:
                        mutations[iid][func][gene] += 1

                    tooltips[iid][func][gene].append(tooltip_items)

        mutations_sum = 0
        for iid in mutations:
            idx_i = convert.value_to_index(Ids, iid, -1)
            for func in mutations[iid]:
                idx_f = convert.value_to_index(funcs, func, -1)
                for gene in mutations[iid][func]:
                    idx_g = convert.value_to_index(genes, gene, -1)

                    # Entries whose id, func or gene is not in the output lists are dropped
                    if idx_i < 0 or idx_f < 0 or idx_g < 0:
                        continue

                    tooltip_text = ""
                    for tips in tooltips[iid][func][gene]:
                        tooltip_text += "[" + convert.list_to_text(tips) + "],"

                    f.write(mu_mutations_template.format(ID = idx_i, \
                        func = idx_f , \
                        gene = idx_g, \
                        num = mutations[iid][func][gene],
                        tooltip = tooltip_text))

                    mutations_sum += mutations[iid][func][gene]

        f.write(js_mutations_2.format(mutations_sum = mutations_sum))

        ##### subdata #####
        f.write(js_subdata_1)
        subdata = []
        counter = 0
        for sec in config.sections():
            # pos is 1 or 2 depending on the sub plot type of the section
            if sec.startswith("mut_subplot_type1_"):
                pos = 1
            elif sec.startswith("mut_subplot_type2_"):
                pos = 2
            else: continue

            ret_val = load_subdata(Ids, sec, config)
            if ret_val is None: continue
            [data_text, item, sub_colors, label, title] = ret_val

            # Only sub plots that could be loaded get a sequential name
            name = "sub%d" % (counter)
            counter += 1

            f.write(subdata_template.format(name = name, \
                    title = title, \
                    type = tools.config_getstr(config, sec, "mode"), \
                    item = convert.list_to_text(item), \
                    label = convert.list_to_text(label), \
                    colors_n = convert.list_to_text(sub_colors), \
                    data = data_text ))

            subdata.append({"pos":pos, "label":label, "color":sub_colors, "title": title})

        f.write(js_subdata_2)

        ##### functions #####
        f.write(js_function)

    dataset["subdata"] = subdata
    return dataset
Beispiel #17
0
def pyformat_to_jstooltip_text(positions, config, section_fmt, section_col,
                               item_startwith):
    """
    Create tooltip components obtained by decomposing a string of the tooltip_format

    Parameters
    ----------
    positions     : dict: A dictionary with column titles as values
    config        : configparser.RawConfigParser
    section_fmt   : str: A section name such as "ca", "mutation", "pmsignature", "signature", ...
    section_col   : str: A section name such as "result_format_ca", "result_format_mutation", "result_format_qc", ...
                       : Not read by this function
    item_startwith: str: The first string of section key, such as "tooltip_format", "tooltip_format_checker_title", ...

    Return
    ------
    A dictionary-style string with "format" and "keys"
        format: [[{...}, ...], ...] where the dictionaries have the following keys and values
              : label: For fix type, a string outside the braces
              :      : Others, a string before the letter ":" in braces
              : type : One of the following: fix, numeric, str
              : keys : For fix type, empty string
              :      : Others, a string that precedes the letter ":" in braces and excludes the arithmetic term
              : ext  : For fix type, empty string
              :      : Others, a string after the letter ":" in braces
        keys  : a set-style string
              : A concatenated string of the above "keys" values
    """

    tooltip_detail_templete = "{{label:'{label}',type:'{type}',keys:[{keys}],ext:'{ext}'}},"

    import re
    re_compile = re.compile(
        r"\{[0-9a-zA-Z\+\-\*\/\#\:\,\.\_\ ]+\}")  # Bracketed area
    re_compile2 = re.compile(r"[\+\-\*\/\:]")     # Arithmetic operators and the ext separator

    # Determine keys_list and tooltip_fomat_text

    keys_list = []
    tooltip_fomat_text = ""
    for option in tools.config_getoptions(config, section_fmt, item_startwith):

        # formt        : "[{chr1}] {break1:,}; [{chr2}] {break2:,}"
        # key_text_list: ["{chr1}", "{break1:,}", "{chr2}", "{break2:,}"]
        formt = tools.config_getstr(config, section_fmt,
                                    option)  # tooltip format
        key_text_list = re_compile.findall(
            formt)  # Extract string enclosed in braces

        # Determine tooltip_detail_text

        tooltip_detail_text = ""
        for key_text in key_text_list:
            # key_text: "{chr1}"   "{break1:,}"   "{chr2}"   "{break2:,}"   ...

            # Update tooltip_detail_text: fix type
            start = formt.find(key_text)  # Index of matched string
            if start > 0:
                # label: "["   "] "   "; ["   "] "   ...
                tooltip_detail_text += tooltip_detail_templete.format(
                    label=formt[0:start], type="fix", keys="", ext="")

            # slice for the next loop
            formt = formt[start + len(key_text):]

            # Update key_text: lowercase
            # Set label_text
            # Set sub_keys
            key_text = key_text.lower()
            label_text = key_text.replace(" ", "").replace("{", "").replace(
                "}", "")  # "{break1:,}" => "break1:,"
            sub_keys = re_compile2.split(
                label_text)  # "break1:,"   => ["break1", ","]

            # Set ttype: numeric or str
            ttype = "numeric"  # numeric if key_text contains either "+", "-", "*", "/", or ":"
            if len(sub_keys) == 1:
                ttype = "str"

            # Set ext          : "" or something like ","
            # Update label_text: remove extra characters
            # Update sub_keys  : remove extra elements
            ext = ""
            if label_text.find(":") > 0:
                ext_start = label_text.index(":")  # Index of :
                ext = label_text[ext_start + 1:]  # "break1:," => ","
                label_text = label_text[0:ext_start]  # "break1:," => "break1"
                sub_keys = re_compile2.split(
                    label_text)  # "break1"   => ["break1"]

            # Update sub_keys: remove numeric block
            # A new list is built instead of removing elements while iterating,
            # which skipped the element following each removed one.
            non_numeric_keys = []
            for sub_key in sub_keys:
                try:
                    float(sub_key)
                except ValueError:
                    non_numeric_keys.append(sub_key)
            sub_keys = non_numeric_keys

            # Set check        : True or False
            # Update label_text: add braces
            # NOTE(review): str.replace also rewrites a key that occurs inside a
            # longer key (e.g. "chr" inside "chr1") -- confirm keys never overlap.
            check = True
            for sub_key in list(set(sub_keys)):
                if sub_key not in positions.keys(
                ) and not sub_key.startswith("#"):
                    print("[WARNING] key:{key} is not defined.".format(
                        key=sub_key))
                    check = False
                    break
                label_text = label_text.replace(sub_key, "{" + sub_key +
                                                "}")  # "break1" => "{break1}"

            # Update tooltip_detail_text: numeric or str type
            if check is True:
                # label: "{chr1}"   "{break1}"   "{chr2}"   "{break2}"   ...
                # keys : "'chr1',"  "'break1',"  "'chr2',"  "'break2',"  ...  "'" and "," are added
                # ext  : ""         ","          ""         ","          ...
                tooltip_detail_text += tooltip_detail_templete.format(
                    label=label_text,
                    type=ttype,
                    keys=list_to_text(sub_keys),
                    ext=ext)
                keys_list.extend(
                    sub_keys
                )  # add a list such as ["chr1"], ["break1"], ["chr2"], ["break2"], ...

        # Update tooltip_detail_text: fix type (text after the last braces)
        if len(formt) > 0:
            tooltip_detail_text += tooltip_detail_templete.format(label=formt,
                                                                  type="fix",
                                                                  keys="",
                                                                  ext="")

        tooltip_fomat_text += "[" + tooltip_detail_text + "],"

    # Convert a list to a string
    # keys_list: ['break1', 'break2', 'chr1', 'chr2']
    # key_text : "{break1} {break2} {chr1} {chr2} "
    key_text = ""
    for key in sorted(set(keys_list)):
        key_text += "{" + key.lower() + "} "

    tooltip_templete = "{{format:[{formats}], keys: '{keys}'}}"
    return tooltip_templete.format(formats=tooltip_fomat_text, keys=key_text)
Beispiel #18
0
def ca_main(args):
    '''
    Entry point of the "ca" sub-command: merge the inputs, write the CA report
    and refresh the homepage.

    Parameters
    ----------
    args: argparse.Namespace
        : Arguments specified in the paplot command

    Attributes of args read here
    ----------
    input       : string: Input file path
    output_dir  : string: Output directory path
    project_name: string: Project name
    config_file : string: Config file path
    title       : string: Report title name, passed to the report and the homepage
    ellipsis    : string: Identifier embedded in the generated file names
    overview    : string: Report summary, passed to prep.create_index
    remarks     : string: Additional information, passed to prep.create_index

    Return
    ------
    None
    '''
    import paplot.ca as ca

    # Configuration (the second element returned by load_config is not needed here)
    config_path = tools.win_to_unix(args.config_file)
    config, _conf_file = tools.load_config(config_path)

    # Input data files; nothing to do when none is found
    input_path = tools.win_to_unix(args.input)
    input_list = tools.get_inputlist(input_path)
    if len(input_list) < 1:
        print("input no file.")
        return

    # IDs derived from the input files and the configured suffix
    sec_in, _sec_out = tools.get_section("ca")
    suffix = tools.config_getstr(config, sec_in, "suffix")
    id_list = tools.get_idlist(input_list, suffix)

    # Project directory and the merged data file placed inside it.
    # The merged file keeps the extension of the first input file.
    output_html_dir = prep.create_dirs(
        tools.win_to_unix(args.output_dir), args.project_name, config)
    output_data = "data_%s%s" % (
        args.ellipsis, os.path.splitext(input_list[0])[1])
    merged_path = output_html_dir + "/" + output_data
    positions = merge.merge_result(
        input_list, id_list, merged_path, "ca", config, extract=True)
    if positions == {}:
        print("merge.merge_result: input file is invalid.")
        return

    # JavaScript and HTML files of the CA report
    html_name = "graph_%s.html" % args.ellipsis
    params_html = {}
    params_html["dir"] = output_html_dir  # project directory
    params_html["data"] = output_data  # merged data file name
    params_html["js"] = "data_%s.js" % args.ellipsis  # JavaScript file name
    params_html["html"] = html_name  # HTML file name
    params_html["project"] = args.project_name  # project name from the command line
    params_html["title"] = args.title  # report title
    ca.output_html(params_html, positions, config)

    # Homepage linking to the report
    prep.create_index(
        config,
        tools.win_to_unix(args.output_dir),
        html_name,
        args.project_name,
        args.title,
        overview=args.overview,
        remarks=args.remarks,
    )
Beispiel #19
0
def load_genome_size(config):
    """
    Read the genome-size file and return the chromosomes selected for the SV plot.

    The chromosomes are selected by the "use_chrs" option of the [sv] section,
    a comma separated list of "Name[:Label[:Color]]" entries. Names are compared
    case-insensitively and without a leading "chr", both for the configuration
    entries and for the first column of the genome-size file.

    Parameters
    ----------
    config: Configuration object read through tools.config_getpath / tools.config_getstr

    Return
    ------
    list: One [name, size, color, label] list per matching row, in file order.
          name is lowercase without "chr"; label falls back to the file's
          original name when the configuration gives none.
          Sizes smaller than 1/10 of the largest size are raised to that value.
          An empty list is returned when no chromosome is configured.
    """
    path = tools.config_getpath(config, "genome", "path", "../../config/hg19.csv")

    settings = tools.config_getstr(config, "sv", "use_chrs").replace(" ", "").split(",")
    use_chrs = []
    colors = []
    labels = []

    for setting in settings:
        items = setting.split(":")

        # Normalize first (lowercase), then strip the prefix, so that "chrX",
        # "ChrX" and "X" all become "x" like the labels read from the file.
        name = items[0].lower()
        if name.startswith("chr"):
            name = name[3:]
        if name == "":
            # empty option or stray comma: nothing to select
            continue

        use_chrs.append(name)
        labels.append(items[1] if len(items) > 1 else "")
        colors.append(items[2] if len(items) > 2 else "#BBBBBB")

    if len(use_chrs) < 1:
        return []

    with open(path) as f:
        read = f.read()

    formatt = read.replace("\r", "\n").replace(" ", "")

    genome_size = []
    _max = 0
    for row in formatt.split("\n"):
        # comma separated if the row has a comma, otherwise tab separated
        sept = "," if "," in row else "\t"
        items = row.split(sept)

        # the second column must be the size
        if len(items) < 2 or not items[1].isdigit():
            continue

        label = items[0].lower()
        if label.startswith("chr"):
            label = label[3:]
        if label not in use_chrs:
            continue

        pos = use_chrs.index(label)
        size = int(items[1])
        if _max < size:
            _max = size

        if labels[pos] == "":
            labels[pos] = items[0]

        genome_size.append([label, size, colors[pos], labels[pos]])

    # Raise very small chromosomes to 1/10 of the largest one
    min_size = _max // 10
    for entry in genome_size:
        if entry[1] < min_size:
            entry[1] = min_size

    return genome_size
Beispiel #20
0
def convert_tojs(input_file, output_file, positions, config):
    """
    Convert the merged SV data file into the JavaScript data file of the report.

    Parameters
    ----------
    input_file : string: Path of the data file to read
    output_file: string: Path of the JavaScript file to write
    positions  : Column definitions, converted with merge.position_to_dict
    config     : Configuration object read through paplot.subcode.tools

    Return
    ------
    dict: {"id_list": sorted IDs, "group_list": group names, "color": group colors}
    None: when no chromosome is selected, the data file cannot be loaded,
          or the data file has no rows
    """

    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    genome_size = load_genome_size(config)

    if len(genome_size) == 0:
        return None

    # The chromosome is referenced by its position in genome_size
    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size))

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "merge_format_sv", "sept"),
            comment=tools.config_getstr(config, "result_format_sv", "comment"))
    except Exception as e:
        # format the exception itself: exceptions have no .message on Python 3
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # group list
    has_group = "group" in cols_di
    group_pos = None
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])
        for row in df.data:
            group = row[group_pos]
            row[group_pos] = group.replace(" ", "_")
            if group == "":
                row[group_pos] = "_blank_"

        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "sv", "group", config)
        labels = groups

    else:
        groups = ["outer", "inner"]
        labels = ["Inter Chromosome", "Intra Chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups)))

    # ID list (unique, sorted, blank IDs ignored)
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # Every column except the fixed ones is exported as tooltip data.
    # list(...) is required: dict.keys() is a view without remove() on Python 3.
    option_keys = list(cols_di.keys())
    for fixed_key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(fixed_key)
    if "group" in option_keys:
        option_keys.remove("group")

    # Column indexes do not change between rows: resolve them once
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])
    tooltip_pos = [df.name_to_index(cols_di[key])
                   for key in option_keys if cols_di[key] != ""]

    # "with" closes the output file even if a row fails to convert
    with open(output_file, "w") as f:

        f.write(js_header
                + js_dataset.format(
                    node_size_detail=calc_node_size(genome_size, 500),
                    node_size_thumb=calc_node_size(genome_size, 250),
                    node_size_select=tools.config_getint(config, "sv", "selector_split_size", 5000000),
                    genome_size=genome,
                    IDs=convert.list_to_text(Ids),
                    group=group_text,
                    tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "sv", "result_format_sv", "tooltip_format"),
                    link_header=convert.list_to_text(option_keys),
                ))

        # write links
        f.write(js_links_1)

        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # rang > 0: position beyond the chromosome, rang < 0: row is skipped silently
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "true" if chr1 == chr2 else "false"

            tooltip_items = [row[pos] for pos in tooltip_pos]

            if has_group:
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0  # "outer"
            else:
                group_id = 1  # "inner"

            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)
        f.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
Beispiel #21
0
def load_subdata(ids, sec, config):
    """
    Load the sub-plot data described by the configuration section `sec`.

    Parameters
    ----------
    ids   : list  : IDs to look up; a data row is used only if its ID is in this list
    sec   : string: Name of the configuration section describing the data file
    config: Configuration object read through paplot.subcode.tools

    Options read from `sec`
    ----------
    path, sept, mode ("fix", "range" or "gradient"), comment, title,
    name_set ("Name[:Label[:Color]]" entries separated by ","),
    header, col_value, col_ID (column names with a header, column indexes without)

    Return
    ------
    list: [data_text, item, colors_n, label, title]
    None: when the data file does not exist or a configured column
          is missing from the header
    """
    import os
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    input_file = tools.config_getpath(config, sec, "path", default="../../example/sample_summary.csv")
    if not os.path.exists(input_file):
        print("[ERROR] file is not exist. %s" % input_file)
        return None

    sept = tools.config_getstr(config, sec, "sept")
    mode = tools.config_getstr(config, sec, "mode")
    comment = tools.config_getstr(config, sec, "comment")
    title = tools.config_getstr(config, sec, "title")

    # name_set entries: Name[:Label[:Color]]; the label defaults to the name
    label = []
    item = []
    colors_n_di = {}
    for name_set in tools.config_getstr(config, sec, "name_set").split(","):
        name_set_split = convert.text_to_list(name_set, ":")
        if len(name_set_split) == 0:
            continue
        name = name_set_split[0]
        item.append(name)
        label.append(name_set_split[1] if len(name_set_split) > 1 else name)
        if len(name_set_split) > 2:
            colors_n_di[name] = color.name_to_value(name_set_split[2])

    # fill in undefined items
    colors_n_di = color.create_color_dict(item, colors_n_di, color.osaka_subway_colors)

    # dict to value
    colors_n = [colors_n_di[key] for key in item]

    if mode == "range":
        del item[0]

    # Used in messages about the value column. Always defined, also when the
    # file has no header and the option holds a column index.
    value_colname = tools.config_getstr(config, sec, "col_value")
    id_colname = tools.config_getstr(config, sec, "col_ID")

    if tools.config_getboolean(config, sec, "header"):
        # positions are resolved from the first data line
        header = []
        pos_value = -1
        pos_ID = -1
    else:
        pos_value = tools.config_getint(config, sec, "col_value")
        pos_ID = tools.config_getint(config, sec, "col_ID")
        header = ["", ""]

    # first position of each ID, and the IDs not found in the file yet
    id_index = {}
    for index, iid in enumerate(ids):
        id_index.setdefault(iid, index)
    unlookup = list(ids)

    # read
    data_text = ""
    values = []
    with open(input_file) as f:
        for line in f:
            line = line.strip()
            if len(line.replace(sept, "")) == 0:
                continue

            if comment != "" and line.startswith(comment):
                continue

            if len(header) == 0:
                header = convert.text_to_list(line, sept)
                try:
                    pos_value = header.index(value_colname)
                    pos_ID = header.index(id_colname)
                except ValueError as e:
                    print(e)
                    return None

                continue

            cols = convert.text_to_list(line, sept)
            row_id = cols[pos_ID]
            if row_id not in id_index:
                continue
            # an ID may appear on several lines: only the first one is pending
            if row_id in unlookup:
                unlookup.remove(row_id)

            id_pos = id_index[row_id]

            if mode == "fix":
                if cols[pos_value] in item:
                    data_text += subdata_data_template.format(id=id_pos, item=item.index(cols[pos_value]))
                else:
                    print("[" + sec + "] name_set: data is undefined." + cols[pos_value] + "\n")
                    continue

            elif mode in ("range", "gradient"):
                try:
                    values.append(float(cols[pos_value]))
                except (ValueError, IndexError) as e:
                    print(value_colname + ": data type is invalid.\n" + str(e))
                    continue

                data_text += subdata_data_template.format(id=id_pos, item=cols[pos_value])

    if len(unlookup) > 0:
        print("[WARNING] can't find IDs subplot data.")
        print(unlookup)

    # gradient: the first two items become the observed value range
    if mode == "gradient" and len(values) > 0:
        item[0] = min(values)
        item[1] = max(values)

    return [data_text, item, colors_n, label, title]
Beispiel #22
0
def convert_tojs(input_file, output_file, positions, config):
    """
    Convert the merged QC data file into the JavaScript data file of the report.

    One plot definition is generated for every configuration section whose
    name starts with "qc_chart_", in the order returned by config.sections().

    Parameters
    ----------
    input_file : string: Path of the data file to read
    output_file: string: Path of the JavaScript file to write
    positions  : Column definitions, converted with merge.position_to_dict
    config     : Configuration object; must provide sections()

    Return
    ------
    dict: {"plots": list of chart IDs ("chart_...")}
    None: when the data file cannot be loaded or has no rows
    """

    import os
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_qc", "sept"),
            comment=tools.config_getstr(config, "result_format_qc", "comment"))
    except Exception as e:
        # format the exception itself: exceptions have no .message on Python 3
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # chart list
    plots_text = ""
    plots_option = []

    # NOTE(review): a sorted copy of the section names (with "qc_chart_brush"
    # first) used to be built here but was never iterated; the charts have
    # always been emitted in config.sections() order, which is kept.
    for sec in config.sections():
        if not sec.startswith("qc_chart_"):
            continue

        chart_id = sec.replace("qc_chart_", "chart_")

        # name_set entries: Name[:Color]
        label = []
        colors_di = {}
        for name_set in tools.config_getstr(config, sec, "name_set").split(","):
            name_set_split = convert.text_to_list(name_set, ":")
            if len(name_set_split) == 0:
                continue

            label.append(name_set_split[0])
            if len(name_set_split) > 1:
                colors_di[name_set_split[0]] = color.name_to_value(name_set_split[1])

        # one stack ID per non-empty name_set entry: stack0, stack1, ...
        stack_id = ["stack" + str(i) for i in range(len(label))]

        # fill in undefined items
        colors_di = color.create_color_dict(label, colors_di, color.metro_colors)

        # dict to value
        colors_li = [colors_di[key] for key in label]

        plots_text += plot_template.format(
            chart_id=chart_id,
            title=tools.config_getstr(config, sec, "title"),
            title_y=tools.config_getstr(config, sec, "title_y"),
            stack=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "stack"),
            stack_id=convert.list_to_text(stack_id),
            label=convert.list_to_text(label),
            color=convert.list_to_text(colors_li),
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "tooltip_format"),
        )
        plots_option.append(chart_id)

    # ID list (unique, sorted, blank IDs ignored)
    id_pos = df.name_to_index(cols_di["id"])
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # header, and the column index of every header item
    headers = tools.dict_keys(cols_di)
    header_pos = [df.name_to_index(cols_di[item]) for item in headers]

    # Read the JavaScript functions before the output file is created
    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/data_qc.js"
    with open(template_path) as f_template:
        js_function = f_template.read()

    # "with" closes the output file even if a row fails to convert
    with open(output_file, "w") as f:
        f.write(js_header)
        f.write(js_dataset.format(IDs=convert.list_to_text(id_list),
                                  header=convert.list_to_text(headers),
                                  plots=plots_text))
        f.write(js_data1)

        # values: strings are quoted, floats are written with two decimals
        for row in df.data:
            if row[id_pos] == "":
                continue

            cells = []
            for pos in header_pos:
                val = row[pos]
                if isinstance(val, str):
                    cells.append("'" + val + "'")
                elif isinstance(val, float):
                    cells.append('%.2f' % val)
                else:
                    cells.append(str(val))

            f.write("[" + ",".join(cells) + "],")

        f.write(js_data2)
        f.write(js_function)
        f.write(js_footer)

    return {"plots": plots_option}
Beispiel #23
0
def create_index(config,
                 output_dir,
                 output_html,
                 project_name,
                 name,
                 overview="",
                 sub_text="",
                 composite=False,
                 remarks=""):
    """
    Create homepage

    The page is rendered from ./templates/index.html and written to
    <output_dir>/index.html.

    Parameters
    ----------
    config      : configparser.RawConfigParser
    output_dir  : string: Output directory path
    output_html : string: HTML file name
    project_name: string: Project name given by user on command line
    name        : string: Report title
    overview    : string: Report summary
    sub_text    : string: Additional string to display if composite=True and a report does not exist
    composite   : bool  : Whether or not to have multiple reports
    remarks     : string: Additional information about report;
                          when empty, the "remarks" option of the [style] section is used

    Return
    ----------
    None
    """

    import paplot.subcode.tools as tools
    import os

    # Confirm existence of the report; an empty file name never counts as existing
    html_exists = os.path.exists(output_dir + "/" + project_name + "/" +
                                 output_html)
    if output_html == "":
        html_exists = False

    # Create json data
    json_data = _load_metadata(output_dir, output_html, project_name, name,
                               overview, sub_text, composite, html_exists)

    # Create html for link
    link_text = _convert_index_item(json_data)

    # Load the template html for the homepage (./templates/index.html)
    template_path = (os.path.dirname(os.path.abspath(__file__)) +
                     "/templates/index.html")
    with open(template_path) as f_template:
        html_template = f_template.read()

    # Extract remarks from a configuration
    if remarks == "":
        remarks = tools.config_getstr(config, "style", "remarks")

    # Render the page before opening the output file, so that an existing
    # homepage is not truncated when rendering fails
    page = html_template.format(
        version=version_text(),  # Version
        date=tools.now_string(),  # Current time
        remarks=remarks,  # Some string
        link=link_text)  # HTML for link

    # Create html file for a homepage
    with open(output_dir + "/index.html", "w") as f_html:
        f_html.write(page)
Beispiel #24
0
def load_genome_size(config):
    """
    Read and parse a genome-size file and return a nested list:
    [ [ A chromosome number in lowercase letters, without a leading "chr",
        The size of the 1st element,
        The color of the 1st element,
        The original name of the 1st element(that is not necessarily lowercase) or a user-defined name,
      ],
      ...
    ]

    The chromosomes are selected by the "use_chrs" option of the [ca] section,
    a comma separated list of "Name[:Label[:Color]]" entries. Names are compared
    case-insensitively and without a leading "chr", both for the configuration
    entries and for the first column of the genome-size file.
    An empty list is returned when no chromosome is configured.
    """
    import os
    import paplot.subcode.tools as tools

    default_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/genome_size_hg19.csv"  # ./templates/genome_size_hg19.csv
    path = tools.config_getpath(config, "genome", "path", default_path)

    # Create a list with Name:Label:Color for each element
    # Name is like chromosome number such as 1, 2, ..., X, Y, ...
    # :Label and :Color is optional
    settings = tools.config_getstr(config, "ca", "use_chrs").replace(" ", "").split(",")

    use_chrs = []
    labels = []
    colors = []
    for setting in settings:
        # items[0]: Name corresponding to chromosome number
        # items[1]: Label
        # items[2]: Color
        items = setting.split(":")

        # Lowercase first, then remove the leading "chr", so that "chrX",
        # "ChrX" and "X" all become "x" like the labels read from the file
        name = items[0].lower()
        if name.startswith("chr"):
            name = name[3:]
        if name == "":
            # empty option or stray comma: nothing to select
            continue

        use_chrs.append(name)
        labels.append(items[1] if len(items) > 1 else "")
        colors.append(items[2] if len(items) > 2 else "#BBBBBB")  # gray by default
    if len(use_chrs) < 1:
        return []

    # Read genome size
    with open(path) as f:
        read = f.read()
    formatt = read.replace("\r", "\n").replace(" ", "")

    genome_size = []
    _max = 0
    for row in formatt.split("\n"):
        # Delimiter setting: comma if the row has one, otherwise tab
        sept = "," if "," in row else "\t"

        # Split the line and the second element must be numeric
        # item[0]: chromosome number
        # item[1]: size of item[0]
        items = row.split(sept)
        if len(items) < 2 or not items[1].isdigit():
            continue

        # The first element must be included in the list of chromosome numbers extracted from the configuration file
        label = items[0].lower()  # Convert label to lowercase
        if label.startswith("chr"):
            label = label[3:]  # Remove the leading "chr"
        if label not in use_chrs:
            continue

        # genome_size is in the order read from the genome-size file instead of the configuration file
        pos = use_chrs.index(label)
        size = int(items[1])
        if labels[pos] == "":
            labels[pos] = items[0]
        genome_size.append([label, size, colors[pos], labels[pos]])

        # Maximum size of the chromosome
        if _max < size:
            _max = size

    # The minimum size of the chromosomes is set to 1/10 of the maximum size
    min_size = _max // 10
    for entry in genome_size:
        if entry[1] < min_size:
            entry[1] = min_size

    return genome_size
Beispiel #25
0
def pyformat_to_jstooltip_text(positions, config, section_fmt, section_col, item_startwith):
    """
    Convert Python-style format options into the JavaScript tooltip definition text.

    Every option of `section_fmt` selected by tools.config_getoptions with
    `item_startwith` is split into fixed text and "{...}" placeholders.
    A placeholder may combine several keys with + - * / and may end with
    ":ext"; numeric literals inside a placeholder are not treated as keys.
    A placeholder using a key that is neither in `positions` nor starts with
    "#" is reported with a warning and left out.

    Parameters
    ----------
    positions     : dict  : Known keys (only its keys are used)
    config        : Configuration object read through tools
    section_fmt   : string: Configuration section holding the format options
    section_col   : string: Not used by this function
    item_startwith: string: Passed to tools.config_getoptions to select the options

    Return
    ------
    string: "{format:[...], keys: '...'}" where keys lists every used key once,
            in sorted order
    """
    import re

    tooltip_templete = "{{format:[{formats}], keys: '{keys}'}}"
    tooltip_detail_templete = "{{label:'{label}',type:'{type}',keys:[{keys}],ext:'{ext}'}},"

    re_compile = re.compile(r"\{[0-9a-zA-Z\+\-\*\/\#\:\,\.\_\ ]+\}")
    re_compile2 = re.compile(r"[\+\-\*\/\:]")

    keys_list = []
    tooltip_fomat_text = ""

    for option in tools.config_getoptions(config, section_fmt, item_startwith):

        formt = tools.config_getstr(config, section_fmt, option)
        key_text_list = re_compile.findall(formt)
        tooltip_detail_text = ""

        for key_text in key_text_list:
            start = formt.find(key_text)

            # write fix area (text in front of the placeholder)
            if start > 0:
                tooltip_detail_text += tooltip_detail_templete.format(
                    label=formt[0:start], type="fix", keys="", ext="")

            # keep only the text behind the placeholder for the next round
            formt = formt[start + len(key_text):]

            label_text = key_text.replace(" ", "").replace("{", "").replace("}", "")
            sub_keys = re_compile2.split(label_text)

            # a single key is shown as a string, an expression as a number
            ttype = "str" if len(sub_keys) == 1 else "numeric"
            ext = ""

            # case with-extention: "label:ext"
            if label_text.find(":") > 0:
                ext_start = label_text.index(":")
                ext = label_text[ext_start + 1:]
                label_text = label_text[0:ext_start]
                sub_keys = re_compile2.split(label_text)

            # remove numeric block
            # (built as a new list: removing from sub_keys while iterating
            # over it skipped the element following each removed number)
            names = []
            for sub_key in sub_keys:
                try:
                    float(sub_key)
                except ValueError:
                    names.append(sub_key)
            sub_keys = names

            # NOTE(review): str.replace also hits keys that are substrings of
            # other keys in the same placeholder - confirm this cannot occur.
            check = True
            for sub_key in sorted(set(sub_keys)):
                if sub_key not in positions and not sub_key.startswith("#"):
                    print("[WARNING] key:{key} is not defined.".format(key=sub_key))
                    check = False
                    break
                label_text = label_text.replace(sub_key, "{" + sub_key + "}")

            if check:
                tooltip_detail_text += tooltip_detail_templete.format(
                    label=label_text, type=ttype, keys=list_to_text(sub_keys), ext=ext)
                keys_list.extend(sub_keys)

        # remaining fixed text behind the last placeholder
        if len(formt) > 0:
            tooltip_detail_text += tooltip_detail_templete.format(
                label=formt, type="fix", keys="", ext="")

        tooltip_fomat_text += "[" + tooltip_detail_text + "],"

    # unique keys in sorted order, so that the generated text is reproducible
    key_text = "".join("{" + key + "} " for key in sorted(set(keys_list)))

    return tooltip_templete.format(formats=tooltip_fomat_text, keys=key_text)
Beispiel #26
0
def convert_tojs(params, config):
    """
    Convert a pmsignature result (JSON) into the JavaScript data file of the report.

    Parameters
    ----------
    params: dict: Values read here
        "data"    : Path of the JSON file to read
        "dir"     : Directory the JavaScript file is written to
        "ellipsis": Prefix of the identifier used in the output file names
    config: Configuration object read through paplot.subcode.tools

    Return
    ------
    dict: {"sig_num": number of signatures, "js": JavaScript file name,
           "html": HTML file name, "intergral": bool}
          "intergral" is False when one of the key_id, key_mutation or
          key_mutation_count options is empty
    {}  : when the data holds no signature
    None: when the JSON file cannot be loaded
    """
    import os
    import json
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    def format_option(option):
        # option of the [result_format_pmsignature] section
        return tools.config_getstr(config, "result_format_pmsignature", option)

    def color_option(option, default):
        # option of the [pmsignature] section with a default color
        return tools.config_getstr(config, "pmsignature", option, default)

    def tooltip_option(item_startwith):
        # tooltip definition built from the [pmsignature] section
        return convert.pyformat_to_jstooltip_text(
            keys_di, config, "pmsignature", "", item_startwith)

    def rows_text(rows):
        # [[1, 2], [3, 4]] => "[1,2],[3,4],"
        return "".join("[" + ",".join(map(str, row)) + "]," for row in rows)

    # data read
    try:
        with open(params["data"]) as f_data:
            json_data = json.load(f_data)
    except Exception as e:
        # format the exception itself: exceptions have no .message on Python 3
        print("failure open data %s, %s" % (params["data"], e))
        return None

    key_id_list = format_option("key_id")
    key_ref = format_option("key_ref")
    key_alt = format_option("key_alt")
    key_strand = format_option("key_strand")
    key_mutations = format_option("key_mutation")
    key_mutation_count = format_option("key_mutation_count")

    sig_num = len(json_data[key_ref])

    if sig_num == 0:
        print("no data %s" % params["data"])
        return {}

    # signature names
    signature_list = ["Signature %d" % (s + 1) for s in range(sig_num)]

    # each signature colors
    sig_color_list = color.create_color_array(sig_num, color.r_set2)

    # use background?
    use_background = tools.config_getboolean(
        config, "result_format_pmsignature", "background")
    if use_background:
        signature_list.append("Background ")
        sig_color_list.append(color.r_set2_gray)

    # Id list
    id_txt = ""
    if key_id_list in json_data:
        id_txt = convert.list_to_text(json_data[key_id_list])

    # mutations
    mutations_txt = ""
    if key_mutations in json_data:
        mutations_txt = "".join(
            "[%d,%d,%f]," % (m[0], m[1], m[2]) for m in json_data[key_mutations])

    # signature
    dataset_ref = "".join(
        "[" + rows_text(sig) + "]," for sig in json_data[key_ref])
    dataset_alt = "".join(
        "[" + rows_text(sig) + "]," for sig in json_data[key_alt])
    dataset_strand = rows_text(json_data[key_strand])

    # tooltips: the keys that may be used in the tooltip formats
    keys_di = {
        "a": "",
        "c": "",
        "g": "",
        "t": "",
        "ca": "",
        "cg": "",
        "ct": "",
        "ta": "",
        "tc": "",
        "tg": "",
        "plus": "",
        "minus": "",
        "id": "",
        "sig": ""
    }

    # for ref: the format text is the same for every index, build it once
    tooltip_ref_format = tooltip_option("tooltip_format_ref")
    tooltip_refs_txt = "".join(
        js_tooltip_ref_template.format(index=r, tooltip_format=tooltip_ref_format)
        for r in range(len(json_data[key_ref][0])))

    mutation_count_txt = ""
    if (key_mutation_count != "") and (key_mutation_count in json_data):
        mutation_count_txt = "".join(
            "%d," % v for v in json_data[key_mutation_count])

    # output: the background counts as one more signature in the file names
    sig_num_sift = 1 if use_background else 0
    ellipsis = "%s%d" % (params["ellipsis"], (sig_num + sig_num_sift))

    js_file = "data_%s.js" % ellipsis
    html_file = "graph_%s.html" % ellipsis

    # Read the JavaScript functions before the output file is created
    template_path = (os.path.dirname(os.path.abspath(__file__)) +
                     "/templates/data_pmsignature.js")
    with open(template_path) as f_template:
        js_function = f_template.read()

    # "with" closes the output file even if formatting fails
    with open(params["dir"] + "/" + js_file, "w") as f:
        f.write(js_header + js_dataset.format(
            Ids=id_txt,
            color_A=color_option("color_A", "#06B838"),
            color_C=color_option("color_C", "#609CFF"),
            color_G=color_option("color_G", "#B69D02"),
            color_T=color_option("color_T", "#F6766D"),
            color_plus=color_option("color_plus", "#00BEC3"),
            color_minus=color_option("color_minus", "#F263E2"),
            signatures=convert.list_to_text(signature_list),
            colors=convert.list_to_text(sig_color_list),
            mutations=mutations_txt,
            dataset_ref=dataset_ref,
            dataset_alt=dataset_alt,
            dataset_strand=dataset_strand,
            tooltip_ref=tooltip_refs_txt,
            tooltip_alt=tooltip_option("tooltip_format_alt"),
            tooltip_strand=tooltip_option("tooltip_format_strand"),
            mutation_title=tooltip_option("tooltip_format_mutation_title"),
            mutation_partial=tooltip_option("tooltip_format_mutation_partial"),
            mutation_count=mutation_count_txt,
        ))
        f.write(js_function)
        f.write(js_footer)

    integral = True
    if key_id_list == "" or key_mutations == "" or key_mutation_count == "":
        integral = False

    # The spelling of "intergral" is part of the returned interface
    return {
        "sig_num": sig_num,
        "js": js_file,
        "html": html_file,
        "intergral": integral,
    }
Beispiel #27
0
def pyformat_to_jstooltip_text(positions, config, section_fmt, section_col,
                               item_startwith):
    """
    Convert python-style tooltip format options into a JS object literal text

    Every option of section `section_fmt` selected by
    tools.config_getoptions(config, section_fmt, item_startwith) is read as a
    format string. Each "{...}" placeholder in it becomes one detail entry
    (type "str" or "numeric"), and the literal text around the placeholders
    becomes entries of type "fix".

    Parameters
    ----------
    positions     : object with keys(): names accepted as data keys
    config        : configuration object passed through to tools.config_*
    section_fmt   : str : section holding the format options
    section_col   : not used by this function
    item_startwith: str : passed to tools.config_getoptions to select options

    Return
    ------
    str: "{format:[...], keys: '...'}" text, where keys lists every accepted
         key once, lower-cased and sorted

    Notes
    -----
    Placeholders are lower-cased before their keys are looked up.
    A key that is neither in positions nor prefixed with "#" prints a warning
    and the whole placeholder is left out of the output.
    """

    import re

    tooltip_templete = "{{format:[{formats}], keys: '{keys}'}}"
    tooltip_detail_templete = "{{label:'{label}',type:'{type}',keys:[{keys}],ext:'{ext}'}},"

    placeholder_re = re.compile(r"\{[0-9a-zA-Z\+\-\*\/\#\:\,\.\_\ ]+\}")
    operator_re = re.compile(r"[\+\-\*\/\:]")

    def _is_number(text):
        # True when float() accepts the text (numeric literal inside a formula)
        try:
            float(text)
        except ValueError:
            return False
        return True

    keys_list = []
    format_parts = []

    for option in tools.config_getoptions(config, section_fmt, item_startwith):

        formt = tools.config_getstr(config, section_fmt, option)
        detail_parts = []

        for raw_key_text in placeholder_re.findall(formt):
            start = formt.find(raw_key_text)

            # Literal text in front of the placeholder
            if start > 0:
                detail_parts.append(tooltip_detail_templete.format(
                    label=formt[0:start], type="fix", keys="", ext=""))

            # Consume the format string up to the end of this placeholder
            formt = formt[start + len(raw_key_text):]
            key_text = raw_key_text.lower()

            label_text = key_text.replace(" ",
                                          "").replace("{",
                                                      "").replace("}", "")
            sub_keys = operator_re.split(label_text)

            # A placeholder without any operator or ":" is a plain string
            ttype = "str" if len(sub_keys) == 1 else "numeric"
            ext = ""

            # case with-extention: "key:ext"
            if label_text.find(":") > 0:
                ext_start = label_text.index(":")
                ext = label_text[ext_start + 1:]
                label_text = label_text[0:ext_start]
                sub_keys = operator_re.split(label_text)

            # Drop numeric literals. Build a new list rather than calling
            # remove() during iteration, which skipped the element following
            # each removed one (e.g. the "3" of "{a*2*3}").
            sub_keys = [sub_key for sub_key in sub_keys
                        if not _is_number(sub_key)]

            check = True
            for sub_key in set(sub_keys):

                if sub_key not in positions.keys() and not sub_key.startswith(
                        "#"):
                    print("[WARNING] key:{key} is not defined.".format(
                        key=sub_key))
                    check = False
                    break
                label_text = label_text.replace(sub_key, "{" + sub_key + "}")

            if check:
                detail_parts.append(tooltip_detail_templete.format(
                    label=label_text,
                    type=ttype,
                    keys=list_to_text(sub_keys),
                    ext=ext))
                keys_list.extend(sub_keys)

        # Literal text after the last placeholder
        if len(formt) > 0:
            detail_parts.append(tooltip_detail_templete.format(label=formt,
                                                               type="fix",
                                                               keys="",
                                                               ext=""))

        format_parts.append("[" + "".join(detail_parts) + "],")

    key_text = "".join("{" + key.lower() + "} "
                       for key in sorted(set(keys_list)))

    return tooltip_templete.format(formats="".join(format_parts),
                                   keys=key_text)
Beispiel #28
0
def load_subdata(ids, sec, config):
    """
    Read the sub-plot data file described by config section `sec`

    Parameters
    ----------
    ids   : list: IDs to pick up; a row is used only if its ID is in this list
    sec   : str : config section name holding path, sept, mode, comment,
                  title, name_set, header, col_value and col_ID
    config: configuration object passed through to tools.config_*

    Return
    ------
    None if the input file does not exist or the header line does not contain
    the configured columns. Otherwise a list
    [data_text, item, colors_n, label, title]
    data_text: str : concatenation of subdata_data_template entries
    item     : list: item names from name_set (first one dropped in "range"
                     mode; item[0] and item[1] replaced by min and max of the
                     read values in "gradient" mode)
    colors_n : list: color values, one per item of name_set
    label    : list: labels from name_set
    title    : str : value of the "title" option
    """
    import os
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    input_file = tools.config_getpath(config, sec, "path", default = "")
    if not os.path.exists(input_file):
        print ("[ERROR] file is not exist. %s" % input_file)
        return None

    # The separator is written with escaped sequences in the config file
    sept = tools.config_getstr(config, sec, "sept").replace("\\t", "\t").replace("\\n", "\n").replace("\\r", "\r")
    mode = tools.config_getstr(config, sec, "mode")
    comment = tools.config_getstr(config, sec, "comment")
    title = tools.config_getstr(config, sec, "title")

    # name_set entries look like "item[:label[:color[:highlight color]]]"
    label = []
    item = []
    colors_n_di = {}
    colors_h_di = {}
    for name_set in tools.config_getstr(config, sec, "name_set").split(","):
        name_set_split = convert.text_to_list(name_set, ":")
        for i, text in enumerate(name_set_split):
            if i == 0:
                item.append(text)
                # Without an explicit label the item name doubles as label
                if len(name_set_split) == 1:
                    label.append(text)
            elif i == 1:
                label.append(text)
            elif i == 2:
                colors_n_di[name_set_split[0]] = color.name_to_value(text)
            elif i == 3:
                colors_h_di[name_set_split[0]] = color.name_to_value(text)

    # fill in undefined items
    colors_n_di = color.create_color_dict(item, colors_n_di, color.osaka_subway_colors)
    # NOTE(review): colors_h_di2 is built here but never read afterwards in
    # this function; kept so the calls into paplot.color stay unchanged.
    colors_h_di2 = {}
    for key in colors_n_di:
        if key in colors_h_di:
            continue
        colors_h_di2[key] = color.saturation_down(colors_n_di[key])

    # dict to value
    colors_n = [colors_n_di[key] for key in item]

    if mode == "range":
        item.remove(item[0])

    # value_name is only used in the "data type is invalid" message
    header = []
    if tools.config_getboolean(config, sec, "header") == True:
        # Column positions are resolved from the header line while reading
        pos_value = -1
        pos_id = -1
        value_name = ""
    else:
        # Columns are given as 1-based numbers; a dummy header marks the
        # header as already handled
        pos_value = tools.config_getint(config, sec, "col_value")-1
        pos_id = tools.config_getint(config, sec, "col_ID")-1
        header = ["",""]
        value_name = "column %d" % (pos_value + 1)

    # copy id_list for find check
    unlookup = list(ids)

    # read
    data_parts = []
    values = []
    with open(input_file) as f_in:
        for line in f_in:
            line = line.strip()
            if len(line.replace(sept, "")) == 0:
                continue

            if comment != "" and line.find(comment) == 0:
                continue

            if len(header) == 0:
                header = convert.text_to_list(line,sept)
                try:
                    value_name = tools.config_getstr(config, sec, "col_value")
                    pos_value = header.index(value_name)
                    id_name = tools.config_getstr(config, sec, "col_ID")
                    pos_id = header.index(id_name)
                except Exception as e:
                    # Exceptions have no .message attribute on Python 3
                    print(e)
                    return None

                continue

            cols = convert.text_to_list(line,sept)
            row_id = cols[pos_id]
            if row_id not in ids:
                continue
            # The same ID may appear on several lines; remove it only once
            if row_id in unlookup:
                unlookup.remove(row_id)

            id_pos = ids.index(row_id)

            if mode == "fix":
                if cols[pos_value] in item:
                    data_parts.append(subdata_data_template.format(id = id_pos, item = item.index(cols[pos_value])))
                else:
                    print("[" + sec + "] name_set: data is undefined." + cols[pos_value] + "\n")
                    continue
            elif mode == "range" or mode == "gradient":
                try:
                    values.append(float(cols[pos_value]))
                except (ValueError, IndexError) as e:
                    print(value_name + ": data type is invalid.\n" + str(e))
                    continue

                data_parts.append(subdata_data_template.format(id = id_pos, item = cols[pos_value]))

    data_text = "".join(data_parts)

    if len(unlookup) > 0:
        print("[WARNING] can't find IDs subplot data.")
        print(unlookup)

    if mode == "gradient" and len(values) > 0:
        item[0] = min(values)
        item[1] = max(values)

    return [data_text, item, colors_n, label, title]
Beispiel #29
0
def group_list(colmun, mode, name, config):
    """
    Collect the group names found in a column and assign a color to each

    Parameters
    ----------
    colmun: list: column data holding the group text of every row
    mode  : str : "ca" or "mutation"; any other value yields []
    name  : str : option name suffix/prefix, e.g. "group"
    config: configparser.RawConfigParser

    Return
    ------
    [] for an unsupported mode, otherwise [funcs, colors]
    funcs : list: sorted, de-duplicated group names
    colors: list: color values in the same order as funcs
    """

    import paplot.color as color

    # Only these two modes have a result_format_* section
    if mode not in ("mutation", "ca"):
        return []
    format_section = "result_format_" + mode

    # Options read from the configuration file
    separator = tools.config_getstr(config, format_section,
                                    "sept_%s" % name)  # key: sept_group
    allowed = text_to_list(
        tools.config_getstr(config, mode, "limited_%s" % name),
        ",")  # key: limited_group
    excluded = text_to_list(
        tools.config_getstr(config, mode, "nouse_%s" % name),
        ",")  # key: nouse_group

    # Gather unique group names that pass the limited/nouse filters
    found = set()
    for row in colmun:
        # An empty separator means the row holds a single group
        pieces = [row] if separator == "" else row.split(separator)
        for piece in pieces:
            candidate = piece.strip()
            # Skip blanks
            if candidate == "":
                continue
            # Skip names outside a non-empty limited list
            if len(allowed) > 0 and fnmatch_list(candidate,
                                                 allowed) is False:
                continue
            # Skip names matched by the nouse list
            if fnmatch_list(candidate, excluded):
                continue
            found.add(candidate)
    funcs = sorted(found)

    # Colors given explicitly in the config, as entries like "A:#66C2A5"
    configured = {}
    color_option = tools.config_getstr(config, mode,
                                       "%s_color" % name)  # key: group_color
    for entry in color_option.split(","):
        if len(entry) == 0:
            continue
        pair = text_to_list(entry, ":")
        if len(pair) >= 2:
            configured[pair[0]] = color.name_to_value(pair[1])

    # Let paplot.color complete the mapping for every group
    color_di = color.create_color_dict(funcs, configured, color.metro_colors)

    # Color values listed in funcs order
    colors = [color_di[group] for group in funcs]

    return [funcs, colors]