def group_list(colmun, mode, name, config):
    """Collect the distinct group names found in a column and pick a color for each.

    Parameters
    ----------
    colmun : iterable of str : cell values of the group column
    mode   : str : "mutation" or "ca"; any other value yields an empty list
    name   : str : item name used to build the option names
                   ("sept_<name>", "limited_<name>", "nouse_<name>", "<name>_color")
    config : configuration object handed to tools.config_getstr

    Return
    ------
    [names, colors] where names is the sorted list of distinct group names and
    colors is the list of color values in the same order.
    Returns [] when mode is not supported.
    """
    import paplot.color as color

    # Each supported mode reads its separator from its own result_format section.
    if mode == "mutation":
        format_section = "result_format_mutation"
    elif mode == "ca":
        format_section = "result_format_ca"
    else:
        return []

    separator = tools.config_getstr(config, format_section, "sept_%s" % name)
    allowed = text_to_list(
        tools.config_getstr(config, mode, "limited_%s" % name), ",")
    rejected = text_to_list(
        tools.config_getstr(config, mode, "nouse_%s" % name), ",")

    unique = set()
    for cell in colmun:
        # An empty separator means the cell holds a single value.
        pieces = [cell] if separator == "" else cell.split(separator)
        for piece in pieces:
            label = piece.strip()
            if label == "":
                continue
            # When a "limited" list is configured, only matching names are kept.
            if len(allowed) > 0 and fnmatch_list(label, allowed) == False:
                continue
            # Names matching the "nouse" list are always dropped.
            if fnmatch_list(label, rejected):
                continue
            unique.add(label)

    funcs = sorted(unique)

    # User-defined colors are written as "name:color,name:color,...".
    overrides = {}
    for entry in tools.config_getstr(config, mode, "%s_color" % name).split(","):
        if len(entry) == 0:
            continue
        pair = text_to_list(entry, ":")
        if len(pair) >= 2:
            overrides[pair[0]] = color.name_to_value(pair[1])

    palette = color.create_color_dict(funcs, overrides, color.metro_colors)

    # Flatten the name -> color mapping into a list aligned with funcs.
    return [funcs, [palette[key] for key in funcs]]
def genes_list(colmun, colmun_f, colmun_id, funcs, id_list, config):
    """Return the sorted list of genes that are mutated in enough samples.

    Parameters
    ----------
    colmun    : list of str : gene column (one cell per data row)
    colmun_f  : list : group (function) column, aligned with colmun
    colmun_id : list : sample id column, aligned with colmun
    funcs     : list : group names in use; rows of other groups are ignored
    id_list   : list : all sample ids (only its length is used)
    config    : configuration object

    Return
    ------
    Sorted list of gene names.  A gene is kept when
      - it matches "limited_gene" (only checked if that list is not empty),
      - it does not match "nouse_gene", and
      - the number of distinct samples carrying it is at least
        len(id_list) * use_gene_rate.
    """
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    sept = tools.config_getstr(config, "result_format_mutation", "sept_gene")

    # gene -> set of sample ids in which the gene appears.
    # A set gives O(1) duplicate detection, and its size is the per-gene
    # sample count, so no separate counter dictionary is required.
    samples_by_gene = {}
    for i, cell in enumerate(colmun):
        if colmun_f[i] not in funcs:
            continue

        row = cell.replace('"', '')
        # An empty separator means the cell holds a single gene.
        names = [row] if sept == "" else row.split(sept)
        for gene in names:
            if gene == "":
                continue
            samples_by_gene.setdefault(gene, set()).add(colmun_id[i])

    # gene list
    use_gene_rate = config.getfloat("mutation", "use_gene_rate")
    limited_list = convert.text_to_list(
        tools.config_getstr(config, "mutation", "limited_gene"), ",")
    nouse_list = convert.text_to_list(
        tools.config_getstr(config, "mutation", "nouse_gene"), ",")

    # Minimum number of distinct samples a gene needs to be reported.
    min_samples = float(len(id_list)) * use_gene_rate

    genes = []
    for gene, samples in samples_by_gene.items():
        if len(limited_list) > 0 and not convert.fnmatch_list(gene, limited_list):
            continue
        if convert.fnmatch_list(gene, nouse_list):
            continue
        if len(samples) < min_samples:
            continue
        genes.append(gene)

    genes.sort()
    return genes
def group_list(colmun, mode, name, config):
    """Collect the distinct group names found in a column and pick a color for each.

    Parameters
    ----------
    colmun : iterable of str : cell values of the group column
    mode   : str : "mut" or "sv"; any other value yields an empty list
    name   : str : item name used to build the option names
                   ("sept_<name>", "limited_<name>s", "nouse_<name>s", "<name>_colors")
    config : configuration object handed to tools.config_getstr

    Return
    ------
    [names, colors] where names is the sorted list of distinct group names and
    colors is the list of color values in the same order.
    Returns [] when mode is not supported.
    """
    import paplot.color as color

    # Each supported mode reads its separator from its own result_format section.
    if mode == "mut":
        format_section = "result_format_mutation"
    elif mode == "sv":
        format_section = "result_format_sv"
    else:
        return []

    separator = tools.config_getstr(config, format_section, "sept_%s" % name)
    allowed = text_to_list(
        tools.config_getstr(config, mode, "limited_%ss" % name), ",")
    rejected = text_to_list(
        tools.config_getstr(config, mode, "nouse_%ss" % name), ",")

    unique = set()
    for cell in colmun:
        # An empty separator means the cell holds a single value.
        pieces = [cell] if separator == "" else cell.split(separator)
        for piece in pieces:
            label = piece.strip()
            if label == "":
                continue
            # When a "limited" list is configured, only listed names are kept.
            if len(allowed) > 0 and label not in allowed:
                continue
            # Names on the "nouse" list are always dropped.
            if label in rejected:
                continue
            unique.add(label)

    funcs = sorted(unique)

    # User-defined colors are written as "name:color,name:color,...".
    overrides = {}
    for entry in tools.config_getstr(config, mode, "%s_colors" % name).split(","):
        if len(entry) == 0:
            continue
        pair = text_to_list(entry, ":")
        if len(pair) >= 2:
            overrides[pair[0]] = color.name_to_value(pair[1])

    palette = color.create_color_dict(funcs, overrides, color.metro_colors)

    # Flatten the name -> color mapping into a list aligned with funcs.
    return [funcs, [palette[key] for key in funcs]]
def create_index(config, output_dir, output_html, project_name, name, overview = "", sub_text = "", composite = False, remarks = ""):
    """Register a report in the metadata and regenerate <output_dir>/index.html.

    Parameters
    ----------
    config       : configuration object (read for [style] remarks)
    output_dir   : str : directory that receives index.html
    output_html  : str : file name of the report page; "" means there is no page
    project_name : str : sub directory of output_dir that holds the report
    name, overview, sub_text, composite :
                   passed through unchanged to _load_metadata
    remarks      : str : text shown on the index page; when empty the value
                   of [style] remarks in config is used
    """
    import paplot.subcode.tools as tools
    import os

    # An empty file name never counts as an existing page.
    html_exists = (
        output_html != ""
        and os.path.exists(output_dir + "/" + project_name + "/" + output_html)
    )

    json_data = _load_metadata(output_dir, output_html, project_name, name, overview, sub_text, composite, html_exists)
    link_text = _convert_index_item(json_data)

    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/index.html"
    with open(template_path) as f_template:
        html_template = f_template.read()

    if remarks == "":
        remarks = tools.config_getstr(config, "style", "remarks")

    # Render before opening the output, so that a formatting error does not
    # leave a truncated index.html behind.
    html_text = html_template.format(
        version = version_text(),
        date = tools.now_string(),
        remarks = remarks,
        link = link_text
    )

    with open(output_dir + "/index.html", "w") as f_html:
        f_html.write(html_text)
def recreate_index(config, output_dir, remarks = ""):
    """Regenerate <output_dir>/index.html from the metadata already stored there.

    Parameters
    ----------
    config     : configuration object (read for [style] remarks)
    output_dir : str : directory that holds the metadata and receives index.html
    remarks    : str : text shown on the index page; when empty the value
                 of [style] remarks in config is used
    """
    import paplot.subcode.tools as tools
    import os

    json_data = _reload_metadata(output_dir)
    link_text = _convert_index_item(json_data)

    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/index.html"
    with open(template_path) as f_template:
        html_template = f_template.read()

    if remarks == "":
        remarks = tools.config_getstr(config, "style", "remarks")

    # Render before opening the output, so that a formatting error does not
    # leave a truncated index.html behind.
    html_text = html_template.format(
        version = version_text(),
        date = tools.now_string(),
        remarks = remarks,
        link = link_text
    )

    with open(output_dir + "/index.html", "w") as f_html:
        f_html.write(html_text)
def genes_list(colmun, colmun_f, colmun_id, funcs, Ids, config):
    """Return the sorted list of genes that are mutated in enough samples.

    Parameters
    ----------
    colmun    : list of str : gene column (one cell per data row)
    colmun_f  : list : group (function) column, aligned with colmun
    colmun_id : list : sample id column, aligned with colmun
    funcs     : list : group names in use; rows of other groups are ignored
    Ids       : list : all sample ids (only its length is used)
    config    : configuration object

    Return
    ------
    Sorted list of gene names.  A gene is kept when
      - it is listed in "limited_genes" (only checked if that list is not empty),
      - it is not listed in "nouse_genes", and
      - the number of distinct samples carrying it is at least
        len(Ids) * use_gene_rate.
    """
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    sept = tools.config_getstr(config, "result_format_mutation", "sept_gene")
    use_gene_rate = config.getfloat("mut", "use_gene_rate")
    limited_list = convert.text_to_list(tools.config_getstr(config, "mut", "limited_genes"), ",")
    nouse_list = convert.text_to_list(tools.config_getstr(config, "mut", "nouse_genes"), ",")

    # gene -> set of sample ids in which the gene appears.
    # A set gives O(1) duplicate detection, and its size is the per-gene
    # sample count, so no separate counter dictionary is required.
    samples_by_gene = {}
    for i, cell in enumerate(colmun):
        if colmun_f[i] not in funcs:
            continue

        row = cell.replace('"', '')
        # An empty separator means the cell holds a single gene.
        names = [row] if sept == "" else row.split(sept)
        for gene in names:
            if gene == "":
                continue
            samples_by_gene.setdefault(gene, set()).add(colmun_id[i])

    # Minimum number of distinct samples a gene needs to be reported.
    min_samples = float(len(Ids)) * use_gene_rate

    # gene list
    genes = []
    for gene, samples in samples_by_gene.items():
        if len(limited_list) > 0 and gene not in limited_list:
            continue
        if gene in nouse_list:
            continue
        if len(samples) < min_samples:
            continue
        genes.append(gene)

    genes.sort()
    return genes
def qc_main(args):
    """Entry point of the QC sub command: merge the inputs and build the QC report.

    Reads these attributes of args: config_file, input, output_dir,
    project_name, ellipsis, title, overview, remarks.
    Prints a message and returns without output when no input file is found
    or when the merged result is empty.
    """
    import paplot.qc as qc

    # Configuration and input files
    config, conf_file = tools.load_config(tools.win_to_unix(args.config_file))

    input_list = tools.get_inputlist(tools.win_to_unix(args.input))
    if len(input_list) == 0:
        print("input no file.")
        return

    sec_in, sec_out = tools.get_section("qc")
    suffix = tools.config_getstr(config, sec_in, "suffix")
    id_list = tools.get_idlist(input_list, suffix)

    # Output directory and the merged data file (keeps the input's extension)
    output_html_dir = prep.create_dirs(
        tools.win_to_unix(args.output_dir), args.project_name, config)
    extension = os.path.splitext(input_list[0])[1]
    output_data = "data_%s%s" % (args.ellipsis, extension)
    merged_path = output_html_dir + "/" + output_data

    positions = merge.merge_result(
        input_list, id_list, merged_path, "qc", config, extract=True)
    if positions == {}:
        print("merge.merge_result: input file is invalid.")
        return

    # Report page
    html_name = "graph_%s.html" % args.ellipsis
    params_html = dict(
        dir=output_html_dir,
        data=output_data,
        js="data_%s.js" % args.ellipsis,
        html=html_name,
        project=args.project_name,
        title=args.title,
    )
    qc.output_html(params_html, positions, config)

    # Index page
    prep.create_index(
        config,
        tools.win_to_unix(args.output_dir),
        html_name,
        args.project_name,
        args.title,
        overview=args.overview,
        remarks=args.remarks,
    )
def main(argv):
    """Command line entry point: merge mutation results and build the mutation report.

    Parameters
    ----------
    argv : list of str : command line arguments
           (input, output_dir, project_name, [--config_file], [--remarks])

    Prints a message and returns without output when no input file is found
    or when the merged result is empty.
    """
    import paplot.subcode.tools as tools
    import paplot.subcode.merge as merge
    import paplot.mut as mut
    import paplot.prep as prep
    import argparse

    parser = argparse.ArgumentParser(prog=prog)
    parser.add_argument("--version", action="version", version=tools.version_text())
    for positional, description in (
        ("input", "input files path"),
        ("output_dir", "output file path"),
        ("project_name", "project name"),
    ):
        parser.add_argument(positional, help=description, type=str)
    for option, description in (
        ("--config_file", "config file"),
        ("--remarks", "optional text"),
    ):
        parser.add_argument(option, help=description, type=str, default="")

    args = parser.parse_args(argv)

    # Configuration: without --config_file an empty path is handed to load_config.
    if len(args.config_file) > 0:
        config_path = tools.win_to_unix(args.config_file)
    else:
        config_path = ""
    config, conf_file = tools.load_config(config_path)

    # --remarks overrides the [style] remarks option.
    if len(args.remarks) > 0:
        tools.config_set(config, "style", "remarks", args.remarks)

    input_list = tools.get_inputlist(args.input)
    if len(input_list) == 0:
        print ("input no file.")
        return

    sec_in, sec_out = tools.get_section("mutation")
    suffix = tools.config_getstr(config, sec_in, "suffix")
    id_list = tools.get_IDlist(input_list, suffix)

    # Output directory and merged data file
    output_html_dir = prep.create_dirs(
        tools.win_to_unix(args.output_dir), args.project_name, config)
    merged_csv = output_html_dir + "/data_mut.csv"

    positions = merge.merge_result(
        input_list, id_list, merged_csv, "mutation", config, extract=True)
    if positions == {}:
        print ("merge.merge_result: input file is invalid.")
        return

    # Report page and index page
    mut.output_html(
        merged_csv,
        output_html_dir + "/data_mut.js",
        output_html_dir,
        "graph_mut.html",
        args.project_name,
        positions,
        config,
    )
    prep.create_index(args.output_dir, args.project_name, config)
def create_index(output_dir, project_name, config):
    """Write <output_dir>/index.html with links to the reports of one project.

    A link is emitted for each of graph_qc.html, graph_ca.html and
    graph_mut.html that exists in <output_dir>/<project_name>.  For CA and
    mutation a "No Data" entry is emitted instead when only the merged data
    file (data_ca.csv / data_mut.csv) exists.

    Parameters
    ----------
    output_dir   : str : directory that receives index.html
    project_name : str : sub directory of output_dir that holds the reports
    config       : configuration object (read for [style] remarks)
    """
    link_qc = """<li><a href="{project}/graph_qc.html" target=_blank>QC graphs</a>......Quality Control of bam.</li>
"""
    link_sv = """<li><a href="{project}/graph_ca.html" target=_blank>CA graphs</a>......Chromosomal Aberration.</li>
"""
    link_mut = """<li><a href="{project}/graph_mut.html" target=_blank>Mutation matrix</a>......Gene-sample mutational profiles.</li>
"""
    link_sv_nodata = """<li>CA graphs......No Data.</li>
"""
    link_mut_nodata = """<li>Mutation matrix......No Data.</li>
"""

    import paplot.subcode.tools as tools
    import os

    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/index.html"
    with open(template_path) as f_template:
        html_template = f_template.read()

    project_dir = output_dir + "/" + project_name

    link_text = ""
    if os.path.exists(project_dir + "/graph_qc.html"):
        link_text += link_qc.format(project = project_name)

    if os.path.exists(project_dir + "/graph_ca.html"):
        link_text += link_sv.format(project = project_name)
    elif os.path.exists(project_dir + "/data_ca.csv"):
        # Data was merged but no graph was produced.
        link_text += link_sv_nodata

    if os.path.exists(project_dir + "/graph_mut.html"):
        link_text += link_mut.format(project = project_name)
    elif os.path.exists(project_dir + "/data_mut.csv"):
        # Data was merged but no graph was produced.
        link_text += link_mut_nodata

    # Render before opening the output, so that a formatting error does not
    # leave a truncated index.html behind.
    html_text = html_template.format(
        project = project_name,
        version = tools.version_text(),
        date = tools.now_string(),
        remarks = tools.config_getstr(config, "style", "remarks"),
        link = link_text
    )

    with open(output_dir + "/index.html", "w") as f_html:
        f_html.write(html_text)
def convert_tojs(input_file, output_file, positions, config):
    """Convert the merged CA data file into the JavaScript data file.

    Parameters
    ----------
    input_file : str : path of the merged data file
    output_file: str : path of the JavaScript file to write
    positions  : column position information, converted by merge.position_to_dict
    config     : configuration object

    Return
    ------
    On success, a dictionary
        {"id_list": [...], "group_list": [...], "color": [...]}
    None when no genome size is available, when the data file can not be
    loaded, or when it holds no data row.
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import math

    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    # One JavaScript entry per chromosome; "Chr" is the index in genome_size.
    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size)
    )

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment")
        )
    except Exception as e:
        # Exceptions have no ".message" attribute on Python 3; format the
        # exception itself, which works on both Python 2 and 3.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    has_group = "group" in cols_di

    # group list
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])
        for row in df.data:
            group = row[group_pos]
            # Spaces become "_"; an empty group gets a placeholder name.
            row[group_pos] = "_blank_" if group == "" else group.replace(" ", "_")

        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "ca", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups))
    )

    # ID list (sorted, without duplicates and without empty ids)
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # Every remaining key is an optional column shown in the tooltip.
    option_keys = tools.dict_keys(cols_di)
    for must_key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(must_key)
    if "group" in option_keys:
        option_keys.remove("group")

    # node_size
    node_size_select = tools.config_getint(config, "ca", "selector_split_size", 5000000)

    # Column indexes do not change from row to row; look them up once.
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])
    tooltip_pos = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    # "with" closes the output file even when a row fails to convert.
    with open(output_file, "w") as f:
        f.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),
            node_size_thumb=calc_node_size(genome_size, 250),
            node_size_select=node_size_select,
            genome_size=genome,
            IDs=convert.list_to_text(Ids),
            group=group_text,
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "ca", "result_format_ca", "tooltip_format"),
            link_header=convert.list_to_text(option_keys),
        ))

        # write links
        data_links = []
        f.write(js_links_1)
        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # insite_genome returns [index in genome_size, range check value];
            # a positive value is reported as out of range, a negative value
            # (chromosome not in use) is skipped silently.
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "true" if chr1 == chr2 else "false"

            tooltip_items = [row[pos] for pos in tooltip_pos]

            if has_group:
                # -1 when the row's group is not one of the groups in use.
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)

        # integral bar item: per group, break point node -> list of ids
        link = [{} for _ in groups]
        bp_format = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}"
        for iid, index1, pos1, index2, pos2, group in data_links:
            bp1 = bp_format.format(Chr=index1, Pos=int(math.floor(pos1 / node_size_select)))
            bp2 = bp_format.format(Chr=index2, Pos=int(math.floor(pos2 / node_size_select)))

            # NOTE(review): group is -1 for rows whose group is not in
            # `groups`; link[-1] then addresses the LAST group.  Behavior is
            # kept as it was - confirm whether such rows should be skipped.
            bucket = link[group]

            # add bp1
            bucket.setdefault(bp1, []).append(iid)

            # add bp2 (only once when both ends fall into the same node)
            if bp1 != bp2:
                bucket.setdefault(bp2, []).append(iid)

        # Ids is sorted and unique, so a dictionary gives the same index as
        # Ids.index() without a linear search per item.
        id_index = dict((value, i) for i, value in enumerate(Ids))

        value_texts = []
        key_texts = []
        item_texts = []
        for bucket in link:
            values = []
            keys = []
            items = []
            for bp in bucket:
                values.append(len(bucket[bp]))

                # split key to chr and pos
                parts = bp.split(".")[2].split("_")
                keys.append([int(parts[0]), int(parts[1])])

                # delete duplication
                items.append([id_index[t] for t in sorted(set(bucket[bp]))])

            value_texts.append("[%s]," % (",".join(map(str, values)).replace(" ", "")))
            key_texts.append("[%s]," % (",".join(map(str, keys)).replace(" ", "")))
            item_texts.append("[%s]," % (",".join(map(str, items)).replace(" ", "")))

        f.write(js_selection.format(
            value="".join(value_texts),
            key="".join(key_texts),
            item="".join(item_texts)))

        f.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
def load_genome_size(config):
    """Load the sizes of the chromosomes selected by the [ca] use_chrs option.

    use_chrs is a comma separated list of "name[:label[:color]]" entries.
    The genome file ([genome] path) holds one "name,size" or "name<TAB>size"
    row per chromosome.  Names are compared case-insensitively and without a
    leading "chr".

    Return
    ------
    A list of [name, size, color, label], in the order of the genome file:
        name  : chromosome name in lower case, without "chr"
        size  : size from the file, raised to one tenth of the largest size
                if smaller than that
        color : color from use_chrs, "#BBBBBB" by default
        label : label from use_chrs, or the name as written in the file
    """
    path = tools.config_getpath(config, "genome", "path", "../../config/hg19.csv")
    settings = tools.config_getstr(config, "ca", "use_chrs").replace(" ", "").split(",")

    use_chrs = []
    colors = []
    labels = []
    for setting in settings:
        items = setting.split(":")

        # Normalize exactly like the file labels below: lower case first,
        # then drop "chr".  (Stripping before lowering turned "chrX" into
        # "X", which never matched the lower-cased file label "x".)
        chr_name = items[0].lower()
        if chr_name.startswith("chr"):
            chr_name = chr_name[3:]

        use_chrs.append(chr_name)
        labels.append(items[1] if len(items) > 1 else "")
        colors.append(items[2] if len(items) > 2 else "#BBBBBB")

    if len(use_chrs) < 1:
        return []

    with open(path) as f:
        read = f.read()

    # Accept any line ending style and ignore spaces.
    formatt = read.replace("\r", "\n").replace(" ", "")

    genome_size = []
    _max = 0
    for row in formatt.split("\n"):
        # Comma separated when the row has a comma, tab separated otherwise.
        sept = "," if row.find(",") >= 0 else "\t"
        items = row.split(sept)

        if len(items) < 2:
            continue
        # Skips header rows and anything without a numeric size.
        if not items[1].isdigit():
            continue

        label = items[0].lower()
        if label.startswith("chr"):
            label = label[3:]

        if label not in use_chrs:
            continue

        pos = use_chrs.index(label)
        size = int(items[1])
        if _max < size:
            _max = size

        # Without a user-defined label the name written in the file is used.
        if labels[pos] == "":
            labels[pos] = items[0]

        genome_size.append([label, size, colors[pos], labels[pos]])

    # Enlarge very small chromosomes to one tenth of the largest one.
    min_size = _max // 10
    for entry in genome_size:
        if entry[1] < min_size:
            entry[1] = min_size

    return genome_size
def convert_tojs(input_file, output_file, positions, config):
    """Convert the merged mutation data file into the JavaScript data file.

    Parameters
    ----------
    input_file : str : path of the merged data file
    output_file: str : path of the JavaScript file to write
    positions  : column position information, converted by merge.position_to_dict
    config     : configuration object

    Return
    ------
    On success, a dictionary {"subdata": [...]} with one
    {"pos", "label", "color", "title"} entry per sub plot written.
    None when the data file can not be loaded or holds no data row.
    """
    import os
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header = 1,
            sept = tools.config_getstr(config, "result_format_mutation", "sept"),
            comment = tools.config_getstr(config, "result_format_mutation", "comment")
        )
    except Exception as e:
        # Exceptions have no ".message" attribute on Python 3; format the
        # exception itself, which works on both Python 2 and 3.
        print ("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print ("no data %s" % input_file)
        return None

    # Column indexes do not change from row to row; look them up once.
    id_pos = df.name_to_index(cols_di["id"])
    func_pos = df.name_to_index(cols_di["group"])
    gene_pos = df.name_to_index(cols_di["gene"])

    # func replace: an empty group gets a placeholder name
    for row in df.data:
        if row[func_pos] == "":
            row[func_pos] = "_blank_"

    [funcs, colors_n] = convert.group_list(df.column(cols_di["group"]), "mutation", "group", config)

    # ID list (sorted, without duplicates and without empty ids)
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # gene list
    genes = genes_list(df.column(cols_di["gene"]),
                       df.column(cols_di["group"]),
                       df.column(cols_di["id"]),
                       funcs, id_list, config)

    # Every remaining key is an optional column shown in the tooltip.
    option_keys = tools.dict_keys(cols_di)
    option_keys.remove("id")
    option_keys.remove("group")
    option_keys.remove("gene")
    tooltip_pos = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    sept_group = tools.config_getstr(config, "result_format_mutation", "sept_group")
    sept_gene = tools.config_getstr(config, "result_format_mutation", "sept_gene")

    # mutation list
    # mutations[id][func][gene] = number of rows
    # tooltips[id][func][gene]  = list of tooltip values, one entry per row
    mutations = {}
    tooltips = {}
    for row in df.data:
        iid = row[id_pos]
        if iid == "":
            continue

        funcs_of_id = mutations.setdefault(iid, {})
        tips_of_id = tooltips.setdefault(iid, {})

        func_split = convert.text_to_list(row[func_pos], sept_group)
        gene_split = convert.text_to_list(row[gene_pos], sept_gene)
        tooltip_items = [row[pos] for pos in tooltip_pos]

        for func in func_split:
            genes_of_func = funcs_of_id.setdefault(func, {})
            tips_of_func = tips_of_id.setdefault(func, {})
            for gene in gene_split:
                genes_of_func[gene] = genes_of_func.get(gene, 0) + 1
                tips_of_func.setdefault(gene, []).append(tooltip_items)

    def tooltip_format(option):
        # JavaScript tooltip text for one "tooltip_format_*" option.
        return convert.pyformat_to_jstooltip_text(
            cols_di, config, "mutation", "result_format_mutation", option)

    dataset = {}
    subdata = []

    # "with" closes the output file even when writing fails part way.
    with open(output_file, "w") as f:
        f.write(js_header)
        f.write(js_mutations_1)

        # Only combinations whose id, func and gene are all in use are written.
        mutations_sum = 0
        for iid in tools.dict_keys(mutations):
            idx_i = convert.value_to_index(id_list, iid, -1)
            if idx_i < 0:
                continue
            for func in tools.dict_keys(mutations[iid]):
                idx_f = convert.value_to_index(funcs, func, -1)
                if idx_f < 0:
                    continue
                for gene in tools.dict_keys(mutations[iid][func]):
                    idx_g = convert.value_to_index(genes, gene, -1)
                    if idx_g < 0:
                        continue

                    num = mutations[iid][func][gene]
                    tooltip_text = "".join(
                        "[" + convert.list_to_text(tips) + "],"
                        for tips in tooltips[iid][func][gene])

                    f.write(mu_mutations_template.format(
                        ID = idx_i,
                        func = idx_f,
                        gene = idx_g,
                        num = num,
                        tooltip = tooltip_text))

                    mutations_sum += num

        f.write(js_mutations_2.format(mutations_sum = mutations_sum))

        # write id, func, gene ... list
        f.write(js_dataset.format(
            Ids = convert.list_to_text(id_list),
            genes = convert.list_to_text(convert.list_prohibition(genes)),
            funcs = convert.list_to_text(convert.list_prohibition(funcs)),
            func_colors_n = convert.list_to_text(colors_n),
            mutation_header = convert.list_to_text(option_keys),
            checker_title = tooltip_format("tooltip_format_checker_title"),
            checker_partial = tooltip_format("tooltip_format_checker_partial"),
            gene_title = tooltip_format("tooltip_format_gene_title"),
            gene_partial = tooltip_format("tooltip_format_gene_partial"),
            id_title = tooltip_format("tooltip_format_id_title"),
            id_partial = tooltip_format("tooltip_format_id_partial"),
        ))

        ##### subdata #####
        f.write(js_subdata_1)
        counter = 0
        for sec in config.sections():
            # The section name prefix selects the sub plot position.
            if sec.startswith("mutation_subplot_type1_"):
                pos = 1
            elif sec.startswith("mutation_subplot_type2_"):
                pos = 2
            else:
                continue

            ret_val = load_subdata(id_list, sec, config)
            if ret_val is None:
                continue
            # Own name for the sub plot colors, so that the group colors
            # (colors_n) are not overwritten.
            [data_text, item, sub_colors, label, title] = ret_val

            # Sub plots are numbered in the order they are written.
            name = "sub%d" % (counter)
            counter += 1

            f.write(subdata_template.format(
                name = name,
                title = title,
                type = tools.config_getstr(config, sec, "mode"),
                item = convert.list_to_text(item),
                label = convert.list_to_text(label),
                colors_n = convert.list_to_text(sub_colors),
                data = data_text))

            subdata.append({"pos": pos, "label": label, "color": sub_colors, "title": title})

        f.write(js_subdata_2)

        ##### functions #####
        template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/data_mutation.js"
        with open(template_path) as f_template:
            js_function = f_template.read()
        f.write(js_function)
        f.write(js_footer)

    dataset["subdata"] = subdata
    return dataset
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert the input files to Json data and write them to the Javascript file
    Also write functions and methods to process those data

    Parameters
    ----------
    input_file : str : The absolute path of formatted data file
    output_file: str : The absolute path of JavaScript file
    positions  : dict: A nested dictionary with "must" and "option" as keys
    config     : configparser.RawConfigParser

    Return
    ------
    On success, return a dictionary: {"id_list": [...] "group_list": [...], "color": [...]}
        id_list   : The values for id column
        group_list: The names of groups
        color     : The colors in groups
    None when no genome size is available, when the data file can not be
    loaded, or when it holds no data row.
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import os
    import math

    # genome_size: a nested list
    # [ [ A chromosome number in lowercase letters,
    #     The size of the 1st element,
    #     The color of the 1st element,
    #     The original name of the 1st element(that is not necessarily lowercase) or a user-defined name, ], ... ]
    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    # genome: dictionary-style string like this
    #   {"chr":"00", "size":249250621, "color":"#BBBBBB", "label":"1",},
    #   {"chr":"01", "size":243199373, "color":"#BBBBBB", "label":"2",},
    #   ...
    # chr  : Sequential number
    # size : Size corresponding to the label
    # color: Color corresponding to the label
    # label: Name corresponding to chromosome
    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size)
    )

    # Create a data frame that has title and data attributions
    # title is a list like ['Break1', 'Break2', 'Chr1', 'Chr2', 'Sample']
    # data is a nested list like [[16019088, 62784483, '14', '12', 'SAMPLE1'], ...]
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment")
        )
    except Exception as e:
        # Exceptions have no ".message" attribute on Python 3; format the
        # exception itself, which works on both Python 2 and 3.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # Create groups, labels, and colors_n
    # cols_di: a dictionary that merges must and option values
    #        : ex) {'chr1': 'Chr1', 'break1': 'Break1', 'chr2': 'Chr2', 'break2': 'Break2', 'id': 'Sample'}
    cols_di = merge.position_to_dict(positions)
    has_group = "group" in cols_di

    if has_group:
        # The column title of the group is stored in cols_di["group"]
        group_pos = df.name_to_index(cols_di["group"])
        for row in df.data:
            group = row[group_pos]
            # Spaces become "_"; an empty group gets a placeholder name
            row[group_pos] = "_blank_" if group == "" else group.replace(" ", "_")

        # groups  : list: group names
        # labels  : list: group names
        # colors_n: list: color values for groups
        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "ca", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]  # purple, green

    # Create group_text that is a dictionary-style string with name, label, color
    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups))
    )

    # id_list: Values for "id" column
    #        : Sorted without duplicates, empty values are ignored
    id_pos = df.name_to_index(cols_di["id"])
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # option_keys: The keys of cols_di without "id", "group" and the must keys
    option_keys = tools.dict_keys(cols_di)
    for key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(key)
    if "group" in option_keys:
        option_keys.remove("group")

    # node_size: Size to divide chromosomes
    node_size_select = tools.config_getint(config, "ca", "selector_split_size", 5000000)

    # Column indexes do not change from row to row; look them up once
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])
    tooltip_pos = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    # "with" closes the output file even when a row fails to convert
    with open(output_file, "w") as f:
        # Write header and dataset of JavaScript file
        f.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),   # node size for detailed thumbnails
            node_size_thumb=calc_node_size(genome_size, 250),    # node size for rough thumbnails
            node_size_select=node_size_select,                   # node size for bar graph
            genome_size=genome,                                  # A dictionary-style string containing keys of "chr", "size", "color", and "label"
            IDs=convert.list_to_text(id_list),                   # Text built from the id column values
            group=group_text,                                    # A dictionary-style string containing keys of "name", "label", and "color"
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "ca", "result_format_ca", "tooltip_format"),  # Tooltip text built from the "tooltip_format" option
            link_header=convert.list_to_text(option_keys),       # Text built from the option keys
        ))

        # Write link of JavaScript file
        f.write(js_links_1)  # Write the leading part
        data_links = []
        for row in df.data:
            iid = row[id_pos]  # iid: the value of "id" column

            # Ignore empty string
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])  # chromosome1
            pos1 = row[break1_pos]     # break point1
            chr2 = str(row[chr2_pos])  # chromosome2
            pos2 = row[break2_pos]     # break point2

            # Check if chr1 and chr2 is in the genome list
            # Check if pos1 and pos2 is in the chromosome length
            # index1 and index2 are indexes of the genome_size for chr1 and chr2
            # A negative range value (chromosome not in use) is skipped silently
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            # Whether chr1 and chr2 are the same chromosome
            inner_flg = "true" if chr1 == chr2 else "false"

            # Set group_id: -1, 0, 1, index values of groups
            #             : Sequential numbers identifying groups
            if has_group:
                # If the value of group column is in group list, then group_id is the index of the list
                # Others, group_id is -1
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0  # chr1 and chr2 are different chromosomes
            else:
                group_id = 1  # chr1 and chr2 are the same chromosome

            # Add an element to data_links
            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            # tooltip_items: Data for tooltip
            #              : values of the option columns (no group, id, and must keys)
            tooltip_items = [row[pos] for pos in tooltip_pos]

            # Write link
            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)  # Write the ending part

        # Write integral bar item
        # link: [{bp1: [iid, ...], bp2: [iid, ...]}, {...}, ...]
        #     : Separate elements by group_id
        link = [{} for _ in groups]
        bp_format = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}"
        for iid, index1, pos1, index2, pos2, group_id in data_links:
            # iid     : The value of id title
            # index1/2: The index of genome_size
            # pos1/2  : Break point
            # group_id: Index of groups
            # Chr: The index of genome_size
            # Pos: A break position based on node
            bp1 = bp_format.format(Chr=index1, Pos=int(math.floor(pos1 / node_size_select)))
            bp2 = bp_format.format(Chr=index2, Pos=int(math.floor(pos2 / node_size_select)))

            # NOTE(review): group_id is -1 for rows whose group is not in
            # `groups`; link[-1] then addresses the LAST group.  Behavior is
            # kept as it was - confirm whether such rows should be skipped.
            bucket = link[group_id]

            # For bp1
            bucket.setdefault(bp1, []).append(iid)

            # For bp2 (only once when both ends fall into the same node)
            if bp1 != bp2:
                bucket.setdefault(bp2, []).append(iid)

        # id_list is sorted and unique, so a dictionary gives the same index
        # as id_list.index() without a linear search per item
        id_index = dict((value, i) for i, value in enumerate(id_list))

        value_texts = []
        key_texts = []
        item_texts = []
        for bucket in link:
            values = []  # [Number of id, ...]
            keys = []    # [[genome_size index, Break position], ...]
            items = []   # [[id_list index, ...], ...]
            for bp in sorted(bucket.keys()):
                # values element
                # bucket[bp]: list that stores id column values at a break position of a chromosome in a group
                #           : Duplicate values are stored
                values.append(len(bucket[bp]))

                # keys element
                parts = bp.split(".")[2].split("_")  # parts: [Chr, Pos]
                keys.append([int(parts[0]), int(parts[1])])

                # items element (duplicates removed)
                items.append([id_index[t] for t in sorted(set(bucket[bp]))])

            value_texts.append("[%s]," % (",".join(map(str, values)).replace(" ", "")))  # [1,1,...],
            key_texts.append("[%s]," % (",".join(map(str, keys)).replace(" ", "")))      # [[0,1],[0,25],...],
            item_texts.append("[%s]," % (",".join(map(str, items)).replace(" ", "")))    # [[9],[8],...],

        f.write(js_selection.format(
            value="".join(value_texts),
            key="".join(key_texts),
            item="".join(item_texts)
        ))

        # Write rest of JavaScript file and footer
        template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/data_ca.js"  # ./templates/data_ca.js
        with open(template_path) as f_template:
            js_function = f_template.read()
        f.write(js_function)
        f.write(js_footer)

    return {"id_list": id_list, "group_list": groups, "color": colors_n}
def convert_tojs(params, config):
    """
    Convert a signature JSON file into a JavaScript data file

    Parameters
    ----------
    params : dict: The keys "data" (input JSON path), "dir" (output directory)
                   and "ellipsis" (file name identifier) are read
    config : configuration object read through paplot.subcode.tools helpers

    Return
    ------
    None : The input can not be read, or the substitution list length is not 4^N
    {}   : The input has no signature
    dict : {"sig_num": int, "js": file name, "html": file name, "intergral": bool}
    """
    import json
    import itertools
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    # data read
    try:
        with open(params["data"]) as f_json:
            jsonData = json.load(f_json)
    except Exception as e:
        # "%s" % e works on both Python 2 and 3 (e.message is Python 2 only)
        print("failure open data %s, %s" % (params["data"], e))
        return None

    key_Ids = tools.config_getstr(config, "result_format_signature", "key_id")
    key_signature = tools.config_getstr(config, "result_format_signature", "key_signature")
    key_mutations = tools.config_getstr(config, "result_format_signature", "key_mutation")
    key_mutation_count = tools.config_getstr(config, "result_format_signature", "key_mutation_count")

    sig_num = len(jsonData[key_signature])
    if sig_num == 0:
        print("no data %s" % params["data"])
        return {}

    # signature names
    signature_list = ["Signature %d" % (s + 1) for s in range(sig_num)]

    # each signature colors
    sig_color_list = color.create_color_array(sig_num, color.r_set2)

    # use background?
    use_background = tools.config_getboolean(config, "result_format_signature", "background")
    if use_background:
        signature_list.append("Background ")
        sig_color_list.append(color.r_set2_gray)

    # axis-y max: a negative configured value means "take the maximum of the data"
    sig_y_max = tools.config_getint(config, "signature", "signature_y_max")
    if sig_y_max < 0:
        for sig in jsonData[key_signature]:
            for sub in sig:
                m = max(sub)
                if sig_y_max < m:
                    sig_y_max = m

    # route list
    # The substitution list length must be 4^N. N is determined with integer
    # arithmetic because a floating-point logarithm is not guaranteed to be exact.
    sub_num = len(jsonData[key_signature][0][0])
    context_len = 0
    rest = sub_num
    while rest > 1 and rest % 4 == 0:
        rest //= 4
        context_len += 1
    if rest != 1:
        print("substitution's list length is invalid (%d, not number 4^N)" % sub_num)
        return None

    route_id = []
    route_list = []
    for p in itertools.product(("A", "C", "G", "T"), repeat=context_len):
        route_id.append("".join(p))
        route_list.append(p)

    # substruction
    sub_di = [
        {"name": "C > A", "ref": "C", "color": tools.config_getstr(config, "signature", "alt_color_CtoA")},
        {"name": "C > G", "ref": "C", "color": tools.config_getstr(config, "signature", "alt_color_CtoG")},
        {"name": "C > T", "ref": "C", "color": tools.config_getstr(config, "signature", "alt_color_CtoT")},
        {"name": "T > A", "ref": "T", "color": tools.config_getstr(config, "signature", "alt_color_TtoA")},
        {"name": "T > C", "ref": "T", "color": tools.config_getstr(config, "signature", "alt_color_TtoC")},
        {"name": "T > G", "ref": "T", "color": tools.config_getstr(config, "signature", "alt_color_TtoG")},
    ]

    # The reference base is inserted in the middle of each context
    half = context_len // 2
    substruction = ""
    for sub in sub_di:
        route = []
        for r in route_list:
            route.append("p".join(r[0:half]) + "p" + sub["ref"] + "p" + "p".join(r[half:]))

        substruction += js_substruction_template.format(name=sub["name"],
                                                        color=sub["color"],
                                                        route=convert.list_to_text(route))

    # Id list
    id_txt = ""
    if key_Ids in jsonData:
        id_txt = convert.list_to_text(jsonData[key_Ids])

    # mutations
    mutations_txt = ""
    if key_mutations in jsonData:
        mutations_txt = "".join("[%d,%d,%f]," % (m[0], m[1], m[2]) for m in jsonData[key_mutations])

    # signature
    dataset_sig = ""
    for sig in jsonData[key_signature]:
        tmp = "".join("[" + ",".join(map(str, sub)) + "]," for sub in sig)
        dataset_sig += ("[" + tmp + "],")

    mutation_count_txt = ""
    if (key_mutation_count != "") and (key_mutation_count in jsonData):
        mutation_count_txt = "".join("%d," % v for v in jsonData[key_mutation_count])

    # output
    # The background counts as one more signature in the file names
    sig_num_sift = 1 if use_background else 0

    ellipsis = "%s%d" % (params["ellipsis"], (sig_num + sig_num_sift))
    js_file = "data_%s.js" % ellipsis
    html_file = "graph_%s.html" % ellipsis

    keys_di = {"sig": "", "route": "", "id": ""}

    with open(params["dir"] + "/" + js_file, "w") as f:
        f.write(js_header
                + js_dataset.format(
                    Ids=id_txt,
                    signatures=convert.list_to_text(signature_list),
                    colors=convert.list_to_text(sig_color_list),
                    dataset_sig_max=sig_y_max,
                    mutations=mutations_txt,
                    dataset_sig=dataset_sig,
                    route_id=convert.list_to_text(route_id),
                    substruction=substruction,
                    signature_title=convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_signature_title"),
                    signature_partial=convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_signature_partial"),
                    mutation_title=convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_mutation_title"),
                    mutation_partial=convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_mutation_partial"),
                    mutation_count=mutation_count_txt,
                )
                + js_function)

    # The integral view needs ids, mutations and mutation counts
    integral = not (key_Ids == "" or key_mutations == "" or key_mutation_count == "")

    return {"sig_num": sig_num,
            "js": js_file,
            "html": html_file,
            "intergral": integral,
            }
def convert_tojs(input_file, output_file, positions, config):
    """
    Convert a merged QC data file into a JavaScript data file

    Parameters
    ----------
    input_file : str : Path of the merged data file
    output_file: str : Path of the JavaScript file to write
    positions  : dict: Passed to merge.position_to_dict to obtain {key: column title}
    config     : configuration object read through paplot.subcode.tools helpers

    Return
    ------
    None : The input can not be read or has no data rows
    dict : {"plots": [chart id, ...]}
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(input_file, header=1,
                                  sept=tools.config_getstr(config, "merge_format_qc", "sept"),
                                  comment=tools.config_getstr(config, "result_format_qc", "comment"))
    except Exception as e:
        # "%s" % e works on both Python 2 and 3 (e.message is Python 2 only)
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # chart list
    # NOTE(review): the previous code also built a sorted copy of the section
    # names with "qc_chart_brush" moved to the front, but never iterated it.
    # Charts are emitted in config.sections() order, as before - confirm which
    # order is intended.
    plots_text = ""
    plots_option = []
    for sec in config.sections():
        if not sec.startswith("qc_chart_"):
            continue

        chart_id = sec.replace("qc_chart_", "chart_")

        stack_id = []
        label = []
        colors_di = {}
        counter = 0
        for name_set in tools.config_getstr(config, sec, "name_set").split(","):
            # name_set: "label" or "label:color"
            name_set_split = convert.text_to_list(name_set, ":")
            if len(name_set_split) == 0:
                continue

            stack_id.append("stack" + str(counter))
            label.append(name_set_split[0])

            if len(name_set_split) > 1:
                colors_di[name_set_split[0]] = color.name_to_value(name_set_split[1])
            counter += 1

        # fill in undefined items
        colors_di = color.create_color_dict(label, colors_di, color.metro_colors)

        # dict to value
        colors_li = [colors_di[key] for key in label]

        plots_text += plot_template.format(
            chart_id=chart_id,
            title=tools.config_getstr(config, sec, "title"),
            title_y=tools.config_getstr(config, sec, "title_y"),
            stack=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "stack"),
            stack_id=convert.list_to_text(stack_id),
            label=convert.list_to_text(label),
            color=convert.list_to_text(colors_li),
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "tooltip_format"),
        )
        plots_option.append(chart_id)

    # ID list: unique, sorted, blank ids dropped
    id_index = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_index] for row in df.data if row[id_index] != ""))

    # header: sorted() also works on the dict view returned by Python 3
    headers = sorted(cols_di.keys())

    with open(output_file, "w") as f:
        f.write(js_header)
        f.write(js_dataset.format(IDs=convert.list_to_text(Ids),
                                  header=convert.list_to_text(headers),
                                  plots=plots_text))
        f.write(js_data1)

        # values
        for row in df.data:
            iid = row[df.name_to_index(cols_di["id"])]
            if iid == "":
                continue

            cells = []
            for item in headers:
                val = row[df.name_to_index(cols_di[item])]
                if isinstance(val, str):
                    cells.append("'" + val + "'")
                elif isinstance(val, float):
                    cells.append('%.2f' % val)
                else:
                    cells.append(str(val))

            f.write("[" + ",".join(cells) + "],")

        f.write(js_data2)
        f.write(js_function)

    return {"plots": plots_option}
def convert_tojs(input_file, output_file, positions, config):
    """
    Convert a merged mutation data file into a JavaScript data file

    Parameters
    ----------
    input_file : str : Path of the merged data file
    output_file: str : Path of the JavaScript file to write
    positions  : dict: Passed to merge.position_to_dict to obtain {key: column title}
    config     : configuration object read through paplot.subcode.tools helpers

    Return
    ------
    None : The input can not be read or has no data rows
    dict : {"func": [...], "color": [...], "subdata": [{"pos", "label", "color", "title"}, ...]}
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(input_file, header=1,
                                  sept=tools.config_getstr(config, "merge_format_mutation", "sept"),
                                  comment=tools.config_getstr(config, "result_format_mutation", "comment"))
    except Exception as e:
        # "%s" % e works on both Python 2 and 3 (e.message is Python 2 only)
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # func replace: blanks become "_", an empty value becomes "_blank_"
    func_pos = df.name_to_index(cols_di["func"])
    for row in df.data:
        func = row[func_pos]
        row[func_pos] = func.replace(" ", "_")
        if func == "":
            row[func_pos] = "_blank_"

    [funcs, colors_n] = convert.group_list(df.column(cols_di["func"]), "mut", "func", config)

    # ID list: unique, sorted, blank ids dropped
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    genes = genes_list(df.column(cols_di["gene"]),
                       df.column(cols_di["func"]),
                       df.column(cols_di["id"]),
                       funcs, Ids, config)

    # Optional columns only. list() is needed because Python 3 returns a
    # dict view, which has no remove().
    option_keys = list(cols_di.keys())
    for must_key in ("id", "func", "gene"):
        option_keys.remove(must_key)

    def tooltip_text(item):
        return convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", item)

    sept_func = tools.config_getstr(config, "result_format_mutation", "sept_func")
    sept_gene = tools.config_getstr(config, "result_format_mutation", "sept_gene")

    # Built before the subdata loop so that "color" keeps the func colors
    dataset = {"func": funcs, "color": colors_n}
    subdata = []

    with open(output_file, "w") as f_js:
        f_js.write(js_header
                   + js_dataset.format(
                       Ids=convert.list_to_text(Ids),
                       genes=convert.list_to_text(genes),
                       funcs=convert.list_to_text(funcs),
                       func_colors_n=convert.list_to_text(colors_n),
                       mutation_header=convert.list_to_text(option_keys),
                       checker_title=tooltip_text("tooltip_format_checker_title"),
                       checker_partial=tooltip_text("tooltip_format_checker_partial"),
                       gene_title=tooltip_text("tooltip_format_gene_title"),
                       gene_partial=tooltip_text("tooltip_format_gene_partial"),
                       id_title=tooltip_text("tooltip_format_id_title"),
                       id_partial=tooltip_text("tooltip_format_id_partial"),
                   ))

        # mutation list
        f_js.write(js_mutations_1)

        # mutations[id][func][gene]: number of rows
        # tooltips[id][func][gene] : optional column values of each row
        mutations = {}
        tooltips = {}
        for row in df.data:
            iid = row[df.name_to_index(cols_di["id"])]
            if iid == "":
                continue

            if iid not in mutations:
                mutations[iid] = {}
                tooltips[iid] = {}

            func_split = convert.text_to_list(row[df.name_to_index(cols_di["func"])], sept_func)

            tooltip_items = [row[df.name_to_index(cols_di[key])]
                             for key in option_keys if cols_di[key] != ""]

            for func in func_split:
                if func not in mutations[iid]:
                    mutations[iid][func] = {}
                    tooltips[iid][func] = {}

                gene_split = convert.text_to_list(row[df.name_to_index(cols_di["gene"])], sept_gene)
                for gene in gene_split:
                    if gene not in mutations[iid][func]:
                        mutations[iid][func][gene] = 1
                        tooltips[iid][func][gene] = []
                    else:
                        mutations[iid][func][gene] += 1

                    tooltips[iid][func][gene].append(tooltip_items)

        # Only combinations whose id, func and gene are all listed are written
        mutations_sum = 0
        for iid in mutations:
            for func in mutations[iid]:
                for gene in mutations[iid][func]:
                    idx_i = convert.value_to_index(Ids, iid, -1)
                    idx_f = convert.value_to_index(funcs, func, -1)
                    idx_g = convert.value_to_index(genes, gene, -1)

                    if idx_i >= 0 and idx_f >= 0 and idx_g >= 0:
                        tooltip_items = "".join("[" + convert.list_to_text(tips) + "],"
                                                for tips in tooltips[iid][func][gene])

                        f_js.write(mu_mutations_template.format(ID=idx_i,
                                                                func=idx_f,
                                                                gene=idx_g,
                                                                num=mutations[iid][func][gene],
                                                                tooltip=tooltip_items))

                        mutations_sum += mutations[iid][func][gene]

        f_js.write(js_mutations_2.format(mutations_sum=mutations_sum))

        ##### subdata #####
        f_js.write(js_subdata_1)
        counter = 0
        for sec in config.sections():
            if sec.startswith("mut_subplot_type1_"):
                pos = 1
            elif sec.startswith("mut_subplot_type2_"):
                pos = 2
            else:
                continue

            ret_val = load_subdata(Ids, sec, config)
            if ret_val is None:
                continue
            [data_text, item, sub_colors_n, label, title] = ret_val
            name = "sub%d" % (counter)
            counter += 1

            f_js.write(subdata_template.format(name=name,
                                               title=title,
                                               type=tools.config_getstr(config, sec, "mode"),
                                               item=convert.list_to_text(item),
                                               label=convert.list_to_text(label),
                                               colors_n=convert.list_to_text(sub_colors_n),
                                               data=data_text))
            subdata.append({"pos": pos, "label": label, "color": sub_colors_n, "title": title})

        f_js.write(js_subdata_2)

        ##### functions #####
        f_js.write(js_function)

    dataset["subdata"] = subdata
    return dataset
def pyformat_to_jstooltip_text(positions, config, section_fmt, section_col, item_startwith):
    """
    Create tooltip components obtained by decomposing a string of the tooltip_format

    Parameters
    ----------
    positions     : dict: A dictionary with column titles as values
    config        : configparser.RawConfigParser
    section_fmt   : str: A section name such as "ca", "mutation", "pmsignature", "signature", ...
    section_col   : str: A section name such as "result_format_ca", "result_format_mutation", "result_format_qc", ...
                         (not read by this function)
    item_startwith: str: The first string of section key, such as "tooltip_format", "tooltip_format_checker_title", ...

    Return
    ------
    A dictionary-style string with "format" and "keys"
        format: [[{...}, ...], ...] where the dictionaries have the following keys and values
              : label: For fix type, a string outside the braces
              :      : Others, a string before the letter ":" in braces
              : type : One of the following: fix, numeric, str
              : keys : For fix type, empty string
              :      : Others, a string that precedes the letter ":" in braces and excludes the arithmetic term
              : ext  : For fix type, empty string
              :      : Others, a string after the letter ":" in braces
        keys  : a set-style string
              : A concatenated string of the above "keys" values
    """
    import re

    tooltip_detail_templete = "{{label:'{label}',type:'{type}',keys:[{keys}],ext:'{ext}'}},"

    re_compile = re.compile(r"\{[0-9a-zA-Z\+\-\*\/\#\:\,\.\_\ ]+\}")  # Bracketed area
    re_compile2 = re.compile(r"[\+\-\*\/\:]")

    def is_number(text):
        # True if text is a numeric literal accepted by float()
        try:
            float(text)
        except ValueError:
            return False
        return True

    # Determine keys_list and tooltip_fomat_text
    keys_list = []
    tooltip_fomat_text = ""
    for option in tools.config_getoptions(config, section_fmt, item_startwith):

        # formt    : "[{chr1}] {break1:,}; [{chr2}] {break2:,}"
        # keys_list: ["{chr1}", "{break1:,}", "{chr2}", "{break2:,}"]
        formt = tools.config_getstr(config, section_fmt, option)  # tooltip format
        key_text_list = re_compile.findall(formt)  # Extract string enclosed in braces

        # Determine tooltip_detail_text
        tooltip_detail_text = ""
        for key_text in key_text_list:
            # key_text: "{chr1}" "{break1:,}" "{chr2}" "{break2:,}" ...

            # Update tooltip_detail_text: fix type
            start = formt.find(key_text)  # Index of matched string
            if start > 0:
                # label: "[" "] " "; [" "] " ...
                tooltip_detail_text += tooltip_detail_templete.format(label=formt[0:start], type="fix", keys="", ext="")

            # slice for the next loop
            formt = formt[start + len(key_text):]

            # Update key_text: lowercase
            # Set label_text
            # Set sub_keys
            key_text = key_text.lower()
            label_text = key_text.replace(" ", "").replace("{", "").replace("}", "")  # "{break1:,}" => "break1:,"
            sub_keys = re_compile2.split(label_text)  # "break1:," => ["break1", ","]

            # Set ttype: numeric or str
            ttype = "numeric"  # numeric if key_text contains either "+", "-", "*", "/", or ":"
            if len(sub_keys) == 1:
                ttype = "str"

            # Set ext           : "" or something like ","
            # Update label_text: remove extra characters
            # Update sub_keys  : remove extra elements
            ext = ""
            if label_text.find(":") > 0:
                ext_start = label_text.index(":")  # Index of :
                ext = label_text[ext_start + 1:]  # "break1:," => ","
                label_text = label_text[0:ext_start]  # "break1:," => "break1"
                sub_keys = re_compile2.split(label_text)  # "break1" => ["break1"]

            # Update sub_keys: remove numeric block
            # A new list is built instead of removing while iterating, which
            # skipped the element following each removed one
            # ("a/1000/1000" left one "1000" behind).
            sub_keys = [sub_key for sub_key in sub_keys if not is_number(sub_key)]

            # Set check        : True or False
            # Update label_text: add braces
            check = True
            for sub_key in list(set(sub_keys)):
                if sub_key not in positions and not sub_key.startswith("#"):
                    print("[WARNING] key:{key} is not defined.".format(key=sub_key))
                    check = False
                    break
                label_text = label_text.replace(sub_key, "{" + sub_key + "}")  # "break1" => "{break1}"

            # Update tooltip_detail_text: numeric or str type
            if check is True:
                # label: "{chr1}"  "{break1}"  "{chr2}"  {"break2"}  ...
                # keys : "'chr1'," "'break1'," "'chr2'," "'break2'," ... "'" and "," are added
                # ext  : ""        ","         ""        ","         ...
                tooltip_detail_text += tooltip_detail_templete.format(label=label_text,
                                                                      type=ttype,
                                                                      keys=list_to_text(sub_keys),
                                                                      ext=ext)
                keys_list.extend(sub_keys)  # add a list such as ["chr1"], ["break1"], ["chr2"], ["break2"], ...

        # Update tooltip_detail_text: fix type (text after the last braces)
        if len(formt) > 0:
            tooltip_detail_text += tooltip_detail_templete.format(label=formt, type="fix", keys="", ext="")

        tooltip_fomat_text += "[" + tooltip_detail_text + "],"

    # Convert a list to a string
    # keys_list: ['break1', 'break2', 'chr1', 'chr2']
    # key_text : "{break1} {break2} {chr1} {chr2} "
    key_text = "".join("{" + key.lower() + "} " for key in sorted(set(keys_list)))

    tooltip_templete = "{{format:[{formats}], keys: '{keys}'}}"
    return tooltip_templete.format(formats=tooltip_fomat_text, keys=key_text)
def ca_main(args):
    '''
    Entry point of the "ca" sub-command: merge the input files, then write the CA report and the homepage

    Parameters
    ----------
    args: argparse.Namespace
        : Arguments specified in the paplot command

    Attributes of args read here
    ----------
    input       : string: Input file path
    output_dir  : string: Output directory path
    project_name: string: Project name
    config_file : string: Config file path
    title       : string: Report title name
    ellipsis    : string: File name identifier for report
    overview    : string: Report summary
    remarks     : string: Additional information about report

    Return
    ------
    None
    '''
    import paplot.ca as ca

    # Configuration (the second element, the config file path, is not used here)
    loaded = tools.load_config(tools.win_to_unix(args.config_file))
    [config, conf_file] = loaded

    # Input data files
    input_list = tools.get_inputlist(tools.win_to_unix(args.input))
    if not len(input_list):
        print("input no file.")
        return

    # Only the input-side section name is needed
    [sec_in, sec_out] = tools.get_section("ca")

    # Ids derived from the input file names and the configured suffix
    suffix = tools.config_getstr(config, sec_in, "suffix")
    id_list = tools.get_idlist(input_list, suffix)

    # Project directory, and the merged data file placed in it.
    # The merged file keeps the extension of the first input file.
    output_html_dir = prep.create_dirs(tools.win_to_unix(args.output_dir),
                                       args.project_name, config)
    extension = os.path.splitext(input_list[0])[1]
    output_data = "data_%s%s" % (args.ellipsis, extension)
    merged_path = output_html_dir + "/" + output_data

    # positions: column positions returned by the merge step; {} means failure
    positions = merge.merge_result(input_list, id_list, merged_path, "ca",
                                   config, extract=True)
    if positions == {}:
        print("merge.merge_result: input file is invalid.")
        return

    # JavaScript file and HTML file for CA
    html_name = "graph_%s.html" % args.ellipsis
    js_name = "data_%s.js" % args.ellipsis
    params_html = dict(
        dir=output_html_dir,        # Project directory path
        data=output_data,           # Merged data file name
        js=js_name,                 # JavaScript file name
        html=html_name,             # HTML file name
        project=args.project_name,  # Project name given on command line
        title=args.title,           # Report title
    )
    ca.output_html(params_html, positions, config)

    # Homepage
    prep.create_index(config,
                      tools.win_to_unix(args.output_dir),
                      html_name,
                      args.project_name,
                      args.title,
                      overview=args.overview,
                      remarks=args.remarks)
def load_genome_size(config):
    """
    Read a genome-size file and return the chromosomes selected by [sv] use_chrs

    Parameters
    ----------
    config : configuration object read through tools.config_getpath / tools.config_getstr

    Return
    ------
    A nested list, in the order of the genome-size file:
        [[name (lowercase, without leading "chr"), size, color, label], ...]
    Sizes smaller than 1/10 of the largest size are raised to that value.
    """
    path = tools.config_getpath(config, "genome", "path", "../../config/hg19.csv")

    # Each element of use_chrs is Name[:Label[:Color]]
    settings = tools.config_getstr(config, "sv", "use_chrs").replace(" ", "").split(",")

    use_chrs = []
    labels = []
    colors = []
    for setting in settings:
        items = setting.split(":")

        # Names are compared in lowercase without the leading "chr", the same
        # normalization that is applied to the genome-size file below.
        # (Previously "chrX" was stored as "X" and never matched "x".)
        name = items[0].lower()
        if name.startswith("chr"):
            name = name[3:]

        use_chrs.append(name)
        labels.append(items[1] if len(items) > 1 else "")
        colors.append(items[2] if len(items) > 2 else "#BBBBBB")

    if len(use_chrs) < 1:
        return []

    # Read genome size
    with open(path) as f:
        read = f.read()

    formatt = read.replace("\r", "\n").replace(" ", "")

    genome_size = []
    _max = 0
    for row in formatt.split("\n"):
        # Comma separated if the row has a comma, otherwise tab separated
        sept = ","
        if row.find(",") < 0:
            sept = "\t"
        items = row.split(sept)

        # items[0]: chromosome name, items[1]: size (must be digits)
        if len(items) < 2:
            continue
        if not items[1].isdigit():
            continue

        label = items[0].lower()
        if label[0:3] == "chr":
            label = label[3:]

        if label not in use_chrs:
            continue
        pos = use_chrs.index(label)

        size = int(items[1])
        if _max < size:
            _max = size

        # Without a user-defined label, the name in the file is used
        if labels[pos] == "":
            labels[pos] = items[0]

        genome_size.append([label, size, colors[pos], labels[pos]])

    # Lower limit of the size: 1/10 of the largest one
    floor = _max // 10
    for entry in genome_size:
        if entry[1] < floor:
            entry[1] = floor

    return genome_size
def convert_tojs(input_file, output_file, positions, config):
    """
    Convert a merged SV data file into a JavaScript data file

    Parameters
    ----------
    input_file : str : Path of the merged data file
    output_file: str : Path of the JavaScript file to write
    positions  : dict: Passed to merge.position_to_dict to obtain {key: column title}
    config     : configuration object read through paplot.subcode.tools helpers

    Return
    ------
    None : No genome size is available, the input can not be read or has no data rows
    dict : {"id_list": [...], "group_list": [...], "color": [...]}
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    genome_size = load_genome_size(config)

    if len(genome_size) == 0:
        return None

    # genome_size entry: [name, size, color, label]
    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size))

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(input_file, header=1,
                                  sept=tools.config_getstr(config, "merge_format_sv", "sept"),
                                  comment=tools.config_getstr(config, "result_format_sv", "comment"))
    except Exception as e:
        # "%s" % e works on both Python 2 and 3 (e.message is Python 2 only)
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # group list
    if "group" in cols_di:
        # blanks become "_", an empty value becomes "_blank_"
        group_pos = df.name_to_index(cols_di["group"])
        for row in df.data:
            group = row[group_pos]
            row[group_pos] = group.replace(" ", "_")
            if group == "":
                row[group_pos] = "_blank_"

        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "sv", "group", config)
        labels = groups
    else:
        # Without a group column, links are grouped into inter / intra chromosome
        groups = ["outer", "inner"]
        labels = ["Inter Chromosome", "Intra Chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    conbined = []
    for i in range(len(groups)):
        conbined.append(group_template.format(name=groups[i], label=labels[i], color=colors_n[i]))

    group_text = ",".join(conbined)

    # ID list: unique, sorted, blank ids dropped
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # Optional columns only. list() is needed because Python 3 returns a
    # dict view, which has no remove().
    option_keys = list(cols_di.keys())
    for must_key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(must_key)
    if "group" in option_keys:
        option_keys.remove("group")

    with open(output_file, "w") as f_js:
        f_js.write(js_header
                   + js_dataset.format(
                       node_size_detail=calc_node_size(genome_size, 500),
                       node_size_thumb=calc_node_size(genome_size, 250),
                       node_size_select=tools.config_getint(config, "sv", "selector_split_size", 5000000),
                       genome_size=genome,
                       IDs=convert.list_to_text(Ids),
                       group=group_text,
                       tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "sv", "result_format_sv", "tooltip_format"),
                       link_header=convert.list_to_text(option_keys),
                   ))

        # write links
        f_js.write(js_links_1)

        for row in df.data:
            iid = row[df.name_to_index(cols_di["id"])]
            if iid == "":
                continue

            chr1 = str(row[df.name_to_index(cols_di["chr1"])])
            pos1 = row[df.name_to_index(cols_di["break1"])]
            chr2 = str(row[df.name_to_index(cols_di["chr2"])])
            pos2 = row[df.name_to_index(cols_di["break2"])]

            # rang > 0: the position is over the chromosome range
            # rang < 0: the link is skipped silently
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "false"
            if chr1 == chr2:
                inner_flg = "true"

            tooltip_items = [row[df.name_to_index(cols_di[key])]
                             for key in option_keys if cols_di[key] != ""]

            if "group" in cols_di:
                group_id = convert.value_to_index(groups, row[df.name_to_index(cols_di["group"])], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            f_js.write(links_template.format(ID=iid,
                                             Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                                             inner_flg=inner_flg,
                                             group_id=group_id,
                                             tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f_js.write(js_links_2)
        f_js.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
def load_subdata(ids, sec, config):
    """
    Read the data file of a sub plot and convert it to JavaScript text

    Parameters
    ----------
    ids    : list: Id values; only rows whose ID column is in this list are used
    sec    : str : Config section name of the sub plot
    config : configuration object read through paplot.subcode.tools helpers

    Return
    ------
    None : The data file does not exist, or a header column can not be resolved
    list : [data_text, item, colors_n, label, title]
    """
    import os
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    input_file = tools.config_getpath(config, sec, "path", default="../../example/sample_summary.csv")
    if not os.path.exists(input_file):
        print("[ERROR] file is not exist. %s" % input_file)
        return None

    sept = tools.config_getstr(config, sec, "sept")
    mode = tools.config_getstr(config, sec, "mode")
    comment = tools.config_getstr(config, sec, "comment")
    title = tools.config_getstr(config, sec, "title")

    # name_set element: item[:label[:color[:highlight color]]]
    label = []
    item = []
    colors_n_di = {}
    colors_h_di = {}
    for name_set in tools.config_getstr(config, sec, "name_set").split(","):
        name_set_split = convert.text_to_list(name_set, ":")
        for i in range(len(name_set_split)):
            text = name_set_split[i]
            if i == 0:
                item.append(text)
                if len(name_set_split) == 1:
                    label.append(text)  # Without a label, the item is the label
            elif i == 1:
                label.append(text)
            elif i == 2:
                colors_n_di[name_set_split[0]] = color.name_to_value(text)
            elif i == 3:
                colors_h_di[name_set_split[0]] = color.name_to_value(text)

    # fill in undefined items
    colors_n_di = color.create_color_dict(item, colors_n_di, color.osaka_subway_colors)

    # NOTE(review): colors_h_di2 is computed but not returned or used below.
    # "key in dict" replaces dict.has_key(), which does not exist on Python 3.
    colors_h_di2 = {}
    for key in colors_n_di:
        if key in colors_h_di:
            continue
        colors_h_di2[key] = color.Saturation_down(colors_n_di[key])

    # dict to value
    colors_n = [colors_n_di[key] for key in item]

    if mode == "range":
        item.remove(item[0])

    # Column positions: taken from the header line, or directly from the config
    header = []
    if tools.config_getboolean(config, sec, "header") == True:
        pos_value = -1
        pos_ID = -1
        value_colname = ""
    else:
        pos_value = tools.config_getint(config, sec, "col_value")
        pos_ID = tools.config_getint(config, sec, "col_ID")
        header = ["", ""]
        # Name used in messages when there is no header line
        value_colname = "column %d" % pos_value

    # copy Ids for find check
    unlookup = list(ids)

    # read
    data_text = ""
    values = []
    with open(input_file) as f_in:
        for line in f_in:
            line = line.strip()
            if len(line.replace(sept, "")) == 0:
                continue

            if comment != "" and line.find(comment) == 0:
                continue

            # The first effective line is the header
            if len(header) == 0:
                header = convert.text_to_list(line, sept)
                try:
                    value_colname = tools.config_getstr(config, sec, "col_value")
                    pos_value = header.index(value_colname)
                    id_colname = tools.config_getstr(config, sec, "col_ID")
                    pos_ID = header.index(id_colname)
                except Exception as e:
                    print(e)
                    return None
                continue

            cols = convert.text_to_list(line, sept)
            if cols[pos_ID] not in ids:
                continue

            # An id may appear on more than one line; it is only removed once
            if cols[pos_ID] in unlookup:
                unlookup.remove(cols[pos_ID])

            id_pos = ids.index(cols[pos_ID])

            if mode == "fix":
                if cols[pos_value] in item:
                    data_text += subdata_data_template.format(id=id_pos, item=item.index(cols[pos_value]))
                else:
                    print("[" + sec + "] name_set: data is undefined." + cols[pos_value] + "\n")
                    continue

            elif mode == "range" or mode == "gradient":
                try:
                    values.append(float(cols[pos_value]))
                except Exception as e:
                    print(value_colname + ": data type is invalid.\n" + str(e))
                    continue
                data_text += subdata_data_template.format(id=id_pos, item=cols[pos_value])

    if len(unlookup) > 0:
        print("[WARNING] can't find IDs subplot data.")
        print(unlookup)

    # For gradient, the first two items become the range of the values
    if mode == "gradient" and len(values) > 0:
        item[0] = min(values)
        item[1] = max(values)

    return [data_text, item, colors_n, label, title]
def convert_tojs(input_file, output_file, positions, config):
    """
    Convert a merged QC data file into a JavaScript data file

    Parameters
    ----------
    input_file : str : Path of the merged data file
    output_file: str : Path of the JavaScript file to write
    positions  : dict: Passed to merge.position_to_dict to obtain {key: column title}
    config     : configuration object read through paplot.subcode.tools helpers

    Return
    ------
    None : The input can not be read or has no data rows
    dict : {"plots": [chart id, ...]}
    """
    import os
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(input_file, header=1,
                                  sept=tools.config_getstr(config, "result_format_qc", "sept"),
                                  comment=tools.config_getstr(config, "result_format_qc", "comment"))
    except Exception as e:
        # "%s" % e works on both Python 2 and 3 (e.message is Python 2 only)
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # chart list
    # NOTE(review): the previous code also built a sorted copy of the section
    # names with "qc_chart_brush" moved to the front, but never iterated it.
    # Charts are emitted in config.sections() order, as before - confirm which
    # order is intended.
    plots_text = ""
    plots_option = []
    for sec in config.sections():
        if not sec.startswith("qc_chart_"):
            continue

        chart_id = sec.replace("qc_chart_", "chart_")

        stack_id = []
        label = []
        colors_di = {}
        counter = 0
        for name_set in tools.config_getstr(config, sec, "name_set").split(","):
            # name_set: "label" or "label:color"
            name_set_split = convert.text_to_list(name_set, ":")
            if len(name_set_split) == 0:
                continue

            stack_id.append("stack" + str(counter))
            label.append(name_set_split[0])

            if len(name_set_split) > 1:
                colors_di[name_set_split[0]] = color.name_to_value(name_set_split[1])
            counter += 1

        # fill in undefined items
        colors_di = color.create_color_dict(label, colors_di, color.metro_colors)

        # dict to value
        colors_li = [colors_di[key] for key in label]

        plots_text += plot_template.format(
            chart_id=chart_id,
            title=tools.config_getstr(config, sec, "title"),
            title_y=tools.config_getstr(config, sec, "title_y"),
            stack=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "stack"),
            stack_id=convert.list_to_text(stack_id),
            label=convert.list_to_text(label),
            color=convert.list_to_text(colors_li),
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "tooltip_format"),
        )
        plots_option.append(chart_id)

    # ID list: unique, sorted, blank ids dropped
    id_index = df.name_to_index(cols_di["id"])
    id_list = sorted(set(row[id_index] for row in df.data if row[id_index] != ""))

    # header
    headers = tools.dict_keys(cols_di)

    with open(output_file, "w") as f:
        f.write(js_header)
        f.write(js_dataset.format(IDs=convert.list_to_text(id_list),
                                  header=convert.list_to_text(headers),
                                  plots=plots_text))
        f.write(js_data1)

        # values
        for row in df.data:
            iid = row[df.name_to_index(cols_di["id"])]
            if iid == "":
                continue

            cells = []
            for item in headers:
                val = row[df.name_to_index(cols_di[item])]
                if isinstance(val, str):
                    cells.append("'" + val + "'")
                elif isinstance(val, float):
                    cells.append('%.2f' % val)
                else:
                    cells.append(str(val))

            f.write("[" + ",".join(cells) + "],")

        f.write(js_data2)

        # Write rest of JavaScript file and footer
        template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/data_qc.js"
        with open(template_path) as f_template:
            js_function = f_template.read()
        f.write(js_function)
        f.write(js_footer)

    return {"plots": plots_option}
def create_index(config, output_dir, output_html, project_name, name, overview="", sub_text="", composite=False, remarks=""):
    """
    Create homepage

    Parameters
    ----------
    config      : configparser.RawConfigParser
    output_dir  : string: Output directory path
    output_html : string: HTML file name
    project_name: string: Project name given by user on command line
    name        : string: Report title
    overview    : string: Report summary
    sub_text    : string: Additional string to display if composite=True and a report does not exist
    composite   : bool  : Whether or not to have multiple reports
    remarks     : string: Additional information about report

    Return
    ----------
    None
    """
    import paplot.subcode.tools as tools
    import os

    # Confirm existence of homepage (an empty file name never counts as existing)
    html_exists = output_html != "" and os.path.exists(
        output_dir + "/" + project_name + "/" + output_html)

    # Create json data
    json_data = _load_metadata(output_dir, output_html, project_name, name,
                               overview, sub_text, composite, html_exists)

    # Create html for link
    link_text = _convert_index_item(json_data)

    # Load the template html for the homepage
    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/index.html"  # ./templates/index.html
    with open(template_path) as f_template:
        html_template = f_template.read()

    # Extract remarks from a configuration
    if remarks == "":
        remarks = tools.config_getstr(config, "style", "remarks")

    # Render before opening the output, so that a formatting error
    # does not truncate an existing index.html
    html_text = html_template.format(
        version=version_text(),   # Version
        date=tools.now_string(),  # Current time
        remarks=remarks,          # Some string
        link=link_text)           # HTML for link

    # Create html file for a homepage
    with open(output_dir + "/index.html", "w") as f_html:
        f_html.write(html_text)
def load_genome_size(config):
    """
    Read and parse a genome-size file and return a nested list:
    [
        [
            A chromosome number in lowercase letters (without a leading "chr"),
            The size of the 1st element,
            The color of the 1st element,
            The original name of the 1st element(that is not necessarily lowercase) or a user-defined name,
        ],
        ...
    ]

    The result is in the order read from the genome-size file instead of the configuration file.
    Sizes smaller than 1/10 of the maximum size are raised to that value.
    An empty list is returned if no chromosome is configured.
    """
    import os
    import paplot.subcode.tools as tools

    default_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/genome_size_hg19.csv"  # ./templates/genome_size_hg19.csv
    path = tools.config_getpath(config, "genome", "path", default_path)

    # Each setting is Name:Label:Color
    # Name is like chromosome number such as 1, 2, ..., X, Y, ...
    # :Label and :Color are optional
    settings = tools.config_getstr(config, "ca", "use_chrs").replace(" ", "").split(",")

    # chromosome name -> [label, color]
    chr_settings = {}
    for setting in settings:
        items = setting.split(":")

        # Lowercase first, then remove the leading "chr", so that "chrX", "ChrX"
        # and "x" all become "x" like the names read from the genome-size file
        chr_name = items[0].lower()
        if chr_name.startswith("chr"):
            chr_name = chr_name[3:]

        # Ignore empty entries (e.g. an empty use_chrs option)
        if chr_name == "":
            continue

        # The first definition of a chromosome takes precedence
        if chr_name in chr_settings:
            continue

        label = items[1] if len(items) > 1 else ""
        chr_color = items[2] if len(items) > 2 else "#BBBBBB"  # gray
        chr_settings[chr_name] = [label, chr_color]

    if not chr_settings:
        return []

    # Read genome size
    with open(path) as f:
        text = f.read()
    rows = text.replace("\r", "\n").replace(" ", "").split("\n")

    genome_size = []
    max_size = 0
    for row in rows:
        # Delimiter setting: comma if the row contains one, otherwise tab
        sept = "," if "," in row else "\t"

        # Split the line and the second element must be numeric
        # items[0]: chromosome number
        # items[1]: size of items[0]
        items = row.split(sept)
        if len(items) < 2 or not items[1].isdigit():
            continue

        # The first element must be included in the chromosomes extracted from the configuration file
        chr_name = items[0].lower()
        if chr_name.startswith("chr"):
            chr_name = chr_name[3:]

        setting = chr_settings.get(chr_name)
        if setting is None:
            continue

        # Without a user-defined label, the original name in the file is used
        if setting[0] == "":
            setting[0] = items[0]

        size = int(items[1])
        genome_size.append([chr_name, size, setting[1], setting[0]])

        # Maximum size of the chromosome
        if max_size < size:
            max_size = size

    # The minimum size of the chromosomes is set to 1/10 of the maximum size
    min_size = max_size // 10
    for entry in genome_size:
        if entry[1] < min_size:
            entry[1] = min_size

    return genome_size
def pyformat_to_jstooltip_text(positions, config, section_fmt, section_col, item_startwith):
    """
    Convert python-style format options into the tooltip definition text for JavaScript

    Each option returned by tools.config_getoptions(config, section_fmt, item_startwith)
    is split into fixed text and "{...}" blocks. A block may combine keys and numbers
    with + - * / and may carry an extension after ":" (e.g. "{key1/key2:.2}").

    Parameters
    ----------
    positions     : dict  : its keys are the names that may be used inside "{...}"
    config        : configparser.RawConfigParser
    section_fmt   : string: section that holds the format options
    section_col   : string: not used by this function
    item_startwith: string: passed to tools.config_getoptions to select the options

    Return
    ----------
    string: "{format:[...], keys: '...'}"

    A block that uses a key that is neither in positions nor starts with "#"
    is reported with a [WARNING] message and left out of the result.
    """
    import re

    tooltip_templete = "{{format:[{formats}], keys: '{keys}'}}"
    tooltip_detail_templete = "{{label:'{label}',type:'{type}',keys:[{keys}],ext:'{ext}'}},"

    re_compile = re.compile(r"\{[0-9a-zA-Z\+\-\*\/\#\:\,\.\_\ ]+\}")
    re_compile2 = re.compile(r"[\+\-\*\/\:]")
    # One token = text between the operators that re_compile2 splits on
    re_token = re.compile(r"[^\+\-\*\/\:]+")

    def _is_number(text):
        # True if text is a numeric literal (a constant, not a key)
        try:
            float(text)
        except ValueError:
            return False
        return True

    keys_list = []
    tooltip_fomat_text = ""
    for option in tools.config_getoptions(config, section_fmt, item_startwith):

        formt = tools.config_getstr(config, section_fmt, option)
        key_text_list = re_compile.findall(formt)
        tooltip_detail_text = ""

        for key_text in key_text_list:
            start = formt.find(key_text)

            # write fix area
            if start > 0:
                tooltip_detail_text += tooltip_detail_templete.format(
                    label=formt[0:start], type="fix", keys="", ext="")

            formt = formt[start + len(key_text):]

            label_text = key_text.replace(" ", "").replace("{", "").replace("}", "")
            sub_keys = re_compile2.split(label_text)

            ttype = "numeric"
            ext = ""

            # case str
            if len(sub_keys) == 1:
                ttype = "str"

            # case with-extention
            if label_text.find(":") > 0:
                ext_start = label_text.index(":")
                ext = label_text[ext_start + 1:]
                label_text = label_text[0:ext_start]
                sub_keys = re_compile2.split(label_text)

            # remove numeric block
            # (a new list is built: removing while iterating skips the following element)
            sub_keys = [sub_key for sub_key in sub_keys if not _is_number(sub_key)]

            check = True
            for sub_key in sub_keys:
                if sub_key not in positions and not sub_key.startswith("#"):
                    print("[WARNING] key:{key} is not defined.".format(key=sub_key))
                    check = False
                    break

            if check:
                key_set = set(sub_keys)

                def _wrap(match):
                    # Put only whole tokens in braces, so that a key contained
                    # in a longer key (e.g. "a" in "ca") does not corrupt it
                    token = match.group(0)
                    if token in key_set:
                        return "{" + token + "}"
                    return token

                label_text = re_token.sub(_wrap, label_text)
                tooltip_detail_text += tooltip_detail_templete.format(
                    label=label_text, type=ttype, keys=list_to_text(sub_keys), ext=ext)
                keys_list.extend(sub_keys)

        # write the remaining fix area
        if len(formt) > 0:
            tooltip_detail_text += tooltip_detail_templete.format(
                label=formt, type="fix", keys="", ext="")

        tooltip_fomat_text += "[" + tooltip_detail_text + "],"

    # Unique keys, sorted so that the output does not depend on set ordering
    key_text = "".join("{" + key + "} " for key in sorted(set(keys_list)))

    return tooltip_templete.format(formats=tooltip_fomat_text, keys=key_text)
def convert_tojs(params, config):
    """
    Convert a pmsignature result (JSON) into the JavaScript data file of the signature report

    Parameters
    ----------
    params: dict: the following keys are used
                  "data"    : path of the input JSON file
                  "dir"     : directory to write the JavaScript file to
                  "ellipsis": prefix used for the output file names
    config: configparser.RawConfigParser

    Return
    ----------
    None if the input file cannot be read,
    {}   if the input has no signature,
    otherwise a dict with the keys "sig_num", "js", "html" and "intergral"
    """
    import os
    import json
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    # data read
    try:
        with open(params["data"]) as f_data:
            json_data = json.load(f_data)
    except Exception as e:
        # Exceptions have no ".message" attribute on Python 3; format the exception itself
        print("failure open data %s, %s" % (params["data"], e))
        return None

    key_id_list = tools.config_getstr(config, "result_format_pmsignature", "key_id")
    key_ref = tools.config_getstr(config, "result_format_pmsignature", "key_ref")
    key_alt = tools.config_getstr(config, "result_format_pmsignature", "key_alt")
    key_strand = tools.config_getstr(config, "result_format_pmsignature", "key_strand")
    key_mutations = tools.config_getstr(config, "result_format_pmsignature", "key_mutation")
    key_mutation_count = tools.config_getstr(config, "result_format_pmsignature", "key_mutation_count")

    sig_num = len(json_data[key_ref])

    if sig_num == 0:
        print("no data %s" % params["data"])
        return {}

    # signature names
    signature_list = ["Signature %d" % (s + 1) for s in range(sig_num)]

    # each signature colors
    sig_color_list = color.create_color_array(sig_num, color.r_set2)

    # use background?
    use_background = tools.config_getboolean(config, "result_format_pmsignature", "background")
    if use_background:
        signature_list.append("Background ")
        sig_color_list.append(color.r_set2_gray)

    # Id list
    id_txt = ""
    if key_id_list in json_data:
        id_txt = convert.list_to_text(json_data[key_id_list])

    # mutations
    mutations_txt = ""
    if key_mutations in json_data:
        mutations_txt = "".join(
            "[%d,%d,%f]," % (m[0], m[1], m[2]) for m in json_data[key_mutations])

    def _nested_rows(signatures):
        # Text "[[v,v,...],[v,...],],..." for a list of signatures, each a list of rows
        text = ""
        for sig in signatures:
            rows = "".join("[" + ",".join(map(str, sub)) + "]," for sub in sig)
            text += "[" + rows + "],"
        return text

    # signature
    dataset_ref = _nested_rows(json_data[key_ref])
    dataset_alt = _nested_rows(json_data[key_alt])
    dataset_strand = "".join(
        "[" + ",".join(map(str, sig)) + "]," for sig in json_data[key_strand])

    # tooltips
    keys_di = {
        "a": "", "c": "", "g": "", "t": "",
        "ca": "", "cg": "", "ct": "", "ta": "", "tc": "", "tg": "",
        "plus": "", "minus": "",
        "id": "", "sig": ""
    }

    # for ref
    # The format text is the same for every position, so it is converted once
    tooltip_ref_format = convert.pyformat_to_jstooltip_text(
        keys_di, config, "pmsignature", "", "tooltip_format_ref")
    tooltip_refs_txt = "".join(
        js_tooltip_ref_template.format(index=r, tooltip_format=tooltip_ref_format)
        for r in range(len(json_data[key_ref][0])))

    mutation_count_txt = ""
    if (key_mutation_count != "") and (key_mutation_count in json_data):
        mutation_count_txt = "".join("%d," % v for v in json_data[key_mutation_count])

    # output
    # The background counts as one more signature in the file names
    sig_num_sift = 1 if use_background else 0

    ellipsis = "%s%d" % (params["ellipsis"], (sig_num + sig_num_sift))
    js_file = "data_%s.js" % ellipsis
    html_file = "graph_%s.html" % ellipsis

    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/data_pmsignature.js"

    with open(params["dir"] + "/" + js_file, "w") as f:
        f.write(js_header + js_dataset.format(
            Ids=id_txt,
            color_A=tools.config_getstr(config, "pmsignature", "color_A", "#06B838"),
            color_C=tools.config_getstr(config, "pmsignature", "color_C", "#609CFF"),
            color_G=tools.config_getstr(config, "pmsignature", "color_G", "#B69D02"),
            color_T=tools.config_getstr(config, "pmsignature", "color_T", "#F6766D"),
            color_plus=tools.config_getstr(config, "pmsignature", "color_plus", "#00BEC3"),
            color_minus=tools.config_getstr(config, "pmsignature", "color_minus", "#F263E2"),
            signatures=convert.list_to_text(signature_list),
            colors=convert.list_to_text(sig_color_list),
            mutations=mutations_txt,
            dataset_ref=dataset_ref,
            dataset_alt=dataset_alt,
            dataset_strand=dataset_strand,
            tooltip_ref=tooltip_refs_txt,
            tooltip_alt=convert.pyformat_to_jstooltip_text(
                keys_di, config, "pmsignature", "", "tooltip_format_alt"),
            tooltip_strand=convert.pyformat_to_jstooltip_text(
                keys_di, config, "pmsignature", "", "tooltip_format_strand"),
            mutation_title=convert.pyformat_to_jstooltip_text(
                keys_di, config, "pmsignature", "", "tooltip_format_mutation_title"),
            mutation_partial=convert.pyformat_to_jstooltip_text(
                keys_di, config, "pmsignature", "", "tooltip_format_mutation_partial"),
            mutation_count=mutation_count_txt,
        ))

        with open(template_path) as f_template:
            f.write(f_template.read())
        f.write(js_footer)

    # The optional keys (ID, mutation, mutation count) must all be configured
    integral = not (key_id_list == "" or key_mutations == "" or key_mutation_count == "")

    return {
        "sig_num": sig_num,
        "js": js_file,
        "html": html_file,
        # NOTE: the key is spelled "intergral"; it is kept as is because it is part of the returned interface
        "intergral": integral,
    }
def pyformat_to_jstooltip_text(positions, config, section_fmt, section_col, item_startwith):
    """
    Convert python-style format options into the tooltip definition text for JavaScript

    Each option returned by tools.config_getoptions(config, section_fmt, item_startwith)
    is split into fixed text and "{...}" blocks. A block may combine keys and numbers
    with + - * / and may carry an extension after ":" (e.g. "{key1/key2:.2}").
    The content of a "{...}" block is converted to lowercase.

    Parameters
    ----------
    positions     : dict  : its keys are the names that may be used inside "{...}"
    config        : configparser.RawConfigParser
    section_fmt   : string: section that holds the format options
    section_col   : string: not used by this function
    item_startwith: string: passed to tools.config_getoptions to select the options

    Return
    ----------
    string: "{format:[...], keys: '...'}"

    A block that uses a key that is neither in positions nor starts with "#"
    is reported with a [WARNING] message and left out of the result.
    """
    import re

    tooltip_templete = "{{format:[{formats}], keys: '{keys}'}}"
    tooltip_detail_templete = "{{label:'{label}',type:'{type}',keys:[{keys}],ext:'{ext}'}},"

    re_compile = re.compile(r"\{[0-9a-zA-Z\+\-\*\/\#\:\,\.\_\ ]+\}")
    re_compile2 = re.compile(r"[\+\-\*\/\:]")
    # One token = text between the operators that re_compile2 splits on
    re_token = re.compile(r"[^\+\-\*\/\:]+")

    def _is_number(text):
        # True if text is a numeric literal (a constant, not a key)
        try:
            float(text)
        except ValueError:
            return False
        return True

    keys_list = []
    tooltip_fomat_text = ""
    for option in tools.config_getoptions(config, section_fmt, item_startwith):

        formt = tools.config_getstr(config, section_fmt, option)
        key_text_list = re_compile.findall(formt)
        tooltip_detail_text = ""

        for key_text in key_text_list:
            start = formt.find(key_text)

            # write fix area
            if start > 0:
                tooltip_detail_text += tooltip_detail_templete.format(
                    label=formt[0:start], type="fix", keys="", ext="")

            # Keys are handled in lowercase (the length does not change)
            key_text = key_text.lower()
            formt = formt[start + len(key_text):]

            label_text = key_text.replace(" ", "").replace("{", "").replace("}", "")
            sub_keys = re_compile2.split(label_text)

            ttype = "numeric"
            ext = ""

            # case str
            if len(sub_keys) == 1:
                ttype = "str"

            # case with-extention
            if label_text.find(":") > 0:
                ext_start = label_text.index(":")
                ext = label_text[ext_start + 1:]
                label_text = label_text[0:ext_start]
                sub_keys = re_compile2.split(label_text)

            # remove numeric block
            # (a new list is built: removing while iterating skips the following element)
            sub_keys = [sub_key for sub_key in sub_keys if not _is_number(sub_key)]

            check = True
            for sub_key in sub_keys:
                if sub_key not in positions and not sub_key.startswith("#"):
                    print("[WARNING] key:{key} is not defined.".format(key=sub_key))
                    check = False
                    break

            if check:
                key_set = set(sub_keys)

                def _wrap(match):
                    # Put only whole tokens in braces, so that a key contained
                    # in a longer key (e.g. "a" in "ca") does not corrupt it
                    token = match.group(0)
                    if token in key_set:
                        return "{" + token + "}"
                    return token

                label_text = re_token.sub(_wrap, label_text)
                tooltip_detail_text += tooltip_detail_templete.format(
                    label=label_text, type=ttype, keys=list_to_text(sub_keys), ext=ext)
                keys_list.extend(sub_keys)

        # write the remaining fix area
        if len(formt) > 0:
            tooltip_detail_text += tooltip_detail_templete.format(
                label=formt, type="fix", keys="", ext="")

        tooltip_fomat_text += "[" + tooltip_detail_text + "],"

    # Unique keys in sorted order
    key_text = "".join("{" + key.lower() + "} " for key in sorted(set(keys_list)))

    return tooltip_templete.format(formats=tooltip_fomat_text, keys=key_text)
def load_subdata(ids, sec, config):
    """
    Load the data of a sub plot and convert it to text for JavaScript

    Parameters
    ----------
    ids   : list  : sample IDs; a data row is used only if its ID is in this list
    sec   : string: section name of the sub plot in the configuration
    config: configparser.RawConfigParser

    Return
    ----------
    None if the data file does not exist or the header has no ID/value column,
    otherwise a list [data_text, item, colors_n, label, title]
        data_text: string: subdata_data_template filled in for each used row
        item     : list  : item names from "name_set"
                           (mode "range"   : without the first item,
                            mode "gradient": first two elements replaced by the min and max value)
        colors_n : list  : color values, one per item of "name_set"
        label    : list  : labels from "name_set"
        title    : string: title of the sub plot
    """
    import os
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    input_file = tools.config_getpath(config, sec, "path", default = "")
    if not os.path.exists(input_file):
        print ("[ERROR] file is not exist. %s" % input_file)
        return None

    sept = tools.config_getstr(config, sec, "sept").replace("\\t", "\t").replace("\\n", "\n").replace("\\r", "\r")
    mode = tools.config_getstr(config, sec, "mode")
    comment = tools.config_getstr(config, sec, "comment")
    title = tools.config_getstr(config, sec, "title")

    # name_set assumes something like "item:label:color"; label and color are optional.
    # Any further field is accepted but not used by this function.
    label = []
    item = []
    colors_n_di = {}
    for name_set in tools.config_getstr(config, sec, "name_set").split(","):
        name_set_split = convert.text_to_list(name_set, ":")
        if len(name_set_split) == 0:
            continue

        item_name = name_set_split[0]
        item.append(item_name)
        # Without a label the item name is used as label
        label.append(name_set_split[1] if len(name_set_split) > 1 else item_name)
        if len(name_set_split) > 2:
            colors_n_di[item_name] = color.name_to_value(name_set_split[2])

    # fill in undefined items
    colors_n_di = color.create_color_dict(item, colors_n_di, color.osaka_subway_colors)

    # dict to value
    colors_n = [colors_n_di[key] for key in item]

    if mode == "range":
        del item[0]

    # Used in messages about invalid values
    value_colname = tools.config_getstr(config, sec, "col_value")

    # With a header line, the column positions are resolved from the first data line
    header_pending = tools.config_getboolean(config, sec, "header") == True
    if header_pending:
        pos_value = -1
        pos_id = -1
    else:
        pos_value = tools.config_getint(config, sec, "col_value")-1
        pos_id = tools.config_getint(config, sec, "col_ID")-1

    # ID -> position in ids (the first position if an ID is listed more than once)
    id_positions = {}
    for position, iid in enumerate(ids):
        id_positions.setdefault(iid, position)

    # IDs found in the data file; a set, so that an ID on several rows is not an error
    found_ids = set()

    # read
    data_parts = []
    values = []
    with open(input_file) as f_input:
        for line in f_input:
            line = line.strip()
            if len(line.replace(sept, "")) == 0:
                continue

            if comment != "" and line.startswith(comment):
                continue

            if header_pending:
                header = convert.text_to_list(line, sept)
                try:
                    pos_value = header.index(value_colname)
                    pos_id = header.index(tools.config_getstr(config, sec, "col_ID"))
                except Exception as e:
                    # Exceptions have no ".message" attribute on Python 3
                    print(e)
                    return None
                header_pending = False
                continue

            cols = convert.text_to_list(line, sept)
            row_id = cols[pos_id]
            if row_id not in id_positions:
                continue

            found_ids.add(row_id)
            id_pos = id_positions[row_id]

            if mode == "fix":
                if cols[pos_value] in item:
                    data_parts.append(subdata_data_template.format(
                        id = id_pos, item = item.index(cols[pos_value])))
                else:
                    print("[" + sec + "] name_set: data is undefined." + cols[pos_value] + "\n")
                    continue

            elif mode == "range" or mode == "gradient":
                try:
                    values.append(float(cols[pos_value]))
                except (ValueError, IndexError) as e:
                    print(value_colname + ": data type is invalid.\n" + str(e))
                    continue
                data_parts.append(subdata_data_template.format(
                    id = id_pos, item = cols[pos_value]))

    data_text = "".join(data_parts)

    unlookup = [iid for iid in ids if iid not in found_ids]
    if len(unlookup) > 0:
        print("[WARNING] can't find IDs subplot data.")
        print(unlookup)

    if mode == "gradient" and len(values) > 0:
        item[0] = min(values)
        item[1] = max(values)

    return [data_text, item, colors_n, label, title]
def group_list(colmun, mode, name, config):
    """
    Collect the group names of a column and assign a color value to each of them

    Parameters
    ----------
    colmun: list: column data holding the group titles
    mode  : str : "ca" or "mutation"
    name  : str : option name suffix, e.g. "group"
    config: configparser.RawConfigParser

    Return
    ------
    [] for any other mode, otherwise a nested list [funcs, colors]
        funcs : list: sorted unique group names
        colors: list: color values in the same order as funcs
    """
    import paplot.color as color

    # Section holding the separator option for each supported mode
    format_sections = {
        "mutation": "result_format_mutation",
        "ca": "result_format_ca",
    }
    if mode not in format_sections:
        return []

    # Options read from the configuration file
    separator = tools.config_getstr(config, format_sections[mode], "sept_%s" % name)  # key: sept_group
    limited_list = text_to_list(
        tools.config_getstr(config, mode, "limited_%s" % name), ",")  # key: limited_group
    nouse_list = text_to_list(
        tools.config_getstr(config, mode, "nouse_%s" % name), ",")  # key: nouse_group

    # Gather the accepted group names into a set (duplicates vanish)
    accepted = set()
    for row in colmun:
        # A row is split only when a separator is configured
        parts = [row] if separator == "" else row.split(separator)

        for part in parts:
            group = part.strip()

            # Empty names are skipped
            if group == "":
                continue

            # With a non-empty limited_list, only matching names pass
            if len(limited_list) > 0 and fnmatch_list(group, limited_list) is False:
                continue

            # Names matching nouse_list are dropped
            if fnmatch_list(group, nouse_list):
                continue

            accepted.add(group)

    funcs = sorted(accepted)

    # User-defined colors: comma separated entries such as "A:#66C2A5"
    user_colors = {}
    for entry in tools.config_getstr(config, mode, "%s_color" % name).split(","):  # key: group_color
        if len(entry) == 0:
            continue

        fields = text_to_list(entry, ":")
        if len(fields) >= 2:
            user_colors[fields[0]] = color.name_to_value(fields[1])

    # Groups without a user-defined color get one from the default palette
    palette = color.create_color_dict(funcs, user_colors, color.metro_colors)

    # Colors lined up in the order of funcs
    colors = [palette[group] for group in funcs]

    return [funcs, colors]