def run(args):
    """Recommend HotNet delta values and write them out as JSON.

    Loads the influence matrix, its index and the heat scores named in
    ``args``, computes deltas on permuted data, and takes the median of the
    ``deltas[MIN_CC_SIZE]`` entry. Multiples 1..10 of that median are then
    tried on the real data until the largest connected component has at most
    ``MAX_CC_SIZE`` genes. That multiple and the next four are reported.

    NOTE: if none of the ten multiples satisfies the size limit, the search
    ends on the tenth one and multiples 10..14 are reported.

    Args:
        args: parsed command-line namespace. Reads ``infmat_file``,
            ``infmat_name``, ``infmat_index_file``, ``heat_file``,
            ``perm_type``, ``permutation_genes_file``, ``num_permutations``,
            ``parallel`` and ``output_file`` (falsy means write to stdout).

    Raises:
        ValueError: if ``args.perm_type`` is neither "heat" nor "mutations".
    """
    infmat = scipy.io.loadmat(args.infmat_file)[args.infmat_name]
    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    if args.perm_type == "heat":
        addtl_genes = (hnio.load_genes(args.permutation_genes_file)
                       if args.permutation_genes_file else None)
        deltas = get_deltas_for_heat(infmat, infmat_index, heat, addtl_genes,
                                     args.num_permutations, args.parallel)
    elif args.perm_type == "mutations":
        deltas = get_deltas_for_mutations(args, infmat, infmat_index, heat_params)
    else:
        raise ValueError("Invalid mutation permutation type: %s" % args.perm_type)

    # find the multiple of the median delta s.t. the size of the largest CC
    # in the real data is <= MAX_CC_SIZE
    medianDelta = np.median(deltas[MIN_CC_SIZE])

    M, gene_index = hn.induce_infmat(infmat, infmat_index, sorted(heat.keys()))
    h = hn.heat_vec(heat, gene_index)
    sim = hn.similarity_matrix(M, h)

    for multiple in range(1, 11):
        G = hn.weighted_graph(sim, gene_index, multiple * medianDelta)
        max_cc_size = max([len(cc) for cc in hn.connected_components(G)])
        if max_cc_size <= MAX_CC_SIZE:
            break

    # and recommend running HotNet with that multiple and the next 4 multiples
    recommended_deltas = [m * medianDelta for m in range(multiple, multiple + 5)]

    output_file = open(args.output_file, 'w') if args.output_file else sys.stdout
    try:
        json.dump({"parameters": vars(args),
                   "heat_parameters": heat_params,
                   "recommended_deltas": recommended_deltas},
                  output_file, indent=4)
    finally:
        # Close only a file we opened ourselves; never close sys.stdout.
        if args.output_file:
            output_file.close()
def run(args):
    """Run HotNet on the given inputs and write the results to a directory.

    Loads the influence matrix, its index and the heat scores named in
    ``args``, builds the weighted graph at ``args.delta``, and extracts
    connected components via ``hn.connected_components(G, args.min_cc_size)``.
    Unless ``args.permutation_type`` is "none", significance statistics are
    computed by heat or mutation permutation. The output directory receives a
    JSON summary, a components TSV and, when statistics were computed, a
    significance TSV.

    Args:
        args: parsed command-line namespace. Reads ``output_directory``,
            ``infmat_file``, ``infmat_name``, ``infmat_index_file``,
            ``heat_file``, ``delta``, ``min_cc_size`` and
            ``permutation_type``. The whole namespace is also passed on to
            the permutation helpers.

    Raises:
        RuntimeError: if mutation permutation is requested but the heat
            scores were not produced by ``load_mutation_heat``.
        ValueError: if ``args.permutation_type`` is not "none", "heat" or
            "mutations".
    """
    # create output directory if doesn't exist; warn if output files already exist
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)
    dir_contents = os.listdir(args.output_directory)
    if JSON_OUTPUT in dir_contents or COMPONENTS_TSV in dir_contents or SIGNIFICANCE_TSV in dir_contents:
        print("WARNING: Output directory already contains HotNet results file(s), which will be "
              "overwritten. (Ctrl-c to cancel).")

    # load data
    infmat = scipy.io.loadmat(args.infmat_file)[args.infmat_name]
    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    # compute similarity matrix and extract connected components
    M, gene_index = hn.induce_infmat(infmat, infmat_index, sorted(heat.keys()), quiet=False)
    h = hn.heat_vec(heat, gene_index)
    sim = hn.similarity_matrix(M, h)
    G = hn.weighted_graph(sim, gene_index, args.delta)
    ccs = hn.connected_components(G, args.min_cc_size)

    # calculate significance
    if args.permutation_type != "none":
        if args.permutation_type == "heat":
            sizes2stats = heat_permutation_significance(args, heat, infmat, infmat_index, G)
        elif args.permutation_type == "mutations":
            if heat_params["heat_fn"] != "load_mutation_heat":
                # Adjacent literals keep the message free of the stray
                # backslash/indentation a line continuation would embed.
                raise RuntimeError("Heat scores must be based on mutation data to perform "
                                   "significance testing based on mutation data permutation.")
            sizes2stats = mutation_permutation_significance(args, infmat, infmat_index, G,
                                                            heat_params)
        else:
            raise ValueError("Unrecognized permutation type %s" % (args.permutation_type))

    # sort ccs list such that genes within components are sorted alphanumerically, and components
    # are sorted first by length, then alphanumerically by name of the first gene in the component
    ccs = [sorted(cc) for cc in ccs]
    ccs.sort(key=lambda comp: comp[0])
    ccs.sort(key=len, reverse=True)  # stable sort keeps the first-gene order within a size

    # write output
    output_dir = os.path.abspath(args.output_directory)
    output_dict = {"parameters": vars(args), "heat_parameters": heat_params,
                   "sizes": hn.component_sizes(ccs), "components": ccs}
    if args.permutation_type != "none":
        output_dict["statistics"] = sizes2stats
        hnio.write_significance_as_tsv(os.path.join(output_dir, SIGNIFICANCE_TSV), sizes2stats)

    # The context manager closes the file even if serialization fails.
    with open(os.path.join(output_dir, JSON_OUTPUT), 'w') as json_out:
        json.dump(output_dict, json_out, indent=4)

    hnio.write_components_as_tsv(os.path.join(output_dir, COMPONENTS_TSV), ccs)
def run(args):
    """Pick candidate HotNet deltas from permuted data and emit them as JSON.

    Steps:
      1. Load the influence matrix, its index and the heat scores named in
         ``args``.
      2. Compute deltas from permuted data, using heat permutation or
         mutation permutation depending on ``args.perm_type``.
      3. Take the median of ``deltas[MIN_CC_SIZE]`` and try multiples 1..10
         of it on the real data, stopping at the first multiple whose largest
         connected component has at most ``MAX_CC_SIZE`` genes.
      4. Write that multiple and the next four, together with the run
         parameters, to ``args.output_file`` (or stdout when it is falsy).

    NOTE: when no multiple in 1..10 meets the size limit, the last one tried
    (10) is used as the starting point.

    Args:
        args: parsed command-line namespace. Reads ``infmat_file``,
            ``infmat_name``, ``infmat_index_file``, ``heat_file``,
            ``perm_type``, ``permutation_genes_file``, ``num_permutations``,
            ``parallel`` and ``output_file``.

    Raises:
        ValueError: if ``args.perm_type`` is neither "heat" nor "mutations".
    """
    infmat = scipy.io.loadmat(args.infmat_file)[args.infmat_name]
    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    if args.perm_type == "heat":
        if args.permutation_genes_file:
            addtl_genes = hnio.load_genes(args.permutation_genes_file)
        else:
            addtl_genes = None
        deltas = get_deltas_for_heat(infmat, infmat_index, heat, addtl_genes,
                                     args.num_permutations, args.parallel)
    elif args.perm_type == "mutations":
        deltas = get_deltas_for_mutations(args, infmat, infmat_index,
                                          heat_params)
    else:
        raise ValueError("Invalid mutation permutation type: %s" %
                         args.perm_type)

    # find the multiple of the median delta s.t. the size of the largest CC
    # in the real data is <= MAX_CC_SIZE
    medianDelta = np.median(deltas[MIN_CC_SIZE])

    M, gene_index = hn.induce_infmat(infmat, infmat_index,
                                     sorted(heat.keys()))
    h = hn.heat_vec(heat, gene_index)
    sim = hn.similarity_matrix(M, h)

    for first_multiple in range(1, 11):
        G = hn.weighted_graph(sim, gene_index, first_multiple * medianDelta)
        max_cc_size = max([len(cc) for cc in hn.connected_components(G)])
        if max_cc_size <= MAX_CC_SIZE:
            break

    # and recommend running HotNet with that multiple and the next 4 multiples
    recommended_deltas = [
        k * medianDelta for k in range(first_multiple, first_multiple + 5)
    ]

    results = {
        "parameters": vars(args),
        "heat_parameters": heat_params,
        "recommended_deltas": recommended_deltas
    }
    if args.output_file:
        # The context manager closes the file even if serialization fails.
        with open(args.output_file, 'w') as output_file:
            json.dump(results, output_file, indent=4)
    else:
        json.dump(results, sys.stdout, indent=4)
def run(args):
    """Execute a HotNet run and write its result files.

    The influence matrix, its index and the heat scores are loaded from the
    files named in ``args``. A similarity matrix is built, thresholded into a
    weighted graph at ``args.delta``, and split into connected components
    (``args.min_cc_size`` is passed to ``hn.connected_components``). When
    ``args.permutation_type`` is "heat" or "mutations", significance
    statistics are also computed and written.

    Files written into ``args.output_directory``: ``JSON_OUTPUT``,
    ``COMPONENTS_TSV`` and, when statistics are computed,
    ``SIGNIFICANCE_TSV``. Existing result files trigger a printed warning and
    are overwritten.

    Args:
        args: parsed command-line namespace. Reads ``output_directory``,
            ``infmat_file``, ``infmat_name``, ``infmat_index_file``,
            ``heat_file``, ``delta``, ``min_cc_size`` and
            ``permutation_type``. The namespace is also forwarded to the
            permutation helpers.

    Raises:
        RuntimeError: if mutation permutation is requested but
            ``heat_params["heat_fn"]`` is not "load_mutation_heat".
        ValueError: if ``args.permutation_type`` is not "none", "heat" or
            "mutations".
    """
    # create output directory if doesn't exist; warn if output files already exist
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)
    dir_contents = os.listdir(args.output_directory)
    if (JSON_OUTPUT in dir_contents or COMPONENTS_TSV in dir_contents
            or SIGNIFICANCE_TSV in dir_contents):
        print(
            "WARNING: Output directory already contains HotNet results file(s), which will be "
            "overwritten. (Ctrl-c to cancel).")

    # load data
    infmat = scipy.io.loadmat(args.infmat_file)[args.infmat_name]
    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    # compute similarity matrix and extract connected components
    M, gene_index = hn.induce_infmat(infmat,
                                     infmat_index,
                                     sorted(heat.keys()),
                                     quiet=False)
    h = hn.heat_vec(heat, gene_index)
    sim = hn.similarity_matrix(M, h)
    G = hn.weighted_graph(sim, gene_index, args.delta)
    ccs = hn.connected_components(G, args.min_cc_size)

    # calculate significance
    compute_stats = args.permutation_type != "none"
    if compute_stats:
        if args.permutation_type == "heat":
            sizes2stats = heat_permutation_significance(
                args, heat, infmat, infmat_index, G)
        elif args.permutation_type == "mutations":
            if heat_params["heat_fn"] != "load_mutation_heat":
                # Built from adjacent literals: a backslash continuation
                # inside the string would leak into the message text.
                raise RuntimeError(
                    "Heat scores must be based on mutation data to perform "
                    "significance testing based on mutation data permutation."
                )
            sizes2stats = mutation_permutation_significance(
                args, infmat, infmat_index, G, heat_params)
        else:
            raise ValueError("Unrecognized permutation type %s" %
                             (args.permutation_type))

    # sort ccs list such that genes within components are sorted alphanumerically, and components
    # are sorted first by length, then alphanumerically by name of the first gene in the component
    ccs = [sorted(cc) for cc in ccs]
    ccs.sort(key=lambda comp: comp[0])
    ccs.sort(key=len, reverse=True)  # stable: ties keep first-gene ordering

    # write output
    output_dir = os.path.abspath(args.output_directory)
    output_dict = {
        "parameters": vars(args),
        "heat_parameters": heat_params,
        "sizes": hn.component_sizes(ccs),
        "components": ccs
    }
    if compute_stats:
        output_dict["statistics"] = sizes2stats
        hnio.write_significance_as_tsv(
            os.path.join(output_dir, SIGNIFICANCE_TSV), sizes2stats)

    # Ensure the JSON file is closed even if json.dump raises.
    with open(os.path.join(output_dir, JSON_OUTPUT), 'w') as json_out:
        json.dump(output_dict, json_out, indent=4)

    hnio.write_components_as_tsv(os.path.join(output_dir, COMPONENTS_TSV),
                                 ccs)