def run(args):
    """Compute deltas for the requested permutation type and dump them as JSON.

    The result is a JSON object with the keys ``"parameters"`` (``vars(args)``),
    ``"heat_parameters"`` and ``"deltas"``. It is written to ``args.output_file``
    when that is set, otherwise to standard output.

    Args:
        args: Parsed argument namespace. Attributes read here: ``sizes``,
            ``test_statistic``, ``classic``, ``infmat_index_file``, ``heat_file``,
            ``perm_type``, ``infmat_file``, ``infmat_name``,
            ``permutation_genes_file``, ``num_permutations``, ``num_cores``,
            ``permuted_networks_path`` and ``output_file``. ``args.sizes`` is
            overwritten with a default when it is empty.

    Raises:
        ValueError: If ``args.classic`` is false and the test statistic is not
            ``MAX_CC_SIZE``, or if ``args.perm_type`` is not one of ``"heat"``,
            ``"mutations"`` or ``"network"``.
    """
    # if l not specified, set default based on test statistic
    if not args.sizes:
        args.sizes = [5, 10, 15, 20] if args.test_statistic == MAX_CC_SIZE else [3]

    # disallow finding delta by # of CCs of size >= l for HotNet2, since this is not currently
    # implemented correctly (and is non-trivial to implement)
    if not args.classic and args.test_statistic != MAX_CC_SIZE:
        raise ValueError("For HotNet2, the largest CC size test statistic must be used.")

    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    if args.perm_type == "heat":
        infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
        addtl_genes = (hnio.load_genes(args.permutation_genes_file)
                       if args.permutation_genes_file else None)
        deltas = get_deltas_for_heat(infmat, infmat_index, heat, addtl_genes,
                                     args.num_permutations, args.test_statistic,
                                     args.sizes, args.classic, args.num_cores)
    elif args.perm_type == "mutations":
        infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
        deltas = get_deltas_for_mutations(args, infmat, infmat_index, heat_params)
    elif args.perm_type == "network":
        # The network branch does not load the influence matrix here; it passes
        # the permuted-networks path and infmat name to the helper instead.
        deltas = get_deltas_for_network(args.permuted_networks_path, heat, args.infmat_name,
                                        infmat_index, args.test_statistic, args.sizes,
                                        args.classic, args.num_permutations, args.num_cores)
    else:
        raise ValueError("Invalid mutation permutation type: %s" % args.perm_type)

    results = {"parameters": vars(args), "heat_parameters": heat_params, "deltas": deltas}
    if args.output_file:
        # ``with`` guarantees the file is closed even if serialization fails.
        with open(args.output_file, 'w') as output_file:
            json.dump(results, output_file, indent=4)
    else:
        # stdout is not ours to close.
        json.dump(results, sys.stdout, indent=4)
def run(args):
    """Find deltas via the selected permutation strategy and emit them as JSON.

    Writes a JSON document containing ``"parameters"`` (``vars(args)``),
    ``"heat_parameters"`` and ``"deltas"`` to ``args.output_file`` if given,
    else to ``sys.stdout``.

    Args:
        args: Parsed argument namespace. ``args.sizes`` is replaced by a default
            list when empty; the other attributes read are ``test_statistic``,
            ``classic``, ``infmat_index_file``, ``heat_file``, ``perm_type``,
            ``infmat_file``, ``infmat_name``, ``permutation_genes_file``,
            ``num_permutations``, ``num_cores``, ``permuted_networks_path`` and
            ``output_file``.

    Raises:
        ValueError: When the non-classic mode is combined with a test statistic
            other than ``MAX_CC_SIZE``, or when ``args.perm_type`` is not
            ``"heat"``, ``"mutations"`` or ``"network"``.
    """
    # if l not specified, set default based on test statistic
    if not args.sizes:
        args.sizes = [5, 10, 15, 20] if args.test_statistic == MAX_CC_SIZE else [3]

    # disallow finding delta by # of CCs of size >= l for HotNet2, since this is not currently
    # implemented correctly (and is non-trivial to implement)
    if not args.classic and args.test_statistic != MAX_CC_SIZE:
        raise ValueError("For HotNet2, the largest CC size test statistic must be used.")

    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    if args.perm_type == "heat":
        infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
        addtl_genes = hnio.load_genes(args.permutation_genes_file) if args.permutation_genes_file else None
        deltas = get_deltas_for_heat(
            infmat,
            infmat_index,
            heat,
            addtl_genes,
            args.num_permutations,
            args.test_statistic,
            args.sizes,
            args.classic,
            args.num_cores,
        )
    elif args.perm_type == "mutations":
        infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
        deltas = get_deltas_for_mutations(args, infmat, infmat_index, heat_params)
    elif args.perm_type == "network":
        deltas = get_deltas_for_network(
            args.permuted_networks_path,
            heat,
            args.infmat_name,
            infmat_index,
            args.test_statistic,
            args.sizes,
            args.classic,
            args.num_permutations,
            args.num_cores,
        )
    else:
        raise ValueError("Invalid mutation permutation type: %s" % args.perm_type)

    payload = {"parameters": vars(args), "heat_parameters": heat_params, "deltas": deltas}
    if args.output_file:
        # Context manager closes the file on both success and failure paths.
        with open(args.output_file, "w") as output_file:
            json.dump(payload, output_file, indent=4)
    else:
        # Leave stdout open for the rest of the process.
        json.dump(payload, sys.stdout, indent=4)
def run(args):
    """Load heat and influence data, build the similarity matrix, and create the dendrogram.

    Heat scores are read from JSON (the ``'heat'`` key) when ``args.heat_file``
    ends in ``.json`` (case-insensitive), otherwise via ``hnio.load_heat_tsv``.
    They are then filtered to genes present in the influence-matrix index and
    passed through ``hnheat.filter_heat`` before the similarity matrix is built.

    Args:
        args: Parsed argument namespace. Attributes read here: ``verbose``,
            ``infmat_file``, ``infmat_name``, ``infmat_index_file``,
            ``heat_file`` and ``output_directory``. ``vars(args)`` is forwarded
            to ``createDendrogram``.
    """
    # Load the input data
    if args.verbose:
        print('* Loading infmat and heat files...')

    infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
    full_index2gene = hnio.load_index(args.infmat_index_file)

    using_json_heat = os.path.splitext(args.heat_file.lower())[1] == '.json'
    if using_json_heat:
        # Close the heat file deterministically instead of relying on GC.
        with open(args.heat_file) as heat_file:
            heat = json.load(heat_file)['heat']
    else:
        heat = hnio.load_heat_tsv(args.heat_file)
    print("* Loaded heat scores for %s genes" % len(heat))

    # filter out genes not in the network
    heat = hnheat.filter_heat_to_network_genes(heat, set(full_index2gene.values()))

    # genes with score 0 cannot be in output components, but are eligible for heat in permutations
    # (the second return value is not needed here, so it is deliberately ignored)
    heat, _addtl_genes = hnheat.filter_heat(
        heat, None, False, 'There are ## genes with heat score 0')

    if args.verbose:
        print('* Creating similarity matrix...')
    sim, index2gene = hn.similarity_matrix(infmat, full_index2gene, heat, True)

    # Create and output the dendrogram
    createDendrogram(sim, list(index2gene.values()), args.output_directory,
                     vars(args), args.verbose)
def run(args):
    """Load heat and influence data, build the similarity matrix, and create the dendrogram.

    Heat scores come from the ``'heat'`` key of a JSON file when
    ``args.heat_file`` has a ``.json`` extension (case-insensitive), otherwise
    from ``hnio.load_heat_tsv``. The scores are restricted to genes in the
    influence-matrix index and filtered with ``hnheat.filter_heat`` before the
    similarity matrix is computed.

    Args:
        args: Parsed argument namespace. Attributes read here: ``verbose``,
            ``infmat_file``, ``infmat_name``, ``infmat_index_file``,
            ``heat_file`` and ``output_directory``. ``vars(args)`` is forwarded
            to ``createDendrogram``.
    """
    # NOTE: every print below uses the parenthesized single-argument form, which
    # produces identical output as a Python 2 statement and a Python 3 call.

    # Load the input data
    if args.verbose:
        print('* Loading infmat and heat files...')

    infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
    full_index2gene = hnio.load_index(args.infmat_index_file)

    using_json_heat = os.path.splitext(args.heat_file.lower())[1] == '.json'
    if using_json_heat:
        # Ensure the heat file handle is closed once it has been parsed.
        with open(args.heat_file) as heat_file:
            heat = json.load(heat_file)['heat']
    else:
        heat = hnio.load_heat_tsv(args.heat_file)
    print("* Loaded heat scores for %s genes" % len(heat))

    # filter out genes not in the network
    heat = hnheat.filter_heat_to_network_genes(heat, set(full_index2gene.values()))

    # genes with score 0 cannot be in output components, but are eligible for heat in permutations
    # (the second return value is not used in this function)
    heat, _addtl_genes = hnheat.filter_heat(heat, None, False,
                                            'There are ## genes with heat score 0')

    if args.verbose:
        print('* Creating similarity matrix...')
    sim, index2gene = hn.similarity_matrix(infmat, full_index2gene, heat, True)

    # Create and output the dendrogram. list() yields a real list on both
    # Python 2 (where values() already is one) and Python 3 (where it is a view).
    createDendrogram(sim, list(index2gene.values()), args.output_directory,
                     vars(args), args.verbose)
def run(args):
    """Run the component search for every delta and write per-delta results.

    Steps, in order:
      1. Create ``args.output_directory`` if missing; warn if it is not empty.
      2. Load the influence matrix, its index and the heat JSON, then compute
         the similarity matrix.
      3. Unless ``args.permutation_type`` is ``"none"``, prepare the permuted
         heat data once (nothing is prepared for ``"network"``).
      4. For each delta in ``args.deltas``: build the weighted graph, extract
         connected components, optionally compute significance, and write the
         components TSV, the significance TSV (when computed) and a JSON
         summary into ``<output_directory>/delta_<delta>``.

    Args:
        args: Parsed argument namespace. ``args.delta`` is set on each loop
            iteration so that the current delta appears in the ``"parameters"``
            section of the JSON output.

    Raises:
        RuntimeError: If ``args.permutation_type`` is ``"mutations"`` but
            ``heat_params["heat_fn"]`` is not ``"load_mutation_heat"``.
        ValueError: If ``args.permutation_type`` is not one of ``"none"``,
            ``"heat"``, ``"mutations"``, ``"network"`` or ``"precomputed"``.
    """
    # NOTE: prints use the parenthesized single-argument form, which behaves the
    # same as a Python 2 print statement and as a Python 3 print() call.

    # create output directory if doesn't exist; warn if it exists and is not empty
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)
    if os.listdir(args.output_directory):
        print("WARNING: Output directory is not empty. Any conflicting files will be overwritten. "
              "(Ctrl-c to cancel).")

    # load data
    infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
    full_index2gene = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    # compute similarity matrix
    sim, index2gene = hn.similarity_matrix(infmat, full_index2gene, heat, not args.classic)

    # only calculate permuted data sets for significance testing once
    if args.permutation_type != "none":
        if args.permutation_type == "heat":
            print("* Generating heat permutations for statistical significance testing")
            extra_genes = (hnio.load_genes(args.permutation_genes_file)
                           if args.permutation_genes_file else None)
            heat_permutations = p.permute_heat(heat, full_index2gene.values(),
                                               args.num_permutations, extra_genes,
                                               args.num_cores)
        elif args.permutation_type == "mutations":
            if heat_params["heat_fn"] != "load_mutation_heat":
                # Adjacent literals instead of a backslash continuation inside the
                # string, which leaked stray characters into the message.
                raise RuntimeError(
                    "Heat scores must be based on mutation data to perform "
                    "significance testing based on mutation data permutation.")
            print("* Generating mutation permutations for statistical significance testing")
            heat_permutations = p.generate_mutation_permutation_heat(
                heat_params["heat_fn"], heat_params["sample_file"],
                heat_params["gene_file"], full_index2gene.values(),
                heat_params["snv_file"], args.gene_length_file, args.bmr,
                args.bmr_file, heat_params["cna_file"], args.gene_order_file,
                heat_params["cna_filter_threshold"], heat_params["min_freq"],
                args.num_permutations, args.num_cores)
        elif args.permutation_type == "network":
            pass  # nothing to do right now
        elif args.permutation_type == "precomputed":
            heat_file_paths = [
                args.datasets_path.replace(ITERATION_REPLACEMENT_TOKEN, str(i))
                for i in range(1, args.num_permutations + 1)
            ]
            heat_permutations = [hnio.load_heat_tsv(heat_file) for heat_file in heat_file_paths]
        else:
            raise ValueError("Unrecognized permutation type %s" % (args.permutation_type))

    for delta in args.deltas:
        delta_out_dir = args.output_directory + "/delta_" + str(delta)
        if not os.path.isdir(delta_out_dir):
            os.mkdir(delta_out_dir)
        # Resolve the absolute output location once; all three outputs share it.
        abs_out_dir = os.path.abspath(delta_out_dir)

        G = hn.weighted_graph(sim, index2gene, delta, not args.classic)
        ccs = hn.connected_components(G, args.min_cc_size)

        # calculate significance
        if args.permutation_type != "none":
            if args.permutation_type == "network":
                sizes2stats = calculate_significance_network(
                    args, args.permuted_networks_path, full_index2gene, G, heat,
                    delta, args.num_permutations)
            else:
                sizes2stats = calculate_significance(args, infmat, full_index2gene, G,
                                                     delta, heat_permutations)

        # sort ccs list such that genes within components are sorted alphanumerically, and
        # components are sorted first by length, then alphanumerically by name of the first
        # gene in the component (two stable sorts: secondary key first, primary key second)
        ccs = [sorted(cc) for cc in ccs]
        ccs.sort(key=lambda comp: comp[0])
        ccs.sort(key=len, reverse=True)

        # write output
        hnio.write_components_as_tsv(abs_out_dir + "/" + COMPONENTS_TSV, ccs)

        args.delta = delta  # include delta in parameters section of output JSON
        output_dict = {
            "parameters": vars(args),
            "heat_parameters": heat_params,
            "sizes": hn.component_sizes(ccs),
            "components": ccs,
        }
        if args.permutation_type != "none":
            output_dict["statistics"] = sizes2stats
            hnio.write_significance_as_tsv(abs_out_dir + "/" + SIGNIFICANCE_TSV, sizes2stats)

        # ``with`` closes the JSON file even if serialization raises.
        with open(abs_out_dir + "/" + JSON_OUTPUT, 'w') as json_out:
            json.dump(output_dict, json_out, indent=4)