예제 #1
0
def run(args):
    """Find delta values for the selected permutation type and write them as JSON.

    Configuration is read from attributes of ``args`` (sizes, test_statistic,
    classic, perm_type, the infmat/heat file settings, num_permutations,
    num_cores, output_file, ...). ``args.sizes`` is filled in with a default
    when it is empty, so ``args`` is mutated.

    The result is a JSON object with the keys "parameters" (``vars(args)``),
    "heat_parameters" and "deltas". It is written to ``args.output_file`` when
    that is set and to standard output otherwise.

    Raises:
        ValueError: if ``args.classic`` is false and the test statistic is not
            ``MAX_CC_SIZE``, or if ``args.perm_type`` is not one of "heat",
            "mutations" or "network".
    """
    # if l not specified, set default based on test statistic
    if not args.sizes:
        args.sizes = [5, 10, 15, 20] if args.test_statistic == MAX_CC_SIZE else [3]

    # disallow finding delta by # of CCs of size >= l for HotNet2, since this is not currently
    # implemented correctly (and is non-trivial to implement)
    if not args.classic and args.test_statistic != MAX_CC_SIZE:
        raise ValueError("For HotNet2, the largest CC size test statistic must be used.")

    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    if args.perm_type == "heat":
        infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
        addtl_genes = hnio.load_genes(args.permutation_genes_file) if args.permutation_genes_file else None
        deltas = get_deltas_for_heat(infmat, infmat_index, heat, addtl_genes, args.num_permutations,
                                     args.test_statistic, args.sizes, args.classic, args.num_cores)
    elif args.perm_type == "mutations":
        infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
        deltas = get_deltas_for_mutations(args, infmat, infmat_index, heat_params)
    elif args.perm_type == "network":
        # the network branch does not load the influence matrix here; it only
        # passes the matrix name on to get_deltas_for_network
        deltas = get_deltas_for_network(args.permuted_networks_path, heat, args.infmat_name,
                                        infmat_index, args.test_statistic, args.sizes,
                                        args.classic, args.num_permutations, args.num_cores)
    else:
        raise ValueError("Invalid mutation permutation type: %s" % args.perm_type)

    result = {"parameters": vars(args), "heat_parameters": heat_params, "deltas": deltas}
    if args.output_file:
        # the context manager closes the file even if serialization fails
        with open(args.output_file, 'w') as output_file:
            json.dump(result, output_file, indent=4)
    else:
        # stdout is not ours to close
        json.dump(result, sys.stdout, indent=4)
예제 #2
0
def run(args):
    """Compute deltas via heat, mutation or network permutations and dump them as JSON.

    All settings come from attributes of ``args``. When ``args.sizes`` is empty
    it is replaced with a default that depends on ``args.test_statistic``, so
    the caller's ``args`` object is modified.

    Output is a JSON document with "parameters" (``vars(args)``),
    "heat_parameters" and "deltas", sent to ``args.output_file`` if given,
    otherwise to ``sys.stdout``.

    Raises:
        ValueError: when ``args.classic`` is false and the test statistic is
            not ``MAX_CC_SIZE``, or when ``args.perm_type`` is not "heat",
            "mutations" or "network".
    """
    # if l not specified, set default based on test statistic
    if not args.sizes:
        args.sizes = [5, 10, 15, 20] if args.test_statistic == MAX_CC_SIZE else [3]

    # disallow finding delta by # of CCs of size >= l for HotNet2, since this is not currently
    # implemented correctly (and is non-trivial to implement)
    if not args.classic and args.test_statistic != MAX_CC_SIZE:
        raise ValueError("For HotNet2, the largest CC size test statistic must be used.")

    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    if args.perm_type == "heat":
        infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
        addtl_genes = hnio.load_genes(args.permutation_genes_file) if args.permutation_genes_file else None
        deltas = get_deltas_for_heat(
            infmat,
            infmat_index,
            heat,
            addtl_genes,
            args.num_permutations,
            args.test_statistic,
            args.sizes,
            args.classic,
            args.num_cores,
        )
    elif args.perm_type == "mutations":
        infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
        deltas = get_deltas_for_mutations(args, infmat, infmat_index, heat_params)
    elif args.perm_type == "network":
        deltas = get_deltas_for_network(
            args.permuted_networks_path,
            heat,
            args.infmat_name,
            infmat_index,
            args.test_statistic,
            args.sizes,
            args.classic,
            args.num_permutations,
            args.num_cores,
        )
    else:
        raise ValueError("Invalid mutation permutation type: %s" % args.perm_type)

    payload = {"parameters": vars(args), "heat_parameters": heat_params, "deltas": deltas}
    if args.output_file:
        # ``with`` guarantees the handle is released even when json.dump raises
        with open(args.output_file, "w") as output_file:
            json.dump(payload, output_file, indent=4)
    else:
        # never close the process-wide stdout stream
        json.dump(payload, sys.stdout, indent=4)
예제 #3
0
def run(args):
    """Load influence matrix and heat scores, then create and output a dendrogram.

    Reads from ``args``: infmat_file, infmat_name, infmat_index_file,
    heat_file, output_directory and verbose. The heat file is parsed as JSON
    (using its "heat" entry) when its extension is ".json", case-insensitively;
    any other extension is loaded through ``hnio.load_heat_tsv``.

    Progress messages go to standard output; the "Loaded heat scores" line is
    printed regardless of ``args.verbose``.
    """
    # Load the input data
    if args.verbose:
        print('* Loading infmat and heat files...')
    infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
    full_index2gene = hnio.load_index(args.infmat_index_file)

    using_json_heat = os.path.splitext(args.heat_file.lower())[1] == '.json'
    if using_json_heat:
        # close the heat file deterministically instead of leaking the handle
        with open(args.heat_file) as heat_file:
            heat = json.load(heat_file)['heat']
    else:
        heat = hnio.load_heat_tsv(args.heat_file)
    print("* Loaded heat scores for %s genes" % len(heat))

    # filter out genes not in the network
    heat = hnheat.filter_heat_to_network_genes(heat,
                                               set(full_index2gene.values()))

    # genes with score 0 cannot be in output components; the second value
    # returned by filter_heat is not needed here
    heat, _ = hnheat.filter_heat(
        heat, None, False, 'There are ## genes with heat score 0')
    if args.verbose:
        print('* Creating similarity matrix...')
    sim, index2gene = hn.similarity_matrix(infmat, full_index2gene, heat, True)

    # Create and output the dendrogram
    createDendrogram(sim, list(index2gene.values()), args.output_directory,
                     vars(args), args.verbose)
예제 #4
0
def run(args):
    """Load influence matrix and heat scores, then create and output a dendrogram.

    Reads from ``args``: infmat_file, infmat_name, infmat_index_file,
    heat_file, output_directory and verbose. A heat file whose extension is
    ".json" (case-insensitive) is parsed as JSON and its "heat" entry is used;
    any other file is loaded through ``hnio.load_heat_tsv``.

    Progress messages go to standard output; the "Loaded heat scores" line is
    printed regardless of ``args.verbose``.
    """
    # Load the input data
    # NOTE: every print below passes one parenthesised argument, which behaves
    # the same as a Python 2 print statement and as a Python 3 print() call.
    if args.verbose:
        print('* Loading infmat and heat files...')
    infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
    full_index2gene = hnio.load_index(args.infmat_index_file)

    using_json_heat = os.path.splitext(args.heat_file.lower())[1] == '.json'
    if using_json_heat:
        # close the heat file deterministically instead of leaking the handle
        with open(args.heat_file) as heat_file:
            heat = json.load(heat_file)['heat']
    else:
        heat = hnio.load_heat_tsv(args.heat_file)
    print("* Loaded heat scores for %s genes" % len(heat))

    # filter out genes not in the network
    heat = hnheat.filter_heat_to_network_genes(heat, set(full_index2gene.values()))

    # genes with score 0 cannot be in output components; the second value
    # returned by filter_heat is not needed here
    heat, _ = hnheat.filter_heat(heat, None, False, 'There are ## genes with heat score 0')
    if args.verbose:
        print('* Creating similarity matrix...')
    sim, index2gene = hn.similarity_matrix(infmat, full_index2gene, heat, True)

    # Create and output the dendrogram; list() hands over a real list on
    # Python 3 as well, where dict.values() is only a view
    createDendrogram(sim, list(index2gene.values()), args.output_directory, vars(args), args.verbose)
예제 #5
0
def run(args):
    """Run the analysis for every delta in ``args.deltas`` and write the results.

    For each delta a sub-directory ``delta_<delta>`` is created under
    ``args.output_directory`` and receives a components TSV, a JSON summary
    and, unless ``args.permutation_type`` is "none", a significance TSV.
    Permuted heat data sets are generated (or loaded, for "precomputed") once
    up front and reused for every delta; for "network" nothing is prepared in
    advance.

    ``args`` is mutated: ``args.delta`` is set to the delta being processed so
    that it appears in the "parameters" section of the JSON output.

    Raises:
        RuntimeError: if mutation permutations are requested but the heat
            file's "heat_fn" parameter is not "load_mutation_heat".
        ValueError: if ``args.permutation_type`` is not one of "none", "heat",
            "mutations", "network" or "precomputed".
    """
    # create output directory if doesn't exist; warn if it exists and is not empty
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)
    if os.listdir(args.output_directory):
        print("WARNING: Output directory is not empty. Any conflicting files will be overwritten. "
              "(Ctrl-c to cancel).")

    # load data
    infmat = hnio.load_infmat(args.infmat_file, args.infmat_name)
    full_index2gene = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    # compute similarity matrix
    sim, index2gene = hn.similarity_matrix(infmat, full_index2gene, heat,
                                           not args.classic)

    test_significance = args.permutation_type != "none"

    # only calculate permuted data sets for significance testing once
    heat_permutations = None
    if test_significance:
        if args.permutation_type == "heat":
            print("* Generating heat permutations for statistical significance testing")
            extra_genes = (hnio.load_genes(args.permutation_genes_file)
                           if args.permutation_genes_file else None)
            # list() so the callee receives a list on Python 2 and 3 alike
            heat_permutations = p.permute_heat(heat, list(full_index2gene.values()),
                                               args.num_permutations,
                                               extra_genes, args.num_cores)
        elif args.permutation_type == "mutations":
            if heat_params["heat_fn"] != "load_mutation_heat":
                # adjacent literals keep source indentation out of the message
                raise RuntimeError(
                    "Heat scores must be based on mutation data to perform "
                    "significance testing based on mutation data permutation."
                )
            print("* Generating mutation permutations for statistical significance testing")
            heat_permutations = p.generate_mutation_permutation_heat(
                heat_params["heat_fn"],
                heat_params["sample_file"], heat_params["gene_file"],
                list(full_index2gene.values()), heat_params["snv_file"],
                args.gene_length_file, args.bmr, args.bmr_file,
                heat_params["cna_file"], args.gene_order_file,
                heat_params["cna_filter_threshold"], heat_params["min_freq"],
                args.num_permutations, args.num_cores)
        elif args.permutation_type == "network":
            pass  # nothing to do right now
        elif args.permutation_type == "precomputed":
            # one heat file per permutation, numbered 1..num_permutations
            heat_file_paths = [
                args.datasets_path.replace(ITERATION_REPLACEMENT_TOKEN, str(i))
                for i in range(1, args.num_permutations + 1)
            ]
            heat_permutations = [
                hnio.load_heat_tsv(heat_file) for heat_file in heat_file_paths
            ]
        else:
            raise ValueError("Unrecognized permutation type %s" %
                             (args.permutation_type))

    for delta in args.deltas:
        delta_out_dir = os.path.join(args.output_directory, "delta_" + str(delta))
        if not os.path.isdir(delta_out_dir):
            os.mkdir(delta_out_dir)
        abs_out_dir = os.path.abspath(delta_out_dir)

        G = hn.weighted_graph(sim, index2gene, delta, not args.classic)
        ccs = hn.connected_components(G, args.min_cc_size)

        # calculate significance
        if test_significance:
            if args.permutation_type == "network":
                sizes2stats = calculate_significance_network(
                    args, args.permuted_networks_path, full_index2gene, G,
                    heat, delta, args.num_permutations)
            else:
                sizes2stats = calculate_significance(args, infmat,
                                                     full_index2gene, G, delta,
                                                     heat_permutations)

        # sort ccs list such that genes within components are sorted alphanumerically, and
        # components are sorted first by length, then alphanumerically by name of the first
        # gene in the component (two stable sorts, secondary key first)
        ccs = [sorted(cc) for cc in ccs]
        ccs.sort(key=lambda comp: comp[0])
        ccs.sort(key=len, reverse=True)

        # write output
        hnio.write_components_as_tsv(
            os.path.join(abs_out_dir, COMPONENTS_TSV), ccs)
        args.delta = delta  # include delta in parameters section of output JSON
        output_dict = {
            "parameters": vars(args),
            "heat_parameters": heat_params,
            "sizes": hn.component_sizes(ccs),
            "components": ccs
        }
        if test_significance:
            output_dict["statistics"] = sizes2stats
            hnio.write_significance_as_tsv(
                os.path.join(abs_out_dir, SIGNIFICANCE_TSV), sizes2stats)

        # the context manager closes the file even if serialization fails
        with open(os.path.join(abs_out_dir, JSON_OUTPUT), 'w') as json_out:
            json.dump(output_dict, json_out, indent=4)