import math
import random

import networkx as nx

# load_edge_list and save_edge_list are the project's own I/O helpers.

def run(args):
    # Load unpermuted network.
    edge_list = load_edge_list(args.edge_list_file, unweighted=True)

    # Permute network.
    G = nx.Graph()
    G.add_edges_from(edge_list)

    if args.seed is not None:
        random.seed(args.seed)

    minimum_swaps = int(math.ceil(args.Q*G.number_of_edges()))

    if not args.connected:
        G = nx.double_edge_swap(G, minimum_swaps, 2**30)
    else:
        # If G is not connected, then we perform the connected double edge swap
        # algorithm on a largest connected component of G.
        # (nx.connected_component_subgraphs was removed in NetworkX 2.4, so we
        # take the subgraph of a largest component directly; .copy() yields a
        # mutable graph that the swap routine can modify.)
        if not nx.is_connected(G):
            G = G.subgraph(max(nx.connected_components(G), key=len)).copy()
        # The current connected double edge swap algorithm does not guarantee a
        # minimum number of successful edge swaps, so we enforce it, requesting
        # at least 100 swaps per call.
        current_swaps = 0
        while current_swaps < minimum_swaps:
            remaining_swaps = max(minimum_swaps - current_swaps, 100)
            additional_swaps = nx.connected_double_edge_swap(G, remaining_swaps)
            current_swaps += additional_swaps

    permuted_edge_list = G.edges()

    # Save permuted network.
    save_edge_list(args.permuted_edge_list_file, permuted_edge_list)
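# A minimal, self-contained check (illustration only, not part of the pipeline)
# of the property this permutation scheme relies on: double edge swaps rewire
# the network while preserving every node's degree.
import networkx as nx

_G = nx.barbell_graph(5, 2)
_degrees_before = dict(_G.degree())
nx.double_edge_swap(_G, nswap=20, max_tries=10**4, seed=0)
assert _degrees_before == dict(_G.degree())  # degrees unchanged; only the wiring moved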
from collections import defaultdict

import networkx as nx

# load_index_gene, load_edge_list, and load_gene_score are the project's own I/O helpers.

def run(args):
    # Load data.
    index_to_gene, gene_to_index = load_index_gene(args.index_gene_file)
    edge_list = load_edge_list(args.edge_list_file, index_to_gene, unweighted=True)
    gene_to_score = load_gene_score(args.gene_score_file)

    # Find degrees of network genes in the subgraph induced by the scored genes,
    # restricted to a largest connected component.
    G = nx.Graph()
    G.add_edges_from(edge_list)
    G = G.subgraph(gene_to_score)
    largest_cc = max(nx.connected_components(G), key=len)
    G = G.subgraph(largest_cc)
    common_genes = set(G.nodes)

    degree_to_nodes = defaultdict(set)
    for node in common_genes:
        degree = G.degree(node)
        degree_to_nodes[degree].add(node)
    distinct_degrees = set(degree_to_nodes)

    # Bin genes by degree, from highest to lowest; close a bin once it reaches
    # min_size and fold any leftover genes into the last bin.
    bins = list()
    current_bin = list()
    for degree in sorted(distinct_degrees, reverse=True):
        current_bin += sorted(degree_to_nodes[degree])
        if len(current_bin) >= args.min_size:
            bins.append(current_bin)
            current_bin = list()
    if bins:
        bins[-1] += current_bin
    elif current_bin:
        bins.append(current_bin)

    # Save degree bins, one tab-separated bin per line.
    with open(args.output_file, 'w') as f:
        f.write('\n'.join('\t'.join(current_bin) for current_bin in bins))
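# A quick illustration of the binning rule above on hypothetical toy data:
# genes are grouped from highest degree down, a bin is closed once it reaches
# min_size, and leftover genes are folded into the last bin.
_degree_to_nodes = {3: {'A'}, 2: {'B', 'C'}, 1: {'D'}}
_min_size = 2
_bins, _current = [], []
for _degree in sorted(_degree_to_nodes, reverse=True):
    _current += sorted(_degree_to_nodes[_degree])
    if len(_current) >= _min_size:
        _bins.append(_current)
        _current = []
if _bins:
    _bins[-1] += _current
elif _current:
    _bins.append(_current)
assert _bins == [['A', 'B', 'C', 'D']]  # 'D' folded into the last (only) bin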
import numpy as np

# load_edge_list, balanced_beta, hh_similarity_matrix, and save_matrix are the
# project's own helpers.

def run(args):
    # Load edge list.
    edge_list = load_edge_list(args.edge_list_file)

    # Construct adjacency matrix; k and l are the smallest and largest node
    # indices, so matrix indices are shifted to start at zero.
    k = min(min(edge[:2]) for edge in edge_list)
    l = max(max(edge[:2]) for edge in edge_list)

    A = np.zeros((l - k + 1, l - k + 1), dtype=np.float64)
    if args.directed:
        for i, j, weight in edge_list:
            A[j - k, i - k] = weight
    else:
        for i, j, weight in edge_list:
            A[i - k, j - k] = A[j - k, i - k] = weight

    # Choose beta.
    if args.beta is None:
        beta = balanced_beta(A, args.threshold, args.num_digits)
    elif 0 < args.beta < 1:
        beta = args.beta
    else:
        raise ValueError('{} invalid; beta must satisfy 0 < beta < 1.'.format(args.beta))

    # Construct Hierarchical HotNet similarity matrix.
    P = hh_similarity_matrix(A, beta)

    # Save results.
    if args.output_file is not None:
        save_matrix(args.output_file, P, args.name)
    if args.beta_output_file is not None:
        fmt = '{:.' + str(args.num_digits) + 'f}'
        with open(args.beta_output_file, 'w') as f:
            f.write(fmt.format(beta))
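# A minimal sketch of the random-walk-with-restart similarity that
# hh_similarity_matrix is expected to compute. Assumption: this mirrors the
# Hierarchical HotNet formulation P = beta * (I - (1 - beta) * W)^-1, with W
# the column-normalized adjacency matrix; the project's implementation may
# differ in detail.
import numpy as np

def rwr_similarity_matrix(A, beta):
    # Column-normalize A so each column of W sums to 1 (one walk step);
    # assumes no isolated nodes, i.e., no zero columns in A.
    W = A / np.sum(A, axis=0)
    n = np.shape(A)[0]
    return beta * np.linalg.inv(np.eye(n) - (1 - beta) * W)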
def cut_hierarchy(edge_list_file, index_gene_file, cut_height):
    # Cut the hierarchy at the given height and return the resulting clusters.
    T = load_edge_list(edge_list_file)
    index_to_gene, gene_to_index = load_index_gene(index_gene_file)
    clusters = find_clusters(T, index_to_gene, cut_height)
    return clusters
def find_statistics(edge_list_file, index_gene_file, reverse=True):
    # Compute summary statistics across the cut heights of the hierarchy.
    T = load_edge_list(edge_list_file)
    index_to_gene, gene_to_index = load_index_gene(index_gene_file)
    return compute_statistics(T, index_to_gene, reverse)
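# Hypothetical usage of the two wrappers above; 'hierarchy_edge_list.tsv' and
# 'index_gene.tsv' are placeholder names for the hierarchy edge list and
# index-to-gene map produced by earlier steps of the pipeline.
if __name__ == '__main__':
    clusters = cut_hierarchy('hierarchy_edge_list.tsv', 'index_gene.tsv', 0.1)
    statistics = find_statistics('hierarchy_edge_list.tsv', 'index_gene.tsv')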
from collections import defaultdict
from itertools import combinations

import networkx as nx

# load_index_gene, load_edge_list, load_components, and progress are the
# project's own helpers.

def run(args):
    # Load data.
    if args.verbose:
        progress('Loading data...')

    assert len(args.component_files) == len(args.index_gene_files) \
        == len(args.edge_list_files) == len(args.networks) == len(args.scores)

    index_to_gene_collection = dict()
    gene_to_index_collection = dict()
    edge_list_collection = dict()
    components_collection = dict()

    for network_label, score_label, index_gene_file, edge_list_file, component_file in zip(
            args.networks, args.scores, args.index_gene_files,
            args.edge_list_files, args.component_files):
        index_to_gene, gene_to_index = load_index_gene(index_gene_file)
        edge_list = set(frozenset(edge) for edge in load_edge_list(
            edge_list_file, index_to_gene, unweighted=True))
        components = load_components(component_file)

        index_to_gene_collection[(network_label, score_label)] = index_to_gene
        gene_to_index_collection[(network_label, score_label)] = gene_to_index
        edge_list_collection[(network_label, score_label)] = edge_list
        components_collection[(network_label, score_label)] = components

    # Process data: tally, for each network edge, the number of runs whose
    # components contain both of its endpoints.
    if args.verbose:
        progress('Processing data...')

    edge_to_tally = defaultdict(int)
    for network_label, score_label in zip(args.networks, args.scores):
        edge_list = edge_list_collection[(network_label, score_label)]
        components = components_collection[(network_label, score_label)]
        for component in components:
            for u, v in combinations(component, 2):
                edge = frozenset((u, v))
                if edge in edge_list:
                    edge_to_tally[edge] += 1

    # Keep edges that appear in at least `threshold` runs; the consensus
    # clusters are the connected components of the surviving edges.
    thresholded_edges = set(edge for edge, tally in edge_to_tally.items()
                            if tally >= args.threshold)
    G = nx.Graph()
    G.add_edges_from(thresholded_edges)
    consensus_results = sorted(sorted([sorted(x) for x in nx.connected_components(G)]),
                               key=len, reverse=True)

    # Save data.
    if args.verbose:
        progress('Saving data...')

    if args.consensus_node_file is not None:
        output_string = '\n'.join('\t'.join(x) for x in consensus_results)
        with open(args.consensus_node_file, 'w') as f:
            f.write(output_string)

    if args.consensus_edge_file is not None:
        output_string = '\n'.join('\t'.join(x)
                                  for x in sorted(map(sorted, thresholded_edges)))
        with open(args.consensus_edge_file, 'w') as f:
            f.write(output_string)

    if args.verbose:
        progress()
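# A self-contained illustration (toy data, not from the pipeline) of the
# consensus rule above: an edge survives if it lies inside a reported component
# in at least `threshold` network/score runs, and the consensus clusters are
# the connected components of the surviving edges.
from collections import defaultdict
from itertools import combinations
import networkx as nx

_runs = [
    {'edges': {frozenset(('a', 'b')), frozenset(('b', 'c'))}, 'components': [['a', 'b', 'c']]},
    {'edges': {frozenset(('a', 'b'))}, 'components': [['a', 'b']]},
]
_threshold = 2
_tally = defaultdict(int)
for _run in _runs:
    for _component in _run['components']:
        for _u, _v in combinations(_component, 2):
            _edge = frozenset((_u, _v))
            if _edge in _run['edges']:
                _tally[_edge] += 1
_kept = {e for e, t in _tally.items() if t >= _threshold}
_G = nx.Graph()
_G.add_edges_from(_kept)
# Only ('a', 'b') appears in components of both runs, so it alone survives.
assert sorted(map(sorted, nx.connected_components(_G))) == [['a', 'b']]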