def run_fisher(run_parameters):
    """Drive the fisher gene-set characterization from input files to saved result.

    Loads the user spreadsheet and the property-gene network, narrows both to
    the genes they have in common, converts the network to a sparse matrix,
    runs the fisher test, then saves the test result and the droplist.

    Args:
        run_parameters: dictionary of run parameters. The keys read here are
            'spreadsheet_name_full_path', 'pg_network_name_full_path' and
            'results_directory'; the whole dictionary is also handed to
            map_and_save_droplist.

    Returns:
        The value returned by save_fisher_test_result for this run.
    """
    # --- load the spreadsheet and the property-gene network ---
    spreadsheet_df = kn.get_spreadsheet_df(
        run_parameters['spreadsheet_name_full_path'])
    pg_df = kn.get_network_df(run_parameters['pg_network_name_full_path'])

    user_genes = kn.extract_spreadsheet_gene_names(spreadsheet_df)
    node_1_names, node_2_names = kn.extract_network_node_names(pg_df)

    # --- genes present both in the network's node_2 names and the spreadsheet ---
    shared_genes = kn.find_common_node_names(node_2_names, user_genes)
    shared_gene_index = kn.create_node_names_dict(shared_genes)
    node_1_index = kn.create_node_names_dict(node_1_names)
    node_1_reverse_index = kn.create_reverse_node_names_dict(node_1_index)

    # --- keep only the shared genes in the spreadsheet and in the network ---
    trimmed_spreadsheet_df = kn.update_spreadsheet_df(spreadsheet_df, shared_genes)
    pg_df = kn.update_network_df(pg_df, shared_genes, "node_2")
    # Overwrite the 'wt' column with 1 on every remaining row.
    pg_df['wt'] = 1

    # --- swap node names for the integer indices built above ---
    pg_df = kn.map_node_names_to_index(pg_df, node_1_index, "node_1")
    pg_df = kn.map_node_names_to_index(pg_df, shared_gene_index, "node_2")

    # --- sparse form of the network, sized by the two name counts ---
    universe_count = len(shared_genes)
    node_1_count = len(node_1_names)
    pg_sparse = kn.convert_network_df_to_sparse(pg_df, universe_count, node_1_count)

    # --- run the test, then persist the result and the droplist ---
    contingency_pval = get_fisher_exact_test(
        pg_sparse, node_1_reverse_index, trimmed_spreadsheet_df)

    # Column labels come from the original (untrimmed) spreadsheet. The
    # trailing 2 is forwarded as-is; its meaning is defined by
    # save_fisher_test_result.
    result = save_fisher_test_result(
        contingency_pval,
        run_parameters['results_directory'],
        spreadsheet_df.columns.values,
        2)

    map_and_save_droplist(
        spreadsheet_df, shared_genes, 'fisher_droplist', run_parameters)

    return result
def test_create_node_names_dict_start_negative(self):
    # Build the mapping from the negative-start fixture with start_value=-3
    # and compare it with the expected name -> index dictionary.
    actual = kn.create_node_names_dict(
        self.node_names_start_negative, start_value=-3)
    expected = {'a': -3, 8: -2, 'b': -1}
    failure_note = 'wrong output for node_names with negative start value'
    self.assertEqual(actual, expected, failure_note)
def test_create_node_names_dict_start_positive(self):
    # Build the mapping from the positive-start fixture with start_value=5
    # and compare it with the expected name -> index dictionary.
    actual = kn.create_node_names_dict(
        self.node_names_start_positive, start_value=5)
    expected = {'a': 5, 8: 6, 'b': 7}
    failure_note = 'wrong output for node_names with positive start value'
    self.assertEqual(actual, expected, failure_note)
def build_hybrid_sparse_matrix(run_parameters, normalize_by_sum, construct_by_union):
    """Combine the gene-gene and property-gene networks into one sparse matrix.

    Args:
        run_parameters: dictionary of run parameters; the keys read here are
            'pg_network_name_full_path' and 'gg_network_name_full_path'.
        normalize_by_sum: only when this is exactly True are both networks
            normalized by sum on the 'wt' column before being combined.
        construct_by_union: when exactly True, the node_2 names of the
            property-gene network are added to the gene list; otherwise the
            property-gene network is restricted (on node_2) to the genes of
            the gene-gene network.

    Returns:
        network_sparse: output sparse matrix.
        unique_gene_names: gene names of the hybrid matrix.
        pg_network_n1_names: property names of the hybrid matrix.
    """
    # Look up both paths before loading anything, so a missing key fails first.
    pg_path = run_parameters['pg_network_name_full_path']
    gg_path = run_parameters['gg_network_name_full_path']

    pg_df = kn.get_network_df(pg_path)
    gg_df = kn.get_network_df(gg_path)

    property_names, pg_gene_names = kn.extract_network_node_names(pg_df)
    gg_n1_names, gg_n2_names = kn.extract_network_node_names(gg_df)

    # ---------------
    # decide which genes make up the hybrid network
    # ---------------
    gene_names = kn.find_unique_node_names(gg_n1_names, gg_n2_names)
    if construct_by_union is True:
        gene_names = kn.find_unique_node_names(gene_names, pg_gene_names)
    else:
        pg_df = kn.update_network_df(pg_df, gene_names, 'node_2')

    # Gene indices use the default start; property indices start at the gene count.
    gene_index = kn.create_node_names_dict(gene_names)
    property_index = kn.create_node_names_dict(property_names, len(gene_names))
    all_node_names = gene_names + property_names

    # ---------------
    # replace node names by their integer indices
    # ---------------
    for column in ("node_1", "node_2"):
        gg_df = kn.map_node_names_to_index(gg_df, gene_index, column)
    pg_df = kn.map_node_names_to_index(pg_df, property_index, "node_1")
    pg_df = kn.map_node_names_to_index(pg_df, gene_index, "node_2")

    gg_df = kn.symmetrize_df(gg_df)
    pg_df = kn.symmetrize_df(pg_df)

    if normalize_by_sum is True:
        gg_df = kn.normalize_network_df_by_sum(gg_df, 'wt')
        pg_df = kn.normalize_network_df_by_sum(pg_df, 'wt')

    hybrid_df = kn.form_hybrid_network_df([gg_df, pg_df])

    # ---------------
    # square sparse matrix: one row and one column per gene or property name
    # ---------------
    matrix_dim = len(all_node_names)
    network_sparse = kn.convert_network_df_to_sparse(hybrid_df, matrix_dim, matrix_dim)

    return network_sparse, gene_names, property_names
def test_create_node_names_dict_empty(self):
    # With the empty fixture and start_value=0 the result must be an empty dict.
    actual = kn.create_node_names_dict(self.node_names_empty, start_value=0)
    expected = {}
    self.assertEqual(actual, expected, 'wrong output for empty node_names')
def test_create_node_names_dict(self):
    # Build the mapping from the default fixture with start_value=0 and
    # compare it with the expected name -> index dictionary.
    actual = kn.create_node_names_dict(self.node_names, start_value=0)
    expected = {'a': 0, 'b': 1, 'c': 2}
    self.assertEqual(actual, expected, 'wrong output')