Ejemplo n.º 1
0
def run_fisher(run_parameters):
    """Drive the Fisher gene-set characterization from inputs to saved result.

    Loads the user spreadsheet and the property-gene network, keeps only the
    genes found in both, builds a sparse matrix of the network, runs
    get_fisher_exact_test on it and hands the outcome to
    save_fisher_test_result and map_and_save_droplist.

    Args:
        run_parameters: dictionary of run parameters. The keys read directly
            here are 'spreadsheet_name_full_path',
            'pg_network_name_full_path' and 'results_directory'; the whole
            dictionary is also forwarded to map_and_save_droplist.

    Returns:
        Whatever save_fisher_test_result returns.
    """
    # --- read the two inputs -------------------------------------------------
    user_df = kn.get_spreadsheet_df(
        run_parameters['spreadsheet_name_full_path'])
    pg_df = kn.get_network_df(run_parameters['pg_network_name_full_path'])

    user_genes = kn.extract_spreadsheet_gene_names(user_df)
    property_names, network_genes = kn.extract_network_node_names(pg_df)

    # --- genes shared by the network (node_2 side) and the spreadsheet -------
    shared_genes = kn.find_common_node_names(network_genes, user_genes)
    shared_gene_index = kn.create_node_names_dict(shared_genes)
    property_index = kn.create_node_names_dict(property_names)
    index_to_property = kn.create_reverse_node_names_dict(property_index)

    # --- cut both tables down to the shared genes ----------------------------
    filtered_user_df = kn.update_spreadsheet_df(user_df, shared_genes)
    pg_df = kn.update_network_df(pg_df, shared_genes, "node_2")
    # Every remaining edge gets the same weight of 1.
    pg_df['wt'] = 1

    # --- replace node names with their integer indices -----------------------
    pg_df = kn.map_node_names_to_index(pg_df, property_index, "node_1")
    pg_df = kn.map_node_names_to_index(pg_df, shared_gene_index, "node_2")

    # --- sparse network, Fisher test, persistence ----------------------------
    gene_count = len(shared_genes)
    property_count = len(property_names)
    pg_sparse = kn.convert_network_df_to_sparse(pg_df, gene_count,
                                                property_count)

    pvals = get_fisher_exact_test(pg_sparse, index_to_property,
                                  filtered_user_df)
    result = save_fisher_test_result(pvals,
                                     run_parameters['results_directory'],
                                     user_df.columns.values, 2)
    # The drop list is computed against the original, unfiltered spreadsheet.
    map_and_save_droplist(user_df, shared_genes, 'fisher_droplist',
                          run_parameters)

    return result
Ejemplo n.º 2
0
 def test_create_node_names_dict_start_negative(self):
     # With start_value=-3 the three fixture names are expected to map to
     # -3, -2 and -1 respectively.
     expected = {'a': -3, 8: -2, 'b': -1}
     actual = kn.create_node_names_dict(self.node_names_start_negative,
                                        start_value=-3)
     self.assertEqual(
         actual, expected,
         'wrong output for node_names with negative start value')
Ejemplo n.º 3
0
 def test_create_node_names_dict_start_positive(self):
     # With start_value=5 the three fixture names are expected to map to
     # 5, 6 and 7 respectively.
     expected = {'a': 5, 8: 6, 'b': 7}
     actual = kn.create_node_names_dict(self.node_names_start_positive,
                                        start_value=5)
     self.assertEqual(
         actual, expected,
         'wrong output for node_names with positive start value')
def build_hybrid_sparse_matrix(run_parameters, normalize_by_sum, construct_by_union):
    """Combine the gene-gene and property-gene networks into one sparse matrix.

    Args:
        run_parameters: dictionary of run parameters; the keys read here are
            'pg_network_name_full_path' and 'gg_network_name_full_path'.
        normalize_by_sum: only when this is exactly True are both networks
            passed through kn.normalize_network_df_by_sum (on the 'wt'
            column) before they are combined.
        construct_by_union: only when this is exactly True is the gene set
            combined with the property-gene network's node_2 names; in every
            other case the property-gene network is instead restricted to
            the gene-gene network's names.

    Returns:
        network_sparse: output sparse matrix.
        unique_gene_names: gene names of the hybrid matrix.
        pg_network_n1_names: property names of the hybrid matrix.
    """
    # Look up both paths before touching any file.
    pg_path, gg_path = (run_parameters['pg_network_name_full_path'],
                        run_parameters['gg_network_name_full_path'])

    pg_df = kn.get_network_df(pg_path)
    gg_df = kn.get_network_df(gg_path)

    property_names, pg_genes = kn.extract_network_node_names(pg_df)
    gg_side_1, gg_side_2 = kn.extract_network_node_names(gg_df)

    # Gene set: start from every name in the gene-gene network, then either
    # add the property-gene network's genes or trim that network to fit.
    gene_names = kn.find_unique_node_names(gg_side_1, gg_side_2)
    if construct_by_union is True:
        gene_names = kn.find_unique_node_names(gene_names, pg_genes)
    else:
        pg_df = kn.update_network_df(pg_df, gene_names, 'node_2')

    # Genes are numbered first; property numbering starts right after them.
    gene_index = kn.create_node_names_dict(gene_names)
    property_index = kn.create_node_names_dict(property_names, len(gene_names))

    # Side length of the square hybrid matrix: genes plus properties.
    node_count = len(gene_names + property_names)

    # Swap node names for integer indices in both networks.
    for column in ("node_1", "node_2"):
        gg_df = kn.map_node_names_to_index(gg_df, gene_index, column)
    pg_df = kn.map_node_names_to_index(pg_df, property_index, "node_1")
    pg_df = kn.map_node_names_to_index(pg_df, gene_index, "node_2")

    gg_df = kn.symmetrize_df(gg_df)
    pg_df = kn.symmetrize_df(pg_df)

    if normalize_by_sum is True:
        gg_df = kn.normalize_network_df_by_sum(gg_df, 'wt')
        pg_df = kn.normalize_network_df_by_sum(pg_df, 'wt')

    hybrid_df = kn.form_hybrid_network_df([gg_df, pg_df])

    # Store the combined network in sparse form.
    network_sparse = kn.convert_network_df_to_sparse(
        hybrid_df, node_count, node_count)

    return network_sparse, gene_names, property_names
Ejemplo n.º 5
0
 def test_create_node_names_dict_empty(self):
     # The empty fixture is expected to produce an empty dictionary.
     expected = {}
     actual = kn.create_node_names_dict(self.node_names_empty, start_value=0)
     self.assertEqual(actual, expected, 'wrong output for empty node_names')
Ejemplo n.º 6
0
 def test_create_node_names_dict(self):
     # With start_value=0 the fixture names are expected to map to 0, 1, 2.
     expected = {'a': 0, 'b': 1, 'c': 2}
     actual = kn.create_node_names_dict(self.node_names, start_value=0)
     self.assertEqual(actual, expected, 'wrong output')