def make_clust(net, dist_type='cosine', run_clustering=True, dendro=True,
               requested_views=None, linkage_type='average', sim_mat=False):
    '''
    Calculate multiple views of a clustergram by filtering the data and
    clustering after each filtering.  Each filtering keeps the top N rows
    (or top percentage of rows) based on some quantity (sum, variance).

    Parameters
    ----------
    net : Network
        Project network object; read via dat_to_df() and written via
        df_to_dat() / net.viz.
    dist_type : str
        Distance metric passed through to the clustering routines.
    run_clustering : bool
        Whether hierarchical clustering is actually run on the initial view.
    dendro : bool
        Whether dendrogram information is computed.
    requested_views : list of str or None
        Which filtered view families to compute; defaults to
        ['pct_row_sum', 'N_row_sum'].
    linkage_type : str
        Linkage method for the initial clustering.
    sim_mat : bool
        Placeholder flag; similarity-matrix generation is not implemented yet.

    The computed views are stored on net.viz['views'].
    '''
    from copy import deepcopy

    import calc_clust
    import run_filter
    import make_views

    # None sentinel avoids the shared mutable-default-argument pitfall;
    # behavior for callers that omit the argument is unchanged.
    if requested_views is None:
        requested_views = ['pct_row_sum', 'N_row_sum']

    df = net.dat_to_df()

    # drop rows/cols that are essentially all zero before clustering
    threshold = 0.0001
    df = run_filter.df_filter_row(df, threshold)
    df = run_filter.df_filter_col(df, threshold)

    # calculate initial view with no row filtering
    net.df_to_dat(df)

    calc_clust.cluster_row_and_col(net, dist_type=dist_type,
                                   linkage_type=linkage_type,
                                   run_clustering=run_clustering,
                                   dendro=dendro, ignore_cat=False)

    all_views = []
    send_df = deepcopy(df)

    # each helper appends its views and returns the accumulated list
    if 'N_row_sum' in requested_views:
        all_views = make_views.N_rows(net, send_df, all_views,
                                      dist_type=dist_type, rank_type='sum')

    if 'N_row_var' in requested_views:
        all_views = make_views.N_rows(net, send_df, all_views,
                                      dist_type=dist_type, rank_type='var')

    if 'pct_row_sum' in requested_views:
        all_views = make_views.pct_rows(net, send_df, all_views,
                                        dist_type=dist_type, rank_type='sum')

    if 'pct_row_var' in requested_views:
        all_views = make_views.pct_rows(net, send_df, all_views,
                                        dist_type=dist_type, rank_type='var')

    if sim_mat is True:
        # NOTE(review): not implemented yet — only announces the intent
        print(
            'make similarity matrices of rows and columns, add to viz data structure'
        )

    net.viz['views'] = all_views
def N_rows(net, df, all_views, dist_type='cosine', rank_type='sum'):
    '''
    Append views that keep only the top-N rows of the matrix, for a fixed
    ladder of N values, ranked by the absolute row sum or row variance.

    Parameters
    ----------
    net : Network
        Unused here except as part of the shared make_views signature.
    df : dict
        Holds 'mat' (DataFrame) and optionally 'mat_up' / 'mat_dn'.
    all_views : list
        Accumulator of view dicts; appended to and returned.
    dist_type : str
        Distance metric forwarded to the clustering routine.
    rank_type : str
        'sum' or 'var' — the statistic used to rank rows.

    Returns
    -------
    list
        all_views with one view dict added per successfully clustered N.
    '''
    from copy import deepcopy
    from __init__ import Network
    import calc_clust
    import run_filter

    keep_top = ['all', 500, 400, 300, 200, 100, 80, 60, 40, 20, 10]

    # rank rows by |statistic|; transpose so rows become columns for axis=0
    df_abs = deepcopy(df['mat']).transpose()
    if rank_type == 'sum':
        row_stat = df_abs.sum(axis=0)
    elif rank_type == 'var':
        row_stat = df_abs.var(axis=0)

    row_stat = row_stat.abs()
    row_stat.sort_values(inplace=True, ascending=False)
    rows_sorted = row_stat.index.values.tolist()

    for inst_keep in keep_top:

        # BUG FIX: test 'all' first.  The original order
        # (inst_keep < len(rows_sorted) or inst_keep == 'all') raises
        # TypeError on Python 3 because 'all' < int is evaluated first,
        # making the 'all' view unreachable.
        if inst_keep == 'all' or inst_keep < len(rows_sorted):

            # deepcopy only when the view will actually be built
            tmp_df = deepcopy(df)
            tmp_net = deepcopy(Network())

            if inst_keep != 'all':
                keep_rows = rows_sorted[0:inst_keep]
                # .loc replaces the deprecated/removed DataFrame.ix indexer
                tmp_df['mat'] = tmp_df['mat'].loc[keep_rows]

                if 'mat_up' in tmp_df:
                    tmp_df['mat_up'] = tmp_df['mat_up'].loc[keep_rows]
                    tmp_df['mat_dn'] = tmp_df['mat_dn'].loc[keep_rows]

                # re-filter columns after dropping rows
                tmp_df = run_filter.df_filter_col(tmp_df, 0.001)

            tmp_net.df_to_dat(tmp_df)

            try:
                try:
                    calc_clust.cluster_row_and_col(tmp_net, dist_type,
                                                   run_clustering=True)
                except Exception:
                    # fall back: compute ordering without full clustering
                    calc_clust.cluster_row_and_col(tmp_net, dist_type,
                                                   run_clustering=False)

                # add view
                inst_view = {}
                inst_view['N_row_' + rank_type] = inst_keep
                inst_view['dist'] = 'cos'
                inst_view['nodes'] = {}
                inst_view['nodes']['row_nodes'] = tmp_net.viz['row_nodes']
                inst_view['nodes']['col_nodes'] = tmp_net.viz['col_nodes']
                all_views.append(inst_view)

            except Exception:
                # best-effort: a view that cannot be clustered is skipped
                pass

    return all_views
def make_clust(net, dist_type='cosine', run_clustering=True, dendro=True,
               requested_views=None, linkage_type='average', sim_mat=False):
    '''
    Calculate multiple views of a clustergram by filtering the data and
    clustering after each filtering.  Each filtering keeps the top N rows
    (or top percentage of rows) based on some quantity (sum, variance).

    Parameters
    ----------
    net : Network
        Project network object; read via dat_to_df() and written via
        df_to_dat() / net.viz.
    dist_type : str
        Distance metric passed through to the clustering routines.
    run_clustering : bool
        Whether hierarchical clustering is actually run on the initial view.
    dendro : bool
        Whether dendrogram information is computed.
    requested_views : list of str or None
        Which filtered view families to compute; defaults to
        ['pct_row_sum', 'N_row_sum'].
    linkage_type : str
        Linkage method for the initial clustering.
    sim_mat : bool
        Placeholder flag; similarity-matrix generation is not implemented yet.

    The computed views are stored on net.viz['views'].
    '''
    from copy import deepcopy

    import calc_clust
    import run_filter
    import make_views

    # None sentinel avoids the shared mutable-default-argument pitfall;
    # behavior for callers that omit the argument is unchanged.
    if requested_views is None:
        requested_views = ['pct_row_sum', 'N_row_sum']

    df = net.dat_to_df()

    # drop rows/cols that are essentially all zero before clustering
    threshold = 0.0001
    df = run_filter.df_filter_row(df, threshold)
    df = run_filter.df_filter_col(df, threshold)

    # calculate initial view with no row filtering
    net.df_to_dat(df)

    calc_clust.cluster_row_and_col(net, dist_type=dist_type,
                                   linkage_type=linkage_type,
                                   run_clustering=run_clustering,
                                   dendro=dendro, ignore_cat=False)

    all_views = []
    send_df = deepcopy(df)

    # each helper appends its views and returns the accumulated list
    if 'N_row_sum' in requested_views:
        all_views = make_views.N_rows(net, send_df, all_views,
                                      dist_type=dist_type, rank_type='sum')

    if 'N_row_var' in requested_views:
        all_views = make_views.N_rows(net, send_df, all_views,
                                      dist_type=dist_type, rank_type='var')

    if 'pct_row_sum' in requested_views:
        all_views = make_views.pct_rows(net, send_df, all_views,
                                        dist_type=dist_type, rank_type='sum')

    if 'pct_row_var' in requested_views:
        all_views = make_views.pct_rows(net, send_df, all_views,
                                        dist_type=dist_type, rank_type='var')

    if sim_mat is True:
        # NOTE(review): not implemented yet — only announces the intent
        print(
            'make similarity matrices of rows and columns, add to viz data structure'
        )

    net.viz['views'] = all_views