コード例 #1
0
ファイル: make_views.py プロジェクト: jjdblast/clustergrammer
def pct_rows(net, df, all_views, dist_type, rank_type):
    '''Append one clustered view per row-sum cutoff to all_views.

    For each fraction f in 0.0, 0.1, ..., 0.9 a copy of df is filtered with
    run_filter.df_filter_row using the cutoff f * (largest row sum of
    df['mat']), loaded into a fresh Network, clustered, and the resulting
    row/col nodes are stored as a view dict appended to all_views.

    Args:
        net: kept for interface compatibility; not read by this function.
        df: dict-like holding the data matrix under the key 'mat'. It is
            deep-copied before every filtering step, so it is not modified.
        all_views: list that the new view dicts are appended to (in place).
        dist_type: forwarded to calc_clust.cluster_row_and_col.
        rank_type: suffix of the view key ('pct_row_' + rank_type).
            NOTE(review): the cutoff is always derived from row sums, so
            rank_type only labels the view here - confirm whether a
            variance-based ranking was intended for 'var'.

    Returns:
        The same all_views list that was passed in.
    '''
    from __init__ import Network
    from copy import deepcopy
    import numpy as np
    import calc_clust
    import run_filter

    # fractions 0.0, 0.1, ..., 0.9 of the largest row sum
    all_filt = [i / 10.0 for i in range(10)]

    # np.sum does not modify its input, so no copy of the matrix is needed
    sum_row = np.sum(df['mat'], axis=1)
    max_sum = max(sum_row)

    for inst_filt in all_filt:

        cutoff = inst_filt * max_sum

        # filter a copy so every cutoff starts from the unfiltered data
        inst_df = run_filter.df_filter_row(deepcopy(df), cutoff,
                                           take_abs=False)

        # deepcopy of the new instance is kept from the original code;
        # presumably it guards against shared state - TODO confirm
        tmp_net = deepcopy(Network())
        tmp_net.df_to_dat(inst_df)

        try:
            try:
                calc_clust.cluster_row_and_col(tmp_net,
                                               dist_type=dist_type,
                                               run_clustering=True)

            except Exception:
                # clustering failed: retry with run_clustering=False
                calc_clust.cluster_row_and_col(tmp_net,
                                               dist_type=dist_type,
                                               run_clustering=False)

            # NOTE(review): 'dist' is always 'cos', independent of dist_type
            inst_view = {
                'pct_row_' + rank_type: inst_filt,
                'dist': 'cos',
                'nodes': {
                    'row_nodes': tmp_net.viz['row_nodes'],
                    'col_nodes': tmp_net.viz['col_nodes'],
                },
            }

            all_views.append(inst_view)

        except Exception:
            # best effort: a cutoff whose view cannot be built is skipped.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            continue

    return all_views
コード例 #2
0
def pct_rows(net, df, all_views, dist_type, rank_type):
  '''Append one clustered view per row-sum cutoff to all_views.

  For each fraction f in 0.0, 0.1, ..., 0.9 a copy of df is filtered with
  run_filter.df_filter_row using the cutoff f * (largest row sum of
  df['mat']), loaded into a fresh Network, clustered, and the resulting
  row/col nodes are stored as a view dict appended to all_views.

  Args:
    net: kept for interface compatibility; not read by this function.
    df: dict-like holding the data matrix under the key 'mat'. It is
      deep-copied before every filtering step, so it is not modified.
    all_views: list that the new view dicts are appended to (in place).
    dist_type: forwarded to calc_clust.cluster_row_and_col.
    rank_type: suffix of the view key ('pct_row_' + rank_type).
      NOTE(review): the cutoff is always derived from row sums, so
      rank_type only labels the view here - confirm whether a
      variance-based ranking was intended for 'var'.

  Returns:
    The same all_views list that was passed in.
  '''
  from __init__ import Network
  from copy import deepcopy
  import numpy as np
  import calc_clust
  import run_filter

  # fractions 0.0, 0.1, ..., 0.9 of the largest row sum
  all_filt = [i / 10.0 for i in range(10)]

  # np.sum does not modify its input, so no copy of the matrix is needed
  sum_row = np.sum(df['mat'], axis=1)
  max_sum = max(sum_row)

  for inst_filt in all_filt:

    cutoff = inst_filt * max_sum

    # filter a copy so every cutoff starts from the unfiltered data
    inst_df = run_filter.df_filter_row(deepcopy(df), cutoff, take_abs=False)

    # deepcopy of the new instance is kept from the original code;
    # presumably it guards against shared state - TODO confirm
    tmp_net = deepcopy(Network())
    tmp_net.df_to_dat(inst_df)

    try:
      try:
        calc_clust.cluster_row_and_col(tmp_net, dist_type=dist_type,
                                       run_clustering=True)

      except Exception:
        # clustering failed: retry with run_clustering=False
        calc_clust.cluster_row_and_col(tmp_net, dist_type=dist_type,
                                       run_clustering=False)

      # NOTE(review): 'dist' is always 'cos', independent of dist_type
      inst_view = {
        'pct_row_' + rank_type: inst_filt,
        'dist': 'cos',
        'nodes': {
          'row_nodes': tmp_net.viz['row_nodes'],
          'col_nodes': tmp_net.viz['col_nodes'],
        },
      }

      all_views.append(inst_view)

    except Exception:
      # best effort: a cutoff whose view cannot be built is skipped.
      # Exception (not a bare except) lets KeyboardInterrupt and
      # SystemExit propagate.
      continue

  return all_views
コード例 #3
0
def make_clust(net,
               dist_type='cosine',
               run_clustering=True,
               dendro=True,
               requested_views=None,
               linkage_type='average',
               sim_mat=False):
    ''' Calculate multiple views of a clustergram by filtering the data and
    clustering after each filtering.

    The data held by net is first filtered (rows, then columns) with a
    threshold of 0.0001, written back into net and clustered. Then, for every
    name found in requested_views, additional filtered views are generated by
    make_views.N_rows / make_views.pct_rows. The collected views are stored
    in net.viz['views'].

    Args:
        net: network object; modified in place (its data is replaced by the
            filtered data and net.viz['views'] is set).
        dist_type: forwarded to the clustering and to the view generators.
        run_clustering: forwarded to calc_clust.cluster_row_and_col.
        dendro: forwarded to calc_clust.cluster_row_and_col.
        requested_views: container of view names; the recognized names are
            'N_row_sum', 'N_row_var', 'pct_row_sum' and 'pct_row_var'.
            Defaults to ['pct_row_sum', 'N_row_sum'] when None.
        linkage_type: forwarded to calc_clust.cluster_row_and_col.
        sim_mat: when exactly True a message is printed; no similarity
            matrices are computed by this function.

    Returns:
        None.
    '''

    from copy import deepcopy
    import calc_clust
    import run_filter
    import make_views

    # None sentinel instead of a list literal in the signature, so the
    # default is not one list object shared between calls
    if requested_views is None:
        requested_views = ['pct_row_sum', 'N_row_sum']

    df = net.dat_to_df()

    threshold = 0.0001
    df = run_filter.df_filter_row(df, threshold)
    df = run_filter.df_filter_col(df, threshold)

    # calculate initial view with no row filtering
    net.df_to_dat(df)

    calc_clust.cluster_row_and_col(net,
                                   dist_type=dist_type,
                                   linkage_type=linkage_type,
                                   run_clustering=run_clustering,
                                   dendro=dendro,
                                   ignore_cat=False)

    all_views = []
    send_df = deepcopy(df)

    # the order of these checks fixes the order of the views in all_views
    if 'N_row_sum' in requested_views:
        all_views = make_views.N_rows(net,
                                      send_df,
                                      all_views,
                                      dist_type=dist_type,
                                      rank_type='sum')

    if 'N_row_var' in requested_views:
        all_views = make_views.N_rows(net,
                                      send_df,
                                      all_views,
                                      dist_type=dist_type,
                                      rank_type='var')

    if 'pct_row_sum' in requested_views:
        all_views = make_views.pct_rows(net,
                                        send_df,
                                        all_views,
                                        dist_type=dist_type,
                                        rank_type='sum')

    if 'pct_row_var' in requested_views:
        all_views = make_views.pct_rows(net,
                                        send_df,
                                        all_views,
                                        dist_type=dist_type,
                                        rank_type='var')

    # NOTE: similarity matrices of rows and columns are not implemented
    # yet; sim_mat currently only triggers this message
    if sim_mat is True:
        print(
            'make similarity matrices of rows and columns, add to viz data structure'
        )

    net.viz['views'] = all_views
コード例 #4
0
def make_clust(net, dist_type='cosine', run_clustering=True,
               dendro=True, requested_views=None,
               linkage_type='average', sim_mat=False):

  ''' Calculate multiple views of a clustergram by filtering the data and
  clustering after each filtering.

  The data held by net is first filtered (rows, then columns) with a
  threshold of 0.0001, written back into net and clustered. Then, for every
  name found in requested_views, additional filtered views are generated by
  make_views.N_rows / make_views.pct_rows. The collected views are stored
  in net.viz['views'].

  Args:
    net: network object; modified in place (its data is replaced by the
      filtered data and net.viz['views'] is set).
    dist_type: forwarded to the clustering and to the view generators.
    run_clustering: forwarded to calc_clust.cluster_row_and_col.
    dendro: forwarded to calc_clust.cluster_row_and_col.
    requested_views: container of view names; the recognized names are
      'N_row_sum', 'N_row_var', 'pct_row_sum' and 'pct_row_var'.
      Defaults to ['pct_row_sum', 'N_row_sum'] when None.
    linkage_type: forwarded to calc_clust.cluster_row_and_col.
    sim_mat: when exactly True a message is printed; no similarity
      matrices are computed by this function.

  Returns:
    None.
  '''

  from copy import deepcopy
  import calc_clust
  import run_filter
  import make_views

  # None sentinel instead of a list literal in the signature, so the
  # default is not one list object shared between calls
  if requested_views is None:
    requested_views = ['pct_row_sum', 'N_row_sum']

  df = net.dat_to_df()

  threshold = 0.0001
  df = run_filter.df_filter_row(df, threshold)
  df = run_filter.df_filter_col(df, threshold)

  # calculate initial view with no row filtering
  net.df_to_dat(df)

  calc_clust.cluster_row_and_col(net, dist_type=dist_type,
                                 linkage_type=linkage_type,
                                 run_clustering=run_clustering,
                                 dendro=dendro, ignore_cat=False)

  all_views = []
  send_df = deepcopy(df)

  # the order of these checks fixes the order of the views in all_views
  if 'N_row_sum' in requested_views:
    all_views = make_views.N_rows(net, send_df, all_views,
                                  dist_type=dist_type, rank_type='sum')

  if 'N_row_var' in requested_views:
    all_views = make_views.N_rows(net, send_df, all_views,
                                  dist_type=dist_type, rank_type='var')

  if 'pct_row_sum' in requested_views:
    all_views = make_views.pct_rows(net, send_df, all_views,
                                    dist_type=dist_type, rank_type='sum')

  if 'pct_row_var' in requested_views:
    all_views = make_views.pct_rows(net, send_df, all_views,
                                    dist_type=dist_type, rank_type='var')

  # NOTE: similarity matrices of rows and columns are not implemented
  # yet; sim_mat currently only triggers this message
  if sim_mat is True:
    print('make similarity matrices of rows and columns, add to viz data structure')

  net.viz['views'] = all_views