Code Example #1
File: old_load_gct.py  Project: MaayanLab/LINCS_GCT
def make_viz_from_df(df, filename):
    from clustergrammer import Network

    net = Network()

    net.df_to_dat(df)
    net.swap_nan_for_zero()

    # zscore first to get the column distributions to be similar
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # filter the rows to keep the perts with the largest normalized values
    net.filter_N_top('row', 2000)

    num_columns = net.dat['mat'].shape[1]

    if num_columns < 50:
        # views = ['N_row_sum', 'N_row_var']
        views = ['N_row_sum']
        net.make_clust(dist_type='cos', views=views)

        filename = 'json/' + filename.split('/')[1].replace('.gct',
                                                            '') + '.json'

        net.write_json_to_file('viz', filename)
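
A minimal, hypothetical driver for this helper (the DataFrame contents and the 'gcts/example.gct' path are illustrative): the function splits the path on '/' and strips '.gct', so this call writes 'json/example.json'.

import pandas as pd

# illustrative input: rows are perturbations, columns are samples
df = pd.DataFrame({'sample_1': [1.0, 2.0, 3.0],
                   'sample_2': [0.5, 1.5, 2.5]},
                  index=['pert_A', 'pert_B', 'pert_C'])

make_viz_from_df(df, 'gcts/example.gct')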
Code Example #2
def make_phos_homepage_viz():

    from clustergrammer import Network
    net = Network()

    filename = 'lung_cellline_3_1_16/lung_cellline_phospho/' + \
      'lung_cellline_TMT_phospho_combined_ratios.tsv'

    net.load_file(filename)

    # quantile normalize to normalize cell lines
    net.normalize(axis='col', norm_type='qn')

    # only keep most differentially regulated PTMs
    net.filter_N_top('row', 250, 'sum')

    # take zscore of rows
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)

    net.swap_nan_for_zero()

    # threshold filter PTMs
    net.filter_threshold('row', threshold=1.75, num_occur=3)

    views = ['N_row_sum', 'N_row_var']
    net.make_clust(dist_type='cos',
                   views=views,
                   dendro=True,
                   sim_mat=True,
                   calc_cat_pval=True)

    net.write_json_to_file('viz', 'json/homepage_phos.json', 'indent')
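
For readers unfamiliar with norm_type='qn': quantile normalization forces every column to share the same value distribution. A standalone sketch of the idea (not clustergrammer's internal implementation):

import numpy as np
import pandas as pd

def quantile_normalize(df):
    # rank each column, then replace rank k in every column with the
    # mean of the k-th smallest values taken across all columns
    ranks = df.rank(method='first').astype(int)
    sorted_means = pd.DataFrame(np.sort(df.values, axis=0)).mean(axis=1)
    return ranks.apply(lambda col: col.map(lambda r: sorted_means[r - 1]))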
Code Example #3
def clust_vect(db, viz_doc, vect_post):

    from clustergrammer import Network

    try:
        net = Network()
        net.load_vect_post_to_net(vect_post)
        net.swap_nan_for_zero()

        views = ['N_row_sum', 'N_row_var']
        net.make_clust(dist_type='cosine',
                       dendro=True,
                       views=views,
                       linkage_type='average')

        dat_id = upload_dat(db, net)

        update_viz = net.viz
        update_dat = dat_id

    except Exception:
        print('error clustering')
        update_viz = 'error'
        update_dat = 'error'

    viz_doc['viz'] = update_viz
    viz_doc['dat'] = update_dat

    return viz_doc
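
upload_dat is a project-local helper that is not shown here. Judging from the main() examples below, a plausible sketch (the collection and field names are assumptions, not taken from the source):

def upload_dat(db, net, name='vect_post_upload'):
    # hypothetical: store the 'dat' export in its own document and
    # return the new document's id
    export_dat = {'name': name,
                  'dat': net.export_net_json('dat'),
                  'source': 'vect_post'}
    return db.network_data.insert_one(export_dat).inserted_id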
Code Example #4
def make_viz_json(inst_df, name):
  from clustergrammer import Network
  net = Network()

  filename = 'json/'+name
  load_df = {}
  load_df['mat'] = inst_df
  net.df_to_dat(load_df)
  net.swap_nan_for_zero()
  net.make_clust(views=[])
  net.write_json_to_file('viz', filename, 'no-indent')
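
Unlike Example #1, this helper wraps the DataFrame in a dict under the 'mat' key before calling df_to_dat. A minimal, hypothetical call:

import pandas as pd

inst_df = pd.DataFrame({'col_a': [1.0, 0.0], 'col_b': [0.5, 2.0]},
                       index=['row_x', 'row_y'])

# writes the visualization to 'json/small_example.json'
make_viz_json(inst_df, 'small_example.json')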
Code Example #5
def main(buff, inst_filename, mongo_address, viz_id):
    from bson.objectid import ObjectId
    from pymongo import MongoClient
    from clustergrammer import Network

    client = MongoClient(mongo_address)
    db = client.clustergrammer

    viz_id = ObjectId(viz_id)
    found_viz = db.networks.find_one({'_id': viz_id})

    try:

        net = Network()
        net.load_tsv_to_net(buff)

        net.swap_nan_for_zero()

        views = ['N_row_sum', 'N_row_var']

        net.make_clust(dist_type='cosine', dendro=True, views=views,
                       linkage_type='average')

        export_dat = {}
        export_dat['name'] = inst_filename
        export_dat['dat'] = net.export_net_json('dat')
        export_dat['source'] = 'user_upload'

        dat_id = db.network_data.insert_one(export_dat).inserted_id  # insert() is deprecated in PyMongo 3+

        update_viz = net.viz
        update_dat = dat_id

    except Exception:
        print('\n-----------------------')
        print('error in clustering')
        print('-----------------------\n')
        update_viz = 'error'
        update_dat = 'error'

    found_viz['viz'] = update_viz
    found_viz['dat'] = update_dat

    db.networks.update_one({'_id': viz_id}, {'$set': found_viz})

    client.close()
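
buff is passed straight to load_tsv_to_net; assuming it is a file-like buffer, a hypothetical invocation could wrap TSV text in an in-memory stream (the path, address, and id below are placeholders, not values from the source):

import io

with open('txt/example_matrix.txt') as f:
    buff = io.StringIO(f.read())

main(buff, 'example_matrix.txt', 'localhost:27017',
     '5f0c0ffee0ddba11ad000000')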
Code Example #6
def cluster():
    from clustergrammer import Network

    net = Network()

    vect_post = net.load_json_to_dict('fake_vect_post.json')

    net.load_vect_post_to_net(vect_post)

    net.swap_nan_for_zero()

    # net.N_top_views()
    net.make_clust(dist_type='cos',
                   views=['N_row_sum', 'N_row_var'],
                   dendro=True)

    net.write_json_to_file('viz', 'json/large_vect_post_example.json',
                           'indent')
Code Example #7
def proc_locally():
    from clustergrammer import Network
    # import run_g2e_background

    net = Network()

    vect_post = net.load_json_to_dict('large_vect_post.json')

    print(vect_post.keys())

    # mongo_address = '10.125.161.139'

    net.load_vect_post_to_net(vect_post)

    net.swap_nan_for_zero()

    net.N_top_views()

    print(net.viz.keys())
Code Example #8
File: old_load_gct.py  Project: MaayanLab/LINCS_GCT
def process_GCT_and_export_tsv():
    from clustergrammer import Network

    filename = 'gcts/LDS-1003.gct'
    print('exporting processed GCT as tsv file')

    df = load_file(filename)

    net = Network()

    net.df_to_dat(df)
    net.swap_nan_for_zero()

    # zscore first to get the column distributions to be similar
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # filter the rows to keep the perts with the largest normalized values
    net.filter_N_top('row', 200)

    net.write_matrix_to_tsv('txt/example_gct_export.txt')
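
load_file here is a module-level helper (not the Network method) that parses a GCT file into a DataFrame; its definition is not shown. A minimal sketch for the GCT 1.2 layout (a version line, a dimensions line, then a table with Name and Description columns), offered as an assumption:

import pandas as pd

def load_file(filename):
    # hypothetical GCT 1.2 reader: skip the '#1.2' and dimensions
    # lines, index on the 'Name' column, drop 'Description'
    df = pd.read_csv(filename, sep='\t', skiprows=2, index_col=0)
    return df.drop(columns=['Description'])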
Code Example #9
def make_json_from_tsv(name):
  '''
  Make a clustergrammer JSON from a TSV file.
  '''
  from clustergrammer import Network

  print('\n' + name)

  net = Network()

  filename = 'txt/' + name + '.txt'

  net.load_file(filename)

  df = net.dat_to_df()

  net.swap_nan_for_zero()

  # zscore first to get the column distributions to be similar
  net.normalize(axis='col', norm_type='zscore', keep_orig=True)

  # filter the rows to keep the perts with the largest normalized values
  net.filter_N_top('row', 1000)

  num_rows = net.dat['mat'].shape[0]
  num_cols = net.dat['mat'].shape[1]

  print('num_rows ' + str(num_rows))
  print('num_cols ' + str(num_cols))

  if num_cols < 50 or num_rows < 1000:

    views = ['N_row_sum']
    net.make_clust(dist_type='cos', views=views)
    export_filename = 'json/' + name + '.json'
    net.write_json_to_file('viz', export_filename)

  else:
    print('did not cluster: too many columns and rows')
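
A hypothetical call; it assumes the txt/ and json/ directories exist and that the matrix is tab-separated:

import pandas as pd

# write a small matrix where the helper expects to find it
df = pd.DataFrame({'col_a': [1.0, 2.0], 'col_b': [3.0, 4.0]},
                  index=['row_x', 'row_y'])
df.to_csv('txt/my_matrix.txt', sep='\t')

make_json_from_tsv('my_matrix')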
Code Example #10
def reproduce_Mark_correlation_matrix():
    import pandas as pd
    from scipy.spatial.distance import squareform
    from clustergrammer import Network

    dist_vect = calc_custom_dist(data_type='ptm_none',
                                 dist_metric='correlation',
                                 pairwise='True')

    dist_mat = squareform(dist_vect)

    # make similarity matrix
    dist_mat = 1 - dist_mat

    data_type = 'ptm_none'

    filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/' + \
               data_type + '.txt'

    # load file and export dataframe
    net = Network()
    net.load_file(filename)
    net.swap_nan_for_zero()
    tmp_df = net.dat_to_df()
    df = tmp_df['mat']

    cols = df.columns.tolist()
    rows = cols

    mark_df = pd.DataFrame(data=dist_mat, columns=cols, index=rows)

    save_filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/' \
               + 'Mark_corr_sim_mat' + '.txt'
    mark_df.to_csv(save_filename, sep='\t', na_rep='nan')
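
calc_custom_dist is another project-local helper whose definition is not shown. Since its result is passed to squareform, it must return a condensed pairwise distance vector over the columns; a plausible sketch (the file path mirrors the body above, everything else is an assumption):

from scipy.spatial.distance import pdist

def calc_custom_dist(data_type='ptm_none', dist_metric='correlation',
                     pairwise='True'):
    # 'pairwise' is kept for signature compatibility; unused here
    from clustergrammer import Network
    net = Network()
    net.load_file('../lung_cellline_3_1_16/lung_cl_all_ptm/'
                  'precalc_processed/' + data_type + '.txt')
    net.swap_nan_for_zero()
    df = net.dat_to_df()['mat']
    # condensed distance vector over the columns (samples)
    return pdist(df.transpose(), metric=dist_metric)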
Code Example #11
from clustergrammer import Network

net = Network()
# a matrix must be loaded before the steps below; this path is illustrative
net.load_file('txt/example_matrix.txt')

# possible filtering and normalization
##########################################
# net.filter_sum('row', threshold=20)
# net.filter_sum('col', threshold=30)

# net.normalize(axis='row', norm_type='qn')
# net.normalize(axis='col', norm_type='zscore', keep_orig=True)

# net.filter_N_top('row', 100, rank_type='var')
# net.filter_N_top('col', 3, rank_type='var')

# net.filter_threshold('col', threshold=2, num_occur=3)
# net.filter_threshold('row', threshold=3.0, num_occur=4)

net.swap_nan_for_zero()

# df = net.dat_to_df()

views = ['N_row_sum', 'N_row_var']

net.make_clust(dist_type='cos', views=views, dendro=True,
               sim_mat=True, filter_sim=0.1, calc_cat_pval=False)

# optional make_clust argument for Enrichr-based enrichment, e.g.:
# run_enrichr=['ChEA_2015']
# run_enrichr=['ENCODE_TF_ChIP-seq_2014']
# run_enrichr=['KEA_2015']
# run_enrichr=['GO_Biological_Process_2015']

net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent')
Code Example #12
File: process_matrix.py  Project: MaayanLab/adhesome
        gene_attribute_matrix.columns = gene_attribute_matrix.columns.map(
            lambda s: '%s: %s' % (gene_attribute_matrix.columns.name, s))
        # Remove names for clustergrammer
        gene_attribute_matrix.index.name = ""
        gene_attribute_matrix.columns.name = ""
        # Write to file
        # fp = StringIO()
        # gene_attribute_matrix.to_csv(fp, sep='\t')
        gene_attribute_matrix.to_csv('tmp.txt', sep='\t')

        # Clustergrammer
        from clustergrammer import Network
        net = Network()
        # net.load_tsv_to_net(fp, name) # StringIO
        net.load_file('tmp.txt')
        net.swap_nan_for_zero()
        # Generate
        net.make_clust(dist_type='cos',
                       views=['N_row_sum', 'N_row_var'],
                       dendro=True,
                       sim_mat=True,
                       filter_sim=0.1,
                       calc_cat_pval=False)

        # Insert into database
        cur.execute(
            'insert into `datasets` (`Name`, `prot_att`, `att_att`, `prot_prot`) values (?, ?, ?, ?)',
            (name, net.export_net_json('viz', indent='no-indent'),
             net.export_net_json('sim_col', indent='no-indent'),
             net.export_net_json('sim_row', indent='no-indent')))
        con.commit()
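
The excerpt above is indented because it comes from inside a larger loop in process_matrix.py; name, cur, and con are defined earlier in that file. A hypothetical reconstruction of the surrounding setup (the database file and schema are assumptions):

import sqlite3

con = sqlite3.connect('datasets.db')
cur = con.cursor()
cur.execute('create table if not exists `datasets` '
            '(`Name` text, `prot_att` text, `att_att` text, `prot_prot` text)')

name = 'example_dataset'  # set per matrix inside the loop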
Code Example #13
def main(buff, inst_filename, mongo_address, viz_id):
  from bson.objectid import ObjectId
  from pymongo import MongoClient
  from clustergrammer import Network

  ##############################
  # set up database connection 
  ##############################
  # set up connection 
  client = MongoClient(mongo_address)
  db = client.clustergrammer

  # get placeholder viz data 
  viz_id = ObjectId(viz_id)
  found_viz = db.networks.find_one({'_id':viz_id})

  try:
    ########################
    # load and cluster 
    ########################

    # instantiate the Network class
    net = Network()
    # net.load_lines_from_tsv_to_net(file_lines)
    net.pandas_load_tsv_to_net(buff)

    # swap nans for zero 
    net.swap_nan_for_zero()

    # deprecated clustering module
    ####################################

    # # fast mult views takes care of pre-filtering
    # net.fast_mult_views()

    ####################################

    net.make_filtered_views(dist_type='cosine', dendro=True,
                            views=['filter_row_sum'], linkage_type='average')

    ###############################
    # save to database 
    ###############################

    export_dat = {}
    export_dat['name'] = inst_filename
    export_dat['dat'] = net.export_net_json('dat')
    export_dat['source'] = 'user_upload'
    # save dat to separate document 
    dat_id = db.network_data.insert_one(export_dat).inserted_id  # insert() is deprecated in PyMongo 3+

    update_viz = net.viz 
    update_dat = dat_id

  except Exception:
    print('\n-----------------------')
    print('error in clustering')
    print('-----------------------\n')
    update_viz = 'error'
    update_dat = 'error'

  # update found_viz 
  found_viz['viz'] = update_viz
  found_viz['dat'] = update_dat

  # update found_viz in database 
  db.networks.update_one({'_id': viz_id}, {'$set': found_viz})

  ############################
  # end database connection 
  ############################
  client.close()
Code Example #14
def main(mongo_address, viz_id, vect_post):
  from bson.objectid import ObjectId
  from pymongo import MongoClient
  from clustergrammer import Network

  # set up database connection 
  client = MongoClient(mongo_address)
  db = client.clustergrammer 
  viz_id = ObjectId(viz_id)
  # get placeholder viz data 
  found_viz = db.networks.find_one({'_id': viz_id})

  # initialize export_dat 
  export_dat = {}
  export_viz = {}

  # try to make a clustergram using vect_post
  try:

    # initialize the Network object
    net = Network()
    
    # vector endpoint 
    net.load_vect_post_to_net(vect_post)

    # swap nans for zeros
    net.swap_nan_for_zero()

    # deprecated clustering modules 
    ####################################
    # cluster g2e using pandas
    # net.fast_mult_views()

    # # calculate top views rather than percentage views
    # net.N_top_views()
    ####################################

    net.make_filtered_views(dist_type='cosine', dendro=True,
                            views=['N_row_sum'], linkage_type='average')

    # export dat 
    try:

      # convert numpy arrays to plain lists so they can be JSON-serialized
      net.dat['mat'] = net.dat['mat'].tolist()
      net.dat['mat_up'] = net.dat['mat_up'].tolist()
      net.dat['mat_dn'] = net.dat['mat_dn'].tolist()

      export_dat['dat'] = net.export_net_json('dat')
      export_dat['source'] = 'g2e_enr_vect'
      dat_id = db.network_data.insert_one(export_dat).inserted_id  # insert() is deprecated in PyMongo 3+
      print('G2E: network data successfully uploaded')
    
    except Exception:
      export_dat['dat'] = 'data-too-large'
      export_dat['source'] = 'g2e_enr_vect'
      dat_id = db.network_data.insert_one(export_dat).inserted_id
      print('G2E: network data too large to be uploaded')

    update_viz = net.viz 
    update_dat = dat_id

  # if there is an error, update the JSON with an error flag
  except Exception:

    print('\n--------------------------------')
    print('G2E clustering error')
    print('----------------------------------\n')
    update_viz = 'error'
    update_dat = 'error'


  # export viz to the database

  found_viz['viz'] = update_viz
  found_viz['dat'] = update_dat

  # update the viz data
  try:
    db.networks.update_one({'_id': viz_id}, {'$set': found_viz})
    print('\n\n---------------------------------------------------')
    print('G2E successfully made and uploaded the clustergram')
    print('---------------------------------------------------\n\n')
  except Exception:
    print('\n--------------------------------')
    print('G2E error in loading viz into database')
    print('----------------------------------\n')

  # close database connection 
  client.close()
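
A hypothetical driver for this entry point, reusing the JSON-loading pattern from Example #7 (the address and id are placeholders):

import json

with open('large_vect_post.json') as f:
  vect_post = json.load(f)

main('localhost:27017', '5f0c0ffee0ddba11ad000000', vect_post)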