예제 #1
0
def exp_heatmap_json(request):
    """Return clustergrammer visualization JSON for the expression table.

    Loads every row of the ``Exp`` model from the ``expDb`` database,
    z-score normalizes each row, keeps the 100 columns with the largest
    absolute-value sums, clusters with default parameters, and returns
    the resulting 'viz' JSON wrapped in a Django ``JsonResponse``.

    :param request: Django HTTP request (unused beyond routing).
    :returns: ``JsonResponse`` with ``{'data': <clustergrammer viz dict>}``.
    """
    import pandas as pd
    from clustergrammer import Network

    columns = [f.name for f in Exp._meta.get_fields()]
    exps = Exp.objects.all().using("expDb").values()
    df = pd.DataFrame(list(exps), columns=columns)

    # Index rows by gene id, then remove the gene_id column itself.
    # BUG FIX: the original sliced df.columns[1:], which drops whatever
    # column happens to be FIRST — correct only if gene_id is first in
    # _meta.get_fields(). Dropping by name is safe regardless of order.
    df.index = df.gene_id
    df = df.drop(columns=["gene_id"])

    net = Network()
    net.load_df(df)

    # Z-score normalize the rows
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)

    # filter for the top 100 columns based on their absolute value sum
    net.filter_N_top('col', 100, 'sum')

    # cluster using default parameters
    net.cluster()

    # visualization JSON consumed by the clustergrammer-js front end
    data = json.loads(net.export_net_json('viz'))
    response = {
        'data': data,
    }
    return JsonResponse(response, content_type='application/json')
예제 #2
0
def get_clusters(analysis, data_types):
    """Build clustergrammer network JSON for each omics data type.

    For every entry in *data_types*, fetches the latest analysis data,
    standardizes it via ``WebOmicsInference``, clusters the resulting
    dataframe with clustergrammer, and stores the exported JSON under a
    human-readable label ('gene', 'protein', or 'compound').

    :param analysis: analysis object passed through to
        ``get_last_analysis_data``.
    :param data_types: iterable of data-type constants
        (GENOMICS / PROTEOMICS / METABOLOMICS; others are skipped).
    :returns: dict mapping label -> clustergrammer network JSON string.
    """
    # Loop-invariant lookup table, hoisted out of the loop.
    data_type_label = {
        GENOMICS: 'gene',
        PROTEOMICS: 'protein',
        METABOLOMICS: 'compound'
    }
    cluster_json = {}
    for data_type in data_types:
        analysis_data = get_last_analysis_data(analysis, data_type)
        data_df, design_df = get_dataframes(analysis_data, PKS[data_type],
                                            SAMPLE_COL)
        if data_type == GENOMICS:
            inference = WebOmicsInference(data_df, design_df, data_type)
            df = inference.standardize_df(inference.data_df)
        elif data_type in (PROTEOMICS, METABOLOMICS):
            inference = WebOmicsInference(data_df,
                                          design_df,
                                          data_type,
                                          min_value=5000)
            df = inference.standardize_df(inference.data_df, log=True)
        else:
            # BUG FIX: the original left `df` unbound for unknown data
            # types, crashing with NameError at `df.empty` (or silently
            # reusing the previous iteration's df). Skip them explicitly.
            continue
        if not df.empty:
            net = Network()
            net.load_df(df)
            net.cluster()
            cluster_json[data_type_label[data_type]] = net.export_net_json()
    return cluster_json
예제 #3
0
def build_layout():
    """Build the 'Cluster Comparison' Dash tab.

    Loads the hallmark GSVA cluster-comparison matrix, derives a
    cancer-type row category for each cluster from substrings in its
    name, clusters the matrix with clustergrammer, and wraps the
    resulting network in a ``DashClustergrammer`` component.
    """
    df = load_data.load_gsva_compare_cluster('hallmark')

    # TODO THIS NEEDS TO BE CLEANED UP!!!!!!
    # Substring -> cancer-type label, tested in order. Order matters:
    # 'PJ030' must be checked before the more general 'PJ'.
    marker_to_cancer = [
        ('PJ030', 'LGG'),
        ('PJ', 'GBM'),
        ('LX', 'LUAD'),
        ('GSE146026', 'OV'),
        ('GSE72056', 'SKCM'),
        ('GSE103322', 'HNSC'),
        ('GSE111672', 'PAAD'),
    ]
    cat_to_true = defaultdict(list)
    for clust in df.index:
        for marker, cancer in marker_to_cancer:
            if marker in clust:
                cat_to_true[cancer].append(clust)
                break

    cats = [{
        'title': 'Cancer Type',
        'cats': dict(cat_to_true)
    }]

    net = Network()
    net.load_df(df)
    net.add_cats('row', cats)
    net.make_clust()

    return dcc.Tab(
        label='Cluster Comparison',
        children=[
            dbc.Container(
                fluid=True,
                children=[
                    html.Link(rel='stylesheet',
                              href='./static/custom.css'),
                    dash_clustergrammer.DashClustergrammer(
                        id='cgram-component',
                        label='',
                        network_data=net.viz)
                ])
        ])
예제 #4
0
    def get_clustergrammer_json(self, outfile):
        """Cluster ``self.expression_dataframe`` and write viz JSON to *outfile*.

        Sample categories (``self.sample_cats``) are added when they
        apply to the dataframe. If clustering fails for any reason, an
        empty placeholder file is created at *outfile* so the front end
        still finds a file to load.

        :param outfile: path of the JSON file to write.
        """
        # Create network
        net = Network()

        # Load file
        net.load_df(self.expression_dataframe)

        # Add categories. They are optional — best-effort, as in the
        # original — but catch Exception instead of a bare `except:`,
        # which also swallowed KeyboardInterrupt/SystemExit.
        try:
            net.add_cats('col', self.sample_cats)
        except Exception:
            pass

        try:
            # calculate clustering using default parameters
            net.cluster()

            # save visualization JSON to file for use by front end
            net.write_json_to_file('viz', outfile)
        except Exception:
            # Fall back to an empty placeholder file. Path.touch()
            # replaces os.system('touch …'), which was non-portable and
            # unsafe for paths containing shell metacharacters.
            from pathlib import Path
            Path(outfile).touch()
    def prepare_clustergrammer_data(self,
                                    outfname='clustergrammer_data.json',
                                    G=None):
        """for a distance matrix, output a clustergrammer JSON file
        that clustergrammer-js can use

        for now it loads the clustergrammer-py module from local dev files
        TODO: once changes are pulled into clustergrammer-py, we can use the actual module (pip)

        :outfname: filename for the output json
        :G: networkx graph (use self.G_sym, falling back to self.G, by default)

        """
        # BUG FIX: the original ignored the `G` argument entirely and
        # always recomputed it from self; honor an explicitly passed graph.
        if G is None:
            G = self.G_sym or self.G
        clustergrammer_py_dev_dir = '../clustergrammer/clustergrammer-py/'
        sys.path.insert(0, clustergrammer_py_dev_dir)
        from clustergrammer import Network as ClustergrammerNetwork
        start = timer()
        # NOTE(review): nx.to_numpy_matrix was removed in networkx 3.0;
        # nx.to_numpy_array is the drop-in replacement when upgrading.
        d = nx.to_numpy_matrix(G)
        df = pd.DataFrame(d, index=G.nodes(), columns=G.nodes())
        net = ClustergrammerNetwork()
        net.load_df(df)
        # 'precalculated' tells clustergrammer the matrix already holds
        # pairwise distances, so it must not recompute them.
        net.cluster(dist_type='precalculated')
        logger.debug("done loading and clustering. took {}".format(
            format_timespan(timer() - start)))

        logger.debug("writing to {}".format(outfname))
        start = timer()
        net.write_json_to_file('viz', outfname)
        logger.debug("done writing file {}. took {}".format(
            outfname, format_timespan(timer() - start)))
예제 #6
0
# Attach flat (name, 'Category: value', ...) tuples to both axes so
# clustergrammer picks up row and column categories from the labels.
new_rows = [(row_name, 'Cat-1: A', 'Cat-2: B') for row_name in df.index]
new_cols = [(col_name, 'Cell Type: Unknown') for col_name in df.columns]

df.index = new_rows
df.columns = new_cols

net.load_df(df)

# Cluster with cosine distance, building the row-sum / row-variance
# views and the dendrogram; similarity matrices and category p-values
# are skipped, enrichrgram is enabled.
net.cluster(dist_type='cos',
            views=['N_row_sum', 'N_row_var'],
            dendro=True,
            sim_mat=False,
            filter_sim=0.1,
            calc_cat_pval=False,
            enrichrgram=True)

# write the JSON consumed by the front-end visualization
net.write_json_to_file('viz', 'data/big_data/custom.json', 'no-indent')
예제 #7
0
# make some plots with the combined data
# NOTE(review): `swarms`, `final_LassoLD`, `featMatAll`, `directoryA` are
# defined elsewhere in this script — presumably a swarm-plot helper over
# the selected features; verify upstream.
for item in final_LassoLD:
    swarms('all', item, featMatAll, directoryA, '.tif' )
    plt.close()




#%%Junk from here on — scratch/experimental code below this marker
    
    
# try clustergrammer on the cluster matrix
from clustergrammer import Network
net = Network()
net.load_df(cloz_cluster)

# filter for the top 16 columns based on their absolute value sum
net.filter_N_top('col', 16, 'sum')

# cluster using default parameters
net.cluster()

#leg_patch = mpatches.Patch(color = cmap, label=[lut.keys()])
#plt.legend(handles=[leg_patch])

# NOTE(review): plt.legend expects (handles, labels); passing a colormap
# object as a handle is unlikely to render a legend — confirm intent.
plt.legend ([cmap], lut.keys())
plt.show()
# NOTE(review): stray bare expression — raises NameError unless `x` is
# defined earlier; looks like leftover scratch and should be deleted.
x
# NOTE(review): `rep1`, `feature`, `features_df`, `directory`, `file_type`
# must be defined earlier in the script for this call to work.
swarms (rep1, feature, features_df, directory, file_type)