## build a clustergram JSON from a signed Jaccard similarity matrix for d3_clustergram
import os, sys, json
from clustergram import clustergram
from orm_utils import *  # assumed to provide DBSignature
sys.path.append('../../maayanlab_utils')
from fileIO import read_df




def find_name_for_id(uid):
	"""Look up a signature by its id and return a 'name|geo_id' label."""
	# exclude the bulky per-gene fields; only metadata is needed for the label
	projection = {'_id': False, 'limma': False, 'fold_changes': False, 'chdir': False}
	dbs = DBSignature(uid, projection)
	return dbs.name + '|' + dbs.meta['geo_id']

# mat, ids, _ = read_df('../signed_jaccard_subset_unique_entries_831x831.txt')
# mat, ids, _ = read_df('../signed_jaccard_subset_unique_entries_519x519.txt')
mat, ids, _ = read_df('../signed_jaccard_subset_unique_entries_259x259.txt')

names = map(find_name_for_id, ids)

json_data = clustergram(mat, names, names,
	row_linkage='average', col_linkage='average',
	row_pdist='cosine', col_pdist='cosine')

with open('/Library/WebServer/Documents/d3_clustergram/signed_jaccard_subset_clustergram_259.json', 'w') as out:
	json.dump(json_data, out)


## Example #2
## convert repFPKM table to MySQL database
import os, sys
import MySQLdb
import numpy as np
sys.path.append(r'C:\Users\Zichen\Documents\bitbucket\maayanlab_utils')  # raw string so '\b' etc. are not treated as escapes
from fileIO import read_df

os.chdir(r'D:\Zichen_Projects\Rendl_RNAseq')

mat, gene_tids, samples = read_df('repFpkmMatrix_allGenes.txt')
print mat.shape, len(gene_tids), len(samples)

d_sig = {}
with open('signatures.txt') as f:
	# header row: one signature name per tab-delimited column
	signatures = next(f).split('\t')
	for line in f:
		sl = line.split('\t')
		for gene, sig in zip(sl, signatures):
			gene = gene.strip()
			if gene != '':
				d_sig[gene] = sig.strip()
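# The parsing above implies a column-per-signature layout, roughly like this
# (hypothetical contents, tab-delimited; short rows and empty cells are allowed):
#   sig_A	sig_B	sig_C
#   GENE1	GENE4	GENE6
#   GENE2	GENE5
#   GENE3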

# from pprint import pprint
# pprint(d_sig)

conn = MySQLdb.connect(host='localhost',user='******', passwd='',db='hairgel')
cur = conn.cursor()


ii = 0
for row, gene_tid in zip(mat, gene_tids):
	# gene_tid looks like 'SYMBOL1,SYMBOL2|transcript_id'; keep the gene symbols
	genes = gene_tid.split('|')[0].split(',')
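	# The rest of the loop body is cut off in this snippet. A minimal sketch of
	# a plausible completion, assuming a `genes` table with (symbol, signature,
	# sample, fpkm) columns -- the table name and schema are assumptions, not
	# from the original code:
	for gene in genes:
		sig = d_sig.get(gene, None)  # signature label parsed above, if any
		for sample, fpkm in zip(samples, row):
			cur.execute(
				"""INSERT INTO `genes` (`symbol`, `signature`, `sample`, `fpkm`) VALUES (%s, %s, %s, %s)""",
				(gene, sig, sample, float(fpkm)))
	ii += 1
	if ii % 1000 == 0:
		print ii, 'genes processed'
		conn.commit()
conn.commit()
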
## transfer drug tables
# transfer_table, the Drug* models and `session` are assumed to come from the
# project's ORM utilities (imported elsewhere in the original script)
transfer_table('drugs_lincs', DrugLINCS, session)
transfer_table('drugs_drugbank', DrugDrugbank, session)
transfer_table('drugs_stitch', DrugStitch, session)


# p_vals from the latest prediction
## get AUC for each side_effect
import pickle  # used below for the d_umls_soc / d_soc_color lookups
# HOME is assumed to be set earlier in the original script, e.g. HOME = os.environ['HOME']

# aucs = np.loadtxt(HOME + '/Documents/Zichen_Projects/drug_se_prediction/ExtraTrees100_RLogit_GO + MACCS_per-label_AUC_n794x1053.txt').mean(axis=1)
aucs = np.loadtxt(HOME + '/Documents/Zichen_Projects/drug_se_prediction/ET100_balanced_BRLogit2_auto_GO + MACCS_per-label_AUC-S3folds_n794x1053_top_50_features.txt').mean(axis=1)
print len(aucs)
print np.percentile(aucs, 75)
print aucs[aucs > 0.7].shape

mat, pert_ids, se_names = read_df(HOME + '/Documents/Zichen_Projects/drug_se_prediction/PTs_RF1000_proba_df_n20338x1053.txt')
print mat.shape
se_names = np.array(se_names)

d_se_auc = dict(zip(se_names, aucs))
d_umls_soc = pickle.load(open(HOME+'/Documents/Zichen_Projects/drug_se_prediction/d_umls_soc.p', 'rb'))
d_soc_color = pickle.load(open(HOME+'/Documents/Zichen_Projects/drug_se_prediction/d_soc_color.p', 'rb'))

## add soc table from d_soc_color
for soc, color in d_soc_color.items():
	instance = get_or_create(session, SOC, name=soc, color=color)

## transfer side_effects and add AUROC 
conn = MySQLdb.connect(host='localhost',user='******', passwd='',db='maaya0_SEP')
cur = conn.cursor()
query = """SELECT * FROM `%s`""" %'side_effects'
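# The original snippet stops after building the query. A minimal sketch of the
# transfer it sets up, assuming the table yields (umls_id, name) rows and that a
# SideEffect model exists -- the model and column layout are assumptions, not
# from the original code:
cur.execute(query)
for umls_id, name in cur.fetchall():
	get_or_create(session, SideEffect,
		umls_id=umls_id,
		name=name,
		soc=d_umls_soc.get(umls_id),
		auroc=d_se_auc.get(name))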