## Build a signed-Jaccard clustergram JSON for the d3_clustergram viewer.
## Python 2 script: reads a pairwise signed-Jaccard matrix, labels each
## signature via the signature DB, clusters, and dumps viewer-ready JSON.
import os, sys, json
from clustergram import clustergram
from orm_utils import *
sys.path.append('../../maayanlab_utils')
from fileIO import read_df


def find_name_for_id(uid):
    """Return the display label "<name>|<GEO id>" for a signature uid."""
    # Exclude the heavyweight array fields: only metadata is needed here.
    fields_to_skip = {'_id': False, 'limma': False, 'fold_changes': False, 'chdir': False}
    sig = DBSignature(uid, fields_to_skip)
    return sig.name + '|' + sig.meta['geo_id']


# Earlier runs used larger subsets; kept for reference:
# mat, ids, _ = read_df('../signed_jaccard_subset_unique_entries_831x831.txt')
# mat, ids, _ = read_df('../signed_jaccard_subset_unique_entries_519x519.txt')
mat, ids, _ = read_df('../signed_jaccard_subset_unique_entries_259x259.txt')

# One human-readable label per signature id (Py2: same list map() would give).
names = [find_name_for_id(uid) for uid in ids]

# Rows and columns are the same signatures, so both axes get identical
# labels and identical clustering: average linkage over cosine distances.
json_data = clustergram(mat, names, names,
                        row_linkage='average', col_linkage='average',
                        row_pdist='cosine', col_pdist='cosine')

out_file = '/Library/WebServer/Documents/d3_clustergram/signed_jaccard_subset_clustergram_259.json'
json.dump(json_data, open(out_file, 'wb'))
## convert repFPKM table to MySQL database import os, sys import MySQLdb import numpy as np sys.path.append('C:\Users\Zichen\Documents\\bitbucket\maayanlab_utils') from fileIO import read_df os.chdir('D:\Zichen_Projects\Rendl_RNAseq') mat, gene_tids, samples = read_df('repFpkmMatrix_allGenes.txt') print mat.shape, len(gene_tids), len(samples) d_sig = {} with open ('signatures.txt') as f: signatures = next(f).split('\t') for line in f: sl = line.split('\t') for gene, sig in zip(sl, signatures): if gene != '' and gene.strip() != '': d_sig[gene.strip()] = sig.strip() # from pprint import pprint # pprint(d_sig) conn = MySQLdb.connect(host='localhost',user='******', passwd='',db='hairgel') cur = conn.cursor() ii = 0 for row, gene_tid in zip(mat, gene_tids): genes = gene_tid.split('|')[0].split(',')
## transfer drug tables transfer_table('drugs_lincs', DrugLINCS, session) transfer_table('drugs_drugbank', DrugDrugbank, session) transfer_table('drugs_stitch', DrugStitch, session) # p_vals from the latest prediction ## get AUC for each side_effect # aucs = np.loadtxt(HOME + '/Documents/Zichen_Projects/drug_se_prediction/ExtraTrees100_RLogit_GO + MACCS_per-label_AUC_n794x1053.txt').mean(axis=1) aucs = np.loadtxt(HOME + '/Documents/Zichen_Projects/drug_se_prediction/ET100_balanced_BRLogit2_auto_GO + MACCS_per-label_AUC-S3folds_n794x1053_top_50_features.txt').mean(axis=1) print len(aucs) print np.percentile(aucs, 75) print aucs[aucs > 0.7].shape mat, pert_ids, se_names = read_df(HOME + '/Documents/Zichen_Projects/drug_se_prediction/PTs_RF1000_proba_df_n20338x1053.txt') print mat.shape se_names = np.array(se_names) d_se_auc = dict(zip(se_names, aucs)) d_umls_soc = pickle.load(open(HOME+'/Documents/Zichen_Projects/drug_se_prediction/d_umls_soc.p', 'rb')) d_soc_color = pickle.load(open(HOME+'/Documents/Zichen_Projects/drug_se_prediction/d_soc_color.p', 'rb')) ## add soc table from d_soc_color for soc, color in d_soc_color.items(): instance = get_or_create(session, SOC, name=soc, color=color) ## transfer side_effects and add AUROC conn = MySQLdb.connect(host='localhost',user='******', passwd='',db='maaya0_SEP') cur = conn.cursor() query = """SELECT * FROM `%s`""" %'side_effects'
transfer_table('drugs_stitch', DrugStitch, session) # p_vals from the latest prediction ## get AUC for each side_effect # aucs = np.loadtxt(HOME + '/Documents/Zichen_Projects/drug_se_prediction/ExtraTrees100_RLogit_GO + MACCS_per-label_AUC_n794x1053.txt').mean(axis=1) aucs = np.loadtxt( HOME + '/Documents/Zichen_Projects/drug_se_prediction/ET100_balanced_BRLogit2_auto_GO + MACCS_per-label_AUC-S3folds_n794x1053_top_50_features.txt' ).mean(axis=1) print len(aucs) print np.percentile(aucs, 75) print aucs[aucs > 0.7].shape mat, pert_ids, se_names = read_df( HOME + '/Documents/Zichen_Projects/drug_se_prediction/PTs_RF1000_proba_df_n20338x1053.txt' ) print mat.shape se_names = np.array(se_names) d_se_auc = dict(zip(se_names, aucs)) d_umls_soc = pickle.load( open(HOME + '/Documents/Zichen_Projects/drug_se_prediction/d_umls_soc.p', 'rb')) d_soc_color = pickle.load( open(HOME + '/Documents/Zichen_Projects/drug_se_prediction/d_soc_color.p', 'rb')) ## add soc table from d_soc_color for soc, color in d_soc_color.items(): instance = get_or_create(session, SOC, name=soc, color=color)
def find_name_for_id(uid): projection = { '_id': False, 'limma': False, 'fold_changes': False, 'chdir': False } dbs = DBSignature(uid, projection) return dbs.name + '|' + dbs.meta['geo_id'] # mat, ids, _ = read_df('../signed_jaccard_subset_unique_entries_831x831.txt') # mat, ids, _ = read_df('../signed_jaccard_subset_unique_entries_519x519.txt') mat, ids, _ = read_df('../signed_jaccard_subset_unique_entries_259x259.txt') names = map(find_name_for_id, ids) json_data = clustergram(mat, names, names, row_linkage='average', col_linkage='average', row_pdist='cosine', col_pdist='cosine') json.dump( json_data, open( '/Library/WebServer/Documents/d3_clustergram/signed_jaccard_subset_clustergram_259.json',