# to convert network in gml to json format for web visualization
import json
import sys

import networkx as nx
from networkx.readwrite import json_graph

# The project-local helper lives outside the package path, so the path must
# be extended before `fileIO` can be imported.
sys.path.append('/Users/zichen/Documents/bitbucket/maayanlab_utils')
from fileIO import mysqlTable2dict

# Lookup used below to attach an xref to "SE" nodes, keyed by node label.
# NOTE(review): presumably side-effect name -> UMLS id (columns 2 and 1 of
# `side_effects`); confirm against mysqlTable2dict.
d_sename_umls = mysqlTable2dict('sep', 'side_effects', 2, 1)


def gml2json(gml_fn, json_fn):
    """Load the GML graph at *gml_fn* and normalise its node attributes.

    For every node:
      * a label of the form ``a|b`` is rewritten to ``b (a)``;
      * nodes whose ``type`` is ``"SE"`` get type ``'triangle-up'`` and an
        ``xref`` looked up in ``d_sename_umls`` by label (raises ``KeyError``
        if the label is unknown);
      * for all other nodes the label is split at its last ``(`` into a
        display label and an xref.

    NOTE(review): only part of this function is visible in the reviewed
    excerpt; *json_fn* is not used in the visible portion.
    """
    G = nx.read_gml(gml_fn)
    # Single-argument print with explicit formatting produces the same
    # "nodes edges" line under both Python 2 and Python 3.
    print('%s %s' % (G.number_of_nodes(), G.number_of_edges()))
    for node_id in G.nodes():
        # node_dict is the graph's own attribute dict for this node, so
        # writes through it update the graph in place.
        node_dict = G.node[node_id]
        if '|' in node_dict['label']:
            sl = node_dict['label'].split('|')
            node_dict['label'] = '%s (%s)' % (sl[1], sl[0])
        # clean labels and add xref
        if node_dict['type'] == "SE":
            node_dict['type'] = 'triangle-up'
            node_dict['xref'] = d_sename_umls[node_dict['label']]  # umls
        else:
            # Split at the LAST '(' so parentheses inside the name survive;
            # with no '(' at all, label is '' and xref is the whole string.
            label, _, xref = node_dict['label'].rpartition('(')
            label = label.strip()
            # NOTE(review): the reviewed excerpt ends here; `label` and
            # `xref` are not yet written back to the node in the visible code.
PREDICTION_DF = HOME + '/Documents/Zichen_Projects/drug_se_prediction/PTs_RF1000_proba_df_n20338x1053.txt'

## for side effects
# GMT_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/RF1000_GOtCS_AUC_0.7_proba_0.6_prediction_only.gmt'
GMT_FN = HOME + '/Documents/Zichen_Projects/drug_se_prediction/ET100_GOtCS_AUC_0.76_proba_0.75.gmt'
GML_FN = HOME + '/Documents/Zichen_Projects/drug_se_prediction/side_effect_network.gml'
## for drugs
# GMT_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/RF1000_GOtCS_AUC_0.7_proba_0.6_prediction_only_flipped.gmt'
# GML_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/drug_network.gml'

# Output files sit next to the GML file and differ only by extension.
CSV_FN = GML_FN.replace('.gml', '.csv')
JSON_FN = CSV_FN.replace('.csv', '.json')

## retrieve meta data about SE
# NOTE(review): presumably UMLS id -> preferred term (PT) and the reverse
# mapping; confirm the column order against mysqlTable2dict.
d_umls_pt = mysqlTable2dict('sep', 'side_effects', 1, 2)
d_pt_umls = mysqlTable2dict('sep', 'side_effects', 2, 1)
d_soc_pt = read_gmt(HOME + '/Documents/bitbucket/pertid2trainingset/Y_matrix_no_mfc/SOC_to_pt.gmt')
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(len(d_soc_pt))

# Map each UMLS id to one SOC. A PT that is missing from d_pt_umls raises
# KeyError; a PT whose stored UMLS id is None is skipped.
d_umls_soc = {}
for soc, pts in d_soc_pt.items():
    for pt in pts:
        umls = d_pt_umls[pt]
        if umls is not None:
            # One PT may have multiple SOCs; only the first SOC seen is kept.
            # (Disabled alternative: d_umls_soc[umls].append(soc))
            d_umls_soc.setdefault(umls, soc)
# (Disabled sanity check: for pt in d_umls_soc: if len(d_umls_soc[pt]) != 1: ...)
# NOTE(review): the reviewed excerpt ends inside that commented-out check.
# NOTE(review): this excerpt starts inside a block whose header is not
# visible; `kwargs`, `session`, `SideEffect`, `mat`, `pert_ids`, `se_names`
# and `aucs` all come from that unseen context, and the original nesting
# level of the first two statements cannot be determined from here.

# Attach the SOC for this UMLS id, or None when the id has no known SOC.
kwargs['soc'] = d_umls_soc.get(kwargs['umls_id'])
instance = get_or_create(session, SideEffect, **kwargs)

# For each row of `mat`, keep only the entries whose value exceeds 0.5 and
# store them for the matching pert_id.
for pvals, pert_id in zip(mat, pert_ids):
    mask = pvals > 0.5
    se_names_pos, aucs_pos, pvals_pos = (
        values[mask].tolist() for values in (se_names, aucs, pvals)
    )
    add_predictions(se_names_pos, aucs_pos, pert_id, pvals_pos, session)

## transfer association tables
# sider_connections
d_pert_ids = mysqlTable2dict('maaya0_SEP', 'drugs_lincs', 0, 1)
d_umls_id = mysqlTable2dict('maaya0_SEP', 'side_effects', 0, 1)
conn = MySQLdb.connect(
    host='localhost',
    user='******',
    passwd='',
    db='maaya0_SEP',
)
cur = conn.cursor()
query = """SELECT * FROM `%s`""" % 'sider_connections'
cur.execute(query)

# Group UMLS ids by pert_id, keeping only pairs whose two ids both appear in
# the lookup dictionaries loaded above. Each row is unpacked into exactly
# two values.
d_pert_umls_ids = {}
for pert_id, umls_id in cur:
    if not (pert_id in d_pert_ids and umls_id in d_umls_id):
        continue
    if pert_id not in d_pert_umls_ids:
        d_pert_umls_ids[pert_id] = [umls_id]
    # NOTE(review): the reviewed excerpt ends here; handling of a pert_id
    # that is already present is not visible.
# NOTE(review): this excerpt starts inside a branch whose header is not
# visible; the condition guarding the next assignment and the original
# nesting level of the first two statements cannot be determined from here.
kwargs['soc'] = None
instance = get_or_create(session, SideEffect, **kwargs)

# For each row of `mat`, keep only the entries whose value exceeds 0.5 and
# store them for the matching pert_id.
for pvals, pert_id in zip(mat, pert_ids):
    mask = pvals > 0.5
    se_names_pos, aucs_pos, pvals_pos = (
        values[mask].tolist() for values in (se_names, aucs, pvals)
    )
    add_predictions(se_names_pos, aucs_pos, pert_id, pvals_pos, session)

## transfer association tables
# sider_connections
d_pert_ids = mysqlTable2dict('maaya0_SEP', 'drugs_lincs', 0, 1)
d_umls_id = mysqlTable2dict('maaya0_SEP', 'side_effects', 0, 1)
conn = MySQLdb.connect(
    host='localhost',
    user='******',
    passwd='',
    db='maaya0_SEP',
)
cur = conn.cursor()
query = """SELECT * FROM `%s`""" % 'sider_connections'
cur.execute(query)

# Group UMLS ids by pert_id without duplicates, in first-seen order. Only
# pairs whose two ids both appear in the lookup dictionaries loaded above
# are kept. Each row is unpacked into exactly two values.
d_pert_umls_ids = {}
for pert_id, umls_id in cur:
    if pert_id not in d_pert_ids or umls_id not in d_umls_id:
        continue
    seen_ids = d_pert_umls_ids.setdefault(pert_id, [])
    if umls_id not in seen_ids:
        seen_ids.append(umls_id)
# to convert network in gml to json format for web visualization
import json
import sys

import networkx as nx
from networkx.readwrite import json_graph

# The project-local helper lives outside the package path, so the path must
# be extended before `fileIO` can be imported.
sys.path.append('/Users/zichen/Documents/bitbucket/maayanlab_utils')
from fileIO import mysqlTable2dict

# Lookup used below to attach an xref to "SE" nodes, keyed by node label.
# NOTE(review): presumably side-effect name -> UMLS id (columns 2 and 1 of
# `side_effects`); confirm against mysqlTable2dict.
d_sename_umls = mysqlTable2dict('sep', 'side_effects', 2, 1)


def gml2json(gml_fn, json_fn):
    """Load the GML graph at *gml_fn* and normalise its node attributes.

    For every node:
      * a label of the form ``a|b`` is rewritten to ``b (a)``;
      * nodes whose ``type`` is ``"SE"`` get type ``'triangle-up'`` and an
        ``xref`` looked up in ``d_sename_umls`` by label (raises ``KeyError``
        if the label is unknown);
      * for all other nodes the label is split at its last ``(`` into a
        display label and an xref with any ``)`` stripped from its ends.

    NOTE(review): only part of this function is visible in the reviewed
    excerpt; *json_fn* is not used in the visible portion.
    """
    G = nx.read_gml(gml_fn)
    # Single-argument print with explicit formatting produces the same
    # "nodes edges" line under both Python 2 and Python 3.
    print('%s %s' % (G.number_of_nodes(), G.number_of_edges()))
    for node_id in G.nodes():
        # node_dict is the graph's own attribute dict for this node, so
        # writes through it update the graph in place.
        node_dict = G.node[node_id]
        if '|' in node_dict['label']:
            sl = node_dict['label'].split('|')
            node_dict['label'] = '%s (%s)' % (sl[1], sl[0])
        # clean labels and add xref
        if node_dict['type'] == "SE":
            node_dict['type'] = 'triangle-up'
            node_dict['xref'] = d_sename_umls[node_dict['label']]  # umls
        else:
            # Split at the LAST '(' so parentheses inside the name survive;
            # with no '(' at all, label is '' and xref is the whole string.
            label, _, xref = node_dict['label'].rpartition('(')
            label = label.strip()
            xref = xref.strip(')')
            # NOTE(review): the reviewed excerpt ends here; `label` and
            # `xref` are not yet written back to the node in the visible code.
PREDICTION_DF = HOME + '/Documents/Zichen_Projects/drug_se_prediction/PTs_RF1000_proba_df_n20338x1053.txt'

## for side effects
# GMT_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/RF1000_GOtCS_AUC_0.7_proba_0.6_prediction_only.gmt'
GMT_FN = HOME + '/Documents/Zichen_Projects/drug_se_prediction/ET100_GOtCS_AUC_0.76_proba_0.75.gmt'
GML_FN = HOME + '/Documents/Zichen_Projects/drug_se_prediction/side_effect_network.gml'
## for drugs
# GMT_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/RF1000_GOtCS_AUC_0.7_proba_0.6_prediction_only_flipped.gmt'
# GML_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/drug_network.gml'

# Output files sit next to the GML file and differ only by extension.
CSV_FN = GML_FN.replace('.gml', '.csv')
JSON_FN = CSV_FN.replace('.csv', '.json')

## retrieve meta data about SE
# NOTE(review): presumably UMLS id -> preferred term (PT) and the reverse
# mapping; confirm the column order against mysqlTable2dict.
d_umls_pt = mysqlTable2dict('sep', 'side_effects', 1, 2)
d_pt_umls = mysqlTable2dict('sep', 'side_effects', 2, 1)
d_soc_pt = read_gmt(
    HOME + '/Documents/bitbucket/pertid2trainingset/Y_matrix_no_mfc/SOC_to_pt.gmt')
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(len(d_soc_pt))

# Map each UMLS id to one SOC. A PT that is missing from d_pt_umls raises
# KeyError; a PT whose stored UMLS id is None is skipped.
d_umls_soc = {}
for soc, pts in d_soc_pt.items():
    for pt in pts:
        umls = d_pt_umls[pt]
        if umls is not None:
            # One PT may have multiple SOCs; only the first SOC seen is kept.
            # (Disabled alternative: d_umls_soc[umls].append(soc))
            d_umls_soc.setdefault(umls, soc)