def compute_module_count(inf, is_mmod):
    """Return how many modules the module file ``inf`` contains.

    ``is_mmod`` selects the reader: ``eval_util.read_mmods`` when truthy,
    ``eval_util.read_rmods`` otherwise.  The result is ``len()`` of whatever
    the chosen reader returns.
    """
    reader = eval_util.read_mmods if is_mmod else eval_util.read_rmods
    return len(reader(inf))
def compute_efficacy(inf, is_rmod, is_partial_module, S, mets, reacts):
    """Compute the efficacy score of the modules stored in ``inf``.

    With ``sizes`` the list of module sizes and ``N`` their sum, the score is

        (log f(N) - log F(sizes, len(sizes))) / (log f(N) - log(2 * f(sqrt(N))))

    where ``f`` is ``efficacy_f``, ``F`` is ``efficacy_F`` and both receive the
    module-level ``alpha`` plus ``p = efficacy_empirical_p(N)``.

    Args:
        inf: module file handed to the ``eval_util`` reader.
        is_rmod: selects the reader (see the review note in the body).
        is_partial_module, S, mets, reacts: accepted for signature
            compatibility; not used by this function.

    Returns:
        The efficacy as a float, or ``0.0`` when the file holds no modules.
    """
    # NOTE(review): is_rmod=True selects read_mmods here, which looks inverted
    # compared with compute_module_count (is_mmod -> read_mmods).  Left
    # unchanged because callers may depend on it -- please confirm.
    if is_rmod:
        mods = eval_util.read_mmods(inf)
    else:
        mods = eval_util.read_rmods(inf)

    sizes = [len(members) for members in mods.values()]
    # TODO:PARTIAL: elements outside every module are deliberately ignored
    # (no catch-all module is created for them).

    # Bail out before touching the helper functions: an empty partition gives
    # N == 0, which previously reached efficacy_empirical_p() for no reason.
    if not sizes:
        return 0.0

    N = sum(sizes)
    p = efficacy_empirical_p(N)
    log_f_n = math.log(efficacy_f(N, alpha, p))  # shared by both terms below
    numerator = log_f_n - math.log(efficacy_F(sizes, len(sizes), alpha, p))
    denominator = log_f_n - math.log(2 * efficacy_f(math.sqrt(N), alpha, p))
    return numerator / denominator
def general_compute_distance(cached_reaction_similarities, similarity_table, table_gene_order, inf, species,
                             criteria, factor1, factor2, is_mmod, is_partial_module, S, mets, reacts, revs,
                             go_loaded_model, moredata_loaded_model):
    """Score reaction modules by intra-module minus inter-module similarity.

    The modules are read from ``inf`` (metabolite modules are first converted
    to reaction modules when ``is_mmod`` is truthy).  In partial mode every
    reaction of ``reacts`` that belongs to no module is collected under
    ``eval_constants.EXTERNAL_MODULE_ID``.  The modules are then passed
    through ``filter_out_reactions_by_type`` with the prefix ``'R_EX'`` for
    species ``'arabidopsis_irs1597'`` and ``'R_EX_'`` for every other species.

    For each ordered pair of modules the pairwise values of
    ``compute_similarity_of_gene_pair`` are summed (a reaction is never paired
    with itself inside one module), normalised by the product of the two
    module sizes and accumulated into an intra and an inter score; the
    external module's pieces are subtracted instead of added.

    ``table_gene_order`` and ``revs`` are not used by this function.

    Returns:
        ``intra_score - inter_score``.
    """
    if not is_mmod:
        mods = eval_util.read_rmods(inf)
    else:
        mmods = eval_util.read_mmods(inf)
        mods = eval_util.convert_metabmod_to_reactmod(mmods, is_partial_module, S, mets, reacts)

    # TODO:PARTIAL: all reactions that are not counted in modules (NO: are assigned to external module
    # and) -1 (like blocked reactions) is placed for couplings (in associated rows and columns)
    if is_partial_module:
        assigned = set()
        for members in mods.values():
            assigned.update(members)
        unassigned = set(reacts) - assigned
        if unassigned:
            mods[eval_constants.EXTERNAL_MODULE_ID] = unassigned

    if species == 'arabidopsis_irs1597':
        excluded_prefix = 'R_EX'
    else:
        excluded_prefix = 'R_EX_'
    mods = filter_out_reactions_by_type(mods, [(0, excluded_prefix)])

    mods_similarity_table = {}
    for name_a, members_a in mods.items():
        row = {}
        mods_similarity_table[name_a] = row
        for name_b, members_b in mods.items():
            same_module = name_a == name_b
            # Designed to work for overlapping modules: only a reaction's
            # similarity with itself inside a single module is left out.
            row[name_b] = sum(
                compute_similarity_of_gene_pair(cached_reaction_similarities, r1, r2, similarity_table,
                                                species, criteria, factor1, factor2, go_loaded_model,
                                                moredata_loaded_model)
                for r1 in members_a
                for r2 in members_b
                if not same_module or r1 != r2)

    # TODO:BUG: for overlapping modules this fails because (len(rxns) - len(mods[mod_name])) is wrong
    intra_score = 0
    inter_score = 0
    for mod_name, sims in mods_similarity_table.items():
        own_size = len(mods[mod_name])
        intra_piece = sims[mod_name] * 1.0 / (own_size * own_size)
        inter_piece = sum(
            sims[other_name] * 1.0 / (own_size * len(mods[other_name]))
            for other_name in mods_similarity_table
            if other_name != mod_name)

        if mod_name != eval_constants.EXTERNAL_MODULE_ID:
            intra_score += intra_piece
            inter_score += inter_piece
        else:
            intra_score -= intra_piece
            inter_score -= inter_piece

    return intra_score - inter_score
def compute_cohesion_coupling(inf, is_mmod, is_partial_module, S, mets, reacts, revs):
    """Return the coupling/uncoupling score of the modules stored in ``inf``.

    Steps:
      1. ``compute_couplings`` is called with ``S``, ``mets``, ``reacts``,
         ``revs`` and one more than the largest ``len()`` found among the
         entries of ``reacts`` and of ``mets`` respectively.
      2. Modules are read from ``inf``; metabolite modules are converted to
         reaction modules when ``is_mmod`` is truthy.
      3. In partial mode, reactions covered by no module are gathered under
         ``eval_constants.EXTERNAL_MODULE_ID`` and their rows and columns in a
         private copy of the coupling matrix are overwritten with ``-1``.
      4. The result of ``compute_module_coupling_table`` is handed to
         ``compute_coupling_uncoupling_score`` and its value returned.
    """
    react_width = max(len(r) for r in reacts) + 1
    met_width = max(len(m) for m in mets) + 1
    couplings, blocks = compute_couplings(S, mets, reacts, revs, react_width, met_width)

    if not is_mmod:
        mods = eval_util.read_rmods(inf)
    else:
        mmods = eval_util.read_mmods(inf)
        mods = eval_util.convert_metabmod_to_reactmod(mmods, is_partial_module, S, mets, reacts)

    # TODO:PARTIAL: all reactions that are not counted in modules (NO: are assigned to external module
    # and) -1 (like blocked reactions) is placed for couplings (in associated rows and columns)
    if is_partial_module:
        covered = set()
        for members in mods.values():
            covered.update(members)
        uncovered = set(reacts) - covered
        if uncovered:
            mods[eval_constants.EXTERNAL_MODULE_ID] = uncovered

            # Work on a copy so the matrix returned by compute_couplings is
            # not modified in place.
            couplings = copy.deepcopy(couplings)
            n_reacts = len(reacts)
            for r in uncovered:
                r_idx = reacts.index(r)
                for i in range(n_reacts):
                    couplings[i][r_idx] = -1
                    couplings[r_idx][i] = -1

    module_coupling_table, module_order_in_header = compute_module_coupling_table(
        couplings, blocks, mods, reacts)
    return compute_coupling_uncoupling_score(module_coupling_table, module_order_in_header)
def compute_and_print_size_distribution(inf, is_mmod, species, method):
    """Plot the module-size distribution of ``inf`` with R and link to it.

    An R script is written to ``size_distrib.r`` in the current working
    directory and run through ``my_constants.rScriptPath``.  It renders a
    histogram to ``<evalResultPath>/sizes/<species>_<method>.png`` and a
    stacked strip chart to ``<evalResultPath>/sizes/sc_<species>_<method>.png``.

    Args:
        inf: module file handed to the ``eval_util`` reader.
        is_mmod: truthy to read with ``read_mmods``, else ``read_rmods``.
        species, method: used in the output file names and plot titles.

    Returns:
        A spreadsheet formula string ``=HYPERLINK("<histogram path>")``.

    Raises:
        RuntimeError: the R command exited with a non-zero status and the
            histogram file does not exist (previously this case waited
            forever).
    """
    out_dir = '%s/sizes' % my_constants.evalResultPath
    my_util.mkdir_p(out_dir)

    if is_mmod:
        mods = eval_util.read_mmods(inf)
    else:
        mods = eval_util.read_rmods(inf)
    sizes = [len(members) for members in mods.values()]

    result_file = '%s/%s_%s.png' % (out_dir, species, method)
    alt_result_file = '%s/sc_%s_%s.png' % (out_dir, species, method)

    # Both plots share the same data vector and title arguments.
    plot_args = (','.join([str(s) for s in sizes]), len(mods), species + ' ' + method)
    script_lines = [
        'png("%s")' % result_file,
        "hist(c(%s), main='%d modules (%s)', xlab='', ylab='')" % plot_args,
        'dev.off()',
        'png("%s")' % alt_result_file,
        "stripchart(c(%s), main='%d modules (%s)', method='stack', offset=0.5, pch=1)" % plot_args,
        'dev.off()',
    ]
    # 'with' guarantees the script is flushed and closed even if a write fails.
    with open('size_distrib.r', 'w') as f:
        for line in script_lines:
            write_line(f, line)

    status = os.system('%s size_distrib.r' % my_constants.rScriptPath)
    if status != 0 and not os.path.isfile(result_file):
        # R failed before producing the histogram: polling would never end.
        raise RuntimeError(
            'R script size_distrib.r failed with status %s; %s was not created' % (status, result_file))

    # The image may show up with some delay; poll until it is visible.
    while not os.path.isfile(result_file):
        time.sleep(5)

    return '=HYPERLINK("%s")' % result_file
def compute_modularity(inf, S, mets, reacts, is_partial_module):
    """Compute a modularity score for the metabolite modules stored in ``inf``.

    ``S`` is indexed as ``S[metabolite_index][reaction_index]``; a non-zero
    entry means the metabolite takes part in the reaction.  Each reaction is
    treated as one edge.  For every module the term

        ls / L - (ds / (2 * L)) ** 2

    is accumulated, where ``ls`` is the number of reactions whose metabolites
    all lie in that module, ``ds`` is the summed number of reactions touching
    each of the module's metabolites, and ``L`` is half the number of non-zero
    entries of ``S``.  The term of a module named
    ``eval_constants.EXTERNAL_MODULE_ID`` is subtracted instead of added.

    In partial mode, metabolites that belong to no module are mapped to the
    external module id, and reactions lying entirely in that external space
    are skipped.

    NOTE(review): this file also contains a later ``compute_modularity`` that
    additionally takes ``is_rmod`` and ``do_bipartite_explode``; if both live
    at module level the later definition rebinds the name -- confirm which one
    callers actually get.  The reaction-module and bipartite-explode variants
    that used to be commented out here are implemented in that later version.

    Returns:
        The modularity as a float.
    """
    # Reads the final module file into a dict: one entry per module, each
    # holding that module's metabolites.
    mods = eval_util.read_mmods(inf)

    # module -> (inner reactions, <not filled here>, boundary reactions)
    metabmod_organized_reacts = {}

    # Determines which module each metabolite belongs to.
    metab_to_module = {v: k for k in mods.keys() for v in mods[k]}
    if is_partial_module:
        for m in mets:
            metab_to_module.setdefault(m, eval_constants.EXTERNAL_MODULE_ID)

    # enumerate() supplies the column index directly; the former
    # reacts.index(r) rescanned the list for every reaction.
    for ri, r in enumerate(reacts):
        # Metabolites that take part in this reaction ...
        r_metabs = eval_util.get_metabolites_of_reaction_idx(S, mets, ri)
        # ... and the modules those metabolites belong to.
        r_mods = set(metab_to_module[m] for m in r_metabs)
        # TODO: with the following if, reactions where all nodes are in either one module or
        # external space, are counted as outside-reaction
        if len(r_mods) == 1:  # inner-edge detected
            mod = r_mods.pop()
            if is_partial_module and mod == eval_constants.EXTERNAL_MODULE_ID:
                # TODO:PARTIAL: skip reactions completely outside all modules
                continue
            metabmod_organized_reacts.setdefault(mod, ([], [], []))[0].append(r)
        else:  # outer-edge detected
            for mod in r_mods:
                metabmod_organized_reacts.setdefault(mod, ([], [], []))[2].append(r)

    # metab_edges: metabolite -> list of the reactions it takes part in.
    metab_edges = {}
    for ri, r in enumerate(reacts):
        for re_idx in range(len(S)):
            if S[re_idx][ri] != 0:
                metab_edges.setdefault(mets[re_idx], []).append(r)

    # total_links: sum the number of non-zero entries of every row (the total
    # degree of the graph) and halve it.
    # TODO: what about metabolites that are counted more than one time because of overlapping modules?
    total_links = sum(1 for m_row in S for entry in m_row if entry != 0) / 2.0

    modularity = 0.0
    for mod_name, mod_metabs in mods.items():
        # TODO: wrongly-inner edges (slot 1) should be counted?
        ls = len(metabmod_organized_reacts[mod_name][0])
        # TODO: for hyperarcs this causes a reaction to be counted more than once for a module
        ds = sum(len(metab_edges[m]) for m in mod_metabs)
        sum_term = ls * 1.0 / total_links - pow(ds / (2.0 * total_links), 2)
        if mod_name == eval_constants.EXTERNAL_MODULE_ID:
            modularity -= sum_term
        else:
            modularity += sum_term
    return modularity
def compute_modularity(inf, is_rmod, do_bipartite_explode, is_partial_module, S, mets, reacts):
    """Compute a modularity score for the modules stored in ``inf``.

    ``S`` is indexed as ``S[metabolite_index][reaction_index]``.  For every
    module the term ``ls / L - (ds / (2 * L)) ** 2`` is accumulated, where
    ``ls`` counts the module's inner edges, ``ds`` sums the edges touching
    each of its metabolites and ``L`` is the total number of links.  The term
    of a module named ``eval_constants.EXTERNAL_MODULE_ID`` is subtracted
    instead of added.

    Three ways of obtaining the per-module edge lists exist:

    * ``is_rmod``: the file holds reaction modules; they are converted with
      ``eval_util.convert_reactmod_to_overlapping_metabmod``, which also
      returns the per-module edge lists.
    * ``do_bipartite_explode`` (without ``is_rmod``): edges come from
      ``my_util.graph_by_explode_reactions_to_complete_bipartite``; each edge
      is a metabolite pair and ``L`` is the number of such edges.
    * otherwise: each reaction is one edge and ``L`` is half the number of
      non-zero entries of ``S``.  Only this path honours
      ``is_partial_module`` directly (unassigned metabolites are mapped to the
      external module and fully external reactions are skipped).

    ``do_bipartite_explode`` is ignored when ``is_rmod`` is set, as the TODO
    below documents.  Previously that combination raised ``NameError`` because
    ``graph_edges`` was never built.

    Returns:
        The modularity as a float.
    """
    mods = eval_util.read_mmods(inf)

    # TODO: do_bipartite_explode + is_rmod not supported yet! do_bipartite_explode is simply
    # ignored in the case
    use_bipartite = do_bipartite_explode and not is_rmod

    # The following IF merely fills metabmod_organized_reacts:
    # module -> (inner edges, <not filled here>, boundary edges)
    if is_rmod:
        mods, metabmod_organized_reacts = eval_util.convert_reactmod_to_overlapping_metabmod(
            mods, is_partial_module, S, mets, reacts)
    elif use_bipartite:
        metabmod_organized_reacts = {}
        graph_edges = my_util.graph_by_explode_reactions_to_complete_bipartite(S, mets)
        metab_to_module = {v: k for k in mods.keys() for v in mods[k]}
        for e in graph_edges:
            r_mods = {metab_to_module[e[0]], metab_to_module[e[1]]}
            if len(r_mods) == 1:
                metabmod_organized_reacts.setdefault(r_mods.pop(), ([], [], []))[0].append(e)
            else:
                for mod in r_mods:
                    metabmod_organized_reacts.setdefault(mod, ([], [], []))[2].append(e)
    else:
        metabmod_organized_reacts = {}
        metab_to_module = {v: k for k in mods.keys() for v in mods[k]}
        if is_partial_module:
            for m in mets:
                metab_to_module.setdefault(m, eval_constants.EXTERNAL_MODULE_ID)

        # enumerate() supplies the column index directly; the former
        # reacts.index(r) rescanned the list for every reaction.
        for ri, r in enumerate(reacts):
            r_metabs = eval_util.get_metabolites_of_reaction_idx(S, mets, ri)
            r_mods = set(metab_to_module[m] for m in r_metabs)
            # TODO: with the following if, reactions where all nodes are in either one module
            # or external space, are counted as outside-reaction
            if len(r_mods) == 1:  # inner-edge detected
                mod = r_mods.pop()
                if is_partial_module and mod == eval_constants.EXTERNAL_MODULE_ID:
                    # TODO:PARTIAL: skip reactions completely outside all modules
                    continue
                metabmod_organized_reacts.setdefault(mod, ([], [], []))[0].append(r)
            else:  # outer-edge detected
                for mod in r_mods:
                    metabmod_organized_reacts.setdefault(mod, ([], [], []))[2].append(r)

    # metab_edges: metabolite -> list of the edges it takes part in.
    metab_edges = {}
    if use_bipartite:
        for e in graph_edges:
            for endpoint in e:
                metab_edges.setdefault(endpoint, []).append(e)
    else:
        for ri, r in enumerate(reacts):
            for re_idx in range(len(S)):
                if S[re_idx][ri] != 0:
                    metab_edges.setdefault(mets[re_idx], []).append(r)

    # total_links
    if use_bipartite:
        total_links = len(graph_edges)
    else:
        # TODO: what about metabolites that are counted more than one time because of
        # overlapping modules?
        total_links = sum(1 for m_row in S for entry in m_row if entry != 0) / 2.0

    modularity = 0.0
    for mod_name, mod_metabs in mods.items():
        # TODO: wrongly-inner edges (slot 1) should be counted?
        ls = len(metabmod_organized_reacts[mod_name][0])
        # TODO: for hyperarcs this causes a reaction to be counted more than once for a module
        ds = sum(len(metab_edges[m]) for m in mod_metabs)
        sum_term = ls * 1.0 / total_links - pow(ds / (2.0 * total_links), 2)
        if mod_name == eval_constants.EXTERNAL_MODULE_ID:
            modularity -= sum_term
        else:
            modularity += sum_term
    return modularity
def do_compute_distance(cached_metabolite_similarities, compound_similarity_table, table_compound_order, inf,
                        species, factor1, is_rmod, is_partial_module, S, mets, reacts, revs,
                        chebi_loaded_model):
    """Score metabolite modules by intra-module minus inter-module similarity.

    The modules are read from ``inf`` with ``eval_util.read_mmods``; when
    ``is_rmod`` is truthy they are then converted with
    ``eval_util.convert_reactmod_to_overlapping_metabmod`` (only the first
    element of its result is used).  In partial mode the entries of ``mets``
    that belong to no module are collected under
    ``eval_constants.EXTERNAL_MODULE_ID``.

    For each ordered pair of modules the pairwise values of
    ``compute_similarity_of_metabolite_pair`` are summed (a metabolite is
    never paired with itself inside one module), normalised by the product of
    the two module sizes and accumulated into an intra and an inter score; the
    external module's pieces are subtracted instead of added.

    When ``eval_constants.VERBOSE >= 2`` a line with the module-level
    similarity counters is printed once the table is complete.

    ``table_compound_order`` and ``revs`` are not used by this function.

    Returns:
        ``intra_score - inter_score``.
    """
    mods = eval_util.read_mmods(inf)
    if is_rmod:
        mods, _organized_reacts = eval_util.convert_reactmod_to_overlapping_metabmod(
            mods, is_partial_module, S, mets, reacts)

    if is_partial_module:
        assigned = set()
        for members in mods.values():
            assigned.update(members)
        unassigned = set(mets) - assigned
        if unassigned:
            mods[eval_constants.EXTERNAL_MODULE_ID] = unassigned

    # mods = filter_out_reactions_by_type(mods, [(0, 'R_EX_')])

    mods_similarity_table = {}
    for name_a, members_a in mods.items():
        row = {}
        mods_similarity_table[name_a] = row
        for name_b, members_b in mods.items():
            same_module = name_a == name_b
            # Designed to work for overlapping modules: only a metabolite's
            # similarity with itself inside a single module is left out.
            row[name_b] = sum(
                compute_similarity_of_metabolite_pair(cached_metabolite_similarities, m1, m2,
                                                      compound_similarity_table, species, factor1,
                                                      chebi_loaded_model)
                for m1 in members_a
                for m2 in members_b
                if not same_module or m1 != m2)

    if eval_constants.VERBOSE >= 2:
        stats = (species, metabolite_pair_similarity_done, metabolite_pair_similarity_missed_reason1,
                 metabolite_pair_similarity_missed_reason2)
        print('compute_similarity_of_metabolite_pair stat for species %s: done: %d, missed_for_no_compound: %d, missed_for_not_in_chebi: %d' % stats)

    # TODO:BUG: for overlapping modules this fails because (len(rxns) - len(mods[mod_name])) is wrong
    intra_score = 0
    inter_score = 0
    for mod_name, sims in mods_similarity_table.items():
        own_size = len(mods[mod_name])
        intra_piece = sims[mod_name] * 1.0 / (own_size * own_size)
        inter_piece = sum(
            sims[other_name] * 1.0 / (own_size * len(mods[other_name]))
            for other_name in mods_similarity_table
            if other_name != mod_name)

        if mod_name != eval_constants.EXTERNAL_MODULE_ID:
            intra_score += intra_piece
            inter_score += inter_piece
        else:
            intra_score -= intra_piece
            inter_score -= inter_piece

    return intra_score - inter_score