def run_analysis(self):
    """Sample TF/miRNA pairs for each PMID and print the accumulated scores.

    For every key of ``self.tfs`` the entities in ``self.tfs[pmid]`` and the
    miRNA entities of ``self.mirnas[pmid]`` referenced by their ``targets``
    are pooled. When both pools hold more than one entry, ten random
    (tf, mirna) pairs are drawn. A pair counts as "correct" when
    ``(mirna.eid, "miRNA-gene")`` is listed in ``tf.targets`` and as
    "incorrect" otherwise. Each draw adds the value returned by
    ``ssm.simui_go`` for the two ``best_go`` terms, or 1 when either entity
    lacks a GO-prefixed ``best_go``.

    Nothing is returned; one summary line per sampled PMID is printed.

    NOTE(review): this file contains another definition with the same name
    and logic; confirm which one is meant to survive.
    """
    for pmid in self.tfs:
        all_tfs = list(self.tfs[pmid])
        all_mirnas = []
        for tf in all_tfs:
            for target in tf.targets:
                # Resolve each target independently. A target whose eid has
                # no matching miRNA is skipped, so the pool never receives
                # None or the miRNA found for a previous target.
                mirna = next(
                    (m for m in self.mirnas[pmid] if m.eid == target[0]),
                    None)
                if mirna is not None:
                    all_mirnas.append(mirna)

        if len(all_tfs) > 1 and len(all_mirnas) > 1:
            correct_count = 0  # score accumulated over sampled true pairs
            incorrect_count = 0  # score accumulated over the other pairs
            for _ in range(0, 10):
                random_tf = random.choice(all_tfs)
                random_mirna = random.choice(all_mirnas)
                is_true_pair = (
                    (random_mirna.eid, "miRNA-gene") in random_tf.targets)
                if (random_mirna.best_go.startswith("GO:")
                        and random_tf.best_go.startswith("GO")):
                    ss = ssm.simui_go(random_mirna.best_go,
                                      random_tf.best_go)
                    if not is_true_pair:
                        print("incorrect: {}".format(ss))
                else:
                    # Pairs without a usable GO term contribute a flat 1.
                    ss = 1
                if is_true_pair:
                    correct_count += ss
                else:
                    incorrect_count += ss
            # NOTE(review): the report is emitted only for PMIDs that were
            # actually sampled - confirm against the intended layout.
            print("{}-{} ({} mirnas, {} tfs)".format(
                correct_count, incorrect_count,
                len(all_mirnas), len(all_tfs)))
def run_analysis(self):
    """Sample TF/miRNA pairs for each PMID and print the accumulated scores.

    For every key of ``self.tfs`` the entities in ``self.tfs[pmid]`` and the
    miRNA entities of ``self.mirnas[pmid]`` referenced by their ``targets``
    are pooled. When both pools hold more than one entry, ten random
    (tf, mirna) pairs are drawn. A pair counts as "correct" when
    ``(mirna.eid, "miRNA-gene")`` is listed in ``tf.targets`` and as
    "incorrect" otherwise. Each draw adds the value returned by
    ``ssm.simui_go`` for the two ``best_go`` terms, or 1 when either entity
    lacks a GO-prefixed ``best_go``.

    Nothing is returned; one summary line per sampled PMID is printed.

    NOTE(review): this file contains another definition with the same name
    and logic; confirm which one is meant to survive.
    """
    for pmid in self.tfs:
        all_tfs = list(self.tfs[pmid])
        all_mirnas = []
        for tf in all_tfs:
            for target in tf.targets:
                # Resolve each target independently. A target whose eid has
                # no matching miRNA is skipped, so the pool never receives
                # None or the miRNA found for a previous target.
                mirna = next(
                    (m for m in self.mirnas[pmid] if m.eid == target[0]),
                    None)
                if mirna is not None:
                    all_mirnas.append(mirna)

        if len(all_tfs) > 1 and len(all_mirnas) > 1:
            correct_count = 0  # score accumulated over sampled true pairs
            incorrect_count = 0  # score accumulated over the other pairs
            for _ in range(0, 10):
                random_tf = random.choice(all_tfs)
                random_mirna = random.choice(all_mirnas)
                is_true_pair = (
                    (random_mirna.eid, "miRNA-gene") in random_tf.targets)
                if (random_mirna.best_go.startswith("GO:")
                        and random_tf.best_go.startswith("GO")):
                    ss = ssm.simui_go(random_mirna.best_go,
                                      random_tf.best_go)
                    if not is_true_pair:
                        print("incorrect: {}".format(ss))
                else:
                    # Pairs without a usable GO term contribute a flat 1.
                    ss = 1
                if is_true_pair:
                    correct_count += ss
                else:
                    incorrect_count += ss
            # NOTE(review): the report is emitted only for PMIDs that were
            # actually sampled - confirm against the intended layout.
            print("{}-{} ({} mirnas, {} tfs)".format(
                correct_count, incorrect_count,
                len(all_mirnas), len(all_tfs)))
def annotate_corpus_relations(corpus, model, corpuspath):
    """Print scores for co-occurring miRNA/protein entities, then save.

    For every sentence of every document in ``corpus.documents``, the
    entities stored under ``model`` in ``sentence.entities.elist`` are split
    by ``type`` into "mirna" and "protein". Each miRNA/protein pair of the
    same sentence is scored with ``ssm.simui_go`` on the two ``best_go``
    values, and pairs with a score above zero are printed. No relation is
    added to the corpus here; the corpus is only written back with
    ``corpus.save(corpuspath)``.

    :param corpus: object exposing ``documents`` (mapping of document id to
        document) and ``save(path)``
    :param model: key selecting the entity list inside each sentence
    :param corpuspath: path handed to ``corpus.save``
    """
    logging.info("getting relations...")
    logging.info("finding relations...")
    for did in corpus.documents:
        for sentence in corpus.documents[did].sentences:
            sentence_mirnas = []
            sentence_tfs = []
            # A sentence with no entities for this model may lack the key
            # altogether; treat that as "no entities" instead of failing.
            # (assumes elist is a dict-like mapping - TODO confirm)
            for entity in sentence.entities.elist.get(model, ()):
                if entity.type == "mirna":
                    sentence_mirnas.append(entity)
                elif entity.type == "protein":
                    sentence_tfs.append(entity)
            for mirna in sentence_mirnas:
                for tf in sentence_tfs:
                    ss = ssm.simui_go(mirna.best_go, tf.best_go)
                    if ss > 0:
                        # Single-argument print with %-formatting gives the
                        # same space-separated line on Python 2 and 3.
                        print("%s %s %s %s %s" % (
                            ss, mirna.text, tf.text,
                            mirna.best_go, tf.best_go))
    print("saving corpus...")
    corpus.save(corpuspath)
def run_ss_analysis(self, pairtype):
    """Compare scores of true and other miRNA/protein pairs per document.

    For each document in ``self.documents`` that has at least one
    'goldstandard_mirna' and one 'goldstandard_protein' entity, random
    (protein, mirna) pairs are drawn until either class has ``nexp``
    samples. A pair is "correct" when ``(protein.eid, pairtype)`` is in
    ``mirna.targets``. Each draw contributes the mean of ``ssm.simui_go``
    over all combinations of the two entities' ``go_ids``; a draw with no
    combinations contributes nothing to the score but is still counted.

    For every document where both accumulated scores are non-zero, a line
    with the difference and both normalised scores is printed. Finally the
    mean difference over those documents is printed; when no document
    qualifies, nothing is printed (previously a ZeroDivisionError).

    :param pairtype: relation label looked up in ``mirna.targets``

    NOTE(review): this file contains another definition with the same name
    and logic; confirm which one is meant to survive.
    """
    nexp = 10  # number of samples that ends the sampling loop
    diff_count = []  # per-document difference of the normalised scores
    for did in self.documents:
        document = self.documents[did]
        all_mirnas = document.get_entities('goldstandard_mirna')
        all_tfs = document.get_entities('goldstandard_protein')
        if not (len(all_mirnas) > 0 and len(all_tfs) > 0):
            continue

        correct_count = 0  # summed mean score of sampled true pairs
        incorrect_count = 0  # summed mean score of the other sampled pairs
        correct = 0
        incorrect = 0
        # NOTE(review): sampling stops as soon as EITHER class reaches nexp,
        # yet both sums are divided by nexp below, so the class with fewer
        # draws is scaled down - confirm this is intended.
        while correct < nexp and incorrect < nexp:
            random_tf = random.choice(all_tfs)
            random_mirna = random.choice(all_mirnas)
            scores = [ssm.simui_go(mirna_go, tf_go)
                      for mirna_go in random_mirna.go_ids
                      for tf_go in random_tf.go_ids]
            mean_score = sum(scores) * 1.0 / len(scores) if scores else 0
            if (random_tf.eid, pairtype) in random_mirna.targets:
                correct_count += mean_score
                correct += 1
            else:
                incorrect_count += mean_score
                incorrect += 1

        # NOTE(review): the difference is recorded only when both scores
        # are non-zero - confirm against the intended layout.
        if correct_count != 0 and incorrect_count != 0:
            diff = correct_count / nexp - incorrect_count / nexp
            print("{}={}-{} ({} mirnas, {} tfs)".format(
                diff, correct_count / nexp, incorrect_count / nexp,
                len(all_mirnas), len(all_tfs)))
            diff_count.append(diff)

    # Guard the overall mean: with no qualifying document there is nothing
    # to average.
    if diff_count:
        print(sum(diff_count) * 1.0 / len(diff_count))
def run_ss_analysis(self, pairtype):
    """Compare scores of true and other miRNA/protein pairs per document.

    For each document in ``self.documents`` that has at least one
    'goldstandard_mirna' and one 'goldstandard_protein' entity, random
    (protein, mirna) pairs are drawn until either class has ``nexp``
    samples. A pair is "correct" when ``(protein.eid, pairtype)`` is in
    ``mirna.targets``. Each draw contributes the mean of ``ssm.simui_go``
    over all combinations of the two entities' ``go_ids``; a draw with no
    combinations contributes nothing to the score but is still counted.

    For every document where both accumulated scores are non-zero, a line
    with the difference and both normalised scores is printed. Finally the
    mean difference over those documents is printed; when no document
    qualifies, nothing is printed (previously a ZeroDivisionError).

    :param pairtype: relation label looked up in ``mirna.targets``

    NOTE(review): this file contains another definition with the same name
    and logic; confirm which one is meant to survive.
    """
    nexp = 10  # number of samples that ends the sampling loop
    diff_count = []  # per-document difference of the normalised scores
    for did in self.documents:
        document = self.documents[did]
        all_mirnas = document.get_entities('goldstandard_mirna')
        all_tfs = document.get_entities('goldstandard_protein')
        if not (len(all_mirnas) > 0 and len(all_tfs) > 0):
            continue

        correct_count = 0  # summed mean score of sampled true pairs
        incorrect_count = 0  # summed mean score of the other sampled pairs
        correct = 0
        incorrect = 0
        # NOTE(review): sampling stops as soon as EITHER class reaches nexp,
        # yet both sums are divided by nexp below, so the class with fewer
        # draws is scaled down - confirm this is intended.
        while correct < nexp and incorrect < nexp:
            random_tf = random.choice(all_tfs)
            random_mirna = random.choice(all_mirnas)
            scores = [ssm.simui_go(mirna_go, tf_go)
                      for mirna_go in random_mirna.go_ids
                      for tf_go in random_tf.go_ids]
            mean_score = sum(scores) * 1.0 / len(scores) if scores else 0
            if (random_tf.eid, pairtype) in random_mirna.targets:
                correct_count += mean_score
                correct += 1
            else:
                incorrect_count += mean_score
                incorrect += 1

        # NOTE(review): the difference is recorded only when both scores
        # are non-zero - confirm against the intended layout.
        if correct_count != 0 and incorrect_count != 0:
            diff = correct_count / nexp - incorrect_count / nexp
            print("{}={}-{} ({} mirnas, {} tfs)".format(
                diff, correct_count / nexp, incorrect_count / nexp,
                len(all_mirnas), len(all_tfs)))
            diff_count.append(diff)

    # Guard the overall mean: with no qualifying document there is nothing
    # to average.
    if diff_count:
        print(sum(diff_count) * 1.0 / len(diff_count))