def calculate_similar():
    """
    Score every document of the ``rna_times`` collection and store the result.

    For each document the fields ``rna1-times``, ``rna2-times`` and
    ``common-times`` are read as integers and handed to
    ``similarity_element``. The outcome is written to the
    ``similar_score_new`` collection as one document per pair, holding
    ``rna1``, ``rna2`` and the score under the key ``similar_sore``
    (spelling as stored).

    :return: None
    """
    mongo = conn_mongo()
    database = mongo.md

    count_fields = ("rna1-times", "rna2-times", "common-times")
    for record in database.rna_times.find():
        first_count, second_count, shared_count = (
            int(record[field]) for field in count_fields
        )
        # Per the original note, the helper yields a decimal; it is turned
        # into a float because the decimal cannot be stored in mongo.
        score = float(
            similarity_element(first_count, second_count, shared_count)
        )
        database.similar_score_new.insert({
            "rna1": record["rna1"],
            "rna2": record["rna2"],
            "similar_sore": score,
        })
def calculate_similar_new():
    """
    Score miRNA pairs with ``similarity_element_new`` and store the results.

    The miRNA names come from ``value.mirna`` of every document in
    ``rnacount_new``. Each name at position ``i`` of that list is paired
    with the first ``len - i`` names of the reversed list. For every pair
    the matching ``rna_times`` document is fetched, its three counters are
    read as integers, and the score is written to the ``similar_score``
    collection under the key ``similar_sore`` (spelling as stored).

    A pair without a matching ``rna_times`` document makes the cursor
    lookup fail, exactly as in the index-based form of this loop.

    :return: None
    """
    mongo = conn_mongo()
    database = mongo.md

    distinct_targets = database.target_scan.distinct("item1")

    # Same names twice: once in query order, once reversed.
    forward = [doc['value']['mirna'] for doc in database.rnacount_new.find()]
    backward = forward[::-1]

    # Number of distinct "item1" values in target_scan.
    target_sum = len(distinct_targets)

    total = len(backward)
    for offset, first_name in enumerate(forward):
        for second_name in backward[:total - offset]:
            matches = database.rna_times.find(
                {"rna1": first_name, "rna2": second_name}
            )
            record = matches[0]
            first_count = int(record["rna1-times"])
            second_count = int(record["rna2-times"])
            shared_count = int(record["common-times"])
            # Per the original note, the helper yields a decimal; it is
            # turned into a float because the decimal cannot be stored in
            # mongo.
            score = float(
                similarity_element_new(
                    first_count, second_count, shared_count, target_sum
                )
            )
            database.similar_score.insert({
                "rna1": record["rna1"],
                "rna2": record["rna2"],
                "similar_sore": score,
            })
def calculate_similar_new():
    """
    Compute and persist similarity scores for pairs of miRNA names.

    Names are read from ``value.mirna`` of the ``rnacount_new`` documents.
    The name at index ``i`` is combined with the first ``len - i`` entries
    of the reversed name list. For each combination the first matching
    ``rna_times`` document supplies ``rna1-times``, ``rna2-times`` and
    ``common-times``, which are passed together with the number of distinct
    ``target_scan.item1`` values to ``similarity_element_new``. One document
    per pair is inserted into ``similar_score``; the score is stored under
    the key ``similar_sore`` (spelling as stored).

    :return: None
    """
    connection = conn_mongo()
    store = connection.md

    targets = store.target_scan.distinct("item1")

    names = []
    for entry in store.rnacount_new.find():
        names.append(entry['value']['mirna'])
    # Reversed copy of the same names.
    flipped = list(reversed(names))

    # Count of distinct values taken from target_scan's "item1" field.
    target_sum = len(targets)

    size = len(flipped)
    # Lazily walk the (name, reversed-name) combinations in loop order.
    pairs = (
        (left, right)
        for index, left in enumerate(names)
        for right in flipped[:size - index]
    )

    for left, right in pairs:
        hit = store.rna_times.find({"rna1": left, "rna2": right})[0]
        times_left = int(hit["rna1-times"])
        times_right = int(hit["rna2-times"])
        times_common = int(hit["common-times"])
        # Per the original note, the helper yields a decimal; convert it to
        # float, otherwise it cannot be stored in mongo.
        value = similarity_element_new(
            times_left, times_right, times_common, target_sum
        )
        result = {
            "rna1": hit["rna1"],
            "rna2": hit["rna2"],
            "similar_sore": float(value),
        }
        store.similar_score.insert(result)
# !/usr/bin/env python # -*- coding: utf-8 -*- __author__ = 'wtq' import traceback from mdat.db.conn_mongo import conn_mongo client = conn_mongo() db = client.md def generate_matrix(file_path): """ generate the similar_score matrix then write it into csv :return: """ files = file(file_path, "w+") rna1 = db.target_scan_split.distinct("item2") rna1.sort() rna2 = rna1 try: for r1 in rna1: similar = [] for r2 in rna2: for i in db.similar_score_new2.find({ "$or": [{ "rna1": r1, "rna2": r2 }, { "rna1": r2, "rna2": r1
# !/usr/bin/env python # -*- coding: utf-8 -*- __author__ = 'wtq' import traceback from mdat.db.conn_mongo import conn_mongo client = conn_mongo() db = client.md def split_rna(): """ split rna if it contains "/" :return: """ try: # db.target_scan_split.ensureIndex({"item1": 1, "item2": 1}) for item in db.target_scan2.find(): rna = item["item2"] head = "" if not rna.__contains__("/"): items = { "item2": "hsa-"+rna, "item1": item["item1"] } if db.target_scan_split.find(items).count() == 0: db.target_scan_split.insert(items)