def calculate_similar():
    """
    Score every rna pair recorded in ``rna_times`` and store the result.

    Each document of the ``rna_times`` collection (database ``md``) supplies
    three counters -- ``rna1-times``, ``rna2-times`` and ``common-times`` --
    which are converted to ``int`` and handed to ``similarity_element``.
    The score is written, together with the two rna names, as a new
    document in the ``similar_score_new`` collection.
    :return: None
    """
    mongo = conn_mongo()
    store = mongo.md
    for record in store.rna_times.find():
        first_count = int(record["rna1-times"])
        second_count = int(record["rna2-times"])
        shared_count = int(record["common-times"])
        # Cast the helper's result to float: per the original note, the
        # decimal it yields cannot be stored in mongo as-is.
        score = float(
            similarity_element(first_count, second_count, shared_count)
        )
        store.similar_score_new.insert({
            "rna1": record["rna1"],
            "rna2": record["rna2"],
            "similar_sore": score
        })
def calculate_similar():
    """
    Score every rna pair recorded in ``rna_times`` and store the result.

    Each document of the ``rna_times`` collection (database ``md``) supplies
    three counters -- ``rna1-times``, ``rna2-times`` and ``common-times`` --
    which are converted to ``int`` and handed to ``similarity_element``.
    The score is written, together with the two rna names, as a new
    document in the ``similar_score_new`` collection.
    :return: None
    """
    mongo = conn_mongo()
    store = mongo.md
    for record in store.rna_times.find():
        first_count = int(record["rna1-times"])
        second_count = int(record["rna2-times"])
        shared_count = int(record["common-times"])
        # Cast the helper's result to float: per the original note, the
        # decimal it yields cannot be stored in mongo as-is.
        score = float(
            similarity_element(first_count, second_count, shared_count)
        )
        store.similar_score_new.insert({
            "rna1": record["rna1"],
            "rna2": record["rna2"],
            "similar_sore": score
        })
def calculate_similar_new():
    """
    Score rna pairs with ``similarity_element_new`` and store the results.

    The rna names come from ``value.mirna`` of every document in the
    ``rnacount_new`` collection.  The name at position ``i`` is paired with
    the entries of the reversed name list, leaving out its last ``i``
    entries.  For each pair the first matching ``rna_times`` document is
    fetched and its three counters, plus the number of distinct ``item1``
    values in ``target_scan``, are passed to ``similarity_element_new``.
    The score is inserted into the ``similar_score`` collection.

    The ``rna_times`` lookup is not guarded: a pair without a matching
    document makes the cursor indexing fail and the error propagates.
    :return: None
    """
    mongo = conn_mongo()
    store = mongo.md
    distinct_targets = store.target_scan.distinct("item1")

    forward = [doc['value']['mirna'] for doc in store.rnacount_new.find()]
    # Same rna names as ``forward``, in reverse order.
    backward = forward[::-1]

    # Number of distinct item1 values in target_scan.
    target_sum = len(distinct_targets)

    for offset, left in enumerate(forward):
        for right in backward[:len(backward) - offset]:
            matches = store.rna_times.find({"rna1": left, "rna2": right})
            record = matches[0]

            first_count = int(record["rna1-times"])
            second_count = int(record["rna2-times"])
            shared_count = int(record["common-times"])
            # Cast the helper's result to float: per the original note, the
            # decimal it yields cannot be stored in mongo as-is.
            score = float(similarity_element_new(
                first_count, second_count, shared_count, target_sum
            ))
            store.similar_score.insert({
                "rna1": record["rna1"],
                "rna2": record["rna2"],
                "similar_sore": score
            })
def calculate_similar_new():
    """
    Score rna pairs with ``similarity_element_new`` and store the results.

    The rna names come from ``value.mirna`` of every document in the
    ``rnacount_new`` collection.  The name at position ``i`` is paired with
    the entries of the reversed name list, leaving out its last ``i``
    entries.  For each pair the first matching ``rna_times`` document is
    fetched and its three counters, plus the number of distinct ``item1``
    values in ``target_scan``, are passed to ``similarity_element_new``.
    The score is inserted into the ``similar_score`` collection.

    The ``rna_times`` lookup is not guarded: a pair without a matching
    document makes the cursor indexing fail and the error propagates.
    :return: None
    """
    mongo = conn_mongo()
    store = mongo.md
    distinct_targets = store.target_scan.distinct("item1")

    forward = [doc['value']['mirna'] for doc in store.rnacount_new.find()]
    # Same rna names as ``forward``, in reverse order.
    backward = forward[::-1]

    # Number of distinct item1 values in target_scan.
    target_sum = len(distinct_targets)

    for offset, left in enumerate(forward):
        for right in backward[:len(backward) - offset]:
            matches = store.rna_times.find({"rna1": left, "rna2": right})
            record = matches[0]

            first_count = int(record["rna1-times"])
            second_count = int(record["rna2-times"])
            shared_count = int(record["common-times"])
            # Cast the helper's result to float: per the original note, the
            # decimal it yields cannot be stored in mongo as-is.
            score = float(similarity_element_new(
                first_count, second_count, shared_count, target_sum
            ))
            store.similar_score.insert({
                "rna1": record["rna1"],
                "rna2": record["rna2"],
                "similar_sore": score
            })
# !/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'wtq'

import traceback
from mdat.db.conn_mongo import conn_mongo

# Module-level database handle, created once when this module is imported.
# ``conn_mongo`` comes from ``mdat.db.conn_mongo`` (presumably returns a
# pymongo client -- confirm there); ``db`` is its ``md`` attribute and is
# used by the functions below as the working database.
client = conn_mongo()
db = client.md


def generate_matrix(file_path):
    """
    generate the similar_score matrix then write it into csv
    :return:
    """
    files = file(file_path, "w+")
    rna1 = db.target_scan_split.distinct("item2")
    rna1.sort()
    rna2 = rna1
    try:
        for r1 in rna1:
            similar = []
            for r2 in rna2:
                for i in db.similar_score_new2.find({
                        "$or": [{
                            "rna1": r1,
                            "rna2": r2
                        }, {
                            "rna1": r2,
                            "rna2": r1
Example #6
0
# !/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'wtq'

import traceback
from mdat.db.conn_mongo import conn_mongo

# Module-level database handle, created once when this module is imported.
# ``conn_mongo`` comes from ``mdat.db.conn_mongo`` (presumably returns a
# pymongo client -- confirm there); ``db`` is its ``md`` attribute and is
# used by the functions below as the working database.
client = conn_mongo()
db = client.md


def split_rna():
    """
    split rna if it contains "/"
    :return:
    """
    try:

        # db.target_scan_split.ensureIndex({"item1": 1, "item2": 1})
        for item in db.target_scan2.find():
            rna = item["item2"]
            head = ""

            if not rna.__contains__("/"):
                items = {
                    "item2": "hsa-"+rna,
                    "item1": item["item1"]
                }
                if db.target_scan_split.find(items).count() == 0:
                    db.target_scan_split.insert(items)