Esempio n. 1
0
    def test_add_manually_motif_before_wrong_area(self):
        """Offset 185 on SEQ2 is only accepted once the first motif is wrong.

        The first request must be rejected with ``ErrorCode.OFFSET_OVERLAP``.
        After the lowest-offset motif of SEQ2 has been marked wrong, the same
        request must succeed and report the new motif under ``motifs_16``.
        """
        # Load every sequence first so the database id of SEQ2 is known.
        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }
        sid = seq_by_seq_id['SEQ2'].id
        seq2_motifs = dao.find_motifs_by_seq_ids(['SEQ2'],
                                                 MOTIF_VERSION)['SEQ2']
        seq2_motifs.sort(key=lambda item: item.offset)
        lowest_motif = seq2_motifs[0]

        # While the requested offset overlaps, the add is rejected.
        with boddle(json={"offset": 185}):
            response = lrr_search_web_service.add_manually_motif(
                MOTIF_VERSION, sid)
        self.assertIsNotNone(response)
        payload = json.loads(response)
        expected_error = {'message': str.format(ErrorCode.OFFSET_OVERLAP)}
        self.assertDictEqual(expected_error, payload)

        # Mark the lowest-offset motif as wrong.
        mark_wrong(sid, lowest_motif.id)

        # The very same request is accepted now.
        with boddle(json={"offset": 185}):
            response = lrr_search_web_service.add_manually_motif(
                MOTIF_VERSION, sid)
        self.assertIsNotNone(response)
        payload = json.loads(response)
        self.assertIn("motifs_16", payload)
        self.assertEqual(185, payload['motifs_16'][0]['offset'])
Esempio n. 2
0
    def test_add_manually_motif_after_wrong_area(self):
        """Offset 595 on SEQ2 is only accepted once the last motif is wrong.

        The first request must be rejected with ``ErrorCode.OFFSET_OVERLAP``.
        After the highest-offset motif of SEQ2 has been marked wrong, the same
        request must succeed and return exactly one motif at offset 595.
        """
        set_up_baseline_seq('SEQ4', '', start=10, count=10, step=24)
        # Load every sequence first so the database id of SEQ2 is known.
        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }
        sid = seq_by_seq_id['SEQ2'].id
        seq2_motifs = dao.find_motifs_by_seq_ids(['SEQ2'],
                                                 MOTIF_VERSION)['SEQ2']
        seq2_motifs.sort(key=lambda item: item.offset)
        highest_motif = seq2_motifs[-1]

        # While the requested offset overlaps, the add is rejected.
        with boddle(json={"offset": 595}):
            response = lrr_search_web_service.add_manually_motif(
                MOTIF_VERSION, sid)
        self.assertIsNotNone(response)
        payload = json.loads(response)
        expected_error = {'message': str.format(ErrorCode.OFFSET_OVERLAP)}
        self.assertDictEqual(expected_error, payload)

        # Mark the highest-offset motif as wrong.
        mark_wrong(sid, highest_motif.id)

        # The very same request is accepted now.
        with boddle(json={"offset": 595}):
            response = lrr_search_web_service.add_manually_motif(
                MOTIF_VERSION, sid)
        self.assertIsNotNone(response)
        payload = json.loads(response)
        returned_motifs = payload.get('motifs_16', [])
        self.assertEqual(1, len(returned_motifs))
        self.assertEqual(595, payload['motifs_16'][0].get('offset', -1))
Esempio n. 3
0
def get_old_correct_new_not_exists():
    """Return OLD_VERSION motifs that have no counterpart in NEW_VERSION.

    A counterpart is a NEW_VERSION row with the same ``seq_id`` and the same
    ``offset``. The lookup is an outer join from the old entity to the new
    one, keeping only the rows where the new side is NULL.

    Returns:
        The list produced by ``Query.all()`` for the old-version entity.
    """
    old_cls = dao.motif.get_entity(OLD_VERSION)
    new_cls = dao.motif.get_entity(NEW_VERSION)
    same_position = and_(old_cls.seq_id == new_cls.seq_id,
                         old_cls.offset == new_cls.offset)
    with dao.query_session() as session:
        return (
            session.query(old_cls)
            .outerjoin(new_cls, same_position)
            # ``.is_(None)`` renders ``IS NULL`` explicitly instead of relying
            # on the overloaded ``== None`` comparison.
            .filter(new_cls.id.is_(None))
            .all()
        )
Esempio n. 4
0
    def test_delete_manually_motif_ok(self):
        """A manually added motif on SEQ1 can be deleted.

        Deleting it a second time only has to return something non-None; the
        original test notes that idempotence is not considered for now.
        """
        # Load every sequence first so the database id of SEQ1 is known.
        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }
        seq1_sid = seq_by_seq_id['SEQ1'].id

        # A regular manual add succeeds.
        with boddle(json={"offset": 196}):
            response = lrr_search_web_service.add_manually_motif(
                MOTIF_VERSION, seq1_sid)
        self.assertIsNotNone(response)
        payload = json.loads(response)
        self.assertIn("motifs_16", payload)
        self.assertEqual(196, payload['motifs_16'][0]['offset'])
        added_motif = self._get_manually_motifs('SEQ1')[0]

        # The first delete succeeds and yields an empty JSON object.
        first_delete = delete_motif(added_motif.id)
        self.assertIsNotNone(first_delete)
        self.assertDictEqual({}, json.loads(first_delete))

        # Deleting again fails; idempotence is not considered for now.
        second_delete = delete_motif(added_motif.id)
        self.assertIsNotNone(second_delete)
def query_density(seq_id):
    """Fetch the motifs and the sequence record for ``seq_id``.

    Motifs are queried for MOTIF_VERSION with ``with_wrong=False``
    (presumably excluding motifs marked wrong -- confirm in the dao).

    Returns:
        A ``(motifs, seq)`` tuple.
    """
    with dao.query_session() as session:
        found_motifs = dao.motif.find_motifs_by_seq_id(
            session, seq_id, MOTIF_VERSION, with_wrong=False)
        sequence = dao.find_seq_by_id(seq_id)

    return found_motifs, sequence
def find_seqs():
    """Return sequences matching SEQ_ID with more than LRR_COUNT correct motifs.

    Sequences are joined to the MOTIF_VERSION motif entity on ``seq_id``,
    filtered with ``LIKE SEQ_ID`` and on the motif's ``correct`` column, then
    grouped per sequence so the motif count can be compared to LRR_COUNT.
    """
    motif_cls = dao.motif.get_entity(MOTIF_VERSION)
    seq_cls = dao.sequence.SequenceEntity
    with dao.query_session() as session:
        query = session.query(seq_cls)
        query = query.join(motif_cls, seq_cls.seq_id == motif_cls.seq_id)
        query = query.filter(seq_cls.seq_id.like(SEQ_ID))
        query = query.filter(motif_cls.correct)
        query = query.group_by(seq_cls.seq_id)
        query = query.having(func.count(motif_cls.id) > LRR_COUNT)
        return query.all()
def obtain_value(seq_id):
    """Fetch the motifs, the nsites and the sequence record for ``seq_id``.

    Motifs are queried for MOTIF_VERSION with ``with_wrong=False``
    (presumably excluding motifs marked wrong -- confirm in the dao).

    Returns:
        A ``(motifs, nsites, seq)`` tuple.
    """
    with dao.query_session() as session:
        found_motifs = dao.motif.find_motifs_by_seq_id(
            session, seq_id, MOTIF_VERSION, with_wrong=False)
        found_nsites = dao.nsite.find_nsites_by_seq_id(session, seq_id)
        sequence = dao.find_seq_by_id(seq_id)

    return found_motifs, found_nsites, sequence
Esempio n. 8
0
def main():
    """Reconcile motifs between OLD_VERSION and NEW_VERSION.

    First, every (old, new) pair returned by ``get_old_wrong_new_exists`` is
    printed; the call that would actually flag the new motif is commented out
    in this script, so that part only reports.

    Second, every motif returned by ``get_old_correct_new_not_exists`` is
    re-scored against the baseline PSSM matrix and added to NEW_VERSION as a
    manual motif, unless adding it would overlap a motif already there.
    """
    wrong_pairs = get_old_wrong_new_exists()
    print("Get wrong in old and still exists in new, Count {}, {}".format(
        len(wrong_pairs), wrong_pairs))
    for _old_motif, new_motif in wrong_pairs:
        print(
            "Mark mid {}, seq_id {}, offset {} as false_discovery in version {}"
            .format(new_motif.id, new_motif.seq_id, new_motif.offset,
                    NEW_VERSION))
        # NOTE: the update itself is disabled in this script:
        # dao.update_false_discovery_by_motif(new_m.id, True, NEW_VERSION)

    motif_len = 16
    matrix = pssm_matrix.calc_pssm_matrix(dao.find_baseline_motifs())
    missing_motifs = get_old_correct_new_not_exists()
    wanted_seq_ids = {m.seq_id for m in missing_motifs}
    with dao.query_session() as session:
        seq_by_seq_id = {
            seq.seq_id: seq
            for seq in dao.sequence.find_seq_by_ids(session, wanted_seq_ids)
        }
        # Every referenced sequence must have been found.
        assert (len(seq_by_seq_id) == len(wanted_seq_ids))

    print("Get correct in old and not exists in new, Count {}, {}".format(
        len(missing_motifs), missing_motifs))
    for m in missing_motifs:
        window = seq_by_seq_id[m.seq_id].seq[m.offset:m.offset + motif_len]
        assert (len(window) == motif_len)
        score = motif_tools.calc_pssm_score(window, matrix)
        probability = motif_tools.calc_probability_by_score(score)
        candidate = dao.motif.MotifEntityBase(m.offset,
                                              m.seq_id,
                                              score,
                                              probability,
                                              0,
                                              manually_add=True)
        current = dao.find_motifs_by_seq_ids([m.seq_id],
                                             NEW_VERSION,
                                             with_wrong=False)[m.seq_id]
        current.append(candidate)
        without_overlap = motif_tools.found_no_overlapped_motifs(
            current, motif_len)
        # A shorter filtered list means the candidate collides with a motif.
        if len(current) != len(without_overlap):
            logging.error(
                "Overlap found in seq {}, {}/{}, new offset {}".format(
                    m.seq_id, len(current), len(without_overlap),
                    candidate.offset))
            continue

        logging.debug(
            "Add motif offset {}, score {}, probability {} to seq {} manually"
            .format(candidate.offset, candidate.score, candidate.probability,
                    candidate.seq_id))
        dao.add_manually_motif(candidate.seq_id, candidate.offset,
                               NEW_VERSION, candidate.score,
                               candidate.probability)
Esempio n. 9
0
def get_and_check_motif_id(mid_str, version):
    """Validate ``mid_str`` as a motif id and return the matching motif.

    Args:
        mid_str: The motif id as received from the caller; it is parsed by
            ``get_and_check_int``.
        version: Motif version passed to the dao lookup.

    Returns:
        The first motif returned by ``dao.motif.find_motifs_by_ids``.

    Raises:
        ValidationError: If the parsed id is negative or no motif is found.
            Whatever ``get_and_check_int`` raises is propagated as well.
    """
    not_an_int_msg = str.format(ErrorCode.INVALID_PARA, "mid", mid_str)
    mid = get_and_check_int(mid_str, not_an_int_msg)
    if mid < 0:
        negative_msg = str.format(ErrorCode.INVALID_PARA, "mid",
                                  'less than 0')
        raise ValidationError(negative_msg)

    with dao.query_session() as session:
        found = dao.motif.find_motifs_by_ids(session, [mid], version)

    if len(found) > 0:
        return found[0]
    raise ValidationError(
        str.format(ErrorCode.INVALID_PARA, 'mid',
                   'the motif does not exists'))
Esempio n. 10
0
 def setUp(self):
     """Reset the database and load the SEQ1, SEQ2 and SEQ3 fixtures.

     The loaded sequences are stored on ``self.seq_ids_to_seq``, keyed by
     ``seq_id``.
     """
     set_up_db()
     fixtures = (
         ('SEQ2', 'SEQ2ABCDABCD', {'start': 10, 'count': 20}),
         ('SEQ1', 'SEQ1ABCDABCD', {}),
         ('SEQ3', 'SEQ3ABCDABCD', {'start': 50, 'count': 30}),
     )
     for seq_id, seq_str, options in fixtures:
         set_up_seq(seq_id, seq_str, **options)

     with dao.query_session() as session:
         loaded = dao.sequence.find_all_seqs(session)
     self.seq_ids_to_seq = {seq.seq_id: seq for seq in loaded}
     self.assertEqual(3, len(self.seq_ids_to_seq))
     self.assertEqual({'SEQ1', 'SEQ2', 'SEQ3'},
                      set(self.seq_ids_to_seq.keys()))
     # NOTE(review): this result is not used in the visible part of the
     # method; the call is kept in case later code relies on it.
     seq_ids_to_motifs = dao.find_motifs_by_seq_ids(
         self.seq_ids_to_seq.keys(), MOTIF_VERSION)
def main():
    """Collect overlapping motif ids over all sequences and tag them.

    For every sequence id, the motifs are loaded and passed to
    ``_find_overlap``; the ids it returns are accumulated and finally stored
    under the ``inner.overlap`` tag for MOTIF_VERSION.
    """
    logging.basicConfig(level=logging.WARNING)
    with dao.query_session() as session:
        all_seq_ids = dao.sequence.find_all_seq_ids(session)

    overlapped = set()
    for seq_id in all_seq_ids:
        logging.info(
            "Begin to analyse overlapping info for seq {}".format(seq_id))
        seq_motifs = _get_motifs_by_seq_id(seq_id)
        overlapped.update(_find_overlap(seq_id, seq_motifs))

    logging.warning(
        "All overlapped ids {}, write to db...".format(overlapped))
    dao.add_tags_by_names_to_ids({'inner.overlap': overlapped}, MOTIF_VERSION)
Esempio n. 12
0
    def test_tag_multiple_times(self):
        """Marking the same motifs wrong a second time changes nothing."""
        # Run the tagging test case first so the motifs are already marked.
        self.test_tag_false_discovery()

        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }

        # Mark the first motif of SEQ1 and of SEQ2 once more.
        motifs_by_seq_id = dao.find_motifs_by_seq_ids(
            {'SEQ1', 'SEQ2', 'SEQ3'}, MOTIF_VERSION)
        for seq_id in ('SEQ1', 'SEQ2'):
            mark_wrong(seq_by_seq_id[seq_id].id,
                       motifs_by_seq_id[seq_id][0].id)

        # The outcome is the same as after the first marking.
        for seq_id, expected_correct in (('SEQ1', 9), ('SEQ2', 19),
                                         ('SEQ3', 30)):
            correct = dao.find_correct_motifs_by_seq_ids(
                [seq_id], MOTIF_VERSION)[seq_id]
            self.assertEqual(expected_correct, len(correct))

        motifs_by_seq_id = dao.find_motifs_by_seq_ids(
            {'SEQ1', 'SEQ2', 'SEQ3'}, MOTIF_VERSION)
        self.assertEqual({'SEQ1', 'SEQ2', 'SEQ3'},
                         set(motifs_by_seq_id.keys()))
        self.assertFalse(motifs_by_seq_id['SEQ1'][0].correct)
        self.assertFalse(motifs_by_seq_id['SEQ2'][0].correct)
        self.assertTrue(motifs_by_seq_id['SEQ3'][0].correct)
        for seq_id, expected_total in (('SEQ1', 10), ('SEQ2', 20),
                                       ('SEQ3', 30)):
            self.assertEqual(expected_total, len(motifs_by_seq_id[seq_id]))
Esempio n. 13
0
    def test_can_not_unmark_wrong_overlapped_with_manually_added_one(self):
        """A wrong motif overlapped by a manual motif cannot be unmarked.

        Two cases are covered: the manual motif lies before the wrong one and
        after it. Once the manual motifs are deleted, unmarking works again.
        """
        # Mark the first and the last motif of SEQ2 wrong and add a manual
        # motif next to each, by reusing the two dedicated test cases.
        self.test_add_manually_motif_after_wrong_area()
        self.test_add_manually_motif_before_wrong_area()

        # After sorting by offset, the motifs checked here are the second and
        # the second-to-last entries of the list.
        motifs_by_seq_id = dao.find_motifs_by_seq_ids(['SEQ2'], MOTIF_VERSION)
        motifs_by_seq_id['SEQ2'].sort(key=lambda item: item.offset)
        first_motif = motifs_by_seq_id['SEQ2'][1]
        last_motif = motifs_by_seq_id['SEQ2'][-2]

        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }

        # Unmarking must fail for both motifs while the manual ones exist.
        for motif in (first_motif, last_motif):
            result = unmark_wrong(seq_by_seq_id['SEQ2'].id, motif.id)
            self.assertIsNotNone(result)
            msg = result.get('message', '')
            rejected = msg.endswith(
                ' can not be unmark from wrong because overlapping was found')
            self.assertTrue(rejected, result)

        # Delete the manually added motifs.
        for manual_motif in self._get_manually_motifs('SEQ2'):
            delete_motif(manual_motif.id)

        # Unmarking both motifs succeeds now.
        for motif in (first_motif, last_motif):
            result = unmark_wrong(seq_by_seq_id['SEQ2'].id, motif.id)
            self.assertFalse(result['false_discovery'])
Esempio n. 14
0
    def test_untag_false_discovery(self):
        """Unmarking SEQ1's first motif restores SEQ1's correct-motif count."""
        # Run the tagging test case first so the motifs are already marked.
        self.test_tag_false_discovery()

        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }

        # Remove the wrong mark from the first motif of SEQ1.
        motifs_by_seq_id = dao.find_motifs_by_seq_ids(
            {'SEQ1', 'SEQ2', 'SEQ3'}, MOTIF_VERSION)
        unmark_wrong(seq_by_seq_id['SEQ1'].id,
                     motifs_by_seq_id['SEQ1'][0].id)

        # SEQ1 is back to 10 correct motifs; SEQ2 and SEQ3 are unchanged.
        for seq_id, expected_correct in (('SEQ1', 10), ('SEQ2', 19),
                                         ('SEQ3', 30)):
            correct = dao.find_correct_motifs_by_seq_ids(
                [seq_id], MOTIF_VERSION)[seq_id]
            self.assertEqual(expected_correct, len(correct))

        motifs_by_seq_id = dao.find_motifs_by_seq_ids(
            {'SEQ1', 'SEQ2', 'SEQ3'}, MOTIF_VERSION)
        self.assertEqual({'SEQ1', 'SEQ2', 'SEQ3'},
                         set(motifs_by_seq_id.keys()))
        self.assertTrue(motifs_by_seq_id['SEQ1'][0].correct)
        self.assertFalse(motifs_by_seq_id['SEQ2'][0].correct)
        self.assertTrue(motifs_by_seq_id['SEQ3'][0].correct)
Esempio n. 15
0
def main():
    """Build the PSSM matrix from baseline motifs and scan every sequence.

    The matrix is computed from the 16-character motif strings cut out of the
    baseline sequences. One ``CalculateTask`` per sequence is then mapped
    over a 12-worker process pool with ``find_lrr_and_save_db_task``.
    """
    # Generate the matrix.
    baseline_seqs = dao.find_baseline_seqs()
    seq_str_by_seq_id = {seq.seq_id: seq.seq for seq in baseline_seqs}
    logging.info("Baseline sequence ids count({}): {}".format(
        len(seq_str_by_seq_id), seq_str_by_seq_id.keys()))

    motifs_by_seq_id = dao.find_motifs_by_seq_ids(seq_str_by_seq_id.keys(),
                                                  BASELINE_MOTIF_VERSION,
                                                  with_wrong=False)
    motif_strs = []
    for seq_motifs in motifs_by_seq_id.values():
        for m in seq_motifs:
            seq_str = seq_str_by_seq_id[m.seq_id]
            motif_strs.append(seq_str[m.offset:m.offset + 16])
    logging.info("Baseline LRR motifs( count {}): {}".format(
        len(motif_strs), motif_strs))

    matrix = pssm_matrix.calc_pssm_matrix(motif_strs)
    logging.info("Matrix: {}".format(matrix))
    logging.info("PSSM: {}".format(matrix.pssm))

    # Find the LRRs.
    with dao.query_session() as session:
        every_seq = dao.sequence.find_all_seqs(session)

    tasks = []
    for seq in every_seq:
        tasks.append(CalculateTask(matrix, seq))

    with Pool(12) as pool:
        pool.map(find_lrr_and_save_db_task, tasks)
    logging.info("All tasks done")
Esempio n. 16
0
def main():
    """Run the analysis steps in order, logging the end of each one.

    Step 1 runs on all sequences. Steps 3.5, 3.7 and 3.9 run on the sequences
    whose subgroup is in SUBGROUPS, steps 3.10 and 3.11 receive all sequences
    together with the motifs of the relevant ones, and the review supplement
    runs on the full data set.
    """
    with dao.query_session() as session:
        seqs = dao.sequence.find_all_seqs(session)

    step1(seqs)
    logging.info("Step 1 end")

    # From step 3.4 on only the selected subgroups matter, so the sequence
    # list is narrowed down.
    relevant_seqs = []
    relevant_seq_ids = []
    for seq in seqs:
        if seq.subgroup in SUBGROUPS:
            relevant_seqs.append(seq)
    for seq in relevant_seqs:
        relevant_seq_ids.append(seq.seq_id)
    nsites_by_seq_id = dao.find_nsites_by_seq_ids(relevant_seq_ids)
    lrrs_by_seq_id = dao.find_motifs_by_seq_ids(relevant_seq_ids,
                                                MOTIF_VERSION,
                                                with_wrong=False)

    # These three steps share the same argument list.
    for step, done_msg in ((step3_5, "Step 3.5 end"),
                           (step3_7, "Step 3.7 end"),
                           (step3_9, "Step 3.9 end")):
        step(relevant_seqs, nsites_by_seq_id, lrrs_by_seq_id)
        logging.info(done_msg)

    step3_10(lrrs_by_seq_id, seqs)
    logging.info("Step 3.10 end")

    step3_11(seqs, lrrs_by_seq_id)
    logging.info("Step 3.11 end")

    every_seq_id = [seq.seq_id for seq in seqs]
    every_nsites = dao.find_nsites_by_seq_ids(every_seq_id)
    every_lrrs = dao.find_motifs_by_seq_ids(every_seq_id,
                                            MOTIF_VERSION,
                                            with_wrong=False)
    supplement_for_review(seqs, every_nsites, every_lrrs)
    logging.info("Step supplement for review end")
Esempio n. 17
0
    def test_add_manually_motif_ok(self):
        """A manual add on SEQ1 raises its listed motif count from 10 to 11."""

        def fetch_seq1_entry():
            # List the sequences through the web service and pick SEQ1.
            with boddle(query={"page": 0, "size": 20}):
                raw = lrr_search_web_service.get_sequences(MOTIF_VERSION)
            self.assertIsNotNone(raw)
            listing = json.loads(raw)
            self.assertIsNotNone(listing.get("sequences"))
            entries = {
                entry.get("sequence_id"): entry
                for entry in listing.get("sequences")
            }
            return entries['SEQ1']

        set_up_baseline_seq('SEQ4', '', start=10, count=10, step=24)
        # Load every sequence first so the database id of SEQ1 is known.
        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }
        seq1_sid = seq_by_seq_id['SEQ1'].id

        # Before the add, SEQ1 lists 10 motifs.
        seq1_entry = fetch_seq1_entry()
        self.assertEqual(10, len(seq1_entry.get("motifs_16")))

        # A regular manual add succeeds.
        with boddle(json={"offset": 196}):
            response = lrr_search_web_service.add_manually_motif(
                MOTIF_VERSION, seq1_sid)
        self.assertIsNotNone(response)
        payload = json.loads(response)
        self.assertIn("motifs_16", payload)
        self.assertEqual(196, payload['motifs_16'][0]['offset'])

        # Listing the sequences again shows one more motif.
        seq1_entry = fetch_seq1_entry()
        self.assertEqual(11, len(seq1_entry.get("motifs_16")), seq1_entry)
Esempio n. 18
0
def main():
    """Run the analysis steps in order, logging the end of each one.

    Step 1 runs on all sequences. Steps 3.5, 3.7 and 3.9 run on the sequences
    whose subgroup is in SUBGROUPS, steps 3.10 and 3.11 receive all sequences
    together with the motifs of the relevant ones, and the review supplement
    runs on the full data set.
    """
    with dao.query_session() as session:
        seqs = dao.sequence.find_all_seqs(session)

    step1(seqs)
    logging.info("Step 1 end")

    # Only the selected subgroups are considered from step 3.4 on, so the
    # sequence list is narrowed down.
    relevant_seqs = list(
        filter(lambda seq: seq.subgroup in SUBGROUPS, seqs))
    relevant_seq_ids = [seq.seq_id for seq in relevant_seqs]
    nsites_by_seq_id = dao.find_nsites_by_seq_ids(relevant_seq_ids)
    lrrs_by_seq_id = dao.find_motifs_by_seq_ids(relevant_seq_ids,
                                                MOTIF_VERSION,
                                                with_wrong=False)

    # These three steps share the same argument list.
    shared_args = (relevant_seqs, nsites_by_seq_id, lrrs_by_seq_id)
    step3_5(*shared_args)
    logging.info("Step 3.5 end")
    step3_7(*shared_args)
    logging.info("Step 3.7 end")
    step3_9(*shared_args)
    logging.info("Step 3.9 end")

    step3_10(lrrs_by_seq_id, seqs)
    logging.info("Step 3.10 end")

    step3_11(seqs, lrrs_by_seq_id)
    logging.info("Step 3.11 end")

    every_seq_id = [seq.seq_id for seq in seqs]
    every_nsites = dao.find_nsites_by_seq_ids(every_seq_id)
    every_lrrs = dao.find_motifs_by_seq_ids(every_seq_id,
                                            MOTIF_VERSION,
                                            with_wrong=False)
    supplement_for_review(seqs, every_nsites, every_lrrs)
    logging.info("Step supplement for review end")
Esempio n. 19
0
    def test_add_manually_motif_can_not_mark_wrong(self):
        """A manually added motif must not be markable as wrong."""
        # Load every sequence first so the database id of SEQ1 is known.
        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }
        seq1_sid = seq_by_seq_id['SEQ1'].id

        # A regular manual add succeeds.
        with boddle(json={"offset": 196}):
            response = lrr_search_web_service.add_manually_motif(
                MOTIF_VERSION, seq1_sid)
        self.assertIsNotNone(response)
        payload = json.loads(response)
        self.assertIn('motifs_16', payload)
        self.assertEqual(196, payload['motifs_16'][0]['offset'])
        added_motif = self._get_manually_motifs('SEQ1')[0]

        # Marking the manually added motif wrong is refused.
        refusal = json.loads(mark_wrong(seq1_sid, added_motif.id))
        message = refusal.get("message")
        self.assertTrue(
            message.endswith(
                "and should not be tagged wrong again. The motif can be delete directly"
            ))
Esempio n. 20
0
def _check_motif_can_be_tag(mid, tag_names, version):
    """Ensure a motif with id ``mid`` exists in ``version``.

    ``tag_names`` is not read in the visible part of this function.

    Raises:
        ValidationError: If the lookup returns ``None`` or nothing.
    """
    with dao.query_session() as session:
        found = dao.motif.find_motifs_by_ids(session, [mid], version)
    if not found:
        raise ValidationError(str.format(ErrorCode.OBJECT_NOT_EXISTS, mid))
Esempio n. 21
0
    def test_add_manually_motif_invalid_input(self):
        """Each invalid manual-add request yields its specific error message."""

        def request_add(sid, offset):
            # Issue one add request and return the decoded JSON answer.
            with boddle(json={"offset": offset}):
                raw = lrr_search_web_service.add_manually_motif(
                    MOTIF_VERSION, sid)
            self.assertIsNotNone(raw)
            return json.loads(raw)

        # The sid is not a number.
        answer = request_add("a", 10)
        self.assertDictEqual(
            {'message': str.format(ErrorCode.INVALID_PARA, "sid", 'a')},
            answer)

        # The sid does not exist.
        answer = request_add(10240, 10)
        self.assertDictEqual(
            {'message': str.format(ErrorCode.OBJECT_NOT_EXISTS, 10240)},
            answer)

        # The offset is not a number.
        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }
        seq1_sid = seq_by_seq_id['SEQ1'].id
        seq2_sid = seq_by_seq_id['SEQ2'].id
        answer = request_add(seq1_sid, "b")
        self.assertDictEqual(
            {'message': str.format(ErrorCode.INVALID_PARA, "offset", 'b')},
            answer)

        # The offset overlaps on SEQ1.
        answer = request_add(seq1_sid, 195)
        self.assertDictEqual({'message': str.format(ErrorCode.OFFSET_OVERLAP)},
                             answer)

        # The offset overlaps on SEQ2.
        answer = request_add(seq2_sid, 185)
        self.assertDictEqual({'message': str.format(ErrorCode.OFFSET_OVERLAP)},
                             answer)

        # Adding on an offset already marked wrong is refused, because the
        # wrong mark can simply be removed instead.
        motifs_by_seq_id = dao.find_motifs_by_seq_ids(['SEQ1'], MOTIF_VERSION)
        seq1_second_motif = motifs_by_seq_id['SEQ1'][1]
        mark_wrong(seq1_sid, seq1_second_motif.id)
        answer = request_add(seq1_sid, seq1_second_motif.offset)
        self.assertDictEqual({'message': ErrorCode.OFFSET_EXISTS_WRONG},
                             answer)
Esempio n. 22
0
    def test_tag_false_discovery(self):
        """Marking a motif wrong lowers the correct count but keeps the motif.

        After the first motif of SEQ1 and of SEQ2 is marked wrong, each of
        the two sequences has one correct motif fewer, the marked motifs
        report ``correct`` as false, and the total motif counts are unchanged.
        """
        with dao.query_session() as session:
            seq_by_seq_id = {
                seq.seq_id: seq
                for seq in dao.sequence.find_all_seqs(session)
            }

        # 1. Before marking: check the correct-motif counts of SEQ1, SEQ2 and
        #    SEQ3 and the ``correct`` state of each first motif.
        for seq_id, expected_correct in (('SEQ1', 10), ('SEQ2', 20),
                                         ('SEQ3', 30)):
            correct = dao.find_correct_motifs_by_seq_ids(
                [seq_id], MOTIF_VERSION)[seq_id]
            self.assertEqual(expected_correct, len(correct))
        motifs_by_seq_id = dao.find_motifs_by_seq_ids(
            {'SEQ1', 'SEQ2', 'SEQ3'}, MOTIF_VERSION)
        self.assertEqual({'SEQ1', 'SEQ2', 'SEQ3'},
                         set(motifs_by_seq_id.keys()))
        for seq_id in ('SEQ1', 'SEQ2', 'SEQ3'):
            self.assertTrue(motifs_by_seq_id[seq_id][0].correct)

        # 2. Mark the first motif of SEQ1 and of SEQ2 as wrong.
        for seq_id in ('SEQ1', 'SEQ2'):
            response = mark_wrong(seq_by_seq_id[seq_id].id,
                                  motifs_by_seq_id[seq_id][0].id)
            self.assertIsNotNone(response)
            payload = json.loads(response)
            self.assertTrue(payload['false_discovery'])

        # 3. After marking: SEQ1 and SEQ2 each lose one correct motif and
        #    their first motif is no longer correct.
        for seq_id, expected_correct in (('SEQ1', 9), ('SEQ2', 19),
                                         ('SEQ3', 30)):
            correct = dao.find_correct_motifs_by_seq_ids(
                [seq_id], MOTIF_VERSION)[seq_id]
            self.assertEqual(expected_correct, len(correct))
        motifs_by_seq_id = dao.find_motifs_by_seq_ids(
            {'SEQ1', 'SEQ2', 'SEQ3'}, MOTIF_VERSION)
        self.assertEqual({'SEQ1', 'SEQ2', 'SEQ3'},
                         set(motifs_by_seq_id.keys()))
        self.assertFalse(motifs_by_seq_id['SEQ1'][0].correct)
        self.assertFalse(motifs_by_seq_id['SEQ2'][0].correct)
        self.assertTrue(motifs_by_seq_id['SEQ3'][0].correct)
        for seq_id, expected_total in (('SEQ1', 10), ('SEQ2', 20),
                                       ('SEQ3', 30)):
            self.assertEqual(expected_total, len(motifs_by_seq_id[seq_id]))
Esempio n. 23
0
def obtain_value(seq_id):
    """Fetch the nsites and the sequence record for ``seq_id``.

    Returns:
        A ``(nsites, seq)`` tuple.
    """
    with dao.query_session() as session:
        found_nsites = dao.nsite.find_nsites_by_seq_id(session, seq_id)
        sequence = dao.find_seq_by_id(seq_id)

    return found_nsites, sequence