def test_replace_tags_by_motifs(self):
    """Check that replacing a motif's tags keeps SEQ1 and the motif intact.

    The first motif found for SEQ1 is given the tags ``['a', 'b', 'c']``
    through ``dao.replace_tags_by_motifs``. Before and after that call the
    motif must be the only row matching its id and must have a truthy
    ``correct`` attribute; after the call SEQ1 must still be found and
    still report 10 correct motifs.
    """
    with query_session() as session:
        seq_ids_to_motifs = {
            'SEQ1': dao.motif.find_motifs_by_seq_id(
                session, 'SEQ1', MOTIF_VERSION),
            'SEQ2': dao.motif.find_motifs_by_seq_id(
                session, 'SEQ2', MOTIF_VERSION),
        }
        # Read the id while the session is still open.
        target_motif = seq_ids_to_motifs['SEQ1'][0]
        mids_to_tags = {target_motif.id: ['a', 'b', 'c']}

    cls = dao.motif.get_entity(MOTIF_VERSION)

    def assert_single_correct_motif():
        # The tagged motif must be the only match and flagged as correct.
        with query_session() as check_session:
            query = check_session.query(cls)
            found = query.filter(cls.id.in_(mids_to_tags.keys())).all()
            self.assertEqual(1, len(found))
            self.assertTrue(found[0].correct)

    assert_single_correct_motif()

    dao.replace_tags_by_motifs(mids_to_tags, MOTIF_VERSION)

    seq = dao.find_seq_by_id('SEQ1')
    self.assertIsNotNone(seq)
    correct_by_seq = dao.find_correct_motifs_by_seq_ids(['SEQ1'],
                                                        MOTIF_VERSION)
    self.assertEqual(10, len(correct_by_seq['SEQ1']))

    assert_single_correct_motif()
def query_density(seq_id):
    """Load the motifs and the sequence record for ``seq_id``.

    Motifs are looked up for ``MOTIF_VERSION`` with ``with_wrong=False``
    (presumably this excludes motifs marked as wrong -- confirm against
    ``dao.motif.find_motifs_by_seq_id``).

    Returns:
        A ``(motifs, seq)`` tuple.
    """
    with dao.query_session() as db_session:
        found_motifs = dao.motif.find_motifs_by_seq_id(
            db_session, seq_id, MOTIF_VERSION, with_wrong=False)
        seq_record = dao.find_seq_by_id(seq_id)
    return found_motifs, seq_record
def obtain_value(seq_id):
    """Load the motifs, nsites and sequence record for ``seq_id``.

    Motifs are looked up for ``MOTIF_VERSION`` with ``with_wrong=False``
    (presumably this excludes motifs marked as wrong -- confirm against
    ``dao.motif.find_motifs_by_seq_id``).

    Returns:
        A ``(motifs, nsites, seq)`` tuple.
    """
    with dao.query_session() as db_session:
        found_motifs = dao.motif.find_motifs_by_seq_id(
            db_session, seq_id, MOTIF_VERSION, with_wrong=False)
        found_nsites = dao.nsite.find_nsites_by_seq_id(db_session, seq_id)
        seq_record = dao.find_seq_by_id(seq_id)
    return found_motifs, found_nsites, seq_record
def test_add_seq(self):
    """Check that SEQ1 exists with 10 correct motifs and the expected text."""
    stored = dao.find_seq_by_id('SEQ1')
    self.assertIsNotNone(stored)

    correct_by_seq = dao.find_correct_motifs_by_seq_ids(['SEQ1'],
                                                        MOTIF_VERSION)
    self.assertEqual(10, len(correct_by_seq['SEQ1']))

    self.assertEqual('SEQABCDEFG', stored.seq)
def read_in_scratch_result(result):
    """Validate one scratch result and store it on the matching sequence.

    Reads ``result.ss``, ``result.ss8`` and ``result.acc`` as single-record
    FASTA inputs via ``SeqIO.read`` and ``result.acc20`` via
    ``read_in_acc20``. The result is written with
    ``dao.update_seq_for_scratch_result`` only when all four records share
    the same id and sequence length, and the sequence stored in the
    database has that same length. Every rejected result is logged as an
    error and skipped.

    Args:
        result: Object exposing ``ss``, ``ss8``, ``acc`` and ``acc20``
            attributes (presumably file paths or handles -- confirm
            against the caller).

    Returns:
        None, both on success and when the result is rejected.
    """
    ss = SeqIO.read(result.ss, 'fasta')
    ss8 = SeqIO.read(result.ss8, 'fasta')
    acc = SeqIO.read(result.acc, 'fasta')
    acc20 = read_in_acc20(result.acc20)
    if acc20 is None:
        return

    if any(ss.id != other.id for other in (ss8, acc, acc20)):
        logging.error(
            "The result for seq {} is invalid, the seq id is different".format(
                ss.id))
        return

    expected_len = len(ss.seq)
    if any(expected_len != len(other.seq) for other in (ss8, acc, acc20)):
        logging.error(
            "The result for seq {} is invalid, the seq length is different"
            .format(ss.id))
        return

    not_found_msg = (
        "Failed to save result for seq {}, can not find the record in the db"
        .format(ss.id))
    try:
        seq_entity = dao.find_seq_by_id(ss.id)
    except Exception:
        # Stay best-effort (skip this result), but keep the traceback so a
        # database failure is distinguishable from a missing record.
        logging.exception(not_found_msg)
        return
    if seq_entity is None:
        logging.error(not_found_msg)
        return

    if expected_len != len(seq_entity.seq):
        logging.error(
            "The result for seq {} is invalid, the seq length is different with db"
            .format(ss.id))
        return

    logging.info("Update scratch result for seq " + ss.id)
    dao.update_seq_for_scratch_result(ss.id, str(ss.seq), str(ss8.seq),
                                      str(acc.seq), ' '.join(acc20.seq))
def test_unmark_wrong_with_other_wrong(self):
    """Unmarking one wrong motif while another stays marked as wrong."""
    # When unmarking a wrong motif, motifs that are already marked as wrong
    # must be taken into account and must not disturb the normal calculation.
    set_up_seq('SEQ4', 'SEQ4ABCDABCDABCD', start=10, count=2, step=10)
    seq4 = dao.find_seq_by_id('SEQ4')

    def correct_motifs():
        # Re-query every time so each assertion sees the current state.
        by_seq = dao.find_correct_motifs_by_seq_ids(['SEQ4'], MOTIF_VERSION)
        return by_seq['SEQ4']

    motifs = correct_motifs()
    self.assertEqual(2, len(motifs))
    first_motif, second_motif = motifs

    # Mark both motifs as wrong, the second one first.
    mark_wrong(seq4.id, second_motif.id)
    mark_wrong(seq4.id, first_motif.id)
    self.assertEqual(0, len(correct_motifs()))

    # Restoring only the first motif must yield exactly one correct motif.
    unmark_wrong(seq4.id, first_motif.id)
    self.assertEqual(1, len(correct_motifs()))
def obtain_value(seq_id):
    """Load the nsites and the sequence record for ``seq_id``.

    Returns:
        A ``(nsites, seq)`` tuple.
    """
    with dao.query_session() as db_session:
        found_nsites = dao.nsite.find_nsites_by_seq_id(db_session, seq_id)
        seq_record = dao.find_seq_by_id(seq_id)
    return found_nsites, seq_record
import dao from tools import motifs as motif_tool, pssm_matrix MOTIF_VERSION = 2 with dao.session_scope() as session: matrix = pssm_matrix.calc_pssm_matrix(dao.find_baseline_motifs()) logging.info(matrix.pssm) cls = dao.motif.get_entity(MOTIF_VERSION) motifs = session.query(cls).filter(cls.manually_add).all() results = [] for m in motifs: seq = dao.find_seq_by_id(m.seq_id) motif_seq = seq.seq[m.offset:m.offset + 24] score = motif_tool.calc_pssm_score(motif_seq[:16], matrix) m.score = score results.append({ 'seq_id': m.seq_id, 'offset': m.offset, 'seq': motif_seq, 'score': score }) logging.info(str.format("Total manually records {}", len(results))) results.sort(key=lambda k: k['score']) for r in results: logging.info( str.format(