def get_sequences(version):
    """Return one page of sequences together with their motifs, N-sites and tags.

    The query filters (paging, keyword, and whatever ``_get_offset_arg``,
    ``_get_lrr_count_arg`` and ``_get_species_arg`` add to the dict) are
    collected through the module's ``_get_*_arg`` helpers, then passed to
    ``dao.query_sequences`` along with the resolved motif version.

    Args:
        version: raw version value; it is normalised by ``get_version_arg``.

    Returns:
        The value of ``response_ok`` applied to a dict with two keys:
        ``'sequences'`` (one ``sequence_entity_to_output`` result per
        sequence, in query order) and ``'total'`` (the total reported by
        ``dao.query_sequences``).
    """
    filters = {}
    filters['page_index'], filters['page_size'] = _get_page_arg()
    filters['keyword'] = _get_keyword_arg()
    # These helpers receive the dict and are expected to fill it in place.
    _get_offset_arg(filters)
    _get_lrr_count_arg(filters)
    _get_species_arg(filters)
    # page_index is rescaled by page_size -- presumably turning a page number
    # into a row offset for the DAO (TODO confirm against dao.query_sequences).
    filters['page_index'] = filters['page_index'] * filters['page_size']
    logging.debug("Filters: {}".format(filters))

    # Resolve the version once instead of re-parsing it for every use below.
    motif_version = get_version_arg(version)
    seqs, total = dao.query_sequences(filters, motif_version)

    # Find motifs and N-sites for the sequences on this page.
    seq_ids = [seq.seq_id for seq in seqs]
    seq_ids_to_motifs = dao.find_motifs_by_seq_ids(seq_ids, motif_version)
    seq_ids_to_nsites = dao.find_nsites_by_seq_ids(seq_ids)

    # Find tags; the overlap mark is tagged on version 1 motifs only.
    if motif_version == 1:
        motif_ids = {
            motif.id
            for motifs in seq_ids_to_motifs.values()
            for motif in motifs
        }
        ids_to_tag_names = dao.find_tags_by_motif_ids(motif_ids)
    else:
        ids_to_tag_names = {}

    result = {
        'sequences': [
            sequence_entity_to_output(
                seq,
                # Sequences without motifs / N-sites get an empty list.
                seq_ids_to_motifs.get(seq.seq_id, []),
                seq_ids_to_nsites.get(seq.seq_id, []),
                ids_to_tag_names)
            for seq in seqs
        ],
        'total': total,
    }
    return response_ok(result, True)
def main():
    """Run the analysis steps in order, logging a message after each one.

    Step 1 and steps 3.10 / 3.11 receive every sequence; steps 3.5, 3.7 and
    3.9 receive only the sequences whose subgroup is in ``SUBGROUPS``. The
    final review supplement is run with N-site and motif lookups rebuilt for
    the full sequence list.
    """
    # NOTE(review): the session is assumed to be needed only for this query;
    # confirm the returned entities stay usable after it is closed.
    with dao.query_session() as session:
        all_seqs = dao.sequence.find_all_seqs(session)

    step1(all_seqs)
    logging.info("Step 1 end")

    # Only the subgroups in SUBGROUPS are considered from step 3.4 onwards,
    # so the following steps work on a reduced sequence list.
    subgroup_seqs = [s for s in all_seqs if s.subgroup in SUBGROUPS]
    subgroup_seq_ids = [s.seq_id for s in subgroup_seqs]
    nsites_by_seq_id = dao.find_nsites_by_seq_ids(subgroup_seq_ids)
    lrrs_by_seq_id = dao.find_motifs_by_seq_ids(
        subgroup_seq_ids, MOTIF_VERSION, with_wrong=False)

    # These three steps share one call signature, so drive them from a table.
    subgroup_steps = (
        (step3_5, "Step 3.5 end"),
        (step3_7, "Step 3.7 end"),
        (step3_9, "Step 3.9 end"),
    )
    for run_step, done_message in subgroup_steps:
        run_step(subgroup_seqs, nsites_by_seq_id, lrrs_by_seq_id)
        logging.info(done_message)

    # Steps 3.10 and 3.11 take the full sequence list but still use the motif
    # lookup that was built for the subgroup-restricted ids.
    step3_10(lrrs_by_seq_id, all_seqs)
    logging.info("Step 3.10 end")

    step3_11(all_seqs, lrrs_by_seq_id)
    logging.info("Step 3.11 end")

    # The review supplement needs lookups covering every sequence.
    every_seq_id = [s.seq_id for s in all_seqs]
    all_nsites_by_seq_id = dao.find_nsites_by_seq_ids(every_seq_id)
    all_lrrs_by_seq_id = dao.find_motifs_by_seq_ids(
        every_seq_id, MOTIF_VERSION, with_wrong=False)
    supplement_for_review(all_seqs, all_nsites_by_seq_id, all_lrrs_by_seq_id)
    logging.info("Step supplement for review end")
def main():
    """Run the analysis steps in order, logging a message after each one.

    Step 1 and steps 3.10 / 3.11 receive every sequence; steps 3.5, 3.7 and
    3.9 receive only the sequences whose subgroup is in ``SUBGROUPS``. The
    final review supplement is run with N-site and motif lookups rebuilt for
    the full sequence list.
    """
    # NOTE(review): the session is assumed to be needed only for this query;
    # confirm the returned entities stay usable after it is closed.
    with dao.query_session() as session:
        all_seqs = dao.sequence.find_all_seqs(session)

    step1(all_seqs)
    logging.info("Step 1 end")

    # From step 3.4 onwards only the specific subgroups in SUBGROUPS are of
    # interest, so the sequence list is trimmed down accordingly.
    subgroup_seqs = [s for s in all_seqs if s.subgroup in SUBGROUPS]
    subgroup_seq_ids = [s.seq_id for s in subgroup_seqs]
    nsites_by_seq_id = dao.find_nsites_by_seq_ids(subgroup_seq_ids)
    lrrs_by_seq_id = dao.find_motifs_by_seq_ids(
        subgroup_seq_ids, MOTIF_VERSION, with_wrong=False)

    # These three steps share one call signature, so drive them from a table.
    subgroup_steps = (
        (step3_5, "Step 3.5 end"),
        (step3_7, "Step 3.7 end"),
        (step3_9, "Step 3.9 end"),
    )
    for run_step, done_message in subgroup_steps:
        run_step(subgroup_seqs, nsites_by_seq_id, lrrs_by_seq_id)
        logging.info(done_message)

    # Steps 3.10 and 3.11 take the full sequence list but still use the motif
    # lookup that was built for the subgroup-restricted ids.
    step3_10(lrrs_by_seq_id, all_seqs)
    logging.info("Step 3.10 end")

    step3_11(all_seqs, lrrs_by_seq_id)
    logging.info("Step 3.11 end")

    # The review supplement needs lookups covering every sequence.
    every_seq_id = [s.seq_id for s in all_seqs]
    all_nsites_by_seq_id = dao.find_nsites_by_seq_ids(every_seq_id)
    all_lrrs_by_seq_id = dao.find_motifs_by_seq_ids(
        every_seq_id, MOTIF_VERSION, with_wrong=False)
    supplement_for_review(all_seqs, all_nsites_by_seq_id, all_lrrs_by_seq_id)
    logging.info("Step supplement for review end")
def step3_11(seqs, seq_ids_to_lrrs):
    """Count N-sites per subgroup, split by type, and plot them as a histogram.

    Args:
        seqs: sequences to tally; each must expose ``seq_id``.
        seq_ids_to_lrrs: motif lookup forwarded to ``_get_subgroup`` when
            resolving the subgroup of each sequence.

    Sequences for which ``_get_subgroup`` returns ``None`` are skipped. Every
    other subgroup gets an ``{'S': ..., 'T': ...}`` entry even if none of its
    sequences has an N-site. The tallies are handed to ``generate_histogram``.
    """
    unique_seq_ids = {seq.seq_id for seq in seqs}
    nsites_by_seq_id = dao.find_nsites_by_seq_ids(unique_seq_ids)

    tally_by_subgroup = {}
    for seq in seqs:
        subgroup = _get_subgroup(seq, seq_ids_to_lrrs)
        if subgroup is None:
            continue

        # Register the subgroup before looking at its N-sites so that it
        # appears in the output even when the count stays at zero.
        tally = tally_by_subgroup.setdefault(subgroup, {'S': 0, 'T': 0})
        for nsite in nsites_by_seq_id.get(seq.seq_id, []):
            # Anything that is not an S-type site is counted under 'T'.
            bucket = 'S' if nsite.ntype == dao.nsite.S else 'T'
            tally[bucket] += 1

    generate_histogram(tally_by_subgroup, 'ntype',
                       'step3_11_n_count_by_subgroup_type.png')