def _subsample_queries(queries, args):
    """Return the queries selected by the subsampling options in *args*.

    Keys read from *args*:
      - ``'subsample'``: 1 disables subsampling (*queries* is returned
        unchanged); a value below 1 is used as a fraction of
        ``len(queries)``; a value above 1 is used as a count, capped at
        ``len(queries)``.
      - ``'random_subset'``: if truthy, pick the subset at random but keep
        the picked queries in their original relative order.
      - ``'subsample_start'``: only read for the non-random case; a value
        below 1 is used as a fraction of ``len(queries)``, otherwise as an
        index. It marks the first query of a contiguous window.
    """
    subsample = args['subsample']
    if subsample == 1:
        return queries

    total = len(queries)
    if subsample < 1:
        subsample_size = int(subsample * total)
    else:
        subsample_size = int(min(total, subsample))

    if args['random_subset']:
        # Shuffle the indices, keep the first subsample_size of them, then
        # sort so the chosen queries stay in their original order.
        query_ids = list(range(total))
        shuffle(query_ids)
        chosen_ids = sorted(query_ids[:subsample_size])
        return [queries[i] for i in chosen_ids]

    start_option = args['subsample_start']
    if start_option < 1:
        start = int(start_option * total)
    else:
        start = int(start_option)
    # Clamp the window end: a start offset plus the subsample size may reach
    # past the last query, which previously raised IndexError.
    stop = min(start + subsample_size, total)
    return [queries[i] for i in range(start, stop)]


def run_msa(queries, input_state):
    """Align *queries*, build a consensus and write the result to file.

    Options are read from ``input_state.args``: ``'subsample'``,
    ``'random_subset'`` and ``'subsample_start'`` select which queries are
    used, ``'random_order'`` shuffles them, ``'thresh'`` and ``'type'`` are
    passed to ``build_consensus``, and ``'build'`` is passed as ``fname``
    when writing the output.

    The caller's *queries* list is never reordered by this function.
    """
    args = input_state.args
    queries = _subsample_queries(queries, args)

    if args['random_order']:
        # Shuffle a copy; without subsampling `queries` is still the
        # caller's own list and shuffle() works in place.
        queries = list(queries)
        shuffle(queries)

    msa_driver = MultipleSequenceDriver(queries, input_state)
    # Build composite
    msa_driver.build_composite()
    # Align sequences (iteratively if given in params)
    msa_driver.align()
    # Build resultant consensus
    consensus_object = msa_driver.build_consensus(args['thresh'], args['type'])
    # Write MSA and consensus to file
    consensus_fact = ConsensusFilterFactory(msa_driver, consensus_object)
    consensus_fact.write(fname=args['build'])
def _extract_domains(targets, is_baseline, input_state, threshold, thresh_type, max_domain_size=15, min_domain_size=1):
    """Align *targets*, derive their consensus and build domains from it.

    Runs a MultipleSequenceDriver over *targets* (composite, then align),
    builds a consensus with *threshold* and *thresh_type*, and hands the
    consensus to a DomainSetBuilder. *max_domain_size* is passed
    positionally and *min_domain_size* as ``min_win``.

    Returns a tuple ``(is_baseline, domains, consensus)`` where
    *is_baseline* is handed back unchanged, *domains* is the result of the
    builder's ``build()`` and *consensus* is the ``consensus`` attribute of
    the consensus object.
    """
    # Multiple sequence alignment over the targets.
    driver = MultipleSequenceDriver(targets, input_state)
    driver.build_composite()
    driver.align()

    consensus_result = driver.build_consensus(threshold, thresh_type)

    # Original author's note: the consensus is already stripped of '-' chars.
    # The positional 0 and True are kept as given; their meaning is defined
    # by DomainSetBuilder, which is not visible here.
    builder = DomainSetBuilder(
        consensus_result.consensus,
        max_domain_size,
        0,
        True,
        is_enum=True,
        allowable_treeseq_types=['complete_tree', 'incomplete_tree'],
        min_win=min_domain_size,
    )
    return is_baseline, builder.build(), consensus_result.consensus