def run_msa(queries, input_state):
    """Optionally subsample and/or shuffle *queries*, align them, and write the result.

    Options are read from ``input_state.args``:

    * ``'subsample'`` -- ``1`` keeps every query; a value below 1 is treated
      as a fraction of ``len(queries)``; a value above 1 is treated as a
      count, capped at ``len(queries)``.
    * ``'random_subset'`` -- when truthy, the subsample is drawn at random
      and the chosen queries keep their original relative order.
    * ``'subsample_start'`` -- used only when ``'random_subset'`` is falsy:
      start of the contiguous window, as a fraction of ``len(queries)`` if
      below 1, otherwise as an index.
    * ``'random_order'`` -- when truthy, the (possibly subsampled) queries
      are shuffled before alignment.
    * ``'thresh'`` / ``'type'`` -- forwarded to
      ``MultipleSequenceDriver.build_consensus``.
    * ``'build'`` -- passed as ``fname`` to ``ConsensusFilterFactory.write``.

    The caller's ``queries`` sequence is never reordered in place.

    Returns:
        None. The only output is whatever ``ConsensusFilterFactory.write``
        produces.
    """
    args = input_state.args
    subsample = args['subsample']

    if subsample != 1:
        total = len(queries)
        if subsample < 1:
            subsample_size = int(subsample * total)
        else:
            subsample_size = int(min(total, subsample))

        if args['random_subset']:
            # Randomize order and take first _subsample_size_ indices, then
            # sort them so the selected queries stay in their input order.
            query_ids = list(range(total))
            shuffle(query_ids)
            query_ids = sorted(query_ids[:subsample_size])
            queries = [queries[i] for i in query_ids]
        else:
            subsample_start = args['subsample_start']
            if subsample_start < 1:
                start = int(subsample_start * total)
            else:
                start = int(subsample_start)
            # Clamp the window to the end of the input; without this a window
            # that overruns the sequence raised IndexError.
            stop = min(start + subsample_size, total)
            queries = [queries[i] for i in range(start, stop)]

    if args['random_order']:
        # shuffle() works in place, so operate on a copy to avoid reordering
        # the sequence owned by the caller.
        queries = list(queries)
        shuffle(queries)

    msa_driver = MultipleSequenceDriver(queries, input_state)
    # Build composite
    msa_driver.build_composite()
    # Align sequences (iteratively if given in params)
    msa_driver.align()
    # Build resultant consensus
    consensus_object = msa_driver.build_consensus(args['thresh'], args['type'])
    # Write MSA and consensus to file
    consensus_fact = ConsensusFilterFactory(msa_driver, consensus_object)
    consensus_fact.write(fname=args['build'])
Exemple #2
0
def _extract_domains(targets, is_baseline, input_state, threshold, thresh_type, max_domain_size=15, min_domain_size=1):
    """Align *targets*, derive their consensus, and build a domain set from it.

    Args:
        targets: Sequences handed to ``MultipleSequenceDriver``.
        is_baseline: Not inspected here; returned unchanged as the first
            element of the result so the caller can label the output.
        input_state: Passed through to ``MultipleSequenceDriver``.
        threshold: First argument to ``build_consensus``.
        thresh_type: Second argument to ``build_consensus``.
        max_domain_size: Second positional argument to ``DomainSetBuilder``.
        min_domain_size: Passed to ``DomainSetBuilder`` as ``min_win``.

    Returns:
        A tuple ``(is_baseline, domains, consensus)`` where ``domains`` is the
        result of ``DomainSetBuilder.build()`` and ``consensus`` is the
        ``consensus`` attribute of the consensus object.
    """
    # Run the multiple sequence alignment.
    driver = MultipleSequenceDriver(targets, input_state)
    driver.build_composite()
    driver.align()

    # Derive the consensus from the aligned sequences.
    consensus_result = driver.build_consensus(threshold, thresh_type)

    # Extract domains. NOTE: per the original author's comment, the
    # ``consensus`` attribute is already stripped of '-' characters.
    builder = DomainSetBuilder(
        consensus_result.consensus,
        max_domain_size,
        0,
        True,
        is_enum=True,
        allowable_treeseq_types=['complete_tree', 'incomplete_tree'],
        min_win=min_domain_size,
    )

    return is_baseline, builder.build(), consensus_result.consensus