def run_msa(queries, input_state):
    """Align ``queries`` into an MSA, build its consensus and write both out.

    Before alignment the queries may be subsampled and/or shuffled according
    to ``input_state.args``. The caller's ``queries`` object is never
    reordered or otherwise modified; all selection and shuffling happens on
    a private list.

    Args:
        queries: Indexable, sized sequence of query items to align.
        input_state: Object exposing an ``args`` mapping. Keys read here:

            * ``subsample`` -- ``1`` keeps every query; a value below 1 is
              taken as a fraction of ``len(queries)``; a value above 1 is an
              absolute count, capped at ``len(queries)``.
            * ``random_subset`` -- if truthy, the subsample is drawn at
              random (original relative order is kept); otherwise a
              contiguous window is taken.
            * ``subsample_start`` -- start of the contiguous window; a value
              below 1 is a fraction of ``len(queries)``, otherwise an
              absolute index.
            * ``random_order`` -- if truthy, shuffle the final query list.
            * ``thresh``, ``type`` -- forwarded to ``build_consensus``.
            * ``build`` -- forwarded as ``fname`` to the consensus writer.

    Returns:
        None. Results are emitted through ``ConsensusFilterFactory.write``.
    """
    args = input_state.args
    total = len(queries)

    if args['subsample'] != 1:
        if args['subsample'] < 1:
            subsample_size = int(args['subsample'] * total)
        else:
            subsample_size = int(min(total, args['subsample']))

        if args['random_subset']:
            # Randomize order and take first _subsample_size_ indices, then
            # sort them so the chosen queries keep their original order.
            query_ids = list(range(total))
            shuffle(query_ids)
            query_ids = sorted(query_ids[0:subsample_size])
            queries = [queries[i] for i in query_ids]
        else:
            if args['subsample_start'] < 1:
                start = int(args['subsample_start'] * total)
            else:
                start = int(args['subsample_start'])
            # Clamp the window's end to the data: a start close to the end
            # used to run past the last query and raise IndexError.
            stop = min(start + subsample_size, total)
            queries = [queries[i] for i in range(start, stop)]

    if args['random_order']:
        # shuffle() works in place; copy first so the caller's sequence is
        # not reordered when no subsample (and thus no new list) was made.
        queries = list(queries)
        shuffle(queries)

    msa_driver = MultipleSequenceDriver(queries, input_state)
    # Build composite
    msa_driver.build_composite()
    # Align sequences (iteratively if given in params)
    msa_driver.align()
    # Build resultant consensus
    consensus_object = msa_driver.build_consensus(args['thresh'], args['type'])
    # Write MSA and consensus to file
    consensus_fact = ConsensusFilterFactory(msa_driver, consensus_object)
    consensus_fact.write(fname=args['build'])
Beispiel #2
0
            msa.add_consensus(threshold, msa.build_consensus(float(threshold)))
            print("Consensus at threshold " + threshold + ": " + msa.get_consensus(threshold).consensus)

        if args["newick"]:
            if args["threshold"]:
                generate_consensus_newick(msa, msa.get_consensus(float(args["threshold"])), args["newick"])
            else:
                generate_consensus_newick(msa, list(msa.consensuses.values())[0], args["newick"])

        if args["scores"]:
            generate_score_and_conserved_chars_file(msa, args["scores"])

        if args["k"]:
            threshold, k = msa.find_conservation_boundary()
            print(
                "At threshold %.2f, average conservation and proportion of conserved characters are both %.2f%%"
                % (threshold, k * 100)
            )

        if args["newbuild"]:
            #            consensus_object = msa.build_consensus(args['threshold'],args['type'])

            # Write MSA and consensus to file
            consensus_fact = ConsensusFilterFactory(msa, msa.get_consensus(threshold))
            consensus_fact.write(fname=args["newbuild"])

        status_message("Consensus statistics computation complete ", "OK")

    except (IOError, KeyboardInterrupt, IndexError) as e:
        print(str(e))