import argparse
import re
import sys

from errors import filecheck_and_quit, directorycheck_and_quit

# Script: read a gzipped SequenceCollection from disk, load phyml results for
# its cluster records from a directory, update the scores and write the
# collection back to the same input file.

# NOTE(review): inside the character class, `.-_` is a range (0x2E-0x5F), so
# the pattern matches '/' but not a literal '-'. Left unchanged because the
# displayed program name depends on it -- confirm the intent.
progname = re.compile('[A-Za-z0-9.-_]+').search(sys.argv[0]).group()
desc = 'Read in a SequenceCollection from disk and print scores'
input_help = 'Filepath+name of gzipped SequenceCollection object'
# The -t option names a directory, so it gets its own help text instead of
# reusing the description of the gzipped input file.
phyml_help = 'Directory containing phyml results'
# Not referenced anywhere else in the visible part of this script.
category_choices = ['Observed', 'Randomised', 'Simulated', 'NA']

parser = argparse.ArgumentParser(prog=progname, description=desc)
# Both options are mandatory: without them the code below would fail with an
# AttributeError on None instead of a usage message.
parser.add_argument('-i', dest='input_file', help=input_help, type=str,
                    required=True)
parser.add_argument('-t', dest='phyml_dir', help=phyml_help, type=str,
                    required=True)
args = parser.parse_args()

input_file = args.input_file
phyml_dir = args.phyml_dir.rstrip('/')  # drop any trailing slash

# Validate both paths before doing any work (helpers come from `errors`).
filecheck_and_quit(input_file)
directorycheck_and_quit(phyml_dir)

# Imported only after the arguments have been validated, as in the original.
from sequence_collection import SequenceCollection

sc = SequenceCollection.gunzip(input_file)
cluster_records = sc.get_cluster_records()
sc.load_phyml_results(phyml_dir, records=cluster_records, use_hashname=True)
sc.update_scores()
# The updated collection overwrites the file it was read from.
sc.gzip(input_file)
# Script: read a gzipped SequenceCollection from disk and dump its records
# into a directory -- pre-clustering single records by default, or the
# post-clustering concatenated records when -c is given.
# NOTE(review): argparse, re, sys, filecheck_and_quit and
# directorycheck_and_make are not imported in the visible part of this
# script -- confirm they are brought into scope above.

# NOTE(review): inside the character class, `.-_` is a range (0x2E-0x5F), so
# the pattern matches '/' but not a literal '-' -- confirm the intent.
progname = re.compile('[A-Za-z0-9.-_]+').search(sys.argv[0]).group()
desc = 'Read in a SequenceCollection from disk and dump records'
input_help = 'Filepath+name of gzipped SequenceCollection object'
output_help = 'Directory to dump files in'
choice_help = \
    '\n'.join(['Choose to dump post-clustering concatenated records',
               'instead of pre-clustering single records'])

parser = argparse.ArgumentParser(prog=progname, description=desc)
# -i and -o are mandatory: without them the code below would fail on None
# instead of printing a usage message.
parser.add_argument('-i', dest='input_file', help=input_help, type=str,
                    required=True)
parser.add_argument('-o', dest='output_dir', help=output_help, type=str,
                    required=True)
# choice_help was previously built but never attached to the option.
parser.add_argument('-c', dest='cluster_recs', action='store_true',
                    help=choice_help)
args = parser.parse_args()

input_file = args.input_file
output_dir = args.output_dir.rstrip('/')  # drop any trailing slash
cluster_recs = args.cluster_recs

filecheck_and_quit(input_file)
directorycheck_and_make(output_dir)

# Imported only after the arguments have been validated, as in the original.
from sequence_collection import SequenceCollection

sc = SequenceCollection.gunzip(input_file)

if cluster_recs:
    records = sc.get_cluster_records()
else:
    # Per the original author this should be the default anyway, but the
    # records are passed explicitly rather than relying on that.
    records = sc.get_records()

sc.dump_records(output_dir, records)
choices=valid_methods, default='spectral', )
# ^ closes a call that begins before the visible part of this script.

# Script body: load a pickled collection from the input directory, load phyml
# results, autotune, cluster, build cluster trees and print sorted scores.
# NOTE(review): the helpers (directorycheck_and_raise, directorycheck_and_make,
# filecheck_and_quit) and cPickle are defined/imported outside this view;
# their behaviour is assumed from their names only -- confirm.

# Parsed arguments as a plain dict.
args = vars(parser.parse_args())
input_dir = args['input'].rstrip('/')  # drop any trailing slash
tmpdir = args['tmpdir'].rstrip('/')
min_clusters = args['min_clusters']
max_clusters = args['max_clusters']
distance = args['distance']
method = args['cluster_method']
# `pickle` here is a file path (scrand.pkl inside the input directory), not
# the standard-library module of the same name.
pickle = '{0}/scrand.pkl'.format(input_dir)

directorycheck_and_raise(input_dir)
directorycheck_and_make(tmpdir)
filecheck_and_quit(pickle)

# NOTE(review): the file object passed to cPickle.load is never explicitly
# closed, and it is opened in the default (text) mode.
sc = cPickle.load(open(pickle))
sc.tmpdir = tmpdir

print 'Loading phyml results...'
sc.load_phyml_results(input_dir, use_hashname=True)

print 'Autotuning...'
sc.autotune(distance, max_groups=max_clusters)

print 'Clustering...'
# NOTE(review): range() stops before max_clusters, so max_clusters itself is
# not among the values passed here -- confirm this is intended.
sc.put_partitions(distance, method, range(min_clusters, max_clusters))
sc.concatenate_records()
sc.put_cluster_trees(program='bionj')

# Scores are ordered by the first element of each entry.
scores = sorted(sc.get_scores(), key=lambda x: x[0])
print 'Scores:'
for score in scores:
    print score
import argparse
import re
# The script uses sys, cPickle and the two `errors` helpers below, so they
# are imported here; the original import list omitted them.
import sys
import cPickle

from errors import filecheck_and_quit, directorycheck_and_make

# Script: read a SequenceCollection from a pickle file, make a randomised
# copy, dump the copy's records into the output directory and pickle the copy
# there as scrand.pkl.

# NOTE(review): inside the character class, `.-_` is a range (0x2E-0x5F), so
# the pattern matches '/' but not a literal '-' -- confirm the intent.
progname = re.compile('[A-Za-z0-9.-_]+').search(sys.argv[0]).group()
desc = '\n'.join(['Read a SequenceCollection from pickle,',
                  'make a randomised copy,',
                  'dump records'])
input_help = 'Path+Filename for the input pickle file'
output_help = \
    'Path to output directory. Will be created if doesn\'t exist'

parser = argparse.ArgumentParser(
    prog=progname,
    description=desc,
    formatter_class=argparse.RawTextHelpFormatter)
# Both paths are mandatory; requiring them gives a usage message instead of a
# failure on None further down.
parser.add_argument('-i', '--input', help=input_help, type=str,
                    required=True)
parser.add_argument('-o', '--output', help=output_help, type=str,
                    required=True)
args = vars(parser.parse_args())

pickle = args['input']  # a file path, not the stdlib `pickle` module
output_dir = args['output']

filecheck_and_quit(pickle)  # can't find file -> quit
directorycheck_and_make(output_dir)  # can't find directory -> create it

# NOTE(security): unpickling executes arbitrary code; only use trusted input.
# Binary mode and `with` make the read portable and close the handle.
with open(pickle, 'rb') as infile:
    sc = cPickle.load(infile)

scrand = sc.make_randomised_copy()
scrand.dump_records(output_dir)

# Same default pickle protocol as before; only the file handling changed.
with open('{0}/scrand.pkl'.format(output_dir), 'wb') as outfile:
    cPickle.dump(scrand, outfile)
from errors import filecheck_and_quit, directorycheck_and_make
import cPickle
import sys
import argparse
import re

# Script: read a SequenceCollection from a pickle file, make a randomised
# copy, dump the copy's records into the output directory and pickle the copy
# there as scrand.pkl.

# NOTE(review): inside the character class, `.-_` is a range (0x2E-0x5F), so
# the pattern matches '/' but not a literal '-' -- confirm the intent.
progname = re.compile("[A-Za-z0-9.-_]+").search(sys.argv[0]).group()
desc = "\n".join(["Read a SequenceCollection from pickle,",
                  "make a randomised copy,",
                  "dump records"])
input_help = "Path+Filename for the input pickle file"
output_help = "Path to output directory. Will be created if doesn't exist"

parser = argparse.ArgumentParser(
    prog=progname,
    description=desc,
    formatter_class=argparse.RawTextHelpFormatter,
)
# Both paths are mandatory; requiring them gives a usage message instead of a
# failure on None further down.
parser.add_argument("-i", "--input", help=input_help, type=str,
                    required=True)
parser.add_argument("-o", "--output", help=output_help, type=str,
                    required=True)
args = vars(parser.parse_args())

pickle = args["input"]  # a file path, not the stdlib `pickle` module
output_dir = args["output"]
scrand_path = "{0}/scrand.pkl".format(output_dir)

filecheck_and_quit(pickle)  # can't find file -> quit
directorycheck_and_make(output_dir)  # can't find directory -> create it

# NOTE(security): unpickling executes arbitrary code; only use trusted input.
# The handle is opened in binary mode and closed deterministically.
with open(pickle, "rb") as source:
    sc = cPickle.load(source)

scrand = sc.make_randomised_copy()
scrand.dump_records(output_dir)

# Default pickle protocol is kept so existing readers of scrand.pkl still work.
with open(scrand_path, "wb") as target:
    cPickle.dump(scrand, target)