import argparse
import re
import sys
from errors import filecheck_and_quit, directorycheck_and_quit

# Score-update script: load a gzipped SequenceCollection, attach the phyml
# results for its cluster records, refresh the scores and write the
# collection back over the input file.

# NOTE(review): inside the character class, `.-_` is a *range* ('.' through
# '_'), not three literal characters, so '/' and ':' match while a literal
# '-' does not. The pattern is left untouched because changing it alters the
# reported program name -- confirm the intent before tightening it.
progname = re.compile('[A-Za-z0-9.-_]+').search(sys.argv[0]).group()
desc = 'Read in a SequenceCollection from disk and print scores'
input_help = 'Filepath+name of gzipped SequenceCollection object'
phyml_help = 'Directory containing the phyml results to load'
# Not referenced by the visible part of this script; kept in case other code
# relies on it.
category_choices = ['Observed', 'Randomised', 'Simulated', 'NA']

parser = argparse.ArgumentParser(prog=progname, description=desc)
parser.add_argument('-i', dest='input_file', help=input_help, type=str)
# -t previously reused input_help, so --help described the wrong argument.
# It is required because the value is dereferenced (.rstrip) straight away;
# omitting it used to end in an AttributeError on None.
parser.add_argument('-t', dest='phyml_dir', help=phyml_help, type=str,
                    required=True)

args = parser.parse_args()
input_file = args.input_file
phyml_dir = args.phyml_dir.rstrip('/')  # drop any trailing slash

# Validate both paths before the SequenceCollection import below.
filecheck_and_quit(input_file)
directorycheck_and_quit(phyml_dir)

from sequence_collection import SequenceCollection

sc = SequenceCollection.gunzip(input_file)
cluster_records = sc.get_cluster_records()
sc.load_phyml_results(phyml_dir, records=cluster_records,
                      use_hashname=True)
sc.update_scores()
# The updated collection is written back to the same path it was read from.
sc.gzip(input_file)
# Record-dump script: load a gzipped SequenceCollection and dump either its
# pre-clustering single records or (with -c) its post-clustering
# concatenated records into an output directory.

# directorycheck_and_make is called below but is not imported ahead of this
# point in the file, so bring it into scope here.
from errors import directorycheck_and_make

# NOTE(review): `.-_` inside the character class is a range ('.' to '_'),
# so '/' matches and a literal '-' does not -- confirm the intended set.
progname = re.compile('[A-Za-z0-9.-_]+').search(sys.argv[0]).group()
desc = 'Read in a SequenceCollection from disk and dump records'
input_help = 'Filepath+name of gzipped SequenceCollection object'
output_help = 'Directory to dump files in'
choice_help = \
    '\n'.join(['Choose to dump post-clustering concatenated records',
              'instead of pre-clustering single records'])
parser = argparse.ArgumentParser(prog=progname, description=desc)
parser.add_argument('-i', dest='input_file', help=input_help, type=str)
# -o is required: its value is dereferenced (.rstrip) immediately, so a
# missing option used to end in an AttributeError on None.
parser.add_argument('-o', dest='output_dir', help=output_help, type=str,
                    required=True)
# choice_help was built above but never attached to the flag it describes.
parser.add_argument('-c', dest='cluster_recs', action='store_true',
                    help=choice_help)

args = parser.parse_args()
input_file = args.input_file
output_dir = args.output_dir.rstrip('/')  # drop any trailing slash
cluster_recs = args.cluster_recs

filecheck_and_quit(input_file)
directorycheck_and_make(output_dir)

from sequence_collection import SequenceCollection

sc = SequenceCollection.gunzip(input_file)
# Pick the record set explicitly in both cases, then dump once.
if cluster_recs:
    records = sc.get_cluster_records()
else:
    records = sc.get_records()
sc.dump_records(output_dir, records)
Example #3
0
    choices=valid_methods,
    default='spectral',
)

# Clustering driver: load the pickled collection found in the input
# directory, attach phyml results, autotune, partition, build cluster trees
# and print the resulting scores in sorted order.
args = vars(parser.parse_args())
input_dir = args['input'].rstrip('/')
tmpdir = args['tmpdir'].rstrip('/')
min_clusters = args['min_clusters']
max_clusters = args['max_clusters']
distance = args['distance']
method = args['cluster_method']
pickle = '{0}/scrand.pkl'.format(input_dir)

directorycheck_and_raise(input_dir)
directorycheck_and_make(tmpdir)
filecheck_and_quit(pickle)

# Unpickling can execute arbitrary code: only point this at trusted files.
# The context manager closes the handle, which the bare open() call leaked.
with open(pickle) as pickle_file:
    sc = cPickle.load(pickle_file)
sc.tmpdir = tmpdir

# Single-argument print(...) behaves the same as the print statement under
# Python 2 and stays valid syntax under Python 3.
print('Loading phyml results...')
sc.load_phyml_results(input_dir, use_hashname=True)
print('Autotuning...')
sc.autotune(distance, max_groups=max_clusters)
print('Clustering...')
# NOTE(review): range() stops before max_clusters, so that value itself is
# never passed as a partition size -- confirm this is intended.
sc.put_partitions(distance, method, range(min_clusters, max_clusters))
sc.concatenate_records()
sc.put_cluster_trees(program='bionj')
# Order the score entries by their first element before printing.
scores = sorted(sc.get_scores(), key=lambda x: x[0])
print('Scores:')
for score in scores:
    print(score)
    choices=valid_methods,
    default='spectral',
    )

# Clustering driver: load the pickled collection found in the input
# directory, attach phyml results, autotune, partition, build cluster trees
# and print the resulting scores in sorted order.
args = vars(parser.parse_args())
input_dir = args['input'].rstrip('/')
tmpdir = args['tmpdir'].rstrip('/')
min_clusters = args['min_clusters']
max_clusters = args['max_clusters']
distance = args['distance']
method = args['cluster_method']
pickle = '{0}/scrand.pkl'.format(input_dir)

directorycheck_and_raise(input_dir)
directorycheck_and_make(tmpdir)
filecheck_and_quit(pickle)

# Unpickling can execute arbitrary code: only point this at trusted files.
# The context manager closes the handle, which the bare open() call leaked.
with open(pickle) as pickle_file:
    sc = cPickle.load(pickle_file)
sc.tmpdir = tmpdir

# Single-argument print(...) behaves the same as the print statement under
# Python 2 and stays valid syntax under Python 3.
print('Loading phyml results...')
sc.load_phyml_results(input_dir, use_hashname=True)
print('Autotuning...')
sc.autotune(distance, max_groups=max_clusters)
print('Clustering...')
# NOTE(review): range() stops before max_clusters, so that value itself is
# never passed as a partition size -- confirm this is intended.
sc.put_partitions(distance, method, range(min_clusters, max_clusters))
sc.concatenate_records()
sc.put_cluster_trees(program='bionj')
# Order the score entries by their first element before printing.
scores = sorted(sc.get_scores(), key=lambda x: x[0])
print('Scores:')
for score in scores:
    print(score)
import argparse
import cPickle
import re
import sys

from errors import filecheck_and_quit, directorycheck_and_make

# Randomise script: load a pickled SequenceCollection, build a randomised
# copy, dump that copy's records and pickle the copy as scrand.pkl in the
# output directory.

# NOTE(review): `.-_` inside the character class is a range ('.' to '_'),
# so '/' matches and a literal '-' does not -- confirm the intended set.
progname = re.compile('[A-Za-z0-9.-_]+').search(sys.argv[0]).group()

desc = '\n'.join(['Read a SequenceCollection from pickle,',
                 'make a randomised copy,', 'dump records'])
input_help = 'Path+Filename for the input pickle file'
output_help = \
    'Path to output directory. Will be created if doesn\'t exist'
parser = argparse.ArgumentParser(prog=progname, description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('-i', '--input', help=input_help, type=str)
parser.add_argument('-o', '--output', help=output_help, type=str)

args = vars(parser.parse_args())

pickle = args['input']
output_dir = args['output']

filecheck_and_quit(pickle)           # can't find file -> quit
directorycheck_and_make(output_dir)  # can't find directory -> create it

# Unpickling can execute arbitrary code: only point this at trusted files.
# The context manager closes the handle, which the bare file() call leaked.
with open(pickle) as pickle_in:
    sc = cPickle.load(pickle_in)

scrand = sc.make_randomised_copy()

scrand.dump_records(output_dir)

# Closing the handle guarantees the pickle is flushed to disk; the original
# never closed the file it opened for writing. The mode is kept as 'w' so the
# on-disk format is unchanged for existing readers.
with open('{0}/scrand.pkl'.format(output_dir), 'w') as pickle_out:
    cPickle.dump(scrand, pickle_out)
from errors import filecheck_and_quit, directorycheck_and_make
import cPickle
import sys
import argparse
import re

# Randomise script: load a pickled SequenceCollection, build a randomised
# copy, dump that copy's records and pickle the copy as scrand.pkl in the
# output directory.

# NOTE(review): `.-_` inside the character class is a range ('.' to '_'),
# so '/' matches and a literal '-' does not -- confirm the intended set.
progname = re.compile("[A-Za-z0-9.-_]+").search(sys.argv[0]).group()

desc = "\n".join(["Read a SequenceCollection from pickle,", "make a randomised copy,", "dump records"])
input_help = "Path+Filename for the input pickle file"
output_help = "Path to output directory. Will be created if doesn't exist"
parser = argparse.ArgumentParser(prog=progname, description=desc, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("-i", "--input", help=input_help, type=str)
parser.add_argument("-o", "--output", help=output_help, type=str)

args = vars(parser.parse_args())

pickle = args["input"]
output_dir = args["output"]

filecheck_and_quit(pickle)  # can't find file -> quit
directorycheck_and_make(output_dir)  # can't find directory -> create it

# Unpickling can execute arbitrary code: only point this at trusted files.
# The context manager closes the handle, which the bare file() call leaked.
with open(pickle) as pickle_in:
    sc = cPickle.load(pickle_in)

scrand = sc.make_randomised_copy()

scrand.dump_records(output_dir)

# Closing the handle guarantees the pickle is flushed to disk; the original
# never closed the file it opened for writing. The mode is kept as "w" so the
# on-disk format is unchanged for existing readers.
with open("{0}/scrand.pkl".format(output_dir), "w") as pickle_out:
    cPickle.dump(scrand, pickle_out)