def tt_from_file(self, infile, root='best', nodefile=None):
    """Build a TreeTime object from a tree file and annotate its clades.

    infile   -- tree handed to ``TreeTime`` as ``tree``; it is also
                concatenated into a log message, so a string is expected.
    root     -- forwarded to ``self.tt.reroot`` when truthy; a falsy value
                skips rerooting.
    nodefile -- optional pickle file holding a mapping
                ``{node name: {attribute: value}}``; each attribute is set
                on the clade with the matching name.

    Sets ``self.is_timetree``, ``self.tt`` and ``self.tree``, and gives every
    clade an ``attr`` dict (the sequence attributes for known terminals, an
    empty dict otherwise).
    """
    self.is_timetree = False
    self.logger('Reading tree from file '+infile, 2)

    # NOTE(review): the filter tests for 'date' but the value read is
    # 'num_date'; a record carrying only the former raises KeyError here.
    # Confirm both keys are always set together.
    dates = {seq.id: seq.attributes['num_date']
             for seq in self.aln if 'date' in seq.attributes}
    self.tt = TreeTime(dates=dates, tree=infile, gtr='Jukes-Cantor',
                       aln=self.aln, verbose=self.verbose,
                       fill_overhangs=True)
    if root:
        self.tt.reroot(root=root)
    self.tree = self.tt.tree

    for node in self.tree.find_clades():
        if node.is_terminal() and node.name in self.sequence_lookup:
            # The clade shares the sequence's attribute dict (no copy).
            node.attr = self.sequence_lookup[node.name].attributes
            try:
                node.attr['date'] = node.attr['date'].strftime('%Y-%m-%d')
            except (AttributeError, KeyError, TypeError, ValueError):
                # Best effort: the date is missing, is not a date object
                # (e.g. already a string) or cannot be formatted.
                pass
        else:
            node.attr = {}

    if nodefile is not None:
        self.logger('reading node properties from file: '+nodefile, 2)
        # NOTE(security): unpickling can execute arbitrary code; only load
        # node files that were produced by a trusted run.
        from cPickle import load
        with myopen(nodefile, 'r') as node_fh:
            node_props = load(node_fh)
        for clade in self.tree.find_clades():
            if clade.name in node_props:
                for key, value in node_props[clade.name].items():
                    setattr(clade, key, value)
            else:
                # %-formatting instead of '+' so that an unnamed clade
                # (name is None) is logged rather than raising TypeError.
                self.logger("No node properties found for %s" % (clade.name,), 2)
def __init__(self, fname=None, reference_seq=None, **kwarks):
    """Set up a sequence set from a FASTA file or a virus database.

    fname         -- path of a FASTA file; used when it names an existing
                     file. Records are stored in ``self.all_seqs`` keyed by
                     their ``name``.
    reference_seq -- either the name of a loaded sequence (then replaced by
                     that record) or any other object, stored unchanged.
    kwarks        -- extra options, kept on ``self.kwarks``. ``virus`` is
                     passed to ``self.from_vdb`` when no file is given;
                     ``run_dir`` overrides the generated directory name.

    When neither a file nor ``virus`` is available the set is left empty,
    but still fully initialised (``all_seqs``, ``run_dir`` and
    ``reference_seq`` are all set).
    """
    super(sequence_set, self).__init__()
    self.kwarks = kwarks
    self.nthreads = 2

    if fname is not None and os.path.isfile(fname):
        with myopen(fname) as seq_file:
            self.all_seqs = {x.name: x for x in SeqIO.parse(seq_file, 'fasta')}
    elif 'virus' in kwarks:
        # presumably from_vdb populates self.all_seqs -- TODO confirm
        self.from_vdb(kwarks['virus'])
    else:
        print('no input sequences found -- empty sequence set')
        # Fall through instead of returning early so the object never ends
        # up without all_seqs / run_dir / reference_seq attributes.
        self.all_seqs = {}

    if 'run_dir' in kwarks:
        self.run_dir = kwarks['run_dir']
    else:
        import random
        # Timestamp plus random suffix for the generated directory name.
        self.run_dir = '_'.join([
            'temp',
            time.strftime('%Y%m%d-%H%M%S', time.gmtime()),
            str(random.randint(0, 1000000)),
        ])

    # A string naming a loaded sequence is resolved to that record; anything
    # else (including None) is stored unchanged.
    if isinstance(reference_seq, str) and reference_seq in self.all_seqs:
        self.reference_seq = self.all_seqs[reference_seq]
    else:
        self.reference_seq = reference_seq
def __init__(self, fname, reference=None, **kwarks):
    """Set up a sequence set from a FASTA file.

    fname     -- path of a FASTA file. When it is an existing file, records
                 are stored in ``self.raw_seqs`` keyed by the normalised
                 (``fix_names``) description; otherwise the set is empty.
    reference -- either the name of a loaded sequence (then replaced by that
                 record) or any other object, stored unchanged.
    kwarks    -- extra options; ``run_dir`` overrides the generated
                 directory name.
    """
    super(sequence_set, self).__init__()
    self.nthreads = 2

    if os.path.isfile(fname):
        with myopen(fname) as seq_file:
            self.raw_seqs = {fix_names(x.description): x
                             for x in SeqIO.parse(seq_file, 'fasta')}
        for record in self.raw_seqs.values():
            record.id = fix_names(record.id)
            # As in the original code, name is derived from the already
            # normalised id, not from the record's own name.
            record.name = fix_names(record.id)
            record.description = fix_names(record.description)
    else:
        # Without this default a string reference would raise
        # AttributeError in the lookup below.
        self.raw_seqs = {}

    if 'run_dir' in kwarks:
        self.run_dir = kwarks['run_dir']
    else:
        import random
        # Timestamp plus random suffix for the generated directory name.
        self.run_dir = '_'.join([
            'temp',
            time.strftime('%Y%m%d-%H%M%S', time.gmtime()),
            str(random.randint(0, 1000000)),
        ])

    # A string naming a loaded sequence is resolved to that record; anything
    # else (including None) is stored unchanged.
    self.reference = reference
    if isinstance(reference, str):
        ref_key = fix_names(reference)
        if ref_key in self.raw_seqs:
            self.reference = self.raw_seqs[ref_key]
def __init__(self, fname=None, reference=None, **kwarks):
    """Set up a sequence set from a FASTA file or a virus database.

    fname     -- path of a FASTA file; used when it names an existing file.
                 Records are stored in ``self.raw_seqs`` keyed by the
                 normalised (``fix_names``) description.
    reference -- either the name of a loaded sequence (then replaced by that
                 record) or any other object, stored unchanged.
    kwarks    -- extra options, kept on ``self.kwarks``. ``virus`` is passed
                 to ``self.from_vdb`` when no file is given; ``run_dir``
                 overrides the generated directory name.
    """
    super(sequence_set, self).__init__()
    self.kwarks = kwarks
    self.nthreads = 2

    if fname is not None and os.path.isfile(fname):
        with myopen(fname) as seq_file:
            self.raw_seqs = {fix_names(x.description): x
                             for x in SeqIO.parse(seq_file, 'fasta')}
        for record in self.raw_seqs.values():
            record.id = fix_names(record.id)
            # As in the original code, name is derived from the already
            # normalised id, not from the record's own name.
            record.name = fix_names(record.id)
            record.description = fix_names(record.description)
    elif 'virus' in kwarks:
        # presumably from_vdb populates self.raw_seqs -- TODO confirm
        self.from_vdb(kwarks['virus'])
    else:
        # No input at all: start empty so the reference lookup below cannot
        # fail with AttributeError.
        self.raw_seqs = {}

    if 'run_dir' in kwarks:
        self.run_dir = kwarks['run_dir']
    else:
        import random
        # Timestamp plus random suffix for the generated directory name.
        self.run_dir = '_'.join([
            'temp',
            time.strftime('%Y%m%d-%H%M%S', time.gmtime()),
            str(random.randint(0, 1000000)),
        ])

    # A string naming a loaded sequence is resolved to that record; anything
    # else (including None) is stored unchanged.
    self.reference = reference
    if isinstance(reference, str):
        ref_key = fix_names(reference)
        if ref_key in self.raw_seqs:
            self.reference = self.raw_seqs[ref_key]
def load_mfa(self, path):
    """Read a FASTA file into ``self.seqs`` and record how many were loaded.

    path -- file opened with ``myopen`` and parsed as FASTA; records are
            keyed by their ``name``.

    Any exception raised while opening or parsing is reported through
    ``self.log.fatal``. Afterwards ``self.nstart`` is set to the number of
    entries in ``self.seqs`` and a summary is sent to ``self.log.notify``.
    """
    try:
        with myopen(path) as handle:
            records = SeqIO.parse(handle, 'fasta')
            self.seqs = dict((rec.name, rec) for rec in records)
    except Exception as err:
        failure = "Error loading sequences from {}. Error: {}".format(path, err)
        self.log.fatal(failure)

    loaded = len(self.seqs)
    self.nstart = loaded
    self.log.notify("Loaded {} sequences from {}".format(loaded, path))
def dump(self, treefile, nodefile):
    """Write the tree and a pickle of selected node attributes to disk.

    treefile -- destination of ``self.tree`` in newick format.
    nodefile -- destination (opened with ``myopen``) of a pickled dict
                ``{node name: {attribute: value}}`` covering every attribute
                listed in ``self.dump_attr`` that the node actually has.
    """
    from Bio import Phylo
    Phylo.write(self.tree, treefile, 'newick')

    # getattr mirrors the hasattr guard; calling __getattribute__ directly
    # would bypass any __getattr__ fallback that hasattr takes into account.
    node_props = {}
    for node in self.tree.find_clades():
        node_props[node.name] = {attr: getattr(node, attr)
                                 for attr in self.dump_attr
                                 if hasattr(node, attr)}

    # Aliased so the import does not shadow this method's own name.
    from cPickle import dump as pickle_dump
    with myopen(nodefile, 'w') as nfile:
        pickle_dump(node_props, nfile)