Beispiel #1
0
    def tt_from_file(self, infile, root='best', nodefile=None):
        """Build ``self.tt`` / ``self.tree`` from a tree file and annotate nodes.

        A ``TreeTime`` object is created from ``infile`` and ``self.aln``
        (Jukes-Cantor model, overhangs filled), optionally rerooted, and its
        tree is stored in ``self.tree``. Every terminal node whose name is in
        ``self.sequence_lookup`` gets that sequence's ``attributes`` dict as
        ``node.attr``; all other nodes get an empty dict.

        Parameters
        ----------
        infile : str
            Tree file handed to ``TreeTime`` as ``tree=``.
        root : optional
            Passed to ``self.tt.reroot`` when truthy; a falsy value skips
            rerooting.
        nodefile : str or None
            If given, a pickled mapping ``{node name: {attribute: value}}``
            whose entries are set as attributes on the matching tree nodes.
        """
        self.is_timetree=False
        self.logger('Reading tree from file '+infile,2)
        # NOTE(review): the filter tests for 'date' but the value read is
        # 'num_date'; a sequence with only 'date' raises KeyError here.
        # Left as-is because the intended key cannot be confirmed from here.
        dates = {seq.id: seq.attributes['num_date']
                 for seq in self.aln if 'date' in seq.attributes}
        self.tt = TreeTime(dates=dates, tree=infile, gtr='Jukes-Cantor',
                           aln=self.aln, verbose=self.verbose, fill_overhangs=True)
        if root:
            self.tt.reroot(root=root)
        self.tree = self.tt.tree

        for node in self.tree.find_clades():
            if node.is_terminal() and node.name in self.sequence_lookup:
                # node.attr aliases the sequence's attribute dict, so the
                # date conversion below is visible on the sequence as well.
                node.attr = self.sequence_lookup[node.name].attributes
                try:
                    node.attr['date'] = node.attr['date'].strftime('%Y-%m-%d')
                except (KeyError, AttributeError, TypeError, ValueError):
                    # Best effort: 'date' is missing or is not something
                    # strftime can format (e.g. already a string).
                    pass
            else:
                node.attr = {}

        if nodefile is not None:
            self.logger('reading node properties from file: '+nodefile,2)
            # A separate name keeps the ``infile`` parameter from being shadowed.
            with myopen(nodefile, 'r') as node_handle:
                from cPickle import load
                # NOTE: unpickling can execute arbitrary code; ``nodefile``
                # must come from a trusted source.
                node_props = load(node_handle)
            for n in self.tree.find_clades():
                if n.name in node_props:
                    props = node_props[n.name]
                    for attr in props:
                        setattr(n, attr, props[attr])
                else:
                    # %-formatting tolerates unnamed nodes (name is None),
                    # where string concatenation would raise TypeError.
                    self.logger("No node properties found for %s" % (n.name,),2)
Beispiel #2
0
    def __init__(self, fname=None, reference_seq=None, **kwarks):
        """Set up a sequence set from a FASTA file or a virus database.

        Parameters
        ----------
        fname : str or None
            FASTA file; when it exists its records are stored in
            ``self.all_seqs`` keyed by record name.
        reference_seq : optional
            Either the name of a loaded sequence (then the matching record is
            stored in ``self.reference_seq``) or any other object, which is
            stored unchanged.
        **kwarks
            Kept on ``self.kwarks``. ``virus`` triggers ``self.from_vdb`` when
            no usable ``fname`` is given; ``run_dir`` overrides the generated
            run directory name.

        If neither a readable file nor ``virus`` is supplied, a message is
        printed and initialisation stops early; ``run_dir`` and
        ``reference_seq`` are then not set.
        """
        super(sequence_set, self).__init__()
        self.kwarks = kwarks
        self.nthreads = 2
        if fname is not None and os.path.isfile(fname):
            with myopen(fname) as seq_file:
                self.all_seqs = {
                    x.name: x
                    for x in SeqIO.parse(seq_file, 'fasta')
                }
        elif 'virus' in kwarks:
            self.from_vdb(kwarks['virus'])
        else:
            print('no input sequences found -- empty sequence set')
            return

        if 'run_dir' in kwarks:
            self.run_dir = kwarks['run_dir']
        else:
            import random
            # Timestamp plus random suffix for the temporary run directory name.
            self.run_dir = '_'.join([
                'temp',
                time.strftime('%Y%m%d-%H%M%S', time.gmtime()),
                str(random.randint(0, 1000000))
            ])

        # The 'virus' path does not visibly populate ``all_seqs``; fall back to
        # an empty mapping so a string reference is stored as given instead of
        # raising AttributeError.
        known_seqs = getattr(self, 'all_seqs', {})
        if isinstance(reference_seq, str) and reference_seq in known_seqs:
            self.reference_seq = known_seqs[reference_seq]
        else:
            # Covers None as well as an object or unknown name passed directly.
            self.reference_seq = reference_seq
Beispiel #3
0
    def __init__(self, fname, reference=None, **kwarks):
        """Set up a sequence set from a FASTA file.

        Parameters
        ----------
        fname : str
            FASTA file; when it exists its records are stored in
            ``self.raw_seqs`` keyed by ``fix_names(description)``, and each
            record's id, name and description are passed through
            ``fix_names``.
        reference : optional
            Either the name of a loaded sequence (matched after ``fix_names``;
            the matching record is stored in ``self.reference``) or any other
            object, which is stored unchanged.
        **kwarks
            ``run_dir`` overrides the generated run directory name.
        """
        super(sequence_set, self).__init__()
        self.nthreads = 2
        if os.path.isfile(fname):
            with myopen(fname) as seq_file:
                self.raw_seqs = {
                    fix_names(x.description): x
                    for x in SeqIO.parse(seq_file, 'fasta')
                }
                for x in self.raw_seqs.values():
                    x.id = fix_names(x.id)
                    x.name = fix_names(x.id)
                    x.description = fix_names(x.description)

        if 'run_dir' in kwarks:
            self.run_dir = kwarks['run_dir']
        else:
            import random
            # Timestamp plus random suffix for the temporary run directory name.
            self.run_dir = '_'.join([
                'temp',
                time.strftime('%Y%m%d-%H%M%S', time.gmtime()),
                str(random.randint(0, 1000000))
            ])

        self.reference = reference
        if isinstance(reference, str):
            # ``raw_seqs`` is only set when ``fname`` was a file; fall back to
            # an empty mapping so a missing file does not raise AttributeError.
            known_seqs = getattr(self, 'raw_seqs', {})
            ref_name = fix_names(reference)
            if ref_name in known_seqs:
                self.reference = known_seqs[ref_name]
Beispiel #4
0
    def __init__(self, fname=None, reference= None, **kwarks):
        """Set up a sequence set from a FASTA file or a virus database.

        Parameters
        ----------
        fname : str or None
            FASTA file; when it exists its records are stored in
            ``self.raw_seqs`` keyed by ``fix_names(description)``, and each
            record's id, name and description are passed through
            ``fix_names``.
        reference : optional
            Either the name of a loaded sequence (matched after ``fix_names``;
            the matching record is stored in ``self.reference``) or any other
            object, which is stored unchanged.
        **kwarks
            Kept on ``self.kwarks``. ``virus`` triggers ``self.from_vdb`` when
            no usable ``fname`` is given; ``run_dir`` overrides the generated
            run directory name.
        """
        super(sequence_set, self).__init__()
        self.kwarks = kwarks
        self.nthreads = 2
        if fname is not None and os.path.isfile(fname):
            with myopen(fname) as seq_file:
                self.raw_seqs = {fix_names(x.description):x for x in SeqIO.parse(seq_file, 'fasta')}
                for x in self.raw_seqs.values():
                    x.id = fix_names(x.id)
                    x.name = fix_names(x.id)
                    x.description = fix_names(x.description)
        elif 'virus' in kwarks:
            self.from_vdb(kwarks['virus'])

        if 'run_dir' in kwarks:
            self.run_dir = kwarks['run_dir']
        else:
            import random
            # Timestamp plus random suffix for the temporary run directory name.
            stamp = time.strftime('%Y%m%d-%H%M%S',time.gmtime())
            self.run_dir = '_'.join(['temp', stamp, str(random.randint(0,1000000))])

        self.reference = reference
        if isinstance(reference, str):
            # ``raw_seqs`` is only visibly set on the FASTA path; fall back to
            # an empty mapping so the other paths do not raise AttributeError.
            known_seqs = getattr(self, 'raw_seqs', {})
            ref_name = fix_names(reference)
            if ref_name in known_seqs:
                self.reference = known_seqs[ref_name]
Beispiel #5
0
 def load_mfa(self, path):
     """Read the FASTA file at ``path`` into ``self.seqs``, keyed by record name.

     Any exception raised while opening or parsing is reported through
     ``self.log.fatal``. Afterwards the number of loaded sequences is stored
     in ``self.nstart`` and announced through ``self.log.notify``.
     """
     try:
         with myopen(path) as handle:
             records = {}
             for record in SeqIO.parse(handle, 'fasta'):
                 records[record.name] = record
             self.seqs = records
     except Exception as err:
         message = "Error loading sequences from {}. Error: {}".format(path, err)
         self.log.fatal(message)
     # NOTE(review): reached after a failure only if log.fatal returns; its
     # behaviour is not visible here.
     self.nstart = len(self.seqs)
     summary = "Loaded {} sequences from {}".format(self.nstart, path)
     self.log.notify(summary)
Beispiel #6
0
    def dump(self, treefile, nodefile):
        """Write the tree and its per-node attributes to disk.

        ``self.tree`` is written to ``treefile`` in newick format. For every
        node, the attributes named in ``self.dump_attr`` that the node
        actually has are collected into a dict keyed by node name, and that
        mapping is pickled to ``nodefile``.
        """
        from Bio import Phylo
        Phylo.write(self.tree, treefile, 'newick')

        node_props = {}
        for clade in self.tree.find_clades():
            collected = {}
            for key in self.dump_attr:
                # Attributes the node lacks are simply left out.
                if hasattr(clade, key):
                    collected[key] = clade.__getattribute__(key)
            node_props[clade.name] = collected

        with myopen(nodefile, 'w') as out_handle:
            import cPickle
            cPickle.dump(node_props, out_handle)