def __init__(self, aln_fname, outgroup, include_ref_strains = True, outdir = './', formats = ['pdf','png'], verbose = 0, **kwargs): process.__init__(self, **kwargs) flu_filter.__init__(self, alignment_file = aln_fname, **kwargs) tree_refine.__init__(self, **kwargs) virus_clean.__init__(self, **kwargs) self.midpoint_rooting = False self.include_ref_strains = include_ref_strains self.verbose = verbose self.formats = formats self.outdir = outdir.rstrip('/')+'/' self.auspice_tree_fname = self.outdir + 'tree.json' self.auspice_align_fname = self.outdir + 'aln.fasta' self.auspice_aa_align_fname = self.outdir + 'aa_aln.fasta' self.auspice_sequences_fname = self.outdir + 'sequences.json' self.auspice_frequencies_fname = None self.auspice_meta_fname = self.outdir + 'meta.json' self.path_to_augur = path_to_augur if os.path.isfile(outgroup): tmp = [{'strain':seq.name, 'seq':str(record.seq).upper(), 'desc':seq.description} for seq in SeqIO.parse(outgroup, 'fasta') ] if len(tmp): self.outgroup = tmp[0] if len(tmp)>1: print "More than one sequence in ", outgroup, "taking first" if self.verbose: print "using outgroup found in file ", outgroup elif outgroup=='auto': print "automatically determine outgroup" self.auto_outgroup_blast() elif isinstance(outgroup, basestring): seq_names = [x['strain'] for x in self.viruses] if outgroup in seq_names: self.outgroup = self.viruses.pop(seq_names.index(outgroup)) if self.verbose: print "using outgroup found in alignment", outgroup else: standard_outgroups = self.load_standard_outgroups() if outgroup in standard_outgroups: self.outgroup = standard_outgroups[outgroup] if self.verbose: print "using standard outgroup", outgroup else: raise ValueError("outgroup %s not found" % outgroup) return if "anno:" in self.outgroup['desc']: anno = [x for x in self.outgroup['desc'].split() if "anno:" in x][0] anno = (anno.split(':')[1]).split('_') tmp = [(anno[2*i], int(anno[2*i+1])) for i in range(len(anno)/2)] self.anno = sorted(tmp, key=lambda x:x[1]) 
print("Using annotation",self.anno) else: self.anno = None print("No annotation found") #self.anno = sorted((('SP',0), ('HA1',16), ('HA2',329+16)), key=lambda x:x[1]) self.viruses.append(self.outgroup) self.filter_geo(prune=False) self.make_strain_names_unique()
def __init__(self, aln_fname, outgroup, outdir='./', formats=['pdf', 'svg', 'png'], verbose=0, **kwargs): process.__init__(self, **kwargs) flu_filter.__init__(self, alignment_file=aln_fname, **kwargs) tree_refine.__init__(self, **kwargs) virus_clean.__init__(self, **kwargs) self.verbose = verbose self.formats = formats self.outdir = outdir.rstrip('/') + '/' self.auspice_tree_fname = self.outdir + 'tree.json' self.auspice_sequences_fname = self.outdir + 'sequences.json' self.auspice_frequencies_fname = None self.auspice_meta_fname = self.outdir + 'meta.json' if os.path.isfile(outgroup): tmp = [{ 'strain': seq.name, 'seq': str(record.seq).upper(), 'desc': seq.description } for seq in SeqIO.parse(outgroup, 'fasta')] if len(tmp): self.outgroup = tmp[0] if len(tmp) > 1: print "More than one sequence in ", outgroup, "taking first" if self.verbose: print "using outgroup found in file ", outgroup elif isinstance(outgroup, basestring): seq_names = [x['strain'] for x in self.viruses] if outgroup in seq_names: self.outgroup = self.viruses.pop(seq_names.index(outgroup)) if self.verbose: print "using outgroup found in alignment", outgroup else: standard_outgroups = [{ 'strain': seq.name, 'seq': str(seq.seq).upper(), 'desc': seq.description } for seq in SeqIO.parse(std_outgroup_file, 'fasta')] outgroup_names = [x['strain'] for x in standard_outgroups] if outgroup in outgroup_names: self.outgroup = standard_outgroups[outgroup_names.index( outgroup)] if self.verbose: print "using standard outgroup", outgroup else: raise ValueError("outgroup %s not found" % outgroup) return self.viruses.append(self.outgroup) self.filter_geo(prune=False) #self.filter_host(prune=False) self.make_strain_names_unique()
def __init__(self, **kwargs):
    """Initialise ``tree_refine`` and load the epitope mask.

    After ``tree_refine.__init__`` has run, ``self.kwargs`` is consulted for
    two optional entries:

    * ``epitope_masks_fname``  - path of a text file with one
      whitespace-separated ``<version> <mask>`` pair per line;
    * ``epitope_mask_version`` - the version key to select from that file.

    ``self.epitope_mask`` ends up as a numpy array of dtype ``'S1'`` holding
    one element per character of the selected mask. It is an empty array if
    either entry is missing or the version is not present in the file.

    Raises
    ------
    ValueError
        If a non-blank line of the mask file does not consist of exactly two
        whitespace-separated fields.
    """
    tree_refine.__init__(self, **kwargs)

    mask = ""
    if "epitope_masks_fname" in self.kwargs and "epitope_mask_version" in self.kwargs:
        epitope_map = {}
        with open(self.kwargs["epitope_masks_fname"]) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines instead of failing on the unpack.
                    continue
                key, value = fields
                epitope_map[key] = value
        version = self.kwargs["epitope_mask_version"]
        if version in epitope_map:
            mask = epitope_map[version]

    # One 'S1' element per mask character. Built from a list because the
    # binary mode of np.fromstring is deprecated in newer numpy releases.
    self.epitope_mask = np.array(list(mask), dtype='S1')
def __init__(self, aln_fname, outgroup, outdir = './', formats = ['pdf','svg','png'], verbose = 0, **kwargs): process.__init__(self, **kwargs) flu_filter.__init__(self, alignment_file = aln_fname, **kwargs) tree_refine.__init__(self, **kwargs) virus_clean.__init__(self, **kwargs) self.verbose = verbose self.formats = formats self.outdir = outdir.rstrip('/')+'/' self.auspice_tree_fname = self.outdir + 'tree.json' self.auspice_sequences_fname = self.outdir + 'sequences.json' self.auspice_frequencies_fname = None self.auspice_meta_fname = self.outdir + 'meta.json' if os.path.isfile(outgroup): tmp = [{'strain':seq.name, 'seq':str(record.seq).upper(), 'desc':seq.description} for seq in SeqIO.parse(outgroup, 'fasta') ] if len(tmp): self.outgroup = tmp[0] if len(tmp)>1: print "More than one sequence in ", outgroup, "taking first" if self.verbose: print "using outgroup found in file ", outgroup elif isinstance(outgroup, basestring): seq_names = [x['strain'] for x in self.viruses] if outgroup in seq_names: self.outgroup = self.viruses.pop(seq_names.index(outgroup)) if self.verbose: print "using outgroup found in alignment", outgroup else: standard_outgroups = [{'strain':seq.name, 'seq':str(seq.seq).upper(), 'desc':seq.description} for seq in SeqIO.parse(std_outgroup_file, 'fasta') ] outgroup_names = [x['strain'] for x in standard_outgroups] if outgroup in outgroup_names: self.outgroup = standard_outgroups[outgroup_names.index(outgroup)] if self.verbose: print "using standard outgroup", outgroup else: raise ValueError("outgroup %s not found" % outgroup) return self.viruses.append(self.outgroup) self.filter_geo(prune=False) #self.filter_host(prune=False) self.make_strain_names_unique()
def __init__(self, **kwargs):
    """Delegate construction to ``tree_refine``.

    Every keyword argument is passed through unchanged; this initialiser
    adds no state of its own.
    """
    tree_refine.__init__(self, **kwargs)
def __init__(self, **kwargs): tree_refine.__init__(self, **kwargs) '''self.epitope_mask = ""
def __init__(self, aln_fname, outgroup, include_ref_strains=True, outdir='./', formats=['pdf', 'png'], verbose=0, **kwargs): process.__init__(self, **kwargs) flu_filter.__init__(self, alignment_file=aln_fname, **kwargs) tree_refine.__init__(self, **kwargs) virus_clean.__init__(self, **kwargs) self.midpoint_rooting = False self.include_ref_strains = include_ref_strains self.verbose = verbose self.formats = formats self.outdir = outdir.rstrip('/') + '/' self.auspice_tree_fname = self.outdir + 'tree.json' self.auspice_align_fname = self.outdir + 'aln.fasta' self.auspice_aa_align_fname = self.outdir + 'aa_aln.fasta' self.auspice_sequences_fname = self.outdir + 'sequences.json' self.auspice_frequencies_fname = None self.auspice_meta_fname = self.outdir + 'meta.json' self.path_to_augur = path_to_augur if os.path.isfile(outgroup): tmp = [{ 'strain': seq.name, 'seq': str(record.seq).upper(), 'desc': seq.description } for seq in SeqIO.parse(outgroup, 'fasta')] if len(tmp): self.outgroup = tmp[0] if len(tmp) > 1: print "More than one sequence in ", outgroup, "taking first" if self.verbose: print "using outgroup found in file ", outgroup elif outgroup == 'auto': print "automatically determine outgroup" self.auto_outgroup_blast() elif isinstance(outgroup, basestring): seq_names = [x['strain'] for x in self.viruses] if outgroup in seq_names: self.outgroup = self.viruses.pop(seq_names.index(outgroup)) if self.verbose: print "using outgroup found in alignment", outgroup else: standard_outgroups = self.load_standard_outgroups() if outgroup in standard_outgroups: self.outgroup = standard_outgroups[outgroup] if self.verbose: print "using standard outgroup", outgroup else: raise ValueError("outgroup %s not found" % outgroup) return if "anno:" in self.outgroup['desc']: anno = [x for x in self.outgroup['desc'].split() if "anno:" in x][0] anno = (anno.split(':')[1]).split('_') tmp = [(anno[2 * i], int(anno[2 * i + 1])) for i in range(len(anno) / 2)] self.anno = sorted(tmp, key=lambda x: 
x[1]) print("Using annotation", self.anno) else: self.anno = None print("No annotation found") #self.anno = sorted((('SP',0), ('HA1',16), ('HA2',329+16)), key=lambda x:x[1]) self.viruses.append(self.outgroup) self.filter_geo(prune=False) self.make_strain_names_unique()