Ejemplo n.º 1
0
	def __init__(self, aln_fname, outgroup, include_ref_strains = True, outdir = './', formats = ['pdf','png'], verbose = 0, **kwargs):
		process.__init__(self, **kwargs)
		flu_filter.__init__(self, alignment_file = aln_fname, **kwargs)
		tree_refine.__init__(self, **kwargs)
		virus_clean.__init__(self, **kwargs)
		self.midpoint_rooting = False
		self.include_ref_strains = include_ref_strains
		self.verbose = verbose
		self.formats = formats
		self.outdir = outdir.rstrip('/')+'/'
		self.auspice_tree_fname = 		self.outdir + 'tree.json'
		self.auspice_align_fname = 		self.outdir + 'aln.fasta'
		self.auspice_aa_align_fname = 		self.outdir + 'aa_aln.fasta'
		self.auspice_sequences_fname = 	self.outdir + 'sequences.json'
		self.auspice_frequencies_fname = None
		self.auspice_meta_fname = 		self.outdir + 'meta.json'
		self.path_to_augur = path_to_augur

		if os.path.isfile(outgroup):
			tmp = [{'strain':seq.name, 'seq':str(record.seq).upper(), 'desc':seq.description}
								for seq in SeqIO.parse(outgroup, 'fasta') ]
			if len(tmp):
				self.outgroup = tmp[0]
				if len(tmp)>1:
					print "More than one sequence in ", outgroup, "taking first"
				if self.verbose:
					print "using outgroup found in file ", outgroup
		elif outgroup=='auto':
			print "automatically determine outgroup"
			self.auto_outgroup_blast()
		elif isinstance(outgroup, basestring):
			seq_names = [x['strain'] for x in self.viruses]
			if outgroup in seq_names:
				self.outgroup = self.viruses.pop(seq_names.index(outgroup))
				if self.verbose:
					print "using outgroup found in alignment", outgroup
			else:
				standard_outgroups = self.load_standard_outgroups()
				if outgroup in standard_outgroups:
					self.outgroup = standard_outgroups[outgroup]
					if self.verbose:
						print "using standard outgroup", outgroup
				else:
					raise ValueError("outgroup %s not found" % outgroup)
					return
		if "anno:" in self.outgroup['desc']:
			anno = [x for x in self.outgroup['desc'].split() if "anno:" in x][0]
			anno = (anno.split(':')[1]).split('_')
			tmp = [(anno[2*i], int(anno[2*i+1])) for i in range(len(anno)/2)]
			self.anno = sorted(tmp, key=lambda x:x[1])
			print("Using annotation",self.anno)
		else:
			self.anno = None
			print("No annotation found")
		#self.anno = sorted((('SP',0), ('HA1',16), ('HA2',329+16)), key=lambda x:x[1])
		self.viruses.append(self.outgroup)
		self.filter_geo(prune=False)
		self.make_strain_names_unique()
Ejemplo n.º 2
0
    def __init__(self,
                 aln_fname,
                 outgroup,
                 outdir='./',
                 formats=['pdf', 'svg', 'png'],
                 verbose=0,
                 **kwargs):
        process.__init__(self, **kwargs)
        flu_filter.__init__(self, alignment_file=aln_fname, **kwargs)
        tree_refine.__init__(self, **kwargs)
        virus_clean.__init__(self, **kwargs)
        self.verbose = verbose
        self.formats = formats
        self.outdir = outdir.rstrip('/') + '/'
        self.auspice_tree_fname = self.outdir + 'tree.json'
        self.auspice_sequences_fname = self.outdir + 'sequences.json'
        self.auspice_frequencies_fname = None
        self.auspice_meta_fname = self.outdir + 'meta.json'

        if os.path.isfile(outgroup):
            tmp = [{
                'strain': seq.name,
                'seq': str(record.seq).upper(),
                'desc': seq.description
            } for seq in SeqIO.parse(outgroup, 'fasta')]
            if len(tmp):
                self.outgroup = tmp[0]
                if len(tmp) > 1:
                    print "More than one sequence in ", outgroup, "taking first"
                if self.verbose:
                    print "using outgroup found in file ", outgroup
        elif isinstance(outgroup, basestring):
            seq_names = [x['strain'] for x in self.viruses]
            if outgroup in seq_names:
                self.outgroup = self.viruses.pop(seq_names.index(outgroup))
                if self.verbose:
                    print "using outgroup found in alignment", outgroup
            else:
                standard_outgroups = [{
                    'strain': seq.name,
                    'seq': str(seq.seq).upper(),
                    'desc': seq.description
                } for seq in SeqIO.parse(std_outgroup_file, 'fasta')]
                outgroup_names = [x['strain'] for x in standard_outgroups]
                if outgroup in outgroup_names:
                    self.outgroup = standard_outgroups[outgroup_names.index(
                        outgroup)]
                    if self.verbose:
                        print "using standard outgroup", outgroup
                else:
                    raise ValueError("outgroup %s not found" % outgroup)
                    return
        self.viruses.append(self.outgroup)
        self.filter_geo(prune=False)
        #self.filter_host(prune=False)
        self.make_strain_names_unique()
Ejemplo n.º 3
0
	def __init__(self, aln_fname, outgroup, outdir = './', formats = ['pdf','svg','png'], verbose = 0, **kwargs):
		process.__init__(self, **kwargs)
		flu_filter.__init__(self, alignment_file = aln_fname, **kwargs)
		tree_refine.__init__(self, **kwargs)
		virus_clean.__init__(self, **kwargs)
		self.verbose = verbose
		self.formats = formats
		self.outdir = outdir.rstrip('/')+'/'
		self.auspice_tree_fname = 		self.outdir + 'tree.json'
		self.auspice_sequences_fname = 	self.outdir + 'sequences.json'
		self.auspice_frequencies_fname = None
		self.auspice_meta_fname = 		self.outdir + 'meta.json'

		if os.path.isfile(outgroup):
			tmp = [{'strain':seq.name, 'seq':str(record.seq).upper(), 'desc':seq.description}
								for seq in SeqIO.parse(outgroup, 'fasta') ]			
			if len(tmp):
				self.outgroup = tmp[0]
				if len(tmp)>1:
					print "More than one sequence in ", outgroup, "taking first"
				if self.verbose:
					print "using outgroup found in file ", outgroup
		elif isinstance(outgroup, basestring):
			seq_names = [x['strain'] for x in self.viruses]
			if outgroup in seq_names:
				self.outgroup = self.viruses.pop(seq_names.index(outgroup))
				if self.verbose:
					print "using outgroup found in alignment", outgroup
			else:
				standard_outgroups = [{'strain':seq.name, 'seq':str(seq.seq).upper(), 'desc':seq.description}
										for seq in SeqIO.parse(std_outgroup_file, 'fasta') ]
				outgroup_names = [x['strain'] for x in standard_outgroups]
				if outgroup in outgroup_names:
					self.outgroup = standard_outgroups[outgroup_names.index(outgroup)]
					if self.verbose:
						print "using standard outgroup", outgroup
				else:
					raise ValueError("outgroup %s not found" % outgroup)
					return
		self.viruses.append(self.outgroup)
		self.filter_geo(prune=False)
		#self.filter_host(prune=False)
		self.make_strain_names_unique()
Ejemplo n.º 4
0
	def __init__(self,**kwargs):
		"""Initialize by delegating to virus_clean.__init__, forwarding all keyword arguments."""
		virus_clean.__init__(self, **kwargs)
Ejemplo n.º 5
0
 def __init__(self, **kwargs):
     """Initialize by delegating to virus_clean.__init__, forwarding all keyword arguments."""
     virus_clean.__init__(self, **kwargs)
Ejemplo n.º 6
0
    def __init__(self,
                 aln_fname,
                 outgroup,
                 include_ref_strains=True,
                 outdir='./',
                 formats=['pdf', 'png'],
                 verbose=0,
                 **kwargs):
        process.__init__(self, **kwargs)
        flu_filter.__init__(self, alignment_file=aln_fname, **kwargs)
        tree_refine.__init__(self, **kwargs)
        virus_clean.__init__(self, **kwargs)
        self.midpoint_rooting = False
        self.include_ref_strains = include_ref_strains
        self.verbose = verbose
        self.formats = formats
        self.outdir = outdir.rstrip('/') + '/'
        self.auspice_tree_fname = self.outdir + 'tree.json'
        self.auspice_align_fname = self.outdir + 'aln.fasta'
        self.auspice_aa_align_fname = self.outdir + 'aa_aln.fasta'
        self.auspice_sequences_fname = self.outdir + 'sequences.json'
        self.auspice_frequencies_fname = None
        self.auspice_meta_fname = self.outdir + 'meta.json'
        self.path_to_augur = path_to_augur

        if os.path.isfile(outgroup):
            tmp = [{
                'strain': seq.name,
                'seq': str(record.seq).upper(),
                'desc': seq.description
            } for seq in SeqIO.parse(outgroup, 'fasta')]
            if len(tmp):
                self.outgroup = tmp[0]
                if len(tmp) > 1:
                    print "More than one sequence in ", outgroup, "taking first"
                if self.verbose:
                    print "using outgroup found in file ", outgroup
        elif outgroup == 'auto':
            print "automatically determine outgroup"
            self.auto_outgroup_blast()
        elif isinstance(outgroup, basestring):
            seq_names = [x['strain'] for x in self.viruses]
            if outgroup in seq_names:
                self.outgroup = self.viruses.pop(seq_names.index(outgroup))
                if self.verbose:
                    print "using outgroup found in alignment", outgroup
            else:
                standard_outgroups = self.load_standard_outgroups()
                if outgroup in standard_outgroups:
                    self.outgroup = standard_outgroups[outgroup]
                    if self.verbose:
                        print "using standard outgroup", outgroup
                else:
                    raise ValueError("outgroup %s not found" % outgroup)
                    return
        if "anno:" in self.outgroup['desc']:
            anno = [x for x in self.outgroup['desc'].split()
                    if "anno:" in x][0]
            anno = (anno.split(':')[1]).split('_')
            tmp = [(anno[2 * i], int(anno[2 * i + 1]))
                   for i in range(len(anno) / 2)]
            self.anno = sorted(tmp, key=lambda x: x[1])
            print("Using annotation", self.anno)
        else:
            self.anno = None
            print("No annotation found")
        #self.anno = sorted((('SP',0), ('HA1',16), ('HA2',329+16)), key=lambda x:x[1])
        self.viruses.append(self.outgroup)
        self.filter_geo(prune=False)
        self.make_strain_names_unique()