Ejemplo n.º 1
0
	def __init__(self,min_length = 987, **kwargs):
		'''
		Initialise the filter and attach hard-coded reference data.

		Calls flu_filter.__init__, stores min_length, and sets
		self.vaccine_strains (a one-element list) and self.outgroup from
		literal records embedded in this method.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		kwargs      -- forwarded unchanged to flu_filter.__init__
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		# list of reference-strain records: plain dicts of metadata plus the
		# sequence itself under 'seq'
		self.vaccine_strains =[{
			'strain':'A/California/07/2009',
			'isolate_id':'EPI_ISL_31553',
			'date':'2009-04-09',
			'lab':'Naval Health Research Center',
			'country':'USA',
			'region':'NorthAmerica',
			'seq':'ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA',
		}]
		# outgroup record; its sequence is embedded directly in the source
		# rather than read from a file
		self.outgroup = {
			'strain': 'A/Swine/Indiana/P12439/00',
			'db': 'IRD',
			'accession': 'AF455680',
			'date': '2002-03-14',
			'country': 'USA',
			'region': 'NorthAmerica',
			'seq': 'ATGAAGGCAATACTAGTAGTCCTGCTATATACATTTACAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACTGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAGGCATAACGGGAAACTATGTAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGCTCCTGGGAAATCCAGAGTGTGAATCACTCTTCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGATAATGGGACGTGTTACCCAGGAGATTTCATCAATTATGAAGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACACGAACAGAGGTGTGACGGCAGCATGTCCTTATGCTGGAGCAAAAAGCTTCTACAGAAATTTAATATGGCTGGTCAAAAAAGAAAATTCATACCCAAAGCTCAGCAAATCCTATATTAACAATAAGGGGAAGGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACCAGTGCCGACCAACAAAGTCTCTACCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCAGAAATAGCAGCCAGACCCAAGGTGAGGGACCAAGCAGGGAGAATAAACTATTACTGGACACTAGTAGAGCCTGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCCTTCGCAATGGAAAGAAATTCTGGATCTGGTATTATCATTTCAGATACATCAGTCCACGATTGTAATACGACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCAGTCACAATTGGAGAATGTCCAAAATATGTAAAAAGCACAAAATTGAGAATGGCCACAGGATTAAGGAATGTCCCGTCTATTCAATCTAGAGGCCTGTTTGGGGCCATTGCCGGCTTTATTGAGGGGGGATGGACAGGAATGATAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGATCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGGGATCACTAACAAAGTAAATTCTGTTATTGAAAAGATGAACACACAATTCATAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAGGTTGATGATGGTTTTCTGGATATTTGGACTTACAATGCCGAACTGTTGATTCTGTTGGAAAATGAAAGAACTTTGGATTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAGGGAAATTGGGAATGGCTGCTTTGAATTTTACCACAAATGTGATGACAAGTGCATGGAAAGCGTCAAAAATGGGACTTATGATTACCCAAAATACTCAGAGGAAGCAAAACTAAACAGAGAGGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA'
		}
Ejemplo n.º 2
0
	def __init__(self,min_length = 987, **kwargs):
		'''
		Initialise the filter and attach reference data.

		Calls flu_filter.__init__, stores min_length, sets
		self.vaccine_strains from a literal record, and builds self.cds and
		self.outgroup from a GenBank file.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		kwargs      -- forwarded unchanged to flu_filter.__init__
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		# list of reference-strain records: plain dicts of metadata plus the
		# sequence itself under 'seq'
		self.vaccine_strains =[{
			'strain':'A/California/07/2009',
			'isolate_id':'EPI_ISL_31553',
			'date':'2009-04-09',
			'lab':'Naval Health Research Center',
			'country':'USA',
			'region':'NorthAmerica',
			'seq':'ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA',
		}]
		# read the outgroup record from a GenBank file; the path is relative,
		# so it is resolved against the current working directory
		tmp_outgroup = SeqIO.read('source-data/H1N1pdm_outgroup.gb', 'genbank')
		genome_annotation = tmp_outgroup.features
		# map gene name -> feature, keeping only CDS features whose first
		# 'gene' qualifier is one of SigPep, HA1, HA2
		self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
				if 'gene' in x.qualifiers and x.type=='CDS' and
				x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
		# outgroup metadata is literal; only 'seq' comes from the file
		# (upper-cased string form of the record's sequence)
		self.outgroup = {
			'strain': 'A/Swine/Indiana/P12439/00',
			'db': 'IRD',
			'accession': 'AF455680',
			'date': '2002-03-14',
			'country': 'USA',
			'region': 'NorthAmerica',
			'seq': str(tmp_outgroup.seq).upper()
		}
Ejemplo n.º 3
0
	def __init__(self,min_length = 987, **kwargs):
		'''
		Initialise the filter and attach reference data.

		Calls flu_filter.__init__, stores min_length, sets
		self.vaccine_strains from six literal records, and builds self.cds
		and self.outgroup from a GenBank file.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		kwargs      -- forwarded unchanged to flu_filter.__init__
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		# list of reference-strain records: plain dicts of metadata plus the
		# sequence itself under 'seq'. Entries with db "IRD" carry an
		# "accession" key, entries with db "GISAID" carry "isolate_id" instead.
		self.vaccine_strains =[
				{ 
					"strain": "A/Wisconsin/67/2005",
					"db": "IRD",
					"accession": "CY163984",
					"date": "2005-08-31",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGGAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACGATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAACAAGCTCTTCTTGCAAAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Brisbane/10/2007",
					"db": "IRD",
					"accession": "CY113005",
					"date": "2007-02-06",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCACTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAATGACCAAATCTTCCCGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAACGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACCAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACAATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Perth/16/2009",
					"db": "IRD",
					"accession": "GQ293081",
					"date": "2009-04-07",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAAAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAAAAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAAGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACCGTAAGCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTTCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Victoria/361/2011",
					"db": "IRD",
					# NOTE(review): same accession as A/Perth/16/2009 above --
					# looks like a copy-paste slip; confirm the correct value
					"accession": "GQ293081",
					"date": "2011-10-24",
					"seq": "ATGAAGACTATCATTGCTTTGAGCCACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCGCTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAAGGAACAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATATAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTAAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Texas/50/2012",
					"db": "GISAID",
					"isolate_id": "EPI_ISL_129858",
					"date": "2012-04-15",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAACTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGAATGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAACCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGGAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
				},	{
					"strain": "A/Switzerland/9715293/2013",
					"db": "GISAID",
					"isolate_id": "EPI_ISL_162149",
					"date": "2013-12-06",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGGCTGGAGTCACTCAAAACGGAACAAGTTCTTCTTGCATAAGGGGATCTAATAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTCCAAATACCCAGCATTAAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCACAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAGACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGCTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACAAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
				}
			]
		# read the outgroup record from a GenBank file; the path is relative,
		# so it is resolved against the current working directory
		tmp_outgroup = SeqIO.read('source-data/H3N2_outgroup.gb', 'genbank')
		genome_annotation = tmp_outgroup.features
		# map gene name -> feature, keeping only CDS features whose first
		# 'gene' qualifier is one of SigPep, HA1, HA2
		self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
				if 'gene' in x.qualifiers and x.type=='CDS' and 
				x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
		# outgroup metadata is literal; only 'seq' comes from the file
		# (upper-cased string form of the record's sequence)
		self.outgroup = {
			'strain': 'A/Beijing/32/1992',
			'db': 'IRD',
			'accession': 'U26830',
			'date': '1992-01-01',
			'country': 'China',
			'region': 'China',
			'seq': str(tmp_outgroup.seq).upper()
		}
Ejemplo n.º 4
0
	def __init__(self,min_length = 0, **kwargs):
		'''
		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		self.vaccine_strains =[
				{
					"strain": "A/Chicken/HongKong/G9/97",
					"db": "GISAID",
					"accession": "EPI_ISL_1263",
					"date": "1997-07-01",
					"seq": "ATGGAAATAATAGCACTAATAGCTATACTGGTAGTGACAAAAACAAGCAATGCAGATAAAATTTGCATTGGCTACCAGTCAACAAACTCCACAGAAACTGTTGATACACTAGTAGAAAACAATGTCCCTGTGACACATACCAAAGAATTGCTCCACACAGAGCACAATGGAATGCTATGTGCAACAAACCTGGGGCACCCTCTCATCCTAGACACCTGCACCATCGAAGGGTTGGTGTACGGCAACCCTTCCTGTGATTTGCTACTGGGAGGGAAAGAATGGTCTTACATTGTCGAAAGATCATCAGCTGTCAATGGGATGTGTTACCCTGGAAGGGTAGAGAACCTGGAAGAACTCAGGTCTTTTTTCAGCTCCGCTCGCTCCTACAAAAGACTCCTGCTCTTTCCAGACAGAACTTGGAATGTGACTTACACTGGGACAAGCAAAGCATGTTCAAACTCATTCTACAGAAGTATGAGATGGCTGACACACAAGAGCGATTCTTACCCTATTCAAGACGCCCAATATACTAACGATTGGGGAAAGAATATTCTCTTCATGTGGGGCATACACCACCCACCTACTGATACTGAGCAAATAAATCTATACAAAAAAGCTGATACAACAACAAGTATAACAACGGAAGATATCAATCGAACTTTCAAACCAGTGATAGGGCCAAGGCCTCTTGTCAATGGTCAACAAGGGAGAATTGATTATTATTGGTCAGTACTAAAGCCAGGCCAGACACTGCGAGTGAGATCCAATGGGAATCTAATTGCCCCATGGTATGGACACATTCTTTCAGGAGAAAGCCATGGAAGAATCTTGAAGACCGATTTGAGTAGTGGCAACTGCGTAGTACAATGCCAAACTGAGAAAGGTGGTTTGAACACGACCTTGCCATTCCACAATGTCAGCAAGTATGCATTTGGGAACTGCCCCAAATATGTTGGAGTGAAGAGTCTCAAACTGGCAGTTGGTCTAAGGAATGTTCCTGCTGCATCATATAGAGGGCTCTTCGGTGCCATAGCTGGATTCATAGAAGGCGGTTGGCCAGGACTAGTTGCAGGCTGGTACGGGTTTCAGCATTCAAATGATCAAGGGGTTGGAATGGCCGCAGATAGGGAATCAACTCAAGAAGCAGTTGACAAGATAACATCCAAAGTAAATAACATAATCGACAAAATGAACAAGCAGTATGGA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T--------------------------------------------------------------------------------------"}
			]
		tmp_outgroup = SeqIO.read('/Users/yujiazhou/Documents/FluProject/augur/source-data/H9_outgroup.gb', 'genbank')
		genome_annotation = tmp_outgroup.features
		self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
				if 'gene' in x.qualifiers and x.type=='CDS' and
				x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
		self.outgroup = {
			'strain': 'A/duck/HongKong/147/1977',
			'db': 'IRD',
			'accession': 'AY206671',
			'date': '2003-03-03',
			'country': 'HongKong',
			'region': 'EastAsia',
			'seq': str(tmp_outgroup.seq).upper()
		}
Ejemplo n.º 5
0
	def __init__(self,min_length = 0, **kwargs):
		'''
		Initialise the filter and attach reference data.

		Calls flu_filter.__init__, stores min_length, sets
		self.vaccine_strains from one literal record, and builds self.cds
		and self.outgroup from a GenBank file.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		kwargs      -- forwarded unchanged to flu_filter.__init__
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		# single reference-strain record; its 'seq' contains a short run of
		# '-' characters in addition to nucleotide letters
		self.vaccine_strains =[
			{
				'strain':		'A/ruddyshelduck/Mongolia/1626/2010',
				'isolate_id':	'EPI-ISL-149888',
				'date':			'2010-09-11',
				'lab':			'Other Database Import',
				'region':		'NorthAsia',
				'seq':			'ATGCTATCAATTGTGATTTTGTTTCTGCTTGTTGCAGAGAGCTCTTCTCAAAACTACACAGGAAACCCTGTGATATGCATGGGACATCATGCTGTGGCCAATGGGACTATGGTAAAGACCCTTACTGATGATCAAGTGGAAGTGGTCACTGCACAAGAATTGGTGGAATCACAGAACCTCCCGGAACTATGCCCGAGTCCTCTAAGACTAGTCGATGGCCAGACCTGTGATATCATCAATGGAGCCTTAGGAAGCCCAGGATGTGACCATTTGAATGGTGCTGAATGGGACATTTTCATAGAAAGGCCCAATGCAGTGGACACTTGCTATCCATTTGATGTGCCAGATTATCAGAGCCTAAGGAGCATACTCGCCAACAATGGGAAATTCGAATTCATTGCTGAAGAATTCCAATGGAGCACCGTGAAGCAAAATGGCAAGTCCGGGGCCTGCAAGAGGGCAAATGTGAACGATTTCTTTAATAAACTGAATTGGCTCGTGAAGTCAGACGGGAATGCATACCCTCTCCAGAATTTGACAAAAGTAAACAACGGTGATTACGCGAGGCTTTACATCTGGGGAGTTCACCACCCTTCGACGGATACCGAGCAAACCGATCTGTACAAGAACAATCCTGGTAGGGTCACTGTATCTACCAAAATCAGTCAAACAAGTGTAGTGCCCAACATTGGCAGCAGACCTTGGGTGAGAGGACAAAGTGGCAGAATCAGCTTCTATTGGACTATTGTAGAGCCTGGAGATTTGATAGTCTTCAACACAATAGGAAATTTAATTGCCCCAAGAGGACATTACAAATTAAACAGTCAGAAGAAGAGCACAATTCTGAACACTGCGACTCCCATAGGCTCATGTGTCAGTAAATGTCATACAGACAAAGGTTCTCTCTCTACCACCAAGCCCTTTCAAAATATCTCAAGGATAGCAGTTGGAGATTGTCCCAAATATGTTAAACAAGGCTCCCTAAAACTTGCAACTGGGATGAGAAATATCCCTGAAAAGGCATCAAGAGGGCTTTTTGGGGCAATAGCTGGGTTCATAGAGAATGGATGGCAAGGTCTGATTGATGGTTGGTATGGCTTCAGACACCAAAATGCAGAAGGAACAGGAACAGCTGTTGATCTAAAATCCACTCAGGCAGCCATCGATCAAATCAATGGAAAACTCAATCGTCTTATTGAGAAAACAAACGAGAAATACCATCAAATCGAAAAAGAATTCGAACAAGTTGAAGGAAGAATCCAAGACCTGGAGAAGTATGTTGAAGACACAAAGATTGATCTATGGTCATATAATGCAGAGCTATTAGTCGCTCTGGAAAACCAGCATACTATAGATGTGACTGACTCGGAGATGAACAAGCTCTTTGAAAGAGTAAGGCGACAACTCAGGGAGAATGCTGAAGACAGAGGAAATGGGTGTTTTGAAATATTCCACAAATGTGACAACAACTGCATTGAAAGCATTCGGAATGGGACCTATGATCATGATGTTTATAGAGATGAAGCGATCAACAATCGATTCCAAATACAGGGAGTCAAATTGACCCAGGGATACAAGGACATCATCCTTTGGATTTCGTTCTCCATATCATGCTTTTTGCTCGTAGCACTGCTTTTGGCCTTCATTTTGTGGGCTTGTCAGAACGGAAACATCCGGTGCCAGATT---TGCATTTGA',
			}
		]
		# NOTE(review): absolute path into one user's home directory -- this
		# only resolves on that machine; confirm the intended location
		tmp_outgroup = SeqIO.read('/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/H4_outgroup.gb', 'genbank')
		genome_annotation = tmp_outgroup.features
		# map gene name -> feature, keeping only CDS features whose first
		# 'gene' qualifier is one of SigPep, HA1, HA2
		self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
				if 'gene' in x.qualifiers and x.type=='CDS' and
				x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
		# outgroup metadata is literal; only 'seq' comes from the file
		# (upper-cased string form of the record's sequence)
		self.outgroup = {
			'strain': 'A/Duck/Czechoslovakia/1956',
			'db': 'OtherDatabaseImport',
			'accession': 'EPI-ISL-70104',
			'date': '1956-01-01',
			'country': 'Czech Republic',
			'region': 'Europe',
			'seq': str(tmp_outgroup.seq).upper()
		}
Ejemplo n.º 6
0
	def __init__(self, aln_fname, outgroup, include_ref_strains = True, outdir = './', formats = ['pdf','png'], verbose = 0, **kwargs):
		'''
		Initialise all base classes, configure the output file names, select
		the outgroup and append it to self.viruses.

		parameters
		aln_fname           -- alignment file, passed to flu_filter.__init__
		                       as alignment_file
		outgroup            -- one of, tried in this order:
		                       * path of an existing fasta file (first record is used)
		                       * the string 'auto' (calls self.auto_outgroup_blast())
		                       * the 'strain' of an entry in self.viruses
		                         (that entry is removed from the list and used)
		                       * a key of self.load_standard_outgroups()
		include_ref_strains -- stored as self.include_ref_strains
		outdir              -- output directory; trailing '/' are stripped and
		                       exactly one is appended
		formats             -- stored as a copy in self.formats
		verbose             -- if truthy, report which outgroup was chosen
		kwargs              -- forwarded to every base class __init__

		raises
		ValueError -- if outgroup is a string that matches none of the options
		'''
		process.__init__(self, **kwargs)
		flu_filter.__init__(self, alignment_file = aln_fname, **kwargs)
		tree_refine.__init__(self, **kwargs)
		virus_clean.__init__(self, **kwargs)
		self.midpoint_rooting = False
		self.include_ref_strains = include_ref_strains
		self.verbose = verbose
		# copy, so that instances never share (and mutate) the default list
		self.formats = list(formats)
		self.outdir = outdir.rstrip('/')+'/'
		self.auspice_tree_fname = 		self.outdir + 'tree.json'
		self.auspice_align_fname = 		self.outdir + 'aln.fasta'
		self.auspice_aa_align_fname = 		self.outdir + 'aa_aln.fasta'
		self.auspice_sequences_fname = 	self.outdir + 'sequences.json'
		self.auspice_frequencies_fname = None
		self.auspice_meta_fname = 		self.outdir + 'meta.json'
		# path_to_augur is neither a parameter nor a local; it is looked up
		# in the enclosing module scope
		self.path_to_augur = path_to_augur

		# NOTE: the print calls below take a single pre-formatted string so
		# they emit the same text under Python 2 and Python 3; the double
		# spaces reproduce what the earlier print statements wrote.
		if os.path.isfile(outgroup):
			records = [{'strain':rec.name, 'seq':str(rec.seq).upper(), 'desc':rec.description}
								for rec in SeqIO.parse(outgroup, 'fasta')]
			# NOTE(review): an empty file leaves self.outgroup untouched here;
			# whether a base class has already set one is not visible from
			# this method
			if records:
				self.outgroup = records[0]
				if len(records)>1:
					print("More than one sequence in  %s taking first" % outgroup)
				if self.verbose:
					print("using outgroup found in file  %s" % outgroup)
		elif outgroup=='auto':
			print("automatically determine outgroup")
			self.auto_outgroup_blast()
		elif isinstance(outgroup, basestring):  # basestring: Python 2 only
			seq_names = [x['strain'] for x in self.viruses]
			if outgroup in seq_names:
				self.outgroup = self.viruses.pop(seq_names.index(outgroup))
				if self.verbose:
					print("using outgroup found in alignment %s" % outgroup)
			else:
				standard_outgroups = self.load_standard_outgroups()
				if outgroup in standard_outgroups:
					self.outgroup = standard_outgroups[outgroup]
					if self.verbose:
						print("using standard outgroup %s" % outgroup)
				else:
					raise ValueError("outgroup %s not found" % outgroup)

		# Only outgroups read from a fasta file are guaranteed to carry
		# 'desc'; treat a missing description as "no annotation".
		try:
			desc = self.outgroup['desc']
		except KeyError:
			desc = ''
		if "anno:" in desc:
			# annotation token looks like anno:NAME_POS_NAME_POS...; take the
			# part after the first ':' and pair up names with integer positions
			token = [x for x in desc.split() if "anno:" in x][0]
			fields = token.split(':')[1].split('_')
			pairs = [(name, int(pos)) for name, pos in zip(fields[0::2], fields[1::2])]
			self.anno = sorted(pairs, key=lambda x:x[1])
			print("Using annotation %s" % (self.anno,))
		else:
			self.anno = None
			print("No annotation found")
		#self.anno = sorted((('SP',0), ('HA1',16), ('HA2',329+16)), key=lambda x:x[1])
		self.viruses.append(self.outgroup)
		self.filter_geo(prune=False)
		self.make_strain_names_unique()
Ejemplo n.º 7
0
	def __init__(self,min_length = 0, **kwargs):
		'''
		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		self.vaccine_strains =[
			{
				'strain':    	'A/turkey/Italy/3889/99',
				'isolate_id':	'EPI-ISL-2746',
				'date':    		'1990-07-01', #(Month and day unknown)
				'region':   	'Europe',
				'seq':     		'ATGAACACTCAAATCCTGGTATTCGCTCTGGTGGCGATCATTCCGACAAATGCAGACAAAATCTGCCTTGGGCATCATGCCGTGTCAAACGGGACTAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTTAATGCAACTGAAACGGTGGAACGAACAAACGTCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTTCTGGGAACAATCACTGGGCCACCCCAATGTGACCAATTCCTAGAATTTTCAGCCGATCTAATTATTGAGAGGCGAGAAGGAAGTGGTGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACATACAGCGGAATAAGAACTAATGGAACAACCAGTGCATGTAGGAGATTAGGATCTTCATTCTATGCAGAGATGAAATGGCTCCTGTCAAACACAGACAATGCTGCTTTCCCGCAGATGACTAAGTCATACAAAAACACAAGGAAAGACCCAGCTCTGATAATATGGGGGATCCACCATTCCGGATCAACTACAGAACAGACCAAGCTATATGGGAGTGGAAACAAACTGATAACAGTTGGGAGTTCTAATTACCAACAGTCCTTTGTACCGAGTCCAGGAGCGAGACCACAAGTGAATGGCCAATCTGGAAGAATTGACTTTCATTGGCTGATGCTAAACCCCAATGACACAGTCACTTTCAGTTTCAATGGGGCCTTCATAGCTCCAGACCGTGCAAGTTTTCTGAGAGGGAAGTCTATGGGGATTCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGAGATTGCTATCACAGTGGAGGGACAATAATAAGTAATTTGCCCTTTCAGAACATAAATAGCAGGGCAGTAGGGAAATGTCCGAGATATGTTAAGCAAGAGAGTCTGCTGCTGGCAACAGGGATGAAGAATGTTCCCGAAGTTCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGTCTGATTGATGGGTGGTATGGCTTCAGGCATCAAAATGCACAAGGGGAGGGAACTGCTGCAGATTACAAAAGCACCCAATCAGCAATTGATCAAGTAACAGGAAAATTGAACCGGCTTATAGAAAAAACTAACCAACAATTTGAGTTAATAGACAATGAATTCACTGAGGTTGAAAAGCAAATTGGCAATGTGATAAATTGGACCAGAGATTCCATGACAGAAGTGTGGTCCTATAACGCTGAACTCTTGGTAGCAATGGAGAACCAGCACACAATTGATCTGGCCGACTCAGAAATGAACAAACTATACGAACGAGTGAAGAGACAACTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTCGAAATATTTCACAAGTGTGATGACGACTGTATGGCCAGTATTAGAAACAACACCTATGATCACAGCAAGTACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTGGCCATTGCAATGGGCCTTGTCTTCATATGTGTGAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
			},
			{
				'strain':    	'A/mallard/Netherlands/12/00',
				'isolate_id': 	'EPI-ISL-3548',
				'date':    		'2000-07-01', # (Month and day unknown)
				'region':   	'Europe',
				'seq':     		'ATGAACACTCAAATCCTGGTATTCGCTCTGATGGCGATCATTCCGACAAATGCAGACAAGATCTGCCTTGGGCATCATGCCGTGTCAAACGGGACTAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTTAATGCAACTGAAACGGTGGAACGAACAAACGTCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTTGGTCAATGTGGACTTCTGGGAACAATCACTGGGCCGCCCCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACCTACAGCGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAGATGAAATGGCTCCTGTCAAACACAGACAATGCTGCTTTCCCGCAGATGACTAAGTCATACAAAAACACAAGGAAAGACCCAGCTCTGATAATATGGGGGATCCACCATTCCGGATCAACTACAGAACAGACCAAGCTATATGGGAGTGGAAACAAACTGATAACAGTTGGGAGTTCTAATTACCAACAGTCCTTTGTACCGAGTCCAGGAGCGAGACCACAAGTAAATGGCCAATCTGGAAGAATTGACTTTCATTGGCTGATACTAAACCCCAATGACACAGTTACTTTCAGTTTCAATGGGGCCTTCATAGCTCCAGACCGTGCAAGCTTTCTGAGAGGGAAGTCTATGGGAATTCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGAGATTGCTATCATAGTGGAGGGACAATAATAAGTAATTTGCCCTTTCAGAACATAAATAGCAGGGCAGTAGGAAAATGTCCGAGATATGTTAAGCAAGAGAGTCTGCTGCTGGCAACAGGAATGAAGAATGTTCCCGAAATCCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGTCTGATTGATGGGTGGTATGGCTTCAGGCATCAAAATGCACAAGGGGAGGGAACTGCTGCAGATTACAAAAGCACCCAATCAGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACTAACCAACAATTTGAGTTAATAGACAATGAATTCACTGAGGTTGAAAAGCAAATTGGCAATGTGATAAATTGGACCAGAGATTCCATGACAGAAGTGTGGTCCTATAACGCTGAACTCTTGGTAGCAATGGAGAATCAGCACACAATTGATCTGGCCGACTCAGAAATGAACAAACTGTACGAACGAGTGAAGAGACAACTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTCGAAATATTTCACAAGTGTGATGACGACTGTATGGCCAGTATTAGAAACAACACCTATGATCACAGCAAGTACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTGGCCATTGCAATGGGCCTTGTCTTCATATGTGTGAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
			},
			{
				'strain':    	'A/human/New-York/107/2003',
				'isolate_id': 	'EPI-ISL-16424',
				'date':    		'2003-07-01',
				'region':   	'NorthAmerica',
				'seq': 			'ATGAACACTCAAATTCTGGCATTCATTGCTTGTGTGCTGACTGGAGTTAAAGGAGACAAAATATGTCTTGGGCACCATGCTGTGGCAAATGGAACAAAAGTGAACACATTAACAGAGAGGGGGATTGAAGTAGTGAATGCCACAGAGACAGTGGAAACTACGAATATCAAGAAAATATGTACCCAGGGGAAAAGGCCAACAGATCTGGGACAATGTGGACTTCTAGGAACCCTAATAGGACCTCCCCAATGTGATCAATTCCTGGAGTTTTCCTCTGATTTGATAATTGAGCGAAGAGAAGGAACCGATATATGCTATCCCGGTAGATTCACAAACGAAGAATCACTGAGGCAGATCCTTCGAAGATCAGGAGGAATTGGTAAGGAGTCAATGGGCTTCACCTATAGTGGAATAAGAACCAATGGAGCGACAAGTGCCTGCACAAGATCAGGTTCTTCTTTCTATGCAGAGATGAAGTGGTTGCTGTCGAATTCAGATAATGCAGCATTCCCACAGATGACAAAGGCGTATAGAAATCCCAGAAACAAACCAGCTCTGATAATTTGGGGAGTTCATCACTCTGAATCGGTTAGCGAGCAGACCAAACTCTATGGAAGTGGAAACAAGTTGATAACAGTAAGAAGCTCAAAATACCAGCAATCATTCACCCCAAATCCGGGAGCACGA------------------------AGAATCGATTTCCACTGGCTACTCCTTGATCCCAATGACACAGTGACCTTCACTTTCAATGGAGCATTCATAGCCCCTGACAGGACAAGTTTCTTTAGGGGAGAATCACTAGGAGTCCAGAGTGATGCTCCTTTGGATTCAAGTTGTAGAGGAGATTGCTTTCACAGTGGGGGTACGATAGTCAGTTCCCTGCCATTCCAAAACATCAACTCTAGAACTGTGGGGAAATGCCCTCGGTATGTCAAACAGAAAAGCCTCCTTCTGGCTACAGGAATGAGAAATGTTCCAGAGAAACCAAAGCCC------------------------------AGAGGCCTTTTTGGAGCAATTGCTGGATTCATAGAGAATGGATGGGAGGGTCTCATCAATGGATGGTATGGTTTCAGACATCAAAATGCACAAGGAGAGGGAACTGCAGCTGACTACAAAAGCACCCAGTCTGCAATAGATCAGATCACAGGCAAATTGAATCGTTTAATTGGCAAAACAAATCAGCAGTTTGAGCTGATAGACAATGAGTTCAATGAGATAGAACAACAAATAGGAAATGTCATTAATTGGACAAGAGACGCAATGACTGAGATATGGTCGTATAATGCTGAGCTGTTGGTGGCAATGGAAAATCAGCATACAATAGATCTTGCGGACTCAGAAATGAGCAAACTTTATGAGCGTGTCAAAAAACAACTAAGGGAGAATGCTGAAGAAGATGGAACTGGATGTTTTGAGATATTTCATAAATGTGACGATCAGTGTATGGAGAGCATAAGGAACAACACGTATGACCATACTCAATACAGAACAGAGTCATTGCAGAATAGAATACAGATAGACCCAGTGAAGTTGAGTAGTGGGTACAAAGACATAATCTTATGGTTTAGCTTCGGGGCATCATGTTTTCTTCTTCTAGCCATTGCAATGGGACTGGTTTTCATTTGCATAAAGAATGGAAACATGCAGTGCACTATTTGTATATAG',
			},
			{
				'strain':		'A/human/Shanghai/2/2013',
				'isolate_id':	'EPI-ISL-138738',
				'date':			'2013-03-05',
				'region':		'China',
				'seq':			'ATGAACACTCAAATCCTGGTATTCGCTCTGATTGCGATCATTCCAACAAATGCAGACAAAATCTGCCTCGGACATCATGCCGTGTCAAACGGAACCAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTCAATGCAACTGAAACAGTGGAACGAACAAACATCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTCCTGGGGACAATCACTGGACCACCTCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGAGAATCAGGCGGAATTGACAAGGAAGCAATGGGATTCACATACAGTGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAAATGAAATGGCTCCTGTCAAACACAGATAATGCTGCATTCCCGCAGATGACTAAGTCATATAAAAATACAAGAAAAAGCCCAGCTCTAATAGTATGGGGGATCCATCATTCCGTATCAACTGCAGAGCAAACCAAGCTATATGGGAGTGGAAACAAACTGGTGACAGTTGGGAGTTCTAATTATCAACAATCTTTTGTACCGAGTCCAGGAGCGAGACCACAAGTTAATGGTCTATCTGGAAGAATTGACTTTCATTGGCTAATGCTAAATCCCAATGATACAGTCACTTTCAGTTTCAATGGGGCTTTCATAGCTCCAGACCGTGCAAGCTTCCTGAGAGGAAAATCTATGGGAATCCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGGGACTGCTATCATAGTGGAGGGACAATAATAAGTAACTTGCCATTTCAGAACATAGATAGCAGGGCAGTTGGAAAATGTCCGAGATATGTTAAGCAAAGGAGTCTGCTGCTAGCAACAGGGATGAAGAATGTTCCTGAGATTCCAAAAGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGCCTAATTGATGGTTGGTATGGTTTCAGACACCAGAATGCACAGGGAGAGGGAACTGCTGCAGATTACAAAAGCACTCAATCGGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACCAACCAACAATTTGAGTTGATAGACAATGAATTCAATGAGGTAGAGAAGCAAATCGGTAATGTGATAAATTGGACCAGAGATTCTATAACAGAAGTGTGGTCATACAATGCTGAACTCTTGGTAGCAATGGAGAACCAGCATACAATTGATCTGGCTGATTCAGAAATGGACAAACTGTACGAACGAGTGAAAAGACAGCTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTTGAAATATTTCACAAGTGTGATGATGACTGTATGGCCAGTATTAGAAATAACACCTATGATCACAGCAAATACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTAGCCATTGTAATGGGCCTTGTCTTCATATGTGTAAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
			},
			{
				'strain':		'A/human/Anhui/1/2013',
				'isolate_id':	'EPI-ISL-138739',
				'date':			'2013-03-20',
				'region':		'China',
				'seq':			'ATGAACACTCAAATCCTGGTATTCGCTCTGATTGCGATCATTCCAACAAATGCAGACAAAATCTGCCTCGGACATCATGCCGTGTCAAACGGAACCAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTCAATGCAACTGAAACAGTGGAACGAACAAACATCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTCCTGGGGACAATCACTGGACCACCTCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGAGAATCAGGCGGAATTGACAAGGAAGCAATGGGATTCACATACAGTGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAAATGAAATGGCTCCTGTCAAACACAGATAATGCTGCATTCCCGCAGATGACTAAGTCATATAAAAATACAAGAAAAAGCCCAGCTCTAATAGTATGGGGGATCCATCATTCCGTATCAACTGCAGAGCAAACCAAGCTATATGGGAGTGGAAACAAACTGGTGACAGTTGGGAGTTCTAATTATCAACAATCTTTTGTACCGAGTCCAGGAGCGAGACCACAAGTTAATGGTCTATCTGGAAGAATTGACTTTCATTGGCTAATGCTAAATCCCAATGATACAGTCACTTTCAGTTTCAATGGGGCTTTCATAGCTCCAGACCGTGCAAGCTTCCTGAGAGGAAAATCTATGGGAATCCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGGGACTGCTATCATAGTGGAGGGACAATAATAAGTAACTTGCCATTTCAGAACATAGATAGCAGGGCAGTTGGAAAATGTCCGAGATATGTTAAGCAAAGGAGTCTGCTGCTAGCAACAGGGATGAAGAATGTTCCTGAGATTCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGCCTAATTGATGGTTGGTATGGTTTCAGACACCAGAATGCACAGGGAGAGGGAACTGCTGCAGATTACAAAAGCACTCAATCGGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACCAACCAACAATTTGAGTTGATAGACAATGAATTCAATGAGGTAGAGAAGCAAATCGGTAATGTGATAAATTGGACCAGAGATTCTATAACAGAAGTGTGGTCATACAATGCTGAACTCTTGGTAGCAATGGAGAACCAGCATACAATTGATCTGGCTGATTCAGAAATGGACAAACTGTACGAACGAGTGAAAAGACAGCTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTTGAAATATTTCACAAGTGTGATGATGACTGTATGGCCAGTATTAGAAATAACACCTATGATCACAGCAAATACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTAGCCATTGTAATGGGCCTTGTCTTCATATGTGTAAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
			}
		]
		tmp_outgroup = SeqIO.read('/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/H7_outgroup.gb', 'genbank')
		genome_annotation = tmp_outgroup.features
		self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
				if 'gene' in x.qualifiers and x.type=='CDS' and
				x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
		self.outgroup = {
				'strain':'A/equine/Prague/2/1956',
				'db': 'Other-Database-Import',
				'isolate_id':'EPI-ISL-89157',
				'date':'1956-06-11',
				'country': 'CzechRepublic',
				'region':'Europe',
				'seq':str(tmp_outgroup.seq).upper()
			}
Ejemplo n.º 8
0
    def __init__(self,
                 aln_fname,
                 outgroup,
                 outdir='./',
                 formats=['pdf', 'svg', 'png'],
                 verbose=0,
                 **kwargs):
        process.__init__(self, **kwargs)
        flu_filter.__init__(self, alignment_file=aln_fname, **kwargs)
        tree_refine.__init__(self, **kwargs)
        virus_clean.__init__(self, **kwargs)
        self.verbose = verbose
        self.formats = formats
        self.outdir = outdir.rstrip('/') + '/'
        self.auspice_tree_fname = self.outdir + 'tree.json'
        self.auspice_sequences_fname = self.outdir + 'sequences.json'
        self.auspice_frequencies_fname = None
        self.auspice_meta_fname = self.outdir + 'meta.json'

        if os.path.isfile(outgroup):
            tmp = [{
                'strain': seq.name,
                'seq': str(record.seq).upper(),
                'desc': seq.description
            } for seq in SeqIO.parse(outgroup, 'fasta')]
            if len(tmp):
                self.outgroup = tmp[0]
                if len(tmp) > 1:
                    print "More than one sequence in ", outgroup, "taking first"
                if self.verbose:
                    print "using outgroup found in file ", outgroup
        elif isinstance(outgroup, basestring):
            seq_names = [x['strain'] for x in self.viruses]
            if outgroup in seq_names:
                self.outgroup = self.viruses.pop(seq_names.index(outgroup))
                if self.verbose:
                    print "using outgroup found in alignment", outgroup
            else:
                standard_outgroups = [{
                    'strain': seq.name,
                    'seq': str(seq.seq).upper(),
                    'desc': seq.description
                } for seq in SeqIO.parse(std_outgroup_file, 'fasta')]
                outgroup_names = [x['strain'] for x in standard_outgroups]
                if outgroup in outgroup_names:
                    self.outgroup = standard_outgroups[outgroup_names.index(
                        outgroup)]
                    if self.verbose:
                        print "using standard outgroup", outgroup
                else:
                    raise ValueError("outgroup %s not found" % outgroup)
                    return
        self.viruses.append(self.outgroup)
        self.filter_geo(prune=False)
        #self.filter_host(prune=False)
        self.make_strain_names_unique()
Ejemplo n.º 9
0
	def __init__(self,min_length = 987, **kwargs):
		'''
		Initialise the filter and attach the hard-coded reference records.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		kwargs      -- forwarded unchanged to flu_filter.__init__

		sets
		self.min_length      -- the value of min_length
		self.vaccine_strains -- list of dicts, one per vaccine strain, with
		                        the keys 'strain', 'date', 'seq', 'db' and
		                        either 'accession' or 'isolate_id'
		self.outgroup        -- a single dict of the same kind that also
		                        carries 'country' and 'region'
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		# vaccine strain records, listed in order of increasing 'date'
		self.vaccine_strains =[
				{ 
					"strain": "A/Wisconsin/67/2005",
					"db": "IRD",
					"accession": "CY163984",
					"date": "2005-08-31",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGGAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACGATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAACAAGCTCTTCTTGCAAAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Brisbane/10/2007",
					"db": "IRD",
					"accession": "CY113005",
					"date": "2007-02-06",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCACTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAATGACCAAATCTTCCCGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAACGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACCAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACAATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Perth/16/2009",
					"db": "IRD",
					"accession": "GQ293081",
					"date": "2009-04-07",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAAAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAAAAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAAGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACCGTAAGCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTTCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Victoria/361/2011",
					"db": "IRD",
					# NOTE(review): identical to the accession of A/Perth/16/2009
					# above although the sequences differ -- possibly a
					# copy-paste leftover; verify against the database
					"accession": "GQ293081",
					"date": "2011-10-24",
					"seq": "ATGAAGACTATCATTGCTTTGAGCCACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCGCTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAAGGAACAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATATAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTAAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Texas/50/2012",
					"db": "GISAID",
					"isolate_id": "EPI_ISL_129858",
					"date": "2012-04-15",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAACTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGAATGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAACCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGGAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
				},	{
					"strain": "A/Switzerland/9715293/2013",
					"db": "GISAID",
					"isolate_id": "EPI_ISL_162149",
					"date": "2013-12-06",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGGCTGGAGTCACTCAAAACGGAACAAGTTCTTCTTGCATAAGGGGATCTAATAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTCCAAATACCCAGCATTAAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCACAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAGACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGCTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACAAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
				}
			]
		# outgroup record; unlike the vaccine strains its sequence is given
		# inline together with 'country' and 'region'
		self.outgroup = {
			'strain': 'A/Beijing/32/1992',
			'db': 'IRD',
			'accession': 'U26830',
			'date': '1992-01-01',
			'country': 'China',
			'region': 'China',
			'seq': 'ATGAAGACTATCATTGCTTTGAGCTACATTTTATGTCTGGTTTTCGCTCAAAAACTTCCCGGAAATGACAACAGCACAGCAACGCTGTGCCTGGGACATCATGCAGTGCCAAACGGAACGCTAGTGAAAACAATCACGAATGATCAAATTGAAGTGACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTAGAATATGCGACAGTCCTCACCGAATCCTTGATGGAAAAAACTGCACACTGATAGATGCTCTATTGGGAGACCCTCATTGTGATGGCTTCCAAAATAAGGAATGGGACCTTTTTGTTGAACGCAGCAAAGCTTACAGCAACTGTTACCCTTATGATGTACCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCAGGCACCCTGGAGTTTATCAATGAAGACTTCAATTGGACTGGAGTCGCTCAGGATGGGGGAAGCTATGCTTGCAAAAGGGGATCTGTTAACAGTTTCTTTAGTAGATTGAATTGGTTGCACAAATCAGAATACAAATATCCAGCGCTGAACGTGACTATGCCAAACAATGGCAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGAGCACGGACAGAGACCAAACCAGCCTATATGTTCGAGCATCAGGGAGAGTCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAACCCCGAATATCGGGTCTAGACCCTGGGTAAGGGGTCAGTCCAGTAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAATAGCACAGGGAATCTAATTGCTCCTCGGGGTTACTTCAAAATACGAAATGGGAAAAGCTCAATAATGAGGTCAGATGCACCCATTGGCACCTGCAGTTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCTTTTCAAAATGTAAACAGGATCACATATGGGGCCTGCCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGGAATGTACCAGAGAAACAAACTAGAGGCATATTCGGCGCAATCGCAGGTTTCATAGAAAATGGTTGGGAGGGAATGGTAGACGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGCACAGGACAAGCAGCAGATCTTAAAAGCACTCAAGCAGCAATCGACCAAATCAACGGGAAACTGAATAGGTTAATCGAGAAAACGAACGAGAAATTCCATCAAATCGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAAGACACTAAAATAGATCTCTGGTCTTACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTTACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAGGAAGCAACTGAGGGAAAATGCTGAGGACATGGGCAATGGTTGCTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGGTCAATCAGAAATGGAACTTATGACCATGATGTATACAGAGACGAAGCATTAAACAACCGGTTCCAGATCAAAGGTGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTGTGGATTTCCTTTGCCATATCATGCTTTTTGCTTTGTGTTGTTTTGCTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGTAACATTTGCATTTGA'
		}
Ejemplo n.º 10
0
	def __init__(self,min_length = 987, **kwargs):
		'''
		Initialise the filter, attach the hard-coded vaccine strain records
		and load the outgroup from a GenBank file.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		kwargs      -- forwarded unchanged to flu_filter.__init__

		sets
		self.min_length      -- the value of min_length
		self.vaccine_strains -- list of dicts with the keys 'strain',
		                        'isolate_id', 'date', 'region', 'country',
		                        'seq' (one entry additionally has 'lab')
		self.cds             -- dict gene name -> feature for the 'SigPep',
		                        'HA1' and 'HA2' CDS features of the outgroup
		self.outgroup        -- dict describing the outgroup; its 'seq' is
		                        the upper-cased sequence of the GenBank record
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		# NOTE(review): the sequences below differ in length and in where they
		# start -- presumably partial records; confirm that is intended
		self.vaccine_strains =[
			{
				'strain':    	'B/Shangdong/7/97',
				'isolate_id':	'EPI_ISL_1790',
				'date':    		'1997-07-01', # month and day unknown
				'region':   	'china',
				'country':		'china',
				'seq':'GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGCCTCAACTGTACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAATTCATAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAAACCAAATGGCAAAACTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGGTCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTATTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACTCAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCGCAAATAGAACTCGCAGTCTTGCTTTCCAAT',
			},
			{
				'strain':   	'B/HongKong/330/2001',
				'isolate_id': 	'EPI_ISL_2342',
				'date':    		'2001-07-01', 	# month and day unknown
				'region':		'china',
				'country':		'hong_kong',
				'seq':   		'GATCGAATCTGCACTGGAATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTAAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACGTATCAGGTTATCAAACCATAACGTTATCAATGCAGAAAAAGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGAAAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAGCGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAGCCTCAGAAGTTCACTTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTATACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTTTGGGCTG',
			},
			{
				'strain': 		'B/Malaysia/2506/2004',
				'isolate_id': 	'EPI_ISL_21142',
				'date':			'2004-07-01', # month and day unknown
				'region':		'southeast_asia',
				'country':		'malaysia',
				'seq':			'ATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTTGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCAGAGGATACGAACATATCAGGTTATCAACTCATAACGTTATCAATGCAGAAAATGCACCAGGAGGATCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAGCATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAACCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGGTTATTGAAAAATGCTCTTGTTACTACTAATA',
			},
			{
				'strain':		'B/Brisbane/60/2008',
				'isolate_id':	'EPI_ISL_24365',
				'date':			'2008-08-04',
				'lab':			'Queensland Health Scientific Services',
				'region':		'oceania',
				'country': 		'australia',
				'seq':			'AGCAGAAGCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGT',
			}
		]
		# the outgroup comes from an annotated GenBank record; the path is
		# relative, so it is resolved against the current working directory
		tmp_outgroup = SeqIO.read('source-data/Vic_outgroup.gb', 'genbank')
		genome_annotation = tmp_outgroup.features
		# keep only CDS features that carry a 'gene' qualifier naming one of
		# SigPep/HA1/HA2, keyed by that gene name
		self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
				if 'gene' in x.qualifiers and x.type=='CDS' and
				x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
		# NOTE(review): 'region' is capitalised here but lower-case ('china')
		# in vaccine_strains above -- confirm that consumers of 'region'
		# compare case-insensitively
		self.outgroup = {
						'strain':'B/HongKong/02/1993',
						'region':'China',
						'isolate_id':'EPI_ISL_6617',
						'date':'1993-02-15', # month and day unknown
						'seq': str(tmp_outgroup.seq).upper()
						}
Ejemplo n.º 11
0
    def __init__(self, min_length=987, **kwargs):
        '''
        Initialise the filter, attach the hard-coded vaccine strain records
        and load the outgroup from a GenBank file.

        parameters
        min_length  -- minimal length for a sequence to be acceptable
        kwargs      -- forwarded unchanged to flu_filter.__init__

        sets
        self.min_length      -- the value of min_length
        self.vaccine_strains -- list of dicts with the keys 'strain',
                                'isolate_id', 'date', 'region', 'country',
                                'seq' (the first entry additionally has 'lab')
        self.cds             -- dict gene name -> feature for the 'SigPep',
                                'HA1' and 'HA2' CDS features of the outgroup
        self.outgroup        -- dict describing the outgroup; its 'seq' is
                                the upper-cased sequence of the GenBank record
        '''
        flu_filter.__init__(self, **kwargs)
        self.min_length = min_length
        self.vaccine_strains = [{
            'strain':
            'A/California/07/2009',
            'isolate_id':
            'EPI_ISL_31553',
            'date':
            '2009-04-09',
            'lab':
            'Naval Health Research Center',
            'region':
            'north_america',
            'country':
            'usa',
            'seq':
            'ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA',
        }, {
            "strain":
            "A/Michigan/45/2015",
            # NOTE(review): unlike the entry above this id has no 'EPI_ISL_'
            # prefix -- possibly a different kind of identifier; verify
            'isolate_id':
            'EPI699812',
            "date":
            "2015-09-07",
            "region":
            "north_america",
            "country":
            "usa",
            # NOTE(review): this sequence is longer than the one above and
            # does not begin with the same bases -- presumably it includes
            # flanking sequence; confirm downstream code tolerates that
            "seq":
            "GGAAAAACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTACAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTGGAAGACAAGCATAACGGAAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGTTCATGGTCCTACATTGTGGAAACATCTAATTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCAATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCACGCTGGAGCAAAAAGCTTCTACAAAAACTTGATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTTAACCAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTGTGGGGCATTCACCATCCATCTACTACTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAACAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCACAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCGAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAGTATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTTCCGTCTATTCAATCTAGAGGCCTATTCGGGGCCATTGCCGGCTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAAAATGCCATTGACAAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTGGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATCTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTATCACGATTCAAATGTGAAGAACTTGTATGAAAAAGTAAGAAACCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAAAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAATCATGAGAAAAACAC"
        }]
        # the outgroup comes from an annotated GenBank record; the path is
        # relative, so it is resolved against the current working directory
        tmp_outgroup = SeqIO.read('source-data/H1N1pdm_outgroup.gb', 'genbank')
        genome_annotation = tmp_outgroup.features
        # keep only CDS features that carry a 'gene' qualifier naming one of
        # SigPep/HA1/HA2, keyed by that gene name
        self.cds = {
            x.qualifiers['gene'][0]: x
            for x in genome_annotation
            if 'gene' in x.qualifiers and x.type == 'CDS'
            and x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']
        }
        # NOTE(review): the strain name ends in '/00' while 'date' says 2002;
        # verify which one is the sampling date
        self.outgroup = {
            'strain': 'A/Swine/Indiana/P12439/00',
            'db': 'IRD',
            'accession': 'AF455680',
            'date': '2002-03-14',
            'region': 'north_america',
            'country': 'usa',
            'seq': str(tmp_outgroup.seq).upper()
        }
Ejemplo n.º 12
0
	def __init__(self, min_length=987, **kwargs):
		'''
		Initialise the filter: forward the keyword arguments to flu_filter,
		store the length cutoff and define the vaccine strains, the CDS
		features of interest and the outgroup.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length

		# Each vaccine strain is a plain dict of metadata plus its sequence.
		shangdong_1997 = {
			'strain': 'B/Shangdong/7/97',
			'isolate_id': 'EPI_ISL_1790',
			'date': '1997-07-01',  # (month and day unknown)
			'region': 'China',
			'seq': 'GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGCCTCAACTGTACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAATTCATAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAAACCAAATGGCAAAACTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGGTCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTATTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACTCAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCGCAAATAGAACTCGCAGTCTTGCTTTCCAAT',
		}
		hongkong_2001 = {
			'strain': 'B/HongKong/330/2001',
			'isolate_id': 'EPI_ISL_2342',
			'date': '2001-07-01',  # (month and day unknown)
			'region': 'China',
			'seq': 'GATCGAATCTGCACTGGAATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTAAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACGTATCAGGTTATCAAACCATAACGTTATCAATGCAGAAAAAGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGAAAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAGCGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAGCCTCAGAAGTTCACTTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTATACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTTTGGGCTG',
		}
		malaysia_2004 = {
			'strain': 'B/Malaysia/2506/2004',
			'isolate_id': 'EPI_ISL_21142',
			'date': '2004-07-01',  # (month and day unknown)
			'region': 'SouthEast Asia',
			'seq': 'ATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTTGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCAGAGGATACGAACATATCAGGTTATCAACTCATAACGTTATCAATGCAGAAAATGCACCAGGAGGATCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAGCATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAACCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGGTTATTGAAAAATGCTCTTGTTACTACTAATA',
		}
		brisbane_2008 = {
			'strain': 'B/Brisbane/60/2008',
			'isolate_id': 'EPI_ISL_24365',
			'date': '2008-08-04',
			'lab': 'Queensland Health Scientific Services',
			'region': 'Oceania',
			'seq': 'AGCAGAAGCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGT',
		}
		self.vaccine_strains = [
			shangdong_1997,
			hongkong_2001,
			malaysia_2004,
			brisbane_2008,
		]

		# The outgroup record supplies both the feature annotation and the
		# outgroup sequence.
		outgroup_record = SeqIO.read('source-data/Vic_outgroup.gb', 'genbank')
		wanted_genes = ['SigPep', 'HA1', 'HA2']

		# Keep only CDS features that carry a gene qualifier naming one of the
		# wanted genes, keyed by that gene name.
		cds_by_gene = {}
		for feature in outgroup_record.features:
			if 'gene' not in feature.qualifiers or feature.type != 'CDS':
				continue
			gene_name = feature.qualifiers['gene'][0]
			if gene_name in wanted_genes:
				cds_by_gene[gene_name] = feature
		self.cds = cds_by_gene

		outgroup_sequence = str(outgroup_record.seq).upper()
		self.outgroup = {
			'strain': 'B/HongKong/02/1993',
			'region': 'China',
			'isolate_id': 'EPI_ISL_6617',
			'date': '1993-02-15',  # original note: (month and day unknown)
			'seq': outgroup_sequence,
		}
Ejemplo n.º 13
0
	def __init__(self, min_length=987, **kwargs):
		'''
		Initialise the filter: forward the keyword arguments to flu_filter,
		store the length cutoff and define the vaccine strains and the
		outgroup (both with hard-coded sequences).

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length

		# Vaccine strains are collected one by one, in the original order.
		strains = []
		strains.append({
			'strain': 'B/Shangdong/7/97',
			'isolate_id': 'EPI_ISL_1790',
			'date': '1997-07-01',  # (month and day unknown)
			'region': 'China',
			'seq': 'GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGCCTCAACTGTACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAATTCATAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAAACCAAATGGCAAAACTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGGTCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTATTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACTCAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCGCAAATAGAACTCGCAGTCTTGCTTTCCAAT',
		})
		strains.append({
			'strain': 'B/HongKong/330/2001',
			'isolate_id': 'EPI_ISL_2342',
			'date': '2001-07-01',  # (month and day unknown)
			'region': 'China',
			'seq': 'GATCGAATCTGCACTGGAATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTAAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACGTATCAGGTTATCAAACCATAACGTTATCAATGCAGAAAAAGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGAAAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAGCGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAGCCTCAGAAGTTCACTTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTATACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTTTGGGCTG',
		})
		strains.append({
			'strain': 'B/Malaysia/2506/2004',
			'isolate_id': 'EPI_ISL_21142',
			'date': '2004-07-01',  # (month and day unknown)
			'region': 'SouthEast Asia',
			'seq': 'ATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTTGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCAGAGGATACGAACATATCAGGTTATCAACTCATAACGTTATCAATGCAGAAAATGCACCAGGAGGATCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAGCATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAACCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGGTTATTGAAAAATGCTCTTGTTACTACTAATA',
		})
		strains.append({
			'strain': 'B/Brisbane/60/2008',
			'isolate_id': 'EPI_ISL_24365',
			'date': '2008-08-04',
			'lab': 'Queensland Health Scientific Services',
			'region': 'Oceania',
			'seq': 'AGCAGAAGCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGT',
		})
		self.vaccine_strains = strains

		# The outgroup sequence is stored inline here rather than read from a file.
		outgroup_sequence = 'ATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAGTGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGACCATACCTTCGGCAAAAGTTTCAATACTCCACGAAGTCAAACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAGTAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAACCTCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAGCAGAAGACGGAGGGCTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTACCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGCGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAATCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAAGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACACAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTTGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTCGAACTGGATGAGAAAGTGGATGATCTCAGAGCTGACACAATAAGCTCGCAAATAGAGCTCGCAGTCTTGCTTTCCAATGAAGGAATAATAAACAGCGAAGATGAGCATCTCTTGGCACTTGAAAGAAAACTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCCTCGACAGAATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCATTAAATATTACTGCTGCATCTTTAAATGATGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCTTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTATTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAGCCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGTTATTGA'
		self.outgroup = {
			'strain': 'B/HongKong/02/1993',
			'region': 'China',
			'isolate_id': 'EPI_ISL_6617',
			'date': '1993-02-15',  # original note: (month and day unknown)
			'seq': outgroup_sequence,
		}
Ejemplo n.º 14
0
    def __init__(self, min_length=987, **kwargs):
        """
        Initialise the filter: forward the keyword arguments to flu_filter,
        store the length cutoff and define the vaccine strains and the
        outgroup (both with hard-coded sequences).

        parameters
        min_length  -- minimal length for a sequence to be acceptable
        """
        flu_filter.__init__(self, **kwargs)
        self.min_length = min_length

        # One metadata-plus-sequence dict per vaccine strain.
        shangdong_1997 = {
            'strain': 'B/Shangdong/7/97',
            'isolate_id': 'EPI_ISL_1790',
            'date': '1997-07-01',  # (month and day unknown)
            'region': 'China',
            'seq': 'GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGCCTCAACTGTACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAATTCATAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAAACCAAATGGCAAAACTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGGTCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTATTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACTCAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCGCAAATAGAACTCGCAGTCTTGCTTTCCAAT',
        }
        hongkong_2001 = {
            'strain': 'B/HongKong/330/2001',
            'isolate_id': 'EPI_ISL_2342',
            'date': '2001-07-01',  # (month and day unknown)
            'region': 'China',
            'seq': 'GATCGAATCTGCACTGGAATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTAAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACGTATCAGGTTATCAAACCATAACGTTATCAATGCAGAAAAAGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGAAAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAGCGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAGCCTCAGAAGTTCACTTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTATACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTTTGGGCTG',
        }
        malaysia_2004 = {
            'strain': 'B/Malaysia/2506/2004',
            'isolate_id': 'EPI_ISL_21142',
            'date': '2004-07-01',  # (month and day unknown)
            'region': 'SouthEast Asia',
            'seq': 'ATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTTGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCAGAGGATACGAACATATCAGGTTATCAACTCATAACGTTATCAATGCAGAAAATGCACCAGGAGGATCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAGCATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAACCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGGTTATTGAAAAATGCTCTTGTTACTACTAATA',
        }
        brisbane_2008 = {
            'strain': 'B/Brisbane/60/2008',
            'isolate_id': 'EPI_ISL_24365',
            'date': '2008-08-04',
            'lab': 'Queensland Health Scientific Services',
            'region': 'Oceania',
            'seq': 'AGCAGAAGCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGT',
        }
        self.vaccine_strains = [
            shangdong_1997,
            hongkong_2001,
            malaysia_2004,
            brisbane_2008,
        ]

        # The outgroup sequence is stored inline here rather than read from a file.
        outgroup_sequence = 'ATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAGTGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGACCATACCTTCGGCAAAAGTTTCAATACTCCACGAAGTCAAACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAGTAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAACCTCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAGCAGAAGACGGAGGGCTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTACCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGCGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAATCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAAGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACACAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTTGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTCGAACTGGATGAGAAAGTGGATGATCTCAGAGCTGACACAATAAGCTCGCAAATAGAGCTCGCAGTCTTGCTTTCCAATGAAGGAATAATAAACAGCGAAGATGAGCATCTCTTGGCACTTGAAAGAAAACTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCCTCGACAGAATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCATTAAATATTACTGCTGCATCTTTAAATGATGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCTTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTATTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAGCCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGTTATTGA'
        self.outgroup = {
            'strain': 'B/HongKong/02/1993',
            'region': 'China',
            'isolate_id': 'EPI_ISL_6617',
            'date': '1993-02-15',  # original note: (month and day unknown)
            'seq': outgroup_sequence,
        }
Ejemplo n.º 15
0
	def __init__(self,min_length = 987, **kwargs):
		'''
		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		self.vaccine_strains =[
			{ 'strain':'A/Beijing/262/95',
			  'isolate_id':'EPI_ISL_2656',
			  'region':'China',
			  'db':'GISAID',
			  'date':'1995-07-01', # (Month and day unknown)
			  'seq':'AGCAAAAGCAGGGGAAAATAAAAACAACCAAAATGAAAGCAAAACTACTAGTCCTGTTATGTACATTTACAGCTACATATGCAGACACAATATGTATAGGCTACCATGCCAACAACTCAACCGACACTGTTGACACAGTACTTGAGAAGAATGTGACAGTGACACACTCTGTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGATTTCTAAGGAATCATGGTCCTACATTGTAGAGACACCAAACCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAAACACACCGTAACAGGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAATTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCATCTAACATAGGGGACCAAAGGGCCATCTATCATACAGAAAACGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAGATTCACCCCAGAAATAGCAAAAAGACCCAAAGTAAGAGGTCAGGAAGGAAGAATCAACTACTACTGGACTCTGCTGGAACCCGGGGACACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCATGGTATGCTTTCGCACTGAGTAGAGGCTTTGGGTCAGGAATCATCACCTCAAATGCACCAATGAATGAATGTGATGCGAAGTGTCAAACACCTCAGGGAGCTATAAACAGTAGTCTTCCTTTCCAGAATGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTACAAAATTAAGGATGGTTACAGGACTAAGGAATATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGATGGATGGGTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAACGGGATTACAAATAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCTGTGGGCAAAGAATTCAACAAATTAGAAAGAAGGATGGAAAACTTAAATAAAAAAGTTGATGATGGATTTCTAGACATTTGGACATATAATGCAGAATTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCAAATGTGAAGAATCTGTATGAGAAAGTGAAAAGCCAATTAAAGAATAATGCCAAAGAAATAGGGAACGGGTGTTTTGAATTCTATCACAAGTGTAACAATGAATGCATGGAAAGTGTGAAAAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACTGTCGCCAGTTCACTGGTTCTTTTGGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGTTCCAATGGGTCTTTGCAGTGTAGAATATGCATCTGAGACCAGAATTTCAGAAATATAAGAAAAAACACCCTTGTTTCTACT',
			},
			{	'strain':'A/NewCaledonia/20/99',
				'isolate_id':'EPI_ISL_158137',
				  'region':'Oceania',
				'date':'1999-06-09',
				'seq':'GACACAATATGTATAGGCTACCATGCCAACAACTCAACCGACACTGTTGACACAGTACTTGAGAAGAATGTGACAGTGACACACTCTGTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAATCATGGTCCTACATTGTAGAAACACCAAATCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCGGAGTATCAGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGTAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCTAACATAGGGGACCAAAGGGCCCTCTATCATACAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAGATTCACCCCAGAAATAGCCAAAAGACCCAAAGTAAGAGATCAGGAAGGAAGAATCAACTACTACTGGACTCTGCTGGAACCTGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCATGGTATGCTTTTGCACTGAGTAGAGGCTTTGGATCAGGAATCATCACCTCAAATGCACCAATGGATGAATGTGATGCGAAGTGTCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGAATGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGGTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGTACACAAAATGCCATTAACGGGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCTGTGGGCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTAAATAAAAAAGTTGATGATGGGTTTCTAGACATTTGGACATATAATGCAGAATTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAATTAAAGAATAATGCCAAAGAAATAGGAAACGGGTGTTTTGAATTCTATCACAAGTGTAACAATGAATGCATGGAGAGTGTGAAAAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACTGTCGCCAGTTCCCTGGTTCTTTTGGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGTTCCAATGGGTCTTTGCAGTGCAGAATATGC',
			},
			{'strain': 'A/SolomonIslands/3/2006',
			 'isolate_id':'EPI_ISL_157458',
			  'region':'Oceania',
			 'date':'2006-08-21',
			 'lab':'WHO Centre for Reference & Research on Influenza',
			 'seq': 'GACACAATATGTATAGGCTACCATGCCAACAACTCAACCGACACTGTTGACACAGTACTTGAGAAGAATGTGACAGTGACACACTCTGTCAACCTGCTTGAGGACAGTCACAATGGAAAATTATGTCTATTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAGGGAATCATGGTCCTACATTGTAGAAAAACCAAATCCTGAGAATGGAACATGTTACCCAGGGCATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCACAACCGGAGTATCAGCATCATGCTCCCATAATGGGGAAAGCAGTTTTTACAAAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGCAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCTAACATAGGTGACCAAAGGGCTCTCTATCATACAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAAATTCACCCCAGAAATAGCCAAAAGACCCAAAGTAAGAGATCGAGAAGGAAGAATCAACTACTACTGGACTCTACTTGAACCCGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCAAGATATGCTTTCGCACTGAGTAGAGGCTTTGGATCAGGAATCATCAACTCAAATGCACCAATGGATGAATGTGATGCGAAGTGCCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGAATGTACACCCTGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGTTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAATGGGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCTGTGGGCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTAAATAAAAAAGTTGATGATGGGTTTATAGACATTTGGACATATAATGCAGAATTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAATTAAAGAATAATGCCAAAGAAATAGGAAATGGGTGTTTTGAATTCTATCACAAGTGTAACGATGAATGCATGGAGAGTGTAAAAAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACAGTCGCCAGTTCTCTGGTCCTTTTGGTCTCCCTGGGGGCAATCAGATTCG',
			},
			{'strain':'A/Brisbane/59/2007',
			'isolate_id':'EPI_ISL_23344',
			 'region':'Oceania',
			'date':'2007-07-01', # (Month and day unknown)
			'seq': 'ATGAAAGTAAAACTACTGGTCCTGTTATGCACATTTACAGCTACATATGCAGACACAATATGTATAGGCTACCATGCTAACAACTCGACCGACACTGTTGACACAGTACTTGAAAAGAATGTGACAGTGACACACTCTGTCAACCTGCTTGAGAACAGTCACAATGGAAAACTATGTCTATTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGGTGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAGTCATGGTCCTACATTGTAGAAAAACCAAATCCTGAGAATGGAACATGTTACCCAGGGCATTTCGCTGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGGTTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCGGAGTGTCAGCATCATGCTCCCATAATGGGGAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGCAAACAACAAAGAAAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCAAACATAGGTRWCCAAAAGGCCCTCTATCATACAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAAATTCACCCCAGAAATAGCCAAAAGACCCAAAGTAAGAGATCAAGAAGGAAGAATCAATTACTACTGGACTCTGCTTGAACCCGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCAAGATATGCTTTCGCACTGAGTAGAGGCTTTGGATCAGGAATCATCAACTCAAATGCACCAATGGATAAATGTGATGCGAAGTGCCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGAACGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGTTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAATGGGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCAGTGGGCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTGAATAAAAAAGTTGATGATGGGTTTATAGACATTTGGACATATAATGCAGAACTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAGTTAAAGAATAATGCTAAAGAAATAGGAAATGGGTGTTTTGAATTCTATCACAAGTGTAACGATGAATGCATGGAGAGTGTAAAGAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACAGTCGCCAGTTCTCTGGTTCTTTTGGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGTTCCAATGGGTCTTTACAGTGTAGAATATGCATCTAA',
			}
		]
		self.outgroup = {
			'strain': 'A/Tokyo/1/51',
			'db': 'GISAID',
			'accession': 'EPI_ISL_101',
			'date': '1951-07-01',
			'country': 'Japan',
			'region': 'JapanKorea',
			'seq': 'ATGAAAGCAAAACTACTGATCCTGTTATGTGCACTTTCAGCTACAGATGCAGACACAATATGTATAGGCTACCATGCTAACAATTCAACCGACACTGTTGACACAGTACTCGAAAAGAATGTGACAGTGACACACTCTGTAAACCTACTCGAAGACAGCCACAACGGGAAATTATGCAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGATCTTGGGAACCCCAGAATGCGAATCATTGCTCTCTAATAGATCATGGTCCTACATTGCAGAAACACCAAACTGTGAGAATGGAACATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACATAACCAGAGGAGTAACGGCAGCATGCTCCCACGCGAAGAAAAGCAGTTTTTACAAAAATTTGCTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTGCTGTGGGGTGTTCATCACCCGTCTAACATAGAGGATCAAAGGACCCTCTATCGGAAAGAAAATGCTTATGTCTCTGTGGTGTCTTCAAATTATAACAGGAGATTCACCCCGGAAATAGCAGAAAGACCCAAAGTAAGAGGTCAAGCAGGGAGAATAAACTATTACTGGACTTTGCTAGAACCCGGAGACAAAATAATATTTGAGGCAAATGGAAACCTAATAGCGCCATGGTATGCTTTCGCACTGAGTAGAGGCCTTGGATCAGGAATCATCACCTCAAACGCATCAATGGATGAATGTGACACGAAGTGTCAGACACCCCAGGGAGCTATAAACAGTAGTCTCCCTTTTCAGAACATACACCCAGTCACAATAGGAGAGTGCCCAAAATACGTCAGGAGTACCAAATTGAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGA',			
		}
Ejemplo n.º 16
0
	def __init__(self,min_length = 987, **kwargs):
		'''
		Initialize the filter with hard-coded reference data.

		Calls flu_filter.__init__ with **kwargs, stores min_length and defines
		self.vaccine_strains (a list with one dict per strain) and
		self.outgroup (a single dict). Each dict holds metadata plus the
		sequence under the key 'seq'.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		kwargs      -- passed on unchanged to flu_filter.__init__
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		# every entry has strain, isolate_id, region, date and seq;
		# 'db' and 'lab' are only present on some of them
		self.vaccine_strains =[
			{ 'strain':'A/Beijing/262/95',
			  'isolate_id':'EPI_ISL_2656',
			  'region':'China',
			  'db':'GISAID',
			  'date':'1995-07-01', # (Month and day unknown)
			  'seq':'AGCAAAAGCAGGGGAAAATAAAAACAACCAAAATGAAAGCAAAACTACTAGTCCTGTTATGTACATTTACAGCTACATATGCAGACACAATATGTATAGGCTACCATGCCAACAACTCAACCGACACTGTTGACACAGTACTTGAGAAGAATGTGACAGTGACACACTCTGTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGATTTCTAAGGAATCATGGTCCTACATTGTAGAGACACCAAACCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAAACACACCGTAACAGGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAATTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCATCTAACATAGGGGACCAAAGGGCCATCTATCATACAGAAAACGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAGATTCACCCCAGAAATAGCAAAAAGACCCAAAGTAAGAGGTCAGGAAGGAAGAATCAACTACTACTGGACTCTGCTGGAACCCGGGGACACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCATGGTATGCTTTCGCACTGAGTAGAGGCTTTGGGTCAGGAATCATCACCTCAAATGCACCAATGAATGAATGTGATGCGAAGTGTCAAACACCTCAGGGAGCTATAAACAGTAGTCTTCCTTTCCAGAATGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTACAAAATTAAGGATGGTTACAGGACTAAGGAATATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGATGGATGGGTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAACGGGATTACAAATAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCTGTGGGCAAAGAATTCAACAAATTAGAAAGAAGGATGGAAAACTTAAATAAAAAAGTTGATGATGGATTTCTAGACATTTGGACATATAATGCAGAATTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCAAATGTGAAGAATCTGTATGAGAAAGTGAAAAGCCAATTAAAGAATAATGCCAAAGAAATAGGGAACGGGTGTTTTGAATTCTATCACAAGTGTAACAATGAATGCATGGAAAGTGTGAAAAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACTGTCGCCAGTTCACTGGTTCTTTTGGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGTTCCAATGGGTCTTTGCAGTGTAGAATATGCATCTGAGACCAGAATTTCAGAAATATAAGAAAAAACACCCTTGTTTCTACT',
			},
			{	'strain':'A/NewCaledonia/20/99',
				'isolate_id':'EPI_ISL_158137',
				  'region':'Oceania',
				'date':'1999-06-09',
				'seq':'GACACAATATGTATAGGCTACCATGCCAACAACTCAACCGACACTGTTGACACAGTACTTGAGAAGAATGTGACAGTGACACACTCTGTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAATCATGGTCCTACATTGTAGAAACACCAAATCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCGGAGTATCAGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGTAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCTAACATAGGGGACCAAAGGGCCCTCTATCATACAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAGATTCACCCCAGAAATAGCCAAAAGACCCAAAGTAAGAGATCAGGAAGGAAGAATCAACTACTACTGGACTCTGCTGGAACCTGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCATGGTATGCTTTTGCACTGAGTAGAGGCTTTGGATCAGGAATCATCACCTCAAATGCACCAATGGATGAATGTGATGCGAAGTGTCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGAATGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGGTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGTACACAAAATGCCATTAACGGGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCTGTGGGCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTAAATAAAAAAGTTGATGATGGGTTTCTAGACATTTGGACATATAATGCAGAATTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAATTAAAGAATAATGCCAAAGAAATAGGAAACGGGTGTTTTGAATTCTATCACAAGTGTAACAATGAATGCATGGAGAGTGTGAAAAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACTGTCGCCAGTTCCCTGGTTCTTTTGGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGTTCCAATGGGTCTTTGCAGTGCAGAATATGC',
			},
			{'strain': 'A/SolomonIslands/3/2006',
			 'isolate_id':'EPI_ISL_157458',
			  'region':'Oceania',
			 'date':'2006-08-21',
			 'lab':'WHO Centre for Reference & Research on Influenza',
			 'seq': 'GACACAATATGTATAGGCTACCATGCCAACAACTCAACCGACACTGTTGACACAGTACTTGAGAAGAATGTGACAGTGACACACTCTGTCAACCTGCTTGAGGACAGTCACAATGGAAAATTATGTCTATTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAGGGAATCATGGTCCTACATTGTAGAAAAACCAAATCCTGAGAATGGAACATGTTACCCAGGGCATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCACAACCGGAGTATCAGCATCATGCTCCCATAATGGGGAAAGCAGTTTTTACAAAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGCAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCTAACATAGGTGACCAAAGGGCTCTCTATCATACAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAAATTCACCCCAGAAATAGCCAAAAGACCCAAAGTAAGAGATCGAGAAGGAAGAATCAACTACTACTGGACTCTACTTGAACCCGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCAAGATATGCTTTCGCACTGAGTAGAGGCTTTGGATCAGGAATCATCAACTCAAATGCACCAATGGATGAATGTGATGCGAAGTGCCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGAATGTACACCCTGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGTTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAATGGGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCTGTGGGCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTAAATAAAAAAGTTGATGATGGGTTTATAGACATTTGGACATATAATGCAGAATTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAATTAAAGAATAATGCCAAAGAAATAGGAAATGGGTGTTTTGAATTCTATCACAAGTGTAACGATGAATGCATGGAGAGTGTAAAAAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACAGTCGCCAGTTCTCTGGTCCTTTTGGTCTCCCTGGGGGCAATCAGATTCG',
			},
			{'strain':'A/Brisbane/59/2007',
			'isolate_id':'EPI_ISL_23344',
			 'region':'Oceania',
			'date':'2007-07-01', # (Month and day unknown)
			# NOTE(review): this sequence contains the non-ACGT letters 'RW'
			# (presumably IUPAC ambiguity codes) -- confirm consumers accept them
			'seq': 'ATGAAAGTAAAACTACTGGTCCTGTTATGCACATTTACAGCTACATATGCAGACACAATATGTATAGGCTACCATGCTAACAACTCGACCGACACTGTTGACACAGTACTTGAAAAGAATGTGACAGTGACACACTCTGTCAACCTGCTTGAGAACAGTCACAATGGAAAACTATGTCTATTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGGTGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAGTCATGGTCCTACATTGTAGAAAAACCAAATCCTGAGAATGGAACATGTTACCCAGGGCATTTCGCTGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGGTTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCGGAGTGTCAGCATCATGCTCCCATAATGGGGAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGCAAACAACAAAGAAAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCAAACATAGGTRWCCAAAAGGCCCTCTATCATACAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAAATTCACCCCAGAAATAGCCAAAAGACCCAAAGTAAGAGATCAAGAAGGAAGAATCAATTACTACTGGACTCTGCTTGAACCCGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCAAGATATGCTTTCGCACTGAGTAGAGGCTTTGGATCAGGAATCATCAACTCAAATGCACCAATGGATAAATGTGATGCGAAGTGCCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGAACGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGTTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAATGGGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCAGTGGGCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTGAATAAAAAAGTTGATGATGGGTTTATAGACATTTGGACATATAATGCAGAACTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAGTTAAAGAATAATGCTAAAGAAATAGGAAATGGGTGTTTTGAATTCTATCACAAGTGTAACGATGAATGCATGGAGAGTGTAAAGAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACAGTCGCCAGTTCTCTGGTTCTTTTGGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGTTCCAATGGGTCTTTACAGTGTAGAATATGCATCTAA',
			}
		]
		# NOTE(review): the outgroup is identified by 'accession' whereas the
		# vaccine strains above use 'isolate_id' -- confirm this is intended
		self.outgroup = {
			'strain': 'A/Tokyo/1/51',
			'db': 'GISAID',
			'accession': 'EPI_ISL_101',
			'date': '1951-07-01',
			'country': 'Japan',
			'region': 'JapanKorea',
			'seq': 'ATGAAAGCAAAACTACTGATCCTGTTATGTGCACTTTCAGCTACAGATGCAGACACAATATGTATAGGCTACCATGCTAACAATTCAACCGACACTGTTGACACAGTACTCGAAAAGAATGTGACAGTGACACACTCTGTAAACCTACTCGAAGACAGCCACAACGGGAAATTATGCAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGATCTTGGGAACCCCAGAATGCGAATCATTGCTCTCTAATAGATCATGGTCCTACATTGCAGAAACACCAAACTGTGAGAATGGAACATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACATAACCAGAGGAGTAACGGCAGCATGCTCCCACGCGAAGAAAAGCAGTTTTTACAAAAATTTGCTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTGCTGTGGGGTGTTCATCACCCGTCTAACATAGAGGATCAAAGGACCCTCTATCGGAAAGAAAATGCTTATGTCTCTGTGGTGTCTTCAAATTATAACAGGAGATTCACCCCGGAAATAGCAGAAAGACCCAAAGTAAGAGGTCAAGCAGGGAGAATAAACTATTACTGGACTTTGCTAGAACCCGGAGACAAAATAATATTTGAGGCAAATGGAAACCTAATAGCGCCATGGTATGCTTTCGCACTGAGTAGAGGCCTTGGATCAGGAATCATCACCTCAAACGCATCAATGGATGAATGTGACACGAAGTGTCAGACACCCCAGGGAGCTATAAACAGTAGTCTCCCTTTTCAGAACATACACCCAGTCACAATAGGAGAGTGCCCAAAATACGTCAGGAGTACCAAATTGAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGA',
		}
Ejemplo n.º 17
0
	def __init__(self, aln_fname, outgroup, outdir = './', formats = ['pdf','svg','png'], verbose = 0, **kwargs):
		'''
		Initialize all parent classes, derive the output file names and
		resolve the outgroup, which is then appended to self.viruses.

		parameters
		aln_fname   -- alignment file, handed to flu_filter as alignment_file
		outgroup    -- either the path of an existing fasta file (its first
		               record is used) or a strain name; a name is looked up
		               first in self.viruses, then in std_outgroup_file
		outdir      -- output directory; stored with exactly one trailing '/'
		formats     -- stored (as a copy) in self.formats
		verbose     -- if true, print where the outgroup was found
		kwargs      -- passed on unchanged to every parent initializer

		raises
		ValueError  -- if outgroup is a name that is found neither in the
		               alignment nor among the standard outgroups
		'''
		process.__init__(self, **kwargs)
		flu_filter.__init__(self, alignment_file = aln_fname, **kwargs)
		tree_refine.__init__(self, **kwargs)
		virus_clean.__init__(self, **kwargs)
		self.verbose = verbose
		# keep a copy: the default list object is shared between all calls and
		# must not become mutable through an instance attribute
		self.formats = formats if formats is None else list(formats)
		self.outdir = outdir.rstrip('/')+'/'
		self.auspice_tree_fname = self.outdir + 'tree.json'
		self.auspice_sequences_fname = self.outdir + 'sequences.json'
		self.auspice_frequencies_fname = None
		self.auspice_meta_fname = self.outdir + 'meta.json'

		def read_fasta(fname):
			# one dict per record of a fasta file, sequence upper-cased
			return [{'strain':rec.name, 'seq':str(rec.seq).upper(), 'desc':rec.description}
					for rec in SeqIO.parse(fname, 'fasta')]

		if os.path.isfile(outgroup):
			# outgroup names a fasta file: take its first record
			from_file = read_fasta(outgroup)
			if from_file:
				self.outgroup = from_file[0]
				if len(from_file)>1:
					print("More than one sequence in  %s taking first" % (outgroup,))
				if self.verbose:
					print("using outgroup found in file  %s" % (outgroup,))
			# NOTE(review): an empty fasta file leaves self.outgroup untouched;
			# unless a parent initializer has set it, the append below fails -- confirm
		elif isinstance(outgroup, basestring):
			# outgroup is a strain name: prefer a sequence from the alignment
			seq_names = [x['strain'] for x in self.viruses]
			if outgroup in seq_names:
				# taken out of the list here, appended again at the end
				self.outgroup = self.viruses.pop(seq_names.index(outgroup))
				if self.verbose:
					print("using outgroup found in alignment %s" % (outgroup,))
			else:
				standard_outgroups = read_fasta(std_outgroup_file)
				outgroup_names = [x['strain'] for x in standard_outgroups]
				if outgroup in outgroup_names:
					self.outgroup = standard_outgroups[outgroup_names.index(outgroup)]
					if self.verbose:
						print("using standard outgroup %s" % (outgroup,))
				else:
					raise ValueError("outgroup %s not found" % outgroup)
		self.viruses.append(self.outgroup)
		self.filter_geo(prune=False)
		#self.filter_host(prune=False)
		self.make_strain_names_unique()
Ejemplo n.º 18
0
    def __init__(self, min_length=987, **kwargs):
        '''
        Initialize the filter with vaccine strain, CDS annotation and outgroup.

        Calls flu_filter.__init__ with **kwargs, stores min_length, defines
        self.vaccine_strains and reads one GenBank record from disk, from
        which self.cds and the sequence of self.outgroup are taken.

        parameters
        min_length  -- minimal length for a sequence to be acceptable
        kwargs      -- passed on unchanged to flu_filter.__init__
        '''
        flu_filter.__init__(self, **kwargs)
        self.min_length = min_length
        self.vaccine_strains = [{
            'strain':
            'A/California/07/2009',
            'isolate_id':
            'EPI_ISL_31553',
            'date':
            '2009-04-09',
            'lab':
            'Naval Health Research Center',
            'country':
            'USA',
            'region':
            'NorthAmerica',
            'seq':
            'ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA',
        }]
        # NOTE(review): absolute path into one user's home directory -- the
        # constructor fails wherever this file is absent; consider making the
        # location configurable
        tmp_outgroup = SeqIO.read(
            '/Users/yujia_zhou/Documents/Work/H9_nextflu-master/augur/source-data/H1N1pdm_outgroup.gb',
            'genbank')
        genome_annotation = tmp_outgroup.features
        # gene name -> feature, restricted to CDS features whose 'gene'
        # qualifier is SigPep, HA1 or HA2
        self.cds = {
            x.qualifiers['gene'][0]: x
            for x in genome_annotation
            if 'gene' in x.qualifiers and x.type == 'CDS'
            and x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']
        }
        # NOTE(review): the strain name ends in '/00' while 'date' is in 2002 --
        # possibly a database date rather than a collection date; confirm
        self.outgroup = {
            'strain': 'A/Swine/Indiana/P12439/00',
            'db': 'IRD',
            'accession': 'AF455680',
            'date': '2002-03-14',
            'country': 'USA',
            'region': 'NorthAmerica',
            'seq': str(tmp_outgroup.seq).upper()
        }
Ejemplo n.º 19
0
    def __init__(self, min_length=0, **kwargs):
        '''
		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
        flu_filter.__init__(self, **kwargs)
        self.min_length = min_length
        self.vaccine_strains = [{
            "strain":
            "A/Chicken/HongKong/G9/97",
            "db":
            "GISAID",
            "accession":
            "EPI_ISL_1263",
            "date":
            "1997-07-01",
            "seq":
            "ATGGAAATAATAGCACTAATAGCTATACTGGTAGTGACAAAAACAAGCAATGCAGATAAAATTTGCATTGGCTACCAGTCAACAAACTCCACAGAAACTGTTGATACACTAGTAGAAAACAATGTCCCTGTGACACATACCAAAGAATTGCTCCACACAGAGCACAATGGAATGCTATGTGCAACAAACCTGGGGCACCCTCTCATCCTAGACACCTGCACCATCGAAGGGTTGGTGTACGGCAACCCTTCCTGTGATTTGCTACTGGGAGGGAAAGAATGGTCTTACATTGTCGAAAGATCATCAGCTGTCAATGGGATGTGTTACCCTGGAAGGGTAGAGAACCTGGAAGAACTCAGGTCTTTTTTCAGCTCCGCTCGCTCCTACAAAAGACTCCTGCTCTTTCCAGACAGAACTTGGAATGTGACTTACACTGGGACAAGCAAAGCATGTTCAAACTCATTCTACAGAAGTATGAGATGGCTGACACACAAGAGCGATTCTTACCCTATTCAAGACGCCCAATATACTAACGATTGGGGAAAGAATATTCTCTTCATGTGGGGCATACACCACCCACCTACTGATACTGAGCAAATAAATCTATACAAAAAAGCTGATACAACAACAAGTATAACAACGGAAGATATCAATCGAACTTTCAAACCAGTGATAGGGCCAAGGCCTCTTGTCAATGGTCAACAAGGGAGAATTGATTATTATTGGTCAGTACTAAAGCCAGGCCAGACACTGCGAGTGAGATCCAATGGGAATCTAATTGCCCCATGGTATGGACACATTCTTTCAGGAGAAAGCCATGGAAGAATCTTGAAGACCGATTTGAGTAGTGGCAACTGCGTAGTACAATGCCAAACTGAGAAAGGTGGTTTGAACACGACCTTGCCATTCCACAATGTCAGCAAGTATGCATTTGGGAACTGCCCCAAATATGTTGGAGTGAAGAGTCTCAAACTGGCAGTTGGTCTAAGGAATGTTCCTGCTGCATCATATAGAGGGCTCTTCGGTGCCATAGCTGGATTCATAGAAGGCGGTTGGCCAGGACTAGTTGCAGGCTGGTACGGGTTTCAGCATTCAAATGATCAAGGGGTTGGAATGGCCGCAGATAGGGAATCAACTCAAGAAGCAGTTGACAAGATAACATCCAAAGTAAATAACATAATCGACAAAATGAACAAGCAGTATGGA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T--------------------------------------------------------------------------------------"
        }]
        tmp_outgroup = SeqIO.read(
            '/Users/yujiazhou/Documents/FluProject/augur/source-data/H9_outgroup.gb',
            'genbank')
        genome_annotation = tmp_outgroup.features
        self.cds = {
            x.qualifiers['gene'][0]: x
            for x in genome_annotation
            if 'gene' in x.qualifiers and x.type == 'CDS'
            and x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']
        }
        self.outgroup = {
            'strain': 'A/duck/HongKong/147/1977',
            'db': 'IRD',
            'accession': 'AY206671',
            'date': '2003-03-03',
            'country': 'HongKong',
            'region': 'EastAsia',
            'seq': str(tmp_outgroup.seq).upper()
        }
Ejemplo n.º 20
0
    def __init__(self, min_length=987, **kwargs):
        """
        Initialize the filter with vaccine strains, CDS annotation and outgroup.

        Calls flu_filter.__init__ with **kwargs, stores min_length, defines
        self.vaccine_strains (one dict per strain) and reads one GenBank
        record from disk, from which self.cds and the sequence of
        self.outgroup are taken.

        parameters
        min_length  -- minimal length for a sequence to be acceptable
        kwargs      -- passed on unchanged to flu_filter.__init__
        """
        flu_filter.__init__(self, **kwargs)
        self.min_length = min_length
        # every entry has the keys strain, isolate_id, date, region and seq
        self.vaccine_strains = [
            {
                "strain": "B/Beijing/184/93",
                "isolate_id": "EPI_ISL_969",
                "date": "1993-07-01",  # (Month and day unknown)
                "region": "China",
                "seq": "GATCGAATCTGTACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGGAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAGGCTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACAAATCCACTAACAGTAGAAGTACCATACATTTGTACAAAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAATCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG",
            },
            {
                "strain": "B/Sichuan/379/99",
                "isolate_id": "EPI_ISL_21113",
                "date": "1999-07-01",  # (Month and day unknown)
                "region": "China",
                "seq": "GAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTAACTGGTGCGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAACCTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGATCACACCTTCGGCAAAAGCTTCAATACTCCACGAAATCAAACCTGTTACATCCGGATGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAAAATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCTTACAGACTTGGAACTTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCACACATCTGTACAAAAGAAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAATAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCGGACCAAACAGAGGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGGATTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGTAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCT",
            },
            {
                "strain": "B/Shanghai/361/2002",
                "isolate_id": "EPI_ISL_2842",
                "date": "2002-06-12",
                "region": "China",
                "seq": "AATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACTCCAATAAAATCTCATTTTGCAAATCTCAAAGGAACAAGGACTAGAGGGAAACTATGCCCAGATTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCCCTAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCCACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATCTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGGTTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTC",
            },
            {
                "strain": "B/Florida/4/2006",
                "isolate_id": "EPI_ISL_21307",
                "date": "2006-11-01",
                "region": "NorthAmerica",
                "seq": "ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGAATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCCACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGCACAGATCTGGATGTGGCTTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCGAAAGCTTCAATACTCCACGAAGTCAAACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGCTATCAACCCAAAACGTCATCGATGCGGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAGAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATCACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACACTATGTTTCTCAGATTGGCAGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGGATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTACCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGCTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCGGACCTTAAGAGTACGCAAGAAGCTATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGAGATAGGAAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA",
            },
            {
                "strain": "B/Wisconsin/01/2010",
                "isolate_id": "EPI_ISL_76940",
                "date": "2010-02-20",
                "region": "NorthAmerica",
                "seq": "ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA",
            },
            {
                "strain": "B/Massachusetts/02/2012",
                "isolate_id": "EPI_ISL_121434",
                "date": "2012-03-13",
                "region": "NorthAmerica",
                "seq": "ATGAAGGCAATAATTGTACTACTAATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGTACAGATCTGGATGTGGCCCTGGGCAGGCCAATGTGTGTGGGAACTACACCTTCTGCGAAAGCTTCAATACTTCACGAAGTCAGACCTGTTACATCCGGGTGCTTCCCTATAATGCACGACAGAACAAAAATCAGGCAACTAGCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCATTAACAGTAGAAGTACCATACATTTGTGCAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTCGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTTGCTGCAGACCTTAAGAGCACACAAGAAGCTATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGGCTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGACCTCAGAGCTGACACTATAAGTTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAATGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAGTTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA",
            },
            {
                "strain": "B/PHUKET/3073/2013",
                "isolate_id": "EPI_ISL_161843",
                "date": "2013-11-21",
                "region": "SoutheastAsia",
                "seq": "ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAACGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCATGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAAGATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTGGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCGGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACGCATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGGGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAGAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAAAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAACTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAAAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAAAGAAGGTTAGGCCTTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTCATCATTACAAAGAAAC",
            },
        ]
        # NOTE(review): relative path -- it is resolved against the current
        # working directory, so the constructor depends on where it is run from
        tmp_outgroup = SeqIO.read("source-data/Yam_outgroup.gb", "genbank")
        genome_annotation = tmp_outgroup.features
        # gene name -> feature, restricted to CDS features whose "gene"
        # qualifier is SigPep, HA1 or HA2
        self.cds = {
            x.qualifiers["gene"][0]: x
            for x in genome_annotation
            if "gene" in x.qualifiers and x.type == "CDS" and x.qualifiers["gene"][0] in ["SigPep", "HA1", "HA2"]
        }
        # NOTE(review): the strain is named after Singapore but "region" is
        # "China" -- confirm this is the intended region assignment
        self.outgroup = {
            "strain": "B/Singapore/11/94",
            "isolate_id": "EPI_ISL_20980",
            "date": "1994-05-10",
            "region": "China",
            "seq": str(tmp_outgroup.seq).upper(),
        }
Ejemplo n.º 21
0
	def __init__(self, min_length=987, **kwargs):
		'''
		Set up the filter: run the flu_filter initializer, remember the
		length cutoff and attach the hard-coded reference strain records.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		kwargs      -- forwarded unchanged to flu_filter.__init__
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length

		# Each record holds the strain name, isolate id, date, region and
		# nucleotide sequence of one influenza B reference strain.
		reference_strains = [
			{
				# month and day unknown
				"strain": "B/Beijing/184/93",
				"isolate_id": "EPI_ISL_969",
				"date": "1993-07-01",
				"region": "China",
				"seq": "GATCGAATCTGTACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGGAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAGGCTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACAAATCCACTAACAGTAGAAGTACCATACATTTGTACAAAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAATCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG",
			},
			{
				# month and day unknown
				"strain": "B/Sichuan/379/99",
				"isolate_id": "EPI_ISL_21113",
				"date": "1999-07-01",
				"region": "China",
				"seq": "GAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTAACTGGTGCGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAACCTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGATCACACCTTCGGCAAAAGCTTCAATACTCCACGAAATCAAACCTGTTACATCCGGATGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAAAATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCTTACAGACTTGGAACTTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCACACATCTGTACAAAAGAAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAATAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCGGACCAAACAGAGGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGGATTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGTAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCT",
			},
			{
				"strain": "B/Shanghai/361/2002",
				"isolate_id": "EPI_ISL_2842",
				"date": "2002-06-12",
				"region": "China",
				"seq": "AATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACTCCAATAAAATCTCATTTTGCAAATCTCAAAGGAACAAGGACTAGAGGGAAACTATGCCCAGATTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCCCTAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCCACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATCTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGGTTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTC",
			},
			{
				"strain": "B/Florida/4/2006",
				"isolate_id": "EPI_ISL_21307",
				"date": "2006-11-01",
				"region": "NorthAmerica",
				"seq": "ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGAATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCCACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGCACAGATCTGGATGTGGCTTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCGAAAGCTTCAATACTCCACGAAGTCAAACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGCTATCAACCCAAAACGTCATCGATGCGGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAGAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATCACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACACTATGTTTCTCAGATTGGCAGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGGATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTACCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGCTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCGGACCTTAAGAGTACGCAAGAAGCTATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGAGATAGGAAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA",
			},
			{
				"strain": "B/Wisconsin/01/2010",
				"isolate_id": "EPI_ISL_76940",
				"date": "2010-02-20",
				"region": "NorthAmerica",
				"seq": "ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA",
			},
			{
				"strain": "B/Massachusetts/02/2012",
				"isolate_id": "EPI_ISL_121434",
				"date": "2012-03-13",
				"region": "NorthAmerica",
				"seq": "ATGAAGGCAATAATTGTACTACTAATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGTACAGATCTGGATGTGGCCCTGGGCAGGCCAATGTGTGTGGGAACTACACCTTCTGCGAAAGCTTCAATACTTCACGAAGTCAGACCTGTTACATCCGGGTGCTTCCCTATAATGCACGACAGAACAAAAATCAGGCAACTAGCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCATTAACAGTAGAAGTACCATACATTTGTGCAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTCGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTTGCTGCAGACCTTAAGAGCACACAAGAAGCTATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGGCTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGACCTCAGAGCTGACACTATAAGTTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAATGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAGTTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA",
			},
			{
				"strain": "B/PHUKET/3073/2013",
				"isolate_id": "EPI_ISL_161843",
				"date": "2013-11-21",
				"region": "SoutheastAsia",
				"seq": "ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAACGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCATGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAAGATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTGGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCGGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACGCATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGGGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAGAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAAAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAACTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAAAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAAAGAAGGTTAGGCCTTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTCATCATTACAAAGAAAC",
			},
		]
		self.vaccine_strains = reference_strains

		# Single outgroup record with the same keys as the strain records above.
		outgroup_record = {
			"strain": "B/Singapore/11/94",
			"isolate_id": "EPI_ISL_20980",
			"date": "1994-05-10",
			"region": "China",
			"seq": "ATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCATACGTTTGTACAGAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCAGCAGACCTTAAGAGTACGCAAGAAGCCATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGGAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTACTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGATAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAATGTTTCATGCTCCATCTGTCTATAAGGAAAATTAAGCCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGTTTGTTACCATTACAAAGAAACGTTATTGA",
		}
		self.outgroup = outgroup_record
Ejemplo n.º 22
0
    def __init__(self, min_length=0, **kwargs):
        '''
		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
        flu_filter.__init__(self, **kwargs)
        self.min_length = min_length
        self.vaccine_strains = [{
            'strain':
            'A/mallard/Maryland/13OS3318/2014',
            'isolate_id':
            'EPI-ISL-216765',
            'date':
            '2014-06-24',
            'lab':
            'Other Database Import',
            'region':
            'NorthAmerica',
            'seq':
            'ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGAATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTACCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTATGCGGAGCTAAAGTGGTTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTGTAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGGCGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGGAGCTTCAGTAGACAATAACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAGAAGTACACAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAATGAAATAGAACATCAAATCGGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATATCACAAATGCAATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG',
        }]
        self.new_strains = [{
            "strain":
            "A/Unknown/Unknown/Batch2-1_002_01102017_4_H10N6",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_007_01102017_4_H10N4",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCAGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACGCAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_008_01102017_4_H10N7",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGCACTAGTATTCGCGCTCCTTGGAGCGGTGGATGGTCTTGATAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCATAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAATCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACCTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTACGCGGAGCTAAAGTGGCTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGAAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGACGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATTACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTGGGCATTCAATCAGGAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTAAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCGGTAACGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAGATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGAAAAGGGTGCTTTGAAATATATCACAAATGCGATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTCTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_010_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGRGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_013_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCTGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGTTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_021_01102017_4_H10N4",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCAGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_027_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATTTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_028_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGACAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_030_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_031_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCATCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_033_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_034_01102017_4_H10N7",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGCACTAGTATTCGCGCTCCTTGGAGCGGTGGATGGTCTTGATAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCATAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAATCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACCTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTACGCGGAGCTAAAGTGGCTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGAAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGACGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATTACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTGGGCATTCAATCAGGAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTAAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCGGTAACGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAGATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGAAAAGGGTGCTTTGAAATATATCACAAATGCGATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTCTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_036_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_037_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAACCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTGATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCTGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_039_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_042_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_043_01102017_4_H10N4",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAGATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCGGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_044_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGCACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }, {
            "strain":
            "A/Unknown/Unknown/Batch2-1_048_01102017_4_H10N5",
            "db":
            "Unknown",
            "accession":
            "Unknown",
            "date":
            "2017-01-10",
            "seq":
            "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGACTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
        }]
        tmp_outgroup = SeqIO.read(
            '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/H10_outgroup.gb',
            'genbank')
        genome_annotation = tmp_outgroup.features
        self.cds = {
            x.qualifiers['gene'][0]: x
            for x in genome_annotation
            if 'gene' in x.qualifiers and x.type == 'CDS'
            and x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']
        }
        self.outgroup = {
            'strain': 'A/mallard-duck/ALB/302/1977',
            'region': 'NorthAmerica',
            'isolate_id': 'EPI-ISL-8890',
            'date': '1977-08-13',  #(Month and day unknown)
            'seq': str(tmp_outgroup.seq).upper()
        }
Ejemplo n.º 23
0
    def __init__(self, min_length=0, **kwargs):
        '''
        Initialise the filter: store the length threshold, register the
        hard-coded reference strains, and load the outgroup record together
        with its CDS annotation.

        parameters
        min_length  -- minimal length for a sequence to be acceptable
        kwargs      -- passed through unchanged to flu_filter.__init__
        '''
        flu_filter.__init__(self, **kwargs)
        self.min_length = min_length

        # Reference records kept in self.vaccine_strains. Every record has
        # the same keys: strain, isolate_id, date, region, seq.
        # Runs of '-' inside a sequence are part of the stored string.
        turkey_italy_99 = {
            'strain': 'A/turkey/Italy/3889/99',
            'isolate_id': 'EPI-ISL-2746',
            # Month and day unknown.
            # NOTE(review): the strain name ends in "/99" but the year here
            # is 1990 -- confirm whether 1999 was intended.
            'date': '1990-07-01',
            'region': 'Europe',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGGTGGCGATCATTCCGACAAATGCAGACAAAATCTGCCTTGGGCATCATGCCGTGTCAAACGGGACTAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTTAATGCAACTGAAACGGTGGAACGAACAAACGTCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTTCTGGGAACAATCACTGGGCCACCCCAATGTGACCAATTCCTAGAATTTTCAGCCGATCTAATTATTGAGAGGCGAGAAGGAAGTGGTGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACATACAGCGGAATAAGAACTAATGGAACAACCAGTGCATGTAGGAGATTAGGATCTTCATTCTATGCAGAGATGAAATGGCTCCTGTCAAACACAGACAATGCTGCTTTCCCGCAGATGACTAAGTCATACAAAAACACAAGGAAAGACCCAGCTCTGATAATATGGGGGATCCACCATTCCGGATCAACTACAGAACAGACCAAGCTATATGGGAGTGGAAACAAACTGATAACAGTTGGGAGTTCTAATTACCAACAGTCCTTTGTACCGAGTCCAGGAGCGAGACCACAAGTGAATGGCCAATCTGGAAGAATTGACTTTCATTGGCTGATGCTAAACCCCAATGACACAGTCACTTTCAGTTTCAATGGGGCCTTCATAGCTCCAGACCGTGCAAGTTTTCTGAGAGGGAAGTCTATGGGGATTCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGAGATTGCTATCACAGTGGAGGGACAATAATAAGTAATTTGCCCTTTCAGAACATAAATAGCAGGGCAGTAGGGAAATGTCCGAGATATGTTAAGCAAGAGAGTCTGCTGCTGGCAACAGGGATGAAGAATGTTCCCGAAGTTCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGTCTGATTGATGGGTGGTATGGCTTCAGGCATCAAAATGCACAAGGGGAGGGAACTGCTGCAGATTACAAAAGCACCCAATCAGCAATTGATCAAGTAACAGGAAAATTGAACCGGCTTATAGAAAAAACTAACCAACAATTTGAGTTAATAGACAATGAATTCACTGAGGTTGAAAAGCAAATTGGCAATGTGATAAATTGGACCAGAGATTCCATGACAGAAGTGTGGTCCTATAACGCTGAACTCTTGGTAGCAATGGAGAACCAGCACACAATTGATCTGGCCGACTCAGAAATGAACAAACTATACGAACGAGTGAAGAGACAACTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTCGAAATATTTCACAAGTGTGATGACGACTGTATGGCCAGTATTAGAAACAACACCTATGATCACAGCAAGTACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTGGCCATTGCAATGGGCCTTGTCTTCATATGTGTGAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        }
        mallard_netherlands_00 = {
            'strain': 'A/mallard/Netherlands/12/00',
            'isolate_id': 'EPI-ISL-3548',
            # Month and day unknown.
            'date': '2000-07-01',
            'region': 'Europe',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGATGGCGATCATTCCGACAAATGCAGACAAGATCTGCCTTGGGCATCATGCCGTGTCAAACGGGACTAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTTAATGCAACTGAAACGGTGGAACGAACAAACGTCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTTGGTCAATGTGGACTTCTGGGAACAATCACTGGGCCGCCCCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACCTACAGCGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAGATGAAATGGCTCCTGTCAAACACAGACAATGCTGCTTTCCCGCAGATGACTAAGTCATACAAAAACACAAGGAAAGACCCAGCTCTGATAATATGGGGGATCCACCATTCCGGATCAACTACAGAACAGACCAAGCTATATGGGAGTGGAAACAAACTGATAACAGTTGGGAGTTCTAATTACCAACAGTCCTTTGTACCGAGTCCAGGAGCGAGACCACAAGTAAATGGCCAATCTGGAAGAATTGACTTTCATTGGCTGATACTAAACCCCAATGACACAGTTACTTTCAGTTTCAATGGGGCCTTCATAGCTCCAGACCGTGCAAGCTTTCTGAGAGGGAAGTCTATGGGAATTCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGAGATTGCTATCATAGTGGAGGGACAATAATAAGTAATTTGCCCTTTCAGAACATAAATAGCAGGGCAGTAGGAAAATGTCCGAGATATGTTAAGCAAGAGAGTCTGCTGCTGGCAACAGGAATGAAGAATGTTCCCGAAATCCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGTCTGATTGATGGGTGGTATGGCTTCAGGCATCAAAATGCACAAGGGGAGGGAACTGCTGCAGATTACAAAAGCACCCAATCAGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACTAACCAACAATTTGAGTTAATAGACAATGAATTCACTGAGGTTGAAAAGCAAATTGGCAATGTGATAAATTGGACCAGAGATTCCATGACAGAAGTGTGGTCCTATAACGCTGAACTCTTGGTAGCAATGGAGAATCAGCACACAATTGATCTGGCCGACTCAGAAATGAACAAACTGTACGAACGAGTGAAGAGACAACTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTCGAAATATTTCACAAGTGTGATGACGACTGTATGGCCAGTATTAGAAACAACACCTATGATCACAGCAAGTACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTGGCCATTGCAATGGGCCTTGTCTTCATATGTGTGAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        }
        human_new_york_2003 = {
            'strain': 'A/human/New-York/107/2003',
            'isolate_id': 'EPI-ISL-16424',
            'date': '2003-07-01',
            'region': 'NorthAmerica',
            'seq': 'ATGAACACTCAAATTCTGGCATTCATTGCTTGTGTGCTGACTGGAGTTAAAGGAGACAAAATATGTCTTGGGCACCATGCTGTGGCAAATGGAACAAAAGTGAACACATTAACAGAGAGGGGGATTGAAGTAGTGAATGCCACAGAGACAGTGGAAACTACGAATATCAAGAAAATATGTACCCAGGGGAAAAGGCCAACAGATCTGGGACAATGTGGACTTCTAGGAACCCTAATAGGACCTCCCCAATGTGATCAATTCCTGGAGTTTTCCTCTGATTTGATAATTGAGCGAAGAGAAGGAACCGATATATGCTATCCCGGTAGATTCACAAACGAAGAATCACTGAGGCAGATCCTTCGAAGATCAGGAGGAATTGGTAAGGAGTCAATGGGCTTCACCTATAGTGGAATAAGAACCAATGGAGCGACAAGTGCCTGCACAAGATCAGGTTCTTCTTTCTATGCAGAGATGAAGTGGTTGCTGTCGAATTCAGATAATGCAGCATTCCCACAGATGACAAAGGCGTATAGAAATCCCAGAAACAAACCAGCTCTGATAATTTGGGGAGTTCATCACTCTGAATCGGTTAGCGAGCAGACCAAACTCTATGGAAGTGGAAACAAGTTGATAACAGTAAGAAGCTCAAAATACCAGCAATCATTCACCCCAAATCCGGGAGCACGA------------------------AGAATCGATTTCCACTGGCTACTCCTTGATCCCAATGACACAGTGACCTTCACTTTCAATGGAGCATTCATAGCCCCTGACAGGACAAGTTTCTTTAGGGGAGAATCACTAGGAGTCCAGAGTGATGCTCCTTTGGATTCAAGTTGTAGAGGAGATTGCTTTCACAGTGGGGGTACGATAGTCAGTTCCCTGCCATTCCAAAACATCAACTCTAGAACTGTGGGGAAATGCCCTCGGTATGTCAAACAGAAAAGCCTCCTTCTGGCTACAGGAATGAGAAATGTTCCAGAGAAACCAAAGCCC------------------------------AGAGGCCTTTTTGGAGCAATTGCTGGATTCATAGAGAATGGATGGGAGGGTCTCATCAATGGATGGTATGGTTTCAGACATCAAAATGCACAAGGAGAGGGAACTGCAGCTGACTACAAAAGCACCCAGTCTGCAATAGATCAGATCACAGGCAAATTGAATCGTTTAATTGGCAAAACAAATCAGCAGTTTGAGCTGATAGACAATGAGTTCAATGAGATAGAACAACAAATAGGAAATGTCATTAATTGGACAAGAGACGCAATGACTGAGATATGGTCGTATAATGCTGAGCTGTTGGTGGCAATGGAAAATCAGCATACAATAGATCTTGCGGACTCAGAAATGAGCAAACTTTATGAGCGTGTCAAAAAACAACTAAGGGAGAATGCTGAAGAAGATGGAACTGGATGTTTTGAGATATTTCATAAATGTGACGATCAGTGTATGGAGAGCATAAGGAACAACACGTATGACCATACTCAATACAGAACAGAGTCATTGCAGAATAGAATACAGATAGACCCAGTGAAGTTGAGTAGTGGGTACAAAGACATAATCTTATGGTTTAGCTTCGGGGCATCATGTTTTCTTCTTCTAGCCATTGCAATGGGACTGGTTTTCATTTGCATAAAGAATGGAAACATGCAGTGCACTATTTGTATATAG',
        }
        human_shanghai_2013 = {
            'strain': 'A/human/Shanghai/2/2013',
            'isolate_id': 'EPI-ISL-138738',
            'date': '2013-03-05',
            'region': 'China',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGATTGCGATCATTCCAACAAATGCAGACAAAATCTGCCTCGGACATCATGCCGTGTCAAACGGAACCAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTCAATGCAACTGAAACAGTGGAACGAACAAACATCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTCCTGGGGACAATCACTGGACCACCTCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGAGAATCAGGCGGAATTGACAAGGAAGCAATGGGATTCACATACAGTGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAAATGAAATGGCTCCTGTCAAACACAGATAATGCTGCATTCCCGCAGATGACTAAGTCATATAAAAATACAAGAAAAAGCCCAGCTCTAATAGTATGGGGGATCCATCATTCCGTATCAACTGCAGAGCAAACCAAGCTATATGGGAGTGGAAACAAACTGGTGACAGTTGGGAGTTCTAATTATCAACAATCTTTTGTACCGAGTCCAGGAGCGAGACCACAAGTTAATGGTCTATCTGGAAGAATTGACTTTCATTGGCTAATGCTAAATCCCAATGATACAGTCACTTTCAGTTTCAATGGGGCTTTCATAGCTCCAGACCGTGCAAGCTTCCTGAGAGGAAAATCTATGGGAATCCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGGGACTGCTATCATAGTGGAGGGACAATAATAAGTAACTTGCCATTTCAGAACATAGATAGCAGGGCAGTTGGAAAATGTCCGAGATATGTTAAGCAAAGGAGTCTGCTGCTAGCAACAGGGATGAAGAATGTTCCTGAGATTCCAAAAGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGCCTAATTGATGGTTGGTATGGTTTCAGACACCAGAATGCACAGGGAGAGGGAACTGCTGCAGATTACAAAAGCACTCAATCGGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACCAACCAACAATTTGAGTTGATAGACAATGAATTCAATGAGGTAGAGAAGCAAATCGGTAATGTGATAAATTGGACCAGAGATTCTATAACAGAAGTGTGGTCATACAATGCTGAACTCTTGGTAGCAATGGAGAACCAGCATACAATTGATCTGGCTGATTCAGAAATGGACAAACTGTACGAACGAGTGAAAAGACAGCTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTTGAAATATTTCACAAGTGTGATGATGACTGTATGGCCAGTATTAGAAATAACACCTATGATCACAGCAAATACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTAGCCATTGTAATGGGCCTTGTCTTCATATGTGTAAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        }
        human_anhui_2013 = {
            'strain': 'A/human/Anhui/1/2013',
            'isolate_id': 'EPI-ISL-138739',
            'date': '2013-03-20',
            'region': 'China',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGATTGCGATCATTCCAACAAATGCAGACAAAATCTGCCTCGGACATCATGCCGTGTCAAACGGAACCAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTCAATGCAACTGAAACAGTGGAACGAACAAACATCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTCCTGGGGACAATCACTGGACCACCTCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGAGAATCAGGCGGAATTGACAAGGAAGCAATGGGATTCACATACAGTGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAAATGAAATGGCTCCTGTCAAACACAGATAATGCTGCATTCCCGCAGATGACTAAGTCATATAAAAATACAAGAAAAAGCCCAGCTCTAATAGTATGGGGGATCCATCATTCCGTATCAACTGCAGAGCAAACCAAGCTATATGGGAGTGGAAACAAACTGGTGACAGTTGGGAGTTCTAATTATCAACAATCTTTTGTACCGAGTCCAGGAGCGAGACCACAAGTTAATGGTCTATCTGGAAGAATTGACTTTCATTGGCTAATGCTAAATCCCAATGATACAGTCACTTTCAGTTTCAATGGGGCTTTCATAGCTCCAGACCGTGCAAGCTTCCTGAGAGGAAAATCTATGGGAATCCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGGGACTGCTATCATAGTGGAGGGACAATAATAAGTAACTTGCCATTTCAGAACATAGATAGCAGGGCAGTTGGAAAATGTCCGAGATATGTTAAGCAAAGGAGTCTGCTGCTAGCAACAGGGATGAAGAATGTTCCTGAGATTCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGCCTAATTGATGGTTGGTATGGTTTCAGACACCAGAATGCACAGGGAGAGGGAACTGCTGCAGATTACAAAAGCACTCAATCGGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACCAACCAACAATTTGAGTTGATAGACAATGAATTCAATGAGGTAGAGAAGCAAATCGGTAATGTGATAAATTGGACCAGAGATTCTATAACAGAAGTGTGGTCATACAATGCTGAACTCTTGGTAGCAATGGAGAACCAGCATACAATTGATCTGGCTGATTCAGAAATGGACAAACTGTACGAACGAGTGAAAAGACAGCTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTTGAAATATTTCACAAGTGTGATGATGACTGTATGGCCAGTATTAGAAATAACACCTATGATCACAGCAAATACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTAGCCATTGTAATGGGCCTTGTCTTCATATGTGTAAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        }
        self.vaccine_strains = [
            turkey_italy_99,
            mallard_netherlands_00,
            human_new_york_2003,
            human_shanghai_2013,
            human_anhui_2013,
        ]

        # Load the outgroup as a single GenBank record.
        # NOTE(review): this is an absolute path into one user's home
        # directory, so the constructor only works where that file exists.
        outgroup_record = SeqIO.read(
            '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/H7_outgroup.gb',
            'genbank')

        # Index the record's CDS features by their first 'gene' qualifier,
        # keeping only the three segments listed below. If two features share
        # a gene name, the later one replaces the earlier one.
        wanted_genes = ['SigPep', 'HA1', 'HA2']
        cds_by_gene = {}
        for feature in outgroup_record.features:
            if 'gene' not in feature.qualifiers or feature.type != 'CDS':
                continue
            gene_name = feature.qualifiers['gene'][0]
            if gene_name in wanted_genes:
                cds_by_gene[gene_name] = feature
        self.cds = cds_by_gene

        # Outgroup metadata is hard-coded; only the sequence comes from the
        # GenBank record, converted to an upper-case string.
        outgroup_sequence = str(outgroup_record.seq).upper()
        self.outgroup = {
            'strain': 'A/equine/Prague/2/1956',
            'db': 'Other-Database-Import',
            'isolate_id': 'EPI-ISL-89157',
            'date': '1956-06-11',
            'country': 'CzechRepublic',
            'region': 'Europe',
            'seq': outgroup_sequence,
        }
Ejemplo n.º 24
0
	def __init__(self,min_length = 0, **kwargs):
		'''
		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		self.vaccine_strains =[
			{
				'strain':'A/mallard/Maryland/13OS3318/2014',
				'isolate_id':'EPI-ISL-216765',
				'date': '2014-06-24',
				'lab':'Other Database Import',
				'region':'NorthAmerica',
				'seq':'ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGAATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTACCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTATGCGGAGCTAAAGTGGTTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTGTAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGGCGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGGAGCTTCAGTAGACAATAACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAGAAGTACACAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAATGAAATAGAACATCAAATCGGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATATCACAAATGCAATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG',
			}
		]
		self.new_strains =[
				{
					"strain": "A/Unknown/Unknown/Batch2-1_002_01102017_4_H10N6",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},	{
					"strain": "A/Unknown/Unknown/Batch2-1_007_01102017_4_H10N4",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCAGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACGCAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},	{
					"strain": "A/Unknown/Unknown/Batch2-1_008_01102017_4_H10N7",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGCACTAGTATTCGCGCTCCTTGGAGCGGTGGATGGTCTTGATAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCATAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAATCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACCTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTACGCGGAGCTAAAGTGGCTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGAAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGACGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATTACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTGGGCATTCAATCAGGAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTAAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCGGTAACGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAGATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGAAAAGGGTGCTTTGAAATATATCACAAATGCGATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTCTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_010_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGRGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_013_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCTGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGTTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_021_01102017_4_H10N4",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCAGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_027_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATTTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_028_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGACAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_030_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_031_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCATCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_033_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_034_01102017_4_H10N7",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGCACTAGTATTCGCGCTCCTTGGAGCGGTGGATGGTCTTGATAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCATAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAATCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACCTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTACGCGGAGCTAAAGTGGCTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGAAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGACGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATTACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTGGGCATTCAATCAGGAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTAAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCGGTAACGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAGATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGAAAAGGGTGCTTTGAAATATATCACAAATGCGATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTCTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_036_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_037_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAACCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTGATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCTGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_039_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_042_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_043_01102017_4_H10N4",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAGATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCGGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_044_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGCACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				},  {
					"strain": "A/Unknown/Unknown/Batch2-1_048_01102017_4_H10N5",
					"db": "Unknown",
					"accession": "Unknown",
					"date": "2017-01-10",
					"seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGACTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG",
				}
			]
		tmp_outgroup = SeqIO.read('/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/H10_outgroup.gb', 'genbank')
		genome_annotation = tmp_outgroup.features
		self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
				if 'gene' in x.qualifiers and x.type=='CDS' and
				x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
		self.outgroup = {
						'strain':'A/mallard-duck/ALB/302/1977',
						'region':'NorthAmerica',
						'isolate_id':'EPI-ISL-8890',
						'date':'1977-08-13', #(Month and day unknown)
						'seq': str(tmp_outgroup.seq).upper()
						}
Ejemplo n.º 25
0
    def __init__(self,
                 aln_fname,
                 outgroup,
                 include_ref_strains=True,
                 outdir='./',
                 formats=['pdf', 'png'],
                 verbose=0,
                 **kwargs):
        process.__init__(self, **kwargs)
        flu_filter.__init__(self, alignment_file=aln_fname, **kwargs)
        tree_refine.__init__(self, **kwargs)
        virus_clean.__init__(self, **kwargs)
        self.midpoint_rooting = False
        self.include_ref_strains = include_ref_strains
        self.verbose = verbose
        self.formats = formats
        self.outdir = outdir.rstrip('/') + '/'
        self.auspice_tree_fname = self.outdir + 'tree.json'
        self.auspice_align_fname = self.outdir + 'aln.fasta'
        self.auspice_aa_align_fname = self.outdir + 'aa_aln.fasta'
        self.auspice_sequences_fname = self.outdir + 'sequences.json'
        self.auspice_frequencies_fname = None
        self.auspice_meta_fname = self.outdir + 'meta.json'
        self.path_to_augur = path_to_augur

        if os.path.isfile(outgroup):
            tmp = [{
                'strain': seq.name,
                'seq': str(record.seq).upper(),
                'desc': seq.description
            } for seq in SeqIO.parse(outgroup, 'fasta')]
            if len(tmp):
                self.outgroup = tmp[0]
                if len(tmp) > 1:
                    print "More than one sequence in ", outgroup, "taking first"
                if self.verbose:
                    print "using outgroup found in file ", outgroup
        elif outgroup == 'auto':
            print "automatically determine outgroup"
            self.auto_outgroup_blast()
        elif isinstance(outgroup, basestring):
            seq_names = [x['strain'] for x in self.viruses]
            if outgroup in seq_names:
                self.outgroup = self.viruses.pop(seq_names.index(outgroup))
                if self.verbose:
                    print "using outgroup found in alignment", outgroup
            else:
                standard_outgroups = self.load_standard_outgroups()
                if outgroup in standard_outgroups:
                    self.outgroup = standard_outgroups[outgroup]
                    if self.verbose:
                        print "using standard outgroup", outgroup
                else:
                    raise ValueError("outgroup %s not found" % outgroup)
                    return
        if "anno:" in self.outgroup['desc']:
            anno = [x for x in self.outgroup['desc'].split()
                    if "anno:" in x][0]
            anno = (anno.split(':')[1]).split('_')
            tmp = [(anno[2 * i], int(anno[2 * i + 1]))
                   for i in range(len(anno) / 2)]
            self.anno = sorted(tmp, key=lambda x: x[1])
            print("Using annotation", self.anno)
        else:
            self.anno = None
            print("No annotation found")
        #self.anno = sorted((('SP',0), ('HA1',16), ('HA2',329+16)), key=lambda x:x[1])
        self.viruses.append(self.outgroup)
        self.filter_geo(prune=False)
        self.make_strain_names_unique()
Ejemplo n.º 26
0
	def __init__(self,min_length = 900, **kwargs):
		'''
		Set up the filter together with its reference strains and outgroup.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		**kwargs    -- passed on unchanged to flu_filter.__init__

		The outgroup record is read from 'source-data/H3N2_outgroup.gb'; the
		path is relative, so it is resolved against the current working
		directory.
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		# Reference strains as literal records, each with its full sequence.
		self.vaccine_strains =[
				{
					"strain": "A/Wisconsin/67/2005",
					"db": "IRD",
					"accession": "CY163984",
					"date": "2005-08-31",
					"region": "north_america",
					"country": "usa",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGGAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACGATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAACAAGCTCTTCTTGCAAAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Brisbane/10/2007",
					"db": "IRD",
					"accession": "CY113005",
					"date": "2007-02-06",
					"region": "oceania",
					"country": "australia",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCACTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAATGACCAAATCTTCCCGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAACGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACCAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACAATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Perth/16/2009",
					"db": "IRD",
					"accession": "GQ293081",
					"date": "2009-04-07",
					"region": "oceania",
					"country": "australia",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAAAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAAAAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAAGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACCGTAAGCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTTCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Victoria/361/2011",
					"db": "IRD",
					# NOTE(review): same accession as A/Perth/16/2009 above - confirm.
					"accession": "GQ293081",
					"date": "2011-10-24",
					"region": "oceania",
					"country": "australia",
					"seq": "ATGAAGACTATCATTGCTTTGAGCCACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCGCTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAAGGAACAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATATAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTAAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA"
				},	{
					"strain": "A/Texas/50/2012",
					"db": "GISAID",
					"isolate_id": "EPI_ISL_129858",
					"date": "2012-04-15",
					"region": "north_america",
					"country": "usa",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAACTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGAATGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAACCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGGAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
				},	{
					"strain": "A/Switzerland/9715293/2013",
					"db": "GISAID",
					"isolate_id": "EPI_ISL_162149",
					"date": "2013-12-06",
					"region": "europe",
					"country": "switzerland",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGGCTGGAGTCACTCAAAACGGAACAAGTTCTTCTTGCATAAGGGGATCTAATAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTCCAAATACCCAGCATTAAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCACAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAGACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGCTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACAAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
				},  {
					"strain": "A/HongKong/4801/2014",
					"db": "GISAID",
					"isolate_id": "EPI_ISL_165554",
					"date": "2014-02-26",
					"region": "china",
					"country": "hong_kong",
					"seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAAATTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAGTAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTACACATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCATAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGAAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGGAACTTATGACCACAATGTGTACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
				},  {
					"strain": "A/Alaska/232/2015",
					"db": "GISAID",
					# NOTE(review): lacks the 'EPI_ISL_' prefix used by the other isolate_id values in this list - confirm.
					"isolate_id": "EPI787411",
					"date": "2015-09-09",
					"region": "north_america",
					"country": "usa",
					"seq": "GGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAAATTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACAAATGACCGAATTGAAGTTACTAATGCTACTGAGTTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAGAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTTCTTGCATAAGGAGATCTAGTAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTACAAATATCCAGCATTGAACGTGACTATGCCAAACAAGGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTACCCGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCATAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGAAGAGTTCAAGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGAAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGAAACTTATGACCACAATGTGTACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGATGCAACATTTGCATTTGAGTGCATTAATTAAAAACAC"
				}
			]
		outgroup_record = SeqIO.read('source-data/H3N2_outgroup.gb', 'genbank')
		# Index the outgroup's CDS features by gene name, keeping only the
		# gene names listed here.
		wanted_genes = ['SigPep', 'HA1', 'HA2']
		cds_by_gene = {}
		for feature in outgroup_record.features:
			if 'gene' not in feature.qualifiers or feature.type != 'CDS':
				continue
			gene_name = feature.qualifiers['gene'][0]
			if gene_name in wanted_genes:
				cds_by_gene[gene_name] = feature
		self.cds = cds_by_gene
		# Outgroup metadata is fixed; only the sequence comes from the file.
		outgroup_sequence = str(outgroup_record.seq).upper()
		self.outgroup = {
			'strain': 'A/Beijing/32/1992',
			'db': 'IRD',
			'accession': 'U26830',
			'date': '1992-01-01',
			'country': 'china',
			'region': 'china',
			'seq': outgroup_sequence
		}
Ejemplo n.º 27
0
	def __init__(self,min_length = 987, **kwargs):
		'''
		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
		flu_filter.__init__(self, **kwargs)
		self.min_length = min_length
		self.vaccine_strains =[
			{
				'strain':    	'B/Beijing/184/93',
				'isolate_id':	'EPI_ISL_969',
				'date':    		'1993-07-01', #(Month and day unknown)
				'region':   	'China',
				'seq':     		'GATCGAATCTGTACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGGAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAGGCTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACAAATCCACTAACAGTAGAAGTACCATACATTTGTACAAAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAATCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG',
			},
			{
				'strain':    	'B/Sichuan/379/99',
				'isolate_id': 	'EPI_ISL_21113',
				'date':    		'1999-07-01', # (Month and day unknown)
				'region':   	'China',
				'seq':     		'GAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTAACTGGTGCGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAACCTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGATCACACCTTCGGCAAAAGCTTCAATACTCCACGAAATCAAACCTGTTACATCCGGATGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAAAATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCTTACAGACTTGGAACTTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCACACATCTGTACAAAAGAAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAATAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCGGACCAAACAGAGGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGGATTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGTAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCT',
			},
			{
				'strain':    	'B/Shanghai/361/2002',
				'isolate_id': 	'EPI_ISL_2842',
				'date':    		'2002-06-12',
				'region':   	'China',
				'seq': 			'AATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACTCCAATAAAATCTCATTTTGCAAATCTCAAAGGAACAAGGACTAGAGGGAAACTATGCCCAGATTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCCCTAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCCACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATCTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGGTTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTC',
			},
			{
				'strain':		'B/Florida/4/2006',
				'isolate_id':	'EPI_ISL_21307',
				'date':			'2006-11-01',
				'region':		'NorthAmerica',
				'seq':			'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGAATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCCACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGCACAGATCTGGATGTGGCTTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCGAAAGCTTCAATACTCCACGAAGTCAAACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGCTATCAACCCAAAACGTCATCGATGCGGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAGAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATCACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACACTATGTTTCTCAGATTGGCAGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGGATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTACCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGCTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCGGACCTTAAGAGTACGCAAGAAGCTATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGAGATAGGAAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA'
			},
			{
				'strain':		'B/Wisconsin/01/2010',
				'isolate_id':	'EPI_ISL_76940',
				'date':			'2010-02-20',
				'region':		'NorthAmerica',
				'seq':			'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA'
			},
			{
				'strain':		'B/Massachusetts/02/2012',
				'isolate_id':	'EPI_ISL_121434',
				'date':			'2012-03-13',
				'region':		'NorthAmerica',
				'seq':			'ATGAAGGCAATAATTGTACTACTAATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGTACAGATCTGGATGTGGCCCTGGGCAGGCCAATGTGTGTGGGAACTACACCTTCTGCGAAAGCTTCAATACTTCACGAAGTCAGACCTGTTACATCCGGGTGCTTCCCTATAATGCACGACAGAACAAAAATCAGGCAACTAGCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCATTAACAGTAGAAGTACCATACATTTGTGCAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTCGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTTGCTGCAGACCTTAAGAGCACACAAGAAGCTATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGGCTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGACCTCAGAGCTGACACTATAAGTTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAATGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAGTTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA'
			},
			{
				'strain':    	'B/PHUKET/3073/2013',
				'isolate_id':	'EPI_ISL_161843',
				'date':    		'2013-11-21',
				'region':   	'SoutheastAsia',
				'seq':			'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAACGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCATGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAAGATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTGGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCGGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACGCATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGGGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAGAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAAAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAACTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAAAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAAAGAAGGTTAGGCCTTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTCATCATTACAAAGAAAC'
			}
		]
		tmp_outgroup = SeqIO.read('source-data/Yam_outgroup.gb', 'genbank')
		genome_annotation = tmp_outgroup.features
		self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
				if 'gene' in x.qualifiers and x.type=='CDS' and
				x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
		self.outgroup = {
				'strain':'B/Singapore/11/94',
				'isolate_id':'EPI_ISL_20980',
				'date':'1994-05-10',
				'region':'China',
				'seq':str(tmp_outgroup.seq).upper()
			}
Ejemplo n.º 28
0
    def __init__(self, min_length=987, **kwargs):
        '''
        Set up the length cutoff, the vaccine strain records and the outgroup.

        parameters
        min_length  -- minimal length for a sequence to be acceptable
        **kwargs    -- passed through unchanged to flu_filter.__init__

        The outgroup record is read from 'source-data/Yam_outgroup.gb'
        (GenBank format). Its CDS features whose gene name is SigPep, HA1 or
        HA2 are stored in self.cds, keyed by gene name.
        '''
        flu_filter.__init__(self, **kwargs)
        self.min_length = min_length

        # One dict per vaccine strain, listed in order of increasing date.
        self.vaccine_strains = [
            {
                'strain': 'B/Beijing/184/93',
                'isolate_id': 'EPI_ISL_969',
                'date': '1993-07-01',  # month and day unknown
                'region': 'china',
                'country': 'china',
                'seq': 'GATCGAATCTGTACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGGAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAGGCTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACAAATCCACTAACAGTAGAAGTACCATACATTTGTACAAAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAATCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG',
            },
            {
                'strain': 'B/Sichuan/379/99',
                'isolate_id': 'EPI_ISL_21113',
                'date': '1999-07-01',  # month and day unknown
                'region': 'china',
                'country': 'china',
                'seq': 'GAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTAACTGGTGCGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAACCTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGATCACACCTTCGGCAAAAGCTTCAATACTCCACGAAATCAAACCTGTTACATCCGGATGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAAAATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCTTACAGACTTGGAACTTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCACACATCTGTACAAAAGAAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAATAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCGGACCAAACAGAGGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGGATTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGTAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCT',
            },
            {
                'strain': 'B/Shanghai/361/2002',
                'isolate_id': 'EPI_ISL_2842',
                'date': '2002-06-12',
                'region': 'china',
                'country': 'china',
                'seq': 'AATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACTCCAATAAAATCTCATTTTGCAAATCTCAAAGGAACAAGGACTAGAGGGAAACTATGCCCAGATTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCCCTAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCCACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATCTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGGTTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTC',
            },
            {
                'strain': 'B/Florida/4/2006',
                'isolate_id': 'EPI_ISL_21307',
                'date': '2006-11-01',
                'region': 'north_america',
                'country': 'usa',
                'seq': 'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGAATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCCACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGCACAGATCTGGATGTGGCTTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCGAAAGCTTCAATACTCCACGAAGTCAAACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGCTATCAACCCAAAACGTCATCGATGCGGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAGAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATCACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACACTATGTTTCTCAGATTGGCAGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGGATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTACCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGCTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCGGACCTTAAGAGTACGCAAGAAGCTATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGAGATAGGAAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
            },
            {
                'strain': 'B/Wisconsin/01/2010',
                'isolate_id': 'EPI_ISL_76940',
                'date': '2010-02-20',
                'region': 'north_america',
                'country': 'usa',
                'seq': 'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
            },
            {
                'strain': 'B/Massachusetts/02/2012',
                'isolate_id': 'EPI_ISL_121434',
                'date': '2012-03-13',
                'region': 'north_america',
                'country': 'usa',
                'seq': 'ATGAAGGCAATAATTGTACTACTAATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGTACAGATCTGGATGTGGCCCTGGGCAGGCCAATGTGTGTGGGAACTACACCTTCTGCGAAAGCTTCAATACTTCACGAAGTCAGACCTGTTACATCCGGGTGCTTCCCTATAATGCACGACAGAACAAAAATCAGGCAACTAGCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCATTAACAGTAGAAGTACCATACATTTGTGCAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTCGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTTGCTGCAGACCTTAAGAGCACACAAGAAGCTATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGGCTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGACCTCAGAGCTGACACTATAAGTTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAATGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAGTTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
            },
            {
                'strain': 'B/PHUKET/3073/2013',
                'isolate_id': 'EPI_ISL_161843',
                'date': '2013-11-21',
                'region': 'southeast_asia',
                'country': 'thailand',
                'seq': 'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAACGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCATGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAAGATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTGGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCGGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACGCATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGGGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAGAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAAAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAACTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAAAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAAAGAAGGTTAGGCCTTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTCATCATTACAAAGAAAC',
            },
        ]

        # The outgroup record supplies both the annotated features and the
        # outgroup sequence itself.
        outgroup_record = SeqIO.read('source-data/Yam_outgroup.gb', 'genbank')
        wanted_genes = ['SigPep', 'HA1', 'HA2']

        # Index the wanted CDS features by gene name; if a gene name occurs
        # more than once, the last matching feature wins.
        cds_by_gene = {}
        for feature in outgroup_record.features:
            if 'gene' not in feature.qualifiers:
                continue
            if feature.type != 'CDS':
                continue
            gene_name = feature.qualifiers['gene'][0]
            if gene_name in wanted_genes:
                cds_by_gene[gene_name] = feature
        self.cds = cds_by_gene

        self.outgroup = {
            'strain': 'B/Singapore/11/94',
            'isolate_id': 'EPI_ISL_20980',
            'date': '1994-05-10',
            # NOTE(review): capitalised, unlike the lowercase region values
            # used in vaccine_strains -- confirm this is intended.
            'region': 'China',
            'seq': str(outgroup_record.seq).upper(),
        }