def __init__(self,min_length = 987, **kwargs):
    '''
    Initialise the filter and attach hard-coded reference records.

    parameters
    min_length -- minimal length for a sequence to be acceptable
    kwargs     -- forwarded unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length
    # One vaccine strain record; the nucleotide sequence is stored inline.
    self.vaccine_strains =[{
        'strain':'A/California/07/2009',
        'isolate_id':'EPI_ISL_31553',
        'date':'2009-04-09',
        'lab':'Naval Health Research Center',
        'country':'USA',
        'region':'NorthAmerica',
        'seq':'ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA',
    }]
    # Outgroup record, also stored inline.
    # NOTE(review): the strain name ends in "/00" while the date says 2002 --
    # confirm whether this is a collection date or a database date.
    self.outgroup = {
        'strain': 'A/Swine/Indiana/P12439/00',
        'db': 'IRD',
        'accession': 'AF455680',
        'date': '2002-03-14',
        'country': 'USA',
        'region': 'NorthAmerica',
        'seq': 'ATGAAGGCAATACTAGTAGTCCTGCTATATACATTTACAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACTGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAGGCATAACGGGAAACTATGTAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGCTCCTGGGAAATCCAGAGTGTGAATCACTCTTCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGATAATGGGACGTGTTACCCAGGAGATTTCATCAATTATGAAGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACACGAACAGAGGTGTGACGGCAGCATGTCCTTATGCTGGAGCAAAAAGCTTCTACAGAAATTTAATATGGCTGGTCAAAAAAGAAAATTCATACCCAAAGCTCAGCAAATCCTATATTAACAATAAGGGGAAGGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACCAGTGCCGACCAACAAAGTCTCTACCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCAGAAATAGCAGCCAGACCCAAGGTGAGGGACCAAGCAGGGAGAATAAACTATTACTGGACACTAGTAGAGCCTGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCCTTCGCAATGGAAAGAAATTCTGGATCTGGTATTATCATTTCAGATACATCAGTCCACGATTGTAATACGACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCAGTCACAATTGGAGAATGTCCAAAATATGTAAAAAGCACAAAATTGAGAATGGCCACAGGATTAAGGAATGTCCCGTCTATTCAATCTAGAGGCCTGTTTGGGGCCATTGCCGGCTTTATTGAGGGGGGATGGACAGGAATGATAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGATCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGGGATCACTAACAAAGTAAATTCTGTTATTGAAAAGATGAACACACAATTCATAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAGGTTGATGATGGTTTTCTGGATATTTGGACTTACAATGCCGAACTGTTGATTCTGTTGGAAAATGAAAGAACTTTGGATTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAGGGAAATTGGGAATGGCTGCTTTGAATTTTACCACAAATGTGATGACAAGTGCATGGAAAGCGTCAAAAATGGGACTTATGATTACCCAAAATACTCAGAGGAAGCAAAACTAAACAGAGAGGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA'
    }
def __init__(self,min_length = 987, **kwargs):
    '''
    Set up the filter together with its reference sequences.

    parameters
    min_length -- minimal length for a sequence to be acceptable
    kwargs     -- passed through to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    # the only vaccine strain record; its sequence is stored inline
    california_07_2009 = {
        'strain': 'A/California/07/2009',
        'isolate_id': 'EPI_ISL_31553',
        'date': '2009-04-09',
        'lab': 'Naval Health Research Center',
        'country': 'USA',
        'region': 'NorthAmerica',
        'seq': 'ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA',
    }
    self.vaccine_strains = [california_07_2009]

    # the outgroup sequence and its feature table are read from GenBank
    outgroup_record = SeqIO.read('source-data/H1N1pdm_outgroup.gb', 'genbank')

    # index the CDS features named SigPep, HA1 and HA2 by their gene name
    wanted_genes = ['SigPep', 'HA1', 'HA2']
    cds_by_gene = {}
    for feature in outgroup_record.features:
        if 'gene' not in feature.qualifiers or not feature.type == 'CDS':
            continue
        gene_name = feature.qualifiers['gene'][0]
        if gene_name in wanted_genes:
            cds_by_gene[gene_name] = feature
    self.cds = cds_by_gene

    self.outgroup = {
        'strain': 'A/Swine/Indiana/P12439/00',
        'db': 'IRD',
        'accession': 'AF455680',
        'date': '2002-03-14',
        'country': 'USA',
        'region': 'NorthAmerica',
        'seq': str(outgroup_record.seq).upper(),
    }
def __init__(self,min_length = 987, **kwargs):
    '''
    Initialise the filter, its vaccine strain records and its outgroup.

    parameters
    min_length -- minimal length for a sequence to be acceptable
    kwargs     -- forwarded unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length
    # Vaccine strain records with inline nucleotide sequences.
    self.vaccine_strains =[
        {
            "strain": "A/Wisconsin/67/2005",
            "db": "IRD",
            "accession": "CY163984",
            "date": "2005-08-31",
            "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGGAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACGATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAACAAGCTCTTCTTGCAAAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
        },
        {
            "strain": "A/Brisbane/10/2007",
            "db": "IRD",
            "accession": "CY113005",
            "date": "2007-02-06",
            "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCACTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAATGACCAAATCTTCCCGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAACGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACCAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACAATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
        },
        {
            "strain": "A/Perth/16/2009",
            "db": "IRD",
            "accession": "GQ293081",
            "date": "2009-04-07",
            "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAAAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAAAAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAAGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACCGTAAGCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTTCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
        },
        {
            "strain": "A/Victoria/361/2011",
            "db": "IRD",
            # NOTE(review): this is the same accession as A/Perth/16/2009 above,
            # which looks like a copy-paste leftover -- confirm against IRD.
            "accession": "GQ293081",
            "date": "2011-10-24",
            "seq": "ATGAAGACTATCATTGCTTTGAGCCACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCGCTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAAGGAACAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATATAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTAAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA"
        },
        {
            "strain": "A/Texas/50/2012",
            "db": "GISAID",
            "isolate_id": "EPI_ISL_129858",
            "date": "2012-04-15",
            "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAACTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGAATGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAACCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGGAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
        },
        {
            "strain": "A/Switzerland/9715293/2013",
            "db": "GISAID",
            "isolate_id": "EPI_ISL_162149",
            "date": "2013-12-06",
            "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGGCTGGAGTCACTCAAAACGGAACAAGTTCTTCTTGCATAAGGGGATCTAATAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTCCAAATACCCAGCATTAAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCACAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAGACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGCTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACAAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
        }
    ]
    # The outgroup sequence and its feature annotation come from a GenBank
    # file; the path is relative to the current working directory.
    tmp_outgroup = SeqIO.read('source-data/H3N2_outgroup.gb', 'genbank')
    genome_annotation = tmp_outgroup.features
    # Map gene name -> CDS feature, restricted to SigPep, HA1 and HA2.
    self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
                if 'gene' in x.qualifiers and x.type=='CDS' and
                x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
    self.outgroup = {
        'strain': 'A/Beijing/32/1992',
        'db': 'IRD',
        'accession': 'U26830',
        'date': '1992-01-01',
        'country': 'China',
        'region': 'China',
        'seq': str(tmp_outgroup.seq).upper()
    }
def __init__(self,min_length = 0, **kwargs): ''' parameters min_length -- minimal length for a sequence to be acceptable ''' flu_filter.__init__(self, **kwargs) self.min_length = min_length self.vaccine_strains =[ { "strain": "A/Chicken/HongKong/G9/97", "db": "GISAID", "accession": "EPI_ISL_1263", "date": "1997-07-01", "seq": "ATGGAAATAATAGCACTAATAGCTATACTGGTAGTGACAAAAACAAGCAATGCAGATAAAATTTGCATTGGCTACCAGTCAACAAACTCCACAGAAACTGTTGATACACTAGTAGAAAACAATGTCCCTGTGACACATACCAAAGAATTGCTCCACACAGAGCACAATGGAATGCTATGTGCAACAAACCTGGGGCACCCTCTCATCCTAGACACCTGCACCATCGAAGGGTTGGTGTACGGCAACCCTTCCTGTGATTTGCTACTGGGAGGGAAAGAATGGTCTTACATTGTCGAAAGATCATCAGCTGTCAATGGGATGTGTTACCCTGGAAGGGTAGAGAACCTGGAAGAACTCAGGTCTTTTTTCAGCTCCGCTCGCTCCTACAAAAGACTCCTGCTCTTTCCAGACAGAACTTGGAATGTGACTTACACTGGGACAAGCAAAGCATGTTCAAACTCATTCTACAGAAGTATGAGATGGCTGACACACAAGAGCGATTCTTACCCTATTCAAGACGCCCAATATACTAACGATTGGGGAAAGAATATTCTCTTCATGTGGGGCATACACCACCCACCTACTGATACTGAGCAAATAAATCTATACAAAAAAGCTGATACAACAACAAGTATAACAACGGAAGATATCAATCGAACTTTCAAACCAGTGATAGGGCCAAGGCCTCTTGTCAATGGTCAACAAGGGAGAATTGATTATTATTGGTCAGTACTAAAGCCAGGCCAGACACTGCGAGTGAGATCCAATGGGAATCTAATTGCCCCATGGTATGGACACATTCTTTCAGGAGAAAGCCATGGAAGAATCTTGAAGACCGATTTGAGTAGTGGCAACTGCGTAGTACAATGCCAAACTGAGAAAGGTGGTTTGAACACGACCTTGCCATTCCACAATGTCAGCAAGTATGCATTTGGGAACTGCCCCAAATATGTTGGAGTGAAGAGTCTCAAACTGGCAGTTGGTCTAAGGAATGTTCCTGCTGCATCATATAGAGGGCTCTTCGGTGCCATAGCTGGATTCATAGAAGGCGGTTGGCCAGGACTAGTTGCAGGCTGGTACGGGTTTCAGCATTCAAATGATCAAGGGGTTGGAATGGCCGCAGATAGGGAATCAACTCAAGAAGCAGTTGACAAGATAACATCCAAAGTAAATAACATAATCGACAAAATGAACAAGCAGTATGGA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T--------------------------------------------------
------------------------------------"} ] tmp_outgroup = SeqIO.read('/Users/yujiazhou/Documents/FluProject/augur/source-data/H9_outgroup.gb', 'genbank') genome_annotation = tmp_outgroup.features self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation if 'gene' in x.qualifiers and x.type=='CDS' and x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']} self.outgroup = { 'strain': 'A/duck/HongKong/147/1977', 'db': 'IRD', 'accession': 'AY206671', 'date': '2003-03-03', 'country': 'HongKong', 'region': 'EastAsia', 'seq': str(tmp_outgroup.seq).upper() }
def __init__(self,min_length = 0, **kwargs):
    '''
    Initialise the filter, its vaccine strain record and its outgroup.

    parameters
    min_length -- minimal length for a sequence to be acceptable
    kwargs     -- forwarded unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length
    self.vaccine_strains =[
        {
            'strain': 'A/ruddyshelduck/Mongolia/1626/2010',
            'isolate_id': 'EPI-ISL-149888',
            'date': '2010-09-11',
            'lab': 'Other Database Import',
            'region': 'NorthAsia',
            'seq': 'ATGCTATCAATTGTGATTTTGTTTCTGCTTGTTGCAGAGAGCTCTTCTCAAAACTACACAGGAAACCCTGTGATATGCATGGGACATCATGCTGTGGCCAATGGGACTATGGTAAAGACCCTTACTGATGATCAAGTGGAAGTGGTCACTGCACAAGAATTGGTGGAATCACAGAACCTCCCGGAACTATGCCCGAGTCCTCTAAGACTAGTCGATGGCCAGACCTGTGATATCATCAATGGAGCCTTAGGAAGCCCAGGATGTGACCATTTGAATGGTGCTGAATGGGACATTTTCATAGAAAGGCCCAATGCAGTGGACACTTGCTATCCATTTGATGTGCCAGATTATCAGAGCCTAAGGAGCATACTCGCCAACAATGGGAAATTCGAATTCATTGCTGAAGAATTCCAATGGAGCACCGTGAAGCAAAATGGCAAGTCCGGGGCCTGCAAGAGGGCAAATGTGAACGATTTCTTTAATAAACTGAATTGGCTCGTGAAGTCAGACGGGAATGCATACCCTCTCCAGAATTTGACAAAAGTAAACAACGGTGATTACGCGAGGCTTTACATCTGGGGAGTTCACCACCCTTCGACGGATACCGAGCAAACCGATCTGTACAAGAACAATCCTGGTAGGGTCACTGTATCTACCAAAATCAGTCAAACAAGTGTAGTGCCCAACATTGGCAGCAGACCTTGGGTGAGAGGACAAAGTGGCAGAATCAGCTTCTATTGGACTATTGTAGAGCCTGGAGATTTGATAGTCTTCAACACAATAGGAAATTTAATTGCCCCAAGAGGACATTACAAATTAAACAGTCAGAAGAAGAGCACAATTCTGAACACTGCGACTCCCATAGGCTCATGTGTCAGTAAATGTCATACAGACAAAGGTTCTCTCTCTACCACCAAGCCCTTTCAAAATATCTCAAGGATAGCAGTTGGAGATTGTCCCAAATATGTTAAACAAGGCTCCCTAAAACTTGCAACTGGGATGAGAAATATCCCTGAAAAGGCATCAAGAGGGCTTTTTGGGGCAATAGCTGGGTTCATAGAGAATGGATGGCAAGGTCTGATTGATGGTTGGTATGGCTTCAGACACCAAAATGCAGAAGGAACAGGAACAGCTGTTGATCTAAAATCCACTCAGGCAGCCATCGATCAAATCAATGGAAAACTCAATCGTCTTATTGAGAAAACAAACGAGAAATACCATCAAATCGAAAAAGAATTCGAACAAGTTGAAGGAAGAATCCAAGACCTGGAGAAGTATGTTGAAGACACAAAGATTGATCTATGGTCATATAATGCAGAGCTATTAGTCGCTCTGGAAAACCAGCATACTATAGATGTGACTGACTCGGAGATGAACAAGCTCTTTGAAAGAGTAAGGCGACAACTCAGGGAGAATGCTGAAGACAGAGGAAATGGGTGTTTTGAAATATTCCACAAATGTGACAACAACTGCATTGAAAGCATTCGGAATGGGACCTATGATCATGATGTTTATAGAGATGAAGCGATCAACAATCGATTCCAAATACAGGGAGTCAAATTGACCCAGGGATACAAGGACATCATCCTTTGGATTTCGTTCTCCATATCATGCTTTTTGCTCGTAGCACTGCTTTTGGCCTTCATTTTGTGGGCTTGTCAGAACGGAAACATCCGGTGCCAGATT---TGCATTTGA',
        }
    ]
    # Read the outgroup from the repository-relative source-data directory.
    # The path is resolved against the current working directory (presumably
    # the augur directory); an absolute path into one developer's home
    # directory would make this class unusable on any other machine.
    tmp_outgroup = SeqIO.read('source-data/H4_outgroup.gb', 'genbank')
    genome_annotation = tmp_outgroup.features
    # Map gene name -> CDS feature, restricted to SigPep, HA1 and HA2.
    self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
                if 'gene' in x.qualifiers and x.type=='CDS' and
                x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
    self.outgroup = {
        'strain': 'A/Duck/Czechoslovakia/1956',
        'db': 'OtherDatabaseImport',
        'accession': 'EPI-ISL-70104',
        'date': '1956-01-01',
        'country': 'Czech Republic',
        'region': 'Europe',
        'seq': str(tmp_outgroup.seq).upper()
    }
def __init__(self, aln_fname, outgroup, include_ref_strains = True, outdir = './', formats = ['pdf','png'], verbose = 0, **kwargs):
    '''
    Set up the processing pipeline and choose the outgroup sequence.

    parameters
    aln_fname           -- alignment file, handed to flu_filter as alignment_file
    outgroup            -- one of
                             * path of a fasta file (its first record is used)
                             * 'auto': self.auto_outgroup_blast() picks one
                             * strain name of an entry in self.viruses
                             * key of self.load_standard_outgroups()
    include_ref_strains -- stored as self.include_ref_strains
    outdir              -- directory prefix of the auspice output files
    formats             -- list of figure formats, stored as self.formats
    verbose             -- non-zero prints which outgroup was chosen
    kwargs              -- forwarded unchanged to the base-class constructors

    raises
    ValueError -- if the outgroup file contains no sequence, or if an outgroup
                  name is neither in the alignment nor a standard outgroup
    '''
    process.__init__(self, **kwargs)
    flu_filter.__init__(self, alignment_file = aln_fname, **kwargs)
    tree_refine.__init__(self, **kwargs)
    virus_clean.__init__(self, **kwargs)
    self.midpoint_rooting = False
    self.include_ref_strains = include_ref_strains
    self.verbose = verbose
    # The default list is one object shared by every call; store a copy so
    # that changing self.formats on one instance cannot leak into others.
    self.formats = formats[:]
    self.outdir = outdir.rstrip('/')+'/'
    self.auspice_tree_fname = self.outdir + 'tree.json'
    self.auspice_align_fname = self.outdir + 'aln.fasta'
    self.auspice_aa_align_fname = self.outdir + 'aa_aln.fasta'
    self.auspice_sequences_fname = self.outdir + 'sequences.json'
    self.auspice_frequencies_fname = None
    self.auspice_meta_fname = self.outdir + 'meta.json'
    self.path_to_augur = path_to_augur

    # print() is called with one pre-formatted string so the output is the
    # same whether print is a statement or a function.
    if os.path.isfile(outgroup):
        # `seq` is the loop variable; no `record` exists in this method.
        tmp = [{'strain':seq.name, 'seq':str(seq.seq).upper(), 'desc':seq.description}
               for seq in SeqIO.parse(outgroup, 'fasta')]
        if not tmp:
            # fail here with a clear message instead of later on a missing
            # self.outgroup attribute
            raise ValueError("no sequence found in outgroup file %s" % outgroup)
        self.outgroup = tmp[0]
        if len(tmp)>1:
            print("More than one sequence in  %s taking first" % outgroup)
        if self.verbose:
            print("using outgroup found in file  %s" % outgroup)
    elif outgroup=='auto':
        print("automatically determine outgroup")
        self.auto_outgroup_blast()
    elif isinstance(outgroup, basestring):
        seq_names = [x['strain'] for x in self.viruses]
        if outgroup in seq_names:
            # take the outgroup out of the alignment; it is appended again below
            self.outgroup = self.viruses.pop(seq_names.index(outgroup))
            if self.verbose:
                print("using outgroup found in alignment %s" % outgroup)
        else:
            standard_outgroups = self.load_standard_outgroups()
            if outgroup in standard_outgroups:
                self.outgroup = standard_outgroups[outgroup]
                if self.verbose:
                    print("using standard outgroup %s" % outgroup)
            else:
                raise ValueError("outgroup %s not found" % outgroup)

    # Only outgroups built from a fasta record are guaranteed to carry a
    # description, so a missing 'desc' is treated as "no annotation".
    try:
        desc = self.outgroup['desc']
    except KeyError:
        desc = ''
    if "anno:" in desc:
        # The annotation is one whitespace-delimited token of the form
        # anno:NAME_START_NAME_START..., e.g. anno:SP_0_HA1_16_HA2_345
        anno = [x for x in desc.split() if "anno:" in x][0]
        anno = (anno.split(':')[1]).split('_')
        tmp = [(name, int(start)) for name, start in zip(anno[0::2], anno[1::2])]
        self.anno = sorted(tmp, key=lambda x:x[1])
        print("Using annotation %s" % (self.anno,))
    else:
        self.anno = None
        print("No annotation found")

    self.viruses.append(self.outgroup)
    self.filter_geo(prune=False)
    self.make_strain_names_unique()
def __init__(self,min_length = 0, **kwargs):
    '''
    Initialise the filter, its vaccine strain records and its outgroup.

    parameters
    min_length -- minimal length for a sequence to be acceptable
    kwargs     -- forwarded unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length
    # The stored sequences are gap-padded ('-'); the gaps are part of the
    # literals and are kept exactly as they were.
    self.vaccine_strains =[
        {
            'strain': 'A/turkey/Italy/3889/99',
            'isolate_id': 'EPI-ISL-2746',
            # Month and day unknown. The year follows the strain name ("/99");
            # the previous value '1990-07-01' contradicted it.
            'date': '1999-07-01',
            'region': 'Europe',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGGTGGCGATCATTCCGACAAATGCAGACAAAATCTGCCTTGGGCATCATGCCGTGTCAAACGGGACTAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTTAATGCAACTGAAACGGTGGAACGAACAAACGTCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTTCTGGGAACAATCACTGGGCCACCCCAATGTGACCAATTCCTAGAATTTTCAGCCGATCTAATTATTGAGAGGCGAGAAGGAAGTGGTGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACATACAGCGGAATAAGAACTAATGGAACAACCAGTGCATGTAGGAGATTAGGATCTTCATTCTATGCAGAGATGAAATGGCTCCTGTCAAACACAGACAATGCTGCTTTCCCGCAGATGACTAAGTCATACAAAAACACAAGGAAAGACCCAGCTCTGATAATATGGGGGATCCACCATTCCGGATCAACTACAGAACAGACCAAGCTATATGGGAGTGGAAACAAACTGATAACAGTTGGGAGTTCTAATTACCAACAGTCCTTTGTACCGAGTCCAGGAGCGAGACCACAAGTGAATGGCCAATCTGGAAGAATTGACTTTCATTGGCTGATGCTAAACCCCAATGACACAGTCACTTTCAGTTTCAATGGGGCCTTCATAGCTCCAGACCGTGCAAGTTTTCTGAGAGGGAAGTCTATGGGGATTCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGAGATTGCTATCACAGTGGAGGGACAATAATAAGTAATTTGCCCTTTCAGAACATAAATAGCAGGGCAGTAGGGAAATGTCCGAGATATGTTAAGCAAGAGAGTCTGCTGCTGGCAACAGGGATGAAGAATGTTCCCGAAGTTCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGTCTGATTGATGGGTGGTATGGCTTCAGGCATCAAAATGCACAAGGGGAGGGAACTGCTGCAGATTACAAAAGCACCCAATCAGCAATTGATCAAGTAACAGGAAAATTGAACCGGCTTATAGAAAAAACTAACCAACAATTTGAGTTAATAGACAATGAATTCACTGAGGTTGAAAAGCAAATTGGCAATGTGATAAATTGGACCAGAGATTCCATGACAGAAGTGTGGTCCTATAACGCTGAACTCTTGGTAGCAATGGAGAACCAGCACACAATTGATCTGGCCGACTCAGAAATGAACAAACTATACGAACGAGTGAAGAGACAACTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTCGAAATATTTCACAAGTGTGATGACGACTGTATGGCCAGTATTAGAAACAACACCTATGATCACAGCAAGTACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTGGCCATTGCAATGGGCCTTGTCTTCATATGTGTGAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        },
        {
            'strain': 'A/mallard/Netherlands/12/00',
            'isolate_id': 'EPI-ISL-3548',
            'date': '2000-07-01', # (Month and day unknown)
            'region': 'Europe',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGATGGCGATCATTCCGACAAATGCAGACAAGATCTGCCTTGGGCATCATGCCGTGTCAAACGGGACTAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTTAATGCAACTGAAACGGTGGAACGAACAAACGTCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTTGGTCAATGTGGACTTCTGGGAACAATCACTGGGCCGCCCCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACCTACAGCGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAGATGAAATGGCTCCTGTCAAACACAGACAATGCTGCTTTCCCGCAGATGACTAAGTCATACAAAAACACAAGGAAAGACCCAGCTCTGATAATATGGGGGATCCACCATTCCGGATCAACTACAGAACAGACCAAGCTATATGGGAGTGGAAACAAACTGATAACAGTTGGGAGTTCTAATTACCAACAGTCCTTTGTACCGAGTCCAGGAGCGAGACCACAAGTAAATGGCCAATCTGGAAGAATTGACTTTCATTGGCTGATACTAAACCCCAATGACACAGTTACTTTCAGTTTCAATGGGGCCTTCATAGCTCCAGACCGTGCAAGCTTTCTGAGAGGGAAGTCTATGGGAATTCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGAGATTGCTATCATAGTGGAGGGACAATAATAAGTAATTTGCCCTTTCAGAACATAAATAGCAGGGCAGTAGGAAAATGTCCGAGATATGTTAAGCAAGAGAGTCTGCTGCTGGCAACAGGAATGAAGAATGTTCCCGAAATCCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGTCTGATTGATGGGTGGTATGGCTTCAGGCATCAAAATGCACAAGGGGAGGGAACTGCTGCAGATTACAAAAGCACCCAATCAGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACTAACCAACAATTTGAGTTAATAGACAATGAATTCACTGAGGTTGAAAAGCAAATTGGCAATGTGATAAATTGGACCAGAGATTCCATGACAGAAGTGTGGTCCTATAACGCTGAACTCTTGGTAGCAATGGAGAATCAGCACACAATTGATCTGGCCGACTCAGAAATGAACAAACTGTACGAACGAGTGAAGAGACAACTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTCGAAATATTTCACAAGTGTGATGACGACTGTATGGCCAGTATTAGAAACAACACCTATGATCACAGCAAGTACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTGGCCATTGCAATGGGCCTTGTCTTCATATGTGTGAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        },
        {
            'strain': 'A/human/New-York/107/2003',
            'isolate_id': 'EPI-ISL-16424',
            'date': '2003-07-01',
            'region': 'NorthAmerica',
            'seq': 'ATGAACACTCAAATTCTGGCATTCATTGCTTGTGTGCTGACTGGAGTTAAAGGAGACAAAATATGTCTTGGGCACCATGCTGTGGCAAATGGAACAAAAGTGAACACATTAACAGAGAGGGGGATTGAAGTAGTGAATGCCACAGAGACAGTGGAAACTACGAATATCAAGAAAATATGTACCCAGGGGAAAAGGCCAACAGATCTGGGACAATGTGGACTTCTAGGAACCCTAATAGGACCTCCCCAATGTGATCAATTCCTGGAGTTTTCCTCTGATTTGATAATTGAGCGAAGAGAAGGAACCGATATATGCTATCCCGGTAGATTCACAAACGAAGAATCACTGAGGCAGATCCTTCGAAGATCAGGAGGAATTGGTAAGGAGTCAATGGGCTTCACCTATAGTGGAATAAGAACCAATGGAGCGACAAGTGCCTGCACAAGATCAGGTTCTTCTTTCTATGCAGAGATGAAGTGGTTGCTGTCGAATTCAGATAATGCAGCATTCCCACAGATGACAAAGGCGTATAGAAATCCCAGAAACAAACCAGCTCTGATAATTTGGGGAGTTCATCACTCTGAATCGGTTAGCGAGCAGACCAAACTCTATGGAAGTGGAAACAAGTTGATAACAGTAAGAAGCTCAAAATACCAGCAATCATTCACCCCAAATCCGGGAGCACGA------------------------AGAATCGATTTCCACTGGCTACTCCTTGATCCCAATGACACAGTGACCTTCACTTTCAATGGAGCATTCATAGCCCCTGACAGGACAAGTTTCTTTAGGGGAGAATCACTAGGAGTCCAGAGTGATGCTCCTTTGGATTCAAGTTGTAGAGGAGATTGCTTTCACAGTGGGGGTACGATAGTCAGTTCCCTGCCATTCCAAAACATCAACTCTAGAACTGTGGGGAAATGCCCTCGGTATGTCAAACAGAAAAGCCTCCTTCTGGCTACAGGAATGAGAAATGTTCCAGAGAAACCAAAGCCC------------------------------AGAGGCCTTTTTGGAGCAATTGCTGGATTCATAGAGAATGGATGGGAGGGTCTCATCAATGGATGGTATGGTTTCAGACATCAAAATGCACAAGGAGAGGGAACTGCAGCTGACTACAAAAGCACCCAGTCTGCAATAGATCAGATCACAGGCAAATTGAATCGTTTAATTGGCAAAACAAATCAGCAGTTTGAGCTGATAGACAATGAGTTCAATGAGATAGAACAACAAATAGGAAATGTCATTAATTGGACAAGAGACGCAATGACTGAGATATGGTCGTATAATGCTGAGCTGTTGGTGGCAATGGAAAATCAGCATACAATAGATCTTGCGGACTCAGAAATGAGCAAACTTTATGAGCGTGTCAAAAAACAACTAAGGGAGAATGCTGAAGAAGATGGAACTGGATGTTTTGAGATATTTCATAAATGTGACGATCAGTGTATGGAGAGCATAAGGAACAACACGTATGACCATACTCAATACAGAACAGAGTCATTGCAGAATAGAATACAGATAGACCCAGTGAAGTTGAGTAGTGGGTACAAAGACATAATCTTATGGTTTAGCTTCGGGGCATCATGTTTTCTTCTTCTAGCCATTGCAATGGGACTGGTTTTCATTTGCATAAAGAATGGAAACATGCAGTGCACTATTTGTATATAG',
        },
        {
            'strain': 'A/human/Shanghai/2/2013',
            'isolate_id': 'EPI-ISL-138738',
            'date': '2013-03-05',
            'region': 'China',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGATTGCGATCATTCCAACAAATGCAGACAAAATCTGCCTCGGACATCATGCCGTGTCAAACGGAACCAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTCAATGCAACTGAAACAGTGGAACGAACAAACATCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTCCTGGGGACAATCACTGGACCACCTCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGAGAATCAGGCGGAATTGACAAGGAAGCAATGGGATTCACATACAGTGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAAATGAAATGGCTCCTGTCAAACACAGATAATGCTGCATTCCCGCAGATGACTAAGTCATATAAAAATACAAGAAAAAGCCCAGCTCTAATAGTATGGGGGATCCATCATTCCGTATCAACTGCAGAGCAAACCAAGCTATATGGGAGTGGAAACAAACTGGTGACAGTTGGGAGTTCTAATTATCAACAATCTTTTGTACCGAGTCCAGGAGCGAGACCACAAGTTAATGGTCTATCTGGAAGAATTGACTTTCATTGGCTAATGCTAAATCCCAATGATACAGTCACTTTCAGTTTCAATGGGGCTTTCATAGCTCCAGACCGTGCAAGCTTCCTGAGAGGAAAATCTATGGGAATCCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGGGACTGCTATCATAGTGGAGGGACAATAATAAGTAACTTGCCATTTCAGAACATAGATAGCAGGGCAGTTGGAAAATGTCCGAGATATGTTAAGCAAAGGAGTCTGCTGCTAGCAACAGGGATGAAGAATGTTCCTGAGATTCCAAAAGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGCCTAATTGATGGTTGGTATGGTTTCAGACACCAGAATGCACAGGGAGAGGGAACTGCTGCAGATTACAAAAGCACTCAATCGGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACCAACCAACAATTTGAGTTGATAGACAATGAATTCAATGAGGTAGAGAAGCAAATCGGTAATGTGATAAATTGGACCAGAGATTCTATAACAGAAGTGTGGTCATACAATGCTGAACTCTTGGTAGCAATGGAGAACCAGCATACAATTGATCTGGCTGATTCAGAAATGGACAAACTGTACGAACGAGTGAAAAGACAGCTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTTGAAATATTTCACAAGTGTGATGATGACTGTATGGCCAGTATTAGAAATAACACCTATGATCACAGCAAATACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTAGCCATTGTAATGGGCCTTGTCTTCATATGTGTAAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        },
        {
            'strain': 'A/human/Anhui/1/2013',
            'isolate_id': 'EPI-ISL-138739',
            'date': '2013-03-20',
            'region': 'China',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGATTGCGATCATTCCAACAAATGCAGACAAAATCTGCCTCGGACATCATGCCGTGTCAAACGGAACCAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTCAATGCAACTGAAACAGTGGAACGAACAAACATCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTCCTGGGGACAATCACTGGACCACCTCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGAGAATCAGGCGGAATTGACAAGGAAGCAATGGGATTCACATACAGTGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAAATGAAATGGCTCCTGTCAAACACAGATAATGCTGCATTCCCGCAGATGACTAAGTCATATAAAAATACAAGAAAAAGCCCAGCTCTAATAGTATGGGGGATCCATCATTCCGTATCAACTGCAGAGCAAACCAAGCTATATGGGAGTGGAAACAAACTGGTGACAGTTGGGAGTTCTAATTATCAACAATCTTTTGTACCGAGTCCAGGAGCGAGACCACAAGTTAATGGTCTATCTGGAAGAATTGACTTTCATTGGCTAATGCTAAATCCCAATGATACAGTCACTTTCAGTTTCAATGGGGCTTTCATAGCTCCAGACCGTGCAAGCTTCCTGAGAGGAAAATCTATGGGAATCCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGGGACTGCTATCATAGTGGAGGGACAATAATAAGTAACTTGCCATTTCAGAACATAGATAGCAGGGCAGTTGGAAAATGTCCGAGATATGTTAAGCAAAGGAGTCTGCTGCTAGCAACAGGGATGAAGAATGTTCCTGAGATTCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGCCTAATTGATGGTTGGTATGGTTTCAGACACCAGAATGCACAGGGAGAGGGAACTGCTGCAGATTACAAAAGCACTCAATCGGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACCAACCAACAATTTGAGTTGATAGACAATGAATTCAATGAGGTAGAGAAGCAAATCGGTAATGTGATAAATTGGACCAGAGATTCTATAACAGAAGTGTGGTCATACAATGCTGAACTCTTGGTAGCAATGGAGAACCAGCATACAATTGATCTGGCTGATTCAGAAATGGACAAACTGTACGAACGAGTGAAAAGACAGCTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTTGAAATATTTCACAAGTGTGATGATGACTGTATGGCCAGTATTAGAAATAACACCTATGATCACAGCAAATACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTAGCCATTGTAATGGGCCTTGTCTTCATATGTGTAAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        }
    ]
    # Read the outgroup from the repository-relative source-data directory.
    # The path is resolved against the current working directory (presumably
    # the augur directory); an absolute path into one developer's home
    # directory would make this class unusable on any other machine.
    tmp_outgroup = SeqIO.read('source-data/H7_outgroup.gb', 'genbank')
    genome_annotation = tmp_outgroup.features
    # Map gene name -> CDS feature, restricted to SigPep, HA1 and HA2.
    self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation
                if 'gene' in x.qualifiers and x.type=='CDS' and
                x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']}
    self.outgroup = {
        'strain':'A/equine/Prague/2/1956',
        'db': 'Other-Database-Import',
        'isolate_id':'EPI-ISL-89157',
        'date':'1956-06-11',
        'country': 'CzechRepublic',
        'region':'Europe',
        'seq':str(tmp_outgroup.seq).upper()
    }
def __init__(self, aln_fname, outgroup, outdir='./', formats=['pdf', 'svg', 'png'], verbose=0, **kwargs): process.__init__(self, **kwargs) flu_filter.__init__(self, alignment_file=aln_fname, **kwargs) tree_refine.__init__(self, **kwargs) virus_clean.__init__(self, **kwargs) self.verbose = verbose self.formats = formats self.outdir = outdir.rstrip('/') + '/' self.auspice_tree_fname = self.outdir + 'tree.json' self.auspice_sequences_fname = self.outdir + 'sequences.json' self.auspice_frequencies_fname = None self.auspice_meta_fname = self.outdir + 'meta.json' if os.path.isfile(outgroup): tmp = [{ 'strain': seq.name, 'seq': str(record.seq).upper(), 'desc': seq.description } for seq in SeqIO.parse(outgroup, 'fasta')] if len(tmp): self.outgroup = tmp[0] if len(tmp) > 1: print "More than one sequence in ", outgroup, "taking first" if self.verbose: print "using outgroup found in file ", outgroup elif isinstance(outgroup, basestring): seq_names = [x['strain'] for x in self.viruses] if outgroup in seq_names: self.outgroup = self.viruses.pop(seq_names.index(outgroup)) if self.verbose: print "using outgroup found in alignment", outgroup else: standard_outgroups = [{ 'strain': seq.name, 'seq': str(seq.seq).upper(), 'desc': seq.description } for seq in SeqIO.parse(std_outgroup_file, 'fasta')] outgroup_names = [x['strain'] for x in standard_outgroups] if outgroup in outgroup_names: self.outgroup = standard_outgroups[outgroup_names.index( outgroup)] if self.verbose: print "using standard outgroup", outgroup else: raise ValueError("outgroup %s not found" % outgroup) return self.viruses.append(self.outgroup) self.filter_geo(prune=False) #self.filter_host(prune=False) self.make_strain_names_unique()
def __init__(self,min_length = 987, **kwargs):
    '''
    Run flu_filter.__init__ and attach the hard-coded reference strains:
    six vaccine strains and the outgroup A/Beijing/32/1992.

    parameters
    min_length  -- minimal length for a sequence to be acceptable
    kwargs      -- passed on unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    wisconsin_2005 = {
        "strain": "A/Wisconsin/67/2005",
        "db": "IRD",
        "accession": "CY163984",
        "date": "2005-08-31",
        "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGGAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACGATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAACAAGCTCTTCTTGCAAAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
    }
    brisbane_2007 = {
        "strain": "A/Brisbane/10/2007",
        "db": "IRD",
        "accession": "CY113005",
        "date": "2007-02-06",
        "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCACTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAATGACCAAATCTTCCCGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAACGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACCAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACAATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
    }
    perth_2009 = {
        "strain": "A/Perth/16/2009",
        "db": "IRD",
        "accession": "GQ293081",
        "date": "2009-04-07",
        "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAAAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAAAAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAAGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACCGTAAGCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTTCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA"
    }
    # NOTE(review): carries the same accession as A/Perth/16/2009 although
    # the two sequences differ -- looks like a copy/paste slip, confirm
    victoria_2011 = {
        "strain": "A/Victoria/361/2011",
        "db": "IRD",
        "accession": "GQ293081",
        "date": "2011-10-24",
        "seq": "ATGAAGACTATCATTGCTTTGAGCCACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCGCTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAAGGAACAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATATAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTAAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA"
    }
    texas_2012 = {
        "strain": "A/Texas/50/2012",
        "db": "GISAID",
        "isolate_id": "EPI_ISL_129858",
        "date": "2012-04-15",
        "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAACTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGAATGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAACCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGGAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
    }
    switzerland_2013 = {
        "strain": "A/Switzerland/9715293/2013",
        "db": "GISAID",
        "isolate_id": "EPI_ISL_162149",
        "date": "2013-12-06",
        "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGGCTGGAGTCACTCAAAACGGAACAAGTTCTTCTTGCATAAGGGGATCTAATAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTCCAAATACCCAGCATTAAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCACAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAGACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGCTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACAAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA",
    }
    # list order is the chronological order of the 'date' fields
    self.vaccine_strains = [
        wisconsin_2005,
        brisbane_2007,
        perth_2009,
        victoria_2011,
        texas_2012,
        switzerland_2013,
    ]

    self.outgroup = {
        'strain': 'A/Beijing/32/1992',
        'db': 'IRD',
        'accession': 'U26830',
        'date': '1992-01-01',
        'country': 'China',
        'region': 'China',
        'seq': 'ATGAAGACTATCATTGCTTTGAGCTACATTTTATGTCTGGTTTTCGCTCAAAAACTTCCCGGAAATGACAACAGCACAGCAACGCTGTGCCTGGGACATCATGCAGTGCCAAACGGAACGCTAGTGAAAACAATCACGAATGATCAAATTGAAGTGACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTAGAATATGCGACAGTCCTCACCGAATCCTTGATGGAAAAAACTGCACACTGATAGATGCTCTATTGGGAGACCCTCATTGTGATGGCTTCCAAAATAAGGAATGGGACCTTTTTGTTGAACGCAGCAAAGCTTACAGCAACTGTTACCCTTATGATGTACCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCAGGCACCCTGGAGTTTATCAATGAAGACTTCAATTGGACTGGAGTCGCTCAGGATGGGGGAAGCTATGCTTGCAAAAGGGGATCTGTTAACAGTTTCTTTAGTAGATTGAATTGGTTGCACAAATCAGAATACAAATATCCAGCGCTGAACGTGACTATGCCAAACAATGGCAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGAGCACGGACAGAGACCAAACCAGCCTATATGTTCGAGCATCAGGGAGAGTCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAACCCCGAATATCGGGTCTAGACCCTGGGTAAGGGGTCAGTCCAGTAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAATAGCACAGGGAATCTAATTGCTCCTCGGGGTTACTTCAAAATACGAAATGGGAAAAGCTCAATAATGAGGTCAGATGCACCCATTGGCACCTGCAGTTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCTTTTCAAAATGTAAACAGGATCACATATGGGGCCTGCCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGGAATGTACCAGAGAAACAAACTAGAGGCATATTCGGCGCAATCGCAGGTTTCATAGAAAATGGTTGGGAGGGAATGGTAGACGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGCACAGGACAAGCAGCAGATCTTAAAAGCACTCAAGCAGCAATCGACCAAATCAACGGGAAACTGAATAGGTTAATCGAGAAAACGAACGAGAAATTCCATCAAATCGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAAGACACTAAAATAGATCTCTGGTCTTACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTTACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAGGAAGCAACTGAGGGAAAATGCTGAGGACATGGGCAATGGTTGCTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGGTCAATCAGAAATGGAACTTATGACCATGATGTATACAGAGACGAAGCATTAAACAACCGGTTCCAGATCAAAGGTGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTGTGGATTTCCTTTGCCATATCATGCTTTTTGCTTTGTGTTGTTTTGCTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGTAACATTTGCATTTGA'
    }
def __init__(self,min_length = 987, **kwargs):
    '''
    Run flu_filter.__init__, attach four hard-coded vaccine strains, and
    read the CDS annotation and the outgroup sequence from
    source-data/Vic_outgroup.gb.

    parameters
    min_length  -- minimal length for a sequence to be acceptable
    kwargs      -- passed on unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    shangdong_1997 = {
        'strain': 'B/Shangdong/7/97',
        'isolate_id': 'EPI_ISL_1790',
        'date': '1997-07-01',  # month and day unknown
        'region': 'china',
        'country': 'china',
        'seq': 'GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGCCTCAACTGTACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAATTCATAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAAACCAAATGGCAAAACTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGGTCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTATTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACTCAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCGCAAATAGAACTCGCAGTCTTGCTTTCCAAT',
    }
    hongkong_2001 = {
        'strain': 'B/HongKong/330/2001',
        'isolate_id': 'EPI_ISL_2342',
        'date': '2001-07-01',  # month and day unknown
        'region': 'china',
        'country': 'hong_kong',
        'seq': 'GATCGAATCTGCACTGGAATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTAAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACGTATCAGGTTATCAAACCATAACGTTATCAATGCAGAAAAAGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGAAAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAGCGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAGCCTCAGAAGTTCACTTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTATACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTTTGGGCTG',
    }
    malaysia_2004 = {
        'strain': 'B/Malaysia/2506/2004',
        'isolate_id': 'EPI_ISL_21142',
        'date': '2004-07-01',  # month and day unknown
        'region': 'southeast_asia',
        'country': 'malaysia',
        'seq': 'ATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTTGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCAGAGGATACGAACATATCAGGTTATCAACTCATAACGTTATCAATGCAGAAAATGCACCAGGAGGATCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAGCATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAACCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGGTTATTGAAAAATGCTCTTGTTACTACTAATA',
    }
    brisbane_2008 = {
        'strain': 'B/Brisbane/60/2008',
        'isolate_id': 'EPI_ISL_24365',
        'date': '2008-08-04',
        'lab': 'Queensland Health Scientific Services',
        'region': 'oceania',
        'country': 'australia',
        'seq': 'AGCAGAAGCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGT',
    }
    self.vaccine_strains = [
        shangdong_1997,
        hongkong_2001,
        malaysia_2004,
        brisbane_2008,
    ]

    reference = SeqIO.read('source-data/Vic_outgroup.gb', 'genbank')

    # keep only the CDS features of the three named genes, keyed by gene
    # name; a later feature with the same gene name replaces an earlier one
    cds_by_gene = {}
    for feature in reference.features:
        if 'gene' not in feature.qualifiers or feature.type != 'CDS':
            continue
        gene = feature.qualifiers['gene'][0]
        if gene in ['SigPep', 'HA1', 'HA2']:
            cds_by_gene[gene] = feature
    self.cds = cds_by_gene

    # NOTE(review): 'China' is capitalised here while the vaccine strains
    # above use lower-case region names ('china') -- confirm which spelling
    # the region filter expects
    self.outgroup = {
        'strain': 'B/HongKong/02/1993',
        'region': 'China',
        'isolate_id': 'EPI_ISL_6617',
        'date': '1993-02-15',  # month and day unknown
        'seq': str(reference.seq).upper(),
    }
def __init__(self, min_length=987, **kwargs):
    '''
    Run flu_filter.__init__, attach two hard-coded vaccine strains, and
    read the CDS annotation and the outgroup sequence from
    source-data/H1N1pdm_outgroup.gb.

    parameters
    min_length  -- minimal length for a sequence to be acceptable
    kwargs      -- passed on unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    california_2009 = {
        'strain': 'A/California/07/2009',
        'isolate_id': 'EPI_ISL_31553',
        'date': '2009-04-09',
        'lab': 'Naval Health Research Center',
        'region': 'north_america',
        'country': 'usa',
        'seq': 'ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA',
    }
    michigan_2015 = {
        "strain": "A/Michigan/45/2015",
        'isolate_id': 'EPI699812',
        "date": "2015-09-07",
        "region": "north_america",
        "country": "usa",
        "seq": "GGAAAAACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTACAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTGGAAGACAAGCATAACGGAAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGTTCATGGTCCTACATTGTGGAAACATCTAATTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCAATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCACGCTGGAGCAAAAAGCTTCTACAAAAACTTGATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTTAACCAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTGTGGGGCATTCACCATCCATCTACTACTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAACAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCACAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCGAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAGTATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTTCCGTCTATTCAATCTAGAGGCCTATTCGGGGCCATTGCCGGCTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAAAATGCCATTGACAAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTGGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATCTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTATCACGATTCAAATGTGAAGAACTTGTATGAAAAAGTAAGAAACCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAAAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAATCATGAGAAAAACAC"
    }
    self.vaccine_strains = [california_2009, michigan_2015]

    reference = SeqIO.read('source-data/H1N1pdm_outgroup.gb', 'genbank')

    # keep only the CDS features of the three named genes, keyed by gene
    # name; a later feature with the same gene name replaces an earlier one
    cds_by_gene = {}
    for feature in reference.features:
        if 'gene' not in feature.qualifiers or feature.type != 'CDS':
            continue
        gene = feature.qualifiers['gene'][0]
        if gene in ['SigPep', 'HA1', 'HA2']:
            cds_by_gene[gene] = feature
    self.cds = cds_by_gene

    self.outgroup = {
        'strain': 'A/Swine/Indiana/P12439/00',
        'db': 'IRD',
        'accession': 'AF455680',
        'date': '2002-03-14',
        'region': 'north_america',
        'country': 'usa',
        'seq': str(reference.seq).upper()
    }
def __init__(self,min_length = 987, **kwargs):
    '''
    Run flu_filter.__init__, attach four hard-coded vaccine strains, and
    read the CDS annotation and the outgroup sequence from
    source-data/Vic_outgroup.gb.

    parameters
    min_length  -- minimal length for a sequence to be acceptable
    kwargs      -- passed on unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    shangdong_1997 = {
        'strain': 'B/Shangdong/7/97',
        'isolate_id': 'EPI_ISL_1790',
        'date': '1997-07-01',  # month and day unknown
        'region': 'China',
        'seq': 'GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGCCTCAACTGTACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAATTCATAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAAACCAAATGGCAAAACTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGGTCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTATTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACTCAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCGCAAATAGAACTCGCAGTCTTGCTTTCCAAT',
    }
    hongkong_2001 = {
        'strain': 'B/HongKong/330/2001',
        'isolate_id': 'EPI_ISL_2342',
        'date': '2001-07-01',  # month and day unknown
        'region': 'China',
        'seq': 'GATCGAATCTGCACTGGAATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTAAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACGTATCAGGTTATCAAACCATAACGTTATCAATGCAGAAAAAGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGAAAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAGCGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAGCCTCAGAAGTTCACTTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTATACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTTTGGGCTG',
    }
    malaysia_2004 = {
        'strain': 'B/Malaysia/2506/2004',
        'isolate_id': 'EPI_ISL_21142',
        'date': '2004-07-01',  # month and day unknown
        'region': 'SouthEast Asia',
        'seq': 'ATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTTGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCAGAGGATACGAACATATCAGGTTATCAACTCATAACGTTATCAATGCAGAAAATGCACCAGGAGGATCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAGCATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAACCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGGTTATTGAAAAATGCTCTTGTTACTACTAATA',
    }
    brisbane_2008 = {
        'strain': 'B/Brisbane/60/2008',
        'isolate_id': 'EPI_ISL_24365',
        'date': '2008-08-04',
        'lab': 'Queensland Health Scientific Services',
        'region': 'Oceania',
        'seq': 'AGCAGAAGCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGT',
    }
    self.vaccine_strains = [
        shangdong_1997,
        hongkong_2001,
        malaysia_2004,
        brisbane_2008,
    ]

    reference = SeqIO.read('source-data/Vic_outgroup.gb', 'genbank')

    # keep only the CDS features of the three named genes, keyed by gene
    # name; a later feature with the same gene name replaces an earlier one
    cds_by_gene = {}
    for feature in reference.features:
        if 'gene' not in feature.qualifiers or feature.type != 'CDS':
            continue
        gene = feature.qualifiers['gene'][0]
        if gene in ['SigPep', 'HA1', 'HA2']:
            cds_by_gene[gene] = feature
    self.cds = cds_by_gene

    self.outgroup = {
        'strain': 'B/HongKong/02/1993',
        'region': 'China',
        'isolate_id': 'EPI_ISL_6617',
        'date': '1993-02-15',  # month and day unknown
        'seq': str(reference.seq).upper(),
    }
def __init__(self, min_length=987, **kwargs):
    '''
    Initialise the base filter and attach the hard-coded reference records.

    parameters
    min_length  -- minimal length for a sequence to be acceptable
    kwargs      -- forwarded unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    # Nucleotide sequences are kept apart from the metadata below so that
    # the record definitions stay readable.
    shangdong_seq = 'GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGCCTCAACTGTACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAATTCATAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAAACCAAATGGCAAAACTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGGTCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTATTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACTCAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCGCAAATAGAACTCGCAGTCTTGCTTTCCAAT'
    hongkong_330_seq = 'GATCGAATCTGCACTGGAATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTAAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACGTATCAGGTTATCAAACCATAACGTTATCAATGCAGAAAAAGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGAAAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAGCGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAGCCTCAGAAGTTCACTTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTATACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTTTGGGCTG'
    malaysia_seq = 'ATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTTGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCAGAGGATACGAACATATCAGGTTATCAACTCATAACGTTATCAATGCAGAAAATGCACCAGGAGGATCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAGCATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAACCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGGTTATTGAAAAATGCTCTTGTTACTACTAATA'
    brisbane_seq = 'AGCAGAAGCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGT'
    outgroup_seq = 'ATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAGTGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGACCATACCTTCGGCAAAAGTTTCAATACTCCACGAAGTCAAACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAGTAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAACCTCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAGCAGAAGACGGAGGGCTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTACCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGCGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAATCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAAGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACACAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTTGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTCGAACTGGATGAGAAAGTGGATGATCTCAGAGCTGACACAATAAGCTCGCAAATAGAGCTCGCAGTCTTGCTTTCCAATGAAGGAATAATAAACAGCGAAGATGAGCATCTCTTGGCACTTGAAAGAAAACTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCCTCGACAGAATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCATTAAATATTACTGCTGCATCTTTAAATGATGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCTTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTATTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAGCCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGTTATTGA'

    self.vaccine_strains = [
        {
            'strain': 'B/Shangdong/7/97',
            'isolate_id': 'EPI_ISL_1790',
            'date': '1997-07-01',  # (Month and day unknown)
            'region': 'China',
            'seq': shangdong_seq,
        },
        {
            'strain': 'B/HongKong/330/2001',
            'isolate_id': 'EPI_ISL_2342',
            'date': '2001-07-01',  # (Month and day unknown)
            'region': 'China',
            'seq': hongkong_330_seq,
        },
        {
            'strain': 'B/Malaysia/2506/2004',
            'isolate_id': 'EPI_ISL_21142',
            'date': '2004-07-01',  # (Month and day unknown)
            'region': 'SouthEast Asia',
            'seq': malaysia_seq,
        },
        {
            'strain': 'B/Brisbane/60/2008',
            'isolate_id': 'EPI_ISL_24365',
            'date': '2008-08-04',
            'lab': 'Queensland Health Scientific Services',
            'region': 'Oceania',
            'seq': brisbane_seq,
        },
    ]

    self.outgroup = {
        'strain': 'B/HongKong/02/1993',
        'region': 'China',
        'isolate_id': 'EPI_ISL_6617',
        'date': '1993-02-15',  # (Month and day unknown)
        'seq': outgroup_seq,
    }
def __init__(self, min_length=987, **kwargs):
    """
    Set up the base filter, then define the reference records used here.

    parameters
    min_length  -- minimal length for a sequence to be acceptable
    kwargs      -- passed straight through to flu_filter.__init__
    """
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    # One named record per reference strain; the list is assembled below.
    shangdong_1997 = {
        "strain": "B/Shangdong/7/97",
        "isolate_id": "EPI_ISL_1790",
        "date": "1997-07-01",  # (Month and day unknown)
        "region": "China",
        "seq": "GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGCCTCAACTGTACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAATTCATAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAAACCAAATGGCAAAACTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGGTCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTATTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACTCAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCGCAAATAGAACTCGCAGTCTTGCTTTCCAAT",
    }
    hongkong_2001 = {
        "strain": "B/HongKong/330/2001",
        "isolate_id": "EPI_ISL_2342",
        "date": "2001-07-01",  # (Month and day unknown)
        "region": "China",
        "seq": "GATCGAATCTGCACTGGAATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCTTCGGCAAAAGTTTCAATACTCCATGAAGTAAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACGTATCAGGTTATCAAACCATAACGTTATCAATGCAGAAAAAGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGAAAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAGCGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAGCCTCAGAAGTTCACTTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTATACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTTTGGGCTG",
    }
    malaysia_2004 = {
        "strain": "B/Malaysia/2506/2004",
        "isolate_id": "EPI_ISL_21142",
        "date": "2004-07-01",  # (Month and day unknown)
        "region": "SouthEast Asia",
        "seq": "ATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTTGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGAGGGAAACTATGCCCAAAATGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGAACATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCAGAGGATACGAACATATCAGGTTATCAACTCATAACGTTATCAATGCAGAAAATGCACCAGGAGGATCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACGTTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGACGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAGCATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAACCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGGTTATTGAAAAATGCTCTTGTTACTACTAATA",
    }
    brisbane_2008 = {
        "strain": "B/Brisbane/60/2008",
        "isolate_id": "EPI_ISL_24365",
        "date": "2008-08-04",
        "lab": "Queensland Health Scientific Services",
        "region": "Oceania",
        "seq": "AGCAGAAGCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGT",
    }
    self.vaccine_strains = [
        shangdong_1997,
        hongkong_2001,
        malaysia_2004,
        brisbane_2008,
    ]

    hongkong_1993 = {
        "strain": "B/HongKong/02/1993",
        "region": "China",
        "isolate_id": "EPI_ISL_6617",
        "date": "1993-02-15",  # (Month and day unknown)
        "seq": "ATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCCCATGTGGTCAAAACTGCTACTCAAGGGGAAGTCAATGTGACTGGTGTGATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAAAAACCAGAGGGAAACTATGCCCAAAGTGTCTCAACTGCACAGATCTGGACGTGGCCTTGGGCAGACCAAAATGCACGGGGACCATACCTTCGGCAAAAGTTTCAATACTCCACGAAGTCAAACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCCAATCTTCTCAGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTCTTCGCAACAATGGCTTGGGCTGTCCCAAAAAACGACAACAACAAAACAGCAACAAATTCATTAACAGTAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGATAACGAAACCCAAATGGCAAAACTCTATGGAGACTCAAAACCTCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAGCAGAAGACGGAGGGCTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGAAAAACAGGAACAATTACCTACCAAAGAGGTATTTTATTGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGCGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAATCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAAGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGAGCACATGGAGTAGCAGTGGCAGCAGACCTTAAGAGTACACAAGAAGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTTGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTCGAACTGGATGAGAAAGTGGATGATCTCAGAGCTGACACAATAAGCTCGCAAATAGAGCTCGCAGTCTTGCTTTCCAATGAAGGAATAATAAACAGCGAAGATGAGCATCTCTTGGCACTTGAAAGAAAACTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCCTCGACAGAATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCATTAAATATTACTGCTGCATCTTTAAATGATGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCTTCCAGTTTGGCTGTAACATTGATGATAGCTATCTTTATTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAAGGAAAGTTAAGCCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTTACCATTACAAAAAACGTTATTGA",
    }
    self.outgroup = hongkong_1993
def __init__(self, min_length=987, **kwargs):
    '''
    Run the base-filter constructor and register the built-in reference
    records (self.vaccine_strains) and the outgroup (self.outgroup).

    parameters
    min_length  -- minimal length for a sequence to be acceptable
    kwargs      -- handed on unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    # The records are appended one at a time, in the order they are listed.
    reference_records = []
    reference_records.append({
        'strain': 'A/Beijing/262/95',
        'isolate_id': 'EPI_ISL_2656',
        'region': 'China',
        'db': 'GISAID',
        'date': '1995-07-01',  # (Month and day unknown)
        'seq': 'AGCAAAAGCAGGGGAAAATAAAAACAACCAAAATGAAAGCAAAACTACTAGTCCTGTTATGTACATTTACAGCTACATATGCAGACACAATATGTATAGGCTACCATGCCAACAACTCAACCGACACTGTTGACACAGTACTTGAGAAGAATGTGACAGTGACACACTCTGTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGATTTCTAAGGAATCATGGTCCTACATTGTAGAGACACCAAACCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAAACACACCGTAACAGGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAATTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCATCTAACATAGGGGACCAAAGGGCCATCTATCATACAGAAAACGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAGATTCACCCCAGAAATAGCAAAAAGACCCAAAGTAAGAGGTCAGGAAGGAAGAATCAACTACTACTGGACTCTGCTGGAACCCGGGGACACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCATGGTATGCTTTCGCACTGAGTAGAGGCTTTGGGTCAGGAATCATCACCTCAAATGCACCAATGAATGAATGTGATGCGAAGTGTCAAACACCTCAGGGAGCTATAAACAGTAGTCTTCCTTTCCAGAATGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTACAAAATTAAGGATGGTTACAGGACTAAGGAATATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGATGGATGGGTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAACGGGATTACAAATAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCTGTGGGCAAAGAATTCAACAAATTAGAAAGAAGGATGGAAAACTTAAATAAAAAAGTTGATGATGGATTTCTAGACATTTGGACATATAATGCAGAATTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCAAATGTGAAGAATCTGTATGAGAAAGTGAAAAGCCAATTAAAGAATAATGCCAAAGAAATAGGGAACGGGTGTTTTGAATTCTATCACAAGTGTAACAATGAATGCATGGAAAGTGTGAAAAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACTGTCGCCAGTTCACTGGTTCTTTTGGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGTTCCAATGGGTCTTTGCAGTGTAGAATATGCATCTGAGACCAGAATTTCAGAAATATAAGAAAAAACACCCTTGTTTCTACT',
    })
    reference_records.append({
        'strain': 'A/NewCaledonia/20/99',
        'isolate_id': 'EPI_ISL_158137',
        'region': 'Oceania',
        'date': '1999-06-09',
        'seq': 'GACACAATATGTATAGGCTACCATGCCAACAACTCAACCGACACTGTTGACACAGTACTTGAGAAGAATGTGACAGTGACACACTCTGTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAATCATGGTCCTACATTGTAGAAACACCAAATCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCGGAGTATCAGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGTAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCTAACATAGGGGACCAAAGGGCCCTCTATCATACAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAGATTCACCCCAGAAATAGCCAAAAGACCCAAAGTAAGAGATCAGGAAGGAAGAATCAACTACTACTGGACTCTGCTGGAACCTGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCATGGTATGCTTTTGCACTGAGTAGAGGCTTTGGATCAGGAATCATCACCTCAAATGCACCAATGGATGAATGTGATGCGAAGTGTCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGAATGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGGTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGTACACAAAATGCCATTAACGGGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCTGTGGGCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTAAATAAAAAAGTTGATGATGGGTTTCTAGACATTTGGACATATAATGCAGAATTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAATTAAAGAATAATGCCAAAGAAATAGGAAACGGGTGTTTTGAATTCTATCACAAGTGTAACAATGAATGCATGGAGAGTGTGAAAAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACTGTCGCCAGTTCCCTGGTTCTTTTGGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGTTCCAATGGGTCTTTGCAGTGCAGAATATGC',
    })
    reference_records.append({
        'strain': 'A/SolomonIslands/3/2006',
        'isolate_id': 'EPI_ISL_157458',
        'region': 'Oceania',
        'date': '2006-08-21',
        'lab': 'WHO Centre for Reference & Research on Influenza',
        'seq': 'GACACAATATGTATAGGCTACCATGCCAACAACTCAACCGACACTGTTGACACAGTACTTGAGAAGAATGTGACAGTGACACACTCTGTCAACCTGCTTGAGGACAGTCACAATGGAAAATTATGTCTATTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAGGGAATCATGGTCCTACATTGTAGAAAAACCAAATCCTGAGAATGGAACATGTTACCCAGGGCATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCACAACCGGAGTATCAGCATCATGCTCCCATAATGGGGAAAGCAGTTTTTACAAAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGCAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCTAACATAGGTGACCAAAGGGCTCTCTATCATACAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAAATTCACCCCAGAAATAGCCAAAAGACCCAAAGTAAGAGATCGAGAAGGAAGAATCAACTACTACTGGACTCTACTTGAACCCGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCAAGATATGCTTTCGCACTGAGTAGAGGCTTTGGATCAGGAATCATCAACTCAAATGCACCAATGGATGAATGTGATGCGAAGTGCCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGAATGTACACCCTGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGTTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAATGGGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCTGTGGGCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTAAATAAAAAAGTTGATGATGGGTTTATAGACATTTGGACATATAATGCAGAATTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAATTAAAGAATAATGCCAAAGAAATAGGAAATGGGTGTTTTGAATTCTATCACAAGTGTAACGATGAATGCATGGAGAGTGTAAAAAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACAGTCGCCAGTTCTCTGGTCCTTTTGGTCTCCCTGGGGGCAATCAGATTCG',
    })
    reference_records.append({
        'strain': 'A/Brisbane/59/2007',
        'isolate_id': 'EPI_ISL_23344',
        'region': 'Oceania',
        'date': '2007-07-01',  # (Month and day unknown)
        'seq': 'ATGAAAGTAAAACTACTGGTCCTGTTATGCACATTTACAGCTACATATGCAGACACAATATGTATAGGCTACCATGCTAACAACTCGACCGACACTGTTGACACAGTACTTGAAAAGAATGTGACAGTGACACACTCTGTCAACCTGCTTGAGAACAGTCACAATGGAAAACTATGTCTATTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGGTGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAGTCATGGTCCTACATTGTAGAAAAACCAAATCCTGAGAATGGAACATGTTACCCAGGGCATTTCGCTGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGGTTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCGGAGTGTCAGCATCATGCTCCCATAATGGGGAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGCAAACAACAAAGAAAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCAAACATAGGTRWCCAAAAGGCCCTCTATCATACAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAAATTCACCCCAGAAATAGCCAAAAGACCCAAAGTAAGAGATCAAGAAGGAAGAATCAATTACTACTGGACTCTGCTTGAACCCGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCAAGATATGCTTTCGCACTGAGTAGAGGCTTTGGATCAGGAATCATCAACTCAAATGCACCAATGGATAAATGTGATGCGAAGTGCCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGAACGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTGCCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGTTGGTATGGTTATCATCATCAGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAATGGGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCAGTGGGCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTGAATAAAAAAGTTGATGATGGGTTTATAGACATTTGGACATATAATGCAGAACTGTTGGTTCTACTGGAAAATGAAAGGACTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAGTTAAAGAATAATGCTAAAGAAATAGGAAATGGGTGTTTTGAATTCTATCACAAGTGTAACGATGAATGCATGGAGAGTGTAAAGAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAAAGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTCTGGCGATCTACTCAACAGTCGCCAGTTCTCTGGTTCTTTTGGTCTCCCTGGGGGCAATCAGCTTCTGGATGTGTTCCAATGGGTCTTTACAGTGTAGAATATGCATCTAA',
    })
    self.vaccine_strains = reference_records

    tokyo_1951 = {
        'strain': 'A/Tokyo/1/51',
        'db': 'GISAID',
        'accession': 'EPI_ISL_101',
        'date': '1951-07-01',
        'country': 'Japan',
        'region': 'JapanKorea',
        'seq': 'ATGAAAGCAAAACTACTGATCCTGTTATGTGCACTTTCAGCTACAGATGCAGACACAATATGTATAGGCTACCATGCTAACAATTCAACCGACACTGTTGACACAGTACTCGAAAAGAATGTGACAGTGACACACTCTGTAAACCTACTCGAAGACAGCCACAACGGGAAATTATGCAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGATCTTGGGAACCCCAGAATGCGAATCATTGCTCTCTAATAGATCATGGTCCTACATTGCAGAAACACCAAACTGTGAGAATGGAACATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACATAACCAGAGGAGTAACGGCAGCATGCTCCCACGCGAAGAAAAGCAGTTTTTACAAAAATTTGCTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTGCTGTGGGGTGTTCATCACCCGTCTAACATAGAGGATCAAAGGACCCTCTATCGGAAAGAAAATGCTTATGTCTCTGTGGTGTCTTCAAATTATAACAGGAGATTCACCCCGGAAATAGCAGAAAGACCCAAAGTAAGAGGTCAAGCAGGGAGAATAAACTATTACTGGACTTTGCTAGAACCCGGAGACAAAATAATATTTGAGGCAAATGGAAACCTAATAGCGCCATGGTATGCTTTCGCACTGAGTAGAGGCCTTGGATCAGGAATCATCACCTCAAACGCATCAATGGATGAATGTGACACGAAGTGTCAGACACCCCAGGGAGCTATAAACAGTAGTCTCCCTTTTCAGAACATACACCCAGTCACAATAGGAGAGTGCCCAAAATACGTCAGGAGTACCAAATTGAGGATGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGA',
    }
    self.outgroup = tokyo_1951
def __init__(self, aln_fname, outgroup, outdir = './', formats = ['pdf','svg','png'], verbose = 0, **kwargs):
    '''
    Initialise all base classes, set the output file names and resolve
    the outgroup sequence.

    parameters
    aln_fname   -- alignment file, passed to flu_filter as alignment_file
    outgroup    -- one of, tried in this order:
                     * path of an existing fasta file (its first record is used)
                     * name of a strain already present in self.viruses
                       (the entry is removed from that list and re-added below)
                     * name of a record in the module-level std_outgroup_file
    outdir      -- output directory; normalised to end in exactly one '/'
    formats     -- stored as self.formats (presumably figure formats -- confirm)
    verbose     -- if truthy, report which outgroup source was used
    kwargs      -- forwarded to every base-class constructor

    raises
    ValueError  -- if no outgroup could be determined
    '''
    process.__init__(self, **kwargs)
    flu_filter.__init__(self, alignment_file = aln_fname, **kwargs)
    tree_refine.__init__(self, **kwargs)
    virus_clean.__init__(self, **kwargs)
    self.verbose = verbose
    # Copy list arguments so that instances never share (or mutate) the
    # default list object from the signature; anything else is stored as is.
    self.formats = list(formats) if isinstance(formats, list) else formats
    self.outdir = outdir.rstrip('/')+'/'
    self.auspice_tree_fname = self.outdir + 'tree.json'
    self.auspice_sequences_fname = self.outdir + 'sequences.json'
    self.auspice_frequencies_fname = None
    self.auspice_meta_fname = self.outdir + 'meta.json'

    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if os.path.isfile(outgroup):
        # The original comprehension referenced an undefined name `record`
        # here, which raised NameError for every non-empty outgroup file.
        tmp = [{'strain':seq.name, 'seq':str(seq.seq).upper(), 'desc':seq.description}
               for seq in SeqIO.parse(outgroup, 'fasta')]
        if tmp:
            self.outgroup = tmp[0]
            if len(tmp) > 1:
                print("More than one sequence in  %s taking first" % outgroup)
            if self.verbose:
                print("using outgroup found in file  %s" % outgroup)
    elif isinstance(outgroup, string_types):
        seq_names = [x['strain'] for x in self.viruses]
        if outgroup in seq_names:
            # Take the outgroup out of the alignment; it is appended again below.
            self.outgroup = self.viruses.pop(seq_names.index(outgroup))
            if self.verbose:
                print("using outgroup found in alignment %s" % outgroup)
        else:
            standard_outgroups = [{'strain':seq.name, 'seq':str(seq.seq).upper(), 'desc':seq.description}
                                  for seq in SeqIO.parse(std_outgroup_file, 'fasta')]
            outgroup_names = [x['strain'] for x in standard_outgroups]
            if outgroup in outgroup_names:
                self.outgroup = standard_outgroups[outgroup_names.index(outgroup)]
                if self.verbose:
                    print("using standard outgroup %s" % outgroup)
            else:
                raise ValueError("outgroup %s not found" % outgroup)

    # Without this check an unresolved outgroup (e.g. an empty fasta file)
    # surfaced as an opaque AttributeError on the append below.
    if not hasattr(self, 'outgroup'):
        raise ValueError("no outgroup sequence could be determined from %r" % (outgroup,))

    self.viruses.append(self.outgroup)
    self.filter_geo(prune=False)
    #self.filter_host(prune=False)
    self.make_strain_names_unique()
def __init__(self, min_length=987, outgroup_file=None, **kwargs):
    '''
    Initialise the base filter, define the vaccine strain record and load
    the outgroup (sequence and CDS annotation) from a GenBank file.

    parameters
    min_length    -- minimal length for a sequence to be acceptable
    outgroup_file -- path of the GenBank file holding the outgroup; when
                     None the historical hard-coded location is used
    kwargs        -- forwarded unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length
    self.vaccine_strains = [{
        'strain': 'A/California/07/2009',
        'isolate_id': 'EPI_ISL_31553',
        'date': '2009-04-09',
        'lab': 'Naval Health Research Center',
        'country': 'USA',
        'region': 'NorthAmerica',
        'seq': 'ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA',
    }]

    if outgroup_file is None:
        # NOTE(review): this default is an absolute path into one developer's
        # home directory and only resolves on that machine. It is kept so
        # existing callers behave exactly as before; pass outgroup_file to
        # run anywhere else.
        outgroup_file = '/Users/yujia_zhou/Documents/Work/H9_nextflu-master/augur/source-data/H1N1pdm_outgroup.gb'
    tmp_outgroup = SeqIO.read(outgroup_file, 'genbank')
    genome_annotation = tmp_outgroup.features

    # Index the CDS features of interest by their 'gene' qualifier.
    wanted_genes = ['SigPep', 'HA1', 'HA2']
    self.cds = {
        x.qualifiers['gene'][0]: x
        for x in genome_annotation
        if 'gene' in x.qualifiers and x.type == 'CDS'
        and x.qualifiers['gene'][0] in wanted_genes
    }

    self.outgroup = {
        'strain': 'A/Swine/Indiana/P12439/00',
        'db': 'IRD',
        'accession': 'AF455680',
        'date': '2002-03-14',
        'country': 'USA',
        'region': 'NorthAmerica',
        'seq': str(tmp_outgroup.seq).upper()
    }
def __init__(self, min_length=0, **kwargs): ''' parameters min_length -- minimal length for a sequence to be acceptable ''' flu_filter.__init__(self, **kwargs) self.min_length = min_length self.vaccine_strains = [{ "strain": "A/Chicken/HongKong/G9/97", "db": "GISAID", "accession": "EPI_ISL_1263", "date": "1997-07-01", "seq": "ATGGAAATAATAGCACTAATAGCTATACTGGTAGTGACAAAAACAAGCAATGCAGATAAAATTTGCATTGGCTACCAGTCAACAAACTCCACAGAAACTGTTGATACACTAGTAGAAAACAATGTCCCTGTGACACATACCAAAGAATTGCTCCACACAGAGCACAATGGAATGCTATGTGCAACAAACCTGGGGCACCCTCTCATCCTAGACACCTGCACCATCGAAGGGTTGGTGTACGGCAACCCTTCCTGTGATTTGCTACTGGGAGGGAAAGAATGGTCTTACATTGTCGAAAGATCATCAGCTGTCAATGGGATGTGTTACCCTGGAAGGGTAGAGAACCTGGAAGAACTCAGGTCTTTTTTCAGCTCCGCTCGCTCCTACAAAAGACTCCTGCTCTTTCCAGACAGAACTTGGAATGTGACTTACACTGGGACAAGCAAAGCATGTTCAAACTCATTCTACAGAAGTATGAGATGGCTGACACACAAGAGCGATTCTTACCCTATTCAAGACGCCCAATATACTAACGATTGGGGAAAGAATATTCTCTTCATGTGGGGCATACACCACCCACCTACTGATACTGAGCAAATAAATCTATACAAAAAAGCTGATACAACAACAAGTATAACAACGGAAGATATCAATCGAACTTTCAAACCAGTGATAGGGCCAAGGCCTCTTGTCAATGGTCAACAAGGGAGAATTGATTATTATTGGTCAGTACTAAAGCCAGGCCAGACACTGCGAGTGAGATCCAATGGGAATCTAATTGCCCCATGGTATGGACACATTCTTTCAGGAGAAAGCCATGGAAGAATCTTGAAGACCGATTTGAGTAGTGGCAACTGCGTAGTACAATGCCAAACTGAGAAAGGTGGTTTGAACACGACCTTGCCATTCCACAATGTCAGCAAGTATGCATTTGGGAACTGCCCCAAATATGTTGGAGTGAAGAGTCTCAAACTGGCAGTTGGTCTAAGGAATGTTCCTGCTGCATCATATAGAGGGCTCTTCGGTGCCATAGCTGGATTCATAGAAGGCGGTTGGCCAGGACTAGTTGCAGGCTGGTACGGGTTTCAGCATTCAAATGATCAAGGGGTTGGAATGGCCGCAGATAGGGAATCAACTCAAGAAGCAGTTGACAAGATAACATCCAAAGTAAATAACATAATCGACAAAATGAACAAGCAGTATGGA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T---------------------------------------------------
-----------------------------------" }] tmp_outgroup = SeqIO.read( '/Users/yujiazhou/Documents/FluProject/augur/source-data/H9_outgroup.gb', 'genbank') genome_annotation = tmp_outgroup.features self.cds = { x.qualifiers['gene'][0]: x for x in genome_annotation if 'gene' in x.qualifiers and x.type == 'CDS' and x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2'] } self.outgroup = { 'strain': 'A/duck/HongKong/147/1977', 'db': 'IRD', 'accession': 'AY206671', 'date': '2003-03-03', 'country': 'HongKong', 'region': 'EastAsia', 'seq': str(tmp_outgroup.seq).upper() }
def __init__(self, min_length=987, **kwargs):
    """
    Initialise the filter: vaccine strains, CDS features and outgroup.

    parameters
    min_length -- minimal length for a sequence to be acceptable
    kwargs     -- forwarded unchanged to flu_filter.__init__
    """
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    # Reference (vaccine) strains, each with its metadata and sequence.
    self.vaccine_strains = [
        {
            "strain": "B/Beijing/184/93",
            "isolate_id": "EPI_ISL_969",
            "date": "1993-07-01",  # month and day unknown
            "region": "China",
            "seq": "GATCGAATCTGTACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGGAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAGGCTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACAAATCCACTAACAGTAGAAGTACCATACATTTGTACAAAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAATCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG",
        },
        {
            "strain": "B/Sichuan/379/99",
            "isolate_id": "EPI_ISL_21113",
            "date": "1999-07-01",  # month and day unknown
            "region": "China",
            "seq": "GAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTAACTGGTGCGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAACCTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGATCACACCTTCGGCAAAAGCTTCAATACTCCACGAAATCAAACCTGTTACATCCGGATGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAAAATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCTTACAGACTTGGAACTTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCACACATCTGTACAAAAGAAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAATAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCGGACCAAACAGAGGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGGATTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGTAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCT",
        },
        {
            "strain": "B/Shanghai/361/2002",
            "isolate_id": "EPI_ISL_2842",
            "date": "2002-06-12",
            "region": "China",
            "seq": "AATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACTCCAATAAAATCTCATTTTGCAAATCTCAAAGGAACAAGGACTAGAGGGAAACTATGCCCAGATTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCCCTAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCCACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATCTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGGTTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTC",
        },
        {
            "strain": "B/Florida/4/2006",
            "isolate_id": "EPI_ISL_21307",
            "date": "2006-11-01",
            "region": "NorthAmerica",
            "seq": "ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGAATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCCACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGCACAGATCTGGATGTGGCTTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCGAAAGCTTCAATACTCCACGAAGTCAAACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGCTATCAACCCAAAACGTCATCGATGCGGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAGAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATCACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACACTATGTTTCTCAGATTGGCAGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGGATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTACCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGCTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCGGACCTTAAGAGTACGCAAGAAGCTATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGAGATAGGAAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA",
        },
        {
            "strain": "B/Wisconsin/01/2010",
            "isolate_id": "EPI_ISL_76940",
            "date": "2010-02-20",
            "region": "NorthAmerica",
            "seq": "ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA",
        },
        {
            "strain": "B/Massachusetts/02/2012",
            "isolate_id": "EPI_ISL_121434",
            "date": "2012-03-13",
            "region": "NorthAmerica",
            "seq": "ATGAAGGCAATAATTGTACTACTAATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGTACAGATCTGGATGTGGCCCTGGGCAGGCCAATGTGTGTGGGAACTACACCTTCTGCGAAAGCTTCAATACTTCACGAAGTCAGACCTGTTACATCCGGGTGCTTCCCTATAATGCACGACAGAACAAAAATCAGGCAACTAGCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCATTAACAGTAGAAGTACCATACATTTGTGCAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTCGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTTGCTGCAGACCTTAAGAGCACACAAGAAGCTATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGGCTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGACCTCAGAGCTGACACTATAAGTTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAATGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAGTTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA",
        },
        {
            "strain": "B/PHUKET/3073/2013",
            "isolate_id": "EPI_ISL_161843",
            "date": "2013-11-21",
            "region": "SoutheastAsia",
            "seq": "ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAACGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCATGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAAGATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTGGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCGGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACGCATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGGGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAGAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAAAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAACTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAAAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAAAGAAGGTTAGGCCTTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTCATCATTACAAAGAAAC",
        },
    ]

    # The outgroup record provides both the feature table and the sequence.
    outgroup_record = SeqIO.read("source-data/Yam_outgroup.gb", "genbank")
    wanted_genes = ["SigPep", "HA1", "HA2"]

    # Index the wanted CDS features by the first entry of their 'gene'
    # qualifier; a later feature with the same gene name replaces an
    # earlier one.
    cds_by_gene = {}
    for feature in outgroup_record.features:
        if "gene" not in feature.qualifiers or feature.type != "CDS":
            continue
        gene_name = feature.qualifiers["gene"][0]
        if gene_name in wanted_genes:
            cds_by_gene[gene_name] = feature
    self.cds = cds_by_gene

    # NOTE(review): the outgroup B/Singapore/11/94 is labelled region
    # "China" -- confirm this is intended.
    self.outgroup = {
        "strain": "B/Singapore/11/94",
        "isolate_id": "EPI_ISL_20980",
        "date": "1994-05-10",
        "region": "China",
        "seq": str(outgroup_record.seq).upper(),
    }
def __init__(self, min_length=987, **kwargs):
    '''
    Initialise the filter with its vaccine strains and outgroup.

    parameters
    min_length -- minimal length for a sequence to be acceptable
    kwargs     -- forwarded unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    # Every record below has the same five fields, in this order.
    record_fields = ('strain', 'isolate_id', 'date', 'region', 'seq')

    vaccine_records = [
        (
            'B/Beijing/184/93',
            'EPI_ISL_969',
            '1993-07-01',  # month and day unknown
            'China',
            'GATCGAATCTGTACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGGAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAGGCTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACAAATCCACTAACAGTAGAAGTACCATACATTTGTACAAAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAATCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG',
        ),
        (
            'B/Sichuan/379/99',
            'EPI_ISL_21113',
            '1999-07-01',  # month and day unknown
            'China',
            'GAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTAACTGGTGCGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAACCTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGATCACACCTTCGGCAAAAGCTTCAATACTCCACGAAATCAAACCTGTTACATCCGGATGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAAAATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCTTACAGACTTGGAACTTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCACACATCTGTACAAAAGAAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAATAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCGGACCAAACAGAGGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGGATTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGTAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCT',
        ),
        (
            'B/Shanghai/361/2002',
            'EPI_ISL_2842',
            '2002-06-12',
            'China',
            'AATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACTCCAATAAAATCTCATTTTGCAAATCTCAAAGGAACAAGGACTAGAGGGAAACTATGCCCAGATTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCCCTAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCCACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATCTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGGTTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTC',
        ),
        (
            'B/Florida/4/2006',
            'EPI_ISL_21307',
            '2006-11-01',
            'NorthAmerica',
            'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGAATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCCACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGCACAGATCTGGATGTGGCTTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCGAAAGCTTCAATACTCCACGAAGTCAAACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGCTATCAACCCAAAACGTCATCGATGCGGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAGAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATCACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACACTATGTTTCTCAGATTGGCAGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGGATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTACCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGCTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCGGACCTTAAGAGTACGCAAGAAGCTATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGAGATAGGAAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
        ),
        (
            'B/Wisconsin/01/2010',
            'EPI_ISL_76940',
            '2010-02-20',
            'NorthAmerica',
            'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
        ),
        (
            'B/Massachusetts/02/2012',
            'EPI_ISL_121434',
            '2012-03-13',
            'NorthAmerica',
            'ATGAAGGCAATAATTGTACTACTAATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGTACAGATCTGGATGTGGCCCTGGGCAGGCCAATGTGTGTGGGAACTACACCTTCTGCGAAAGCTTCAATACTTCACGAAGTCAGACCTGTTACATCCGGGTGCTTCCCTATAATGCACGACAGAACAAAAATCAGGCAACTAGCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCATTAACAGTAGAAGTACCATACATTTGTGCAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTCGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTTGCTGCAGACCTTAAGAGCACACAAGAAGCTATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGGCTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGACCTCAGAGCTGACACTATAAGTTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAATGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAGTTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
        ),
        (
            'B/PHUKET/3073/2013',
            'EPI_ISL_161843',
            '2013-11-21',
            'SoutheastAsia',
            'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAACGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCATGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAAGATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTGGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCGGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACGCATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGGGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAGAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAAAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAACTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAAAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAAAGAAGGTTAGGCCTTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTCATCATTACAAAGAAAC',
        ),
    ]
    self.vaccine_strains = [
        dict(zip(record_fields, record)) for record in vaccine_records
    ]

    # NOTE(review): the outgroup B/Singapore/11/94 is labelled region
    # 'China' -- confirm this is intended.
    outgroup_record = (
        'B/Singapore/11/94',
        'EPI_ISL_20980',
        '1994-05-10',
        'China',
        'ATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCATACGTTTGTACAGAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCAGCAGACCTTAAGAGTACGCAAGAAGCCATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGGAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTACTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGATAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAATGTTTCATGCTCCATCTGTCTATAAGGAAAATTAAGCCCTGTATTTTCCTTTATTGTAGTGCTTGTTTGTTTGTTACCATTACAAAGAAACGTTATTGA',
    )
    self.outgroup = dict(zip(record_fields, outgroup_record))
def __init__(self, min_length=0, **kwargs): ''' parameters min_length -- minimal length for a sequence to be acceptable ''' flu_filter.__init__(self, **kwargs) self.min_length = min_length self.vaccine_strains = [{ 'strain': 'A/mallard/Maryland/13OS3318/2014', 'isolate_id': 'EPI-ISL-216765', 'date': '2014-06-24', 'lab': 'Other Database Import', 'region': 'NorthAmerica', 'seq': 'ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGAATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTACCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTATGCGGAGCTAAAGTGGTTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTGTAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGGCGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGGAGCTTCAGTAGACAATAACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAGAAGTACACAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAATGAAATAGAACATCAAATCGGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATATCACAAATGCAATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGC
TGTCATCATGGGGCTTGTTTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG', }] self.new_strains = [{ "strain": "A/Unknown/Unknown/Batch2-1_002_01102017_4_H10N6", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_007_01102017_4_H10N4", "db": "Unknown", 
"accession": "Unknown", "date": "2017-01-10", "seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCAGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACGCAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_008_01102017_4_H10N7", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGCACTAGTATTCGCGCTCCTTGGAGCGGTGGATGGTCTTGATAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCATAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAATCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACCTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTACGCGGAGCTAAAGTGGCTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGAAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGACGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATTACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTGGGCATTCAATCAGGAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTAAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCGGTAACGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAGATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGAAAAGGGTGCTTTGAAATATATCACAAATGCGATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTCTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_010_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGRGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_013_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCTGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGTTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_021_01102017_4_H10N4", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCAGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_027_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATTTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_028_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGACAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_030_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_031_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCATCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_033_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_034_01102017_4_H10N7", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGCACTAGTATTCGCGCTCCTTGGAGCGGTGGATGGTCTTGATAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCATAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAATCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACCTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTACGCGGAGCTAAAGTGGCTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGAAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGACGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATTACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTGGGCATTCAATCAGGAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTAAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCGGTAACGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAGATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGAAAAGGGTGCTTTGAAATATATCACAAATGCGATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTCTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_036_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_037_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAACCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTGATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCTGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_039_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_042_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_043_01102017_4_H10N4", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAGATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCGGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_044_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGCACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_048_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGACTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }] tmp_outgroup = SeqIO.read( '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/H10_outgroup.gb', 'genbank') genome_annotation = tmp_outgroup.features self.cds = { x.qualifiers['gene'][0]: x for x in genome_annotation if 'gene' in x.qualifiers and x.type == 'CDS' and 
x.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2'] } self.outgroup = { 'strain': 'A/mallard-duck/ALB/302/1977', 'region': 'NorthAmerica', 'isolate_id': 'EPI-ISL-8890', 'date': '1977-08-13', #(Month and day unknown) 'seq': str(tmp_outgroup.seq).upper() }
def __init__(self, min_length=0, outgroup_file=None, **kwargs):
    '''
    Initialise the filter: reference strains, CDS annotation and outgroup.

    parameters
    min_length    -- minimal length for a sequence to be acceptable
    outgroup_file -- path of the GenBank file holding the outgroup record.
                     None (the default) keeps the historical location that
                     was previously hard-coded here, so existing callers are
                     unaffected. Only the sequence and the CDS features are
                     taken from this file; the outgroup metadata below is
                     fixed, so the override is meant for pointing at another
                     copy of the same record.
    **kwargs      -- passed through unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    # Reference strains with hard-coded metadata and sequence.
    # The sequences contain runs of '-' (presumably alignment gaps -- confirm
    # against the pipeline before stripping or re-aligning them).
    self.vaccine_strains = [
        {
            'strain': 'A/turkey/Italy/3889/99',
            'isolate_id': 'EPI-ISL-2746',
            # NOTE(review): the strain name ends in /99 but the date says
            # 1990 -- confirm whether 1999 was intended.
            'date': '1990-07-01',  # (Month and day unknown)
            'region': 'Europe',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGGTGGCGATCATTCCGACAAATGCAGACAAAATCTGCCTTGGGCATCATGCCGTGTCAAACGGGACTAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTTAATGCAACTGAAACGGTGGAACGAACAAACGTCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTTCTGGGAACAATCACTGGGCCACCCCAATGTGACCAATTCCTAGAATTTTCAGCCGATCTAATTATTGAGAGGCGAGAAGGAAGTGGTGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACATACAGCGGAATAAGAACTAATGGAACAACCAGTGCATGTAGGAGATTAGGATCTTCATTCTATGCAGAGATGAAATGGCTCCTGTCAAACACAGACAATGCTGCTTTCCCGCAGATGACTAAGTCATACAAAAACACAAGGAAAGACCCAGCTCTGATAATATGGGGGATCCACCATTCCGGATCAACTACAGAACAGACCAAGCTATATGGGAGTGGAAACAAACTGATAACAGTTGGGAGTTCTAATTACCAACAGTCCTTTGTACCGAGTCCAGGAGCGAGACCACAAGTGAATGGCCAATCTGGAAGAATTGACTTTCATTGGCTGATGCTAAACCCCAATGACACAGTCACTTTCAGTTTCAATGGGGCCTTCATAGCTCCAGACCGTGCAAGTTTTCTGAGAGGGAAGTCTATGGGGATTCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGAGATTGCTATCACAGTGGAGGGACAATAATAAGTAATTTGCCCTTTCAGAACATAAATAGCAGGGCAGTAGGGAAATGTCCGAGATATGTTAAGCAAGAGAGTCTGCTGCTGGCAACAGGGATGAAGAATGTTCCCGAAGTTCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGTCTGATTGATGGGTGGTATGGCTTCAGGCATCAAAATGCACAAGGGGAGGGAACTGCTGCAGATTACAAAAGCACCCAATCAGCAATTGATCAAGTAACAGGAAAATTGAACCGGCTTATAGAAAAAACTAACCAACAATTTGAGTTAATAGACAATGAATTCACTGAGGTTGAAAAGCAAATTGGCAATGTGATAAATTGGACCAGAGATTCCATGACAGAAGTGTGGTCCTATAACGCTGAACTCTTGGTAGCAATGGAGAACCAGCACACAATTGATCTGGCCGACTCAGAAATGAACAAACTATACGAACGAGTGAAGAGACAACTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTCGAAATATTTCACAAGTGTGATGACGACTGTATGGCCAGTATTAGAAACAACACCTATGATCACAGCAAGTACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTGGCCATTGCAATGGGCCTTGTCTTCATATGTGTGAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        },
        {
            'strain': 'A/mallard/Netherlands/12/00',
            'isolate_id': 'EPI-ISL-3548',
            'date': '2000-07-01',  # (Month and day unknown)
            'region': 'Europe',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGATGGCGATCATTCCGACAAATGCAGACAAGATCTGCCTTGGGCATCATGCCGTGTCAAACGGGACTAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTTAATGCAACTGAAACGGTGGAACGAACAAACGTCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTTGGTCAATGTGGACTTCTGGGAACAATCACTGGGCCGCCCCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACCTACAGCGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAGATGAAATGGCTCCTGTCAAACACAGACAATGCTGCTTTCCCGCAGATGACTAAGTCATACAAAAACACAAGGAAAGACCCAGCTCTGATAATATGGGGGATCCACCATTCCGGATCAACTACAGAACAGACCAAGCTATATGGGAGTGGAAACAAACTGATAACAGTTGGGAGTTCTAATTACCAACAGTCCTTTGTACCGAGTCCAGGAGCGAGACCACAAGTAAATGGCCAATCTGGAAGAATTGACTTTCATTGGCTGATACTAAACCCCAATGACACAGTTACTTTCAGTTTCAATGGGGCCTTCATAGCTCCAGACCGTGCAAGCTTTCTGAGAGGGAAGTCTATGGGAATTCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGAGATTGCTATCATAGTGGAGGGACAATAATAAGTAATTTGCCCTTTCAGAACATAAATAGCAGGGCAGTAGGAAAATGTCCGAGATATGTTAAGCAAGAGAGTCTGCTGCTGGCAACAGGAATGAAGAATGTTCCCGAAATCCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGTCTGATTGATGGGTGGTATGGCTTCAGGCATCAAAATGCACAAGGGGAGGGAACTGCTGCAGATTACAAAAGCACCCAATCAGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACTAACCAACAATTTGAGTTAATAGACAATGAATTCACTGAGGTTGAAAAGCAAATTGGCAATGTGATAAATTGGACCAGAGATTCCATGACAGAAGTGTGGTCCTATAACGCTGAACTCTTGGTAGCAATGGAGAATCAGCACACAATTGATCTGGCCGACTCAGAAATGAACAAACTGTACGAACGAGTGAAGAGACAACTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTCGAAATATTTCACAAGTGTGATGACGACTGTATGGCCAGTATTAGAAACAACACCTATGATCACAGCAAGTACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTGGCCATTGCAATGGGCCTTGTCTTCATATGTGTGAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        },
        {
            'strain': 'A/human/New-York/107/2003',
            'isolate_id': 'EPI-ISL-16424',
            'date': '2003-07-01',
            'region': 'NorthAmerica',
            'seq': 'ATGAACACTCAAATTCTGGCATTCATTGCTTGTGTGCTGACTGGAGTTAAAGGAGACAAAATATGTCTTGGGCACCATGCTGTGGCAAATGGAACAAAAGTGAACACATTAACAGAGAGGGGGATTGAAGTAGTGAATGCCACAGAGACAGTGGAAACTACGAATATCAAGAAAATATGTACCCAGGGGAAAAGGCCAACAGATCTGGGACAATGTGGACTTCTAGGAACCCTAATAGGACCTCCCCAATGTGATCAATTCCTGGAGTTTTCCTCTGATTTGATAATTGAGCGAAGAGAAGGAACCGATATATGCTATCCCGGTAGATTCACAAACGAAGAATCACTGAGGCAGATCCTTCGAAGATCAGGAGGAATTGGTAAGGAGTCAATGGGCTTCACCTATAGTGGAATAAGAACCAATGGAGCGACAAGTGCCTGCACAAGATCAGGTTCTTCTTTCTATGCAGAGATGAAGTGGTTGCTGTCGAATTCAGATAATGCAGCATTCCCACAGATGACAAAGGCGTATAGAAATCCCAGAAACAAACCAGCTCTGATAATTTGGGGAGTTCATCACTCTGAATCGGTTAGCGAGCAGACCAAACTCTATGGAAGTGGAAACAAGTTGATAACAGTAAGAAGCTCAAAATACCAGCAATCATTCACCCCAAATCCGGGAGCACGA------------------------AGAATCGATTTCCACTGGCTACTCCTTGATCCCAATGACACAGTGACCTTCACTTTCAATGGAGCATTCATAGCCCCTGACAGGACAAGTTTCTTTAGGGGAGAATCACTAGGAGTCCAGAGTGATGCTCCTTTGGATTCAAGTTGTAGAGGAGATTGCTTTCACAGTGGGGGTACGATAGTCAGTTCCCTGCCATTCCAAAACATCAACTCTAGAACTGTGGGGAAATGCCCTCGGTATGTCAAACAGAAAAGCCTCCTTCTGGCTACAGGAATGAGAAATGTTCCAGAGAAACCAAAGCCC------------------------------AGAGGCCTTTTTGGAGCAATTGCTGGATTCATAGAGAATGGATGGGAGGGTCTCATCAATGGATGGTATGGTTTCAGACATCAAAATGCACAAGGAGAGGGAACTGCAGCTGACTACAAAAGCACCCAGTCTGCAATAGATCAGATCACAGGCAAATTGAATCGTTTAATTGGCAAAACAAATCAGCAGTTTGAGCTGATAGACAATGAGTTCAATGAGATAGAACAACAAATAGGAAATGTCATTAATTGGACAAGAGACGCAATGACTGAGATATGGTCGTATAATGCTGAGCTGTTGGTGGCAATGGAAAATCAGCATACAATAGATCTTGCGGACTCAGAAATGAGCAAACTTTATGAGCGTGTCAAAAAACAACTAAGGGAGAATGCTGAAGAAGATGGAACTGGATGTTTTGAGATATTTCATAAATGTGACGATCAGTGTATGGAGAGCATAAGGAACAACACGTATGACCATACTCAATACAGAACAGAGTCATTGCAGAATAGAATACAGATAGACCCAGTGAAGTTGAGTAGTGGGTACAAAGACATAATCTTATGGTTTAGCTTCGGGGCATCATGTTTTCTTCTTCTAGCCATTGCAATGGGACTGGTTTTCATTTGCATAAAGAATGGAAACATGCAGTGCACTATTTGTATATAG',
        },
        {
            'strain': 'A/human/Shanghai/2/2013',
            'isolate_id': 'EPI-ISL-138738',
            'date': '2013-03-05',
            'region': 'China',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGATTGCGATCATTCCAACAAATGCAGACAAAATCTGCCTCGGACATCATGCCGTGTCAAACGGAACCAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTCAATGCAACTGAAACAGTGGAACGAACAAACATCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTCCTGGGGACAATCACTGGACCACCTCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGAGAATCAGGCGGAATTGACAAGGAAGCAATGGGATTCACATACAGTGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAAATGAAATGGCTCCTGTCAAACACAGATAATGCTGCATTCCCGCAGATGACTAAGTCATATAAAAATACAAGAAAAAGCCCAGCTCTAATAGTATGGGGGATCCATCATTCCGTATCAACTGCAGAGCAAACCAAGCTATATGGGAGTGGAAACAAACTGGTGACAGTTGGGAGTTCTAATTATCAACAATCTTTTGTACCGAGTCCAGGAGCGAGACCACAAGTTAATGGTCTATCTGGAAGAATTGACTTTCATTGGCTAATGCTAAATCCCAATGATACAGTCACTTTCAGTTTCAATGGGGCTTTCATAGCTCCAGACCGTGCAAGCTTCCTGAGAGGAAAATCTATGGGAATCCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGGGACTGCTATCATAGTGGAGGGACAATAATAAGTAACTTGCCATTTCAGAACATAGATAGCAGGGCAGTTGGAAAATGTCCGAGATATGTTAAGCAAAGGAGTCTGCTGCTAGCAACAGGGATGAAGAATGTTCCTGAGATTCCAAAAGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGCCTAATTGATGGTTGGTATGGTTTCAGACACCAGAATGCACAGGGAGAGGGAACTGCTGCAGATTACAAAAGCACTCAATCGGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACCAACCAACAATTTGAGTTGATAGACAATGAATTCAATGAGGTAGAGAAGCAAATCGGTAATGTGATAAATTGGACCAGAGATTCTATAACAGAAGTGTGGTCATACAATGCTGAACTCTTGGTAGCAATGGAGAACCAGCATACAATTGATCTGGCTGATTCAGAAATGGACAAACTGTACGAACGAGTGAAAAGACAGCTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTTGAAATATTTCACAAGTGTGATGATGACTGTATGGCCAGTATTAGAAATAACACCTATGATCACAGCAAATACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTAGCCATTGTAATGGGCCTTGTCTTCATATGTGTAAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        },
        {
            'strain': 'A/human/Anhui/1/2013',
            'isolate_id': 'EPI-ISL-138739',
            'date': '2013-03-20',
            'region': 'China',
            'seq': 'ATGAACACTCAAATCCTGGTATTCGCTCTGATTGCGATCATTCCAACAAATGCAGACAAAATCTGCCTCGGACATCATGCCGTGTCAAACGGAACCAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTCAATGCAACTGAAACAGTGGAACGAACAAACATCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGTCAATGTGGACTCCTGGGGACAATCACTGGACCACCTCAATGTGACCAATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGAGAATCAGGCGGAATTGACAAGGAAGCAATGGGATTCACATACAGTGGAATAAGAACTAATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAAATGAAATGGCTCCTGTCAAACACAGATAATGCTGCATTCCCGCAGATGACTAAGTCATATAAAAATACAAGAAAAAGCCCAGCTCTAATAGTATGGGGGATCCATCATTCCGTATCAACTGCAGAGCAAACCAAGCTATATGGGAGTGGAAACAAACTGGTGACAGTTGGGAGTTCTAATTATCAACAATCTTTTGTACCGAGTCCAGGAGCGAGACCACAAGTTAATGGTCTATCTGGAAGAATTGACTTTCATTGGCTAATGCTAAATCCCAATGATACAGTCACTTTCAGTTTCAATGGGGCTTTCATAGCTCCAGACCGTGCAAGCTTCCTGAGAGGAAAATCTATGGGAATCCAGAGTGGAGTACAGGTTGATGCCAATTGTGAAGGGGACTGCTATCATAGTGGAGGGACAATAATAAGTAACTTGCCATTTCAGAACATAGATAGCAGGGCAGTTGGAAAATGTCCGAGATATGTTAAGCAAAGGAGTCTGCTGCTAGCAACAGGGATGAAGAATGTTCCTGAGATTCCAAAGGGA------------------------------AGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAAAATGGATGGGAAGGCCTAATTGATGGTTGGTATGGTTTCAGACACCAGAATGCACAGGGAGAGGGAACTGCTGCAGATTACAAAAGCACTCAATCGGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAAAACCAACCAACAATTTGAGTTGATAGACAATGAATTCAATGAGGTAGAGAAGCAAATCGGTAATGTGATAAATTGGACCAGAGATTCTATAACAGAAGTGTGGTCATACAATGCTGAACTCTTGGTAGCAATGGAGAACCAGCATACAATTGATCTGGCTGATTCAGAAATGGACAAACTGTACGAACGAGTGAAAAGACAGCTGAGAGAGAATGCTGAAGAAGATGGCACTGGTTGCTTTGAAATATTTCACAAGTGTGATGATGACTGTATGGCCAGTATTAGAAATAACACCTATGATCACAGCAAATACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACCCAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACTTCTAGCCATTGTAATGGGCCTTGTCTTCATATGTGTAAAGAATGGAAACATGCGGTGCACTATTTGTATATAA',
        },
    ]

    # Previously this path was hard-coded in the SeqIO.read call, which tied
    # the class to a single developer machine. It is now only the fallback.
    if outgroup_file is None:
        outgroup_file = '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/H7_outgroup.gb'
    outgroup_record = SeqIO.read(outgroup_file, 'genbank')

    # Keep only the CDS features of the three genes of interest, keyed by
    # their gene name (first value of the 'gene' qualifier).
    wanted_genes = ('SigPep', 'HA1', 'HA2')
    self.cds = {
        feature.qualifiers['gene'][0]: feature
        for feature in outgroup_record.features
        if 'gene' in feature.qualifiers
        and feature.type == 'CDS'
        and feature.qualifiers['gene'][0] in wanted_genes
    }

    # Outgroup metadata is fixed here; only the sequence comes from the
    # GenBank record, converted to an upper-case plain string.
    self.outgroup = {
        'strain': 'A/equine/Prague/2/1956',
        'db': 'Other-Database-Import',
        'isolate_id': 'EPI-ISL-89157',
        'date': '1956-06-11',
        'country': 'CzechRepublic',
        'region': 'Europe',
        'seq': str(outgroup_record.seq).upper(),
    }
def __init__(self,min_length = 0, **kwargs): ''' parameters min_length -- minimal length for a sequence to be acceptable ''' flu_filter.__init__(self, **kwargs) self.min_length = min_length self.vaccine_strains =[ { 'strain':'A/mallard/Maryland/13OS3318/2014', 'isolate_id':'EPI-ISL-216765', 'date': '2014-06-24', 'lab':'Other Database Import', 'region':'NorthAmerica', 'seq':'ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGAATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTACCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTATGCGGAGCTAAAGTGGTTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTGTAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGGCGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGGAGCTTCAGTAGACAATAACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAGAAGTACACAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAATGAAATAGAACATCAAATCGGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATATCACAAATGCAATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTC
ATCATGGGGCTTGTTTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG', } ] self.new_strains =[ { "strain": "A/Unknown/Unknown/Batch2-1_002_01102017_4_H10N6", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_007_01102017_4_H10N4", "db": "Unknown", 
"accession": "Unknown", "date": "2017-01-10", "seq": "ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCAGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACGCAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_008_01102017_4_H10N7", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGCACTAGTATTCGCGCTCCTTGGAGCGGTGGATGGTCTTGATAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCATAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAATCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACCTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTACGCGGAGCTAAAGTGGCTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGAAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGACGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATTACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTGGGCATTCAATCAGGAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTAAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCGGTAACGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAGATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGAAAAGGGTGCTTTGAAATATATCACAAATGCGATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTCTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_010_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGRGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_013_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCTGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGTTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_021_01102017_4_H10N4", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCAGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_027_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATTTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_028_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGACAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_030_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_031_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCATCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_033_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_034_01102017_4_H10N7", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGCACTAGTATTCGCGCTCCTTGGAGCGGTGGATGGTCTTGATAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCATAAACGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAATCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGATCTACACCTTACCGGAACATGGGACACCTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATCGACAAGATAAGCACTGGGTTTACATATGGATCATCCATCAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGAGGAAATAGTTTCTACGCGGAGCTAAAGTGGCTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGAAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAATGGCCAAAGTGGACGGATTGATTTCCATTGGACGATGGTACAACCAGGTGATAACATTACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTGGGCATTCAATCAGGAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAATATGTGAACAAAAAGAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGGATGGTAGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTAAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCGGTAACGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAGATGCTGAATTTATATGAGAGAGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGAAAAGGGTGCTTTGAAATATATCACAAATGCGATGACAACTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTCTTCTTCTGCTTGAAAAATGGAAACATGCGCTGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_036_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_037_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAACCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTGATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCTGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_039_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_042_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_043_01102017_4_H10N4", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAGATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCGGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_044_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGCACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGATTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", }, { "strain": "A/Unknown/Unknown/Batch2-1_048_01102017_4_H10N5", "db": "Unknown", "accession": "Unknown", "date": "2017-01-10", "seq": 
"ATGTACAAAATAGTACTAGTATTCGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGTCTTGGACATCATGCAGTCTCCAATGGCACCATCGTAAAGACTCTCACAAATGAAAAGGAAGAGGTGACCAATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGAAATTACAAGGACTTAGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTTACCGGAACATGGGACACTTTAATAGAGAGAGACAATTCTATTGCCTATTGTTACCCGGGTGCCACTGTGAATGAAGAAGCATTAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTCACATATGGATCATCCATCAATTCAGCTGGAACCACTCAAGCATGCATGAGAAATGGAGGGAATAGTTTCTATGCGGAGCTAAAGTGGCTAGTGTCGAAGAACACAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAGAACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACAAAAGAGAAGAATGATCTGTATGGAACACAATCACTTTCCATTTCAGTGGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCACAAGTGAAAGGCCAAAGTGGGCGGATTGATTTTCATTGGACGATGGTACAGCCAGGTGATAACATCACTTTCTCGCATAATGGTGGACTAATAGCACCCAGCAGAGTGAGTAAGCTAAAGGGAAGAGGCCTTGGCATTCAATCAGTAGCTTCAGTAGACAATGACTGTGAGTCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAACTCCCTTTTCAGAATCTTTCTCCAAGAACCGTGGGTCAATGCCCCAAGTATGTGAACAAAAGAAGCCTGTTGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGGATTCATAGAAAATGGATGGGAAGGAATGGTGGATGGTTGGTATGGTTTCCGGCACCAAAATGCCCAAGGCACTGGTCAGGCCGCGGATTACAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGGCTGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATCAGTAATGTAATAAACTGGACTAAAGATTCTATAACAGACATCTGGACGTATCAAGCTGAATTGCTGGTAGCAATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGCAACTAAGGCAAAATGCTGAAGAAGATGGGAAAGGGTGCTTTGAAATATACCACAAATGCGATGACAATTGTATGGAAAGCATCAGAAACAACACTTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTCAACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCATGCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTAAAAAATGGAAACATGCGATGCACAATCTGTATTTAG", } ] tmp_outgroup = SeqIO.read('/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/H10_outgroup.gb', 'genbank') genome_annotation = tmp_outgroup.features self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation if 'gene' in x.qualifiers and x.type=='CDS' and x.qualifiers['gene'][0] 
in ['SigPep', 'HA1', 'HA2']} self.outgroup = { 'strain':'A/mallard-duck/ALB/302/1977', 'region':'NorthAmerica', 'isolate_id':'EPI-ISL-8890', 'date':'1977-08-13', #(Month and day unknown) 'seq': str(tmp_outgroup.seq).upper() }
def __init__(self, aln_fname, outgroup, include_ref_strains=True, outdir='./', formats=['pdf', 'png'], verbose=0, **kwargs): process.__init__(self, **kwargs) flu_filter.__init__(self, alignment_file=aln_fname, **kwargs) tree_refine.__init__(self, **kwargs) virus_clean.__init__(self, **kwargs) self.midpoint_rooting = False self.include_ref_strains = include_ref_strains self.verbose = verbose self.formats = formats self.outdir = outdir.rstrip('/') + '/' self.auspice_tree_fname = self.outdir + 'tree.json' self.auspice_align_fname = self.outdir + 'aln.fasta' self.auspice_aa_align_fname = self.outdir + 'aa_aln.fasta' self.auspice_sequences_fname = self.outdir + 'sequences.json' self.auspice_frequencies_fname = None self.auspice_meta_fname = self.outdir + 'meta.json' self.path_to_augur = path_to_augur if os.path.isfile(outgroup): tmp = [{ 'strain': seq.name, 'seq': str(record.seq).upper(), 'desc': seq.description } for seq in SeqIO.parse(outgroup, 'fasta')] if len(tmp): self.outgroup = tmp[0] if len(tmp) > 1: print "More than one sequence in ", outgroup, "taking first" if self.verbose: print "using outgroup found in file ", outgroup elif outgroup == 'auto': print "automatically determine outgroup" self.auto_outgroup_blast() elif isinstance(outgroup, basestring): seq_names = [x['strain'] for x in self.viruses] if outgroup in seq_names: self.outgroup = self.viruses.pop(seq_names.index(outgroup)) if self.verbose: print "using outgroup found in alignment", outgroup else: standard_outgroups = self.load_standard_outgroups() if outgroup in standard_outgroups: self.outgroup = standard_outgroups[outgroup] if self.verbose: print "using standard outgroup", outgroup else: raise ValueError("outgroup %s not found" % outgroup) return if "anno:" in self.outgroup['desc']: anno = [x for x in self.outgroup['desc'].split() if "anno:" in x][0] anno = (anno.split(':')[1]).split('_') tmp = [(anno[2 * i], int(anno[2 * i + 1])) for i in range(len(anno) / 2)] self.anno = sorted(tmp, key=lambda x: 
x[1]) print("Using annotation", self.anno) else: self.anno = None print("No annotation found") #self.anno = sorted((('SP',0), ('HA1',16), ('HA2',329+16)), key=lambda x:x[1]) self.viruses.append(self.outgroup) self.filter_geo(prune=False) self.make_strain_names_unique()
def __init__(self,min_length = 900, **kwargs): ''' parameters min_length -- minimal length for a sequence to be acceptable ''' flu_filter.__init__(self, **kwargs) self.min_length = min_length self.vaccine_strains =[ { "strain": "A/Wisconsin/67/2005", "db": "IRD", "accession": "CY163984", "date": "2005-08-31", "region": "north_america", "country": "usa", "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGGAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACGATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAACAAGCTCTTCTTGCAAAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTG
CTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA" }, { "strain": "A/Brisbane/10/2007", "db": "IRD", "accession": "CY113005", "date": "2007-02-06", "region": "oceania", "country": "australia", "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCACTCAAAAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAATGACCAAATCTTCCCGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACTGTAATCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAACGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACCAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACAATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA" }, { "strain": "A/Perth/16/2009", "db": "IRD", "accession": "GQ293081", "date": 
"2009-04-07", "region": "oceania", "country": "australia", "seq": "ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAAAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGCTCTGCTTGCATAAGGAGATCTAAAAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAAGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACCGTAAGCCCGAATATCGGATCTAGACCCAGAGTAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATAGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTTCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGA" }, { "strain": "A/Victoria/361/2011", "db": "IRD", "accession": "GQ293081", "date": "2011-10-24", "region": "oceania", "country": "australia", "seq": 
"ATGAAGACTATCATTGCTTTGAGCCACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCGCTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACAAGGAACAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATATAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTCGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTAAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA" }, { "strain": "A/Texas/50/2012", "db": "GISAID", "isolate_id": "EPI_ISL_129858", "date": "2012-04-15", "region": "north_america", "country": "usa", "seq": 
"ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAACTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGAATGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAATAATAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTTCAAATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAACCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGGAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA", }, { "strain": "A/Switzerland/9715293/2013", "db": "GISAID", "isolate_id": "EPI_ISL_162149", "date": "2013-12-06", "region": "europe", "country": "switzerland", "seq": 
"ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAACTTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGGCTGGAGTCACTCAAAACGGAACAAGTTCTTCTTGCATAAGGGGATCTAATAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTCCAAATACCCAGCATTAAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCACAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAGACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGCTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGGAGAATTCAGGACCTTGAGAAATATGTTGAGGACACAAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCACGATGTATACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA", }, { "strain": "A/HongKong/4801/2014", "db": "GISAID", "isolate_id": "EPI_ISL_165554", "date": "2014-02-26", "region": "china", "country": "hong_kong", "seq": 
"ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAAATTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAGTAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTACACATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCATAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGAAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGGAACTTATGACCACAATGTGTACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATTTGA", }, { "strain": "A/Alaska/232/2015", "db": "GISAID", "isolate_id": "EPI787411", "date": "2015-09-09", "region": "north_america", "country": "usa", "seq": 
"GGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAAAATTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACAAATGACCGAATTGAAGTTACTAATGCTACTGAGTTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAGAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTTCTTGCATAAGGAGATCTAGTAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTACAAATATCCAGCATTGAACGTGACTATGCCAAACAAGGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTACCCGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCATAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGAAGAGTTCAAGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGAAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGAAACTTATGACCACAATGTGTACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGATGCAACATTTGCATTTGAGTGCATTAATTAAAAACAC" } ] tmp_outgroup = SeqIO.read('source-data/H3N2_outgroup.gb', 'genbank') genome_annotation = tmp_outgroup.features self.cds = {x.qualifiers['gene'][0]:x for x in genome_annotation if 'gene' in x.qualifiers and x.type=='CDS' and x.qualifiers['gene'][0] in 
['SigPep', 'HA1', 'HA2']} self.outgroup = { 'strain': 'A/Beijing/32/1992', 'db': 'IRD', 'accession': 'U26830', 'date': '1992-01-01', 'country': 'china', 'region': 'china', 'seq': str(tmp_outgroup.seq).upper() }
def __init__(self, min_length=987, **kwargs):
    '''
    Configure the filter: minimal sequence length, the hard-coded list of
    vaccine strains, the CDS annotation and the outgroup.

    parameters
    min_length -- minimal length for a sequence to be acceptable
    kwargs     -- handed on unchanged to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    # One dict per vaccine strain; 'seq' holds the nucleotide sequence.
    self.vaccine_strains = [
        {
            'strain': 'B/Beijing/184/93',
            'isolate_id': 'EPI_ISL_969',
            'date': '1993-07-01',  # (Month and day unknown)
            'region': 'China',
            'seq': 'GATCGAATCTGTACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGGAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAGGCTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACAAATCCACTAACAGTAGAAGTACCATACATTTGTACAAAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAATCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG',
        },
        {
            'strain': 'B/Sichuan/379/99',
            'isolate_id': 'EPI_ISL_21113',
            'date': '1999-07-01',  # (Month and day unknown)
            'region': 'China',
            'seq': 'GAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTAACTGGTGCGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAACCTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGATCACACCTTCGGCAAAAGCTTCAATACTCCACGAAATCAAACCTGTTACATCCGGATGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAAAATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCTTACAGACTTGGAACTTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCACACATCTGTACAAAAGAAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAATAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCGGACCAAACAGAGGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGGATTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGTAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCT',
        },
        {
            'strain': 'B/Shanghai/361/2002',
            'isolate_id': 'EPI_ISL_2842',
            'date': '2002-06-12',
            'region': 'China',
            'seq': 'AATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACTCCAATAAAATCTCATTTTGCAAATCTCAAAGGAACAAGGACTAGAGGGAAACTATGCCCAGATTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCCCTAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCCACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATCTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGGTTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTC',
        },
        {
            'strain': 'B/Florida/4/2006',
            'isolate_id': 'EPI_ISL_21307',
            'date': '2006-11-01',
            'region': 'NorthAmerica',
            'seq': 'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGAATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCCACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGCACAGATCTGGATGTGGCTTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCGAAAGCTTCAATACTCCACGAAGTCAAACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGCTATCAACCCAAAACGTCATCGATGCGGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAGAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATCACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACACTATGTTTCTCAGATTGGCAGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGGATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTACCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGCTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCGGACCTTAAGAGTACGCAAGAAGCTATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGAGATAGGAAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
        },
        {
            'strain': 'B/Wisconsin/01/2010',
            'isolate_id': 'EPI_ISL_76940',
            'date': '2010-02-20',
            'region': 'NorthAmerica',
            'seq': 'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
        },
        {
            'strain': 'B/Massachusetts/02/2012',
            'isolate_id': 'EPI_ISL_121434',
            'date': '2012-03-13',
            'region': 'NorthAmerica',
            'seq': 'ATGAAGGCAATAATTGTACTACTAATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGTACAGATCTGGATGTGGCCCTGGGCAGGCCAATGTGTGTGGGAACTACACCTTCTGCGAAAGCTTCAATACTTCACGAAGTCAGACCTGTTACATCCGGGTGCTTCCCTATAATGCACGACAGAACAAAAATCAGGCAACTAGCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCATTAACAGTAGAAGTACCATACATTTGTGCAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTCGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTTGCTGCAGACCTTAAGAGCACACAAGAAGCTATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGGCTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGACCTCAGAGCTGACACTATAAGTTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAATGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAGTTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
        },
        {
            'strain': 'B/PHUKET/3073/2013',
            'isolate_id': 'EPI_ISL_161843',
            'date': '2013-11-21',
            'region': 'SoutheastAsia',
            'seq': 'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAACGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCATGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAAGATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTGGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCGGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACGCATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGGGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAGAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAAAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAACTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAAAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAAAGAAGGTTAGGCCTTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTCATCATTACAAAGAAAC',
        },
    ]

    # The outgroup record supplies both the feature annotation and the sequence.
    outgroup_record = SeqIO.read('source-data/Yam_outgroup.gb', 'genbank')
    annotated_features = outgroup_record.features

    # Map gene name -> CDS feature, restricted to the three genes of interest.
    wanted_genes = ['SigPep', 'HA1', 'HA2']
    cds_by_gene = {}
    for feature in annotated_features:
        if 'gene' not in feature.qualifiers:
            continue
        if not feature.type == 'CDS':
            continue
        gene_name = feature.qualifiers['gene'][0]
        if gene_name in wanted_genes:
            cds_by_gene[gene_name] = feature
    self.cds = cds_by_gene

    self.outgroup = {
        'strain': 'B/Singapore/11/94',
        'isolate_id': 'EPI_ISL_20980',
        'date': '1994-05-10',
        'region': 'China',
        'seq': str(outgroup_record.seq).upper(),
    }
def __init__(self, min_length=987, **kwargs):
    '''
    Initialise the filter with a length threshold, the built-in vaccine
    strain records, the CDS features of interest and the outgroup.

    parameters
    min_length -- minimal length for a sequence to be acceptable
    kwargs     -- forwarded as-is to flu_filter.__init__
    '''
    flu_filter.__init__(self, **kwargs)
    self.min_length = min_length

    # Hard-coded vaccine strain records (metadata plus nucleotide 'seq').
    self.vaccine_strains = [
        {
            'strain': 'B/Beijing/184/93',
            'isolate_id': 'EPI_ISL_969',
            'date': '1993-07-01',  # (Month and day unknown)
            'region': 'china',
            'country': 'china',
            'seq': 'GATCGAATCTGTACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACACCAACAAAATCTCATTTTGGAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAAACTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAATATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCCTACAGGCTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAGAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACAAATCCACTAACAGTAGAAGTACCATACATTTGTACAAAAGGAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATAACAAAATCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG',
        },
        {
            'strain': 'B/Sichuan/379/99',
            'isolate_id': 'EPI_ISL_21113',
            'date': '1999-07-01',  # (Month and day unknown)
            'region': 'china',
            'country': 'china',
            'seq': 'GAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCGTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTAACTGGTGCGATACCACTGACAACAACACCAACAAAATCTCATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAACCTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGATCACACCTTCGGCAAAAGCTTCAATACTCCACGAAATCAAACCTGTTACATCCGGATGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAGCTACCCAATCTTCTCAGAGGATATGAAAAAATCAGATTATCAACCCAAAACGTTATCAACGCAGAAAAGGCACCAGGAGGACCTTACAGACTTGGAACTTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAGGGACAACAACAAAACAGCAACGAATCCACTAACAGTAGAAGTACCACACATCTGTACAAAAGAAGAAGACCAAATTACTGTTTGGGGGTTCCATTCTGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAATAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCGGACCAAACAGAGGACGGAGGGCTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGGATTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGTAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCT',
        },
        {
            'strain': 'B/Shanghai/361/2002',
            'isolate_id': 'EPI_ISL_2842',
            'date': '2002-06-12',
            'region': 'china',
            'country': 'china',
            'seq': 'AATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTGACAACAACTCCAATAAAATCTCATTTTGCAAATCTCAAAGGAACAAGGACTAGAGGGAAACTATGCCCAGATTGTCTCAACTGCACAGATCTGGATGTGGCCTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCAAAAGCTTCAATACTCCACGAAGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCCCTAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCCACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATCTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAAAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGGTGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGGTTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTC',
        },
        {
            'strain': 'B/Florida/4/2006',
            'isolate_id': 'EPI_ISL_21307',
            'date': '2006-11-01',
            'region': 'north_america',
            'country': 'usa',
            'seq': 'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGAATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCCACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGCACAGATCTGGATGTGGCTTTGGGCAGACCAATGTGTGTGGGGACCACACCTTCGGCGAAAGCTTCAATACTCCACGAAGTCAAACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGCTATCAACCCAAAACGTCATCGATGCGGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAGAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATCACTGTTTGGGGGTTCCATTCAGATGACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACACTATGTTTCTCAGATTGGCAGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGGATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTACCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGCTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCGGACCTTAAGAGTACGCAAGAAGCTATAAACAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCGCAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGATGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGAGATAGGAAATGGATGCTTCGAAACCAAACACAAGTGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCACTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
        },
        {
            'strain': 'B/Wisconsin/01/2010',
            'isolate_id': 'EPI_ISL_76940',
            'date': '2010-02-20',
            'region': 'north_america',
            'country': 'usa',
            'seq': 'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
        },
        {
            'strain': 'B/Massachusetts/02/2012',
            'isolate_id': 'EPI_ISL_121434',
            'date': '2012-03-13',
            'region': 'north_america',
            'country': 'usa',
            'seq': 'ATGAAGGCAATAATTGTACTACTAATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGTGTGATACCACTAACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAAGACCAGAGGGAAACTATGCCCAGACTGTCTCAACTGTACAGATCTGGATGTGGCCCTGGGCAGGCCAATGTGTGTGGGAACTACACCTTCTGCGAAAGCTTCAATACTTCACGAAGTCAGACCTGTTACATCCGGGTGCTTCCCTATAATGCACGACAGAACAAAAATCAGGCAACTAGCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAGGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAAGCGGATTTTTCGCAACAATGGCTTGGGCTGTCCCAAAGGACAACAACAAAAATGCAACGAACCCATTAACAGTAGAAGTACCATACATTTGTGCAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAACCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGGCTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTCGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTGAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTTGCTGCAGACCTTAAGAGCACACAAGAAGCTATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTAGAAGTAAAGAATCTTCAAAGGCTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGACCTCAGAGCTGACACTATAAGTTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAATGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAGTTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTGATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA',
        },
        {
            'strain': 'B/PHUKET/3073/2013',
            'isolate_id': 'EPI_ISL_161843',
            'date': '2013-11-21',
            'region': 'southeast_asia',
            'country': 'thailand',
            'seq': 'ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAACGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCATGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAAGATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTGGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCGGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACGCATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGGGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAGAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAAAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAACTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAAAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAAAGAAGGTTAGGCCTTGTATTTTCCTTTATTGTAGTGCTTGTTTGCTTGTCATCATTACAAAGAAAC',
        },
    ]

    # Annotation and sequence of the outgroup both come from one GenBank record.
    reference = SeqIO.read('source-data/Yam_outgroup.gb', 'genbank')
    features = reference.features

    # Keep only CDS features carrying one of the three gene names,
    # keyed by that gene name.
    self.cds = dict(
        (feat.qualifiers['gene'][0], feat)
        for feat in features
        if 'gene' in feat.qualifiers
        and feat.type == 'CDS'
        and feat.qualifiers['gene'][0] in ['SigPep', 'HA1', 'HA2']
    )

    outgroup_sequence = str(reference.seq).upper()
    # NOTE(review): 'region' is capitalised here ('China') and there is no
    # 'country' key, unlike the vaccine strain records above -- confirm
    # whether downstream code cares.
    self.outgroup = {
        'strain': 'B/Singapore/11/94',
        'isolate_id': 'EPI_ISL_20980',
        'date': '1994-05-10',
        'region': 'China',
        'seq': outgroup_sequence,
    }