def setUp(self):
    '''build the forward and reverse FASTQ handles used by the tests

    self.fq_for holds four reads whose names end in /1. self.fq_rev holds
    three reads with the same names ending in /2; read 18716:1699 has no
    /2 counterpart.
    '''
    # One literal per FASTQ line, so that no read is broken in the middle of
    # its sequence or quality line. Backslashes in the quality lines are
    # doubled because \O, \c and \[ are not valid escape sequences.
    self.fq_for = fake_fh(
        "@MISEQ578:1:1101:17145:1691#TTCAGA/1\n"
        "NTCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATCAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAACAACTGTAAGAACTG\n"
        "+MISEQ578:1:1101:17145:1691#TTCAGA/1\n"
        "B]]P]Pab_cePRPPP`efdde`efeRfgeeRPeeeeb`fffgfadfaeefeeedeabfeddbddfggggfcgfbddeggeggfggeggaeeegggggfgdgggfggeaeaddcfgedePdaddPdffeefeaPPeeefPffgeedaecb[^bfggdbedbggPac^Nb^_gfaMLLb`facgeegeafe[bBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:18716:1699#CCTGAG/1\n"
        "NCAGCGTCATAAGAGGTTTTACCTCCAAATGAAGAAATAACATCATGGTAACGCTGCATGAAGTAATCACGTTCTTGGTCAGTATGCAAATTAGCATAAGCAGCTTGCAGACCCATAATGTCAATAGATGTGGTAGAAGTCGTCATTTGGCGAGAAAGCTCAGTCTCAGGAGGAAGCGGAGCAGTCCAAATGTTTTTGAGATGGCAGCAACGGAAACCATAACGAGCATCATCTTAGATCGGAAGAGAGGT\n"
        "+MISEQ578:1:1101:18716:1699#CCTGAG/1\n"
        "BPPP]P]]PbRRcPabffffefefedaPffbPeeegggeggggegfegecfefffffgggacedggggggfgggfffgggggggggfgggggfgfgdgeggggggegdeecffdgafefggegfgQefgeafegeaPeggdfffg`edcga_b^aePfgggfggefeaeOPNNNe[N]LL_LefgeacccaOO\\ON[LM[fbbNbb`bMYMZLZMLXMXOZZNXM`__eeaaOOZOXZbBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:16445:1701#CCTGAG/1\n"
        "NCGCTCAAAGTCAAAATAATCAGCGTGACATTCAGAAGGGTAATAAGAACGAACCATAAAAAAGCCTCCAAGATTTGGAGGCATGAAAACATACAATTGGGAGGGTGTCAATCCTGACGGTTATTTCCTAGACAAATTAGAGCCAATACCATCAGCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCGGCAACAGCTTTATCAATACCATGACAAATATCAACCACACCAGAAGCAGCATCAGTGACGA\n"
        "+MISEQ578:1:1101:16445:1701#CCTGAG/1\n"
        "BPPP]]PPPPa_RRaPecPefaPeOdO`eRdfeaeaab`dfeeecePP`dd`eddefPfaea^NeggggeafdeefPPPeddefefadebfegefggeded``_d`efggggggggeaddd`dggggfeeeePaOecefPPeeeabegPeggeafeefeefdffbfeggedaP[PePP]ecOO\\cO\\ccM[LLLYMbMYYOOZbfbegefeeOXXZZNOXaBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:12954:1727#AATGTC/1\n"
        "NGTGGTGCCAGCAGCCGCGGTAATACGGAGGATCCAAGCGTTATCCGGAATTATTGGGTTTAAAGGGTCCGCAGGCTGTTTGTTAAGTCAGGGGTGAAATCCTACCGCTCAACGGTAGAACTGCCTTTGATACTGGCAAACTTGAGTTATTGTGAAGTAGTTAGAATGTGTAGTGTAGCGGTGAAATGCATAGATATTACACAGAATACCGATTGCGAAAGCAGATTACTAACAATATACTGACGATGAGG\n"
        "+MISEQ578:1:1101:12954:1727#AATGTC/1\n"
        "BP]PP]P_]aPPP``e^dd^ddfgefebdbbffeggfgffffegggffNdefgggggfdeebceeefgggffffffgggggggegeggggeaedddggggggfggdfffffgedf^fbeggggggggfgffgggeeefggggggggfecfceafaegcef\\[\\OcgeaObbeegggegfLbbLdefffaeaeefedeaeOOaO`bNZeead^ZXdfeLZL`aaaNXNaXNZNNNNXNXaaaXeBBBBBBBB\n"
    )
    self.fq_rev = fake_fh(
        "@MISEQ578:1:1101:17145:1691#TTCAGA/2\n"
        "NTCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATCAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAACAACTGTAAGAACTG\n"
        "+MISEQ578:1:1101:17145:1691#TTCAGA/2\n"
        "B]]P]Pab_cePRPPP`efdde`efeRfgeeRPeeeeb`fffgfadfaeefeeedeabfeddbddfggggfcgfbddeggeggfggeggaeeegggggfgdgggfggeaeaddcfgedePdaddPdffeefeaPPeeefPffgeedaecb[^bfggdbedbggPac^Nb^_gfaMLLb`facgeegeafe[bBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:16445:1701#CCTGAG/2\n"
        "NCGCTCAAAGTCAAAATAATCAGCGTGACATTCAGAAGGGTAATAAGAACGAACCATAAAAAAGCCTCCAAGATTTGGAGGCATGAAAACATACAATTGGGAGGGTGTCAATCCTGACGGTTATTTCCTAGACAAATTAGAGCCAATACCATCAGCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCGGCAACAGCTTTATCAATACCATGACAAATATCAACCACACCAGAAGCAGCATCAGTGACGA\n"
        "+MISEQ578:1:1101:16445:1701#CCTGAG/2\n"
        "BPPP]]PPPPa_RRaPecPefaPeOdO`eRdfeaeaab`dfeeecePP`dd`eddefPfaea^NeggggeafdeefPPPeddefefadebfegefggeded``_d`efggggggggeaddd`dggggfeeeePaOecefPPeeeabegPeggeafeefeefdffbfeggedaP[PePP]ecOO\\cO\\ccM[LLLYMbMYYOOZbfbegefeeOXXZZNOXaBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:12954:1727#AATGTC/2\n"
        "NGTGGTGCCAGCAGCCGCGGTAATACGGAGGATCCAAGCGTTATCCGGAATTATTGGGTTTAAAGGGTCCGCAGGCTGTTTGTTAAGTCAGGGGTGAAATCCTACCGCTCAACGGTAGAACTGCCTTTGATACTGGCAAACTTGAGTTATTGTGAAGTAGTTAGAATGTGTAGTGTAGCGGTGAAATGCATAGATATTACACAGAATACCGATTGCGAAAGCAGATTACTAACAATATACTGACGATGAGG\n"
        "+MISEQ578:1:1101:12954:1727#AATGTC/2\n"
        "BP]PP]P_]aPPP``e^dd^ddfgefebdbbffeggfgffffegggffNdefgggggfdeebceeefgggffffffgggggggegeggggeaedddggggggfggdfffffgedf^fbeggggggggfgffgggeeefggggggggfecfceafaegcef\\[\\OcgeaObbeegggegfLbbLdefffaeaeefedeaeOOaO`bNZeead^ZXdfeLZL`aaaNXNaXNZNNNNXNXaaaXeBBBBBBBB\n"
    )
def test_correct(self):
    '''should spread the four entries over the three output handles'''
    source = fake_fh([
        '@foo', 'AAA', '+foo', '###',
        '@bar', 'CCC', '+bar', '"""',
        '@baz', 'TTT', '+baz', '$$$',
        '@poo', 'GGG', '+poo', '===',
    ])
    sinks = [fake_fh() for _ in range(3)]

    split_fastq.split_fastq_entries(source, sinks)
    written = [sink.getvalue() for sink in sinks]

    # expected: entries 1 and 4 in the first handle, entry 2 in the second,
    # entry 3 in the third, each with a bare "+" separator line
    first = '@foo\nAAA\n+\n###\n@poo\nGGG\n+\n===\n'
    second = '@bar\nCCC\n+\n"""\n'
    third = '@baz\nTTT\n+\n$$$\n'
    self.assertEqual(written, [first, second, third])
def setUp(self):
    '''prepare fastq text, handles and parsed records for the tests

    The fastq13/fastq18 fixtures hold the same read with two different
    quality lines (named after the Illumina 1.3 and 1.8 encodings).
    '''
    self.good_fastq_content = "@foo\nAAA\n+foo\n!!!\n@bar\nCCC\n+bar\n###"

    # The backslash in the quality line is doubled: "\]" is not a valid
    # escape sequence. The runtime value is unchanged.
    self.fastq13 = """@lolapolooza:1234#ACGT/1\nAATTAAGTCAAATTTGGCCTGGCCCAGTGTCCAATGTTGT\n+\nABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefgh\n"""
    self.fastq18 = """@lolapolooza:1234#ACGT/1\nAATTAAGTCAAATTTGGCCTGGCCCAGTGTCCAATGTTGT\n+\n"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI\n"""

    # parse each fixture once, then rewind the handle so that tests can
    # read it again from the start
    self.fastq13_fh = fake_fh(self.fastq13 + "\n")
    self.fastq13_record = SeqIO.read(self.fastq13_fh, 'fastq')
    self.fastq13_fh.seek(0)

    self.fastq18_fh = fake_fh(self.fastq18 + "\n")
    self.fastq18_record = SeqIO.read(self.fastq18_fh, 'fastq')
    self.fastq18_fh.seek(0)

    # single short record kept only in parsed form
    fh = fake_fh('''@lol:1234#ACGT/1\nAAAA\n+\nAh"J\n''')
    self.fastq_ambig_record = SeqIO.read(fh, 'fastq')
def setUp(self):
    '''build a Dereplicator over six fasta entries with minimum counts 2'''
    entries = [
        ('foo', 'AA'),
        ('bar', 'CC'),
        ('baz', 'AA'),
        ('blag', 'AA'),
        ('flog', 'TT'),
        ('blob', 'TT'),
    ]

    # flatten to alternating header / sequence lines
    fasta_lines = []
    for label, seq in entries:
        fasta_lines.append('>' + label)
        fasta_lines.append(seq)

    self.derep = derep_fulllength.Dereplicator(fake_fh(fasta_lines), 2)
def test_correct(self):
    '''should write a tab-separated table with 0 for absent sample counts'''
    counts = {
        "seq0": {"donor1": 5, "donor2": 10},
        "seq1": {"donor1": 1, "donor5": 4},
        "seq2": {"donor4": 1},
    }
    totals = {'seq0': 15, 'seq1': 5, 'seq2': 1}
    donors = ['donor1', 'donor2', 'donor4', 'donor5']

    expected = (
        'sequence_id\tdonor1\tdonor2\tdonor4\tdonor5\n'
        'seq0\t5\t10\t0\t0\n'
        'seq1\t1\t0\t0\t4\n'
        'seq2\t0\t0\t1\t0\n'
    )

    out = fake_fh()
    seq_table.SeqTableWriter.write_table(counts, totals, donors, 0, out)

    # rewind before reading back what was written
    out.seek(0)
    self.assertEqual(expected, out.read())
def test_correct(self):
    '''should spread the four entries over the three output handles'''
    fastq_lines = [
        '@foo', 'AAA', '+foo', '###',
        '@bar', 'CCC', '+bar', '"""',
        '@baz', 'TTT', '+baz', '$$$',
        '@poo', 'GGG', '+poo', '===',
    ]
    handles = [fake_fh() for _ in range(3)]

    split_fastq.split_fastq_entries(fake_fh(fastq_lines), handles)
    observed = [handle.getvalue() for handle in handles]

    # lines expected in each output handle, in order; the "+" lines no
    # longer repeat the entry name
    expected_lines = [
        ['@foo', 'AAA', '+', '###', '@poo', 'GGG', '+', '==='],
        ['@bar', 'CCC', '+', '"""'],
        ['@baz', 'TTT', '+', '$$$'],
    ]
    expected = ["\n".join(lines) + "\n" for lines in expected_lines]
    self.assertEqual(observed, expected)
def test_parse_derep_fasta(self):
    '''should map each sequence to the id in front of the ";" in its header'''
    derep_lines = ['>seq0;counts=10', 'AAA', '>seq4;counts=23', 'TTT']
    parsed = index.parse_derep_fasta(fake_fh(derep_lines))
    self.assertEqual(parsed, {'AAA': 'seq0', 'TTT': 'seq4'})
def test_correct(self):
    '''should count reads per sequence name and sample, and per sequence name'''
    reads = fake_fh([
        '>sample=donor1;1', 'AAA',
        '>sample=donor1;2', 'AAA',
        '>sample=donor2;1', 'AAA',
        '>sample=donor3;1', 'TTT',
    ])
    seq_names = {'AAA': 'seqA', 'TTT': 'seqT'}

    expected_table = {
        'seqA': {'donor1': 2, 'donor2': 1},
        'seqT': {'donor3': 1},
    }
    expected_abund = {'seqA': 3, 'seqT': 1}

    got_table, got_abund = seq_table.SeqTableWriter.fasta_to_abund(reads, seq_names)
    self.assertEqual(expected_table, got_table)
    self.assertEqual(expected_abund, got_abund)
def setUp(self):
    '''create a PrimerRemover over a one-entry fastq handle'''
    self.primer = "AAAA"
    self.max_primer_diffs = 1
    self.fastq = fake_fh('''@lolapolooza\nTAAAACATCATCATCAT\n+lolapolooza\n"#$%&'()*+,-./012\n''')
    self.primer_remover = util_primer.PrimerRemover(
        self.fastq,
        self.primer,
        self.max_primer_diffs,
    )
def test_no_barcode(self):
    '''should raise error when no barcode in the @ line'''
    d = {'ACGT': 'donor1', 'TACA': 'donor2'}
    fastq = fake_fh([
        '@foo#ACGT/1', 'AAA', '+', 'AAA',
        '@bar_bad_format/1', 'CCC', '+', 'BBB',
    ])
    it = map_barcodes.renamed_fastq_records(fastq, d, 1)

    # The first entry is OK, but the second should complain. The builtin
    # next() is used instead of the Python 2-only .next() method, so the
    # test also runs on Python 3.
    next(it)
    self.assertRaises(RuntimeError, next, it)
def test_correct(self):
    '''should collect only the matching entries'''
    old_fasta = fake_fh([">a", "AAA", ">b", "TTT", ">c", "GGG"])
    labels = ["a", "c"]

    new_fasta = uc2denovo.matching_fasta_entries(labels, old_fasta)

    # entry "b" is not among the labels, so "a" and "c" are expected back
    observed = [
        new_fasta[0].id, str(new_fasta[0].seq),
        new_fasta[1].id, str(new_fasta[1].seq),
    ]
    self.assertEqual(observed, ['a', 'AAA', 'c', 'GGG'])
def test_correct(self):
    '''should turn the fastq13 record into the expected fastq18 text'''
    converted = convert_fastq.convert_record_illumina13_to_18(self.fastq13_record)

    # serialise the converted record and compare the raw fastq text
    out = fake_fh()
    SeqIO.write(converted, out, 'fastq')
    self.assertEqual(out.getvalue(), self.fastq18)
def setUp(self):
    '''set up a three-barcode map and a three-entry fastq handle

    The last fastq entry carries barcode ACT, which is not in the map.
    '''
    self.barcode_map = dict(AAA='sampleA', TTT='sampleT', CCC='sampleC')

    fastq_lines = []
    for header in ('@lol#AAA/1', '@hoo#TTT/1', '@crap#ACT/1'):
        # every entry shares the same sequence and quality lines
        fastq_lines.extend([header, 'CAT', '+', 'aaa'])
    self.fastq = fake_fh(fastq_lines)
def test_correct(self):
    '''should write a tab-separated table with 0 for absent sample counts'''
    table = {
        "seq0": {"donor1": 5, "donor2": 10},
        "seq1": {"donor1": 1, "donor5": 4},
        "seq2": {"donor4": 1},
    }
    abund = {'seq0': 15, 'seq1': 5, 'seq2': 1}
    samples = ['donor1', 'donor2', 'donor4', 'donor5']

    # expected output, one list of cells per line
    rows = [
        ['sequence_id', 'donor1', 'donor2', 'donor4', 'donor5'],
        ['seq0', '5', '10', '0', '0'],
        ['seq1', '1', '0', '0', '4'],
        ['seq2', '0', '0', '1', '0'],
    ]
    wanted = "".join("\t".join(cells) + "\n" for cells in rows)

    handle = fake_fh()
    seq_table.SeqTableWriter.write_table(table, abund, samples, 0, handle)
    handle.seek(0)
    written = handle.read()
    self.assertEqual(wanted, written)
def test_correct(self):
    '''should count reads per sequence name and sample, and per sequence name'''
    fasta_lines = [
        '>sample=donor1;1', 'AAA',
        '>sample=donor1;2', 'AAA',
        '>sample=donor2;1', 'AAA',
        '>sample=donor3;1', 'TTT',
    ]
    names = {'AAA': 'seqA', 'TTT': 'seqT'}

    result = seq_table.SeqTableWriter.fasta_to_abund(fake_fh(fasta_lines), names)
    observed_table, observed_abund = result

    self.assertEqual(
        {'seqA': {'donor1': 2, 'donor2': 1}, 'seqT': {'donor3': 1}},
        observed_table,
    )
    self.assertEqual({'seqA': 3, 'seqT': 1}, observed_abund)
def test_parse_full_fasta(self):
    '''should count reads for each (sample, sequence id) pair'''
    seq_sid = {'AAA': 'seq0', 'TTT': 'seq4'}
    reads = fake_fh([
        '>sample=donor1;1', 'AAA',
        '>sample=donor1;2', 'AAA',
        '>sample=donor1;3', 'TTT',
        '>sample=donorT;1', 'TTT',
    ])

    counted = index.parse_full_fasta(reads, seq_sid)

    expected = {
        ('donor1', 'seq0'): 2,
        ('donor1', 'seq4'): 1,
        ('donorT', 'seq4'): 1,
    }
    self.assertEqual(counted, expected)
def test_correct(self):
    '''should properly rename samples'''
    d = {'ACGT': 'donor1', 'TACA': 'donor2'}
    fastq = fake_fh([
        '@foo#ACGT/1', 'AAA', '+', 'AAA',
        '@bar#TACA/1', 'CCC', '+', 'BBB',
    ])
    it = map_barcodes.renamed_fastq_records(fastq, d, 1)

    # builtin next() replaces the Python 2-only .next() method, so the test
    # also runs on Python 3
    record = next(it)
    self.assertEqual(str(record.seq), 'AAA')
    self.assertEqual(record.id, 'sample=donor1;1/1')

    record = next(it)
    self.assertEqual(str(record.seq), 'CCC')
    self.assertEqual(record.id, 'sample=donor2;1/1')
def test_no_barcode(self):
    '''should raise error when no barcode in the @ line'''
    d = {'ACGT': 'donor1', 'TACA': 'donor2'}
    fastq = fake_fh([
        '@foo#ACGT/1', 'AAA', '+', 'AAA', '@bar_bad_format/1', 'CCC', '+',
        'BBB'
    ])
    it = map_barcodes.renamed_fastq_records(fastq, d, 1)

    # The first entry is OK, but the second should complain. The iterator
    # is advanced with the builtin next() rather than the Python 2-only
    # .next() method, which does not exist on Python 3 iterators.
    next(it)
    self.assertRaises(RuntimeError, next, it)
def test_correct(self):
    '''should properly rename samples'''
    d = {'ACGT': 'donor1', 'TACA': 'donor2'}
    fastq = fake_fh([
        '@foo#ACGT/1', 'AAA', '+', 'AAA', '@bar#TACA/1', 'CCC', '+', 'BBB'
    ])
    it = map_barcodes.renamed_fastq_records(fastq, d, 1)

    # The iterator is advanced with the builtin next() rather than the
    # Python 2-only .next() method, which does not exist on Python 3
    # iterators.
    record = next(it)
    self.assertEqual(str(record.seq), 'AAA')
    self.assertEqual(record.id, 'sample=donor1;1/1')

    record = next(it)
    self.assertEqual(str(record.seq), 'CCC')
    self.assertEqual(record.id, 'sample=donor2;1/1')
def test_parse_derep_fasta(self):
    '''should map each sequence to the id in front of the ";" in its header'''
    handle = fake_fh([
        '>seq0;counts=10', 'AAA',
        '>seq4;counts=23', 'TTT',
    ])
    seq_to_id = index.parse_derep_fasta(handle)
    self.assertEqual(seq_to_id, dict(AAA='seq0', TTT='seq4'))
def test_correct(self):
    '''should return the per-sample table and the abundances, keyed by sequence'''
    reads = fake_fh([
        '>sample=donor1;1', 'AAA',
        '>sample=donor1;2', 'AAA',
        '>sample=donor2;1', 'AAA',
        '>sample=donor3;1', 'TTT',
    ])
    expected_table = {'AAA': {'donor1': 2, 'donor2': 1}, 'TTT': {'donor3': 1}}
    expected_abund = {'AAA': 3, 'TTT': 1}

    observed = seq_table.fasta_to_table_and_abund(reads)
    self.assertEqual(observed, (expected_table, expected_abund))
def setUp(self):
    '''set up a three-barcode map and a three-entry fastq handle

    The last fastq entry carries barcode ACT, which is not in the map.
    '''
    fastq_lines = [
        '@lol#AAA/1', 'CAT', '+', 'aaa',
        '@hoo#TTT/1', 'CAT', '+', 'aaa',
        '@crap#ACT/1', 'CAT', '+', 'aaa',
    ]
    barcode_to_sample = {
        'AAA': 'sampleA',
        'TTT': 'sampleT',
        'CCC': 'sampleC',
    }

    self.barcode_map = barcode_to_sample
    self.fastq = fake_fh(fastq_lines)
def setUp(self):
    '''build the forward and reverse FASTQ handles used by the tests

    self.fq_for holds four reads whose names end in /1. self.fq_rev holds
    three reads with the same names ending in /2; read 18716:1699 has no
    /2 counterpart.
    '''
    # Every FASTQ line is a separate literal ending in an explicit \n, so
    # that no sequence or quality line is split by a raw line break.
    # Backslashes in the quality lines are written as \\ because \O, \c and
    # \[ are not valid escape sequences.
    self.fq_for = fake_fh(
        "@MISEQ578:1:1101:17145:1691#TTCAGA/1\n"
        "NTCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATCAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAACAACTGTAAGAACTG\n"
        "+MISEQ578:1:1101:17145:1691#TTCAGA/1\n"
        "B]]P]Pab_cePRPPP`efdde`efeRfgeeRPeeeeb`fffgfadfaeefeeedeabfeddbddfggggfcgfbddeggeggfggeggaeeegggggfgdgggfggeaeaddcfgedePdaddPdffeefeaPPeeefPffgeedaecb[^bfggdbedbggPac^Nb^_gfaMLLb`facgeegeafe[bBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:18716:1699#CCTGAG/1\n"
        "NCAGCGTCATAAGAGGTTTTACCTCCAAATGAAGAAATAACATCATGGTAACGCTGCATGAAGTAATCACGTTCTTGGTCAGTATGCAAATTAGCATAAGCAGCTTGCAGACCCATAATGTCAATAGATGTGGTAGAAGTCGTCATTTGGCGAGAAAGCTCAGTCTCAGGAGGAAGCGGAGCAGTCCAAATGTTTTTGAGATGGCAGCAACGGAAACCATAACGAGCATCATCTTAGATCGGAAGAGAGGT\n"
        "+MISEQ578:1:1101:18716:1699#CCTGAG/1\n"
        "BPPP]P]]PbRRcPabffffefefedaPffbPeeegggeggggegfegecfefffffgggacedggggggfgggfffgggggggggfgggggfgfgdgeggggggegdeecffdgafefggegfgQefgeafegeaPeggdfffg`edcga_b^aePfgggfggefeaeOPNNNe[N]LL_LefgeacccaOO\\ON[LM[fbbNbb`bMYMZLZMLXMXOZZNXM`__eeaaOOZOXZbBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:16445:1701#CCTGAG/1\n"
        "NCGCTCAAAGTCAAAATAATCAGCGTGACATTCAGAAGGGTAATAAGAACGAACCATAAAAAAGCCTCCAAGATTTGGAGGCATGAAAACATACAATTGGGAGGGTGTCAATCCTGACGGTTATTTCCTAGACAAATTAGAGCCAATACCATCAGCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCGGCAACAGCTTTATCAATACCATGACAAATATCAACCACACCAGAAGCAGCATCAGTGACGA\n"
        "+MISEQ578:1:1101:16445:1701#CCTGAG/1\n"
        "BPPP]]PPPPa_RRaPecPefaPeOdO`eRdfeaeaab`dfeeecePP`dd`eddefPfaea^NeggggeafdeefPPPeddefefadebfegefggeded``_d`efggggggggeaddd`dggggfeeeePaOecefPPeeeabegPeggeafeefeefdffbfeggedaP[PePP]ecOO\\cO\\ccM[LLLYMbMYYOOZbfbegefeeOXXZZNOXaBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:12954:1727#AATGTC/1\n"
        "NGTGGTGCCAGCAGCCGCGGTAATACGGAGGATCCAAGCGTTATCCGGAATTATTGGGTTTAAAGGGTCCGCAGGCTGTTTGTTAAGTCAGGGGTGAAATCCTACCGCTCAACGGTAGAACTGCCTTTGATACTGGCAAACTTGAGTTATTGTGAAGTAGTTAGAATGTGTAGTGTAGCGGTGAAATGCATAGATATTACACAGAATACCGATTGCGAAAGCAGATTACTAACAATATACTGACGATGAGG\n"
        "+MISEQ578:1:1101:12954:1727#AATGTC/1\n"
        "BP]PP]P_]aPPP``e^dd^ddfgefebdbbffeggfgffffegggffNdefgggggfdeebceeefgggffffffgggggggegeggggeaedddggggggfggdfffffgedf^fbeggggggggfgffgggeeefggggggggfecfceafaegcef\\[\\OcgeaObbeegggegfLbbLdefffaeaeefedeaeOOaO`bNZeead^ZXdfeLZL`aaaNXNaXNZNNNNXNXaaaXeBBBBBBBB\n"
    )
    self.fq_rev = fake_fh(
        "@MISEQ578:1:1101:17145:1691#TTCAGA/2\n"
        "NTCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATCAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAACAACTGTAAGAACTG\n"
        "+MISEQ578:1:1101:17145:1691#TTCAGA/2\n"
        "B]]P]Pab_cePRPPP`efdde`efeRfgeeRPeeeeb`fffgfadfaeefeeedeabfeddbddfggggfcgfbddeggeggfggeggaeeegggggfgdgggfggeaeaddcfgedePdaddPdffeefeaPPeeefPffgeedaecb[^bfggdbedbggPac^Nb^_gfaMLLb`facgeegeafe[bBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:16445:1701#CCTGAG/2\n"
        "NCGCTCAAAGTCAAAATAATCAGCGTGACATTCAGAAGGGTAATAAGAACGAACCATAAAAAAGCCTCCAAGATTTGGAGGCATGAAAACATACAATTGGGAGGGTGTCAATCCTGACGGTTATTTCCTAGACAAATTAGAGCCAATACCATCAGCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCGGCAACAGCTTTATCAATACCATGACAAATATCAACCACACCAGAAGCAGCATCAGTGACGA\n"
        "+MISEQ578:1:1101:16445:1701#CCTGAG/2\n"
        "BPPP]]PPPPa_RRaPecPefaPeOdO`eRdfeaeaab`dfeeecePP`dd`eddefPfaea^NeggggeafdeefPPPeddefefadebfegefggeded``_d`efggggggggeaddd`dggggfeeeePaOecefPPeeeabegPeggeafeefeefdffbfeggedaP[PePP]ecOO\\cO\\ccM[LLLYMbMYYOOZbfbegefeeOXXZZNOXaBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n"
        "@MISEQ578:1:1101:12954:1727#AATGTC/2\n"
        "NGTGGTGCCAGCAGCCGCGGTAATACGGAGGATCCAAGCGTTATCCGGAATTATTGGGTTTAAAGGGTCCGCAGGCTGTTTGTTAAGTCAGGGGTGAAATCCTACCGCTCAACGGTAGAACTGCCTTTGATACTGGCAAACTTGAGTTATTGTGAAGTAGTTAGAATGTGTAGTGTAGCGGTGAAATGCATAGATATTACACAGAATACCGATTGCGAAAGCAGATTACTAACAATATACTGACGATGAGG\n"
        "+MISEQ578:1:1101:12954:1727#AATGTC/2\n"
        "BP]PP]P_]aPPP``e^dd^ddfgefebdbbffeggfgffffegggffNdefgggggfdeebceeefgggffffffgggggggegeggggeaedddggggggfggdfffffgedf^fbeggggggggfgffgggeeefggggggggfecfceafaegcef\\[\\OcgeaObbeegggegfLbbLdefffaeaeefedeaeOOaO`bNZeead^ZXdfeLZL`aaaNXNaXNZNNNNXNXaaaXeBBBBBBBB\n"
    )
def setUp(self):
    '''build a Dereplicator over six fasta entries with minimum counts 2'''
    handle = fake_fh([
        '>foo', 'AA',
        '>bar', 'CC',
        '>baz', 'AA',
        '>blag', 'AA',
        '>flog', 'TT',
        '>blob', 'TT',
    ])
    self.derep = derep_fulllength.Dereplicator(handle, 2)
def setUp(self):
    '''create a PrimerRemover over a one-entry fastq handle'''
    handle = fake_fh('''@lolapolooza\nTAAAACATCATCATCAT\n+lolapolooza\n"#$%&'()*+,-./012\n''')
    primer, max_diffs = "AAAA", 1

    # keep the inputs on the test case next to the remover built from them
    self.fastq = handle
    self.primer = primer
    self.max_primer_diffs = max_diffs
    self.primer_remover = util_primer.PrimerRemover(self.fastq, self.primer, self.max_primer_diffs)
def setUp(self):
    '''prepare the fastq13/fastq18 text fixtures and the parsed fastq13 record

    The two fixtures hold the same read with two different quality lines
    (named after the Illumina 1.3 and 1.8 encodings).
    '''
    # The backslash in the quality line is doubled: "\]" is not a valid
    # escape sequence. The runtime value is unchanged.
    self.fastq13 = """@lolapolooza:1234#ACGT/1\nAATTAAGTCAAATTTGGCCTGGCCCAGTGTCCAATGTTGT\n+\nABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefgh\n"""
    self.fastq18 = """@lolapolooza:1234#ACGT/1\nAATTAAGTCAAATTTGGCCTGGCCCAGTGTCCAATGTTGT\n+\n"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI\n"""

    self.fastq13_fh = fake_fh(self.fastq13 + "\n")
    self.fastq13_record = SeqIO.read(self.fastq13_fh, 'fastq')
def test_parse_full_fasta(self):
    '''should count reads for each (sample, sequence id) pair'''
    fasta_lines = [
        '>sample=donor1;1', 'AAA',
        '>sample=donor1;2', 'AAA',
        '>sample=donor1;3', 'TTT',
        '>sample=donorT;1', 'TTT',
    ]
    seq_sid = dict(AAA='seq0', TTT='seq4')

    observed = index.parse_full_fasta(fake_fh(fasta_lines), seq_sid)

    wanted = {}
    wanted[('donor1', 'seq0')] = 2
    wanted[('donor1', 'seq4')] = 1
    wanted[('donorT', 'seq4')] = 1
    self.assertEqual(observed, wanted)