Exemple #1
0
    def test_parse_illumina_paired_end_read_files_error(self):
        """parse_illumina_paired_end_read_files: mis-matched lines raise"""
        # Keyword settings shared by every parser call in this test.
        parser_options = dict(
            barcode_length=6,
            max_bad_run_length=0,
            rev_comp_barcode=True,
            quality_threshold=1e-5,
            min_per_read_length=75)

        # NOTE(review): the swapped inputs are built from self.illumina_read1 /
        # self.illumina_read2 while the untouched inputs are the bare names
        # illumina_read1 / illumina_read2 -- confirm both refer to the same
        # fixture data.

        # Swap the first two entries of read 1 only; consuming the parser
        # output must then raise IlluminaParseError.
        swapped_read1 = [self.illumina_read1[1], self.illumina_read1[0]]
        records = parse_illumina_paired_end_read_files(
            swapped_read1, illumina_read2, **parser_options)
        self.assertRaises(IlluminaParseError, list, records)

        # Same check with the first two entries of read 2 swapped instead.
        swapped_read2 = [self.illumina_read2[1], self.illumina_read2[0]]
        records = parse_illumina_paired_end_read_files(
            illumina_read1, swapped_read2, **parser_options)
        self.assertRaises(IlluminaParseError, list, records)

        # When both inputs are swapped the same way, no error is expected;
        # the result is consumed only to show that nothing is raised.
        list(parse_illumina_paired_end_read_files(
            swapped_read1, swapped_read2, **parser_options))
 def test_parse_illumina_paired_end_read_files_error(self):
     """parse_illumina_paired_end_read_files: mis-matched lines raise"""
     # Keyword settings shared by every parser call in this test.
     parser_options = dict(
         barcode_length=6,
         max_bad_run_length=0,
         rev_comp_barcode=True,
         quality_threshold=1e-5,
         min_per_read_length=75)

     # NOTE(review): the swapped inputs are built from self.illumina_read1 /
     # self.illumina_read2 while the untouched inputs are the bare names
     # illumina_read1 / illumina_read2 -- confirm both refer to the same
     # fixture data.

     # Swap the first two entries of read 1 only; consuming the parser
     # output must then raise IlluminaParseError.
     swapped_read1 = [self.illumina_read1[1], self.illumina_read1[0]]
     records = parse_illumina_paired_end_read_files(
         swapped_read1, illumina_read2, **parser_options)
     self.assertRaises(IlluminaParseError, list, records)

     # Same check with the first two entries of read 2 swapped instead.
     swapped_read2 = [self.illumina_read2[1], self.illumina_read2[0]]
     records = parse_illumina_paired_end_read_files(
         illumina_read1, swapped_read2, **parser_options)
     self.assertRaises(IlluminaParseError, list, records)

     # When both inputs are swapped the same way, no error is expected;
     # the result is consumed only to show that nothing is raised.
     list(parse_illumina_paired_end_read_files(
         swapped_read1, swapped_read2, **parser_options))
 def test_parse_illumina_paired_end_read_files_N(self):
     """parse_illumina_paired_end_read_files: handles inputs with N chars"""
     def parse_pairs(read1, read2, **n_limits):
         """Run the parser with this test's fixed settings; return a list.

         n_limits carries the optional barcode_max_N / seq_max_N keywords.
         """
         return list(parse_illumina_paired_end_read_files(
             read1, read2,
             barcode_length=6,
             max_bad_run_length=0,
             rev_comp_barcode=True,
             quality_threshold=1e-5,
             min_per_read_length=70,
             **n_limits))

     # Without barcode_max_N / seq_max_N arguments, nothing is returned
     # when either input (or both) is the *_N fixture.
     self.assertEqual(parse_pairs(illumina_read1_N, illumina_read2), [])
     self.assertEqual(parse_pairs(illumina_read1, illumina_read2_N), [])
     self.assertEqual(parse_pairs(illumina_read1_N, illumina_read2_N), [])

     # Allowing one N in the barcode yields a single record.
     barcode_relaxed = parse_pairs(
         illumina_read1_N, illumina_read2_N, barcode_max_N=1)
     self.assertEqual(len(barcode_relaxed), 1)

     # one sequence discarded due to barcode N, other due to 2 sequence Ns
     sequence_relaxed = parse_pairs(
         illumina_read1_N, illumina_read2_N, seq_max_N=1)
     self.assertEqual(len(sequence_relaxed), 0)

     # Relaxing both limits (1 barcode N, 2 sequence Ns) yields two records.
     both_relaxed = parse_pairs(
         illumina_read1_N, illumina_read2_N, barcode_max_N=1, seq_max_N=2)
     self.assertEqual(len(both_relaxed), 2)
Exemple #4
0
    def test_parse_illumina_paired_end_read_files_N(self):
        """parse_illumina_paired_end_read_files: handles inputs with N chars"""
        def parse_pairs(read1, read2, **n_limits):
            """Run the parser with this test's fixed settings; return a list.

            n_limits carries the optional barcode_max_N / seq_max_N keywords.
            """
            return list(parse_illumina_paired_end_read_files(
                read1, read2,
                barcode_length=6,
                max_bad_run_length=0,
                rev_comp_barcode=True,
                quality_threshold=1e-5,
                min_per_read_length=70,
                **n_limits))

        # Without barcode_max_N / seq_max_N arguments, nothing is returned
        # when either input (or both) is the *_N fixture.
        self.assertEqual(parse_pairs(illumina_read1_N, illumina_read2), [])
        self.assertEqual(parse_pairs(illumina_read1, illumina_read2_N), [])
        self.assertEqual(parse_pairs(illumina_read1_N, illumina_read2_N), [])

        # Allowing one N in the barcode yields a single record.
        barcode_relaxed = parse_pairs(
            illumina_read1_N, illumina_read2_N, barcode_max_N=1)
        self.assertEqual(len(barcode_relaxed), 1)

        # Allowing one N in the sequence yields a single record.
        sequence_relaxed = parse_pairs(
            illumina_read1_N, illumina_read2_N, seq_max_N=1)
        self.assertEqual(len(sequence_relaxed), 1)

        # Allowing one N in both barcode and sequence yields two records.
        both_relaxed = parse_pairs(
            illumina_read1_N, illumina_read2_N, barcode_max_N=1, seq_max_N=1)
        self.assertEqual(len(both_relaxed), 2)
Exemple #5
0
    def test_parse_illumina_paired_end_read_files(self):
        """parse_illumina_paired_end_read_files: functions as expected

        Compares the full records returned for the paired fixtures, then
        checks that min_per_read_length, quality_threshold and
        max_bad_run_length each change what is returned.
        """
        # Expected records, named after the id in their first field.
        # Each is a 4-tuple; presumably (read id, barcode, sequence,
        # quality string) -- confirm against the parser.
        record_1699 = (
            'HWI-6X_9267:1:1:4:1699#ACCACCC', 'GGTGGT',
            'TACGGAGGGTGCGAGCGTTAATCGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC' +
            'CCCCCCCCCCCCCCCCCGGTTTTTTTTTAAAAAAAAAAAAAGGGGGGGGGGGGGGGGGG' +
            'GGGGGGCCCCCCCCCCCTTTTTTTTAAAA',
            'abbbbbbbbbb`_`bbbbbb`bb^aaaaaaaaaaaaaaaaaaaaaaaaaaa' +
            'aaaaaaaaaaaaaaaaaaaaaaaacccccccccccccccccbbbbbbbbbbbbbbbbbbbbbbb' +
            'bbaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
        record_390 = (
            'HWI-6X_9267:1:1:4:390#ACCTCCC', 'GGAGGT',
            'GACAGGAGGAGCAAGTGTTATTCAAATTATGCCCCCCCCCCCCCCCCCCCCCCC' +
            'CCCCCCCGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAACGTACGTACGT' +
            'ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC' +
            'GTACGTACGTACGTACGTACGTACGT',
            # The backslash below is a literal character of the string; it is
            # written as '\\' because '\^' is not a valid escape sequence.
            'aaaaaaaaaa```aa\\^_aa``aVaaaaaaaaaaaaaaaaaaaaaaaaaaaa' +
            'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbb' +
            'bbbbbbaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbb' +
            'bbbbaaaaaaaaaaaaaaaaaaaaaaaaaa')

        # Baseline settings: both records are returned, in this order.
        actual = list(parse_illumina_paired_end_read_files(
            illumina_read1, illumina_read2, barcode_length=6,
            max_bad_run_length=0, rev_comp_barcode=True,
            quality_threshold=1e-5, min_per_read_length=70))
        self.assertEqual(actual, [record_1699, record_390])

        # alt min_per_read_length: only the second record remains
        actual = list(parse_illumina_paired_end_read_files(
            illumina_read1, illumina_read2, barcode_length=6,
            max_bad_run_length=0, rev_comp_barcode=True,
            quality_threshold=1e-5, min_per_read_length=75))
        self.assertEqual(actual, [record_390])

        # alt quality_threshold (just checking number of results
        # to be sure that alt value is passed through)
        actual = list(parse_illumina_paired_end_read_files(
            illumina_read1, illumina_read2, barcode_length=6,
            max_bad_run_length=0, rev_comp_barcode=True,
            quality_threshold=1e-55, min_per_read_length=75))
        self.assertEqual(len(actual), 0)

        # alt max_bad_run_length (just checking number of results
        # to be sure that alt value is passed through): first the
        # max_bad_run_length=0 baseline, then the relaxed value.
        actual = list(parse_illumina_paired_end_read_files(
            illumina_read1, illumina_read2, barcode_length=6,
            max_bad_run_length=0, rev_comp_barcode=True,
            quality_threshold=1e-55, min_per_read_length=75))
        self.assertEqual(len(actual), 0)
        actual = list(parse_illumina_paired_end_read_files(
            illumina_read1, illumina_read2, barcode_length=6,
            max_bad_run_length=150, rev_comp_barcode=True,
            quality_threshold=1e-55, min_per_read_length=75))
        self.assertEqual(len(actual), 2)
 def test_parse_illumina_paired_end_read_files(self):
     """parse_illumina_paired_end_read_files: functions as expected

     Compares the full records returned for the paired fixtures, then
     checks that min_per_read_length, quality_threshold and
     max_bad_run_length each change what is returned.
     """
     # Expected records, named after the id in their first field.
     # Each is a 4-tuple; presumably (read id, barcode, sequence,
     # quality string) -- confirm against the parser.
     record_1699 = (
         'HWI-6X_9267:1:1:4:1699#ACCACCC', 'GGTGGT',
         'TACGGAGGGTGCGAGCGTTAATCGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC' +
         'CCCCCCCCCCCCCCCCCGGTTTTTTTTTAAAAAAAAAAAAAGGGGGGGGGGGGGGGGGG' +
         'GGGGGGCCCCCCCCCCCTTTTTTTTAAAA',
         'abbbbbbbbbb`_`bbbbbb`bb^aaaaaaaaaaaaaaaaaaaaaaaaaaa' +
         'aaaaaaaaaaaaaaaaaaaaaaaacccccccccccccccccbbbbbbbbbbbbbbbbbbbbbbb' +
         'bbaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
     record_390 = (
         'HWI-6X_9267:1:1:4:390#ACCTCCC', 'GGAGGT',
         'GACAGGAGGAGCAAGTGTTATTCAAATTATGCCCCCCCCCCCCCCCCCCCCCCC' +
         'CCCCCCCGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAACGTACGTACGT' +
         'ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC' +
         'GTACGTACGTACGTACGTACGTACGT',
         # The backslash below is a literal character of the string; it is
         # written as '\\' because '\^' is not a valid escape sequence.
         'aaaaaaaaaa```aa\\^_aa``aVaaaaaaaaaaaaaaaaaaaaaaaaaaaa' +
         'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbb' +
         'bbbbbbaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbb' +
         'bbbbaaaaaaaaaaaaaaaaaaaaaaaaaa')

     # Baseline settings: both records are returned, in this order.
     actual = list(parse_illumina_paired_end_read_files(
         illumina_read1, illumina_read2, barcode_length=6,
         max_bad_run_length=0, rev_comp_barcode=True,
         quality_threshold=1e-5, min_per_read_length=70))
     self.assertEqual(actual, [record_1699, record_390])

     # alt min_per_read_length: only the second record remains
     actual = list(parse_illumina_paired_end_read_files(
         illumina_read1, illumina_read2, barcode_length=6,
         max_bad_run_length=0, rev_comp_barcode=True,
         quality_threshold=1e-5, min_per_read_length=75))
     self.assertEqual(actual, [record_390])

     # alt quality_threshold (just checking number of results
     # to be sure that alt value is passed through)
     actual = list(parse_illumina_paired_end_read_files(
         illumina_read1, illumina_read2, barcode_length=6,
         max_bad_run_length=0, rev_comp_barcode=True,
         quality_threshold=1e-55, min_per_read_length=75))
     self.assertEqual(len(actual), 0)

     # alt max_bad_run_length (just checking number of results
     # to be sure that alt value is passed through): first the
     # max_bad_run_length=0 baseline, then the relaxed value.
     actual = list(parse_illumina_paired_end_read_files(
         illumina_read1, illumina_read2, barcode_length=6,
         max_bad_run_length=0, rev_comp_barcode=True,
         quality_threshold=1e-55, min_per_read_length=75))
     self.assertEqual(len(actual), 0)
     actual = list(parse_illumina_paired_end_read_files(
         illumina_read1, illumina_read2, barcode_length=6,
         max_bad_run_length=150, rev_comp_barcode=True,
         quality_threshold=1e-55, min_per_read_length=75))
     self.assertEqual(len(actual), 2)