コード例 #1
0
    def test_calculate_accession_stats_basic(self):
        accession_data = {
            "total_length": 10,
            "contigs": ["CONTIG_1"],
            "reads": []
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 10,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 10,
                "subject_start": 1,
                "subject_end": 10,
                "coverage": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                "prop_mismatch": 0.1
            }
        }

        read_data = {}

        stats = PipelineStepGenerateCoverageViz.calculate_accession_stats(
            accession_data, contig_data, {})

        self.assertEqual(stats["max_aligned_length"], 10)
        self.assertEqual(stats["coverage_depth"], 5.5)
        self.assertEqual(stats["coverage_breadth"], 1)
        self.assertEqual(stats["avg_prop_mismatch"], 0.1)
コード例 #2
0
    def test_calculate_accession_coverage_max_bins_small(self):
        accession_data = {
            "total_length": 10,
            "contigs": ["CONTIG_1"],
            "reads": []
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 6,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 4,
                "subject_start": 4,
                "subject_end": 7,
                "coverage": [1, 2, 3, 4, 5, 6]
            }
        }

        read_data = {}

        (coverage, bin_size
         ) = PipelineStepGenerateCoverageViz.calculate_accession_coverage(
             "ACCESSION_1", accession_data, contig_data, read_data, 5)

        # The coverage is reduced (e.g. divided by 2) for bins with 0.5 and 2.0.
        # Since the contig only covers half of that bin.
        self.assertEqual(coverage, [
            [1, 0.5, 0.5, 1, 0],
            [2, 2.5, 1, 1, 0],
            [3, 2.0, 0.5, 1, 0],
        ])
        self.assertEqual(bin_size, 2)
コード例 #3
0
    def test_calculate_accession_coverage_multiple_reads(self):
        accession_data = {
            "total_length": 10,
            "contigs": [],
            "reads": ["READS_1", "READS_2"]
        }

        contig_data = {}

        read_data = {
            "READS_1": {
                "accession": "ACCESSION_1",
                "subject_start": 4,
                "subject_end": 7,
            },
            "READS_2": {
                "accession": "ACCESSION_1",
                "subject_start": 6,
                "subject_end": 9,
            }
        }

        (coverage, bin_size
         ) = PipelineStepGenerateCoverageViz.calculate_accession_coverage(
             "ACCESSION_1", accession_data, contig_data, read_data, 10)

        self.assertEqual(coverage, [
            [3, 1.0, 1, 0, 1],
            [4, 1.0, 1, 0, 1],
            [5, 2.0, 1, 0, 2],
            [6, 2.0, 1, 0, 2],
            [7, 1.0, 1, 0, 1],
            [8, 1.0, 1, 0, 1],
        ])
        self.assertEqual(bin_size, 1)
コード例 #4
0
    def test_calculate_accession_coverage_offset_varying_reverse(self):
        accession_data = {
            "total_length": 10,
            "contigs": ["CONTIG_1"],
            "reads": []
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 5,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 5,
                "subject_start": 9,
                "subject_end": 3,
                "coverage": [1, 2, 3, 4, 5]
            }
        }

        read_data = {}

        (coverage, bin_size
         ) = PipelineStepGenerateCoverageViz.calculate_accession_coverage(
             "ACCESSION_1", accession_data, contig_data, read_data, 10)

        self.assertEqual(coverage, [
            [2, 5.0, 1, 1, 0],
            [3, 4.5, 1, 1, 0],
            [4, 3.5, 1, 1, 0],
            [5, 3.0, 1, 1, 0],
            [6, 2.5, 1, 1, 0],
            [7, 1.5, 1, 1, 0],
            [8, 1.0, 1, 1, 0],
        ])
        self.assertEqual(bin_size, 1.0)
コード例 #5
0
    def test_calculate_accession_coverage_partial_contig(self):
        accession_data = {
            "total_length": 10,
            "contigs": ["CONTIG_1"],
            "reads": []
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 7,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 7,
                "subject_start": 3,
                "subject_end": 9,
                "coverage": [2] * 6,  # Coverage is 6 instead of 7.
            }
        }

        read_data = {}

        (coverage, bin_size
         ) = PipelineStepGenerateCoverageViz.calculate_accession_coverage(
             "ACCESSION_1", accession_data, contig_data, read_data, 10)

        self.assertEqual(coverage, [
            [2, 2.0, 1, 1, 0],
            [3, 2.0, 1, 1, 0],
            [4, 2.0, 1, 1, 0],
            [5, 2.0, 1, 1, 0],
            [6, 2.0, 1, 1, 0],
            [7, 2.0, 1, 1, 0],
            [8, 2.0, 1, 1, 0],
        ])
        self.assertEqual(bin_size, 1.0)
コード例 #6
0
    def test_calculate_accession_coverage_basic(self):
        accession_data = {
            "total_length": 4,
            "contigs": ["CONTIG_1"],
            "reads": []
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 4,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 4,
                "subject_start": 1,
                "subject_end": 4,
                "coverage": [2] * 4,
            }
        }

        read_data = {}

        (coverage, bin_size
         ) = PipelineStepGenerateCoverageViz.calculate_accession_coverage(
             "ACCESSION_1", accession_data, contig_data, read_data, 4)

        self.assertEqual(coverage, [
            [0, 2.0, 1, 1, 0],
            [1, 2.0, 1, 1, 0],
            [2, 2.0, 1, 1, 0],
            [3, 2.0, 1, 1, 0],
        ])
        self.assertEqual(bin_size, 1.0)
コード例 #7
0
    def test_calculate_accession_coverage_rounding_error(self):
        accession_data = {
            "total_length": 545,
            "contigs": ["CONTIG_1"],
            "reads": [],
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 400,
                "accession": "ACCESSION_1",
                "query_start": 320,
                "query_end": 322,
                "subject_start": 221,
                "subject_end": 219,
                "coverage": [1] * 400
            }
        }

        read_data = {}

        (coverage, bin_size
         ) = PipelineStepGenerateCoverageViz.calculate_accession_coverage(
             "ACCESSION_1", accession_data, contig_data, read_data, 500)

        self.assertEqual(coverage,
                         [[200, 1.0, 1.0, 1, 0], [201, 1.0, 1.0, 1, 0],
                          [202, 0.75, 0.752, 1, 0]])
        self.assertEqual(bin_size, 1.09)
コード例 #8
0
    def test_calculate_accession_stats_multiple(self):
        accession_data = {
            "total_length": 100,
            "contigs": ["CONTIG_1", "CONTIG_2"],
            "reads": ["READ_1", "READ_2"]
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 100,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 40,
                "subject_start": 1,
                "subject_end": 40,
                "coverage": [2] * 40,
                "prop_mismatch": 0.01
            },
            "CONTIG_2": {
                "total_length": 100,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 20,
                "subject_start": 100,
                "subject_end": 81,
                "coverage": [4] * 20,
                "prop_mismatch": 0.02
            }
        }

        read_data = {
            "READ_1": {
                "total_length": 20,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 20,
                "subject_start": 1,
                "subject_end": 20,
                "prop_mismatch": 0.03
            },
            "READ_2": {
                "total_length": 20,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 20,
                "subject_start": 100,
                "subject_end": 81,
                "prop_mismatch": 0.04
            }
        }

        stats = PipelineStepGenerateCoverageViz.calculate_accession_stats(
            accession_data, contig_data, read_data)

        self.assertEqual(stats["max_aligned_length"], 40)
        self.assertEqual(stats["coverage_depth"], 2)
        self.assertEqual(stats["coverage_breadth"], 0.6)
        self.assertEqual(stats["avg_prop_mismatch"], 0.025)
コード例 #9
0
    def test_generate_coverage_viz_summary_data_basic(self):
        accession_data = {
            "ACCESSION_1": {
                "reads": ["READS_1"],
                "contigs": ["CONTIGS_1"],
                "name": "Test Accession 1",
                "score": 100,
            },
            "ACCESSION_2": {
                "reads": ["READS_2", "READS_3"],
                "contigs": [],
                "name": "Test Accession 2",
                "score": 200,
            }
        }

        taxon_data = {
            "TAXON_1": {
                "accessions": ["ACCESSION_1", "ACCESSION_2"],
                "num_total_accessions": 100
            },
        }

        coverage_viz_obj = {
            "ACCESSION_1": {
                "coverage_depth": 10,
            },
            "ACCESSION_2": {
                "coverage_depth": 20,
            }
        }

        taxon_data_json = PipelineStepGenerateCoverageViz.generate_coverage_viz_summary_data(
            taxon_data, accession_data, coverage_viz_obj)

        self.assertEqual(len(taxon_data_json.keys()), 1)
        self.assertEqual(
            taxon_data_json["TAXON_1"], {
                "best_accessions": [{
                    "id": "ACCESSION_1",
                    "name": "Test Accession 1",
                    "num_contigs": 1,
                    "num_reads": 1,
                    "score": 100,
                    "coverage_depth": 10
                }, {
                    "id": "ACCESSION_2",
                    "name": "Test Accession 2",
                    "num_contigs": 0,
                    "num_reads": 2,
                    "score": 200,
                    "coverage_depth": 20
                }],
                "num_accessions":
                100
            })
コード例 #10
0
    def test_generate_hit_group_json_contigs(self):
        accession_id = "ACCESSION_1"

        accession_data = {
            "total_length": 10,
            "name": "Test Name",
            "contigs": ["CONTIG_1", "CONTIG_2"],
            "reads": [],
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 6,
                "accession": "ACCESSION_1",
                "query_start": 3,
                "query_end": 4,
                "subject_start": 7,
                "subject_end": 7,
                "coverage": [1, 2, 3, 4, 5, 6],
                "prop_mismatch": 0.1,
                "num_reads": 4,
                "alignment_length": 2,
                "percent_id": 99,
                "num_mismatches": 2,
                "num_gaps": 1,
                "byterange": [1, 100],
            },
            "CONTIG_2": {
                "total_length": 6,
                "accession": "ACCESSION_1",
                "query_start": 4,
                "query_end": 4,
                "subject_start": 8,
                "subject_end": 8,
                "coverage": [2, 2, 2, 2, 2, 2],
                "prop_mismatch": 0.1,
                "num_reads": 5,
                "alignment_length": 1,
                "percent_id": 98,
                "num_mismatches": 4,
                "num_gaps": 6,
                "byterange": [100, 200],
            }
        }

        hit_group_json = PipelineStepGenerateCoverageViz.generate_hit_group_json(
            accession_data, accession_id, contig_data, [], 5)

        self.assertEqual(len(hit_group_json), 1)

        self.assertEqual(hit_group_json, [
            [2, 0, 9, 7, 8, 1.5, .985, 3, 3.5, 3, [[1, 100], [100, 200]]],
        ])
コード例 #11
0
    def test_remove_taxons_with_no_contigs_basic(self):
        accession_data = {
            "ACCESSION_1": {
                "reads": ["READS_1"],
                "contigs": ["CONTIGS_1"]
            },
            "ACCESSION_2": {
                "reads": ["READS_2"],
                "contigs": []
            },
            "ACCESSION_3": {
                "reads": ["READS_3"],
                "contigs": []
            },
            "ACCESSION_4": {
                "reads": ["READS_4"],
                "contigs": []
            }
        }

        taxon_data = {
            "TAXON_1": {
                "accessions": ["ACCESSION_1", "ACCESSION_2"],
                "num_total_accessions": 2
            },
            "TAXON_2": {
                "accessions": ["ACCESSION_3", "ACCESSION_4"],
                "num_total_accessions": 2
            }
        }

        PipelineStepGenerateCoverageViz.remove_taxons_with_no_contigs(
            accession_data, taxon_data)

        self.assertEqual(len(accession_data.keys()), 2)
        self.assertTrue("ACCESSION_1" in accession_data)
        self.assertTrue("ACCESSION_2" in accession_data)

        self.assertEqual(len(taxon_data.keys()), 1)
        self.assertTrue("TAXON_1" in taxon_data)
コード例 #12
0
    def test_calculate_accession_coverage_max_bins_small_reads(self):
        accession_data = {
            "total_length": 10,
            "contigs": ["CONTIG_1"],
            "reads": ["READS_1", "READS_2"]
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 6,
                "accession": "ACCESSION_1",
                "query_start": 3,
                "query_end": 4,
                "subject_start": 6,
                "subject_end": 7,
                "coverage": [1, 2, 3, 4, 5, 6]
            }
        }

        read_data = {
            "READS_1": {
                "accession": "ACCESSION_1",
                "subject_start": 2,
                "subject_end": 3,
            },
            "READS_2": {
                "accession": "ACCESSION_1",
                "subject_start": 10,
                "subject_end": 10,
            }
        }

        (coverage, bin_size
         ) = PipelineStepGenerateCoverageViz.calculate_accession_coverage(
             "ACCESSION_1", accession_data, contig_data, read_data, 5)

        # [0.5, 0.5] from READS_1.
        # [1.5, 2.0] from CONTIG_1
        # last [0.5] from READS_2.
        self.assertEqual(coverage, [
            [0, 0.5, 0.5, 0, 1],
            [1, 0.5, 0.5, 0, 1],
            [2, 1.5, 0.5, 1, 0],
            [3, 2.0, 0.5, 1, 0],
            [4, 0.5, 0.5, 0, 1],
        ])
        self.assertEqual(bin_size, 2)
コード例 #13
0
    def test_get_unassigned_reads_set_basic(self):
        accession_data = {
            "ACCESSION_1": {
                "reads": ["READS_1", "READS_3", "READS_4"],
                "contigs": ["CONTIGS_1"]
            },
            "ACCESSION_2": {
                "reads": ["READS_2"],
                "contigs": []
            }
        }

        unassigned_reads_set = PipelineStepGenerateCoverageViz.get_unassigned_reads_set(
            accession_data)

        self.assertEqual(len(unassigned_reads_set), 4)
        self.assertTrue("READS_1" in unassigned_reads_set)
        self.assertTrue("READS_2" in unassigned_reads_set)
        self.assertTrue("READS_3" in unassigned_reads_set)
        self.assertTrue("READS_4" in unassigned_reads_set)
コード例 #14
0
    def test_calculate_accession_coverage_max_bins(self):
        accession_data = {
            "total_length": 10,
            "contigs": ["CONTIG_1"],
            "reads": []
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 5,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 5,
                "subject_start": 1,
                "subject_end": 10,
                "coverage": [2.0] * 5
            }
        }

        read_data = {}

        (coverage, bin_size
         ) = PipelineStepGenerateCoverageViz.calculate_accession_coverage(
             "ACCESSION_1", accession_data, contig_data, read_data, 9)

        self.assertEqual(coverage, [
            [0, 2.0, 1, 1, 0],
            [1, 2.0, 1, 1, 0],
            [2, 2.0, 1, 1, 0],
            [3, 2.0, 1, 1, 0],
            [4, 2.0, 1, 1, 0],
            [5, 2.0, 1, 1, 0],
            [6, 2.0, 1, 1, 0],
            [7, 2.0, 1, 1, 0],
            [8, 2.0, 1, 1, 0],
        ])

        self.assertTrue((bin_size - 10 / 9) < 0.01)
コード例 #15
0
    def test_select_best_accessions_per_taxon_basic(self):
        taxon_data = {
            "TAXON_1": {
                "accessions": ["ACCESSION_1", "ACCESSION_2", "ACCESSION_3"],
                "num_total_accessions": 3
            },
            "TAXON_2": {
                "accessions": ["ACCESSION_4", "ACCESSION_5", "ACCESSION_6"],
                "num_total_accessions": 3
            },
            "TAXON_3": {
                "accessions": ["ACCESSION_7"],
                "num_total_accessions": 1
            },
        }

        contig_data = {
            "CONTIGS_1": {
                "alignment_length": 100
            },
            "CONTIGS_2": {
                "alignment_length": 100
            },
            "CONTIGS_3": {
                "alignment_length": 100
            },
            "CONTIGS_4": {
                "alignment_length": 120
            },
            "CONTIGS_5": {
                "alignment_length": 120
            },
            "CONTIGS_6": {
                "alignment_length": 300
            },
            "CONTIGS_7": {
                "alignment_length": 200
            },
        }

        read_data = {
            "READS_1": {
                "alignment_length": 150
            },
            "READS_2": {
                "alignment_length": 150
            },
            "READS_3": {
                "alignment_length": 150
            },
            "READS_4": {
                "alignment_length": 150
            },
        }

        accession_data = {
            "ACCESSION_1": {
                "reads": [],
                "contigs": ["CONTIGS_1", "CONTIGS_2", "CONTIGS_3"],
            },
            "ACCESSION_2": {
                "reads": [],
                "contigs": ["CONTIGS_4", "CONTIGS_5"],
            },
            "ACCESSION_3": {
                "reads": [],
                "contigs": ["CONTIGS_6"],
            },
            "ACCESSION_4": {
                "reads": ["READS_1"],
                "contigs": [],
            },
            "ACCESSION_5": {
                "reads": [],
                "contigs": ["CONTIGS_7"],
            },
            "ACCESSION_6": {
                "reads": ["READS_2", "READS_3"],
                "contigs": [],
            },
            "ACCESSION_7": {
                "reads": ["READS_4"],
                "contigs": [],
            },
        }

        (new_taxon_data, new_accession_data
         ) = PipelineStepGenerateCoverageViz.select_best_accessions_per_taxon(
             taxon_data, accession_data, contig_data, read_data, 2)

        self.assertEqual(
            new_taxon_data, {
                "TAXON_1": {
                    "accessions":
                    ["ACCESSION_3", "ACCESSION_1", "ACCESSION_2"],
                    "num_total_accessions": 3
                },
                "TAXON_2": {
                    "accessions": ["ACCESSION_5", "ACCESSION_6"],
                    "num_total_accessions": 3
                },
                "TAXON_3": {
                    "accessions": ["ACCESSION_7"],
                    "num_total_accessions": 1
                },
            })

        self.assertFalse("ACCESSION_4" in new_accession_data)

        self.assertTrue(new_accession_data["ACCESSION_3"]["score"] >
                        new_accession_data["ACCESSION_1"]["score"])
        self.assertTrue(new_accession_data["ACCESSION_1"]["score"] >
                        new_accession_data["ACCESSION_2"]["score"])
        self.assertTrue(new_accession_data["ACCESSION_5"]["score"] >
                        new_accession_data["ACCESSION_6"]["score"])
コード例 #16
0
    def test_generate_coverage_viz_data_basic(self):
        accession_data = {
            "ACCESSION_1": {
                "total_length": 10,
                "name": "Test Name",
                "contigs": ["CONTIG_1", "CONTIG_2"],
                "reads": ["READS_1", "READS_2"]
            }
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 6,
                "accession": "ACCESSION_1",
                "query_start": 3,
                "query_end": 4,
                "subject_start": 6,
                "subject_end": 7,
                "coverage": [1, 2, 3, 4, 5, 6],
                "prop_mismatch": 0.1,
                "num_reads": 4,
                "alignment_length": 2,
                "percent_id": 99,
                "num_mismatches": 2,
                "num_gaps": 1,
                "byterange": [1, 100],
            },
            "CONTIG_2": {
                "total_length": 6,
                "accession": "ACCESSION_1",
                "query_start": 4,
                "query_end": 4,
                "subject_start": 8,
                "subject_end": 8,
                "coverage": [2, 2, 2, 2, 2, 2],
                "prop_mismatch": 0.1,
                "num_reads": 5,
                "alignment_length": 1,
                "percent_id": 98,
                "num_mismatches": 4,
                "num_gaps": 6,
                "byterange": [100, 200],
            }
        }

        read_data = {
            "READS_1": {
                "accession": "ACCESSION_1",
                "subject_start": 2,
                "subject_end": 3,
                "prop_mismatch": 0.3,
                "alignment_length": 2,
                "percent_id": 98,
                "num_mismatches": 1,
                "num_gaps": 0,
            },
            "READS_2": {
                "accession": "ACCESSION_1",
                "subject_start": 10,
                "subject_end": 10,
                "prop_mismatch": 0.3,
                "alignment_length": 1,
                "percent_id": 97,
                "num_mismatches": 0,
                "num_gaps": 1,
            }
        }

        coverage_viz_json = PipelineStepGenerateCoverageViz.generate_coverage_viz_data(
            accession_data, contig_data, read_data, 5)

        self.assertTrue("ACCESSION_1" in coverage_viz_json)

        accession_obj = coverage_viz_json["ACCESSION_1"]

        self.assertEqual(accession_obj["total_length"], 10)
        self.assertEqual(accession_obj["name"], "Test Name")

        # generate_hit_group_json is tested further elsewhere.
        self.assertEqual(accession_obj["hit_groups"],
                         [[0, 1, 0, 2, 3, 2, .98, 1, 0, 1, []],
                          [1, 0, 4, 6, 7, 2, .99, 2, 1, 3, [[1, 100]]],
                          [1, 0, 5, 8, 8, 1, .98, 4, 6, 3, [[100, 200]]],
                          [0, 1, 0, 10, 10, 1, .97, 0, 1, 4, []]])

        # calculate_accession_coverage is tested further elsewhere.
        self.assertEqual(accession_obj["coverage"], [
            [0, 0.5, 0.5, 0, 1],
            [1, 0.5, 0.5, 0, 1],
            [2, 1.5, 0.5, 1, 0],
            [3, 3.0, 1, 2, 0],
            [4, 0.5, 0.5, 0, 1],
        ])
        self.assertEqual(accession_obj["coverage_bin_size"], 2)

        self.assertEqual(accession_obj["max_aligned_length"], 2)
        self.assertEqual(accession_obj["coverage_depth"], 1.2)
        self.assertEqual(accession_obj["coverage_breadth"], 0.6)
        self.assertEqual(accession_obj["avg_prop_mismatch"], 0.2)
コード例 #17
0
    def test_generate_hit_group_json_basic(self):
        accession_id = "ACCESSION_1"

        accession_data = {
            "total_length": 100,
            "name": "Test Name",
            "contigs": ["CONTIG_1", "CONTIG_2"],
            "reads": ["READS_1", "READS_2", "READS_3", "READS_4"]
        }

        contig_data = {
            "CONTIG_1": {
                "total_length": 25,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 25,
                "subject_start": 40,
                "subject_end": 64,
                "coverage": [2] * 25,
                "prop_mismatch": 0.1,
                "num_reads": 22,
                "alignment_length": 25,
                "percent_id": 100,
                "num_mismatches": 6,
                "num_gaps": 3,
                "byterange": [1, 100],
            },
            "CONTIG_2": {
                "total_length": 9,
                "accession": "ACCESSION_1",
                "query_start": 1,
                "query_end": 9,
                "subject_start": 85,
                "subject_end": 93,
                "coverage": [2, 2, 2, 2, 2, 2, 2, 2, 2],
                "num_reads": 15,
                "alignment_length": 9,
                "percent_id": 88,
                "num_mismatches": 4,
                "num_gaps": 5,
                "byterange": [100, 200],
            }
        }

        read_data = {
            "READS_1": {
                "accession": "ACCESSION_1",
                "subject_start": 1,
                "subject_end": 30,
                "alignment_length": 30,
                "percent_id": 98,
                "num_mismatches": 1,
                "num_gaps": 0,
            },
            "READS_2": {
                "accession": "ACCESSION_1",
                "subject_start": 69,
                "subject_end": 75,
                "alignment_length": 7,
                "percent_id": 97,
                "num_mismatches": 0,
                "num_gaps": 1,
            },
            "READS_3": {
                "accession": "ACCESSION_1",
                "subject_start": 83,
                "subject_end": 89,
                "alignment_length": 7,
                "percent_id": 90,
                "num_mismatches": 3,
                "num_gaps": 1,
            },
            "READS_4": {
                "accession": "ACCESSION_1",
                "subject_start": 90,
                "subject_end": 86,
                "alignment_length": 5,
                "percent_id": 95,
                "num_mismatches": 2,
                "num_gaps": 0,
            }
        }

        hit_group_json = PipelineStepGenerateCoverageViz.generate_hit_group_json(
            accession_data, accession_id, contig_data, read_data, 10)

        self.assertEqual(len(hit_group_json), 4)

        self.assertEqual(
            hit_group_json,
            [
                [0, 1, 0, 1, 30, 30, .98, 1, 0, 1, []],
                [1, 0, 22, 40, 64, 25, 1, 6, 3, 5, [[1, 100]]],
                [0, 1, 0, 69, 75, 7, .97, 0, 1, 7, []],
                # CONTIG_2, READ_3, and READ_4 are grouped together and their stats are averaged.
                [1, 2, 15, 83, 93, 7, .91, 3, 2, 8, [[100, 200]]],
            ])