예제 #1
0
class ProjectConfigurationTest(unittest.TestCase):
    """Exercise ProjectConfig loading, seed FASTA output and lookups.

    Every test feeds a small JSON document to ``ProjectConfig.load`` through
    a ``StringIO`` and then checks what the configuration reports.
    """

    def setUp(self):
        """Create an empty config plus a default one-project JSON document.

        The default document is only loaded by the tests that ask for it.
        """
        self.defaultJsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 5,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"],
          "id": 10042
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ],
      "seed_group": "R1-seeds"
    },
    "R1": {
      "is_nucleotide": false,
      "reference": [
        "RWN",
        "NWR"
      ],
      "seed_group": null
    }
  }
}
""")
        self.config = ProjectConfig()

    def testConvert(self):
        """The seed's reference lines are joined into one FASTA record."""
        expected_fasta = """\
>R1-seed
ACTGAAAGGG
"""
        fasta = StringIO()

        self.config.load(self.defaultJsonIO)
        self.config.writeSeedFasta(fasta)

        self.assertMultiLineEqual(expected_fasta, fasta.getvalue())

    def testSharedRegions(self):
        """A seed named by two projects is written only once."""
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1 and R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "TTT"
      ]
    }
  }
}
""")
        expected_fasta = """\
>R1-seed
ACTGAAAGGG
>R2-seed
TTT
"""
        fasta = StringIO()

        self.config.load(jsonIO)
        self.config.writeSeedFasta(fasta)

        self.assertMultiLineEqual(expected_fasta, fasta.getvalue())

    def testUnusedRegion(self):
        """A region that no project names as a seed is left out."""
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "TTT"
      ]
    }
  }
}
""")
        expected_fasta = """\
>R1-seed
ACTGAAAGGG
"""
        fasta = StringIO()

        self.config.load(jsonIO)
        self.config.writeSeedFasta(fasta)

        self.assertMultiLineEqual(expected_fasta, fasta.getvalue())

    def testExcludeSeeds(self):
        """Seeds listed in ``excluded_seeds`` are left out of the FASTA."""
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    },
    "R3": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R3-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "TTT"
      ]
    },
    "R3-seed": {
      "is_nucleotide": true,
      "reference": [
        "TAG"
      ]
    }
  }
}
""")
        expected_fasta = """\
>R2-seed
TTT
"""
        fasta = StringIO()

        self.config.load(jsonIO)
        self.config.writeSeedFasta(fasta,
                                   excluded_seeds=['R1-seed', 'R3-seed'])

        self.assertMultiLineEqual(expected_fasta, fasta.getvalue())

    def testExcludeUnknownSeed(self):
        """Excluding a seed name that is not configured changes nothing."""
        expected_fasta = """\
>R1-seed
ACTGAAAGGG
"""
        fasta = StringIO()

        self.config.load(self.defaultJsonIO)
        self.config.writeSeedFasta(fasta, excluded_seeds=['R99-seed'])

        self.assertMultiLineEqual(expected_fasta, fasta.getvalue())

    def testDuplicateReference(self):
        """Two seeds with identical reference sequences raise RuntimeError."""
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1a-seed", "R1b-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1a-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTAAAGGG"
      ]
    },
    "R1b-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTAAAGGG"
      ]
    }
  }
}
""")
        fasta = StringIO()
        self.config.load(jsonIO)

        # The expected message is used as a regular expression, so the
        # trailing full stop is escaped to match only a literal ".".
        self.assertRaisesRegex(
            RuntimeError,
            r"Duplicate references: R1a-seed and R1b-seed\.",
            self.config.writeSeedFasta, fasta)

    def testGetReference(self):
        """getReference returns the seed's reference lines joined together."""
        self.config.load(self.defaultJsonIO)
        seed_name = 'R1-seed'
        expected_ref = 'ACTGAAAGGG'

        seed_ref = self.config.getReference(seed_name)

        self.assertSequenceEqual(expected_ref, seed_ref)

    def testGetCoordinateReferences(self):
        """Coordinate references for a seed are keyed by region name."""
        self.config.load(self.defaultJsonIO)
        seed_name = 'R1-seed'
        expected_refs = {'R1': 'RWNNWR'}

        coordinate_refs = self.config.getCoordinateReferences(seed_name)

        self.assertDictEqual(expected_refs, coordinate_refs)

    def testGetAllReferences(self):
        """getAllReferences reports seed and coordinate regions alike."""
        expected_references = {'R1-seed': 'ACTGAAAGGG', 'R1': 'RWNNWR'}

        self.config.load(self.defaultJsonIO)
        references = self.config.getAllReferences()

        self.assertEqual(expected_references, references)

    def testUnknownReference(self):
        """Asking for a region that is not configured raises KeyError."""
        self.config.load(self.defaultJsonIO)
        seed_name = 'R-unknown'

        self.assertRaises(KeyError, self.config.getReference, seed_name)

    def testMaxVariants(self):
        """getMaxVariants reports the project's ``max_variants`` value."""
        self.config.load(self.defaultJsonIO)
        coordinate_region_name = 'R1'

        self.assertEqual(5, self.config.getMaxVariants(coordinate_region_name))

    def testMaxVariantsUnusedRegion(self):
        """A coordinate region that no project uses reports 0 variants."""
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 2,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R1": {
      "is_nucleotide": false,
      "reference": [
        "NSFW"
      ]
    },
    "R2": {
      "is_nucleotide": false,
      "reference": [
        "RSW"
      ]
    }
  }
}
""")
        self.config.load(jsonIO)
        coordinate_region_name = 'R2'

        self.assertEqual(0, self.config.getMaxVariants(coordinate_region_name))

    def testMaxVariantsTwoProjects(self):
        """If two projects specify a maximum for the same coordinate region,
        use the bigger of the two.
        """
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 9,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1-and-R2": {
      "max_variants": 2,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": "R2",
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R1": {
      "is_nucleotide": false,
      "reference": [
        "NSFW"
      ]
    },
    "R2": {
      "is_nucleotide": false,
      "reference": [
        "RSW"
      ]
    }
  }
}
""")
        self.config.load(jsonIO)
        coordinate_region_name = 'R1'

        self.assertEqual(9, self.config.getMaxVariants(coordinate_region_name))

    def testReload(self):
        """Loading a second document replaces what the first one defined."""
        jsonIO1 = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    }
  }
}
""")
        jsonIO2 = StringIO("""\
{
  "projects": {
    "R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  },
  "regions": {
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "GACCTA"
      ]
    }
  }
}
""")

        self.config.load(jsonIO1)
        self.config.load(jsonIO2)

        self.assertRaises(KeyError, self.config.getReference, "R1-seed")
        self.assertSequenceEqual("GACCTA", self.config.getReference("R2-seed"))

    def testProjectSeeds(self):
        """getProjectSeeds returns the project's seed names as a set."""
        expected_seeds = {'R1-seed'}

        self.config.load(self.defaultJsonIO)
        seeds = self.config.getProjectSeeds('R1')

        self.assertSetEqual(expected_seeds, seeds)

    def testSeedGroup(self):
        """getSeedGroup returns the seed region's ``seed_group`` value."""
        expected_group = "R1-seeds"

        self.config.load(self.defaultJsonIO)
        group = self.config.getSeedGroup('R1-seed')

        self.assertEqual(expected_group, group)
예제 #2
0
class CoveragePlotsTest(TestCase):
    """Check the plot file names and score rows produced by coverage_plot."""

    def setUp(self):
        """Load a two-project scoring configuration into ``self.config``."""
        self.addTypeEqualityFunc(str, self.assertMultiLineEqual)
        scoring_json = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        }
      ]
    },
    "R1-and-R2": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [
            {
              "end_pos": null,
              "start_pos": 1
            },
            {
              "end_pos": null,
              "start_pos": 3
            }
          ],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        },
        {
          "coordinate_region": "R2",
          "coordinate_region_length": 1,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R2-seed"
          ]
        }
      ]
    }
  }
}
""")
        loaded = ProjectConfig()
        loaded.load(scoring_json)
        self.config = loaded

    @patch('matplotlib.pyplot.savefig')
    @patch('micall.core.project_config.ProjectConfig.loadScoring')
    def test_simple(self, config_mock, savefig_mock):
        """One seed/region pair gives one plot and one score row per project.

        ``loadScoring`` is patched to hand back the configuration built in
        ``setUp``, and ``savefig`` is patched so that only the requested file
        names are recorded.
        """
        config_mock.return_value = self.config

        # Input: amino acid counts for the three positions of region R1.
        amino_counts = StringIO("""\
seed,region,q-cutoff,query.aa.pos,refseq.aa.pos,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*
R1-seed,R1,15,100,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
R1-seed,R1,15,101,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0
R1-seed,R1,15,102,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0
""")
        amino_counts.name = 'E1234.amino.csv'
        scores_out = StringIO()

        # Expectations: one saved figure and one score row for each project.
        plot_calls = [
            call('E1234.R1.R1.png'),
            call('E1234.R1-and-R2.R1.png')
        ]
        score_rows = """\
project,region,seed,q.cut,min.coverage,which.key.pos,off.score,on.score
R1,R1,R1-seed,15,5,1,-1,1
R1-and-R2,R1,R1-seed,15,5,1,-1,1
"""

        coverage_plot(amino_counts, coverage_scores_csv=scores_out)

        self.assertEqual(plot_calls, savefig_mock.mock_calls)
        self.assertEqual(score_rows, scores_out.getvalue())
예제 #3
0
class ProjectConfigurationProjectRegionsTest(unittest.TestCase):
    """Check which project regions ProjectConfig reports for a seed/region."""

    def setUp(self):
        """Prepare an unloaded config and a JSON document with two projects.

        Both projects pair seed R1-seed with coordinate region R1; the second
        project also defines key positions and an extra region R2.
        """
        self.defaultJsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        }
      ]
    },
    "R1 and R2": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [1, 3],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        },
        {
          "coordinate_region": "R2",
          "coordinate_region_length": 1,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R2-seed"
          ]
        }
      ]
    }
  }
}
""")
        self.config = ProjectConfig()

    def testProjectRegions(self):
        """Both projects that pair R1-seed with R1 are reported."""
        self.config.load(self.defaultJsonIO)

        found = list(self.config.getProjectRegions('R1-seed', 'R1'))

        self.assertEqual(
            [
                {
                    "project_name": "R1",
                    "coordinate_region_length": 3,
                    "key_positions": [],
                    "min_coverage1": 10,
                    "min_coverage2": 50,
                    "min_coverage3": 100
                },
                {
                    "project_name": "R1 and R2",
                    "coordinate_region_length": 3,
                    "key_positions": [1, 3],
                    "min_coverage1": 10,
                    "min_coverage2": 50,
                    "min_coverage3": 100
                },
            ],
            found)

    def testProjectExcluded(self):
        """A project named in the exclusion list is not reported."""
        self.config.load(self.defaultJsonIO)
        skipped = ['R1']

        found = list(self.config.getProjectRegions('R1-seed', 'R1', skipped))

        self.assertEqual(
            [
                {
                    "project_name": "R1 and R2",
                    "coordinate_region_length": 3,
                    "key_positions": [1, 3],
                    "min_coverage1": 10,
                    "min_coverage2": 50,
                    "min_coverage3": 100
                },
            ],
            found)
예제 #4
0
class ConvertPrelimTest(unittest.TestCase):
    """Check convert_prelim's SAM output, remap counts and seed counts.

    The project configuration holds two seed references, R1-seed (9 bases)
    and R2-seed (12 bases), both in the seed group "main".
    """

    # Header lines expected at the top of the SAM output in every test.
    SAM_HEADER = """\
@HD	VN:1.0	SO:unsorted
@SQ	SN:R1-seed	LN:9
@SQ	SN:R2-seed	LN:12
@PG	ID:bowtie2	PN:bowtie2	VN:2.2.3	CL:""
"""

    def setUp(self):
        """Load the two-seed configuration and open fresh output buffers."""
        # Show complete diffs when one of the long SAM comparisons fails.
        self.maxDiff = None
        self.projects = ProjectConfig()
        self.projects.load(StringIO("""\
            {
              "regions": {
                "R1-seed": {
                  "seed_group": "main",
                  "reference": ["ACTAAAGGG"]
                },
                "R2-seed": {
                  "seed_group": "main",
                  "reference": ["ACTAAAGGGAAA"]
                }
              }
            }
            """))
        self.sam_file = StringIO()
        self.remap_counts = StringIO()
        # The expected strings below end their lines with "\n", so the writer
        # is pinned to "\n" as well instead of the platform's os.linesep;
        # StringIO stores whatever terminator it is given.
        self.remap_counts_writer = DictWriter(
            self.remap_counts,
            ['type', 'filtered_count', 'count'],
            lineterminator='\n')
        self.remap_counts_writer.writeheader()

    def _check_conversion(self,
                          prelim_text,
                          expected_sam_reads,
                          expected_remap_counts,
                          expected_seed_counts,
                          count_threshold=2):
        """Run convert_prelim on prelim_text and compare all three results.

        :param prelim_text: contents of the preliminary mapping CSV
        :param expected_sam_reads: SAM lines expected after ``SAM_HEADER``
        :param expected_remap_counts: full expected remap counts CSV text
        :param expected_seed_counts: expected return value of convert_prelim
        :param count_threshold: threshold passed on to convert_prelim
        """
        seed_counts = convert_prelim(StringIO(prelim_text),
                                     self.sam_file,
                                     self.remap_counts_writer,
                                     count_threshold,
                                     self.projects)

        self.assertEqual(self.SAM_HEADER + expected_sam_reads,
                         self.sam_file.getvalue())
        self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
        self.assertEqual(expected_seed_counts, seed_counts)

    def test_simple(self):
        """A single 9-base read is copied and counted, but no seed is kept."""
        prelim_text = """\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,9M,=,1,0,AAACCCTTT,BBBBBBBBB
"""
        expected_sam_reads = """\
example1\t89\tR1-seed\t1\t0\t9M\t=\t1\t0\tAAACCCTTT\tBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,0,1
"""

        self._check_conversion(prelim_text,
                               expected_sam_reads,
                               expected_remap_counts,
                               expected_seed_counts={})

    def test_two_regions(self):
        """Short reads on two seeds are counted per seed; no seed is kept."""
        prelim_text = """\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,9M,=,1,0,AAACCCTTT,BBBBBBBBB
example2,89,R2-seed,1,0,9M,=,1,0,AAAACCTTT,BBBBBBBBB
example3,89,R2-seed,1,0,9M,=,1,0,AAAAACTTT,BBBBBBBBB
"""
        expected_sam_reads = """\
example1\t89\tR1-seed\t1\t0\t9M\t=\t1\t0\tAAACCCTTT\tBBBBBBBBB
example2\t89\tR2-seed\t1\t0\t9M\t=\t1\t0\tAAAACCTTT\tBBBBBBBBB
example3\t89\tR2-seed\t1\t0\t9M\t=\t1\t0\tAAAAACTTT\tBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,0,1
prelim R2-seed,0,2
"""

        self._check_conversion(prelim_text,
                               expected_sam_reads,
                               expected_remap_counts,
                               expected_seed_counts={})

    def test_long_reads(self):
        """Two 54-base reads on R1-seed are both filtered-counted and kept."""
        prelim_text = """\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_sam_reads = """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,2,2
"""

        self._check_conversion(prelim_text,
                               expected_sam_reads,
                               expected_remap_counts,
                               expected_seed_counts={'R1-seed': 2})

    def test_star_region(self):
        """A read whose rname is "*" is counted but never kept as a seed."""
        prelim_text = """\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3,93,*,*,*,*,*,*,*,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_sam_reads = """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3\t93\t*\t*\t*\t*\t*\t*\t*\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim *,0,1
prelim R1-seed,2,2
"""

        self._check_conversion(prelim_text,
                               expected_sam_reads,
                               expected_remap_counts,
                               expected_seed_counts={'R1-seed': 2})

    def test_best_in_group(self):
        """Of two seeds in one group, only the one with more reads is kept."""
        prelim_text = """\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R2-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3,89,R1-seed,1,0,54M,=,1,0,\
AAAAAATTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example4,89,R2-seed,1,0,54M,=,1,0,\
AAAAAAAATAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example5,89,R2-seed,1,0,54M,=,1,0,\
AAAAAAAAAAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_sam_reads = """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAATTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example4\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAAAATAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example5\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAAAAAAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,2,2
prelim R2-seed,3,3
"""

        self._check_conversion(prelim_text,
                               expected_sam_reads,
                               expected_remap_counts,
                               expected_seed_counts={'R2-seed': 3})

    def test_unmapped_read(self):
        """A read with flag 93 is counted but left out of the filtered count.

        With only one filtered read, no seed is expected in the result.
        """
        prelim_text = """\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,93,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_sam_reads = """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t93\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,1,2
"""

        self._check_conversion(prelim_text,
                               expected_sam_reads,
                               expected_remap_counts,
                               expected_seed_counts={})
예제 #5
0
class ConvertPrelimTest(unittest.TestCase):
    def setUp(self):
        """Load a two-seed configuration and open fresh output buffers.

        Both seeds, R1-seed and R2-seed, belong to the seed group "main".
        The remap counts writer has its header row written here, so every
        test's expected counts text starts with that header.
        """
        self.projects = ProjectConfig()
        self.projects.load(
            StringIO("""\
            {
              "regions": {
                "R1-seed": {
                  "seed_group": "main",
                  "reference": ["ACTAAAGGG"]
                },
                "R2-seed": {
                  "seed_group": "main",
                  "reference": ["ACTAAAGGGAAA"]
                }
              }
            }
            """))
        self.sam_file = StringIO()
        self.remap_counts = StringIO()
        # The tests compare against text whose lines end in "\n", so the
        # writer is pinned to "\n" rather than the platform's os.linesep;
        # StringIO stores whatever terminator it is given.
        self.remap_counts_writer = DictWriter(
            self.remap_counts, ['type', 'filtered_count', 'count'],
            lineterminator='\n')
        self.remap_counts_writer.writeheader()

    def test_simple(self):
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,9M,=,1,0,AAACCCTTT,BBBBBBBBB
""")
        count_threshold = 2
        expected_sam_file = """\
@HD	VN:1.0	SO:unsorted
@SQ	SN:R1-seed	LN:9
@SQ	SN:R2-seed	LN:12
@PG	ID:bowtie2	PN:bowtie2	VN:2.2.3	CL:""
example1\t89\tR1-seed\t1\t0\t9M\t=\t1\t0\tAAACCCTTT\tBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,0,1
"""
        expected_seed_counts = {}

        seed_counts = convert_prelim(prelim_csv, self.sam_file,
                                     self.remap_counts_writer, count_threshold,
                                     self.projects)

        self.assertEqual(expected_sam_file, self.sam_file.getvalue())
        self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
        self.assertEqual(expected_seed_counts, seed_counts)

    def test_two_regions(self):
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,9M,=,1,0,AAACCCTTT,BBBBBBBBB
example2,89,R2-seed,1,0,9M,=,1,0,AAAACCTTT,BBBBBBBBB
example3,89,R2-seed,1,0,9M,=,1,0,AAAAACTTT,BBBBBBBBB
""")
        count_threshold = 2
        expected_sam_file = """\
@HD	VN:1.0	SO:unsorted
@SQ	SN:R1-seed	LN:9
@SQ	SN:R2-seed	LN:12
@PG	ID:bowtie2	PN:bowtie2	VN:2.2.3	CL:""
example1\t89\tR1-seed\t1\t0\t9M\t=\t1\t0\tAAACCCTTT\tBBBBBBBBB
example2\t89\tR2-seed\t1\t0\t9M\t=\t1\t0\tAAAACCTTT\tBBBBBBBBB
example3\t89\tR2-seed\t1\t0\t9M\t=\t1\t0\tAAAAACTTT\tBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,0,1
prelim R2-seed,0,2
"""
        expected_seed_counts = {}

        seed_counts = convert_prelim(prelim_csv, self.sam_file,
                                     self.remap_counts_writer, count_threshold,
                                     self.projects)

        self.assertEqual(expected_sam_file, self.sam_file.getvalue())
        self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
        self.assertEqual(expected_seed_counts, seed_counts)

    def test_long_reads(self):
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        count_threshold = 2
        expected_sam_file = """\
@HD	VN:1.0	SO:unsorted
@SQ	SN:R1-seed	LN:9
@SQ	SN:R2-seed	LN:12
@PG	ID:bowtie2	PN:bowtie2	VN:2.2.3	CL:""
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,2,2
"""
        expected_seed_counts = {'R1-seed': 2}

        seed_counts = convert_prelim(prelim_csv, self.sam_file,
                                     self.remap_counts_writer, count_threshold,
                                     self.projects)

        self.assertEqual(expected_sam_file, self.sam_file.getvalue())
        self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
        self.assertEqual(expected_seed_counts, seed_counts)

    def test_star_region(self):
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3,93,*,*,*,*,*,*,*,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        count_threshold = 2
        expected_sam_file = """\
@HD	VN:1.0	SO:unsorted
@SQ	SN:R1-seed	LN:9
@SQ	SN:R2-seed	LN:12
@PG	ID:bowtie2	PN:bowtie2	VN:2.2.3	CL:""
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3\t93\t*\t*\t*\t*\t*\t*\t*\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim *,0,1
prelim R1-seed,2,2
"""
        expected_seed_counts = {'R1-seed': 2}

        seed_counts = convert_prelim(prelim_csv, self.sam_file,
                                     self.remap_counts_writer, count_threshold,
                                     self.projects)

        self.assertEqual(expected_sam_file, self.sam_file.getvalue())
        self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
        self.assertEqual(expected_seed_counts, seed_counts)

    def test_best_in_group(self):
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R2-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3,89,R1-seed,1,0,54M,=,1,0,\
AAAAAATTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example4,89,R2-seed,1,0,54M,=,1,0,\
AAAAAAAATAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example5,89,R2-seed,1,0,54M,=,1,0,\
AAAAAAAAAAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        count_threshold = 2
        expected_sam_file = """\
@HD	VN:1.0	SO:unsorted
@SQ	SN:R1-seed	LN:9
@SQ	SN:R2-seed	LN:12
@PG	ID:bowtie2	PN:bowtie2	VN:2.2.3	CL:""
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAATTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example4\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAAAATAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example5\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAAAAAAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,2,2
prelim R2-seed,3,3
"""
        expected_seed_counts = {'R2-seed': 3}

        seed_counts = convert_prelim(prelim_csv, self.sam_file,
                                     self.remap_counts_writer, count_threshold,
                                     self.projects)

        self.assertEqual(expected_sam_file, self.sam_file.getvalue())
        self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
        self.assertEqual(expected_seed_counts, seed_counts)

    def test_unmapped_read(self):
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,93,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        count_threshold = 2
        expected_sam_file = """\
@HD	VN:1.0	SO:unsorted
@SQ	SN:R1-seed	LN:9
@SQ	SN:R2-seed	LN:12
@PG	ID:bowtie2	PN:bowtie2	VN:2.2.3	CL:""
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t93\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        expected_remap_counts = """\
type,filtered_count,count
prelim R1-seed,1,2
"""
        expected_seed_counts = {}

        seed_counts = convert_prelim(prelim_csv, self.sam_file,
                                     self.remap_counts_writer, count_threshold,
                                     self.projects)

        self.assertEqual(expected_sam_file, self.sam_file.getvalue())
        self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
        self.assertEqual(expected_seed_counts, seed_counts)
예제 #6
0
class ProjectConfigurationTest(unittest.TestCase):
    """ Exercise ProjectConfig loading and its query methods.

    Each test loads a JSON document with "projects" and (usually) "regions"
    sections from an in-memory stream, then checks one query or the seed
    FASTA output.
    """

    def setUp(self):
        """ Create an empty ProjectConfig and a default JSON stream.

        The stream is not loaded here; tests load it, or their own JSON,
        into self.config themselves.
        """
        self.config = ProjectConfig()
        self.defaultJsonIO = StringIO.StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 5,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"],
          "id": 10042
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ],
      "seed_group": "R1-seeds"
    },
    "R1": {
      "is_nucleotide": false,
      "reference": [
        "RWN",
        "NWR"
      ],
      "seed_group": null
    }
  }
}
""")

    def testConvert(self):
        """ The default config writes one FASTA entry, for R1-seed only.

        The two reference pieces are joined into a single sequence line.
        """
        seed_fasta = StringIO.StringIO()
        self.config.load(self.defaultJsonIO)

        self.config.writeSeedFasta(seed_fasta)

        wanted_fasta = """\
>R1-seed
ACTGAAAGGG
"""
        self.assertMultiLineEqual(wanted_fasta, seed_fasta.getvalue())

    def testSharedRegions(self):
        """ A seed used by two projects appears only once in the FASTA. """
        shared_json = StringIO.StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1 and R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "TTT"
      ]
    }
  }
}
""")
        seed_fasta = StringIO.StringIO()
        self.config.load(shared_json)

        self.config.writeSeedFasta(seed_fasta)

        wanted_fasta = """\
>R1-seed
ACTGAAAGGG
>R2-seed
TTT
"""
        self.assertMultiLineEqual(wanted_fasta, seed_fasta.getvalue())

    def testUnusedRegion(self):
        """ A region that no project lists as a seed is left out of the FASTA. """
        unused_json = StringIO.StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "TTT"
      ]
    }
  }
}
""")
        seed_fasta = StringIO.StringIO()
        self.config.load(unused_json)

        self.config.writeSeedFasta(seed_fasta)

        wanted_fasta = """\
>R1-seed
ACTGAAAGGG
"""
        self.assertMultiLineEqual(wanted_fasta, seed_fasta.getvalue())

    def testDuplicateReference(self):
        """ Writing seeds with identical reference sequences raises RuntimeError. """
        duplicate_json = StringIO.StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1a-seed", "R1b-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1a-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTAAAGGG"
      ]
    },
    "R1b-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTAAAGGG"
      ]
    }
  }
}
""")
        self.config.load(duplicate_json)
        seed_fasta = StringIO.StringIO()

        # The message is used as a regular expression pattern.
        with self.assertRaisesRegexp(
                RuntimeError,
                "Duplicate references: R1a-seed and R1b-seed."):
            self.config.writeSeedFasta(seed_fasta)

    def testGetReference(self):
        """ A seed's reference is returned as one joined sequence. """
        self.config.load(self.defaultJsonIO)

        found_ref = self.config.getReference('R1-seed')

        self.assertSequenceEqual('ACTGAAAGGG', found_ref)

    def testGetCoordinateReferences(self):
        """ A seed maps to the joined references of its coordinate regions. """
        self.config.load(self.defaultJsonIO)

        found_refs = self.config.getCoordinateReferences('R1-seed')

        wanted_refs = {'R1': 'RWNNWR'}
        self.assertDictEqual(wanted_refs, found_refs)

    def testUnknownReference(self):
        """ Asking for a region that was never loaded raises KeyError. """
        self.config.load(self.defaultJsonIO)

        with self.assertRaises(KeyError):
            self.config.getReference('R-unknown')

    def testMaxVariants(self):
        """ The project's max_variants is reported for its coordinate region. """
        self.config.load(self.defaultJsonIO)

        found_max = self.config.getMaxVariants('R1')

        self.assertEqual(5, found_max)

    def testMaxVariantsUnusedRegion(self):
        """ A coordinate region used by no project has a maximum of zero. """
        unused_json = StringIO.StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 2,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R1": {
      "is_nucleotide": false,
      "reference": [
        "NSFW"
      ]
    },
    "R2": {
      "is_nucleotide": false,
      "reference": [
        "RSW"
      ]
    }
  }
}
""")
        self.config.load(unused_json)

        found_max = self.config.getMaxVariants('R2')

        self.assertEqual(0, found_max)

    def testMaxVariantsTwoProjects(self):
        """ When two projects set a maximum for one coordinate region, the
        larger value wins.
        """
        two_projects_json = StringIO.StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 9,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1-and-R2": {
      "max_variants": 2,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": "R2",
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R1": {
      "is_nucleotide": false,
      "reference": [
        "NSFW"
      ]
    },
    "R2": {
      "is_nucleotide": false,
      "reference": [
        "RSW"
      ]
    }
  }
}
""")
        self.config.load(two_projects_json)

        found_max = self.config.getMaxVariants('R1')

        self.assertEqual(9, found_max)

    def testReload(self):
        """ Loading a second document replaces what the first one defined. """
        first_json = StringIO.StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    }
  }
}
""")
        second_json = StringIO.StringIO("""\
{
  "projects": {
    "R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  },
  "regions": {
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "GACCTA"
      ]
    }
  }
}
""")

        for json_stream in (first_json, second_json):
            self.config.load(json_stream)

        # The seed from the first document must be gone after the reload.
        with self.assertRaises(KeyError):
            self.config.getReference("R1-seed")
        self.assertSequenceEqual("GACCTA", self.config.getReference("R2-seed"))

    def testProjectSeeds(self):
        """ A project's seeds come back as a set of seed region names. """
        self.config.load(self.defaultJsonIO)

        found_seeds = self.config.getProjectSeeds('R1')

        self.assertSetEqual({'R1-seed'}, found_seeds)

    def testSeedGroup(self):
        """ A seed region reports the seed_group given in its definition. """
        self.config.load(self.defaultJsonIO)

        found_group = self.config.getSeedGroup('R1-seed')

        self.assertEqual("R1-seeds", found_group)

    def testProjectRegions(self):
        """ Each project pairing R1-seed with R1 yields one settings entry.

        The R2 region of "R1 and R2" uses a different seed and coordinate
        region, so it is not part of the result.
        """
        regions_json = StringIO.StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        }
      ]
    },
    "R1 and R2": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [1, 3],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        },
        {
          "coordinate_region": "R2",
          "coordinate_region_length": 1,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R2-seed"
          ]
        }
      ]
    }
  }
}
""")
        self.config.load(regions_json)

        found_regions = list(self.config.getProjectRegions('R1-seed', 'R1'))

        wanted_regions = [
            dict(project_name="R1",
                 coordinate_region_length=3,
                 key_positions=[],
                 min_coverage1=10,
                 min_coverage2=50,
                 min_coverage3=100),
            dict(project_name="R1 and R2",
                 coordinate_region_length=3,
                 key_positions=[1, 3],
                 min_coverage1=10,
                 min_coverage2=50,
                 min_coverage3=100),
        ]
        self.assertEqual(wanted_regions, found_regions)
예제 #7
0
class CoveragePlotsTest(TestCase):
    def setUp(self):
        self.addTypeEqualityFunc(str, self.assertMultiLineEqual)
        config_json = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        }
      ]
    },
    "R1-and-R2": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [
            {
              "end_pos": null,
              "start_pos": 1
            },
            {
              "end_pos": null,
              "start_pos": 3
            }
          ],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        },
        {
          "coordinate_region": "R2",
          "coordinate_region_length": 1,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R2-seed"
          ]
        }
      ]
    }
  }
}
""")
        self.config = ProjectConfig()
        self.config.load(config_json)

    @patch('matplotlib.pyplot.savefig')
    @patch('micall.core.project_config.ProjectConfig.loadScoring')
    def test_simple(self, config_mock, savefig_mock):
        config_mock.return_value = self.config
        amino_csv = StringIO("""\
seed,region,q-cutoff,query.aa.pos,refseq.aa.pos,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*
R1-seed,R1,15,100,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
R1-seed,R1,15,101,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0
R1-seed,R1,15,102,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0
""")
        expected_scores = """\
project,region,seed,q.cut,min.coverage,which.key.pos,off.score,on.score
R1,R1,R1-seed,15,5,1,-1,1
R1-and-R2,R1,R1-seed,15,5,1,-1,1
"""
        scores_csv = StringIO()
        amino_csv.name = 'E1234.amino.csv'
        expected_calls = [call('E1234.R1.R1.png'),
                          call('E1234.R1-and-R2.R1.png')]

        coverage_plot(amino_csv, coverage_scores_csv=scores_csv)

        self.assertEqual(expected_calls, savefig_mock.mock_calls)
        self.assertEqual(expected_scores, scores_csv.getvalue())