예제 #1
0
    def test_get_files_valid_dir_invalid_id(self):
        """An unmatched sample id in the fake_ngs_data directory yields no files."""
        run_dir = path.join(path_to_module, "fake_ngs_data")
        unknown_sample_id = "-1~"

        # List the directory's fastq files, then filter by an id that is
        # expected to match none of them.
        matches = get_pair_files(get_all_fastq_files(run_dir), unknown_sample_id)

        self.assertEqual(len(matches), 0)
예제 #2
0
    def test_get_files_invalid_dir_valid_id(self):
        """Looking up files under an invalid directory path raises OSError.

        The text of the raised error is expected to contain
        "Invalid directory".
        """
        invalid_dir = "+/not a directory/+"
        valid_sample_id = "01-1111"

        # Both calls stay inside the context manager: the test passes if
        # either of them raises the OSError.
        with self.assertRaises(OSError) as context:
            fastq_files = get_all_fastq_files(invalid_dir)
            get_pair_files(fastq_files, valid_sample_id)

        # assertIn shows the actual exception text on failure, whereas
        # assertTrue(... in ...) would only report "False is not true".
        self.assertIn("Invalid directory", str(context.exception))
예제 #3
0
    def test_common_prefix_sample_names(self):
        """Each sample in the testCommonPrefixSampleName sheet has two files.

        Parses the fixture's SampleSheet.csv, lists the fastq files in the
        same fixture directory once, and checks that get_pair_files returns
        exactly two entries for every sample's 'sequencerSampleId'.
        """
        data_dir = path.join(path_to_module, "testCommonPrefixSampleName")
        sheet_file = path.join(data_dir, "SampleSheet.csv")
        sample_list = parse_samples(sheet_file)

        fastq_files = get_all_fastq_files(data_dir)

        for sample in sample_list:
            sample_id = sample['sequencerSampleId']
            file_list = get_pair_files(fastq_files, sample_id)
            # assertEqual, not the deprecated assertEquals alias.
            self.assertEqual(len(file_list), 2)
예제 #4
0
    def test_get_files_valid_dir_valid_id(self):
        """Sample "01-1111" in fake_ngs_data resolves to its R1 and R2 files."""
        run_dir = path.join(path_to_module, "fake_ngs_data")
        sample_id = "01-1111"

        found = get_pair_files(get_all_fastq_files(run_dir), sample_id)

        # Both expected reads live in the same BaseCalls directory.
        base_calls_dir = path.join(path_to_module, "fake_ngs_data", "Data",
                                   "Intensities", "BaseCalls")
        expected = [
            path.join(base_calls_dir, read_name)
            for read_name in ("01-1111_S1_L001_R1_001.fastq.gz",
                              "01-1111_S1_L001_R2_001.fastq.gz")
        ]
        self.assertEqual(expected, found)
예제 #5
0
    def test_complete_parse_samples(self):
        """Check the samples parsed from the fake_ngs_data sample sheet.

        Verifies that complete_parse_samples returns three samples and that,
        for each sample, its dictionary has exactly the four required keys,
        its metadata contains every sequence-file header, and its two files
        equal what get_pair_files finds for the sample's id.
        """
        data_dir = path.join(path_to_module, "fake_ngs_data")
        sheet_file = path.join(data_dir, "SampleSheet.csv")

        sample_list = complete_parse_samples(sheet_file)
        self.assertEqual(len(sample_list), 3)

        required_data_headers = [
            "sampleName",
            "description",
            "sequencerSampleId",
            "sampleProject"]

        seq_file_headers = [
            "index",
            "I7_Index_ID",
            "Sample_Well",
            "Sample_Plate",
            "index2",
            "I5_Index_ID"]

        # The directory listing does not depend on the sample, so build it
        # once instead of rescanning for every sample.
        fastq_files = get_all_fastq_files(data_dir)

        for sample in sample_list:
            sample_keys = sample.get_dict().keys()

            # sample only has the 4 required data headers as keys
            self.assertEqual(len(sample_keys), len(required_data_headers))

            # every required data header must be one of the sample's
            # dictionary keys; assertIn names the missing header on failure
            for data_header in required_data_headers:
                self.assertIn(data_header, sample_keys)

            # every sequence-file header must be present in the sample
            # metadata keys
            metadata_keys = sample.get_sample_metadata().keys()
            for data_header in seq_file_headers:
                self.assertIn(data_header, metadata_keys)

            self.assertEqual(len(sample.get_files()), 2)
            pf_list = get_pair_files(fastq_files, sample.get_id())
            self.assertEqual(pf_list, sample.get_files())