def test_validate_per_sample_FASTQ(self):
     # Success case: the prep info carries no 'run_prefix' column (only
     # 'not_a_run_prefix') and one raw forward file is supplied per sample.
     # NOTE(review): a second definition with this same name is visible in
     # this file; if both end up in one class the later one replaces the
     # earlier one - confirm which copy is meant to stay.
     atype = "per_sample_FASTQ"
     fwd_fps = ['/path/to/SKB2.640194_file.fastq',
                '/path/to/SKM4.640180_file.fastq',
                '/path/to/SKB3.640195_file.fastq']
     prep_info = {"1.SKB2.640194": {"not_a_run_prefix": "prefix1"},
                  "1.SKM4.640180": {"not_a_run_prefix": "prefix1"},
                  "1.SKB3.640195": {"not_a_run_prefix": "prefix2"}}
     # The validator gets its own copy of the list so the expectation
     # below is built from a sequence it never touched.
     files = {'raw_forward_seqs': list(fwd_fps)}

     job_id = self._create_template_and_job(prep_info, files, atype)
     outcome = _validate_per_sample_FASTQ(
         self.qclient, job_id, prep_info, files)
     obs_success, obs_ainfo, obs_error = outcome

     self.assertTrue(obs_success)
     # Every input path is expected back, paired with its filepath type.
     exp_fps = [(fp, 'raw_forward_seqs') for fp in fwd_fps]
     self.assertEqual(obs_ainfo, [ArtifactInfo(None, atype, exp_fps)])
     self.assertEqual(obs_error, "")
 def test_validate_per_sample_FASTQ(self):
     # Success case: three samples without a 'run_prefix' column and three
     # raw forward files; the validator must return a single ArtifactInfo
     # listing those files and an empty error string.
     # NOTE(review): another definition with this same name is visible in
     # this file; within one class only the last definition is kept -
     # confirm which copy should remain.
     artifact_type = "per_sample_FASTQ"
     fp_type = 'raw_forward_seqs'
     prep_info = {
         "1.SKB2.640194": {"not_a_run_prefix": "prefix1"},
         "1.SKM4.640180": {"not_a_run_prefix": "prefix1"},
         "1.SKB3.640195": {"not_a_run_prefix": "prefix2"},
     }
     files = {
         fp_type: [
             '/path/to/SKB2.640194_file.fastq',
             '/path/to/SKM4.640180_file.fastq',
             '/path/to/SKB3.640195_file.fastq',
         ],
     }

     job_id = self._create_template_and_job(
         prep_info, files, artifact_type)
     success, ainfo, error = _validate_per_sample_FASTQ(
         self.qclient, job_id, prep_info, files)

     self.assertTrue(success)
     expected = [
         ArtifactInfo(None, artifact_type, [
             ('/path/to/SKB2.640194_file.fastq', fp_type),
             ('/path/to/SKM4.640180_file.fastq', fp_type),
             ('/path/to/SKB3.640195_file.fastq', fp_type),
         ]),
     ]
     self.assertEqual(ainfo, expected)
     self.assertEqual(error, "")
    def test_validate_per_sample_FASTQ_error(self):
        # Every scenario below must be rejected by the validator: success
        # flag False, no artifact info, and one exact error message.
        # NOTE(review): a second definition with this same name is visible
        # in this file, and it expects different messages for the "Run
        # prefix mismatch" and "Sample id mismatch" scenarios - confirm
        # which copy matches the validator and should remain.

        def check_rejected(prep, fps, message):
            # Build the template/job for these inputs, run the validator
            # and verify it fails with exactly `message`.
            job = self._create_template_and_job(
                prep, fps, "per_sample_FASTQ")
            success, ainfo, error = _validate_per_sample_FASTQ(
                self.qclient, job, prep, fps)
            self.assertFalse(success)
            self.assertIsNone(ainfo)
            self.assertEqual(error, message)

        # Prep info with a distinct run_prefix for each sample
        prep_info = {
            "1.SKB2.640194": {"run_prefix": "prefix1"},
            "1.SKM4.640180": {"run_prefix": "prefix2"},
            "1.SKB3.640195": {"run_prefix": "prefix3"},
        }

        # Filepath type not supported
        check_rejected(
            prep_info,
            {'Unknown': ['/path/to/file1.fastq']},
            "Filepath type(s) Unknown not supported by artifact "
            "type per_sample_FASTQ. Supported filepath types: "
            "raw_forward_seqs, raw_reverse_seqs, "
            "preprocessed_fastq")

        # Missing raw_forward_seqs and preprocessed_fastq
        check_rejected(
            prep_info,
            {'raw_reverse_seqs': ['/path/to/file1.fastq']},
            "Missing required filepath type: raw_forward_seqs "
            "or preprocessed_fastq")

        # Raw forward seqs and preprocessed_fastq
        check_rejected(
            prep_info,
            {'raw_forward_seqs': ['/path/to/file1.fastq'],
             'preprocessed_fastq': ['/path/to/file1.fastq']},
            "If raw_forward_seqs is provided, preprocessed_fastq "
            "should not be provided")

        # Preprocessed fastq and raw_reverse_seqs
        check_rejected(
            prep_info,
            {'raw_reverse_seqs': ['/path/to/file1.fastq'],
             'preprocessed_fastq': ['/path/to/file1.fastq']},
            "If preprocessed_fastq is provided, raw_reverse_seqs "
            "should not be provided")

        # Count mismatch
        check_rejected(
            prep_info,
            {'raw_forward_seqs': ['/path/to/file1.fastq'],
             'raw_reverse_seqs': ['/path/to/file1.fastq',
                                  '/path/to/file1.fastq']},
            "The number of provided files doesn't match the "
            "number of samples (3): 1 raw_forward_seqs, "
            "2 raw_reverse_seqs (optional, 0 is ok)")

        # preprocessed_fastq count mismatch
        check_rejected(
            prep_info,
            {'preprocessed_fastq': ['/path/to/file1_R1.fastq',
                                    '/path/to/file1_R2.fastq']},
            "The number of provided files doesn't match the "
            "number of samples (3): 2 raw_forward_seqs, "
            "0 raw_reverse_seqs (optional, 0 is ok)")

        # Run prefix mismatch; these files are reused by the two
        # remaining scenarios, which only swap the prep info.
        prefixed_fps = {
            'raw_forward_seqs': ['/path/to/prefix1_fwd.fastq',
                                 '/path/to/prefix2_fwd.fastq',
                                 '/path/to/Aprefix3_fwd.fastq'],
        }
        check_rejected(
            prep_info,
            prefixed_fps,
            'The provided files are not prefixed by sample id or '
            'do not match the run prefix values in the prep '
            'information. Offending files:\n raw_forward_seqs: '
            'Aprefix3_fwd.fastq\nraw_reverse_seqs: ')

        # Non-unique run-prefix values
        prep_info = {
            "1.SKB2.640194": {"run_prefix": "prefix1"},
            "1.SKM4.640180": {"run_prefix": "prefix1"},
            "1.SKB3.640195": {"run_prefix": "prefix3"},
        }
        check_rejected(
            prep_info,
            prefixed_fps,
            "The values for the column 'run_prefix' are not "
            "unique for each sample. Repeated values: prefix1 "
            "(2)")

        # Sample id mismatch
        prep_info = {
            "1.SKB2.640194": {"not_a_run_prefix": "prefix1"},
            "1.SKM4.640180": {"not_a_run_prefix": "prefix1"},
            "1.SKB3.640195": {"not_a_run_prefix": "prefix3"},
        }
        check_rejected(
            prep_info,
            prefixed_fps,
            'The provided files are not prefixed by sample id. '
            'Offending files:\n raw_forward_seqs: '
            'prefix1_fwd.fastq, prefix2_fwd.fastq, '
            'Aprefix3_fwd.fastq\nraw_reverse_seqs: ')
    def test_validate_per_sample_FASTQ_error(self):
        # Table-driven failure checks: each scenario is a
        # (prep info, files, expected error) triple that the validator must
        # reject with success False, no artifact info and that exact error.
        # Scenarios run in the listed order and stop at the first failure.
        # NOTE(review): a second definition with this same name is visible
        # in this file, and it expects different messages for the "Run
        # prefix mismatch" and "Sample id mismatch" scenarios - confirm
        # which copy matches the validator and should remain.
        distinct_prefixes = {
            "1.SKB2.640194": {"run_prefix": "prefix1"},
            "1.SKM4.640180": {"run_prefix": "prefix2"},
            "1.SKB3.640195": {"run_prefix": "prefix3"}}
        repeated_prefixes = {
            "1.SKB2.640194": {"run_prefix": "prefix1"},
            "1.SKM4.640180": {"run_prefix": "prefix1"},
            "1.SKB3.640195": {"run_prefix": "prefix3"}}
        without_run_prefix = {
            "1.SKB2.640194": {"not_a_run_prefix": "prefix1"},
            "1.SKM4.640180": {"not_a_run_prefix": "prefix1"},
            "1.SKB3.640195": {"not_a_run_prefix": "prefix3"}}
        # Shared by the last three scenarios
        prefix_named_fps = {'raw_forward_seqs': [
            '/path/to/prefix1_fwd.fastq',
            '/path/to/prefix2_fwd.fastq',
            '/path/to/Aprefix3_fwd.fastq']}

        scenarios = [
            # Filepath type not supported
            (distinct_prefixes,
             {'Unknown': ['/path/to/file1.fastq']},
             ("Filepath type(s) Unknown not supported by artifact "
              "type per_sample_FASTQ. Supported filepath types: "
              "raw_forward_seqs, raw_reverse_seqs, "
              "preprocessed_fastq")),
            # Missing raw_forward_seqs and preprocessed_fastq
            (distinct_prefixes,
             {'raw_reverse_seqs': ['/path/to/file1.fastq']},
             ("Missing required filepath type: raw_forward_seqs "
              "or preprocessed_fastq")),
            # Raw forward seqs and preprocessed_fastq
            (distinct_prefixes,
             {'raw_forward_seqs': ['/path/to/file1.fastq'],
              'preprocessed_fastq': ['/path/to/file1.fastq']},
             ("If raw_forward_seqs is provided, preprocessed_fastq "
              "should not be provided")),
            # Preprocessed fastq and raw_reverse_seqs
            (distinct_prefixes,
             {'raw_reverse_seqs': ['/path/to/file1.fastq'],
              'preprocessed_fastq': ['/path/to/file1.fastq']},
             ("If preprocessed_fastq is provided, raw_reverse_seqs "
              "should not be provided")),
            # Count mismatch
            (distinct_prefixes,
             {'raw_forward_seqs': ['/path/to/file1.fastq'],
              'raw_reverse_seqs': ['/path/to/file1.fastq',
                                   '/path/to/file1.fastq']},
             ("The number of provided files doesn't match the "
              "number of samples (3): 1 raw_forward_seqs, "
              "2 raw_reverse_seqs (optional, 0 is ok)")),
            # preprocessed_fastq count mismatch
            (distinct_prefixes,
             {'preprocessed_fastq': ['/path/to/file1_R1.fastq',
                                     '/path/to/file1_R2.fastq']},
             ("The number of provided files doesn't match the "
              "number of samples (3): 2 raw_forward_seqs, "
              "0 raw_reverse_seqs (optional, 0 is ok)")),
            # Run prefix mismatch
            (distinct_prefixes,
             prefix_named_fps,
             ("The provided files do not match the run prefix "
              "values in the prep information. Offending files: "
              "raw_forward_seqs: Aprefix3_fwd.fastq, "
              "raw_reverse_seqs: ")),
            # Non-unique run-prefix values
            (repeated_prefixes,
             prefix_named_fps,
             ("The values for the column 'run_prefix' are not "
              "unique for each sample. Repeated values: prefix1 "
              "(2)")),
            # Sample id mismatch
            (without_run_prefix,
             prefix_named_fps,
             ("The provided files are not prefixed by sample id. "
              "Please provide the 'run_prefix' column in your prep "
              "information. Offending files: raw_forward_seqs: "
              "prefix1_fwd.fastq, prefix2_fwd.fastq, "
              "Aprefix3_fwd.fastq, raw_reverse_seqs: ")),
        ]

        for prep_info, files, exp_error in scenarios:
            job_id = self._create_template_and_job(
                prep_info, files, "per_sample_FASTQ")
            ok, ainfo, err = _validate_per_sample_FASTQ(
                self.qclient, job_id, prep_info, files)
            self.assertFalse(ok)
            self.assertIsNone(ainfo)
            self.assertEqual(err, exp_error)