def test_validate_per_sample_FASTQ(self):
    """Validate forward reads whose file names start with the sample id.

    The prep information carries no ``run_prefix`` column and every file
    name begins with a sample id stripped of its leading ``1.``. The
    validator is expected to report success, a single per_sample_FASTQ
    ArtifactInfo listing all files, and an empty error message.
    """
    artifact_type = "per_sample_FASTQ"
    fwd_paths = [
        '/path/to/SKB2.640194_file.fastq',
        '/path/to/SKM4.640180_file.fastq',
        '/path/to/SKB3.640195_file.fastq',
    ]
    # Expected (filepath, filepath type) pairs, in the order provided
    expected_fps = [(path, 'raw_forward_seqs') for path in fwd_paths]

    prep_info = {
        "1.SKB2.640194": {"not_a_run_prefix": "prefix1"},
        "1.SKM4.640180": {"not_a_run_prefix": "prefix1"},
        "1.SKB3.640195": {"not_a_run_prefix": "prefix2"},
    }
    files = {'raw_forward_seqs': list(fwd_paths)}

    job_id = self._create_template_and_job(prep_info, files, artifact_type)
    success, ainfo, error = _validate_per_sample_FASTQ(
        self.qclient, job_id, prep_info, files)

    self.assertTrue(success)
    self.assertEqual(
        ainfo, [ArtifactInfo(None, "per_sample_FASTQ", expected_fps)])
    self.assertEqual(error, "")
def test_validate_per_sample_FASTQ(self):
    """Check the success path of _validate_per_sample_FASTQ.

    Three samples without a ``run_prefix`` column are paired with three
    ``raw_forward_seqs`` files named after the sample ids (minus the
    leading ``1.``). The test asserts a successful validation, one
    per_sample_FASTQ ArtifactInfo holding every file, and no error text.
    """
    fp_type = 'raw_forward_seqs'
    skb2_fp = '/path/to/SKB2.640194_file.fastq'
    skm4_fp = '/path/to/SKM4.640180_file.fastq'
    skb3_fp = '/path/to/SKB3.640195_file.fastq'

    prep_info = {
        "1.SKB2.640194": {"not_a_run_prefix": "prefix1"},
        "1.SKM4.640180": {"not_a_run_prefix": "prefix1"},
        "1.SKB3.640195": {"not_a_run_prefix": "prefix2"},
    }
    files = {fp_type: [skb2_fp, skm4_fp, skb3_fp]}

    job_id = self._create_template_and_job(
        prep_info, files, "per_sample_FASTQ")
    result = _validate_per_sample_FASTQ(
        self.qclient, job_id, prep_info, files)
    was_valid, artifacts, message = result

    self.assertTrue(was_valid)
    expected = [
        ArtifactInfo(
            None,
            "per_sample_FASTQ",
            [(skb2_fp, fp_type), (skm4_fp, fp_type), (skb3_fp, fp_type)],
        )
    ]
    self.assertEqual(artifacts, expected)
    self.assertEqual(message, "")
def test_validate_per_sample_FASTQ_error(self):
    """Exercise the failure modes of _validate_per_sample_FASTQ.

    Every scenario creates a new template and job, runs the validator and
    asserts that it returns a falsy success flag, ``None`` as artifact
    information and the exact error message listed for that scenario.
    Scenarios run in the order they are declared.
    """
    artifact_type = "per_sample_FASTQ"

    unique_prefixes = {"1.SKB2.640194": {"run_prefix": "prefix1"},
                       "1.SKM4.640180": {"run_prefix": "prefix2"},
                       "1.SKB3.640195": {"run_prefix": "prefix3"}}
    repeated_prefixes = {"1.SKB2.640194": {"run_prefix": "prefix1"},
                         "1.SKM4.640180": {"run_prefix": "prefix1"},
                         "1.SKB3.640195": {"run_prefix": "prefix3"}}
    no_run_prefix = {"1.SKB2.640194": {"not_a_run_prefix": "prefix1"},
                     "1.SKM4.640180": {"not_a_run_prefix": "prefix1"},
                     "1.SKB3.640195": {"not_a_run_prefix": "prefix3"}}

    # Shared by the last three scenarios: the third file name does not
    # start with "prefix3"
    prefixed_files = {'raw_forward_seqs': ['/path/to/prefix1_fwd.fastq',
                                           '/path/to/prefix2_fwd.fastq',
                                           '/path/to/Aprefix3_fwd.fastq']}

    # (prep information, files, expected error message)
    scenarios = [
        # Filepath type not supported
        (unique_prefixes,
         {'Unknown': ['/path/to/file1.fastq']},
         "Filepath type(s) Unknown not supported by artifact "
         "type per_sample_FASTQ. Supported filepath types: "
         "raw_forward_seqs, raw_reverse_seqs, "
         "preprocessed_fastq"),
        # Missing raw_forward_seqs and preprocessed_fastq
        (unique_prefixes,
         {'raw_reverse_seqs': ['/path/to/file1.fastq']},
         "Missing required filepath type: raw_forward_seqs "
         "or preprocessed_fastq"),
        # Raw forward seqs and preprocessed_fastq
        (unique_prefixes,
         {'raw_forward_seqs': ['/path/to/file1.fastq'],
          'preprocessed_fastq': ['/path/to/file1.fastq']},
         "If raw_forward_seqs is provided, preprocessed_fastq "
         "should not be provided"),
        # Preprocessed fastq and raw_reverse_seqs
        (unique_prefixes,
         {'raw_reverse_seqs': ['/path/to/file1.fastq'],
          'preprocessed_fastq': ['/path/to/file1.fastq']},
         "If preprocessed_fastq is provided, raw_reverse_seqs "
         "should not be provided"),
        # Count mismatch
        (unique_prefixes,
         {'raw_forward_seqs': ['/path/to/file1.fastq'],
          'raw_reverse_seqs': ['/path/to/file1.fastq',
                               '/path/to/file1.fastq']},
         "The number of provided files doesn't match the "
         "number of samples (3): 1 raw_forward_seqs, "
         "2 raw_reverse_seqs (optional, 0 is ok)"),
        # preprocessed_fastq count mismatch
        (unique_prefixes,
         {'preprocessed_fastq': ['/path/to/file1_R1.fastq',
                                 '/path/to/file1_R2.fastq']},
         "The number of provided files doesn't match the "
         "number of samples (3): 2 raw_forward_seqs, "
         "0 raw_reverse_seqs (optional, 0 is ok)"),
        # Run prefix mismatch
        (unique_prefixes,
         prefixed_files,
         'The provided files are not prefixed by sample id or '
         'do not match the run prefix values in the prep '
         'information. Offending files:\n raw_forward_seqs: '
         'Aprefix3_fwd.fastq\nraw_reverse_seqs: '),
        # Non-unique run-prefix values
        (repeated_prefixes,
         prefixed_files,
         "The values for the column 'run_prefix' are not "
         "unique for each sample. Repeated values: prefix1 "
         "(2)"),
        # Sample id mismatch
        (no_run_prefix,
         prefixed_files,
         'The provided files are not prefixed by sample id. '
         'Offending files:\n raw_forward_seqs: '
         'prefix1_fwd.fastq, prefix2_fwd.fastq, '
         'Aprefix3_fwd.fastq\nraw_reverse_seqs: '),
    ]

    for prep_info, files, expected_error in scenarios:
        job_id = self._create_template_and_job(
            prep_info, files, artifact_type)
        success, ainfo, error = _validate_per_sample_FASTQ(
            self.qclient, job_id, prep_info, files)
        self.assertFalse(success)
        self.assertIsNone(ainfo)
        self.assertEqual(error, expected_error)
def test_validate_per_sample_FASTQ_error(self):
    """Verify the error reported for each invalid per_sample_FASTQ input.

    For every case a template and job are created, the validator is run,
    and the result must be a falsy success flag, ``None`` artifact
    information and the exact error message expected for that case.
    """

    def expect_failure(prep_info, files, message):
        # Shared "create job -> validate -> assert failure" sequence
        job_id = self._create_template_and_job(
            prep_info, files, "per_sample_FASTQ")
        ok, ainfo, error = _validate_per_sample_FASTQ(
            self.qclient, job_id, prep_info, files)
        self.assertFalse(ok)
        self.assertIsNone(ainfo)
        self.assertEqual(error, message)

    prep_info = {
        "1.SKB2.640194": {"run_prefix": "prefix1"},
        "1.SKM4.640180": {"run_prefix": "prefix2"},
        "1.SKB3.640195": {"run_prefix": "prefix3"},
    }

    # Filepath type not supported
    expect_failure(
        prep_info,
        {'Unknown': ['/path/to/file1.fastq']},
        "Filepath type(s) Unknown not supported by artifact "
        "type per_sample_FASTQ. Supported filepath types: "
        "raw_forward_seqs, raw_reverse_seqs, "
        "preprocessed_fastq")

    # Missing raw_forward_seqs and preprocessed_fastq
    expect_failure(
        prep_info,
        {'raw_reverse_seqs': ['/path/to/file1.fastq']},
        "Missing required filepath type: raw_forward_seqs "
        "or preprocessed_fastq")

    # Raw forward seqs and preprocessed_fastq
    expect_failure(
        prep_info,
        {'raw_forward_seqs': ['/path/to/file1.fastq'],
         'preprocessed_fastq': ['/path/to/file1.fastq']},
        "If raw_forward_seqs is provided, preprocessed_fastq "
        "should not be provided")

    # Preprocessed fastq and raw_reverse_seqs
    expect_failure(
        prep_info,
        {'raw_reverse_seqs': ['/path/to/file1.fastq'],
         'preprocessed_fastq': ['/path/to/file1.fastq']},
        "If preprocessed_fastq is provided, raw_reverse_seqs "
        "should not be provided")

    # Count mismatch
    expect_failure(
        prep_info,
        {'raw_forward_seqs': ['/path/to/file1.fastq'],
         'raw_reverse_seqs': ['/path/to/file1.fastq',
                              '/path/to/file1.fastq']},
        "The number of provided files doesn't match the "
        "number of samples (3): 1 raw_forward_seqs, "
        "2 raw_reverse_seqs (optional, 0 is ok)")

    # preprocessed_fastq count mismatch
    expect_failure(
        prep_info,
        {'preprocessed_fastq': ['/path/to/file1_R1.fastq',
                                '/path/to/file1_R2.fastq']},
        "The number of provided files doesn't match the "
        "number of samples (3): 2 raw_forward_seqs, "
        "0 raw_reverse_seqs (optional, 0 is ok)")

    # Run prefix mismatch; these files are reused by the remaining cases
    files = {
        'raw_forward_seqs': [
            '/path/to/prefix1_fwd.fastq',
            '/path/to/prefix2_fwd.fastq',
            '/path/to/Aprefix3_fwd.fastq',
        ]
    }
    expect_failure(
        prep_info,
        files,
        "The provided files do not match the run prefix "
        "values in the prep information. Offending files: "
        "raw_forward_seqs: Aprefix3_fwd.fastq, "
        "raw_reverse_seqs: ")

    # Non-unique run-prefix values
    prep_info = {
        "1.SKB2.640194": {"run_prefix": "prefix1"},
        "1.SKM4.640180": {"run_prefix": "prefix1"},
        "1.SKB3.640195": {"run_prefix": "prefix3"},
    }
    expect_failure(
        prep_info,
        files,
        "The values for the column 'run_prefix' are not "
        "unique for each sample. Repeated values: prefix1 "
        "(2)")

    # Sample id mismatch
    prep_info = {
        "1.SKB2.640194": {"not_a_run_prefix": "prefix1"},
        "1.SKM4.640180": {"not_a_run_prefix": "prefix1"},
        "1.SKB3.640195": {"not_a_run_prefix": "prefix3"},
    }
    expect_failure(
        prep_info,
        files,
        "The provided files are not prefixed by sample id. "
        "Please provide the 'run_prefix' column in your prep "
        "information. Offending files: raw_forward_seqs: "
        "prefix1_fwd.fastq, prefix2_fwd.fastq, "
        "Aprefix3_fwd.fastq, raw_reverse_seqs: ")