def test_validate_sample_sheet_no_data_header(self, mock_csv_reader):
    """
    A sample sheet that is missing the [Data] section header must be
    reported as invalid, with exactly two SampleSheetError entries

    :param mock_csv_reader: mock whose side_effect supplies the csv reader
    :return:
    """
    # Column header and one sample row, but no "[Data]" line above them
    sheet_text = (
        "Sample_Name,Project_ID,File_Forward,File_Reverse\n"
        "my-sample-1, 72,file_1.fastq.gz,file_2.fastq.gz\n"
    )

    # StringIO turns the text into an in-memory pseudo file; the mocked
    # reader getter hands back the items listed in side_effect
    in_memory_sheet = StringIO(sheet_text)
    mock_csv_reader.side_effect = [reader(in_memory_sheet)]

    result = validate_sample_sheet(None)

    # The sheet must be rejected
    self.assertFalse(result.is_valid())
    # Exactly two errors are expected
    self.assertEqual(len(result.error_list), 2)
    # Each of them must be a SampleSheetError
    for position in (0, 1):
        self.assertEqual(type(result.error_list[position]), SampleSheetError)
def get_sequencing_run(sample_sheet, run_data_directory_file_list=None):
    """
    Build a sequencing run from a sample sheet after local validation of
    the run directory / sample sheet

    Raises a ValidationError, with the validation result attached, whenever
    the sequencing run cannot be built

    :param sample_sheet: path of the sample sheet; when no file list is
        supplied, the directory containing it is listed instead
    :param run_data_directory_file_list: optional list of files in the run
        data directory; looked up from the sample sheet directory when None
    :return: SequencingRun
    """
    validation_result = model.ValidationResult()

    # Step 1: make sure a file list is available
    try:
        if run_data_directory_file_list is None:
            run_data_directory_file_list = common.get_file_list(
                os.path.dirname(sample_sheet))
    except exceptions.DirectoryError as directory_error:
        validation_result.add_error(directory_error)
        message = "Errors occurred while parsing files"
        logging.error(message)
        raise exceptions.ValidationError(message, validation_result)

    # Step 2: the sample sheet itself has to validate; from here on the
    # result of that validation is the one attached to any raised error
    validation_result = validation.validate_sample_sheet(sample_sheet)
    if not validation_result.is_valid():
        message = "Errors occurred while getting sample sheet"
        logging.error(message)
        raise exceptions.ValidationError(message, validation_result)

    # Step 3: parse samples and metadata, then assemble the sequencing run
    try:
        samples = sample_parser.parse_sample_list(
            sample_sheet, run_data_directory_file_list)
        metadata = sample_parser.parse_metadata(samples)
        return common.build_sequencing_run_from_samples(samples, metadata)
    except exceptions.SequenceFileError as sequence_file_error:
        validation_result.add_error(sequence_file_error)
        message = "Errors occurred while building sequence run from sample sheet"
        logging.error(message)
        raise exceptions.ValidationError(message, validation_result)
def test_validate_sample_sheet_no_data(self, mock_csv_reader):
    """
    A sample sheet holding only the [Data] header and nothing below it must
    be reported as invalid, with exactly one SampleSheetError

    :param mock_csv_reader: mock whose side_effect supplies the csv reader
    :return:
    """
    # StringIO turns the text into an in-memory pseudo file; the mocked
    # reader getter hands back the items listed in side_effect
    in_memory_sheet = StringIO("[Data]\n")
    mock_csv_reader.side_effect = [reader(in_memory_sheet)]

    result = validate_sample_sheet(None)

    # The sheet must be rejected
    self.assertFalse(result.is_valid())

    # Exactly one error is expected, and it must be a SampleSheetError
    errors = result.error_list
    self.assertEqual(len(errors), 1)
    self.assertEqual(type(errors[0]), SampleSheetError)
def test_validate_sample_sheet_valid(self, mock_csv_reader):
    """
    A well-formed sample sheet ([Data] header, column names, one sample
    row) must come back as valid

    :param mock_csv_reader: mock whose side_effect supplies the csv reader
    :return:
    """
    sheet_lines = (
        "[Data]\n",
        "Sample_Name,Project_ID,File_Forward,File_Reverse\n",
        "my-sample-1,72,file_1.fastq.gz,file_2.fastq.gz\n",
    )

    # StringIO turns the text into an in-memory pseudo file; the mocked
    # reader getter hands back the items listed in side_effect
    in_memory_sheet = StringIO("".join(sheet_lines))
    mock_csv_reader.side_effect = [reader(in_memory_sheet)]

    result = validate_sample_sheet(None)

    # Nothing is wrong with this sheet, so validation must pass
    self.assertTrue(result.is_valid())
def get_sequencing_run(self, sample_sheet, run_data_directory_file_list=None):
    """
    Build a sequencing run from a sample sheet after local validation of
    the run directory / sample sheet

    Raises a ValidationError, with the validation result attached, whenever
    the sequencing run cannot be built

    :param sample_sheet: sample sheet to validate and parse
    :param run_data_directory_file_list: Optional: List of files in the data directory
        to verify against the SampleList.csv file.
        This is used when deploying the parsers on a cloud environment.
    :return: SequencingRun
    """
    # The same check drives both the verification step and the choice of
    # sample list builder below
    has_premade_file_list = run_data_directory_file_list is not None

    # Step 1: the sample sheet itself has to validate
    validation_result = validation.validate_sample_sheet(sample_sheet)
    if not validation_result.is_valid():
        message = "Errors occurred while getting sample sheet"
        logging.error(message)
        raise exceptions.ValidationError(message, validation_result)

    # Step 2: with a premade file list, every file named on the sample
    # sheet has to be present in that list
    try:
        if has_premade_file_list:
            sample_parser.verify_sample_sheet_file_names_in_file_list(
                sample_sheet, run_data_directory_file_list)
    except (exceptions.SequenceFileError, exceptions.SampleSheetError) as known_error:
        validation_result.add_error(known_error)
        message = "Errors occurred while building sequence run from sample sheet"
        logging.error(message)
        raise exceptions.ValidationError(message, validation_result)
    except Exception as unexpected_error:
        validation_result.add_error(unexpected_error)
        message = "System error while building sequencing run"
        logging.error(message)
        raise exceptions.ValidationError(message, validation_result)

    # Step 3: turn the sample sheet into a list of sample objects
    try:
        build_sample_list = (
            sample_parser.build_sample_list_from_sample_sheet_no_verify
            if has_premade_file_list
            else sample_parser.build_sample_list_from_sample_sheet_with_abs_path
        )
        sample_list = build_sample_list(sample_sheet)
    except (exceptions.DirectoryError, exceptions.SampleSheetError) as known_error:
        validation_result.add_error(known_error)
        message = "Errors occurred while parsing files"
        logging.error(message)
        raise exceptions.ValidationError(message, validation_result)
    except Exception as unexpected_error:
        validation_result.add_error(unexpected_error)
        message = "System error while parsing files"
        logging.error(message)
        raise exceptions.ValidationError(message, validation_result)

    # Step 4: a run may not mix single end and paired end samples
    all_same_read_type = sample_parser.only_single_or_paired_in_sample_list(
        sample_list)
    if not all_same_read_type:
        mixed_reads_error = exceptions.SampleSheetError(
            "Your sample sheet is malformed. "
            "SampleSheet cannot have both paired end and single end runs. "
            "Make sure all samples are either paired or single.",
            sample_sheet)
        validation_result.add_error(mixed_reads_error)
        # The log line is deliberately more detailed than the raised message
        logging.error(
            "Error occurred while building file list: Sample sheet has both paired and single end reads"
        )
        raise exceptions.ValidationError(
            "Errors occurred while building file list.", validation_result)

    # Step 5: parse the metadata and assemble the sequencing run
    try:
        metadata = sample_parser.parse_metadata(sample_list)
        return common.build_sequencing_run_from_samples(
            sample_list, metadata, self.get_parser_type_name())
    except exceptions.SequenceFileError as sequence_file_error:
        validation_result.add_error(sequence_file_error)
        message = "Errors occurred while building sequence run from sample sheet"
        logging.error(message)
        raise exceptions.ValidationError(message, validation_result)