Example #1
0
    def test_validate_sample_sheet_no_data_header(self, mock_csv_reader):
        """
        A sample sheet whose content has no "[Data]" line must be reported as invalid,
        with exactly two SampleSheetError entries in the result
        :param mock_csv_reader: mock whose side_effect supplies the csv reader
        :return:
        """
        # column names and one sample row, with no "[Data]" line in front
        sheet_rows = [
            "Sample_Name,Project_ID,File_Forward,File_Reverse\n",
            "my-sample-1, 72,file_1.fastq.gz,file_2.fastq.gz\n",
        ]

        # in-memory stand-in for the sample sheet file on disk
        in_memory_sheet = StringIO("".join(sheet_rows))

        # the mocked get_csv_reader() hands back this reader on its one call
        mock_csv_reader.side_effect = [reader(in_memory_sheet)]

        result = validate_sample_sheet(None)

        # the sheet must be rejected ...
        self.assertFalse(result.is_valid())
        # ... with exactly two errors ...
        self.assertEqual(len(result.error_list), 2)
        # ... both of which are SampleSheetError
        for position in (0, 1):
            self.assertEqual(
                type(result.error_list[position]), SampleSheetError)
Example #2
0
    def get_sequencing_run(sample_sheet, run_data_directory_file_list=None):
        """
        Validate the run directory / sample sheet locally and build the sequencing run

        Raises a ValidationError with the validation result attached when listing the
        directory raises DirectoryError, when the sample sheet does not validate, or when
        building the run raises SequenceFileError

        :param sample_sheet: sample sheet path; its directory is listed when no file list is passed in
        :param run_data_directory_file_list: Optional: file list to use instead of listing the directory
        :return: SequencingRun
        """

        validation_result = model.ValidationResult()

        # Without a caller-provided file list, list the directory that holds the sample sheet
        if run_data_directory_file_list is None:
            try:
                run_data_directory_file_list = common.get_file_list(
                    os.path.dirname(sample_sheet))
            except exceptions.DirectoryError as directory_error:
                message = "Errors occurred while parsing files"
                validation_result.add_error(directory_error)
                logging.error(message)
                raise exceptions.ValidationError(message, validation_result)

        # The sample sheet validation result replaces the empty one created above
        validation_result = validation.validate_sample_sheet(sample_sheet)
        if not validation_result.is_valid():
            message = "Errors occurred while getting sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, validation_result)

        # Samples -> metadata -> sequencing run; a sequence file problem becomes a ValidationError
        try:
            samples = sample_parser.parse_sample_list(
                sample_sheet, run_data_directory_file_list)
            metadata = sample_parser.parse_metadata(samples)
            built_run = common.build_sequencing_run_from_samples(
                samples, metadata)
        except exceptions.SequenceFileError as sequence_file_error:
            message = "Errors occurred while building sequence run from sample sheet"
            validation_result.add_error(sequence_file_error)
            logging.error(message)
            raise exceptions.ValidationError(message, validation_result)

        return built_run
Example #3
0
    def test_validate_sample_sheet_no_data(self, mock_csv_reader):
        """
        A sample sheet that contains only the "[Data]" line must be reported as invalid,
        with exactly one SampleSheetError in the result
        :param mock_csv_reader: mock whose side_effect supplies the csv reader
        :return:
        """
        # in-memory stand-in for a sample sheet holding nothing but the "[Data]" line
        in_memory_sheet = StringIO("[Data]\n")

        # the mocked get_csv_reader() hands back this reader on its one call
        mock_csv_reader.side_effect = [reader(in_memory_sheet)]

        result = validate_sample_sheet(None)

        # the sheet must be rejected ...
        self.assertFalse(result.is_valid())
        # ... with a single error ...
        self.assertEqual(len(result.error_list), 1)
        # ... which is a SampleSheetError
        only_error = result.error_list[0]
        self.assertEqual(type(only_error), SampleSheetError)
Example #4
0
    def test_validate_sample_sheet_valid(self, mock_csv_reader):
        """
        A sample sheet with the "[Data]" line, the column names and one sample row
        must be reported as valid
        :param mock_csv_reader: mock whose side_effect supplies the csv reader
        :return:
        """
        # "[Data]" line, column names, then a single sample row
        sheet_rows = [
            "[Data]\n",
            "Sample_Name,Project_ID,File_Forward,File_Reverse\n",
            "my-sample-1,72,file_1.fastq.gz,file_2.fastq.gz\n",
        ]

        # in-memory stand-in for the sample sheet file on disk
        in_memory_sheet = StringIO("".join(sheet_rows))

        # the mocked get_csv_reader() hands back this reader on its one call
        mock_csv_reader.side_effect = [reader(in_memory_sheet)]

        result = validate_sample_sheet(None)

        # nothing should have been flagged
        self.assertTrue(result.is_valid())
Example #5
0
    def get_sequencing_run(self,
                           sample_sheet,
                           run_data_directory_file_list=None):
        """
        Validate the sample sheet locally and build the sequencing run from it

        Each failure handled here is re-raised as a ValidationError that carries the
        validation result with the triggering error added to it

        :param sample_sheet: sample sheet handed to the validation and sample parsing helpers
        :param run_data_directory_file_list: Optional: List of files in the data directory to verify against the
        SampleList.csv file. This is used when deploying the parsers on a cloud environment.
        :return: SequencingRun
        """

        has_file_list = run_data_directory_file_list is not None

        # Step 1: the sample sheet itself has to validate
        validation_result = validation.validate_sample_sheet(sample_sheet)
        if not validation_result.is_valid():
            message = "Errors occurred while getting sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, validation_result)

        # Step 2: with a premade file list, the file names on the sheet are checked against it
        if has_file_list:
            try:
                sample_parser.verify_sample_sheet_file_names_in_file_list(
                    sample_sheet, run_data_directory_file_list)
            except (exceptions.SequenceFileError,
                    exceptions.SampleSheetError) as sheet_error:
                message = "Errors occurred while building sequence run from sample sheet"
                validation_result.add_error(sheet_error)
                logging.error(message)
                raise exceptions.ValidationError(message, validation_result)
            except Exception as system_error:
                # anything unexpected is still reported through a ValidationError
                message = "System error while building sequencing run"
                validation_result.add_error(system_error)
                logging.error(message)
                raise exceptions.ValidationError(message, validation_result)

        # Step 3: build the sample objects; which builder is used depends on the file list
        try:
            if has_file_list:
                samples = sample_parser.build_sample_list_from_sample_sheet_no_verify(
                    sample_sheet)
            else:
                samples = sample_parser.build_sample_list_from_sample_sheet_with_abs_path(
                    sample_sheet)
        except (exceptions.DirectoryError,
                exceptions.SampleSheetError) as parse_error:
            message = "Errors occurred while parsing files"
            validation_result.add_error(parse_error)
            logging.error(message)
            raise exceptions.ValidationError(message, validation_result)
        except Exception as system_error:
            # anything unexpected is still reported through a ValidationError
            message = "System error while parsing files"
            validation_result.add_error(system_error)
            logging.error(message)
            raise exceptions.ValidationError(message, validation_result)

        # Step 4: a run may not mix single end and paired end samples
        uniform_read_type = sample_parser.only_single_or_paired_in_sample_list(
            samples)
        if not uniform_read_type:
            mixed_error = exceptions.SampleSheetError(
                ("Your sample sheet is malformed. "
                 "SampleSheet cannot have both paired end and single end runs. "
                 "Make sure all samples are either paired or single."),
                sample_sheet)
            validation_result.add_error(mixed_error)
            logging.error(
                "Error occurred while building file list: Sample sheet has both paired and single end reads"
            )
            raise exceptions.ValidationError(
                "Errors occurred while building file list.", validation_result)

        # Step 5: metadata + samples -> sequencing run; only SequenceFileError is translated here
        try:
            metadata = sample_parser.parse_metadata(samples)
            built_run = common.build_sequencing_run_from_samples(
                samples, metadata, self.get_parser_type_name())
        except exceptions.SequenceFileError as sequence_file_error:
            message = "Errors occurred while building sequence run from sample sheet"
            validation_result.add_error(sequence_file_error)
            logging.error(message)
            raise exceptions.ValidationError(message, validation_result)

        return built_run