Example #1
0
    def test_space_in_sample_name(self):
        """
        Parse the "ngs_space_in_sample_name" fixture run and expect no exception
        :return:
        """
        run_dir = path.join(path_to_module, "ngs_space_in_sample_name")
        sheet_path = path.join(run_dir, "SampleSheet.csv")
        relative_data_dir = parsers.miseq.Parser.get_relative_data_directory()
        data_path = path.join(run_dir, relative_data_dir)
        data_files = parsers.common.get_file_list(data_path)

        # No assertions on purpose: the test only fails if parsing throws an error
        sample_parser.parse_sample_list(
            sample_sheet_file=sheet_path,
            run_data_directory=data_path,
            run_data_directory_file_list=data_files)
Example #2
0
    def test_valid(self):
        """
        Ensure a parsed valid directory matches the expected sample list
        :return:
        """
        directory = path.join(path_to_module, "fake_ngs_data")
        sheet_file = path.join(directory, "SampleSheet.csv")
        data_dir = path.join(
            directory, parsers.miseq.Parser.get_relative_data_directory())
        file_list = parsers.common.get_file_list(data_dir)

        # Expected sample, built by hand to compare against the parsed result
        sample = model.Sample(
            "01-1111", "Super bug", 1, {
                "Sample_Well": "01",
                "index": "AAAAAAAA",
                "Sample_Plate": "1",
                "I7_Index_ID": "N01",
                "sample_project": "6",
                "sequencer_sample_name": "01-1111",
                "I5_Index_ID": "S01",
                "index2": "TTTTTTTT",
            })

        # Expected properties attached to the parsed sequence file
        sequence_file_properties = {
            'Sample_Plate': '1',
            'Sample_Well': '01',
            'I7_Index_ID': 'N01',
            'index': 'AAAAAAAA',
            'I5_Index_ID': 'S01',
            'index2': 'TTTTTTTT'
        }

        # Expected paired-end files for the sample
        file_path_1 = path.join(path_to_module, "fake_ngs_data", "Data",
                                "Intensities", "BaseCalls",
                                "01-1111_S1_L001_R1_001.fastq.gz")
        file_path_2 = path.join(path_to_module, "fake_ngs_data", "Data",
                                "Intensities", "BaseCalls",
                                "01-1111_S1_L001_R2_001.fastq.gz")
        raw_file_list = [file_path_1, file_path_2]

        res = sample_parser.parse_sample_list(
            sample_sheet_file=sheet_file,
            run_data_directory=data_dir,
            run_data_directory_file_list=file_list)

        # Check sample is the same
        self.assertEqual(res[0].get_uploadable_dict(),
                         sample.get_uploadable_dict())
        # Check sequencing file is correct
        self.assertEqual(res[0].sequence_file.properties_dict,
                         sequence_file_properties)
        # list.sort() sorts in place and returns None, so comparing its return
        # values would always pass. Compare sorted copies instead, which also
        # leaves the parsed result unmodified.
        self.assertEqual(sorted(res[0].sequence_file.file_list),
                         sorted(raw_file_list))
Example #3
0
    def test_not_valid_pf_list(self):
        """
        The file list in the sample sheet is invalid

        Parsing the "ngs_not_valid_pf_list" fixture must raise a SequenceFileError
        :return:
        """
        directory = path.join(path_to_module, "ngs_not_valid_pf_list")
        data_dir = path.join(
            directory, parsers.miseq.Parser.get_relative_data_directory())
        file_list = parsers.common.get_file_list(data_dir)
        file_path = path.join(directory, "SampleSheet.csv")

        # The call is expected to raise, so its return value is not captured
        with self.assertRaises(SequenceFileError):
            sample_parser.parse_sample_list(
                sample_sheet_file=file_path,
                run_data_directory=data_dir,
                run_data_directory_file_list=file_list)
Example #4
0
    def test_not_pf_list(self):
        """
        No Valid files were found with names given in sample sheet

        Parsing the "ngs_not_pf_list" fixture must raise a SequenceFileError
        :return:
        """
        directory = path.join(path_to_module, "ngs_not_pf_list")
        data_dir = path.join(
            directory, parsers.miseq.Parser.get_relative_data_directory())
        file_list = parsers.common.get_file_list(data_dir)
        file_path = path.join(directory, "SampleSheet.csv")

        # The call is expected to raise, so its return value is not captured
        with self.assertRaises(SequenceFileError):
            sample_parser.parse_sample_list(
                sample_sheet_file=file_path,
                run_data_directory=data_dir,
                run_data_directory_file_list=file_list)
Example #5
0
    def get_sequencing_run(sample_sheet,
                           run_data_directory=None,
                           run_data_directory_file_list=None):
        """
        Does local validation on the integrity of the run directory / sample sheet
        and builds a sequencing run from them

        Throws a ValidationError with a validation result attached if it cannot make a sequencing run

        :param sample_sheet: Sample Sheet File
        :param run_data_directory: Optional: Directory (including run directory) to data files.
                                   Can be provided for bypassing os calls when developing on cloud systems
        :param run_data_directory_file_list: Optional: List of files in data directory.
                                             Can be provided for bypassing os calls when developing on cloud systems
        :return: SequencingRun
        :raises exceptions.ValidationError: when a DirectoryError, an invalid sample sheet,
                                            a SampleSheetError or a SequenceFileError is encountered
        """

        # Stage 1: resolve the data directory and its file list, unless the caller supplied them
        directory_result = model.ValidationResult()

        try:
            if run_data_directory is None:
                run_data_directory = Parser.get_full_data_directory(sample_sheet)
            if run_data_directory_file_list is None:
                run_data_directory_file_list = common.get_file_list(run_data_directory)
        except exceptions.DirectoryError as directory_error:
            message = "Errors occurred while parsing files"
            directory_result.add_error(directory_error)
            logging.error(message)
            raise exceptions.ValidationError(message, directory_result)

        # Stage 2: the sample sheet itself has to validate before anything is parsed from it
        sheet_result = validation.validate_sample_sheet(sample_sheet)
        if not sheet_result.is_valid():
            message = "Errors occurred while getting sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, sheet_result)

        # Stages 3 and 4 share one fresh result object for the errors they catch
        build_result = model.ValidationResult()

        # Stage 3: parse the run metadata from the sample sheet
        try:
            run_metadata = sample_parser.parse_metadata(sample_sheet)
        except exceptions.SampleSheetError as sheet_error:
            message = "Errors occurred while parsing metadata"
            build_result.add_error(sheet_error)
            logging.error(message)
            raise exceptions.ValidationError(message, build_result)

        # Stage 4: parse the samples and assemble the sequencing run with the metadata
        try:
            sample_list = sample_parser.parse_sample_list(
                sample_sheet, run_data_directory, run_data_directory_file_list)
            sequencing_run = common.build_sequencing_run_from_samples(
                sample_list, run_metadata)
        except exceptions.SequenceFileError as file_error:
            message = "Errors occurred while building sequence run from sample sheet"
            build_result.add_error(file_error)
            logging.error(message)
            raise exceptions.ValidationError(message, build_result)

        return sequencing_run