コード例 #1
0
    def test_build_valid_with_description_field(self):
        """
        Build a SequencingRun from the "iseq_with_desc_field" fixture directory and check
        that it holds one Project with three Samples, including description and SequenceFile
        :return:
        """
        run_dir = path.join(path_to_module, "iseq_with_desc_field")
        sample_sheet = path.join(run_dir, "SampleSheet.csv")
        run_metadata = sample_parser.parse_metadata(sample_sheet)

        # The relative data directory contains a "*" wildcard; swap it for the fixture's folder name
        # NOTE(review): the miniseq parser is used here although the fixture is named "iseq" - confirm intended
        relative_data_dir = parsers.miniseq.Parser.get_relative_data_directory()
        data_dir = path.join(run_dir, relative_data_dir).replace("*", "some_dir")
        data_files = parsers.common.get_file_list(data_dir)

        samples = sample_parser.parse_sample_list(sample_sheet_file=sample_sheet,
                                                  run_data_directory=data_dir,
                                                  run_data_directory_file_list=data_files)
        sequencing_run = parsers.common.build_sequencing_run_from_samples(samples, run_metadata)

        # The builder hands back a SequencingRun
        self.assertEqual(type(sequencing_run), model.SequencingRun)

        # Exactly one project, and it is a Project
        projects = sequencing_run.project_list
        self.assertEqual(len(projects), 1)
        project = projects[0]
        self.assertEqual(type(project), model.Project)

        # The project carries three samples
        project_samples = project.sample_list
        self.assertEqual(len(project_samples), 3)

        # First sample: correct type, description taken from the sheet, and a SequenceFile attached
        first_sample = project_samples[0]
        self.assertEqual(type(first_sample), model.Sample)
        self.assertEqual(first_sample.description, "desc1")
        self.assertEqual(type(first_sample.sequence_file), model.SequenceFile)
コード例 #2
0
    def test_parse_metadata_paired_valid(self, mock_csv_reader):
        """
        Parse metadata from an in-memory sample sheet that lists two reads and
        check every expected metadata value, including the PAIRED_END layout
        :return:
        """
        # [Header] section rows
        header_rows = (
            "Local Run Manager Analysis Id,4004\n"
            "Experiment Name,Some_Test_Data\n"
            "Date,2015-05-14\n"
            "Workflow,GenerateFastQWorkflow\n"
            "Description,12-34\n"
            "Chemistry,Yes\n"
        )

        # [Reads] section: two read length entries
        read_rows = (
            "151\n"
            "151\n"
        )

        # [Data] section: column names followed by three sample rows
        data_columns = ("Sample_ID,Sample_Name,"
                        "I7_Index_ID,index,I5_Index_ID,index2,Sample_Project")

        data_rows = ("15-0318-4004,15-0318,N701,TAAGGCGA,S502,CTCTCTAT,203\n"
                     "15-0455-4004,15-0455,N701,TAAGGCGA,S503,TATCCTCT,203\n"
                     "15-0462-4004,15-0462,N701,TAAGGCGA,S505,GTAAGGAG,203\n")

        sheet_template = (
            "[Header]\n"
            "{h_field_values}\n"
            "[Reads]\n"
            "{reads}\n"
            "[Data]\n"
            "{d_headers}\n"
            "{d_field_values}"
        )
        sheet_text = sheet_template.format(h_field_values=header_rows,
                                           reads=read_rows,
                                           d_headers=data_columns,
                                           d_field_values=data_rows)

        # Wrap the text in a memory file so csv can read it like a real sample sheet
        in_memory_sheet = StringIO(sheet_text)

        # The mocked csv reader hands back this reader when parse_metadata() asks for one,
        # so the file argument passed below is never actually opened
        mock_csv_reader.side_effect = [reader(in_memory_sheet)]

        metadata = sample_parser.parse_metadata(None)

        # Checked in order: the values we care about most first, then the remaining header fields
        expected_values = (
            ('readLengths', "151"),
            ('layoutType', "PAIRED_END"),
            ('localrunmanager', "4004"),
            ('experimentName', "Some_Test_Data"),
            ('date', "2015-05-14"),
            ('workflow', "GenerateFastQWorkflow"),
            ('description', "12-34"),
            ('chemistry', "Yes"),
        )
        for key, expected in expected_values:
            self.assertEqual(metadata[key], expected)
コード例 #3
0
    def test_parse_metadata(self):
        """
        Parse metadata from the real SampleSheet.csv in "fake_ngs_data" (no mocked files)
        and compare the whole result against the expected dictionary
        :return:
        """
        expected_metadata = {
            "readLengths": "151",
            "workflow": "GenerateFastQWorkflow",
            "localrunmanager": "4004",
            "date": "10/15/2013",
            "chemistry": "Amplicon",
            "description": "Superbug",
            "experimentName": '1',
            "layoutType": "PAIRED_END",
        }

        sample_sheet = path.join(path_to_module, "fake_ngs_data", "SampleSheet.csv")
        parsed_metadata = sample_parser.parse_metadata(sample_sheet)

        # Full dictionary comparison: any extra or missing key fails the test
        self.assertEqual(expected_metadata, parsed_metadata)
コード例 #4
0
ファイル: parser.py プロジェクト: DarianHole/irida-uploader
    def get_sequencing_run(sample_sheet,
                           run_data_directory=None,
                           run_data_directory_file_list=None):
        """
        Does local validation on the integrity of the run directory / sample sheet
        and builds a SequencingRun from them

        Steps, in order: resolve the data directory and its file list (unless provided),
        validate the sample sheet, parse the run metadata, then parse the sample list
        and build the sequencing run. The first step that fails stops the process.

        :param sample_sheet: Sample Sheet File
        :param run_data_directory: Optional: Directory (including run directory) to data files.
                                   Can be provided for bypassing os calls when developing on cloud systems
        :param run_data_directory_file_list: Optional: List of files in data directory.
                                             Can be provided for bypassing os calls when developing on cloud systems
        :return: SequencingRun
        :raises exceptions.ValidationError: with a ValidationResult attached, when any step fails.
                                            When the failure came from a DirectoryError, SampleSheetError
                                            or SequenceFileError, that error is recorded in the
                                            ValidationResult and chained as the cause of the ValidationError
        """

        # Get data directory and file list, only looking them up when the caller did not supply them
        validation_result = model.ValidationResult()

        try:
            if run_data_directory is None:
                run_data_directory = Parser.get_full_data_directory(
                    sample_sheet)
            if run_data_directory_file_list is None:
                run_data_directory_file_list = common.get_file_list(
                    run_data_directory)
        except exceptions.DirectoryError as error:
            validation_result.add_error(error)
            logging.error("Errors occurred while parsing files")
            # Chain explicitly so the traceback shows the directory error as the direct cause
            raise exceptions.ValidationError(
                "Errors occurred while parsing files",
                validation_result) from error

        # Try to get the sample sheet, validate that the sample sheet is valid
        validation_result = validation.validate_sample_sheet(sample_sheet)
        if not validation_result.is_valid():
            logging.error("Errors occurred while getting sample sheet")
            raise exceptions.ValidationError(
                "Errors occurred while getting sample sheet",
                validation_result)

        # Try to parse the meta data from the sample sheet, throw validation error if errors occur
        # A fresh result is used from here on; it is shared by the metadata and run building steps
        validation_result = model.ValidationResult()
        try:
            run_metadata = sample_parser.parse_metadata(sample_sheet)
        except exceptions.SampleSheetError as error:
            validation_result.add_error(error)
            logging.error("Errors occurred while parsing metadata")
            raise exceptions.ValidationError(
                "Errors occurred while parsing metadata",
                validation_result) from error

        # Try to build sequencing run from sample sheet & meta data, raise validation error if errors occur
        try:
            sample_list = sample_parser.parse_sample_list(
                sample_sheet, run_data_directory, run_data_directory_file_list)
            sequencing_run = common.build_sequencing_run_from_samples(
                sample_list, run_metadata)
        except exceptions.SequenceFileError as error:
            validation_result.add_error(error)
            logging.error(
                "Errors occurred while building sequence run from sample sheet"
            )
            raise exceptions.ValidationError(
                "Errors occurred while building sequence run from sample sheet",
                validation_result) from error

        return sequencing_run