def _make_seq_run():
    """
    Build a sequencing run whose samples point at the fastq files under
    fake_ngs_data, for use by the tests.

    One sample is created per fastq pair; all samples are placed in a single
    project, and that project is wrapped in a paired-end "miseq" run.

    :return: SequencingRun object
    """
    base_calls_dir = path.join(path_to_module, "fake_ngs_data", "Data",
                               "Intensities", "BaseCalls")
    fastq_pairs = (
        ("01-1111_S1_L001_R1_001.fastq.gz", "01-1111_S1_L001_R2_001.fastq.gz"),
        ("02-2222_S1_L001_R1_001.fastq.gz", "02-2222_S1_L001_R2_001.fastq.gz"),
        ("03-3333_S1_L001_R1_001.fastq.gz", "03-3333_S1_L001_R2_001.fastq.gz"),
    )

    # one SequenceFile per forward/reverse pair
    sequence_files = [
        model.SequenceFile(
            [path.join(base_calls_dir, file_name) for file_name in pair])
        for pair in fastq_pairs
    ]

    # every sample is created with the same name, description and number;
    # only the attached sequence file differs
    samples = []
    for sequence_file in sequence_files:
        sample = model.Sample("test_sample", "description", 1)
        sample.sequence_file = sequence_file
        samples.append(sample)

    project = model.Project("test_project", samples, "description")
    return model.SequencingRun({"layoutType": "PAIRED_END"}, [project],
                               "miseq")
예제 #2
0
    def test_parse_samples_valid(self):
        """
        Parse SampleList.csv into a sequencing run and check its metadata,
        project ids and sample names against the expected values
        :return:
        """
        expected_first = model.Sample("my-sample-1", "")
        expected_second = model.Sample("my-sample-2", "")
        expected_third = model.Sample("my-sample-3", "")

        sample_list_path = path.join(path_to_module, "fake_dir_data",
                                     "SampleList.csv")
        run = Parser().get_sequencing_run(sample_list_path)

        self.assertEqual(run.metadata, {'layoutType': 'PAIRED_END'})

        for position, project_id in enumerate(("75", "76")):
            self.assertEqual(run.project_list[position].id, project_id)

        # (project index, sample index within that project, expected sample)
        expectations = (
            (0, 0, expected_first),
            (0, 1, expected_second),
            (1, 0, expected_third),
        )
        for project_index, sample_index, expected in expectations:
            parsed = run.project_list[project_index].sample_list[sample_index]
            self.assertEqual(parsed.sample_name, expected.sample_name)
예제 #3
0
    def test_parse_samples_valid(self):
        """
        Check that the samples parsed from the fake_ngs_data SampleSheet.csv
        produce the same uploadable dicts as the hand-built expected samples
        :return:
        """
        expected_samples = [
            model.Sample(
                "01-1111",
                "",
                1,
                {
                    "index": "AAAAAAAA",
                    "I7_Index_ID": "N01",
                    "sample_project": "6",
                    "sequencer_sample_ID": "01-1111-4004",
                    "I5_Index_ID": "S01",
                    "index2": "TTTTTTTT"
                }
            ),
            model.Sample(
                "02-2222",
                "",
                2,
                {
                    "index": "GGGGGGGG",
                    "I7_Index_ID": "N02",
                    "sample_project": "6",
                    "sequencer_sample_ID": "02-2222-4004",
                    "I5_Index_ID": "S02",
                    "index2": "CCCCCCCC"
                }
            ),
            model.Sample(
                "03-3333",
                "",
                3,
                {
                    "index": "CCCCCCCC",
                    "I7_Index_ID": "N03",
                    "sample_project": "6",
                    "sequencer_sample_ID": "03-3333-4004",
                    "I5_Index_ID": "S03",
                    "index2": "GGGGGGGG"
                }
            ),
        ]

        sample_sheet_path = path.join(path_to_module, "fake_ngs_data",
                                      "SampleSheet.csv")
        parsed_samples = sample_parser._parse_samples(sample_sheet_path)

        # zip stops at the shorter of the two lists, so only positions present
        # in both the parsed and the expected list are compared
        for parsed, expected in zip(parsed_samples, expected_samples):
            parsed_dict = parsed.get_uploadable_dict()
            expected_dict = expected.get_uploadable_dict()
            self.assertEqual(parsed_dict, expected_dict)
예제 #4
0
    def test_parse_samples_valid(self):
        """
        Check that the samples parsed from the fake_ngs_data UploadList.csv
        produce the same uploadable dicts as the hand-built expected samples
        :return:
        """
        expected_samples = [
            model.Sample(
                "01A100001",
                "",
                1,
                {
                    "Index": "AAAAAAAA",
                    "sample_project": "6",
                    "Index2": "TTTTTTTT",
                    "description": ''
                }
            ),
            model.Sample(
                "01A100002",
                "",
                2,
                {
                    "Index": "GGGGGGGG",
                    "sample_project": "6",
                    "Index2": "CCCCCCCC",
                    "description": ''
                }
            ),
            model.Sample(
                "01A100003",
                "",
                3,
                {
                    "Index": "GGGGGGGG",
                    "sample_project": "6",
                    "Index2": "CCCCCCCC",
                    "description": ''
                }
            ),
        ]

        upload_list_path = path.join(path_to_module, "fake_ngs_data",
                                     "UploadList.csv")
        parsed_samples = sample_parser._parse_samples(upload_list_path)

        # zip stops at the shorter of the two lists, so only positions present
        # in both the parsed and the expected list are compared
        for parsed, expected in zip(parsed_samples, expected_samples):
            parsed_dict = parsed.get_uploadable_dict()
            expected_dict = expected.get_uploadable_dict()
            self.assertEqual(parsed_dict, expected_dict)
예제 #5
0
    def test_parse_out_sequence_file(self):
        """
        Run _parse_out_sequence_file on a sample and check both results:
        the dict it returns for the sequence file, and the uploadable dict
        the sample reports after the call
        :return:
        """
        expected_sequence_file_dict = {'Index': 'GGGGGGGG',
                                       'sampleProject': '6',
                                       'Index2': 'CCCCCCCC',
                                       'description': ''}

        expected_uploadable_dict = {'Index': 'GGGGGGGG',
                                    'sampleName': '01A100003',
                                    'sampleProject': '6',
                                    'Index2': 'CCCCCCCC',
                                    'description': ''}

        sample = model.Sample(
            "01A100003",
            "",
            3,
            {
                "Index": "GGGGGGGG",
                "sampleProject": "6",
                "Index2": "CCCCCCCC",
                "description": ''
            }
        )

        parsed_out = sample_parser._parse_out_sequence_file(sample)

        self.assertEqual(sample.get_uploadable_dict(), expected_uploadable_dict)
        self.assertEqual(parsed_out, expected_sequence_file_dict)
예제 #6
0
    def test_send_and_get_sample(self):
        """
        Send a sample to a freshly created project, then fetch the project's
        samples and check they match what was sent
        :return:
        """
        sample_name = "test_sample"
        sample_desc = "test_sample_desc"

        # the sample needs a project to live in, so create that first
        project = model.Project(name="test_project_2",
                                description="test_project_description")
        project_response = self.test_api.send_project(project)
        project_identifier = project_response['resource']['identifier']

        # send the sample and check the values echoed back in the response
        sample_response = self.test_api.send_sample(
            model.Sample(sample_name, sample_desc), project_identifier)
        sent_resource = sample_response['resource']
        self.assertEqual(sent_resource['sampleName'], sample_name)
        self.assertEqual(sent_resource['description'], sample_desc)

        # the project should now hold exactly the one sample we sent
        fetched_samples = self.test_api.get_samples(project_identifier)
        self.assertEqual(len(fetched_samples), 1)

        fetched = fetched_samples[0]
        self.assertEqual(type(fetched), model.Sample)
        self.assertEqual(fetched.sample_name, sample_name)
        self.assertEqual(fetched.description, sample_desc)
예제 #7
0
    def test_parse_samples_valid(self):
        """
        Check that the samples parsed from the fake_nextseq_run SampleSheet.csv
        produce the same uploadable dicts as the hand-built expected samples
        :return:
        """
        expected_samples = [
            model.Sample(
                "SA20121712",
                "Code Blue",
                1,
                {
                    "Sample_Well": "A01",
                    "index": "TAAGGCGA",
                    "Sample_Plate": "NGS-001 Plate FNC-7",
                    "I7_Index_ID": "N701",
                    "sample_project": "67",
                    "sequencer_sample_name": "SA20121712",
                    "I5_Index_ID": "S502",
                    "index2": "ATAGAGAG",
                }
            ),
            model.Sample(
                "SA20121716",
                "Code Blue",
                2,
                {
                    "Sample_Well": "A02",
                    "index": "CGTACTAG",
                    "Sample_Plate": "NGS-001 Plate FNC-7",
                    "I7_Index_ID": "N702",
                    "sample_project": "68",
                    "sequencer_sample_name": "SA20121716",
                    "I5_Index_ID": "S502",
                    "index2": "ATAGAGAG",
                }
            ),
        ]

        sample_sheet_path = path.join(path_to_module, "fake_nextseq_run",
                                      "SampleSheet.csv")
        parsed_samples = sample_parser._parse_samples(sample_sheet_path)

        # zip stops at the shorter of the two lists, so only positions present
        # in both the parsed and the expected list are compared
        for parsed, expected in zip(parsed_samples, expected_samples):
            parsed_dict = parsed.get_uploadable_dict()
            expected_dict = expected.get_uploadable_dict()
            self.assertEqual(parsed_dict, expected_dict)
예제 #8
0
    def test_valid(self):
        """
        Ensure a parsed valid directory matches the expected sample list

        Parses the fake_ngs_data sample sheet together with the miseq data
        directory and checks the first resulting sample: its uploadable dict,
        its sequence file properties, and its sequence file paths.
        :return:
        """
        directory = path.join(path_to_module, "fake_ngs_data")
        sheet_file = path.join(directory, "SampleSheet.csv")
        data_dir = path.join(
            directory, parsers.miseq.Parser.get_relative_data_directory())
        file_list = parsers.common.get_file_list(data_dir)

        sample = model.Sample(
            "01-1111", "Super bug", 1, {
                "Sample_Well": "01",
                "index": "AAAAAAAA",
                "Sample_Plate": "1",
                "I7_Index_ID": "N01",
                "sample_project": "6",
                "sequencer_sample_name": "01-1111",
                "I5_Index_ID": "S01",
                "index2": "TTTTTTTT",
            })

        sequence_file_properties = {
            'Sample_Plate': '1',
            'Sample_Well': '01',
            'I7_Index_ID': 'N01',
            'index': 'AAAAAAAA',
            'I5_Index_ID': 'S01',
            'index2': 'TTTTTTTT'
        }

        file_path_1 = path.join(path_to_module, "fake_ngs_data", "Data",
                                "Intensities", "BaseCalls",
                                "01-1111_S1_L001_R1_001.fastq.gz")
        file_path_2 = path.join(path_to_module, "fake_ngs_data", "Data",
                                "Intensities", "BaseCalls",
                                "01-1111_S1_L001_R2_001.fastq.gz")
        raw_file_list = [file_path_1, file_path_2]

        res = sample_parser.parse_sample_list(
            sample_sheet_file=sheet_file,
            run_data_directory=data_dir,
            run_data_directory_file_list=file_list)

        # Check sample is the same
        self.assertEqual(res[0].get_uploadable_dict(),
                         sample.get_uploadable_dict())
        # Check sequencing file is correct
        self.assertEqual(res[0].sequence_file.properties_dict,
                         sequence_file_properties)
        # Compare sorted copies: list.sort() sorts in place and returns None,
        # so asserting on the results of .sort() only ever compared None to
        # None and could not fail
        self.assertEqual(sorted(res[0].sequence_file.file_list),
                         sorted(raw_file_list))
예제 #9
0
    def test_valid(self):
        """
        Ensure a parsed valid directory matches the expected sample list

        Parses the fake_ngs_data sample sheet together with the miniseq data
        directory and checks the first resulting sample: its uploadable dict,
        its sequence file properties, and its sequence file paths.
        :return:
        """
        directory = path.join(path_to_module, "fake_ngs_data")
        sheet_file = path.join(directory, "SampleSheet.csv")
        data_dir = path.join(
            directory, parsers.miniseq.Parser.get_relative_data_directory())
        # substitute the "*" in the relative data directory with the concrete
        # directory name used by the test data
        data_dir = data_dir.replace("*", "some_dir")
        file_list = parsers.common.get_file_list(data_dir)

        sample = model.Sample(
            "01-1111", "", 1, {
                "index": "AAAAAAAA",
                "I7_Index_ID": "N01",
                "sample_project": "6",
                "sequencer_sample_ID": "01-1111-4004",
                "I5_Index_ID": "S01",
                "index2": "TTTTTTTT"
            })

        sequence_file_properties = {
            "sequencer_sample_ID": "01-1111-4004",
            "index": "AAAAAAAA",
            "I7_Index_ID": "N01",
            "I5_Index_ID": "S01",
            "index2": "TTTTTTTT",
            "description": ""
        }

        file_path_1 = path.join(path_to_module, "fake_ngs_data", "Alignment_1",
                                "some_dir", "Fastq",
                                "01-1111_S1_L001_R1_001.fastq.gz")
        file_path_2 = path.join(path_to_module, "fake_ngs_data", "Alignment_1",
                                "some_dir", "Fastq",
                                "01-1111_S1_L001_R2_001.fastq.gz")
        raw_file_list = [file_path_1, file_path_2]

        res = sample_parser.parse_sample_list(
            sample_sheet_file=sheet_file,
            run_data_directory=data_dir,
            run_data_directory_file_list=file_list)

        # Check sample is the same
        self.assertEqual(res[0].get_uploadable_dict(),
                         sample.get_uploadable_dict())
        # Check sequencing file is correct
        self.assertEqual(res[0].sequence_file.properties_dict,
                         sequence_file_properties)
        # Compare sorted copies: list.sort() sorts in place and returns None,
        # so asserting on the results of .sort() only ever compared None to
        # None and could not fail
        self.assertEqual(sorted(res[0].sequence_file.file_list),
                         sorted(raw_file_list))
예제 #10
0
    def get_samples(self, project_id):
        """
        API call to api/projects/project_id/samples

        arguments:
            project_id -- project identifier from irida

        returns list of samples for the given project.
            each sample is a Sample object.
            the list is stored in self.cached_samples under project_id, and
            later calls for the same project_id return that stored list
            without making another request.

        raises IridaResourceError if looking up the project's links raises
            StopIteration (reported as the project ID not existing).
        """

        logging.info("Getting samples from project '{}'".format(project_id))

        if project_id not in self.cached_samples:
            try:
                project_url = self._get_link(self.base_url, "projects")
                url = self._get_link(project_url,
                                     "project/samples",
                                     target_dict={
                                         "key": "identifier",
                                         "value": project_id
                                     })

            except StopIteration:
                # the placeholder was missing from this message before, so the
                # offending project id never made it into the log
                logging.error(
                    "The given project ID doesn't exist: {}".format(project_id))
                raise exceptions.IridaResourceError(
                    "The given project ID doesn't exist", project_id)

            response = self._session.get(url)
            result = response.json()["resource"]["resources"]

            sample_list = []
            for sample_dict in result:
                # use name, description and identifier from the dictionary as
                # base parameters when creating the sample, and remove them
                # from the dict so we don't keep useless duplicate data
                sample_name = sample_dict.pop('sampleName')
                sample_desc = sample_dict.pop('description')
                sample_id = int(sample_dict.pop('identifier'))
                sample_list.append(
                    model.Sample(sample_name=sample_name,
                                 description=sample_desc,
                                 samp_dict=sample_dict,
                                 sample_id=sample_id))
            self.cached_samples[project_id] = sample_list

        return self.cached_samples[project_id]
예제 #11
0
    def test_valid(self):
        """
        Ensure a parsed valid directory matches the expected sample list

        Parses the fake_nextseq_run sample sheet and checks the first
        resulting sample: its uploadable dict, its sequence file properties,
        and its sequence file paths.
        :return:
        """
        sheet_file = path.join(path_to_module, "fake_nextseq_run",
                               "SampleSheet.csv")

        sample = model.Sample(
            "SA20121712",
            "Code Blue",
            1,
            {
                "Sample_Well": "A01",
                "index": "TAAGGCGA",
                "Sample_Plate": "NGS-001 Plate FNC-7",
                "I7_Index_ID": "N701",
                "sample_project": "67",
                "sequencer_sample_name": "SA20121712",
                "I5_Index_ID": "S502",
                "index2": "ATAGAGAG",
            }
        )

        sequence_file_properties = {
            'Sample_Plate': "NGS-001 Plate FNC-7",
            'Sample_Well': 'A01',
            'I7_Index_ID': 'N701',
            'index': 'TAAGGCGA',
            'I5_Index_ID': 'S502',
            'index2': 'ATAGAGAG'
        }

        # NOTE(review): these names end in ".fastq.qz" (not ".gz") - confirm
        # this matches the files in the fake_nextseq_run test data
        file_path_1 = path.join(path_to_module,
                                "fake_nextseq_run", "Data", "Intensities", "BaseCalls", "67",
                                "SA20121712_S2_R1_001.fastq.qz")
        file_path_2 = path.join(path_to_module,
                                "fake_nextseq_run", "Data", "Intensities", "BaseCalls", "67",
                                "SA20121712_S2_R2_001.fastq.qz")
        file_list = [file_path_1, file_path_2]

        res = sample_parser._parse_sample_list(sheet_file)

        # Check sample is the same
        self.assertEqual(res[0].get_uploadable_dict(), sample.get_uploadable_dict())
        # Check sequencing file is correct
        self.assertEqual(res[0].sequence_file.properties_dict, sequence_file_properties)
        # Compare sorted copies: list.sort() sorts in place and returns None,
        # so asserting on the results of .sort() only ever compared None to
        # None and could not fail
        self.assertEqual(sorted(res[0].sequence_file.file_list), sorted(file_list))
예제 #12
0
    def test_parse_out_sequence_file(self):
        """
        Run _parse_out_sequence_file on a sample and check both results:
        the dict it returns for the sequence file, and the uploadable dict
        the sample reports after the call
        :return:
        """
        expected_sequence_file_dict = {
            'Sample_Well': '03',
            'index': 'CCCCCCCC',
            'Sample_Plate': '3',
            'I7_Index_ID': 'N03',
            'sampleProject': '6',
            'sequencerSampleId': '03-3333',
            'I5_Index_ID': 'S03',
            'index2': 'GGGGGGGG'
        }

        expected_uploadable_dict = {
            'Sample_Well': '03',
            'index': 'CCCCCCCC',
            'Sample_Plate': '3',
            'I7_Index_ID': 'N03',
            'sampleName': '03-3333',
            'sampleProject': '6',
            'sequencerSampleId': '03-3333',
            'I5_Index_ID': 'S03',
            'index2': 'GGGGGGGG',
            'description': 'Deadly bug'
        }

        sample = model.Sample(
            "03-3333", "Deadly bug", None, {
                "Sample_Well": "03",
                "index": "CCCCCCCC",
                "Sample_Plate": "3",
                "I7_Index_ID": "N03",
                "sampleName": "03-3333",
                "sampleProject": "6",
                "sequencerSampleId": "03-3333",
                "I5_Index_ID": "S03",
                "index2": "GGGGGGGG",
                "description": "Deadly bug"
            })

        parsed_out = sample_parser._parse_out_sequence_file(sample)

        self.assertEqual(sample.get_uploadable_dict(), expected_uploadable_dict)
        self.assertEqual(parsed_out, expected_sequence_file_dict)
    def test_send_and_get_sequence_files(self):
        """
        Send a pair of sequence files for a new sample and check the file
        names reported back by the API
        :return:
        """
        sample_name = "test_sample"

        # create the project that will hold the sample
        project = model.Project(name="test_project_2",
                                description="test_project_description")
        project_response = self.test_api.send_project(project)
        project_identifier = project_response['resource']['identifier']

        # create the sample the files will be attached to
        self.test_api.send_sample(
            model.Sample(sample_name, "test_sample_desc"), project_identifier)

        # build the sequence file object from the two test fastq files
        fastq_paths = [
            path.join(path_to_module, "fake_dir_data", "file_1.fastq.gz"),
            path.join(path_to_module, "fake_dir_data", "file_2.fastq.gz")
        ]
        sequence_file = model.SequenceFile(fastq_paths)

        # a sequencing run must exist before files can be sent against it
        upload_id = self.test_api.create_seq_run({'layoutType': 'PAIRED_END'},
                                                 'miseq')
        self.test_api.send_sequence_files(sequence_file, sample_name,
                                          project_identifier, upload_id)

        # verify sequence files match what we sent to IRIDA
        returned_files = self.test_api.get_sequence_files(project_identifier,
                                                          sample_name)
        for position, expected_name in enumerate(('file_1.fastq.gz',
                                                  'file_2.fastq.gz')):
            self.assertEqual(returned_files[position]['fileName'],
                             expected_name)
예제 #14
0
    def test_sample_exists(self):
        """
        Send a sample to a new project and check that sample_exists reports it
        :return:
        """
        sample_name = "test_sample_exists"

        # the sample needs a project to be uploaded to
        project = model.Project(name="test_project_exists",
                                description="test_project_exists_description")
        project_response = self.test_api.send_project(project)
        project_id = project_response['resource']['identifier']

        # upload the sample, then look it up by name within that project
        self.test_api.send_sample(
            model.Sample(sample_name, "test_sample_exists_desc"), project_id)

        found = self.test_api.sample_exists(sample_name, project_id)
        self.assertTrue(found)
예제 #15
0
    def test_valid(self):
        """
        Ensure a parsed valid directory matches the expected sample list

        Parses the fake_ngs_data UploadList.csv together with the nextseq2k_nml
        data directory and checks the first resulting sample: its uploadable
        dict, its sequence file properties, and its sequence file paths.
        :return:
        """
        directory = path.join(path_to_module, "fake_ngs_data")
        sheet_file = path.join(directory, "UploadList.csv")
        data_dir = path.join(directory, parsers.nextseq2k_nml.Parser.get_relative_data_directory())
        file_list = parsers.common.get_file_list(data_dir)

        sample = model.Sample(
            "01A100001",
            "",
            1,
            {
                "Index": "AAAAAAAA",
                "sample_project": "6",
                "Index2": "TTTTTTTT",
            }
        )

        sequence_file_properties = {
            'Index': 'AAAAAAAA',
            'Index2': 'TTTTTTTT',
            'description': "",
        }

        file_path_1 = path.join(path_to_module,
                                "fake_ngs_data", "Analysis", "1", "Data", "fast1", "01A100001_S1_L001_R1_001.fastq.gz")
        file_path_2 = path.join(path_to_module,
                                "fake_ngs_data", "Analysis", "1", "Data", "fast1", "01A100001_S1_L001_R2_001.fastq.gz")
        raw_file_list = [file_path_1, file_path_2]

        res = sample_parser.parse_sample_list(sample_sheet_file=sheet_file, run_data_directory=data_dir,
                                              run_data_directory_file_list=file_list)

        # Check sample is the same
        self.assertEqual(res[0].get_uploadable_dict(), sample.get_uploadable_dict())
        # Check sequencing file is correct
        self.assertEqual(res[0].sequence_file.properties_dict, sequence_file_properties)
        # Compare sorted copies: list.sort() sorts in place and returns None,
        # so asserting on the results of .sort() only ever compared None to
        # None and could not fail
        self.assertEqual(sorted(res[0].sequence_file.file_list), sorted(raw_file_list))
예제 #16
0
    def test_valid_full_file_path(self, mock_parse_samples):
        """
        With the sample parsing mocked to return one sample whose file entries
        are absolute paths, the built sample list should hold that sample with
        the matching sequence files
        :return:
        """
        forward_read = path.join(path_to_module, "fake_dir_data",
                                 "file_1.fastq.gz")
        reverse_read = path.join(path_to_module, "fake_dir_data",
                                 "file_2.fastq.gz")
        sample_sheet_path = path.join(path_to_module, "fake_dir_data",
                                      "SampleList_simple.csv")

        # the mocked parser hands back a single pre-built sample
        # NOTE(review): samp_dict here is a set of (key, value) pairs rather
        # than a dict literal - presumably Sample converts it; confirm
        mock_parse_samples.return_value = [
            model.Sample(sample_name='my-sample-1',
                         description="",
                         sample_number=0,
                         samp_dict={
                             ('sample_project', '75'),
                             ('File_Forward', path.abspath(forward_read)),
                             ('File_Reverse', path.abspath(reverse_read))
                         })
        ]

        built = sample_parser.build_sample_list_from_sample_sheet_with_abs_path(
            sample_sheet_path)

        mock_parse_samples.assert_called_with(sample_sheet_path)
        # Check we have 1 sample
        self.assertEqual(len(built), 1)

        # Check if data is correct
        only_sample = built[0]
        self.assertEqual(only_sample.sample_name, "my-sample-1")

        expected_entries = (
            ("sample_project", "75"),
            ("File_Forward", path.abspath(forward_read)),
            ("File_Reverse", path.abspath(reverse_read)),
        )
        for key, expected_value in expected_entries:
            self.assertEqual(only_sample.get_uploadable_dict()[key],
                             expected_value)

        for position, expected_path in enumerate((forward_read, reverse_read)):
            self.assertEqual(only_sample.sequence_file.file_list[position],
                             expected_path)
예제 #17
0
def send_metadata(api_instance, metadata_csv):
    '''
    PURPOSE:
        Send metadata from qc.csv to IRIDA for each sample

    INPUTS:
        - API_INSTANCE --> Irida API instance from generate_api_instance

        - METADATA_CSV --> CSV file that contains all of the metadata along with the sample name and project id.
            The first row is the header. The sample name is read from the first column when its header
            contains 'sample'; the project id is read from whichever column header contains 'project_id'.
            Every other column is uploaded as metadata keyed by its header.

    BEHAVIOUR:
        - Rows whose project id is 'Unknown' or 'NA' are reported and skipped.
        - Blank rows are skipped.
        - Rows for which no sample name or no project id could be determined are reported and skipped
          (instead of failing or silently reusing the values of the previous row).
        - For every remaining row the sample is created if it does not exist yet, then its metadata is sent
          and the returned status is printed.
    '''

    # newline='' is what the csv module asks for when handing it a file object
    with open(metadata_csv, 'r', newline='') as input_handle:
        reader = csv.reader(input_handle)

        header = next(reader, None)
        if header is None:
            # empty file: nothing to upload
            return

        for row in reader:
            if not row:
                continue

            # Reset the per-row state so nothing leaks over from the previous row
            metadata = {}
            passing = True
            sample_name = None
            project_id = None

            for i, value in enumerate(row):
                column = header[i]

                if i == 0 and re.search('sample', column):
                    # Get sample name from the first column
                    sample_name = value

                # Get the project ID from the correct header no matter where it is
                elif re.search('project_id', column):

                    if value == 'Unknown' or value == 'NA':
                        passing = False
                        break

                    project_id = value

                else:
                    # Put metadata into metadata dictionary for upload
                    metadata[column] = value

            if not passing:
                print(
                    'Unknown sample data for {}, moving to next sample'.format(
                        sample_name))
                continue

            if sample_name is None or project_id is None:
                print(
                    'Missing sample name or project id in row {}, moving to next sample'.format(
                        row))
                continue

            # Check that sample exists and make it if not
            if not api_instance.sample_exists(sample_name=sample_name,
                                              project_id=project_id):
                irida_sample = model.Sample(sample_name=sample_name)
                api_instance.send_sample(sample=irida_sample,
                                         project_id=project_id)

            upload_metadata = model.Metadata(metadata=metadata,
                                             project_id=project_id,
                                             sample_name=sample_name)
            status = api_instance.send_metadata(
                upload_metadata, upload_metadata.project_id,
                upload_metadata.sample_name)

            print(status, '\n')
예제 #18
0
def _parse_samples(sample_sheet_file):

    """
    Parse the sample rows that follow the "[BCLConvert_Data]" line in the
    .csv file

    The line right after "[BCLConvert_Data]" is taken as the column headers.
    Headers listed in sample_key_translation_dict are renamed for uploading
    to the REST API; every other header keeps the name it has in the file.
    The translation dict is also stored on the function itself as
    _parse_samples.sample_key_translation_dict.

    arguments:
            sample_sheet_file -- path to UploadList.csv

    returns	a list containing one Sample object per data row, each built
        from the header:value pairs of that row (without the sample name,
        which is passed to the Sample separately)

    raises SampleSheetError when a row's column count differs from the
        header count by anything other than one missing trailing value
    """

    logging.info("Reading data from sample sheet {}".format(sample_sheet_file))

    csv_reader = common.get_csv_reader(sample_sheet_file)
    # an ordered dictionary keeps the keys in header order, which is what lets
    # row values be matched to keys by position further down
    sample_dict = OrderedDict()
    sample_list = []

    sample_key_translation_dict = {
        'Sample_ID': 'sampleName',
        'Sample_Project': 'sample_project'
    }

    _parse_samples.sample_key_translation_dict = sample_key_translation_dict

    # advance the reader to the header line and register one key per header
    header_is_next = False
    for line in csv_reader:

        if header_is_next:
            for column_name in line:
                key_name = sample_key_translation_dict.get(column_name,
                                                           column_name)
                sample_dict[key_name] = ""
            break

        if "[BCLConvert_Data]" in line:
            header_is_next = True

    # the reader now sits just below the headers: every remaining line is a
    # sample row
    for row_index, line in enumerate(csv_reader):
        expected_columns = len(sample_dict)

        if expected_columns != len(line):
            # A row that is exactly one value short is padded with an empty
            # string: the last header is assumed to be the Description, and
            # this covers the case where the trailing comma was trimmed
            # (reported to happen when the sample sheet is edited from within
            # the MiSeq software). Any other mismatch is an error.
            if expected_columns - len(line) != 1:
                raise exceptions.SampleSheetError(
                    ("Your sample sheet is malformed. Expected to find {} "
                     "columns in the [Data] section, but only found {} columns "
                     "for line {}.".format(len(sample_dict.keys()), len(line), line)),
                    sample_sheet_file
                )
            line.append("")

        # row and headers have the same length here, so pair them by position
        for key, raw_value in zip(sample_dict, line):
            sample_dict[key] = raw_value.strip()

        # copy so that the next row does not overwrite this sample's values
        row_values = deepcopy(sample_dict)
        parsed_name = row_values.pop('sampleName')

        sample_list.append(
            model.Sample(
                sample_name=parsed_name,
                description="",
                sample_number=row_index + 1,
                samp_dict=row_values))

    return sample_list
예제 #19
0
def _parse_samples(sample_sheet_file):
    """
    Parse all the lines under "[Data]" in .csv file

    The line right after "[Data]" is taken as the column headers; only the
    headers Sample_Name, Project_ID, File_Forward and File_Reverse are kept.
    Each following non-empty line becomes one Sample. Project_ID is passed on
    under the key 'sample_project', and Sample_Name is passed as the sample's
    name rather than inside its dictionary.

    arguments:
            sample_sheet_file -- path to SampleSheet.csv

    returns	a list containing Sample objects that have been created by a
        dictionary from the parsed out key:pair values from .csv file

    raises SampleSheetError when a line's column count differs from the
        number of kept headers by anything other than one missing trailing
        value, or when any value other than File_Reverse is empty
    """

    logging.info("Reading data from sample sheet {}".format(sample_sheet_file))

    csv_reader = common.get_csv_reader(sample_sheet_file)
    # start with an ordered dictionary so that keys are ordered in the same
    # way that they are inserted.
    sample_dict = OrderedDict()
    sample_list = []

    sample_key_list = [
        'Sample_Name', 'Project_ID', 'File_Forward', 'File_Reverse'
    ]

    # initialize dictionary keys from first line (data headers/attributes)
    set_attributes = False
    for line in csv_reader:

        if set_attributes:
            for item in line:

                if item in sample_key_list:
                    sample_dict[item] = ""

            break

        if "[Data]" in line:
            set_attributes = True

    # fill in values for keys. line is currently below the [Data] headers
    for sample_number, line in enumerate(csv_reader):
        # if the line is empty (like a blank line at the end of the file) continue
        if not line:
            continue

        if len(sample_dict) != len(line):
            # if there is one more Data header compared to the length of
            # data values then add an empty string to the end of data values
            # i.e the File_Reverse will be empty string
            # assumes the last Data header is going to be the File_Reverse
            # this handles the case where the last trailing comma is trimmed
            # when doing a single end run
            if len(sample_dict) - len(line) == 1:
                line.append("")
            else:
                raise exceptions.SampleSheetError((
                    "Your sample sheet is malformed. Expected to find {} "
                    "columns in the [Data] section, but only found {} columns "
                    "for line {}.".format(len(sample_dict.keys()), len(line),
                                          line)), sample_sheet_file)

        for index, key in enumerate(sample_dict.keys()):
            value = line[index].strip()

            # Keys other than 'File_Reverse' cannot be empty.
            # (This used to be `len(value) is 0`, an identity test against an
            # int literal; a plain truthiness check is the reliable form.)
            if not value and key != 'File_Reverse':
                raise exceptions.SampleSheetError((
                    "Your sample sheet is malformed. {} in the [Data] section cannot be empty."
                    "".format(key)), sample_sheet_file)

            sample_dict[key] = value

        # copy so that the next line does not overwrite this sample's values
        new_sample_dict = deepcopy(sample_dict)
        new_sample_name = new_sample_dict['Sample_Name']
        new_sample_project = new_sample_dict['Project_ID']
        new_sample_dict['sample_project'] = new_sample_project
        del new_sample_dict['Sample_Name']
        del new_sample_dict['Project_ID']

        sample = model.Sample(sample_name=new_sample_name,
                              description="",
                              sample_number=sample_number + 1,
                              samp_dict=new_sample_dict)

        sample_list.append(sample)

    return sample_list