Beispiel #1
0
 def test_path_in_filename_is_invalid(self):
     # Both read file names are given with a directory prefix; the
     # validator is expected to return one "Path present in filename"
     # message per file, forward read first.
     read_with_paths = RawRead(
         forward_read='/some/path/PAIR1_1.fastq.gz',
         reverse_read='/some/path/PAIR1_2.fastq.gz',
         sample_name='SAMPLE1',
         sample_accession=None,
         taxon_id="1280",
         library_name='LIB1',
     )
     sheet = Spreadsheet.new_instance("1234567890123456", [read_with_paths])

     errors = validate_no_path_in_filename(sheet)

     expected_errors = [
         "Path present in filename: /some/path/PAIR1_1.fastq.gz",
         "Path present in filename: /some/path/PAIR1_2.fastq.gz",
     ]
     self.assertEqual(expected_errors, errors)
Beispiel #2
0
 def test_T_or_F_is_valid(self):
     # Two otherwise identical reads whose reverse_read field holds 'T'
     # and 'F' respectively; the check is expected to return no errors.
     reads = [
         RawRead(
             forward_read='PAIR1_1.fastq.gz',
             reverse_read=flag,
             sample_name='SAMPLE1',
             sample_accession=None,
             taxon_id="1280",
             library_name='LIB1',
         )
         for flag in ('T', 'F')
     ]
     sheet = Spreadsheet.new_instance("1234567890123456", reads)

     errors = check_double_ended_column_is_T_or_F(sheet)

     self.assertEqual([], errors)
Beispiel #3
0
 def test_uniqueness_of_files_sample_and_library_ENA_download(self):
     # Two reads that differ in forward read, reverse_read flag, sample
     # name and library name; the uniqueness validator is expected to
     # return no errors.
     read_fields = [
         ('PAIR1', 'T', 'SAMPLE1', 'LIB1'),
         ('PAIR2', 'F', 'SAMPLE2', 'LIB2'),
     ]
     reads = [
         RawRead(
             forward_read=forward,
             reverse_read=flag,
             sample_name=sample,
             sample_accession=None,
             taxon_id="1280",
             library_name=library,
         )
         for forward, flag, sample, library in read_fields
     ]
     sheet = Spreadsheet.new_instance("1234567890123456", reads)

     errors = validate_uniqueness_of_reads(sheet)

     self.assertEqual([], errors)
Beispiel #4
0
 def test_forward_read_not_unique(self):
     # Both reads share the same forward read file name while every other
     # differing field is distinct; exactly one "not unique" message for
     # that forward read is expected.
     shared_forward = 'PAIR1_1.fastq.gz'
     first_read = RawRead(
         forward_read=shared_forward,
         reverse_read='PAIR1_2.fastq.gz',
         sample_name='SAMPLE1',
         sample_accession=None,
         taxon_id="1280",
         library_name='LIB1',
     )
     second_read = RawRead(
         forward_read=shared_forward,
         reverse_read='PAIR2_2.fastq.gz',
         sample_name='SAMPLE2',
         sample_accession=None,
         taxon_id="1280",
         library_name='LIB2',
     )
     sheet = Spreadsheet.new_instance(
         "1234567890123456", [first_read, second_read])

     errors = validate_uniqueness_of_reads(sheet)

     self.assertEqual(
         ["Forward read is not unique: PAIR1_1.fastq.gz"], errors)
Beispiel #5
0
 def test_reads_are_not_fastq(self):
     # Read files named '*.gz' (no '.fastq' part) are expected to produce
     # one formatting error for the forward read and one for the reverse
     # read. The expected messages embed a hard-coded textual form of the
     # read, so it is kept as a literal rather than derived from the object.
     read = RawRead(
         forward_read='PAIR1_1.gz',
         reverse_read='PAIR1_2.gz',
         sample_name='SAMPLE1',
         sample_accession=None,
         taxon_id="1280",
         library_name='LIB1',
     )
     sheet = Spreadsheet.new_instance("1234567890123456", [read])

     read_text = (
         "RawRead(forward_read='PAIR1_1.gz', "
         "reverse_read='PAIR1_2.gz', sample_name='SAMPLE1', sample_accession=None, "
         "taxon_id='1280', library_name='LIB1')"
     )
     expected_errors = [
         "Forward read file is not correctly formatted for " + read_text,
         "Reverse read file is not correctly formatted for " + read_text,
     ]

     errors = validate_files_are_compressed(sheet)

     self.assertEqual(expected_errors, errors)
    def test_header_initialization_no_accession(self):
        # Loads 'test_upload_no_pair_no_lib_no_accession.xls' from the test
        # data directory via load_xls() and compares the result, using
        # assertSpreadsheet, with a spreadsheet whose accession is None.
        xls_path = os.path.join(
            self.data_dir, 'test_upload_no_pair_no_lib_no_accession.xls')
        sheet_loader = SpreadsheetLoader(xls_path)

        expected_reads = [
            self._raw_read('PAIR1_1.fastq.gz', None, 'SAMPLE1', 'LIB1', None),
            self._raw_read('PAIR2_1.fastq.gz', None, 'SAMPLE2', 'LIB2', None),
        ]
        expected_sheet = Spreadsheet.new_instance(
            "MyStudy",
            expected_reads,
            contact="Some Name",
            organisation="ENA",
            supplier='ENA',
            technology='Illumina',
            size=123456.0,
            accession=None,
            limit='30/09/2020',
        )

        loaded_sheet = sheet_loader.load_xls()
        self.assertSpreadsheet(expected_sheet, loaded_sheet)
    def test_no_filename_only_run_accession(self):
        # Loads 'test_run_accession.xls' via load_xls() and compares it with
        # a spreadsheet of three reads whose first two fields are
        # 'PAIRn' and a 'T'/'F' flag instead of file names.
        xls_path = os.path.join(self.data_dir, 'test_run_accession.xls')
        sheet_loader = SpreadsheetLoader(xls_path)

        expected_reads = [
            self._raw_read('PAIR1', 'T', 'SAMPLE1', 'LIB1', 'ACCESSION1'),
            self._raw_read('PAIR2', 'T', 'SAMPLE2', 'LIB2', 'ACCESSION2'),
            self._raw_read('PAIR3', 'F', 'SAMPLE3', 'LIB3', 'ACCESSION3'),
        ]
        expected_sheet = Spreadsheet.new_instance(
            "MyStudy",
            expected_reads,
            contact="Some Name",
            organisation="ENA",
            supplier='ENA',
            technology='Illumina',
            size=123456.0,
            accession='accession',
            limit='30/09/2020',
        )

        loaded_sheet = sheet_loader.load_xls()
        self.assertSpreadsheet(expected_sheet, loaded_sheet)
    def test_sample_and_library_names_as_integers(self):
        # Loads 'test_sample_name_as_int.xls' via load_xls() and compares it
        # with a spreadsheet whose expected reads carry all-digit values
        # as strings ('101260', '1000000001', ...).
        xls_path = os.path.join(self.data_dir, 'test_sample_name_as_int.xls')
        sheet_loader = SpreadsheetLoader(xls_path)

        first_read = self._raw_read(
            'ERR0000001_1.fastq.gz', 'ERR0000001_2.fastq.gz',
            '101260', '1000000001', 'ERR0000001', '485')
        second_read = self._raw_read(
            'ERR0000002_1.fastq.gz', 'ERR0000002_2.fastq.gz',
            '101264', '2000000002', 'ERR0000002', '485')
        expected_sheet = Spreadsheet.new_instance(
            "AStudyName1",
            [first_read, second_read],
            contact="Me",
            organisation="Org",
            supplier='Supplier',
            technology='Illumina',
            size=1.90,
            accession=None,
            limit='01/01/2025',
        )

        loaded_sheet = sheet_loader.load_xls()
        self.assertSpreadsheet(expected_sheet, loaded_sheet)
    def test_cells_read_xlsx(self):
        # Loads 'test_upload.xlsx' via load_xlsx() and compares the result,
        # using assertSpreadsheet, with the expected two-read spreadsheet.
        xlsx_path = os.path.join(self.data_dir, 'test_upload.xlsx')
        sheet_loader = SpreadsheetLoader(xlsx_path)

        first_read = self._raw_read(
            'PAIR1_1.fastq.gz', 'PAIR1_2.fastq.gz', 'SAMPLE1',
            'LIB1', 'ACCESSION1')
        second_read = self._raw_read(
            'PAIR2_1.fastq.gz', 'PAIR2_2.fastq.gz', 'SAMPLE2',
            'LIB2', 'ACCESSION2')
        expected_sheet = Spreadsheet.new_instance(
            "MyStudy",
            [first_read, second_read],
            contact="Some Name",
            organisation="ENA",
            supplier='ENA',
            technology='Illumina',
            size=123456.0,
            accession='accession',
            limit='30/09/2020',
        )

        loaded_sheet = sheet_loader.load_xlsx()
        self.assertSpreadsheet(expected_sheet, loaded_sheet)
Beispiel #10
0
 def test_valid_name_for_external_data_part_of_internal_study(self):
     # A spreadsheet named "345_external" with no reads is expected to
     # pass the study-name validator with an empty error list.
     sheet = Spreadsheet.new_instance("345_external", [])

     errors = validate_external_data_part_of_internal_sequencing_study_name(
         sheet)

     self.assertEqual([], errors)
Beispiel #11
0
 def test_supplier_name_with_valid_char_should_pass_validation(self):
     # The supplier name "This should work" is expected to yield no
     # errors from the supplier-name character validator.
     sheet = Spreadsheet.new_instance("name", supplier="This should work")

     errors = validate_no_abnormal_characters_in_supplier_name(sheet)

     self.assertEqual([], errors)
Beispiel #12
0
 def test_study_name_with_valid_char_should_pass_validation(self):
     # The study name "ValidName12345__" is expected to yield no errors
     # from validate_study_name.
     sheet = Spreadsheet.new_instance("ValidName12345__")

     errors = validate_study_name(sheet)

     self.assertEqual([], errors)