def test_path_in_filename_is_invalid(self):
    """Read filenames carrying a directory path are each reported as an error."""
    forward_path = '/some/path/PAIR1_1.fastq.gz'
    reverse_path = '/some/path/PAIR1_2.fastq.gz'
    # One message is expected per offending file, forward read first.
    expected_errors = [
        "Path present in filename: " + forward_path,
        "Path present in filename: " + reverse_path,
    ]
    read_with_paths = RawRead(forward_read=forward_path,
                              reverse_read=reverse_path,
                              sample_name='SAMPLE1',
                              sample_accession=None,
                              taxon_id="1280",
                              library_name='LIB1')
    sheet = Spreadsheet.new_instance("1234567890123456", [read_with_paths])
    self.assertEqual(expected_errors, validate_no_path_in_filename(sheet))
def test_T_or_F_is_valid(self):
    """A reverse_read value of 'T' or 'F' yields no validation errors."""
    # Two otherwise identical rows, one per accepted flag value.
    flagged_reads = [
        RawRead(forward_read='PAIR1_1.fastq.gz',
                reverse_read=flag,
                sample_name='SAMPLE1',
                sample_accession=None,
                taxon_id="1280",
                library_name='LIB1')
        for flag in ('T', 'F')
    ]
    sheet = Spreadsheet.new_instance("1234567890123456", flagged_reads)
    no_errors = []
    self.assertEqual(no_errors, check_double_ended_column_is_T_or_F(sheet))
def test_uniqueness_of_files_sample_and_library_ENA_download(self):
    """Rows with distinct reads, samples and libraries pass the uniqueness check.

    The reverse_read column holds 'T'/'F' flags here rather than filenames.
    """
    first_read = RawRead(forward_read='PAIR1',
                         reverse_read='T',
                         sample_name='SAMPLE1',
                         sample_accession=None,
                         taxon_id="1280",
                         library_name='LIB1')
    second_read = RawRead(forward_read='PAIR2',
                          reverse_read='F',
                          sample_name='SAMPLE2',
                          sample_accession=None,
                          taxon_id="1280",
                          library_name='LIB2')
    sheet = Spreadsheet.new_instance("1234567890123456",
                                     [first_read, second_read])
    no_errors = []
    self.assertEqual(no_errors, validate_uniqueness_of_reads(sheet))
def test_forward_read_not_unique(self):
    """Two rows sharing the same forward read file produce a single error."""
    shared_forward = 'PAIR1_1.fastq.gz'
    expected_errors = ["Forward read is not unique: " + shared_forward]
    # Only the forward read is duplicated; every other column differs.
    first_read = RawRead(forward_read=shared_forward,
                         reverse_read='PAIR1_2.fastq.gz',
                         sample_name='SAMPLE1',
                         sample_accession=None,
                         taxon_id="1280",
                         library_name='LIB1')
    second_read = RawRead(forward_read=shared_forward,
                          reverse_read='PAIR2_2.fastq.gz',
                          sample_name='SAMPLE2',
                          sample_accession=None,
                          taxon_id="1280",
                          library_name='LIB2')
    sheet = Spreadsheet.new_instance("1234567890123456",
                                     [first_read, second_read])
    self.assertEqual(expected_errors, validate_uniqueness_of_reads(sheet))
def test_reads_are_not_fastq(self):
    """Read files named '*.gz' without a fastq part are reported for both reads."""
    # Textual form of the offending row as it appears in both messages.
    read_text = ("RawRead(forward_read='PAIR1_1.gz', "
                 "reverse_read='PAIR1_2.gz', "
                 "sample_name='SAMPLE1', "
                 "sample_accession=None, "
                 "taxon_id='1280', "
                 "library_name='LIB1')")
    expected_errors = [
        "Forward read file is not correctly formatted for " + read_text,
        "Reverse read file is not correctly formatted for " + read_text,
    ]
    bad_read = RawRead(forward_read='PAIR1_1.gz',
                       reverse_read='PAIR1_2.gz',
                       sample_name='SAMPLE1',
                       sample_accession=None,
                       taxon_id="1280",
                       library_name='LIB1')
    sheet = Spreadsheet.new_instance("1234567890123456", [bad_read])
    self.assertEqual(expected_errors, validate_files_are_compressed(sheet))
def test_header_initialization_no_accession(self):
    """Loading the no-pair/no-accession .xls fixture gives the expected spreadsheet."""
    xls_path = os.path.join(self.data_dir,
                            'test_upload_no_pair_no_lib_no_accession.xls')
    loader = SpreadsheetLoader(xls_path)

    # Rows have neither a reverse read nor an accession (both None).
    expected_reads = [
        self._raw_read('PAIR1_1.fastq.gz', None, 'SAMPLE1', 'LIB1', None),
        self._raw_read('PAIR2_1.fastq.gz', None, 'SAMPLE2', 'LIB2', None),
    ]
    header = {
        'contact': "Some Name",
        'organisation': "ENA",
        'supplier': 'ENA',
        'technology': 'Illumina',
        'size': 123456.0,
        'accession': None,
        'limit': '30/09/2020',
    }
    expected = Spreadsheet.new_instance("MyStudy", expected_reads, **header)

    loaded = loader.load_xls()
    self.assertSpreadsheet(expected, loaded)
def test_no_filename_only_run_accession(self):
    """Loading the run-accession .xls fixture yields rows with 'T'/'F' reverse reads."""
    loader = SpreadsheetLoader(
        os.path.join(self.data_dir, 'test_run_accession.xls'))

    # Positional arguments for self._raw_read, one tuple per expected row.
    row_values = [
        ('PAIR1', 'T', 'SAMPLE1', 'LIB1', 'ACCESSION1'),
        ('PAIR2', 'T', 'SAMPLE2', 'LIB2', 'ACCESSION2'),
        ('PAIR3', 'F', 'SAMPLE3', 'LIB3', 'ACCESSION3'),
    ]
    expected_reads = [self._raw_read(*values) for values in row_values]
    expected = Spreadsheet.new_instance("MyStudy",
                                        expected_reads,
                                        contact="Some Name",
                                        organisation="ENA",
                                        supplier='ENA',
                                        technology='Illumina',
                                        size=123456.0,
                                        accession='accession',
                                        limit='30/09/2020')

    loaded = loader.load_xls()
    self.assertSpreadsheet(expected, loaded)
def test_sample_and_library_names_as_integers(self):
    """Loading the sample-name-as-int .xls fixture yields the names as digit strings."""
    fixture = os.path.join(self.data_dir, 'test_sample_name_as_int.xls')
    loader = SpreadsheetLoader(fixture)

    # Positional arguments for self._raw_read; the all-digit values are
    # expected back as strings.
    row_values = [
        ('ERR0000001_1.fastq.gz', 'ERR0000001_2.fastq.gz',
         '101260', '1000000001', 'ERR0000001', '485'),
        ('ERR0000002_1.fastq.gz', 'ERR0000002_2.fastq.gz',
         '101264', '2000000002', 'ERR0000002', '485'),
    ]
    expected_reads = [self._raw_read(*values) for values in row_values]
    header = {
        'contact': "Me",
        'organisation': "Org",
        'supplier': 'Supplier',
        'technology': 'Illumina',
        'size': 1.90,
        'accession': None,
        'limit': '01/01/2025',
    }
    expected = Spreadsheet.new_instance("AStudyName1", expected_reads, **header)

    loaded = loader.load_xls()
    self.assertSpreadsheet(expected, loaded)
def test_cells_read_xlsx(self):
    """Loading the .xlsx upload fixture via load_xlsx gives the expected spreadsheet."""
    xlsx_path = os.path.join(self.data_dir, 'test_upload.xlsx')
    loader = SpreadsheetLoader(xlsx_path)

    # Positional arguments for self._raw_read, one tuple per expected row.
    row_values = [
        ('PAIR1_1.fastq.gz', 'PAIR1_2.fastq.gz',
         'SAMPLE1', 'LIB1', 'ACCESSION1'),
        ('PAIR2_1.fastq.gz', 'PAIR2_2.fastq.gz',
         'SAMPLE2', 'LIB2', 'ACCESSION2'),
    ]
    expected_reads = [self._raw_read(*values) for values in row_values]
    header = {
        'contact': "Some Name",
        'organisation': "ENA",
        'supplier': 'ENA',
        'technology': 'Illumina',
        'size': 123456.0,
        'accession': 'accession',
        'limit': '30/09/2020',
    }
    expected = Spreadsheet.new_instance("MyStudy", expected_reads, **header)

    loaded = loader.load_xlsx()
    self.assertSpreadsheet(expected, loaded)
def test_valid_name_for_external_data_part_of_internal_study(self):
    """The study name '345_external' with no reads yields no validation errors."""
    sheet = Spreadsheet.new_instance("345_external", [])
    errors = validate_external_data_part_of_internal_sequencing_study_name(
        sheet)
    self.assertEqual([], errors)
def test_supplier_name_with_valid_char_should_pass_validation(self):
    """A supplier name of letters and spaces yields no validation errors."""
    sheet = Spreadsheet.new_instance("name", supplier="This should work")
    errors = validate_no_abnormal_characters_in_supplier_name(sheet)
    self.assertEqual([], errors)
def test_study_name_with_valid_char_should_pass_validation(self):
    """A study name of letters, digits and underscores yields no validation errors."""
    sheet = Spreadsheet.new_instance("ValidName12345__")
    errors = validate_study_name(sheet)
    self.assertEqual([], errors)