def writeSammpleSheets(sample_list, sheet_path, template_sheet): samplesheet_name = os.path.basename(sheet_path) samplesheet_dir = os.path.dirname(os.path.realpath(sheet_path)) count = 0 exit_status = "success" for key in sample_list: count += 1 logger.debug( f"{len(sample_list[key])} samples with idx lengths {key[1]}/{key[2]} for {key[0]} dataset" ) new_sample_sheet = SampleSheet() new_sample_sheet.Header = template_sheet.Header new_sample_sheet.Reads = template_sheet.Reads new_sample_sheet.Settings = template_sheet.Settings for sample in sample_list[key]: new_sample_sheet.add_sample(sample) new_sample_sheet_file = os.path.join( samplesheet_dir, samplesheet_name + ".custom." + str(count) + "." + key[0]) logger.info(f"Creating custom sample sheet: {new_sample_sheet_file}") try: with open(new_sample_sheet_file, "w") as ss_writer: new_sample_sheet.write(ss_writer) except Exception as error: logger.error(f"Exception writing new sample sheet: {error}") exit_status = "failure" logger.debug(f"Created custom sample sheet: {new_sample_sheet_file}") return exit_status
def build_samplesheet(df, args): samplesheet = SampleSheet() for ix, row in df.iterrows(): s = { 'Sample_ID': row.Sample_ID, 'Sample_Name': row.Sample_Name, 'Sample_Project': row.Sample_Project, 'index': row["index"], 'index2': row.index2 } if not args.merge_lanes: s["Lane"] = row.Lane samplesheet.add_sample(Sample(s)) return samplesheet
class TestUnidentifiedIndexHandler(HandlerTestBase): def setUp(self): qc_config = { 'name': 'UnidentifiedIndexHandler', 'significance_threshold': 1, 'white_listed_indexes': ['.*N.*', 'G{8,}'] } self.unidentifiedIndexHandler = UnidentifiedIndexHandler(qc_config) conversion_results_key = "ConversionResults" conversion_results = get_stats_json()["ConversionResults"] samplesheet_key = "samplesheet" self.samplesheet = SampleSheet() sample_1 = Sample( dict(Lane=1, Sample_ID='1823A', Sample_Name='1823A-tissue', index='AAAA')) sample_2 = Sample( dict(Lane=2, Sample_ID='1823B', Sample_Name='1823B-tissue', index='TTTT')) sample_3 = Sample( dict(Lane=3, Sample_ID='1823C', Sample_Name='1823C-tissue', index='AAAA', index2='TTTT')) sample_4 = Sample( dict(Lane=4, Sample_ID='1823D', Sample_Name='1823D-tissue', index='GGGG', index2='CCCC')) sample_5 = Sample( dict(Lane=6, Sample_ID='1823E', Sample_Name='1823D-tissue', index='ATCG')) self.samplesheet.add_sample(sample_1) self.samplesheet.add_sample(sample_2) self.samplesheet.add_sample(sample_3) self.samplesheet.add_sample(sample_4) self.samplesheet.add_sample(sample_5) self.unidentifiedIndexHandler.collect( (conversion_results_key, conversion_results)) self.unidentifiedIndexHandler.collect( (samplesheet_key, self.samplesheet)) self.samplesheet_searcher = _SamplesheetSearcher(self.samplesheet) def test_should_be_evaluated(self): # No self.assertFalse( self.unidentifiedIndexHandler.should_be_evaluated( tag='unknown', count=1, number_of_reads_on_lane=10)) # No self.assertFalse( self.unidentifiedIndexHandler.should_be_evaluated( tag='AAAAAA', count=1, number_of_reads_on_lane=1000)) # Yes self.assertTrue( self.unidentifiedIndexHandler.should_be_evaluated( tag='AAAAAA', count=100, number_of_reads_on_lane=1000)) # Yes self.assertTrue( self.unidentifiedIndexHandler.should_be_evaluated( tag='GGGGGG', count=100, number_of_reads_on_lane=1000)) def test_get_complementary_sequence(self): res = self.unidentifiedIndexHandler.get_complementary_sequence( 'ATCG+N') self.assertEqual(res, 'TAGC+N') def test_validate_configuration(self): with self.assertRaises(ConfigurationError): UnidentifiedIndexHandler({ 'name': 'UnidentifiedIndexHandler', 'foo': 'bar' }).validate_configuration() def test_always_warn_rules(self): res = next( self.unidentifiedIndexHandler.always_warn_rule(tag="", lane=1, percent_on_lane=1)) self.assertTrue(isinstance(res, QCErrorFatal)) def test_always_warn_rules_for_white_listed_tag(self): res = next( self.unidentifiedIndexHandler.always_warn_rule(tag="NNNNN", lane=1, percent_on_lane=1)) self.assertTrue(isinstance(res, QCErrorWarning)) def test_check_swapped_dual_index_yes(self): res = next( self.unidentifiedIndexHandler.check_swapped_dual_index( 'TTTT+AAAA', 1, self.samplesheet_searcher)) self.assertTrue( 'It appears that maybe the dual index tag: TTTT+AAAA was swapped. ' 'There was a hit for the swapped index: AAAA+TTTT at: Lane: 3, for sample: 1823C-tissue. ' 'The tag we found was: AAAA+TTTT' in res.message) def test_check_swapped_dual_index_no(self): with self.assertRaises(StopIteration): next( self.unidentifiedIndexHandler.check_swapped_dual_index( 'TTGG+AATT', 1, self.samplesheet_searcher)) def test_check_reversed_index_yes(self): res = next( self.unidentifiedIndexHandler.check_reversed_index( 'GCTA', 1, self.samplesheet_searcher)) self.assertTrue( 'We found a possible match for the reverse of tag: GCTA, on: Lane: 6, ' 'for sample: 1823D-tissue. The tag we found was: ATCG' in res.message) def test_check_reversed_index_no(self): with self.assertRaises(StopIteration): next( self.unidentifiedIndexHandler.check_reversed_index( 'GGTT', 1, self.samplesheet_searcher)) def test_check_complement_index_yes(self): res = next( self.unidentifiedIndexHandler.check_complement_index( 'TTTT', 1, self.samplesheet_searcher)) self.assertTrue( 'We found a possible match for the complementary of tag: TTTT, on: Lane: 1, for ' 'sample: 1823A-tissue. The tag we found was: AAAA' in res.message) def test_check_complement_index_no(self): with self.assertRaises(StopIteration): next( self.unidentifiedIndexHandler.check_complement_index( 'GGGG', 1, self.samplesheet_searcher)) def test_check_if_index_in_other_lane_hit_yes(self): res = next( self.unidentifiedIndexHandler.check_if_index_in_other_lane( 'TTTT', 1, self.samplesheet_searcher)) self.assertTrue( "We found a possible match for the tag: TTTT, on another lane: Lane: 2, for sample: " "1823B-tissue. The tag we found was: TTTT" in res.message) def test_check_if_index_in_other_lane_hit_no(self): with self.assertRaises(StopIteration): next( self.unidentifiedIndexHandler.check_if_index_in_other_lane( 'GGGG', 1, self.samplesheet_searcher)) def test_number_of_reads_per_lane(self): res = self.unidentifiedIndexHandler.number_of_reads_per_lane() self.assertDictEqual(res, {1: 162726440, 2: 164470667}) def test_is_significantly_represented(self): self.assertTrue( self.unidentifiedIndexHandler.is_significantly_represented( 11, 100)) self.assertFalse( self.unidentifiedIndexHandler.is_significantly_represented(1, 100)) def test_check_reversed_in_dual_index(self): res = next( self.unidentifiedIndexHandler. check_reverse_complement_in_dual_index('TTTT+TTTT', 1, self.samplesheet_searcher)) self.assertTrue( 'We found a possible match for the reverse complement of tag: TTTT, on: Lane: 1, for sample: ' '1823A-tissue. The tag we found was: AAAA. This originated from the dual index ' 'tag: TTTT+TTTT' in res.message) def test_check_reverse_complement_in_dual_index(self): res = next( self.unidentifiedIndexHandler. check_reverse_complement_in_dual_index('TTTT+TTTT', 1, self.samplesheet_searcher)) self.assertTrue( 'We found a possible match for the reverse complement of tag: TTTT, on: Lane: 1,' ' for sample: 1823A-tissue. The tag we found was: AAAA. This originated from the dual ' 'index tag: TTTT+TTTT' in res.message) def test_check_complement_in_dual_index(self): res = next( self.unidentifiedIndexHandler. check_reverse_complement_in_dual_index('TTTT+TTTT', 1, self.samplesheet_searcher)) self.assertTrue( 'We found a possible match for the reverse complement of tag: TTTT, on: Lane: 1, for sample: ' '1823A-tissue. The tag we found was: AAAA. This originated from the dual ' 'index tag: TTTT+TTTT')