def test_dots_in_filename(self):
    """Check config file generation for filenames containing '.' and '#'.

    Builds a KmcComplex from two samples in the first list and one in the
    second, all with dots (and in one case '#') in their filenames, calls
    create_config_files(), and compares the 'traits_config_file' and
    'nontraits_config_file' written under temp_working_dir with the
    expected text.
    """
    # NOTE(review): the line breaks inside the expected config text were
    # reconstructed from a whitespace-collapsed source -- confirm them
    # against the real output of create_config_files().
    k = KmcComplex(
        os.getcwd(), 1, 1,
        [SampleData('a.b#c_1.fastq', 'a.b#c_2.fastq'),
         SampleData('a.b.c.d_1.fastq', 'a.b.c.d_2.fastq')],
        [SampleData('a.b.c.d.e_1.fastq', 'a.b.c.d.e_2.fastq')],
        'intersection', False)
    try:
        k.create_config_files()

        with open(os.path.join(k.temp_working_dir, 'traits_config_file'), 'r') as actual_file:
            actual_config_content = actual_file.read()
            self.assertEqual(actual_config_content, """\
INPUT:
a_b_c = kmc_a.b#c
a_b_c_d = kmc_a.b.c.d
a_b_c_d_e = kmc_a.b.c.d.e
OUTPUT:
traits = a_b_c*a_b_c_d
OUTPUT_PARAMS:
-ci1
""")

        with open(os.path.join(k.temp_working_dir, 'nontraits_config_file'), 'r') as actual_file:
            actual_config_content = actual_file.read()
            self.assertEqual(actual_config_content, """\
INPUT:
a_b_c = kmc_a.b#c
a_b_c_d = kmc_a.b.c.d
a_b_c_d_e = kmc_a.b.c.d.e
OUTPUT:
nontraits = a_b_c_d_e
OUTPUT_PARAMS:
-ci1
""")
    finally:
        # The other KmcComplex tests call cleanup() when they are done with
        # the object; this one did not. Run it even if an assertion fails.
        k.cleanup()
def extract_samples(self):
    """Read the input spreadsheet and build one SampleData per usable row.

    The comma-separated file at ``self.filename`` is read row by row. A row
    with two columns is passed to SampleData as (forward_file, reverse_file);
    a row with one column is passed as a single filename. Rows with any
    other number of columns (including blank lines) are skipped.

    Returns:
        list: the SampleData objects, in spreadsheet order.

    Raises:
        Exception: if a file named in a one- or two-column row does not
            exist on disk.
    """
    samples = []
    self.logger.warning("Reading input spreadsheet")
    # The csv module asks for files to be opened with newline='' so that it
    # can interpret line endings itself.
    with open(self.filename, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if len(row) not in (1, 2):
                continue

            # Validate every file in the row before creating the sample.
            for filename in row:
                if not os.path.exists(filename):
                    raise Exception(
                        "Input file in spreadsheet doesn't exist: " + filename)

            self.logger.warning(
                "Found input files" if len(row) == 2 else "Found input file")
            samples.append(SampleData(*row))
    return samples
def test_sample_definitions_str(self):
    """sample_definitions_str() should list a definition for each of the three samples."""
    # NOTE(review): line breaks in the expected text were reconstructed from
    # a whitespace-collapsed source -- confirm against the real output.
    first_group = [
        SampleData('a_1.fastq', 'a_2.fastq'),
        SampleData('b_1.fastq', 'b_2.fastq'),
    ]
    second_group = [SampleData('c_1.fastq', 'c_2.fastq')]
    k = KmcComplex(os.getcwd(), 1, 1, first_group, second_group, 'union', False)

    expected_output = (
        "a = kmc_a\n"
        "b = kmc_b\n"
        "c = kmc_c\n"
    )
    self.assertEqual(k.sample_definitions_str(), expected_output)
    k.cleanup()
def test_samples_to_set_operation_str(self):
    """With the 'union' action, the set-operation strings should be 'traits = a+b' and 'nontraits = c'."""
    first_group = [
        SampleData('a_1.fastq', 'a_2.fastq'),
        SampleData('b_1.fastq', 'b_2.fastq'),
    ]
    second_group = [SampleData('c_1.fastq', 'c_2.fastq')]
    k = KmcComplex(os.getcwd(), 1, 1, first_group, second_group, 'union')

    traits_operation = k.trait_samples_to_set_operation_str()
    self.assertEqual(traits_operation, 'traits = a+b')

    nontraits_operation = k.nontrait_samples_to_set_operation_str()
    self.assertEqual(nontraits_operation, 'nontraits = c')

    k.cleanup()
def test_create_config_file_intersection(self):
    """create_config_files() with 'intersection' should write the three expected config files.

    Checks the contents of 'traits_config_file', 'nontraits_config_file'
    and 'combined_config_file' under the object's temp_working_dir.
    """
    # NOTE(review): line breaks in the expected texts were reconstructed from
    # a whitespace-collapsed source -- confirm against the real output.
    first_group = [
        SampleData('a_1.fastq', 'a_2.fastq'),
        SampleData('b_1.fastq', 'b_2.fastq'),
    ]
    second_group = [SampleData('c_1.fastq', 'c_2.fastq')]
    k = KmcComplex(os.getcwd(), 1, 1, first_group, second_group, 'intersection')
    k.create_config_files()

    def read_config(name):
        # Return the full text of one generated config file.
        with open(os.path.join(k.temp_working_dir, name), 'r') as handle:
            return handle.read()

    expected_by_file = [
        ('traits_config_file',
         "INPUT:\n"
         "a = kmc_a\n"
         "b = kmc_b\n"
         "c = kmc_c\n"
         "OUTPUT:\n"
         "traits = a*b\n"
         "OUTPUT_PARAMS:\n"
         "-ci1\n"),
        ('nontraits_config_file',
         "INPUT:\n"
         "a = kmc_a\n"
         "b = kmc_b\n"
         "c = kmc_c\n"
         "OUTPUT:\n"
         "nontraits = c\n"
         "OUTPUT_PARAMS:\n"
         "-ci1\n"),
        ('combined_config_file',
         "INPUT:\n"
         "set1 = traits\n"
         "set2 = nontraits\n"
         "OUTPUT:\n"
         "result = set1-set2\n"
         "OUTPUT_PARAMS:\n"
         "-ci1\n"),
    ]
    # Files are read and checked in the same order as before: traits,
    # nontraits, combined.
    for config_name, expected_content in expected_by_file:
        self.assertEqual(read_config(config_name), expected_content)

    k.cleanup()
def test_populate_fofn_name(self):
    """After a Kmc is built for a sample, its file_of_fastq_files should be '<temp_working_dir>/fofn'."""
    sample = SampleData('/path/to/sample_1.fastq.gz',
                        '/path/to/sample_2.fastq.gz')
    kmc = Kmc(os.getcwd(), sample, 1, 51, 30, 200)

    fofn_path = sample.file_of_fastq_files
    self.assertEqual(fofn_path, kmc.temp_working_dir + "/fofn")

    kmc.cleanup()
def test_filtering_low_coverage_contigs(self):
    """remove_small_large_contigs() output should match the expected assembly fixture.

    Runs the filter on 'assembly_with_small_contigs.fa' from data_dir,
    compares the file it writes with 'expected_assembly_coverage.fa', and
    then deletes the generated file.
    """
    sample = SampleData('/path/to/sample_1.fastq.gz',
                        '/path/to/sample_2.fastq.gz')
    assembler = SpadesAssembly(sample, 'abc', 1, 1, '', 1, False, 24, True,
                               False, 100)

    input_fasta = os.path.join(data_dir, 'assembly_with_small_contigs.fa')
    actual_fasta = os.path.join(
        data_dir, 'actual_assembly_without_low_coverage_contigs.fa')
    expected_fasta = os.path.join(data_dir, 'expected_assembly_coverage.fa')

    assembler.remove_small_large_contigs(input_fasta, actual_fasta)

    with open(actual_fasta, 'r') as produced, open(expected_fasta, 'r') as reference:
        produced_text = produced.read()
        reference_text = reference.read()
    self.assertEqual(produced_text, reference_text)

    # Remove the file written by the call above.
    os.remove(actual_fasta)
def test_sample_definition_line(self):
    """sample_definition_line() should give 'sample_ABC = kmc_sample#ABC' for a sample named 'sample#ABC'."""
    k = KmcComplex(os.getcwd(), 1, 1, [], [], 'union')
    sample = SampleData('/path/to/sample#ABC_1.fastq',
                        '/path/to/sample#ABC_2.fastq')

    definition = k.sample_definition_line(sample)
    self.assertEqual(definition, 'sample_ABC = kmc_sample#ABC')

    k.cleanup()
def test_create_file_of_file_names(self):
    """create_file_of_file_names() should write both FASTQ paths to the sample's fofn file."""
    # NOTE(review): line breaks in the expected text were reconstructed from
    # a whitespace-collapsed source -- confirm against the real output.
    sample = SampleData('/path/to/sample_1.fastq.gz',
                        '/path/to/sample_2.fastq.gz')
    kmc = Kmc(os.getcwd(), sample, 1, 51, 30, 200)
    kmc.create_file_of_file_names(sample.file_of_fastq_files)

    with open(sample.file_of_fastq_files, 'r') as fofn:
        written = fofn.read()

    expected = (
        "/path/to/sample_1.fastq.gz\n"
        "/path/to/sample_2.fastq.gz\n"
    )
    self.assertEqual(written, expected)

    kmc.cleanup()
def test_basename_set_correctly(self):
    """The basename of a gzipped '_1'/'_2' FASTQ pair should be 'sample'."""
    sample = SampleData('/path/to/sample_1.fastq.gz',
                        '/path/to/sample_2.fastq.gz')
    expected_basename = 'sample'
    self.assertEqual(sample.basename, expected_basename)