Beispiel #1
0
    def test_all_orfs(self):
        '''Check all_orfs(min_length=120) on sequence '1' of sequences_test_orfs.fa.

        The result is compared, element by element and in order, with a
        hand-written list of (interval, flag) pairs.
        '''
        d = {}
        tasks.file_to_dict(os.path.join(data_dir, 'sequences_test_orfs.fa'), d)
        seq = d['1']
        orfs = seq.all_orfs(min_length=120)
        # Each entry pairs an interval with a boolean flag.
        # NOTE(review): the flag presumably marks reverse-strand ORFs -
        # confirm against all_orfs().
        expected = [
            (intervals.Interval(27, 221), False),
            (intervals.Interval(44, 226), False),
            (intervals.Interval(48, 170), True),
            (intervals.Interval(109, 240), False),
            (intervals.Interval(143, 265), True),
            (intervals.Interval(227, 421), False),
            (intervals.Interval(277, 432), True),
            (intervals.Interval(286, 477), False),
            (intervals.Interval(288, 518), True),
            (intervals.Interval(562, 702), False),
            (intervals.Interval(600, 758), False),
            (intervals.Interval(605, 817), False),
            (intervals.Interval(818, 937), False),
            (intervals.Interval(835, 987), False),
            (intervals.Interval(864, 998), False),
        ]

        # Lengths are checked first so the pairwise loop below cannot
        # silently ignore surplus entries on either side.
        self.assertEqual(len(orfs), len(expected))

        for found, wanted in zip(orfs, expected):
            self.assertEqual(found[0], wanted[0])
            self.assertEqual(found[1], wanted[1])
	def __init__(self,
				 fasta_file,
				 working_directory=None,
				 cutoff_contig_length=2000,
				 percent_match=95,
				 skip=None,
				 summary_file="contig_cleanup_summary.txt",
				 summary_prefix="[contig cleanup]",
				 debug=False):
		'''Store the settings and load the contigs from fasta_file.

		fasta_file           -- FASTA file read into self.contigs (id -> sequence)
		working_directory    -- falls back to the current directory when not given
		cutoff_contig_length -- stored as-is; used elsewhere in the class
		percent_match        -- stored as-is; used elsewhere in the class
		skip                 -- ids to skip: either a collection of ids (set,
		                        frozenset, list or tuple) or the name of a
		                        file holding one id per line
		summary_file         -- stored as-is
		summary_prefix       -- stored as-is
		debug                -- stored as-is
		'''
		self.fasta_file = fasta_file
		self.working_directory = working_directory if working_directory else os.getcwd()
		self.cutoff_contig_length = cutoff_contig_length
		self.percent_match = percent_match
		self.summary_file = summary_file
		self.summary_prefix = summary_prefix
		self.debug = debug
		self.contigs = {}
		tasks.file_to_dict(self.fasta_file, self.contigs)  # Read contig ids and sequences into dict

		self.ids_to_skip = set()
		if skip:
			if isinstance(skip, (set, frozenset, list, tuple)):
				# Already a collection of ids: copy it so later changes to
				# the caller's object do not affect this instance.
				self.ids_to_skip = set(skip)
			else:
				# Anything else is treated as a file name, one id per line.
				fh = fastaqutils.open_file_read(skip)
				try:
					for line in fh:
						self.ids_to_skip.add(line.rstrip())
				finally:
					# Close the handle even if reading fails part-way.
					fastaqutils.close(fh)
		self.output_file = self._build_final_filename()
Beispiel #3
0
    def test_file_to_dict(self):
        '''check file_to_dict fills dictionary correctly'''
        d_test = {}
        tasks.file_to_dict(os.path.join(data_dir, 'sequences_test.fa'), d_test)
        # Expected content: ids '1'..'4', each with sequence ACGTA.
        d = {str(i): sequences.Fasta(str(i), 'ACGTA') for i in range(1, 5)}

        # Dict key views are not sequences (no indexing), so compare the
        # ids as sets rather than with assertSequenceEqual.
        self.assertEqual(set(d_test), set(d))
        for key in sorted(d):
            self.assertEqual(d_test[key].id, d[key].id)
            self.assertEqual(d_test[key].seq, d[key].seq)
Beispiel #4
0
    def test_file_to_dict(self):
        '''check file_to_dict fills dictionary correctly'''
        d_test = {}
        tasks.file_to_dict(os.path.join(data_dir, 'sequences_test.fa'), d_test)
        # Expected content: ids '1'..'4', each with sequence ACGTA.
        d = {str(i): sequences.Fasta(str(i), 'ACGTA') for i in range(1, 5)}

        # Dict key views are not sequences (no indexing), so compare the
        # ids as sets rather than with assertSequenceEqual.
        self.assertEqual(set(d_test), set(d))
        for key in sorted(d):
            self.assertEqual(d_test[key].id, d[key].id)
            self.assertEqual(d_test[key].seq, d[key].seq)
Beispiel #5
0
 def test_capillary_to_pairs(self):
     '''Check that capillary reads file converted to paired and unpaired'''
     tmp_prefix = 'tmp.cap_to_pairs'
     tmp_paired = tmp_prefix + '.paired.gz'
     tmp_unpaired = tmp_prefix + '.unpaired.gz'
     try:
         tasks.capillary_to_pairs(
             os.path.join(data_dir, 'sequences_test_cap_to_read_pairs.fa'),
             tmp_prefix)
         # sequences have been hashed, so could be in any order in
         # output files. So need to check contents of files are OK
         d_correct_paired = {}
         d_correct_unpaired = {}
         tasks.file_to_dict(
             os.path.join(data_dir,
                          'sequences_test_cap_to_read_pairs.fa.paired.gz'),
             d_correct_paired)
         tasks.file_to_dict(
             os.path.join(data_dir,
                          'sequences_test_cap_to_read_pairs.fa.unpaired.gz'),
             d_correct_unpaired)
         d_test_paired = {}
         d_test_unpaired = {}
         tasks.file_to_dict(tmp_paired, d_test_paired)
         tasks.file_to_dict(tmp_unpaired, d_test_unpaired)
         self.assertDictEqual(d_test_paired, d_correct_paired)
         self.assertDictEqual(d_test_unpaired, d_correct_unpaired)
     finally:
         # Remove the temporary outputs even when an assertion fails; the
         # existence check keeps cleanup from masking an earlier error.
         for tmp_file in (tmp_paired, tmp_unpaired):
             if os.path.exists(tmp_file):
                 os.unlink(tmp_file)
Beispiel #6
0
        def files_are_equal(file1, file2):
            '''Return True if both sequence files hold the same ids with equal lengths.

            Both files are loaded with tasks.file_to_dict. Only the id and the
            length of each record are compared, not the sequence itself.
            '''
            seqs1 = {}
            seqs2 = {}
            tasks.file_to_dict(file1, seqs1)
            tasks.file_to_dict(file2, seqs2)
            if len(seqs1) != len(seqs2):
                return False

            for name, seq1 in seqs1.items():
                # Same record count but different ids: report a mismatch
                # instead of raising KeyError.
                if name not in seqs2:
                    return False
                seq2 = seqs2[name]
                if seq1.id != seq2.id:
                    return False
                if len(seq1) != len(seq2):
                    return False

            return True
Beispiel #7
0
        def files_are_equal(file1, file2):
            '''Return True if both sequence files hold the same ids with equal lengths.

            Both files are loaded with tasks.file_to_dict. Only the id and the
            length of each record are compared, not the sequence itself.
            '''
            seqs1 = {}
            seqs2 = {}
            tasks.file_to_dict(file1, seqs1)
            tasks.file_to_dict(file2, seqs2)
            if len(seqs1) != len(seqs2):
                return False

            for name, seq1 in seqs1.items():
                # Same record count but different ids: report a mismatch
                # instead of raising KeyError.
                if name not in seqs2:
                    return False
                seq2 = seqs2[name]
                if seq1.id != seq2.id:
                    return False
                if len(seq1) != len(seq2):
                    return False

            return True
    def test_finding_dnaA(self):
        '''Run ContigBreakFinder on BREAKING_input_0.fa with different skip lists.

        Each case pairs a finder with the name of the FASTA file (in data_dir)
        holding the contigs expected after run().
        '''

        def make_finder(**extra):
            # All cases share the input, the gene file and the two flags;
            # only the optional skip file differs.
            return contig_break_finder.ContigBreakFinder(
                fasta_file=os.path.join(data_dir, "BREAKING_input_0.fa"),
                gene_file=os.path.join(data_dir, "test_dnaA_1.fa"),
                choose_random_gene=False,
                rename=False,
                **extra)

        # Finders are all built up front, before any of them is run.
        tests = [
            # skip 1, dnaA normal - nothing should change
            (make_finder(skip=os.path.join(data_dir, "BREAKING_skip_ids_0.txt")),
             'BREAKING_input_0.fa'),
            # skip all - nothing should change
            (make_finder(skip=os.path.join(data_dir, "BREAKING_skip_ids_0_all.txt")),
             'BREAKING_input_0.fa'),
            # skip none - test1 contig should be circularised
            (make_finder(), 'BREAKING_output_0_all.fa'),
        ]

        for finder, expected_name in tests:
            finder.run()
            self.assertTrue(os.path.isfile(finder.output_file))
            self.assertTrue(os.path.isfile(finder.summary_file))
            # Read expected output file and compare sequences
            expected_contigs = {}
            tasks.file_to_dict(os.path.join(data_dir, expected_name), expected_contigs)
            for contig_id, expected_contig in expected_contigs.items():
                # assertEqual reports both values when they differ.
                self.assertEqual(expected_contig, finder.contigs[contig_id])
            os.remove(finder.output_file)
            os.remove(finder.summary_file)
Beispiel #9
0
	def __init__(self,
				 fasta_file='',
				 working_directory=None,
				 contigs=None,
				 alignments=None,
				 trim=True,
				 trim_reversed_overlaps=False,
				 overlap_offset=1000,
				 overlap_boundary_max=50,
				 overlap_min_length=1000,
				 overlap_max_length=3000,
				 overlap_percent_identity=85,
				 min_trim_length=0.89,
				 skip=None,
				 summary_file="contig_trimming_summary.txt",
				 summary_prefix='[contig trimmer]',
				 debug=False):
		'''Store the trimming settings and make sure contigs are loaded.

		fasta_file           -- FASTA file read into self.contigs when no
		                        (non-empty) contigs dict is supplied
		working_directory    -- falls back to the current directory when not given
		contigs              -- optional dict of contigs; None or empty means
		                        "load from fasta_file"
		alignments           -- optional list of alignments; None means a new
		                        empty list
		overlap_boundary_max -- stored multiplied by 0.01
		skip                 -- passed to utils.parse_file_or_set to get the
		                        ids to skip
		The remaining arguments are stored unchanged on the instance.
		'''
		self.fasta_file = fasta_file
		self.working_directory = working_directory if working_directory else os.getcwd()
		# None defaults (instead of {} / []) so that instances never share
		# one mutable default object.
		self.contigs = contigs if contigs is not None else {}
		self.alignments = alignments if alignments is not None else []
		self.trim = trim
		self.trim_reversed_overlaps = trim_reversed_overlaps
		self.overlap_offset = overlap_offset
		self.overlap_boundary_max = overlap_boundary_max * 0.01
		self.overlap_min_length = overlap_min_length
		self.overlap_max_length = overlap_max_length
		self.overlap_percent_identity = overlap_percent_identity
		self.min_trim_length = min_trim_length
		self.ids_to_skip = utils.parse_file_or_set(skip)
		self.summary_file = summary_file
		self.summary_prefix = summary_prefix
		self.output_file = self._build_final_filename()
		self.debug = debug

		# Extract contigs
		if not self.contigs:
			self.contigs = {}
			tasks.file_to_dict(self.fasta_file, self.contigs)
Beispiel #10
0
    def __init__(
            self,
            fasta_file,
            gene_file,
            skip=None,  #Avoid circularising contigs with these ids
            hit_percent_id=80,
            match_length_percent=100,
            choose_random_gene=True,
            rename=True,
            working_directory=None,
            summary_file="contig_breaks_summary.txt",
            summary_prefix="[contig break finder]",
            debug=False):
        '''Store the settings and load the contigs from fasta_file.

        fasta_file        -- FASTA file read into self.contigs (id -> sequence)
        gene_file         -- stored as-is
        skip              -- ids of contigs to leave alone: either a collection
                             of ids (set, frozenset, list or tuple) or the name
                             of a file holding one id per line
        working_directory -- falls back to the current directory when not given
        The remaining arguments are stored unchanged on the instance.
        '''
        self.fasta_file = fasta_file
        self.gene_file = gene_file
        self.hit_percent_id = hit_percent_id
        self.match_length_percent = match_length_percent
        self.choose_random_gene = choose_random_gene
        self.rename = rename
        self.working_directory = working_directory if working_directory else os.getcwd()
        self.summary_file = summary_file
        self.summary_prefix = summary_prefix
        self.output_file = self._build_final_filename()
        self.debug = debug
        self.contigs = {}
        # Read contig ids and sequences into dict
        tasks.file_to_dict(self.fasta_file, self.contigs)
        self.random_gene_starts = {}

        self.ids_to_skip = set()
        if skip:
            if isinstance(skip, (set, frozenset, list, tuple)):
                # Already a collection of ids: copy it so later changes to
                # the caller's object do not affect this instance.
                self.ids_to_skip = set(skip)
            else:
                # Anything else is treated as a file name, one id per line.
                fh = fastaqutils.open_file_read(skip)
                try:
                    for line in fh:
                        self.ids_to_skip.add(line.rstrip())
                finally:
                    # Close the handle even if reading fails part-way.
                    fastaqutils.close(fh)
	def test_contig_overlap_trimmer(self):
		'''Test contig overlap trimming'''
		# test data
		test_fasta_file = os.path.join(data_dir, "TRIMMING_input_1.fa")
		# One row of fields per alignment; each row is joined with tabs into
		# the coordinate line handed to alignment.Alignment.
		coord_fields = [
			['1', '4', '57', '60', '4', '4', '100.00', '60', '60', '1', '1', 'contig1', 'contig1'],
			['1', '4', '57', '60', '4', '4', '100.00', '60', '60', '1', '1', 'contig2', 'contig2'],
			['2', '4', '57', '59', '3', '3', '100.00', '60', '60', '1', '1', 'contig3', 'contig3'],
			['2', '4', '54', '56', '3', '3', '100.00', '60', '60', '1', '1', 'contig4', 'contig4'],
			['1', '3', '58', '60', '3', '3', '100.00', '60', '60', '1', '1', 'contig4', 'contig4'],
			['1', '4', '57', '60', '4', '4', '100.00', '60', '60', '1', '1', 'contig5', 'contig5'],
			['1', '2', '59', '60', '2', '2', '100.00', '60', '60', '1', '1', 'contig6', 'contig6'],  # Overlap too short
			['1', '12', '49', '60', '12', '12', '100.00', '60', '60', '1', '1', 'contig7', 'contig7'],  # Trimmed length would be too short
			['1', '3', '60', '58', '3', '3', '100.00', '60', '60', '-1', '-1', 'contig8', 'contig8'],  # overlap reversed
			# No overlap for contig 9
			['4', '7', '36', '38', '4', '4', '100.00', '60', '60', '1', '-1', 'contig10', 'contig10'],  # beyond offset
		]
		test_overlap_coords = ['\t'.join(fields) for fields in coord_fields]
		test_overlap_alignments = [alignment.Alignment(coord) for coord in test_overlap_coords]
		overlap_trimmer = contig_overlap_trimmer.ContigOverlapTrimmer(
			fasta_file=test_fasta_file,
			alignments=test_overlap_alignments,
			overlap_offset=10,
			overlap_min_length=3,
			overlap_max_length=12,
		)
		overlap_trimmer.run()

		self.assertTrue(os.path.isfile(overlap_trimmer.output_file))
		self.assertTrue(os.path.isfile(overlap_trimmer.summary_file))

		expected_contigs = {}
		tasks.file_to_dict(os.path.join(data_dir, "TRIMMING_output_1.fa"), expected_contigs)
		for contig_id, expected_contig in expected_contigs.items():
			# assertEqual reports both values when they differ.
			self.assertEqual(expected_contig, overlap_trimmer.contigs[contig_id])

		os.remove(overlap_trimmer.output_file)
		os.remove(overlap_trimmer.summary_file)
		
		
Beispiel #12
0
 def test_capillary_to_pairs(self):
     '''Check that capillary reads file converted to paired and unpaired'''
     tmp_prefix = 'tmp.cap_to_pairs'
     tmp_paired = tmp_prefix + '.paired.gz'
     tmp_unpaired = tmp_prefix + '.unpaired.gz'
     try:
         tasks.capillary_to_pairs(os.path.join(data_dir, 'sequences_test_cap_to_read_pairs.fa'), tmp_prefix)
         # sequences have been hashed, so could be in any order in
         # output files. So need to check contents of files are OK
         d_correct_paired = {}
         d_correct_unpaired = {}
         tasks.file_to_dict(os.path.join(data_dir, 'sequences_test_cap_to_read_pairs.fa.paired.gz'), d_correct_paired)
         tasks.file_to_dict(os.path.join(data_dir, 'sequences_test_cap_to_read_pairs.fa.unpaired.gz'), d_correct_unpaired)
         d_test_paired = {}
         d_test_unpaired = {}
         tasks.file_to_dict(tmp_paired, d_test_paired)
         tasks.file_to_dict(tmp_unpaired, d_test_unpaired)
         self.assertDictEqual(d_test_paired, d_correct_paired)
         self.assertDictEqual(d_test_unpaired, d_correct_unpaired)
     finally:
         # Remove the temporary outputs even when an assertion fails; the
         # existence check keeps cleanup from masking an earlier error.
         for tmp_file in (tmp_paired, tmp_unpaired):
             if os.path.exists(tmp_file):
                 os.unlink(tmp_file)
                    help='Minimum length of contig to output [%(default)s]',
                    default=200)
parser.add_argument('--nucmer_options',
                    help='Options when running nucmer [%(default)s]',
                    default='')
parser.add_argument('contigs_fa',
                    help='Name of contigs fasta file',
                    metavar='contigs.fa')
parser.add_argument('ref_fa',
                    help='Name of reference fasta file',
                    metavar='reference.fa')
parser.add_argument('outprefix', help='Prefix of output files')
options = parser.parse_args()

ref_seqs = {}
tasks.file_to_dict(options.ref_fa, ref_seqs)

nucmer_out_prefix = options.outprefix + '.nucmer'
nucmer_out_delta = nucmer_out_prefix + '.delta'
nucmer_out_filter = nucmer_out_prefix + '.delta-filter'
nucmer_out_coords = nucmer_out_filter + '.coords'

# run nucmer of contigs vs ref
utils.syscall(' '.join([
    'nucmer', options.nucmer_options, '-p', nucmer_out_prefix, options.ref_fa,
    options.contigs_fa
]))
utils.syscall(' '.join([
    'delta-filter', '-i 98 -l 180 -q', nucmer_out_delta, '>', nucmer_out_filter
]))
utils.syscall(' '.join(
    contigs[(nucmer_hit.ref_name, nucmer_hit.ref_start, nucmer_hit.ref_end)] = contig


# Command line: optional settings first, then the three positional arguments
# in the order contigs, reference, output prefix.
parser = argparse.ArgumentParser(
    usage="%(prog)s [options] <contigs.fa> <reference.fa> <outprefix>",
    description="Takes contigs and a reference sequence. Makes a new fasta file of the contigs, but they are now perfect sequences by using the reference instead",
)
parser.add_argument(
    "--min_seq_length",
    type=int,
    default=200,
    help="Minimum length of contig to output [%(default)s]",
)
parser.add_argument(
    "--nucmer_options",
    default="",
    help="Options when running nucmer [%(default)s]",
)
parser.add_argument(
    "contigs_fa",
    metavar="contigs.fa",
    help="Name of contigs fasta file",
)
parser.add_argument(
    "ref_fa",
    metavar="reference.fa",
    help="Name of reference fasta file",
)
parser.add_argument("outprefix", help="Prefix of output files")
options = parser.parse_args()

# Reference sequences, filled in place by file_to_dict.
ref_seqs = {}
tasks.file_to_dict(options.ref_fa, ref_seqs)

# Every intermediate file name is derived from the output prefix.
nucmer_out_prefix = options.outprefix + ".nucmer"
nucmer_out_delta = nucmer_out_prefix + ".delta"
nucmer_out_filter = nucmer_out_prefix + ".delta-filter"
nucmer_out_coords = nucmer_out_filter + ".coords"

# run nucmer of contigs vs ref, then filter the delta file and write the
# coordinates; the three commands run in this order, each as one
# space-joined string.
nucmer_pipeline = (
    ["nucmer", options.nucmer_options, "-p", nucmer_out_prefix, options.ref_fa, options.contigs_fa],
    ["delta-filter", "-i 98 -l 180 -q", nucmer_out_delta, ">", nucmer_out_filter],
    ["show-coords", "-dTlro", nucmer_out_filter, ">", nucmer_out_coords],
)
for command_words in nucmer_pipeline:
    utils.syscall(" ".join(command_words))

# load hits into hash. key=ref_name, value=another hash with key=qry_name, value=list of hit positions in that ref seq
nucmer_hits = {}
contigs_to_print = {}
    def test_finding_dnaA_in_various_positions(self):
        '''Run ContigBreakFinder on a series of inputs and check the resulting contigs.

        Each case pairs a finder with the name of the FASTA file (in data_dir)
        holding the expected contigs; only the sequences are compared.
        '''
        default_gene = "BREAKFINDER_test_dnaA.fa"

        def make_finder(input_name, gene_name=default_gene, rename=False, skip_name=None):
            # choose_random_gene is False in every case; skip is only passed
            # when a skip file is named.
            extra = {}
            if skip_name is not None:
                extra['skip'] = os.path.join(data_dir, skip_name)
            return contig_break_finder.ContigBreakFinder(
                fasta_file=os.path.join(data_dir, input_name),
                gene_file=os.path.join(data_dir, gene_name),
                choose_random_gene=False,
                rename=rename,
                **extra)

        # Finders are all built up front, before any of them is run.
        tests = [
            # dnaa at start - return identical sequence
            (make_finder("BREAKFINDER_input_dnaa_at_start.fa"),
             'BREAKFINDER_output_dnaa_at_start.fa'),
            # dnaa in the middle
            (make_finder("BREAKFINDER_input_dnaa_in_middle.fa"),
             'BREAKFINDER_output_dnaa_in_middle.fa'),
            # dnaa in middle of contig but revcom
            (make_finder("BREAKFINDER_input_dnaa_in_middle_revcom.fa"),
             'BREAKFINDER_output_dnaa_in_middle.fa'),
            # dnaa at the end
            (make_finder("BREAKFINDER_input_dnaa_at_end.fa"),
             'BREAKFINDER_output_dnaa_at_end.fa'),
            # dnaa split across start and end, but chunks large enough for promer to detect
            (make_finder("BREAKFINDER_input_dnaa_split.fa"),
             'BREAKFINDER_output_dnaa_split.fa'),
            # dnaa split across start and end but revcom
            (make_finder("BREAKFINDER_input_dnaa_split_revcom.fa"),
             'BREAKFINDER_output_dnaa_split_revcom.fa'),
            # no dnaa - do not change the contig
            (make_finder("BREAKFINDER_input_no_dnaa.fa"),
             'BREAKFINDER_input_no_dnaa.fa'),
            # best dnaa hit not first
            (make_finder("BREAKFINDER_input_multiple_dnaa.fa",
                         gene_name="BREAKFINDER_test_multiple_dnaA.fa"),
             'BREAKFINDER_output_multiple_dnaa.fa'),
            # dnaa split across edges - just 4 bases of dnaa at the end, contig long enough to run promer on just the edges stuck together
            (make_finder("BREAKFINDER_input_dnaa_split_edge.fa"),
             'BREAKFINDER_output_dnaa_split_edge.fa'),
            # dnaa split across edges and rev com - just 4 bases of dnaa at the end, contig long enough to run promer on just the edges stuck together
            (make_finder("BREAKFINDER_input_dnaa_split_edge_revcom.fa"),
             'BREAKFINDER_output_dnaa_split_edge.fa'),
            # dnaa split across edges (just 5 bases of dnaa at the start) but contig not long enough to run promer on just ends - will not find dnaA
            (make_finder("BREAKFINDER_input_dnaa_split_edge_tooshort.fa"),
             'BREAKFINDER_input_dnaa_split_edge_tooshort.fa'),
            # ---- testing other options -----------
            # rename genes
            (make_finder("BREAKFINDER_input_dnaa_at_start.fa", rename=True),
             'BREAKFINDER_output_dnaa_at_start.fa'),
            # NOTE: a "no dnaa, but use prodigal" case is currently disabled
            # (input BREAKFINDER_input_no_dnaa_use_prodigal.fa, gene file
            # BREAKFINDER_real_dnaa.fa, choose_random_gene=True, expected
            # BREAKFINDER_output_no_dnaa_use_prodigal.fa).
            # skip one contig
            (make_finder("BREAKFINDER_input_multiple_contigs.fa",
                         skip_name="BREAKFINDER_skip_one_id.txt"),
             'BREAKFINDER_output_skip_contig.fa'),
            # skip all contigs - do not change anything
            (make_finder("BREAKFINDER_input_multiple_contigs.fa",
                         rename=True,
                         skip_name="BREAKFINDER_skip_all.txt"),
             'BREAKFINDER_input_multiple_contigs.fa'),
        ]

        for finder, expected_name in tests:
            finder.run()
            self.assertTrue(os.path.isfile(finder.output_file))
            self.assertTrue(os.path.isfile(finder.summary_file))
            expected_contigs = {}
            tasks.file_to_dict(os.path.join(data_dir, expected_name), expected_contigs)
            for contig_id, expected_contig in expected_contigs.items():
                # assertEqual reports both sequences when they differ.
                self.assertEqual(expected_contig.seq, finder.contigs[contig_id].seq)
            os.remove(finder.output_file)
            os.remove(finder.summary_file)