def run(self):
    """Split the input FASTA into fragments and write a default profile.

    Steps, in order: run barrnap on ``self.input_file``, read the boundaries
    it reports, compute and populate fragments from the FASTA, write one
    FASTA per fragment into ``self.output_directory`` and finally write the
    default profile file.

    Raises:
        subprocess.CalledProcessError: if the barrnap command exits non-zero.
    """
    # Local import: ``os`` is only needed to close the mkstemp descriptor.
    import os

    # TODO: filter fasta to keep just the largest contig.

    # run the fasta through barrnap
    fd, barrnap_outputfile = mkstemp()
    # mkstemp() returns an already-open OS-level descriptor. Only the path is
    # used below, so close it immediately instead of leaking it.
    os.close(fd)
    self.files_to_cleanup.append(barrnap_outputfile)

    b = Barrnap(self.input_file, self.threads)
    # NOTE(review): shell=True with a command string built elsewhere --
    # confirm construct_barrnap_command() quotes the file paths.
    subprocess.check_output(
        b.construct_barrnap_command(barrnap_outputfile), shell=True)
    boundries = b.read_barrnap_output(barrnap_outputfile)

    f = Fasta(self.input_file)
    fragments = f.calc_fragment_coords(boundries)
    f.populate_fragments_from_chromosome(fragments, self.max_bases_from_ends)

    ff = FragmentFiles(fragments, self.output_directory,
                       fragment_order=self.fragment_order)
    ff.create_fragment_fastas()

    # create a default profile.txt file
    default_profile = ProfileGenerator(self.output_directory,
                                       len(ff.ordered_fragments),
                                       self.dnaa_fasta,
                                       self.threads)
    default_profile.write_output_file()
def test_calc_fragment_coords_gz(self):
    # Same coordinate expectations as the plain-text fixture, but reading
    # the '.fa.gz' fixture file.
    fasta_path = os.path.join(data_dir, 'calc_fragment_coords.fa.gz')
    fasta = Fasta(fasta_path, False)

    fragments = fasta.calc_fragment_coords([[45, 55], [90, 110], [150, 180]])

    observed = [fragment.coords for fragment in fragments]
    self.assertEqual(observed,
                     [[[180, 200], [0, 45]], [[55, 90]], [[110, 150]]])
def test_chop_from_ends(self):
    # Populating with a limit of 5 is expected to leave each fragment as
    # 5 leading bases + 'NNN' + 5 trailing bases (see expected values below).
    fasta = Fasta(os.path.join(data_dir, 'calc_fragment_coords.fa'), False)
    fragments = fasta.calc_fragment_coords([[45, 55], [90, 110], [150, 180]])

    # The pre-population sequences were previously collected here and then
    # overwritten without being checked; that dead store has been dropped.
    fasta.populate_fragments_from_chromosome(fragments, 5)

    # Use a distinct loop name so it does not shadow the Fasta instance.
    sequences = [str(fragment.sequence) for fragment in fragments]
    self.assertEqual(sequences,
                     ['TTTTTNNNAAAAA', 'CCCCCNNNCCCCC', 'GGGGGNNNGGGGG'])
def test_calc_fragment_coords(self):
    # Three boundaries should yield three fragments; the first one is made
    # of two coordinate pairs ([180, 200] followed by [0, 45]).
    fasta = Fasta(os.path.join(data_dir, 'calc_fragment_coords.fa'), False)
    boundaries = [[45, 55], [90, 110], [150, 180]]

    fragments = fasta.calc_fragment_coords(boundaries)

    observed = [fragment.coords for fragment in fragments]
    expected = [[[180, 200], [0, 45]], [[55, 90]], [[110, 150]]]
    self.assertEqual(observed, expected)

    # No assertion follows: this only checks that populating the fragments
    # without an end limit does not raise.
    fasta.populate_fragments_from_chromosome(fragments, None)
def run_analysis(self, input_file, p, d):
    """Type a single FASTA file and return its tab-separated result line.

    The input is run through barrnap, split into fragments at the reported
    boundaries, and each fragment FASTA is blasted against the database.
    The best hit per fragment (or ``'?'`` when there is none) builds up a
    GAT profile, which is then looked up with ``TypeGenerator``.

    Side effects visible here: fragments without a hit are appended to
    ``self.new_fragments``; hits are appended to ``self.top_results``;
    profiles not previously seen are appended to ``self.novel_profiles``;
    the temporary fragment directory is registered in
    ``self.dirs_to_cleanup``.

    Args:
        input_file: path of the FASTA file to analyse.
        p: passed through unchanged to ``TypeGenerator``.
        d: database object; only ``d.db_prefix`` is read here.

    Returns:
        ``tg.calculate_type()`` + tab + ``str(gat_profile)``.

    Raises:
        subprocess.CalledProcessError: if the barrnap command exits non-zero.
    """
    # Local import: ``os`` is only needed for temp-file housekeeping.
    import os

    # run the fasta through barrnap
    fd, barrnap_outputfile = mkstemp()
    # mkstemp() returns an already-open descriptor; only the path is used.
    os.close(fd)
    try:
        b = Barrnap(input_file, self.threads)
        # NOTE(review): shell=True with a command string built elsewhere --
        # confirm construct_barrnap_command() quotes the file paths.
        subprocess.check_output(
            b.construct_barrnap_command(barrnap_outputfile), shell=True)
        boundries = b.read_barrnap_output(barrnap_outputfile)

        f = Fasta(input_file, is_circular=self.is_circular)
        fragments = f.calc_fragment_coords(boundries)
        f.populate_fragments_from_chromosome(fragments,
                                             self.max_bases_from_ends)
    finally:
        # The barrnap output is not registered for cleanup anywhere, so
        # delete it here, once the boundaries have been consumed.
        os.remove(barrnap_outputfile)

    tmpdir = mkdtemp()
    self.dirs_to_cleanup.append(tmpdir)
    ff = FragmentFiles(fragments, tmpdir)
    ff.create_fragment_fastas()

    # take each fasta file and blast it against the database
    blast = Blast(d.db_prefix, self.threads)
    gat_profile = GATProfile(fragments=[])
    for fasta_file in ff.output_filenames:
        blast_results = blast.run_blast(fasta_file)
        fb = FilterBlast(blast_results, self.min_bit_score,
                         self.min_alignment_length)
        top_result = fb.return_top_result()

        if top_result is None:
            # No acceptable hit: mark the position as unknown and keep the
            # fragment's FASTA by appending it to the new-fragments file.
            gat_profile.fragments.append('?')
            with open(fasta_file, "r") as fasta_file_fh, \
                    open(self.new_fragments, "a+") as newfrag_fh:
                newfrag_fh.write(fasta_file_fh.read())
            continue

        self.top_results.append(top_result)
        # Hits that are not forward get a trailing apostrophe.
        if top_result.is_forward():
            gat_profile.fragments.append(str(top_result.subject))
        else:
            gat_profile.fragments.append(str(top_result.subject) + '\'')

    gat_profile.orientate_for_dnaA()

    # lookup the gat_profile to get the number
    tg = TypeGenerator(p, gat_profile)
    type_output_string = tg.calculate_type() + "\t" + str(gat_profile)
    if not tg.has_previously_seen:
        with open(self.novel_profiles, "a+") as output_fh:
            output_fh.write(self.db_dir + "\t" + type_output_string + "\n")
    return type_output_string
def test_populate_fragments_from_chromosome(self):
    # Fragment sequences start out empty and are filled in by
    # populate_fragments_from_chromosome (no end limit given).
    fasta = Fasta(os.path.join(data_dir, 'calc_fragment_coords.fa'), False)
    fragments = fasta.calc_fragment_coords([[45, 55], [90, 110], [150, 180]])

    before = [str(fragment.sequence) for fragment in fragments]
    self.assertEqual(before, ["", "", ""])

    fasta.populate_fragments_from_chromosome(fragments, None)

    after = [str(fragment.sequence) for fragment in fragments]
    expected = [
        "TTTTTTTTTTTTTTTTTTTTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
        "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC",
        "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG",
    ]
    self.assertEqual(after, expected)
def test_chop_from_ends(self):
    # Populating with a limit of 5 is expected to leave each fragment as
    # 5 leading bases + 'NNN' + 5 trailing bases (see expected values below).
    fasta = Fasta(os.path.join(data_dir, 'calc_fragment_coords.fa'), False)
    boundries = [
        Operon(45, 55, True),
        Operon(90, 110, False),
        Operon(150, 180, True),
    ]
    fragments = fasta.calc_fragment_coords(boundries)

    # The pre-population sequences were previously collected here and then
    # overwritten without being checked; that dead store has been dropped.
    fasta.populate_fragments_from_chromosome(fragments, 5)

    # Use a distinct loop name so it does not shadow the Fasta instance.
    sequences = [str(fragment.sequence) for fragment in fragments]
    self.assertEqual(sequences,
                     ['TTTTTNNNAAAAA', 'CCCCCNNNCCCCC', 'GGGGGNNNGGGGG'])
def test_populate_fragments_from_chromosome(self):
    # Fragment sequences start out empty and are filled in by
    # populate_fragments_from_chromosome (no end limit given); the
    # boundaries are supplied as Operon objects.
    fasta = Fasta(os.path.join(data_dir, 'calc_fragment_coords.fa'), False)
    operons = [
        Operon(45, 55, True),
        Operon(90, 110, False),
        Operon(150, 180, True),
    ]
    fragments = fasta.calc_fragment_coords(operons)

    before = [str(fragment.sequence) for fragment in fragments]
    self.assertEqual(before, ["", "", ""])

    fasta.populate_fragments_from_chromosome(fragments, None)

    after = [str(fragment.sequence) for fragment in fragments]
    expected = [
        "TTTTTTTTTTTTTTTTTTTTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
        "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC",
        "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG",
    ]
    self.assertEqual(after, expected)
def populate_fragments_from_chromosome(self, input_file, boundries):
    """Compute fragments for ``input_file`` and populate them.

    A ``Fasta`` is built from ``input_file`` using this object's
    ``verbose`` and ``is_circular`` settings. Fragment coordinates are
    calculated from ``boundries`` and the fragments are then populated
    with ``self.max_bases_from_ends`` as the limit.

    Args:
        input_file: passed to ``Fasta`` as its input.
        boundries: passed unchanged to ``Fasta.calc_fragment_coords``.

    Returns:
        The fragments returned by ``calc_fragment_coords``, after the
        populate call has run on them.
    """
    fasta = Fasta(input_file, self.verbose, is_circular=self.is_circular)
    populated = fasta.calc_fragment_coords(boundries)
    fasta.populate_fragments_from_chromosome(
        populated, self.max_bases_from_ends)
    return populated