def fastani(self, args):
    """Calculate ANI/AF between genome pairs with FastANI and write results.

    The query and reference genome files are resolved with
    ``self._input_files``, the pairs to compare are selected with
    ``self._genome_comparisons``, and the comparisons are run through a
    ``FastANI`` instance. ``write_cache`` is called on that instance
    whether or not the comparison step succeeds.

    Args:
        args: Parsed options. The attributes read here are
            ``query_genomes``, ``ref_genomes``, ``file_ext``,
            ``validate_genome_files``, ``output_dir``, ``ref_to_query``,
            ``ani_db_file``, ``cpus`` and ``initial_cache_check``.

    Output files (all inside ``args.output_dir``):
        ``ani_af.tsv`` is always written. ``ani_matrix.tsv`` and
        ``af_matrix.tsv`` are written in addition when
        ``args.query_genomes == args.ref_genomes``.

    Raises:
        Exception: anything raised while computing the pairs is printed
            and then re-raised unchanged.
    """
    queries = self._input_files(
        args.query_genomes,
        args.file_ext,
        args.validate_genome_files,
    )
    references = self._input_files(
        args.ref_genomes,
        args.file_ext,
        args.validate_genome_files,
    )

    make_sure_path_exists(args.output_dir)

    self.logger.info(
        'Identified {:,} query and {:,} reference genomes.'.format(
            len(queries), len(references)))

    # work out which genome pairs need to be compared
    pairs_to_run, files_by_genome = self._genome_comparisons(
        queries, references, args.ref_to_query)
    self.logger.info(
        'Calculating ANI between {:,} genome pairs.'.format(
            len(pairs_to_run)))

    # run the comparisons; timing starts before the FastANI object is built
    start = time.time()
    ani_runner = FastANI(args.ani_db_file, args.cpus)
    try:
        pair_results = ani_runner.pairs(
            pairs_to_run,
            files_by_genome,
            report_progress=True,
            initial_cache_check=args.initial_cache_check,
        )
        tsv_path = os.path.join(args.output_dir, 'ani_af.tsv')
    except Exception as err:
        print(err)
        raise
    finally:
        # runs on success and on failure alike
        ani_runner.write_cache()

    ani_runner.write_ani_af(tsv_path, pair_results)

    if args.query_genomes == args.ref_genomes:
        # query and reference inputs are the same, i.e. pairwise
        # calculations were performed, so also write results as matrices
        #
        # NOTE(review): both files are produced by write_ani_matrix, so
        # af_matrix.tsv presumably ends up with the same content as
        # ani_matrix.tsv. Confirm whether FastANI offers a dedicated AF
        # matrix writer that should be used for the second file.
        for matrix_name in ('ani_matrix.tsv', 'af_matrix.tsv'):
            matrix_path = os.path.join(args.output_dir, matrix_name)
            ani_runner.write_ani_matrix(matrix_path, pair_results)

    elapsed_seconds = round(time.time() - start)
    elapsed_time_str = str(datetime.timedelta(seconds=elapsed_seconds))
    self.logger.info(
        f'Time to calculate ANI values (h:mm:ss): {elapsed_time_str}')

    self.logger.info('Done.')
def test_write_ani_af(tmpdir):
    """Test that FastANI.write_ani_af writes a header and one result row.

    Args:
        tmpdir: pytest temporary-directory fixture; the results file is
            created in an ``output`` sub-directory of it.
    """
    out_file = tmpdir.mkdir("output").join("results.tsv").strpath

    # a single query (g1) vs. reference (g2) entry with values (90, 0.95)
    ani_af = {'g1': {'g2': (90, 0.95)}}

    fastani = FastANI(None, 1)
    fastani.write_ani_af(out_file, ani_af)

    # the context manager closes the file even if an assertion fails
    with open(out_file) as fin:
        assert fin.readline().strip() == 'Query\tReference\tANI\tAF'
        assert fin.readline().strip() == 'g1\tg2\t90\t0.95'