def compile_glimmer(logger, only_clean=False):
    """Build the bundled GlimmerHMM when needed and return the binary path.

    Args:
        logger: logger forwarded to ``compile_tool``.
        only_clean: forwarded to ``compile_tool``. When true, ``compile_tool``
            is invoked even if a ``glimmerhmm`` binary is already found, so
            the clean request is not silently skipped.

    Returns:
        Whatever ``get_path_to_program('glimmerhmm', ...)`` reports after the
        (optional) ``compile_tool`` call.
    """
    tool_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'glimmer')
    tool_src_dirpath = os.path.join(tool_dirpath, 'src')

    # Same policy as compile_minimap: an existing binary only short-circuits
    # a build request, never a clean request.
    if only_clean or not get_path_to_program('glimmerhmm', tool_dirpath):
        compile_tool('GlimmerHMM', tool_src_dirpath, ['../glimmerhmm'],
                     logger=logger, only_clean=only_clean)
    return get_path_to_program('glimmerhmm', tool_dirpath)
def compile_aligner(logger, only_clean=False):
    """Build (or clean) Minimap2, the contig aligner.

    Returns True after a clean request or a successful ``compile_tool`` call.
    Returns False, after logging an error, when a previous failure is
    recorded or ``compile_tool`` reports failure.
    """
    tool_name = 'Minimap2'
    needed_binaries = ['minimap2']
    failed_flag_fpath = join(contig_aligner_dirpath, 'make.failed')

    if only_clean:
        compile_tool(tool_name, contig_aligner_dirpath, needed_binaries,
                     logger=logger, only_clean=only_clean)
        return True

    previously_failed = check_prev_compilation_failed(
        tool_name, failed_flag_fpath, just_notice=True, logger=logger)
    if not previously_failed:
        built = compile_tool(tool_name, contig_aligner_dirpath, needed_binaries,
                             just_notice=False, logger=logger, only_clean=only_clean)
        if built:
            return True

    # Both failure paths report the same message.
    logger.error(
        "Compilation of contig aligner software was unsuccessful! "
        "QUAST functionality will be limited.")
    return False
def compile_gnuplot(logger, only_clean=False):
    """Build (or clean) the bundled gnuplot.

    Returns True for a clean request; otherwise the gnuplot executable path
    when that file exists, else None.
    """
    gnuplot_dirpath = join(qconfig.LIBS_LOCATION, 'gnuplot')
    gnuplot_binary = gnuplot_exec_fpath()
    configure_flags = ['--with-qt=no', '--disable-wxwidgets', '--prefix=' + gnuplot_dirpath]

    compile_tool('gnuplot', gnuplot_dirpath, [gnuplot_binary],
                 just_notice=True, logger=logger, only_clean=only_clean,
                 configure_args=configure_flags)

    if only_clean:
        return True
    # The result of compile_tool is ignored; only the file's presence matters.
    return gnuplot_binary if isfile(gnuplot_binary) else None
def compile_aligner(logger, only_clean=False):
    """Pick a contig aligner (E-MEM, falling back to MUMmer) and build it.

    The choice is stored in the module globals ``contig_aligner`` and
    ``contig_aligner_dirpath``. Returns True when an aligner is selected,
    False (after logging an error) when every candidate fails.
    """
    global contig_aligner
    global contig_aligner_dirpath

    # An aligner directory was already chosen earlier.
    if contig_aligner_dirpath is not None:
        return True

    # On macOS the E-MEM-osx directory is selected without calling compile_tool.
    if qconfig.platform_name == 'macosx':
        contig_aligner = 'E-MEM'
        contig_aligner_dirpath = join(qconfig.LIBS_LOCATION, 'E-MEM-osx')
        return True

    mummer_binaries = ['nucmer', 'delta-filter', 'show-coords', 'show-snps', 'mummer', 'mgaps']
    candidates = [
        ('E-MEM', join(qconfig.LIBS_LOCATION, 'E-MEM-linux'), mummer_binaries + ['e-mem']),
        ('MUMmer', join(qconfig.LIBS_LOCATION, 'MUMmer3.23-linux'), mummer_binaries)]
    last_idx = len(candidates) - 1

    for idx, (aligner_name, aligner_dirpath, needed_binaries) in enumerate(candidates):
        # just_notice is set for every candidate except the last one.
        compiled = compile_tool(aligner_name, aligner_dirpath, needed_binaries,
                                just_notice=(idx < last_idx),
                                logger=logger, only_clean=only_clean)
        if compiled:
            contig_aligner = aligner_name
            contig_aligner_dirpath = aligner_dirpath
            return True

    logger.error("Compilation of contig aligner software was unsuccessful! QUAST functionality will be limited.")
    return False
def compile_minimap(logger, only_clean=False):
    """Return True when Minimap2 is available or ``compile_tool`` succeeds.

    An existing ``minimap_fpath()`` result short-circuits only a build
    request; a clean request always goes through ``compile_tool``.
    """
    already_available = minimap_fpath() and not only_clean
    if already_available:
        return True

    outcome = compile_tool(
        'Minimap2', contig_aligner_dirpath, ['minimap2'],
        just_notice=False, logger=logger, only_clean=only_clean)
    return bool(outcome)
def compile_aligner(logger, only_clean=False):
    """Build (or clean) MUMmer, the contig aligner.

    Returns True after a clean request or a successful ``compile_tool`` call.
    Returns False, after logging an error, when a previous failure is
    recorded or ``compile_tool`` reports failure.
    """
    tool_name = 'MUMmer'
    needed_binaries = ['nucmer', 'delta-filter', 'show-coords', 'show-snps',
                       'mummer', 'mummerplot', 'mgaps']
    failed_flag_fpath = join(contig_aligner_dirpath, 'make.failed')
    failure_msg = "Compilation of contig aligner software was unsuccessful! QUAST functionality will be limited."

    if only_clean:
        compile_tool(tool_name, contig_aligner_dirpath, needed_binaries,
                     logger=logger, only_clean=only_clean)
        return True

    if check_prev_compilation_failed(tool_name, failed_flag_fpath,
                                     just_notice=True, logger=logger):
        logger.error(failure_msg)
        return False

    fix_configure_timestamps(contig_aligner_dirpath)

    # LDFLAGS=-static is passed on every platform except macOS.
    configure_flags = ['--prefix=' + contig_aligner_dirpath]
    if qconfig.platform_name != 'macosx':
        configure_flags.append('LDFLAGS=-static')

    if not compile_tool(tool_name, contig_aligner_dirpath, needed_binaries,
                        just_notice=False, logger=logger, only_clean=only_clean,
                        configure_args=configure_flags):
        logger.error(failure_msg)
        return False
    return True
def compile_aligner(logger, only_clean=False, compile_all_aligners=False):
    """Select and build the contig aligner (E-MEM or MUMmer).

    The selected aligner is stored in the module globals ``contig_aligner``
    and ``contig_aligner_dirpath``.

    Args:
        logger: logger forwarded to the helpers and used for the final error.
        only_clean: forwarded unchanged to ``compile_tool``.
        compile_all_aligners: when true, the "already selected" shortcut is
            skipped and every candidate is passed to ``compile_tool`` instead
            of stopping at the first success.

    Returns:
        True when an aligner is selected, False (after logging an error)
        otherwise.
    """
    global contig_aligner
    global contig_aligner_dirpath

    if not compile_all_aligners:
        # Reuse the earlier choice unless a failed build is recorded for it.
        if contig_aligner_dirpath is not None and not check_prev_compilation_failed(
                contig_aligner, join(contig_aligner_dirpath, 'make.failed'),
                just_notice=True, logger=logger):
            return True

    if not qconfig.force_nucmer and not contig_aligner_dirpath and qconfig.platform_name == 'macosx':
        # macOS: E-MEM is selected without calling compile_tool when it is
        # installed or has no recorded build failure.
        if get_installed_emem() or not check_prev_compilation_failed(
                'E-MEM', e_mem_failed_compilation_flag, just_notice=True, logger=logger):
            contig_aligner = 'E-MEM'
            contig_aligner_dirpath = join(qconfig.LIBS_LOCATION, 'E-MEM-osx')
            return True

    default_requirements = ['nucmer', 'delta-filter', 'show-coords', 'show-snps', 'mummer', 'mgaps']

    if qconfig.platform_name == 'macosx':
        aligners_to_try = [
            ('MUMmer', join(qconfig.LIBS_LOCATION, 'MUMmer3.23-osx'), default_requirements)]
    else:
        if not qconfig.force_nucmer:
            # The 'e-mem' binary is only required when E-MEM is not installed.
            if get_installed_emem():
                emem_requirements = default_requirements
            else:
                emem_requirements = default_requirements + ['e-mem']
            aligners_to_try = [
                ('E-MEM', join(qconfig.LIBS_LOCATION, 'E-MEM-linux'), emem_requirements),
                ('MUMmer', join(qconfig.LIBS_LOCATION, 'MUMmer3.23-linux'), default_requirements)]
        else:
            aligners_to_try = [
                ('MUMmer', join(qconfig.LIBS_LOCATION, 'MUMmer3.23-linux'), default_requirements)]

    for i, (name, dirpath, requirements) in enumerate(aligners_to_try):
        # just_notice is set for every candidate except the last one.
        success_compilation = compile_tool(
            name, dirpath, requirements,
            just_notice=(i < len(aligners_to_try) - 1),
            logger=logger, only_clean=only_clean,
            make_cmd='no-emem' if 'E-MEM' in name and get_installed_emem() else None)
        if not success_compilation:
            continue
        contig_aligner = name
        contig_aligner_dirpath = dirpath  # successfully compiled
        if not compile_all_aligners:
            return True

    if compile_all_aligners and contig_aligner and contig_aligner_dirpath:
        return True

    # The literal was previously broken across two physical lines, which is a
    # syntax error; it is one message, matching the other aligner builders.
    logger.error("Compilation of contig aligner software was unsuccessful! QUAST functionality will be limited.")
    return False
def compile_bedtools(logger, only_clean=False):
    """Build (or clean) BEDtools and return the ``compile_tool`` result."""
    bedtools_binary = join('bin', 'bedtools')
    return compile_tool('BEDtools', bedtools_dirpath, [bedtools_binary],
                        only_clean=only_clean, logger=logger)
def compile_bwa(logger, only_clean=False):
    """Build (or clean) BWA and return the ``compile_tool`` result."""
    needed_binaries = ['bwa']
    return compile_tool('BWA', bwa_dirpath, needed_binaries,
                        only_clean=only_clean, logger=logger)
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO on every assembly and store completeness in the reports.

    Prepares Augustus, the BLAST binaries and the lineage database, runs
    ``busco_main_handler`` in parallel, then parses each summary file and
    records the percentage of complete and partial BUSCOs.

    Args:
        contigs_fpaths: paths of the assemblies to analyse.
        output_dir: directory for logs and results (created if missing).
        logger: QUAST logger.

    Returns:
        None. Problems are reported through ``logger``.
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True
    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus', augustus_dirpath, [join('bin', 'augustus')], logger=logger):
        compilation_success = False
    if compilation_success and not download_blast_binaries(logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Clamp to 1 so an empty assembly list (or max_threads == 0) cannot
    # trigger a ZeroDivisionError below.
    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote, is_fungus=qconfig.is_fungus)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    config_fpath = make_config(output_dir, tmp_dir, busco_threads, clade_dirpath, augustus_dirpath)
    logger.info('Logs and results will be saved under ' + output_dir + '...')

    os.environ['BUSCO_CONFIG_FILE'] = config_fpath
    # Check the result before exporting it: os.environ only accepts strings,
    # so assigning a None result would raise TypeError before the error
    # could be logged.
    augustus_config_dirpath = copy_augustus_configs(augustus_dirpath, tmp_dir)
    if not augustus_config_dirpath:
        logger.error('Augustus configs not found, failed to run BUSCO without them.')
        return
    os.environ['AUGUSTUS_CONFIG_PATH'] = augustus_config_dirpath

    busco_args = [[contigs_fpath, qutils.label_from_fpath_for_fname(contigs_fpath)]
                  for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco_main_handler, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error(
            'Failed running BUSCO for all the assemblies. See log files in ' + output_dir + ' for information '
            '(rerun with --debug to keep all intermediate files).')
        return

    # saving results
    zero_output_for_all = True
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        summary_fpath = summary_fpaths[i]

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as f:
                for line in f:
                    # The count is the first whitespace-separated token.
                    if 'Complete BUSCOs' in line:
                        complete_buscos = int(line.split()[0])
                    elif 'Fragmented' in line:
                        part_buscos = int(line.split()[0])
                    elif 'Total' in line:
                        total_buscos = int(line.split()[0])
            if total_buscos != 0:
                report.add_field(reporting.Fields.BUSCO_COMPLETE,
                                 ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART,
                                 ('%.2f' % (float(part_buscos) * 100.0 / total_buscos)))
            if complete_buscos + part_buscos > 0:
                zero_output_for_all = False
            shutil.copy(summary_fpath, output_dir)
        else:
            logger.error(
                'Failed running BUSCO for ' + contigs_fpath + '. See the log for detailed information'
                ' (rerun with --debug to keep all intermediate files).')

    if zero_output_for_all:
        # NOTE(review): the support address below was restored from an
        # obfuscated '[email protected]' placeholder -- please confirm it.
        logger.warning(
            'BUSCO did not fail explicitly but found nothing for all assemblies! '
            'Possible reasons and workarounds:\n'
            ' 1. Provided assemblies are so small that they do not contain even a single partial BUSCO gene. Not likely but may happen -- nothing to worry then.\n'
            ' 2. Incorrect lineage database was used. To run with fungi DB use --fungus, to run with eukaryota DB use --eukaryote, otherwise BUSCO uses bacteria DB.\n'
            ' 3. Problem with BUSCO dependencies, most likely Augustus. Check that the binaries in ' + augustus_dirpath + '/bin/ are working properly.\n'
            ' If something is wrong with Augustus, you may try to install it yourself (https://github.com/Gaius-Augustus/Augustus) and add "augustus" binary to PATH.\n'
            ' 4. Some other problem with BUSCO. Check the logs (you may need to rerun QUAST with --debug to see all intermediate files).\n'
            ' If you cannot solve the problem yourself, post an issue at https://github.com/ablab/quast/issues or write to quast.support@cab.spbu.ru'
        )

    if not qconfig.debug:
        cleanup(output_dir)
    logger.info('Done.')
def compile_minimap(logger, only_clean=False):
    """Report whether Minimap2 is usable, building it when necessary.

    ``compile_tool`` is skipped only when ``minimap_fpath()`` is truthy and
    no clean was requested.
    """
    build_not_needed = minimap_fpath() and not only_clean
    return bool(
        build_not_needed
        or compile_tool('Minimap2', contig_aligner_dirpath, ['minimap2'],
                        just_notice=False, logger=logger, only_clean=only_clean))
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO on every assembly and store completeness in the reports.

    Prepares Augustus, the BLAST binaries and the lineage database, runs
    ``busco.main`` in parallel, then parses each summary file and records
    the percentage of complete and partial BUSCOs.

    Args:
        contigs_fpaths: paths of the assemblies to analyse.
        output_dir: directory for the log and results (created if missing).
        logger: QUAST logger.

    Returns:
        None. Problems are reported through ``logger``.
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True
    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus', augustus_dirpath, [join('bin', 'augustus')], logger=logger):
        compilation_success = False
    if compilation_success and not download_blast_binaries(logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    set_augustus_dir(augustus_dirpath)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Clamp to 1 so an empty assembly list (or max_threads == 0) cannot
    # trigger a ZeroDivisionError below.
    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    log_fpath = join(output_dir, 'busco.log')
    logger.info('Logging to ' + log_fpath + '...')

    # One (argv, output_dir) pair per assembly.
    busco_args = [
        (['-i', contigs_fpath,
          '-o', qutils.label_from_fpath_for_fname(contigs_fpath),
          '-l', clade_dirpath,
          '-m', 'genome',
          '-f', '-z',
          '-c', str(busco_threads),
          '-t', tmp_dir,
          '--augustus_parameters=\'--AUGUSTUS_CONFIG_PATH=' + join(augustus_dirpath, 'config') + '\''],
         output_dir)
        for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco.main, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error('Failed running BUSCO for all the assemblies. See ' + log_fpath + ' for information.')
        return

    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        summary_fpath = summary_fpaths[i]

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as f:
                for line in f:
                    # The count is the first whitespace-separated token.
                    if 'Complete BUSCOs' in line:
                        complete_buscos = int(line.split()[0])
                    elif 'Fragmented' in line:
                        part_buscos = int(line.split()[0])
                    elif 'Total' in line:
                        total_buscos = int(line.split()[0])
            if total_buscos != 0:
                report.add_field(reporting.Fields.BUSCO_COMPLETE,
                                 ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART,
                                 ('%.2f' % (float(part_buscos) * 100.0 / total_buscos)))
        else:
            logger.error('Failed running BUSCO for ' + contigs_fpath + '. See ' + log_fpath + ' for information.')
    logger.info('Done.')
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO on every assembly and store completeness in the reports.

    Prepares Augustus, the BLAST binaries and the lineage database, runs
    ``busco_main_handler`` in parallel, then parses each summary file and
    records the percentage of complete and partial BUSCOs.

    Args:
        contigs_fpaths: paths of the assemblies to analyse.
        output_dir: directory for logs and results (created if missing).
        logger: QUAST logger.

    Returns:
        None. Problems are reported through ``logger``.
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True
    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus', augustus_dirpath, [join('bin', 'augustus')], logger=logger):
        compilation_success = False
    if compilation_success and not download_blast_binaries(logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Clamp to 1 so an empty assembly list (or max_threads == 0) cannot
    # trigger a ZeroDivisionError below.
    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote, is_fungus=qconfig.is_fungus)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    config_fpath = make_config(output_dir, tmp_dir, busco_threads, clade_dirpath, augustus_dirpath)
    logger.info('Logs and results will be saved under ' + output_dir + '...')

    os.environ['BUSCO_CONFIG_FILE'] = config_fpath
    # Check the result before exporting it: os.environ only accepts strings,
    # so assigning a None result would raise TypeError before the error
    # could be logged.
    augustus_config_dirpath = copy_augustus_contigs(augustus_dirpath, tmp_dir)
    if not augustus_config_dirpath:
        logger.error('Augustus configs not found, failed to run BUSCO without them.')
        return
    os.environ['AUGUSTUS_CONFIG_PATH'] = augustus_config_dirpath

    busco_args = [[contigs_fpath, qutils.label_from_fpath_for_fname(contigs_fpath)]
                  for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco_main_handler, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error(
            'Failed running BUSCO for all the assemblies. See log files in ' + output_dir + ' for information.')
        return

    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        summary_fpath = summary_fpaths[i]

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as f:
                for line in f:
                    # The count is the first whitespace-separated token.
                    if 'Complete BUSCOs' in line:
                        complete_buscos = int(line.split()[0])
                    elif 'Fragmented' in line:
                        part_buscos = int(line.split()[0])
                    elif 'Total' in line:
                        total_buscos = int(line.split()[0])
            if total_buscos != 0:
                report.add_field(reporting.Fields.BUSCO_COMPLETE,
                                 ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART,
                                 ('%.2f' % (float(part_buscos) * 100.0 / total_buscos)))
            # NOTE(review): the collapsed source does not show whether this
            # copy sat inside the `total_buscos != 0` branch -- confirm.
            shutil.copy(summary_fpath, output_dir)
        else:
            logger.error('Failed running BUSCO for ' + contigs_fpath + '. See the log for detailed information.')

    if not qconfig.debug:
        cleanup(output_dir)
    logger.info('Done.')
def compile_minimap(logger, only_clean=False):
    """Build (or clean) Minimap2 and return the ``compile_tool`` result."""
    needed_binaries = ['minimap2']
    return compile_tool('Minimap2', minimap_dirpath, needed_binaries,
                        only_clean=only_clean, logger=logger)
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO on every assembly and store completeness in the reports.

    Prepares Augustus, the BLAST binaries and the lineage database, runs
    ``busco.main`` in parallel, then parses each summary file and records
    the percentage of complete and partial BUSCOs.

    Args:
        contigs_fpaths: paths of the assemblies to analyse.
        output_dir: directory for the log and results (created if missing).
        logger: QUAST logger.

    Returns:
        None. Problems are reported through ``logger``.
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True
    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus', augustus_dirpath, [join('bin', 'augustus')], logger=logger):
        compilation_success = False
    if compilation_success and not download_blast_binaries(logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    set_augustus_dir(augustus_dirpath)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Clamp to 1 so an empty assembly list (or max_threads == 0) cannot
    # trigger a ZeroDivisionError below.
    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote, is_fungus=qconfig.is_fungus)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    log_fpath = join(output_dir, 'busco.log')
    logger.info('Logging to ' + log_fpath + '...')

    # One (argv, output_dir) pair per assembly.
    busco_args = [
        (['-i', contigs_fpath,
          '-o', qutils.label_from_fpath_for_fname(contigs_fpath),
          '-l', clade_dirpath,
          '-m', 'genome',
          '-f', '-z',
          '-c', str(busco_threads),
          '-t', tmp_dir,
          '--augustus_parameters=\'--AUGUSTUS_CONFIG_PATH=' + join(augustus_dirpath, 'config') + '\''],
         output_dir)
        for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco.main, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error('Failed running BUSCO for all the assemblies. See ' + log_fpath + ' for information.')
        return

    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        summary_fpath = summary_fpaths[i]

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as f:
                for line in f:
                    # The count is the first whitespace-separated token.
                    if 'Complete BUSCOs' in line:
                        complete_buscos = int(line.split()[0])
                    elif 'Fragmented' in line:
                        part_buscos = int(line.split()[0])
                    elif 'Total' in line:
                        total_buscos = int(line.split()[0])
            if total_buscos != 0:
                report.add_field(reporting.Fields.BUSCO_COMPLETE,
                                 ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART,
                                 ('%.2f' % (float(part_buscos) * 100.0 / total_buscos)))
        else:
            logger.error(
                'Failed running BUSCO for ' + contigs_fpath + '. See ' + log_fpath + ' for information.')
    logger.info('Done.')