def data_list(wanted_data):
    """
    Return the path of an additional data file given its name.

    The directory containing this module is listed with
    :func:`HCGB_main.get_fullpath_list` and the first path whose base name
    (extension removed) equals ``wanted_data`` is returned.

    :param wanted_data: File name, without extension, to look for.
    :returns: Path of the first matching file, or ``None`` when no file matches.
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))

    for file_path in HCGB_main.get_fullpath_list(module_dir):
        stem, _extension = os.path.splitext(os.path.basename(file_path))
        if stem == wanted_data:
            return file_path

    ## nothing matched
    return None
def R_scripts(script, Debug):
    """
    Return the path of the script named ``script`` found next to this module.

    The directory containing this module is listed with
    :func:`main_functions.get_fullpath_list` and the first path whose base
    name (extension removed) equals ``script`` is returned.

    :param script: Script name, without extension, to look for.
    :param Debug: Debug flag forwarded to ``main_functions.get_fullpath_list``.
    :returns: Path of the first matching file, or ``None`` when no file matches.
    """
    RDir = os.path.dirname(os.path.realpath(__file__))
    list_R = main_functions.get_fullpath_list(RDir, Debug)

    for f in list_R:
        name = os.path.splitext(os.path.basename(f))[0]
        if name == script:
            return f

    ## no script with that name
    return None
def R_scripts(script):
    """Return the path of the script named ``script`` within the ``R`` subfolder.

    The ``R`` subfolder located next to this module is listed and the first
    path whose base name (extension removed) equals ``script`` is returned.

    :param script: Script name, without extension, to look for.
    :returns: Path of the first matching file, or ``None`` when no file matches.

    .. seealso:: This function depends on other BacterialTyper functions called:

        - :func:`HCGB.functions.main_functions.get_fullpath_list`
    """
    RDir = os.path.dirname(os.path.realpath(__file__)) + '/R/'
    list_R = HCGB_main.get_fullpath_list(RDir)

    for f in list_R:
        name = os.path.splitext(os.path.basename(f))[0]
        if name == script:
            return f

    ## no script with that name
    return None
def print_available_BUSCO():
    """
    Print the datasets reported by ``busco --list-datasets``.

    The command output is redirected to a file named ``tmp`` in the current
    working directory, printed to screen, and then cleaned up together with
    the ``./busco_downloads`` folder: every file listed under it is removed,
    followed by the ``information`` subfolder and the folder itself.
    """
    HCGB_aes.print_sepLine("-", 100, False)

    busco_bin = set_config.get_exe('busco')

    ## get datasets
    busco_bin_call = busco_bin + ' --list-datasets > tmp'
    HCGB_sys.system_call(busco_bin_call, message=False)

    ## dump in screen
    with open("./tmp", 'r') as f:
        print(f.read())

    ## clean
    list_files = HCGB_main.get_fullpath_list("./busco_downloads", False)
    ## the concatenation result must be kept: previously it was discarded
    ## and the temporary file was left behind
    list_files = list_files + ['tmp']
    for i in list_files:
        os.remove(i)

    os.rmdir("./busco_downloads/information")
    os.rmdir("./busco_downloads/")

    HCGB_aes.print_sepLine("-", 100, False)
    print ("\n")
def agrvate_call(sample, assembly_file, folder, debug=False):
    """Call agrvate and collect its results.

    Runs the ``agrvate`` executable on ``assembly_file`` and, when the
    expected ``<assembly name>-results`` folder exists inside ``folder``,
    renames it to ``agrvate_results``, parses the tabular reports and writes
    them to ``<sample>_agr_results.xlsx`` (sheets ``summary``, ``operon`` and
    ``steps``).

    See https://github.com/VishnuRaghuram94/AgrVATE#results for additional
    details on the files generated.

    :param sample: Sample name; added as a column and used for the xlsx name.
    :param assembly_file: Path to the assembly; its name is expected to
        contain ``.fna``.
    :param folder: Folder holding the logs and the results.
    :param debug: Print debugging messages if True.
    :returns: pandas DataFrame with the summary columns (``#filename``
        removed), ``operon_fna``, the error report columns (``#input_name``
        removed) and ``agr_operon_xlsx``. An empty DataFrame is returned when
        the results folder was not generated.
    """
    ## prepare call
    log_call = os.path.join(folder, "agrvate_cmd.log")
    err_call = os.path.join(folder, "agrvate_cmd.err")
    agrvate_bin = set_config.get_exe('agrvate')

    ## system call: use mummer (-m) and force results folder (-f)
    cmd_call = "%s -i %s -m -f > %s 2> %s " %(agrvate_bin, assembly_file, log_call, err_call)
    HCGB_sys.system_call(cmd_call)

    ## check results
    results = pd.DataFrame()

    ## check folder is created
    assembly_file_name = os.path.basename(assembly_file).split('.fna')[0]
    original_results_folder = os.path.join(folder, assembly_file_name + '-results')
    results_folder = os.path.join(folder, 'agrvate_results')

    if os.path.isdir(original_results_folder):
        print("+ Results folder generated OK")
        print("+ Check results generated:")

        ## rename folder
        os.rename(original_results_folder, results_folder)
        os.rename(os.path.join(folder, assembly_file_name + '.fna-error-report.tab'),
                  os.path.join(results_folder, 'error_report.tab'))

        ## excel file to write
        file_name_Excel = os.path.join(folder, sample + '_agr_results.xlsx')

        ## sheets are collected first and written at the end within a context
        ## manager, so the excel handle is always closed
        excel_sheets = []

        ## get all files
        list_files = HCGB_main.get_fullpath_list(results_folder)

        ## summary tab
        summary_tab_file = [s for s in list_files if s.endswith("summary.tab")][0]
        summary_tab = HCGB_main.get_data(summary_tab_file, '\t', options="")
        summary_tab['sample'] = sample

        ## columns
        # agr_group: gp1/gp2/gp3/gp4. 'u' means unknown.
        ##    If multiple agr groups were found (col 5 = m),
        ##    the displayed agr group is the majority/highest confidence.
        # match_score: maximum 15; 0 means untypeable; < 5 means low confidence.
        # canonical_agrD: 1 means canonical; 0 means non-canonical; u means unknown.
        # multiple_agr: s means single, m means multiple, u means unknown.
        ##    Multiple groups are found likely due to multiple S. aureus isolates in sequence
        # frameshifts: Number found in CDS of extracted agr operon ('u' if agr operon not extracted)

        ## debug messages
        if debug:
            HCGB_aes.debug_message("agrvate results: Summary tab file", 'yellow')
            print(summary_tab_file)
            print(summary_tab)

        ## add summary results to all results
        del summary_tab['#filename']
        results = summary_tab.copy()

        ## tab summary
        excel_sheets.append(('summary', summary_tab))

        ## agr_gp tab
        agr_gp_tab_file = [s for s in list_files if s.endswith("agr_gp.tab")][0]
        if HCGB_files.is_non_zero_file(agr_gp_tab_file):
            agr_gp_tab = HCGB_main.get_data(agr_gp_tab_file, '\t', options='header=None')
            agr_gp_tab.columns = ['contig', 'agr', 'evalue', 'identity', 'start', 'end']
            agr_gp_tab['sample'] = sample

            ## columns
            ## Assembly Contig ID
            ## ID of matched agr group kmer
            ## evalue
            ## Percentage identity of match
            ## Start position of kmer alignment on input sequence
            ## End position of kmer alignment on input sequence

            ## debug messages
            if debug:
                HCGB_aes.debug_message("agrvate results: agr_gp file", 'yellow')
                print(agr_gp_tab_file)
                print(agr_gp_tab)

            ## tab operon
            excel_sheets.append(('operon', agr_gp_tab))

        ## agr_operon fna: the file is optional, leave the column empty if missing
        agr_operon_fna_files = [s for s in list_files if s.endswith("agr_operon.fna")]
        if agr_operon_fna_files:
            agr_operon_fna_file = agr_operon_fna_files[0]

            ## debug messages
            if debug:
                HCGB_aes.debug_message("agrvate results: agr_operon file", 'yellow')
                print(agr_operon_fna_file)

            results['operon_fna'] = agr_operon_fna_file
        else:
            results['operon_fna'] = ''

        ## error report
        error_report_file = [s for s in list_files if s.endswith("error_report.tab")][0]
        error_report = HCGB_main.get_data(error_report_file, '\t', options="")
        del error_report['#input_name']

        ## debug messages
        if debug:
            HCGB_aes.debug_message("agrvate results: error_report.tab file", 'yellow')
            print(error_report_file)
            print(error_report)

        ## tab steps
        excel_sheets.append(('steps', error_report))

        ## merge results
        results = pd.concat([results, error_report], axis=1)

        ## write xlsx file: ExcelWriter.save() is not available in pandas >= 2.0,
        ## the context manager closes (and saves) the file on exit
        with pd.ExcelWriter(file_name_Excel, engine='xlsxwriter') as writer_Excel:
            for sheet_name, sheet_data in excel_sheets:
                sheet_data.to_excel(writer_Excel, sheet_name=sheet_name)

        ## add to pandas dataframe
        results['agr_operon_xlsx'] = file_name_Excel

    ## debug messages
    if debug:
        HCGB_aes.debug_message("agrvate results", 'yellow')
        HCGB_main.print_all_pandaDF(results)

    return (results)
def _discard_non_linkable(file_list, root_folder):
    """Drop sources, build artefacts, docs, tests and databases from ``file_list``."""
    ## '.git' also covers the former '.git/' and '.gitignore' filters
    discard_patterns = (
        '.a', '.c', '.o', '.h', '.git', 'Makefile', '.pdf', '.tar.gz',
        'README.md', '__pycache__', 'db/', 'doc/', 'test/', 'aux/',
    )

    kept = []
    for file_path in file_list:
        ## match against the path relative to the installation folder, so an
        ## installation prefix such as ~/.conda/ does not discard every file
        relative_path = os.path.relpath(file_path, root_folder)
        if not any(pattern in relative_path for pattern in discard_patterns):
            kept.append(file_path)

    return kept


def install(software, min_version, install_path, Debug):
    """
    Install ``software`` and link its files into the environment bin folder.

    Calls ``install_soft`` and, if it succeeds, creates symbolic links (via
    :func:`HCGB_main.get_symbolic_link`) in the directory holding the path
    stored in the ``_`` environment variable.

    :param software: Software name.
    :param min_version: Minimum version, forwarded to ``install_soft``.
    :param install_path: Installation path, forwarded to ``install_soft``.
    :param Debug: Print debugging messages if True.
    :returns: Version installed, or an empty tuple if the installation failed.
    """
    (path2Export, versionInstalled) = install_soft(software, min_version, install_path, Debug)

    ## failed to install:
    if not path2Export:
        print(colored("**Check paths or install it in the system and add it to $PATH environment variable.",'yellow'))
        return ()

    ## add to $PATH: include in environment bin
    env_bin_directory = os.path.dirname(os.environ['_'])
    print ("\n+ Add software to path")

    if software in ('fastqc', 'trimmomatic'):
        ## unique file to export
        file_list = [path2Export]
    else:
        ## all folder
        ## NOTE(review): a 'bin' subfolder path used to be computed here for
        ## spades and prokka but was never used; the whole folder is listed.
        ## Confirm whether only 'bin' was intended.
        file_list = HCGB_main.get_fullpath_list(path2Export)

        ## add binaries compiled for linux
        if software == 'prokka':
            pathToExport2 = os.path.join(path2Export, 'binaries', 'linux')
            file_list = file_list + HCGB_main.get_fullpath_list(pathToExport2)

        ## discard some files obtained
        file_list = _discard_non_linkable(file_list, path2Export)

    ## debug messages
    if Debug:
        print(colored("** Debug: list to include in path",'yellow'))
        print (file_list)
        print()

    ## create symbolic link in bin directory in environment
    HCGB_main.get_symbolic_link(file_list, env_bin_directory)
    print(colored("**Software (%s - Version: %s) installed in the system and add it to $PATH environment variable." %(software, versionInstalled),'green'))
    return (versionInstalled)