def data_list(wanted_data):
	"""
	Look up an additional file stored under folder ``BacterialTyper/data``.

	The directory that contains this module is listed with
	``HCGB_main.get_fullpath_list`` and the first entry whose base name,
	once its extension is stripped, equals ``wanted_data`` is returned.

	:param wanted_data: Name of the file to look for, without extension.
	:returns: Path of the first matching file, or ``None`` if no entry matches.
	"""

	module_dir = os.path.dirname(os.path.realpath(__file__))
	candidates = (
		path for path in HCGB_main.get_fullpath_list(module_dir)
		if os.path.splitext(os.path.basename(path))[0] == wanted_data
	)
	## first hit wins; None mirrors falling off the end of the search
	return next(candidates, None)
Exemple #2
0
def R_scripts(script, Debug):
    """
    Lists files within ``other_tools/R`` directory and returns path to given script

    The directory actually listed is the one containing this module
    (NOTE(review): presumably this module lives in ``other_tools/R`` -- confirm).

    :param script: Script name to look for, without file extension.
    :param Debug: Forwarded unchanged to ``main_functions.get_fullpath_list``.
    :returns: Path of the first file whose base name (extension removed)
        equals ``script``; ``None`` when no file matches.
    """
    RDir = os.path.dirname(os.path.realpath(__file__))

    for f in main_functions.get_fullpath_list(RDir, Debug):
        name = os.path.splitext(os.path.basename(f))[0]
        if name == script:
            return f

    ## nothing matched: make the "not found" result explicit
    return None
Exemple #3
0
def R_scripts(script):
    """Lists files within ``other_tools/R`` directory and returns path to given script

    The ``R`` sub-folder located next to this module is listed and the first
    file whose base name (extension removed) equals ``script`` is returned.

    :param script: Script name to look for, without file extension.
    :returns: Path of the matching file; ``None`` when no file matches.

    .. seealso:: This function depends on other BacterialTyper functions called:

        - :func:`HCGB.functions.main_functions.get_fullpath_list`
    """
    RDir = os.path.dirname(os.path.realpath(__file__)) + '/R/'

    for f in HCGB_main.get_fullpath_list(RDir):
        name = os.path.splitext(os.path.basename(f))[0]
        if name == script:
            return f

    ## nothing matched: make the "not found" result explicit
    return None
def print_available_BUSCO():
	"""
	Print to screen the datasets reported by ``busco --list-datasets``.

	The command output is redirected to a file named ``tmp`` in the current
	working directory, echoed to standard output and then removed together
	with the ``./busco_downloads`` folder left behind by the call.

	:returns: None
	:raises OSError: if ``tmp`` cannot be read or any of the clean-up
		removals fails (e.g. ``./busco_downloads`` was not created).
	"""
	HCGB_aes.print_sepLine("-", 100, False)
	busco_bin = set_config.get_exe('busco')

	## get datasets
	busco_bin_call = busco_bin + ' --list-datasets > tmp'
	HCGB_sys.system_call(busco_bin_call, message=False)

	## dump in screen
	with open("./tmp", 'r') as f:
		print(f.read())

	## clean
	list_files = HCGB_main.get_fullpath_list("./busco_downloads", False)
	## keep the result of the concatenation: a bare ``list_files + ['tmp']``
	## builds a new list and throws it away, leaving ``tmp`` on disk
	list_files = list_files + ['tmp']
	for i in list_files:
		os.remove(i)
	os.rmdir("./busco_downloads/information")
	os.rmdir("./busco_downloads/")

	HCGB_aes.print_sepLine("-", 100, False)
	print ("\n")
Exemple #5
0
def agrvate_call(sample, assembly_file, folder, debug=False):
    """agrvate call and check results.

    Runs the ``agrvate`` executable on ``assembly_file``, renames the results
    folder it produces to ``<folder>/agrvate_results``, collects the tabular
    outputs into a single-sample dataframe and dumps them into
    ``<folder>/<sample>_agr_results.xlsx``.

    :param sample: Sample identifier; stored in the ``sample`` column and used
        to name the Excel file.
    :param assembly_file: Assembly passed to agrvate via ``-i``. The results
        folder name is derived from its base name up to ``.fna``.
    :param folder: Folder receiving the command log/err files and where the
        agrvate results are looked for.
        NOTE(review): the command itself is not told about ``folder``, so this
        presumably relies on the caller running with ``folder`` as working
        directory -- confirm.
    :param debug: If True, print intermediate files and dataframes.

    :returns: ``pandas.DataFrame`` with summary and error-report columns plus
        ``operon_fna`` and ``agr_operon_xlsx``; an empty dataframe when the
        results folder was not generated.
    :raises IndexError: if the results folder lacks the expected
        ``summary.tab``, ``agr_gp.tab`` or ``error_report.tab`` file.
    """

    ## prepare call
    log_call = os.path.join(folder, "agrvate_cmd.log")
    err_call = os.path.join(folder, "agrvate_cmd.err")
    agrvate_bin = set_config.get_exe('agrvate')

    ## system call
    cmd_call = "%s -i %s -m -f >  %s 2> %s " %(agrvate_bin,
                                               assembly_file,
                                               log_call, err_call) ## use mummer (-m) and force results folder (-f)
    ## the return value is not inspected: success is judged below from the
    ## presence of the results folder
    HCGB_sys.system_call(cmd_call)

    ## check results
    ## see https://github.com/VishnuRaghuram94/AgrVATE#results for additional details
    results = pd.DataFrame()

    ## check folder is created
    assembly_file_name = os.path.basename(assembly_file).split('.fna')[0]
    original_results_folder = os.path.join(folder, assembly_file_name + '-results')
    results_folder = os.path.join(folder, 'agrvate_results')

    if os.path.isdir(original_results_folder):
        print("+ Results folder generated OK")
        print("+ Check results generated:")

        ## rename folder and move the error report inside it
        os.rename(original_results_folder, results_folder)
        os.rename(os.path.join(folder, assembly_file_name + '.fna-error-report.tab'), os.path.join(results_folder, 'error_report.tab'))

        ## write to excel
        file_name_Excel = os.path.join(folder, sample + '_agr_results.xlsx')
        writer_Excel = pd.ExcelWriter(file_name_Excel, engine='xlsxwriter') ## open excel handle

        ## get all files
        list_files = HCGB_main.get_fullpath_list(results_folder)

        ## summary tab
        summary_tab_file = [s for s in list_files if s.endswith("summary.tab")][0]
        summary_tab =  HCGB_main.get_data(summary_tab_file, '\t', options="")
        summary_tab['sample'] = sample

        ## columns
        #agr_group: gp1/gp2/gp3/gp4. 'u' means unknown.
        ##           If multiple agr groups were found (col 5 = m),
        ##           the displayed agr group is the majority/highest confidence.
        # match_score: maximum 15; 0 means untypeable; < 5 means low confidence.
        # canonical_agrD: 1 means canonical; 0 means non-canonical; u means unknown.
        # multiple_agr:  s means single, m means multiple, u means unknown )
        ##               Multiple groups are found likely due to multiple S. aureus isolates in sequence
        # frameshifts: Number found in CDS of extracted agr operon ('u' if agr operon not extracted)

        ## debug messages
        if debug:
            HCGB_aes.debug_message("agrvate results: Summary tab file", 'yellow')
            print(summary_tab_file)
            print(summary_tab)

        ## add summary results to all results
        del summary_tab['#filename']
        results = summary_tab.copy()

        ## save summary_tab into excel
        ## tab summary
        summary_tab.to_excel(writer_Excel, sheet_name='summary') ## write excel handle

        ## agr_gp tab
        agr_gp_tab_file = [s for s in list_files if s.endswith("agr_gp.tab")][0]
        if HCGB_files.is_non_zero_file(agr_gp_tab_file):
            agr_gp_tab =  HCGB_main.get_data(agr_gp_tab_file, '\t', options='header=None')
            agr_gp_tab.columns = ['contig', 'agr', 'evalue', 'identity', 'start', 'end']
            agr_gp_tab['sample'] = sample

            ## columns
            ## Assembly Contig ID
            ## ID of matched agr group kmer
            ## evalue
            ## Percentage identity of match
            ## Start position of kmer alignment on input sequence
            ## End position of kmer alignment on input sequence

            ## debug messages
            if debug:
                HCGB_aes.debug_message("agrvate results: agr_gp file", 'yellow')
                print(agr_gp_tab_file)
                print(agr_gp_tab)

            ## save agr_gp_tab file into excel
            ## tab operon
            agr_gp_tab.to_excel(writer_Excel, sheet_name='operon') ## write excel handle

        ## agr_operon fna
        ## the operon file is optional: only its absence maps to an empty
        ## value, any other failure is left to propagate
        operon_fna_matches = [s for s in list_files if s.endswith("agr_operon.fna")]
        if operon_fna_matches:
            agr_operon_fna_file = operon_fna_matches[0]
            ## debug messages
            if debug:
                HCGB_aes.debug_message("agrvate results: agr_operon file", 'yellow')
                print(agr_operon_fna_file)

            results['operon_fna'] = agr_operon_fna_file
        else:
            results['operon_fna'] = ''

        ## error report
        error_report_file = [s for s in list_files if s.endswith("error_report.tab")][0]
        error_report =  HCGB_main.get_data(error_report_file, '\t', options="")
        del error_report['#input_name']

        ## debug messages
        if debug:
            HCGB_aes.debug_message("agrvate results: error_report.tab file", 'yellow')
            print(error_report_file)
            print(error_report)

        ## save error_report file into excel
        ## tab steps
        error_report.to_excel(writer_Excel, sheet_name='steps') ## write excel handle

        ## merge results
        results = pd.concat([results, error_report], axis=1)

        ## close xlsx file
        ## close() writes the workbook; ExcelWriter.save() no longer exists in pandas >= 2.0
        writer_Excel.close() ## close excel handle

        ## add to pandas dataframe
        results['agr_operon_xlsx'] = file_name_Excel

    ## debug messages
    if debug:
        HCGB_aes.debug_message("agrvate results", 'yellow')
        HCGB_main.print_all_pandaDF(results)

    return (results)
Exemple #6
0
def _discard_non_exportable(file_list, root):
	"""Return the entries of ``file_list`` that should be linked into the bin directory."""
	## '.git' also covers the former '.git/' and '.gitignore' patterns
	excluded = ('.a', '.c', '.o', '.h', '.git', 'Makefile', '.pdf', '.tar.gz',
			'README.md', '__pycache__', 'db/', 'doc/', 'test/', 'aux/')

	kept = []
	for path in file_list:
		## match only the part of the path below the install root: matching
		## the absolute path would discard everything when the install
		## location itself contains a pattern (e.g. ~/.conda/ or /opt/test/)
		if path == root:
			inner = os.path.basename(path)
		else:
			inner = os.path.relpath(path, root)

		if not any(pattern in inner for pattern in excluded):
			kept.append(path)

	return kept


def install(software, min_version, install_path, Debug):
	"""
	Install ``software`` and link its files into the environment bin directory.

	Calls ``install_soft`` and, if it reports a path to export, collects the
	files to expose (the single path for ``fastqc``/``trimmomatic``, the whole
	folder listing otherwise, plus ``binaries/linux`` for ``prokka``), discards
	sources, documentation and similar non-executable material and creates
	symbolic links via ``HCGB_main.get_symbolic_link``.

	:param software: Name of the software to install.
	:param min_version: Forwarded to ``install_soft``.
	:param install_path: Forwarded to ``install_soft``.
	:param Debug: If true, print the list of files that will be linked.

	:returns: Version installed as reported by ``install_soft``; an empty
		tuple ``()`` when the installation failed.
	:raises KeyError: if the ``_`` environment variable is not set.
	"""

	(path2Export, versionInstalled) = install_soft(software, min_version, install_path, Debug)

	## failed to install:
	if not path2Export:
		print(colored("**Check paths or install it in the system and add it to $PATH environment variable.",'yellow'))
		return ()

	## add to $PATH: include in environment bin
	## '_' is presumably set by the launching shell to the path of the
	## executed command, so its folder is the environment bin directory
	env_bin_directory = os.path.dirname(os.environ['_'])

	print ("\n+ Add software to path")

	## unique file to export
	if (software == 'fastqc' or software == 'trimmomatic'):
		file_list = [path2Export]

	else:
		## all folder
		## NOTE(review): a '<path2Export>/bin' path used to be computed here
		## for spades and prokka but was never used; the whole folder is
		## listed as before -- confirm whether only 'bin' was intended
		file_list = HCGB_main.get_fullpath_list(path2Export)

		## add binaries compiled for linux
		if (software == 'prokka'):
			pathToExport2 = os.path.join(path2Export, 'binaries', 'linux')
			file_list = file_list + HCGB_main.get_fullpath_list(pathToExport2)

	## discard some files obtain
	file_list = _discard_non_exportable(file_list, path2Export)

	## debug messages
	if Debug:
		print(colored("** Debug: list to include in path",'yellow'))
		print (file_list)
		print()

	## create symbolic link in bin directory in environment
	HCGB_main.get_symbolic_link(file_list, env_bin_directory)
	print(colored("**Software (%s - Version: %s) installed in the system and add it to $PATH environment variable." %(software, versionInstalled),'green'))

	return (versionInstalled)