def format_protein_db(self, input_file_path, output_file_path):
    """Decompress the gzipped protein sequences and build search databases from them.

    The gzip file at `input_file_path` is decompressed into a temporary FASTA
    file. If `diamond` is found on the system, a DIAMOND database is made under
    `<COG_data_dir>/DB_DIAMOND`; if both `makeblastdb` and `blastp` are found, a
    BLAST database is made under `<COG_data_dir>/DB_BLAST`. Either output
    directory is removed and recreated if it already exists. When a tool is
    missing, a warning is printed and that database is skipped.

    Parameters
    ==========
    input_file_path : str
        Path to the gzip-compressed protein sequences.
    output_file_path : str
        Not used by this method; output locations are derived from
        `self.COG_data_dir`. Kept so the signature stays the same for callers.

    Notes
    =====
    Reads `self.COG_data_dir`, `self.num_threads`, and `self.run`. The temporary
    FASTA file is removed on exit, including when an error is raised.
    """

    progress.new('Formatting raw files')
    progress.update('Decompressing protein sequences')

    temp_fasta_path = filesnpaths.get_temp_file_path()

    # everything that touches the temporary file lives inside this `try` so the
    # file is cleaned up even if decompression or a `makedb` call fails.
    try:
        try:
            # stream the decompressed bytes in chunks instead of holding the
            # entire uncompressed file in memory at once.
            with open(temp_fasta_path, 'wb') as f_out, gzip.open(input_file_path, 'rb') as f_in:
                shutil.copyfileobj(f_in, f_out)
        finally:
            # close the progress display whether or not decompression worked;
            # the original exception (if any) still propagates unchanged.
            progress.end()

        if utils.is_program_exists('diamond', dont_raise=True):
            output_dir = J(self.COG_data_dir, 'DB_DIAMOND')

            # always start from an empty output directory
            if os.path.exists(output_dir):
                shutil.rmtree(output_dir)
            os.mkdir(output_dir)

            output_db_path = J(output_dir, 'COG')
            log_file_path = J(output_dir, 'log.txt')

            self.run.info('Diamond log', log_file_path)

            diamond = Diamond(temp_fasta_path)
            diamond.num_threads = self.num_threads
            diamond.run.log_file_path = log_file_path
            diamond.makedb(output_db_path)
        else:
            # adjacent literals instead of a backslash continuation inside the
            # string, which leaked stray characters into the message.
            self.run.warning("Diamond does not seem to be installed on this system, so anvi'o is not going to "
                             "generate a search database for it. Remember this when/if things go South.")

        if utils.is_program_exists('makeblastdb', dont_raise=True) and utils.is_program_exists('blastp', dont_raise=True):
            output_dir = J(self.COG_data_dir, 'DB_BLAST')

            # always start from an empty output directory
            if os.path.exists(output_dir):
                shutil.rmtree(output_dir)
            os.mkdir(output_dir)

            output_db_path = J(output_dir, 'COG')
            log_file_path = J(output_dir, 'log.txt')

            self.run.info('BLAST log', log_file_path)

            blast = BLAST(temp_fasta_path)
            blast.run.log_file_path = log_file_path
            blast.num_threads = self.num_threads

            # NOTE(review): unlike the DIAMOND branch, the target here is nested
            # one level below `output_db_path`, and that `COG` directory is not
            # created in this method -- confirm `BLAST.makedb` copes with that.
            blast.makedb(os.path.join(output_db_path, 'COG.fa'))
        else:
            self.run.warning("BLAST tools do not seem to be installed on this system, so anvi'o is not going to "
                             "generate a search database for them to be used. Keep this in mind for later.")
    finally:
        if os.path.exists(temp_fasta_path):
            os.remove(temp_fasta_path)
def format_protein_db(self, input_file_path, output_file_path):
    """Decompress the gzipped protein sequences and build search databases from them.

    The gzip file at `input_file_path` is decompressed into a temporary FASTA
    file. If `diamond` is found on the system, a DIAMOND database is made under
    `<COG_data_dir>/DB_DIAMOND`; if both `makeblastdb` and `blastp` are found, a
    BLAST database is made under `<COG_data_dir>/DB_BLAST`. Either output
    directory is removed and recreated if it already exists. When a tool is
    missing, a warning is printed and that database is skipped.

    Parameters
    ==========
    input_file_path : str
        Path to the gzip-compressed protein sequences.
    output_file_path : str
        Not used by this method; output locations are derived from
        `self.COG_data_dir`. Kept so the signature stays the same for callers.

    Raises
    ======
    ConfigError
        If anything goes wrong while decompressing `input_file_path`. The
        underlying exception is attached as the cause.

    Notes
    =====
    Reads `self.COG_data_dir`, `self.num_threads`, and `self.run`. The temporary
    FASTA file is removed on exit, including when an error is raised.
    """

    progress.new('Formatting raw files')
    progress.update('Decompressing protein sequences')

    temp_fasta_path = filesnpaths.get_temp_file_path()

    # everything that touches the temporary file lives inside this outer `try`
    # so the file is cleaned up even if decompression or a `makedb` call fails.
    try:
        try:
            # stream the decompressed bytes in chunks instead of holding the
            # entire uncompressed file in memory at once.
            with open(temp_fasta_path, 'wb') as f_out, gzip.open(input_file_path, 'rb') as f_in:
                shutil.copyfileobj(f_in, f_out)
        except Exception as e:
            progress.end()
            # chain the original error so the traceback keeps the root cause
            raise ConfigError(f"Something went wrong while decompressing the downloaded file :/ It is likely that "
                              f"the download failed and only part of the file was downloaded. If you would like to "
                              f"try again, please run the setup command with the flag `--reset`. Here is what the "
                              f"downstream library said: '{e}'.") from e

        progress.end()

        if utils.is_program_exists('diamond', dont_raise=True):
            output_dir = J(self.COG_data_dir, 'DB_DIAMOND')

            # always start from an empty output directory
            if os.path.exists(output_dir):
                shutil.rmtree(output_dir)
            os.mkdir(output_dir)

            output_db_path = J(output_dir, 'COG')
            log_file_path = J(output_dir, 'log.txt')

            self.run.info('Diamond log', log_file_path)

            diamond = Diamond(temp_fasta_path)
            diamond.num_threads = self.num_threads
            diamond.run.log_file_path = log_file_path
            diamond.makedb(output_db_path)
        else:
            self.run.warning("DIAMOND does not seem to be installed on this system, so anvi'o is not going to "
                             "generate a search database for it. Remember this when/if things go South.")

        if utils.is_program_exists('makeblastdb', dont_raise=True) and utils.is_program_exists('blastp', dont_raise=True):
            output_dir = J(self.COG_data_dir, 'DB_BLAST')

            # always start from an empty output directory
            if os.path.exists(output_dir):
                shutil.rmtree(output_dir)
            os.mkdir(output_dir)

            output_db_path = J(output_dir, 'COG')
            log_file_path = J(output_dir, 'log.txt')

            self.run.info('BLAST log', log_file_path)

            blast = BLAST(temp_fasta_path)
            blast.run.log_file_path = log_file_path
            blast.num_threads = self.num_threads

            # NOTE(review): unlike the DIAMOND branch, the target here is nested
            # one level below `output_db_path`, and that `COG` directory is not
            # created in this method -- confirm `BLAST.makedb` copes with that.
            blast.makedb(os.path.join(output_db_path, 'COG.fa'))
        else:
            self.run.warning("BLAST tools do not seem to be installed on this system, so anvi'o is not going to "
                             "generate a search database for them to be used. Keep this in mind for later.")
    finally:
        if os.path.exists(temp_fasta_path):
            os.remove(temp_fasta_path)
def format_protein_db(self, input_file_path, output_file_path):
    """Turn the gzipped protein FASTA into DIAMOND and/or BLAST search databases.

    Steps:

      1. Decompress `input_file_path` (gzip) into a temporary FASTA file.
      2. If `diamond` is available, recreate `<COG_data_dir>/DB_DIAMOND` from
         scratch and make a DIAMOND database there.
      3. If both `makeblastdb` and `blastp` are available, recreate
         `<COG_data_dir>/DB_BLAST` from scratch and make a BLAST database there.
      4. Remove the temporary FASTA file.

    A missing tool only produces a warning; the corresponding database is
    skipped.

    Parameters
    ==========
    input_file_path : str
        Path to the gzip-compressed protein sequences.
    output_file_path : str
        Not used by this method; output locations are derived from
        `self.COG_data_dir`. Kept so the signature stays the same for callers.

    Notes
    =====
    Reads `self.COG_data_dir`, `self.num_threads`, and `self.run`. Step 4 runs
    even when an earlier step raises, so no temporary file is left behind.
    """

    progress.new('Formatting raw files')
    progress.update('Decompressing protein sequences')

    temp_fasta_path = filesnpaths.get_temp_file_path()

    try:
        try:
            # copy in chunks rather than `f_in.read()`, which would pull the
            # whole uncompressed file into memory.
            with open(temp_fasta_path, 'wb') as f_out, gzip.open(input_file_path, 'rb') as f_in:
                shutil.copyfileobj(f_in, f_out)
        finally:
            # make sure the progress display is closed on failure as well; any
            # exception raised above keeps propagating with its original type.
            progress.end()

        diamond_available = utils.is_program_exists('diamond', dont_raise=True)

        if diamond_available:
            output_dir = J(self.COG_data_dir, 'DB_DIAMOND')

            # wipe any previous database so stale files can't linger
            if os.path.exists(output_dir):
                shutil.rmtree(output_dir)
            os.mkdir(output_dir)

            output_db_path = J(output_dir, 'COG')
            log_file_path = J(output_dir, 'log.txt')

            self.run.info('Diamond log', log_file_path)

            diamond = Diamond(temp_fasta_path)
            diamond.num_threads = self.num_threads
            diamond.run.log_file_path = log_file_path
            diamond.makedb(output_db_path)
        else:
            # implicit literal concatenation replaces the in-string backslash
            # continuation that garbled the displayed message.
            self.run.warning("Diamond does not seem to be installed on this system, so anvi'o is not going to "
                             "generate a search database for it. Remember this when/if things go South.")

        # both programs are required: one to make the database, one to search it
        blast_available = utils.is_program_exists('makeblastdb', dont_raise=True) \
                          and utils.is_program_exists('blastp', dont_raise=True)

        if blast_available:
            output_dir = J(self.COG_data_dir, 'DB_BLAST')

            # wipe any previous database so stale files can't linger
            if os.path.exists(output_dir):
                shutil.rmtree(output_dir)
            os.mkdir(output_dir)

            output_db_path = J(output_dir, 'COG')
            log_file_path = J(output_dir, 'log.txt')

            self.run.info('BLAST log', log_file_path)

            blast = BLAST(temp_fasta_path)
            blast.run.log_file_path = log_file_path
            blast.num_threads = self.num_threads

            # NOTE(review): unlike the DIAMOND branch, the target here is nested
            # one level below `output_db_path`, and that `COG` directory is not
            # created in this method -- confirm `BLAST.makedb` copes with that.
            blast.makedb(os.path.join(output_db_path, 'COG.fa'))
        else:
            self.run.warning("BLAST tools do not seem to be installed on this system, so anvi'o is not going to "
                             "generate a search database for them to be used. Keep this in mind for later.")
    finally:
        if os.path.exists(temp_fasta_path):
            os.remove(temp_fasta_path)