def makedb_stdin(self, sequence, output_file_path=None):
    """Build a DIAMOND database from data passed to `diamond makedb` through STDIN.

    `sequence` is handed unchanged to `utils.run_command_STDIN` as the input of
    the command (presumably FASTA-formatted text -- confirm with callers).

    `output_file_path` is the database path given to `-d`; when it is not
    provided, `self.target_fasta` is used instead. The file `<path>.dmnd` is
    then passed to `self.check_output`.
    """
    db_path = output_file_path or self.target_fasta

    self.progress.new('DIAMOND')
    self.progress.update('creating the search database (using %d thread(s)) ...' % self.num_threads)

    cmd_line = ['diamond',
                'makedb',
                '-d', db_path,
                '-p', self.num_threads]

    # The command must run exactly once. A second, identical invocation used to
    # follow `progress.end()`; its result was discarded, so it only rebuilt the
    # same database a second time.
    utils.run_command_STDIN(cmd_line, self.run.log_file_path, sequence)

    self.progress.end()

    expected_output = db_path + '.dmnd'
    self.check_output(expected_output, 'makedb')

    self.run.info('diamond makedb cmd', ' '.join([str(x) for x in cmd_line]), quiet=True)
def blast_stdin(self, multisequence):
    """Run `self.search_program` on data passed through STDIN and return its output.

    `multisequence` is handed unchanged to `utils.run_command_STDIN` as the
    input of the command. The search runs against `self.target_fasta` with
    `-outfmt 6`; `-max_target_seqs` and `-perc_identity` are only added when
    `self.max_target_seqs` / `self.min_pct_id` are set.

    Returns whatever `utils.run_command_STDIN` returns, unmodified.
    """
    cmd_line = [self.search_program,
                '-db', self.target_fasta,
                '-evalue', self.evalue,
                '-outfmt', '6',
                '-num_threads', self.num_threads]

    if self.max_target_seqs:
        cmd_line += ['-max_target_seqs', self.max_target_seqs]

    if self.min_pct_id:
        cmd_line += ['-perc_identity', self.min_pct_id]

    self.run.info('NCBI %s stdin cmd' % self.search_program,
                  ' '.join([str(p) for p in cmd_line]),
                  quiet=(not anvio.DEBUG))

    self.progress.new('BLAST')
    self.progress.update('running search (using %s with %d thread(s)) ...' % (self.search_program, self.num_threads))

    output = utils.run_command_STDIN(cmd_line,
                                     self.run.log_file_path,
                                     multisequence,
                                     remove_log_file_if_exists=False)

    self.progress.end()

    # `output` is a single block of text, so `len(output)` would report the
    # number of characters. Count actual lines for the message.
    self.run.info('BLAST results', '%d lines were returned from STDIN call' % len(output.splitlines()))

    return output
def blastp_stdin(self, sequence):
    """Run `diamond blastp` for a single sequence passed through STDIN.

    `sequence` is wrapped into a one-record FASTA entry named `seq` and sent to
    the command via `utils.run_command_STDIN`. The search runs against
    `self.target_fasta`; `--sensitive`, `--max-target-seqs`, `--id` and
    `--evalue` are only added when the matching attribute on `self` is set.

    Returns whatever `utils.run_command_STDIN` returns, unmodified.
    """
    self.run.info('DIAMOND is set to be', 'Sensitive' if self.sensitive else 'Fast')

    cmd_line = ['diamond',
                'blastp',
                '-d', self.target_fasta,
                '-p', self.num_threads]

    if self.sensitive:
        cmd_line.append('--sensitive')

    if self.max_target_seqs:
        cmd_line.extend(['--max-target-seqs', self.max_target_seqs])

    if self.min_pct_id:
        cmd_line.extend(['--id', self.min_pct_id])

    if self.evalue:
        cmd_line.extend(['--evalue', self.evalue])

    self.run.info('DIAMOND blastp stdin cmd', ' '.join([str(p) for p in cmd_line]), quiet=(not anvio.DEBUG))

    output = utils.run_command_STDIN(cmd_line,
                                     self.run.log_file_path,
                                     '>seq\n%s' % sequence,
                                     remove_log_file_if_exists=False)

    # NOTE(review): no `self.progress.new()` is issued in this method, so this
    # call ends whatever progress is currently active (presumably one owned by
    # the caller) -- confirm this is intended.
    self.progress.end()

    # `output` is a single block of text, so `len(output)` would report the
    # number of characters. Count actual lines for the message.
    self.run.info('Diamond blastp results', '%d lines were returned from STDIN call' % len(output.splitlines()))

    return output
def blastp_stdin(self, sequence):
    """Run `diamond blastp` for a single sequence passed through STDIN.

    `sequence` is wrapped into a one-record FASTA entry named `seq` and sent to
    the command via `utils.run_command_STDIN`. The search runs against
    `self.target_fasta`; `--sensitive`, `--max-target-seqs` and `--evalue` are
    only added when the matching attribute on `self` is set.

    Returns whatever `utils.run_command_STDIN` returns, unmodified.
    """
    self.run.info('DIAMOND is set to be', 'Sensitive' if self.sensitive else 'Fast')

    cmd_line = ['diamond',
                'blastp',
                '-d', self.target_fasta,
                '-p', self.num_threads]

    if self.sensitive:
        cmd_line.append('--sensitive')

    if self.max_target_seqs:
        cmd_line.extend(['--max-target-seqs', self.max_target_seqs])

    if self.evalue:
        cmd_line.extend(['--evalue', self.evalue])

    self.run.info('DIAMOND blastp stdin cmd', ' '.join([str(p) for p in cmd_line]), quiet=(not anvio.DEBUG))

    self.progress.new('DIAMOND')
    self.progress.update('running blastp (using %d thread(s)) ...' % self.num_threads)

    output = utils.run_command_STDIN(cmd_line, self.run.log_file_path, '>seq\n%s' % sequence)

    self.progress.end()

    # `output` is a single block of text, so `len(output)` would report the
    # number of characters. Count actual lines for the message.
    self.run.info('Diamond blastp results', '%d lines were returned from STDIN call' % len(output.splitlines()))

    return output
def generate(self):
    """Collect the `--help` output of every program and render the vignette.

    For each path in `self.all_programs` (except those whose basename is listed
    in `self.programs_to_skip`), the program is run with `--help`, its output
    is parsed with `parse_help_output`, and the result is stored together with
    the `__tags__` and `__resources__` meta information read from the program
    file. The collected data is rendered through `SummaryHTMLOutput` and
    written to `self.output_file_path`.

    When `anvio.DEBUG` is set, parsing errors are allowed to propagate and the
    collected dict is printed as JSON; otherwise a program whose help output
    cannot be parsed is reported with a warning and left out.
    """
    d = {}

    log_file = filesnpaths.get_temp_file_path()
    num_all_programs = len(self.all_programs)

    for i, program_path in enumerate(self.all_programs):
        program_name = os.path.basename(program_path)

        if program_name in self.programs_to_skip:
            run.warning("Someone doesn't want %s to be in the output :/ Fine. Skipping." % (program_name))
            # actually skip it: without this the program was still processed
            # and ended up in the output despite the warning above.
            continue

        progress.new('Bleep bloop')
        progress.update('%s (%d of %d)' % (program_name, i + 1, num_all_programs))

        output = utils.run_command_STDIN('%s --help' % (program_path), log_file, '').split('\n')

        if anvio.DEBUG:
            # no safety net in debug mode, so that the traceback is visible
            usage, description, params, output = parse_help_output(output)
        else:
            try:
                usage, description, params, output = parse_help_output(output)
            except Exception as e:
                progress.end()
                run.warning("The program '%s' does not seem to have the expected help menu output. Skipping to the next. "
                            "For the curious, this was the error message: '%s'" % (program_name, str(e).strip()))
                continue

        d[program_name] = {'usage': usage,
                           'description': description,
                           'params': params,
                           'tags': get_meta_information_from_file(program_path, '__tags__'),
                           'resources': get_meta_information_from_file(program_path, '__resources__')}

        progress.end()

    os.remove(log_file)

    # generate output
    program_names = sorted([p for p in d if not p.startswith('anvi-script-')])
    script_names = sorted([p for p in d if p.startswith('anvi-script-')])
    vignette = {'vignette': d,
                'program_names': program_names,
                'script_names': script_names,
                'all_names': program_names + script_names,
                'meta': {'summary_type': 'vignette',
                         'version': '\n'.join(['|%s|%s|' % (t[0], t[1]) for t in anvio.get_version_tuples()]),
                         'date': utils.get_date()}}

    if anvio.DEBUG:
        run.warning(None, 'THE OUTPUT DICT')
        import json
        print(json.dumps(d, indent=2))

    # use a context manager so the file handle is flushed and closed
    # deterministically rather than whenever the object gets collected.
    with open(self.output_file_path, 'w') as output_file:
        output_file.write(SummaryHTMLOutput(vignette, r=run, p=progress).render())

    run.info('Output file', os.path.abspath(self.output_file_path))
def run_stdin(self, sequences_list, debug=False):
    """Align sequences with FAMSA through STDIN and return the alignment as a dict.

    `sequences_list` is a list of `(name, sequence)` tuples. The returned dict
    maps every name found in the program output to its aligned sequence.

    A temporary directory holds the log file. It is removed before returning
    unless `debug` is set, and it is left in place when the output does not
    look like a complete FAMSA run (in which case a ConfigError is raised and
    the output is appended to the log file).

    >>> from anvio.drivers.famsa import FAMSA
    >>> f = FAMSA()
    >>> f.run_stdin([('seq1', 'ATCATCATCGA'), ('seq2', 'ATCGAGTCGAT')])
    {u'seq1': u'ATCATCATCGA-', u'seq2': u'ATCG-AGTCGAT'}
    """
    work_dir = filesnpaths.get_temp_directory_path()
    log_path = os.path.join(work_dir, '00_log.txt')

    self.run.info('Running %s' % self.program_name, '%d sequences will be aligned' % len(sequences_list))
    self.run.info('Log file path', log_path)

    fasta_input = ''.join('>%s\n%s\n' % (entry[0], entry[1]) for entry in sequences_list)

    output = utils.run_command_STDIN([self.program_name, 'STDIN', 'STDOUT'], log_path, fasta_input)

    # a usable output starts with 'FAMSA' and ends with 'Done!'
    looks_complete = output[0:5] == 'FAMSA' and output[-6:].strip() == "Done!"
    if not looks_complete:
        with open(log_path, "a") as log_file:
            log_file.write('# THIS IS THE OUTPUT YOU ARE LOOKING FOR:\n\n%s\n' % (output))

        raise ConfigError("Drivers::FAMSA: Something is worng :/ The output does not like the expected output "
                          "for a proper FAMSA run. You can find the output in this log file: %s" % (log_path))

    alignments = {}

    # The first two and the last two lines of the output are dropped before
    # parsing (presumably banner / status lines). The trailing '>' is a
    # sentinel that flushes the last record.
    fasta_lines = [entry for entry in output.split('\n')[2:-2] if entry] + ['>']

    current_defline, pieces = None, []
    for line in fasta_lines:
        if not line.startswith('>'):
            pieces.append(line)
            continue

        if current_defline:
            # a defline that is not followed by any sequence line maps to None
            alignments[current_defline[1:]] = ''.join(pieces) if pieces else None

        current_defline, pieces = line, []

    if not debug:
        shutil.rmtree(work_dir)

    return alignments
def run_stdin(self, sequences_list, debug=False):
    """Takes a list of tuples for sequences, performs MSA using muscle, returns a dict.

    `sequences_list` is a list of `(name, sequence)` tuples. The returned dict
    maps every name found in the program output to its aligned sequence.

    A temporary directory holds the log file. It is removed before returning
    unless `debug` is set, and it is left in place when the output does not
    start with a FASTA defline (including when it is empty), in which case a
    ConfigError is raised and the output is appended to the log file.

    >>> from anvio.drivers.muscle import Muscle
    >>> m = Muscle()
    >>> m.run_stdin([('seq1', 'ATCATCATCGA'), ('seq2', 'ATCGAGTCGAT')])
    {u'seq1': u'ATCATCATCGA-', u'seq2': u'ATCG-AGTCGAT'}
    """
    tmp_dir = filesnpaths.get_temp_directory_path()
    log_file_path = os.path.join(tmp_dir, '00_log.txt')

    self.run.info('Running %s' % self.program_name, '%d sequences will be aligned' % len(sequences_list))
    self.run.info('Log file path', log_file_path)

    sequences_data = ''.join(['>%s\n%s\n' % (t[0], t[1]) for t in sequences_list])

    cmd_line = [self.program_name, '-quiet']

    output = utils.run_command_STDIN(cmd_line, log_file_path, sequences_data)

    # `startswith` also covers an empty output, which would otherwise blow up
    # with an IndexError on `output[0]` instead of the informative error below.
    if not output.startswith('>'):
        with open(log_file_path, "a") as log_file:
            log_file.write('# THIS IS THE OUTPUT YOU ARE LOOKING FOR:\n\n%s\n' % (output))

        raise ConfigError("Drivers::Muscle: Something went wrong with this run :/ The output does not "
                          "look alright. You can find the output in this log file: %s" % (log_file_path))

    alignments = {}

    # parse the output, and fill alignments. the trailing '>' is a sentinel
    # that flushes the last record.
    defline, seq = None, None
    for line in [o for o in output.split('\n') if len(o)] + ['>']:
        if line.startswith('>'):
            if defline:
                alignments[defline[1:]] = seq
            defline, seq = line, None
        else:
            if not seq:
                seq = line
            else:
                seq += line

    if not debug:
        shutil.rmtree(tmp_dir)

    return alignments
def run_stdin(self, sequences_list, debug=False):
    """Takes a list of tuples for sequences, performs MSA using famsa, returns a dict.

    `sequences_list` is a list of `(name, sequence)` tuples. The returned dict
    maps every name found in the program output to its aligned sequence.

    A temporary directory holds the log file. It is removed before returning
    unless `debug` is set, and it is left in place when the output does not
    start with 'FAMSA' and end with 'Done!', in which case a ConfigError is
    raised and the output is appended to the log file.

    >>> from anvio.drivers.famsa import FAMSA
    >>> f = FAMSA()
    >>> f.run_stdin([('seq1', 'ATCATCATCGA'), ('seq2', 'ATCGAGTCGAT')])
    {u'seq1': u'ATCATCATCGA-', u'seq2': u'ATCG-AGTCGAT'}
    """
    tmp_dir = filesnpaths.get_temp_directory_path()
    log_file_path = os.path.join(tmp_dir, '00_log.txt')

    self.run.info('Running %s' % self.program_name, '%d sequences will be aligned' % len(sequences_list))
    self.run.info('Log file path', log_file_path)

    sequences_data = ''.join(['>%s\n%s\n' % (t[0], t[1]) for t in sequences_list])

    cmd_line = [self.program_name, 'STDIN', 'STDOUT']

    output = utils.run_command_STDIN(cmd_line, log_file_path, sequences_data)

    if output[0:5] != 'FAMSA' or output[-6:].strip() != "Done!":
        with open(log_file_path, "a") as log_file:
            log_file.write('# THIS IS THE OUTPUT YOU ARE LOOKING FOR:\n\n%s\n' % (output))

        # adjacent literals instead of a backslash continuation inside the
        # string, which would embed the source indentation in the message.
        raise ConfigError("Drivers::FAMSA: Something is worng :/ The output does not like the expected output "
                          "for a proper FAMSA run. You can find the output in this log file: %s" % (log_file_path))

    alignments = {}

    # parse the output, and fill alignments. the first two and the last two
    # lines of the output are dropped (presumably banner / status lines), and
    # the trailing '>' is a sentinel that flushes the last record.
    defline, seq = None, None
    for line in [o for o in output.split('\n')[2:-2] if len(o)] + ['>']:
        if line.startswith('>'):
            if defline:
                alignments[defline[1:]] = seq
            defline, seq = line, None
        else:
            if not seq:
                seq = line
            else:
                seq += line

    if not debug:
        shutil.rmtree(tmp_dir)

    return alignments
def run_stdin(self, sequences_list, debug=False):
    """Takes a list of tuples for sequences, performs MSA using muscle, returns a dict.

    `sequences_list` is a list of `(name, sequence)` tuples. The returned dict
    maps every name found in the program output to its aligned sequence.

    A temporary directory holds the log file. It is removed before returning
    unless `debug` is set, and it is left in place when the output is empty or
    does not start with a FASTA defline, in which case a ConfigError is raised
    and the output is appended to the log file.

    >>> from anvio.drivers.muscle import Muscle
    >>> m = Muscle()
    >>> m.run_stdin([('seq1', 'ATCATCATCGA'), ('seq2', 'ATCGAGTCGAT')])
    {u'seq1': u'ATCATCATCGA-', u'seq2': u'ATCG-AGTCGAT'}
    """
    tmp_dir = filesnpaths.get_temp_directory_path()
    log_file_path = os.path.join(tmp_dir, '00_log.txt')

    self.run.info('Running %s' % self.program_name, '%d sequences will be aligned' % len(sequences_list))
    self.run.info('Log file path', log_file_path)

    sequences_data = ''.join(['>%s\n%s\n' % (t[0], t[1]) for t in sequences_list])

    cmd_line = [self.program_name, '-quiet']

    output = utils.run_command_STDIN(cmd_line, log_file_path, sequences_data)

    if not (len(output) and output[0] == '>'):
        with open(log_file_path, "a") as log_file:
            log_file.write('# THIS IS THE OUTPUT YOU ARE LOOKING FOR:\n\n%s\n' % (output))

        # adjacent literals instead of a backslash continuation inside the
        # string, which would embed the source indentation in the message.
        raise ConfigError("Drivers::Muscle: Something went wrong with this alignment that was working on %d "
                          "sequences :/ You can find the output in this log file: %s" % (len(sequences_list), log_file_path))

    alignments = {}

    # parse the output, and fill alignments. the trailing '>' is a sentinel
    # that flushes the last record.
    defline, seq = None, None
    for line in [o for o in output.split('\n') if len(o)] + ['>']:
        if line.startswith('>'):
            if defline:
                alignments[defline[1:]] = seq
            defline, seq = line, None
        else:
            if not seq:
                seq = line
            else:
                seq += line

    if not debug:
        shutil.rmtree(tmp_dir)

    return alignments
def blastp_stdin_multi(self, multisequence):
    """Run `diamond blastp` on data passed through STDIN and return its output.

    `multisequence` is handed unchanged to `utils.run_command_STDIN` as the
    input of the command. The search runs against `self.target_fasta`, and the
    whitespace-separated parts of `self.outfmt` follow `--outfmt` as individual
    arguments. `--sensitive`, `--max-target-seqs`, `--id` and `--evalue` are
    only added when the matching attribute on `self` is set.

    Returns whatever `utils.run_command_STDIN` returns, unmodified.
    """
    self.run.warning(None, header="DIAMOND BLASTP STDIN MULTI", lc="green")
    self.run.info('Mode', 'Sensitive' if self.sensitive else 'Fast')

    cmd_line = ['diamond',
                'blastp',
                '-d', self.target_fasta,
                '-p', self.num_threads,
                '--outfmt', *self.outfmt.split()]

    if self.sensitive:
        cmd_line.append('--sensitive')

    if self.max_target_seqs:
        cmd_line.extend(['--max-target-seqs', self.max_target_seqs])

    if self.min_pct_id:
        cmd_line.extend(['--id', self.min_pct_id])

    if self.evalue:
        cmd_line.extend(['--evalue', self.evalue])

    self.run.info('Command line', ' '.join([str(p) for p in cmd_line]), quiet=(not anvio.DEBUG))

    self.progress.new('DIAMOND')
    self.progress.update('running blastp (using %d thread(s)) ...' % self.num_threads)

    output = utils.run_command_STDIN(cmd_line,
                                     self.run.log_file_path,
                                     multisequence,
                                     remove_log_file_if_exists=False)

    self.progress.end()

    # `output` is a single block of text, so `len(output)` would report the
    # number of characters. Count actual lines for the message.
    self.run.info('Diamond blastp results', '%d lines were returned from STDIN call' % len(output.splitlines()))

    return output