Example #1
0
    def makedb_stdin(self, sequence, output_file_path=None):
        """Build a DIAMOND search database from data passed via STDIN.

        `sequence` is handed to `utils.run_command_STDIN` as the STDIN payload
        of `diamond makedb`. The database path prefix given to `-d` is
        `output_file_path` when it is provided, and `self.target_fasta`
        otherwise. After the run, `self.check_output` is called on the
        expected '.dmnd' file.
        """
        db_path_prefix = output_file_path or self.target_fasta

        self.progress.new('DIAMOND')
        self.progress.update(
            'creating the search database (using %d thread(s)) ...' %
            self.num_threads)

        cmd_line = [
            'diamond', 'makedb', '-d', db_path_prefix,
            '-p', self.num_threads
        ]

        # The command is run exactly once. It used to be executed a second
        # time only to have its return value overwritten right away.
        utils.run_command_STDIN(cmd_line, self.run.log_file_path, sequence)

        self.progress.end()

        expected_output = db_path_prefix + '.dmnd'
        self.check_output(expected_output, 'makedb')

        self.run.info('diamond makedb cmd',
                      ' '.join([str(x) for x in cmd_line]),
                      quiet=True)
Example #2
0
    def blast_stdin(self, multisequence):
        """Run the NCBI search program on data passed via STDIN.

        `multisequence` is sent to `self.search_program` through STDIN. The
        `-max_target_seqs` and `-perc_identity` flags are added only when the
        matching attribute on `self` is truthy.

        Returns the output of `utils.run_command_STDIN` unchanged.
        """
        cmd_line = [
            self.search_program, '-db', self.target_fasta, '-evalue',
            self.evalue, '-outfmt', '6', '-num_threads', self.num_threads
        ]

        if self.max_target_seqs:
            cmd_line += ['-max_target_seqs', self.max_target_seqs]

        if self.min_pct_id:
            cmd_line += ['-perc_identity', self.min_pct_id]

        self.run.info('NCBI %s stdin cmd' % self.search_program,
                      ' '.join([str(p) for p in cmd_line]),
                      quiet=(not anvio.DEBUG))

        self.progress.new('BLAST')
        self.progress.update(
            'running search (using %s with %d thread(s)) ...' %
            (self.search_program, self.num_threads))

        output = utils.run_command_STDIN(cmd_line,
                                         self.run.log_file_path,
                                         multisequence,
                                         remove_log_file_if_exists=False)

        self.progress.end()

        # NOTE(review): assumes `output` is the captured text as one string,
        # so lines are counted here rather than characters -- confirm
        # against utils.run_command_STDIN.
        self.run.info('BLAST results',
                      '%d lines were returned from STDIN call' %
                      len(output.splitlines()))

        return output
Example #3
0
    def blastp_stdin(self, sequence):
        """Run `diamond blastp` on a single sequence passed via STDIN.

        The sequence is wrapped into a FASTA entry named 'seq' before it is
        sent to the command. The `--sensitive`, `--max-target-seqs`, `--id`
        and `--evalue` flags are added only when the matching attribute on
        `self` is truthy.

        Returns the output of `utils.run_command_STDIN` unchanged.
        """
        self.run.info('DIAMOND is set to be',
                      'Sensitive' if self.sensitive else 'Fast')

        cmd_line = [
            'diamond', 'blastp', '-d', self.target_fasta, '-p',
            self.num_threads
        ]

        if self.sensitive:
            cmd_line.append('--sensitive')

        if self.max_target_seqs:
            cmd_line.extend(['--max-target-seqs', self.max_target_seqs])

        if self.min_pct_id:
            cmd_line.extend(['--id', self.min_pct_id])

        if self.evalue:
            cmd_line.extend(['--evalue', self.evalue])

        self.run.info('DIAMOND blastp stdin cmd',
                      ' '.join([str(p) for p in cmd_line]),
                      quiet=(not anvio.DEBUG))

        # Open the progress display before the run so that the `end()` call
        # below has a matching `new()`.
        self.progress.new('DIAMOND')
        self.progress.update('running blastp (using %d thread(s)) ...' %
                             self.num_threads)

        output = utils.run_command_STDIN(cmd_line,
                                         self.run.log_file_path,
                                         '>seq\n%s' % sequence,
                                         remove_log_file_if_exists=False)

        self.progress.end()

        # NOTE(review): assumes `output` is the captured text as one string,
        # so lines are counted here rather than characters -- confirm
        # against utils.run_command_STDIN.
        self.run.info('Diamond blastp results',
                      '%d lines were returned from STDIN call' %
                      len(output.splitlines()))

        return output
Example #4
0
    def blastp_stdin(self, sequence):
        """Run `diamond blastp` on a single sequence passed via STDIN.

        The sequence is wrapped into a FASTA entry named 'seq' before it is
        sent to the command. The `--sensitive`, `--max-target-seqs` and
        `--evalue` flags are added only when the matching attribute on `self`
        is truthy.

        Returns the output of `utils.run_command_STDIN` unchanged.
        """
        self.run.info('DIAMOND is set to be', 'Sensitive' if self.sensitive else 'Fast')

        cmd_line = ['diamond',
                    'blastp',
                    '-d', self.target_fasta,
                    '-p', self.num_threads]

        if self.sensitive:
            cmd_line.append('--sensitive')

        if self.max_target_seqs:
            cmd_line.extend(['--max-target-seqs', self.max_target_seqs])

        if self.evalue:
            cmd_line.extend(['--evalue', self.evalue])

        self.run.info('DIAMOND blastp stdin cmd', ' '.join([str(p) for p in cmd_line]), quiet=(not anvio.DEBUG))

        self.progress.new('DIAMOND')
        self.progress.update('running blastp (using %d thread(s)) ...' % self.num_threads)

        output = utils.run_command_STDIN(cmd_line, self.run.log_file_path, '>seq\n%s' % sequence)

        self.progress.end()

        # NOTE(review): assumes `output` is the captured text as one string,
        # so lines are counted here rather than characters -- confirm
        # against utils.run_command_STDIN.
        self.run.info('Diamond blastp results', '%d lines were returned from STDIN call' % len(output.splitlines()))

        return output
Example #5
0
    def generate(self):
        """Collect the help output of every program and write the vignette file.

        Each program in `self.all_programs` is run with `--help`. The parsed
        usage, description and parameters are stored together with the
        `__tags__` and `__resources__` meta information read from the program
        file. Programs named in `self.programs_to_skip` are left out. Programs
        whose help output cannot be parsed are skipped with a warning, unless
        `anvio.DEBUG` is set, in which case the parsing error is raised.

        The rendered summary is written to `self.output_file_path`.
        """
        d = {}

        log_file = filesnpaths.get_temp_file_path()
        num_all_programs = len(self.all_programs)
        for program_number, program_path in enumerate(self.all_programs, start=1):
            program_name = os.path.basename(program_path)

            if program_name in self.programs_to_skip:
                run.warning("Someone doesn't want %s to be in the output :/ Fine. Skipping." % (program_name))
                # The warning promises a skip, so actually move on to the
                # next program instead of processing this one anyway.
                continue

            progress.new('Bleep bloop')
            progress.update('%s (%d of %d)' % (program_name, program_number, num_all_programs))

            help_lines = utils.run_command_STDIN('%s --help' % (program_path), log_file, '').split('\n')

            if anvio.DEBUG:
                # in debug mode parsing errors are allowed to surface
                usage, description, params, _ = parse_help_output(help_lines)
            else:
                try:
                    usage, description, params, _ = parse_help_output(help_lines)
                except Exception as e:
                    progress.end()
                    run.warning("The program '%s' does not seem to have the expected help menu output. Skipping to the next.\
                                 For the curious, this was the error message: '%s'" % (program_name, str(e).strip()))
                    continue

            d[program_name] = {'usage': usage,
                               'description': description,
                               'params': params,
                               'tags': get_meta_information_from_file(program_path, '__tags__'),
                               'resources': get_meta_information_from_file(program_path, '__resources__')}

            progress.end()

        os.remove(log_file)

        # generate output
        program_names = sorted([p for p in d if not p.startswith('anvi-script-')])
        script_names = sorted([p for p in d if p.startswith('anvi-script-')])
        vignette = {'vignette': d,
                    'program_names': program_names,
                    'script_names': script_names,
                    'all_names': program_names + script_names,
                    'meta': {'summary_type': 'vignette',
                             'version': '\n'.join(['|%s|%s|' % (t[0], t[1]) for t in anvio.get_version_tuples()]),
                             'date': utils.get_date()}}

        if anvio.DEBUG:
            run.warning(None, 'THE OUTPUT DICT')
            import json
            print(json.dumps(d, indent=2))

        # Render first so that a rendering failure does not leave an empty
        # output file behind, then write through a context manager so that
        # the file handle is closed deterministically.
        rendered = SummaryHTMLOutput(vignette, r=run, p=progress).render()
        with open(self.output_file_path, 'w') as output_file:
            output_file.write(rendered)

        run.info('Output file', os.path.abspath(self.output_file_path))
Example #6
0
    def run_stdin(self, sequences_list, debug=False):
        """Align sequences with FAMSA by piping them through STDIN.

        `sequences_list` is a list of tuples whose first two items are the
        sequence name and the sequence. The return value is a dict that maps
        each name to its aligned sequence:

            >>> from anvio.drivers.famsa import FAMSA
            >>> f = FAMSA()
            >>> f.run_stdin([('seq1', 'ATCATCATCGA'), ('seq2', 'ATCGAGTCGAT')])
            {u'seq1': u'ATCATCATCGA-', u'seq2': u'ATCG-AGTCGAT'}

        A ConfigError is raised when the output does not start with 'FAMSA'
        or does not end with 'Done!'. The temporary directory that holds the
        log file is removed on success unless `debug` is True.
        """

        work_dir = filesnpaths.get_temp_directory_path()
        log_file_path = os.path.join(work_dir, '00_log.txt')

        self.run.info('Running %s' % self.program_name,
                      '%d sequences will be aligned' % len(sequences_list))
        self.run.info('Log file path', log_file_path)

        fasta_entries = ['>%s\n%s\n' % (t[0], t[1]) for t in sequences_list]

        output = utils.run_command_STDIN(
            [self.program_name, 'STDIN', 'STDOUT'],
            log_file_path,
            ''.join(fasta_entries))

        looks_like_famsa = (output[0:5] == 'FAMSA'
                            and output[-6:].strip() == "Done!")
        if not looks_like_famsa:
            # keep the unexpected output next to the log for troubleshooting
            with open(log_file_path, "a") as log_file:
                log_file.write(
                    '# THIS IS THE OUTPUT YOU ARE LOOKING FOR:\n\n%s\n' %
                    output)
            raise ConfigError(
                "Drivers::FAMSA: Something is worng :/ The output does not like the expected output "
                "for a proper FAMSA run. You can find the output in this log file: %s"
                % log_file_path)

        # the first two and the last two lines of the output are not parsed
        fasta_lines = [entry for entry in output.split('\n')[2:-2] if entry]
        # a trailing '>' acts as a sentinel that flushes the last record
        fasta_lines.append('>')

        alignments = {}
        name, pieces = None, []
        for entry in fasta_lines:
            if not entry.startswith('>'):
                pieces.append(entry)
                continue

            if name is not None:
                # a defline without any sequence lines maps to None
                alignments[name] = ''.join(pieces) if pieces else None
            name, pieces = entry[1:], []

        if not debug:
            shutil.rmtree(work_dir)

        return alignments
Example #7
0
    def run_stdin(self, sequences_list, debug=False):
        """Takes a list of tuples for sequences, performs MSA using muscle, returns a dict.

            >>> from anvio.drivers.muscle import Muscle
            >>> m = Muscle()
            >>> m.run_stdin([('seq1', 'ATCATCATCGA'), ('seq2', 'ATCGAGTCGAT')])
            {u'seq1': u'ATCATCATCGA-', u'seq2': u'ATCG-AGTCGAT'}

        A ConfigError is raised when the output is empty or does not start
        with '>'. The temporary directory that holds the log file is removed
        on success unless `debug` is True.
        """

        tmp_dir = filesnpaths.get_temp_directory_path()
        log_file_path = os.path.join(tmp_dir, '00_log.txt')

        self.run.info('Running %s' % self.program_name,
                      '%d seqeunces will be aligned' % len(sequences_list))
        self.run.info('Log file path', log_file_path)

        sequences_data = ''.join(
            ['>%s\n%s\n' % (t[0], t[1]) for t in sequences_list])
        cmd_line = [self.program_name, '-quiet']

        output = utils.run_command_STDIN(cmd_line, log_file_path,
                                         sequences_data)

        # `startswith` is also False for empty output, so a run that returns
        # nothing ends in the ConfigError below rather than an IndexError
        # from `output[0]`.
        if not output.startswith('>'):
            with open(log_file_path, "a") as log_file:
                log_file.write(
                    '# THIS IS THE OUTPUT YOU ARE LOOKING FOR:\n\n%s\n' %
                    (output))
            raise ConfigError(
                "Drivers::Muscle: Something went wrong with this run :/ The output does not\
                                look alright. You can find the output in this log file: %s"
                % (log_file_path))

        alignments = {}

        # parse the output, and fill alignments; the trailing '>' is a
        # sentinel that flushes the last record
        defline, seq = None, None
        for line in [o for o in output.split('\n') if len(o)] + ['>']:
            if line.startswith('>'):
                if defline:
                    alignments[defline[1:]] = seq
                defline, seq = line, None
            else:
                if not seq:
                    seq = line
                else:
                    seq += line

        if not debug:
            shutil.rmtree(tmp_dir)

        return alignments
Example #8
0
    def run_stdin(self, sequences_list, debug=False):
        """Align sequences with FAMSA by piping them through STDIN.

        `sequences_list` is a list of tuples whose first two items are the
        sequence name and the sequence. The return value is a dict that maps
        each name to its aligned sequence:

            >>> from anvio.drivers.famsa import FAMSA
            >>> f = FAMSA()
            >>> f.run_stdin([('seq1', 'ATCATCATCGA'), ('seq2', 'ATCGAGTCGAT')])
            {u'seq1': u'ATCATCATCGA-', u'seq2': u'ATCG-AGTCGAT'}

        A ConfigError is raised when the output does not start with 'FAMSA'
        or does not end with 'Done!'. The temporary directory that holds the
        log file is removed on success unless `debug` is True.
        """

        work_dir = filesnpaths.get_temp_directory_path()
        log_file_path = os.path.join(work_dir, '00_log.txt')

        self.run.info('Running %s' % self.program_name, '%d seqeunces will be aligned' % len(sequences_list))
        self.run.info('Log file path', log_file_path)

        fasta_entries = ['>%s\n%s\n' % (t[0], t[1]) for t in sequences_list]

        output = utils.run_command_STDIN([self.program_name, 'STDIN', 'STDOUT'], log_file_path, ''.join(fasta_entries))

        looks_like_famsa = output[0:5] == 'FAMSA' and output[-6:].strip() == "Done!"
        if not looks_like_famsa:
            # keep the unexpected output next to the log for troubleshooting
            with open(log_file_path, "a") as log_file:
                log_file.write('# THIS IS THE OUTPUT YOU ARE LOOKING FOR:\n\n%s\n' % output)
            raise ConfigError("Drivers::FAMSA: Something is worng :/ The output does not like the expected output\
                               for a proper FAMSA run. You can find the output in this log file: %s" % log_file_path)

        # the first two and the last two lines of the output are not parsed
        fasta_lines = [entry for entry in output.split('\n')[2:-2] if entry]
        # a trailing '>' acts as a sentinel that flushes the last record
        fasta_lines.append('>')

        alignments = {}
        name, pieces = None, []
        for entry in fasta_lines:
            if not entry.startswith('>'):
                pieces.append(entry)
                continue

            if name is not None:
                # a defline without any sequence lines maps to None
                alignments[name] = ''.join(pieces) if pieces else None
            name, pieces = entry[1:], []

        if not debug:
            shutil.rmtree(work_dir)

        return alignments
Example #9
0
    def run_stdin(self, sequences_list, debug=False):
        """Align sequences with muscle by piping them through STDIN.

        `sequences_list` is a list of tuples whose first two items are the
        sequence name and the sequence. The return value is a dict that maps
        each name to its aligned sequence:

            >>> from anvio.drivers.muscle import Muscle
            >>> m = Muscle()
            >>> m.run_stdin([('seq1', 'ATCATCATCGA'), ('seq2', 'ATCGAGTCGAT')])
            {u'seq1': u'ATCATCATCGA-', u'seq2': u'ATCG-AGTCGAT'}

        A ConfigError is raised when the output is empty or does not start
        with '>'. The temporary directory that holds the log file is removed
        on success unless `debug` is True.
        """

        work_dir = filesnpaths.get_temp_directory_path()
        log_file_path = os.path.join(work_dir, '00_log.txt')

        self.run.info('Running %s' % self.program_name, '%d seqeunces will be aligned' % len(sequences_list))
        self.run.info('Log file path', log_file_path)

        fasta_entries = ['>%s\n%s\n' % (t[0], t[1]) for t in sequences_list]

        output = utils.run_command_STDIN([self.program_name, '-quiet'], log_file_path, ''.join(fasta_entries))

        # empty output fails this check as well
        if not output.startswith('>'):
            # keep the unexpected output next to the log for troubleshooting
            with open(log_file_path, "a") as log_file:
                log_file.write('# THIS IS THE OUTPUT YOU ARE LOOKING FOR:\n\n%s\n' % output)
            raise ConfigError("Drivers::Muscle: Something went wrong with this alignment that was working on %d\
                               sequences :/ You can find the output in this log file: %s" % (len(sequences_list), log_file_path))

        fasta_lines = [entry for entry in output.split('\n') if entry]
        # a trailing '>' acts as a sentinel that flushes the last record
        fasta_lines.append('>')

        alignments = {}
        name, pieces = None, []
        for entry in fasta_lines:
            if not entry.startswith('>'):
                pieces.append(entry)
                continue

            if name is not None:
                # a defline without any sequence lines maps to None
                alignments[name] = ''.join(pieces) if pieces else None
            name, pieces = entry[1:], []

        if not debug:
            shutil.rmtree(work_dir)

        return alignments
Example #10
0
    def blastp_stdin_multi(self, multisequence):
        """Run `diamond blastp` on data passed via STDIN.

        `multisequence` is sent to the command as is. The value of
        `self.outfmt` is split on whitespace and passed after `--outfmt`. The
        `--sensitive`, `--max-target-seqs`, `--id` and `--evalue` flags are
        added only when the matching attribute on `self` is truthy.

        Returns the output of `utils.run_command_STDIN` unchanged.
        """
        self.run.warning(None, header="DIAMOND BLASTP STDIN MULTI", lc="green")
        self.run.info('Mode', 'Sensitive' if self.sensitive else 'Fast')

        cmd_line = [
            'diamond', 'blastp', '-d', self.target_fasta, '-p',
            self.num_threads, '--outfmt', *self.outfmt.split()
        ]

        if self.sensitive:
            cmd_line.append('--sensitive')

        if self.max_target_seqs:
            cmd_line.extend(['--max-target-seqs', self.max_target_seqs])

        if self.min_pct_id:
            cmd_line.extend(['--id', self.min_pct_id])

        if self.evalue:
            cmd_line.extend(['--evalue', self.evalue])

        self.run.info('Command line',
                      ' '.join([str(p) for p in cmd_line]),
                      quiet=(not anvio.DEBUG))

        self.progress.new('DIAMOND')
        self.progress.update('running blastp (using %d thread(s)) ...' %
                             self.num_threads)

        output = utils.run_command_STDIN(cmd_line,
                                         self.run.log_file_path,
                                         multisequence,
                                         remove_log_file_if_exists=False)

        self.progress.end()

        # NOTE(review): assumes `output` is the captured text as one string,
        # so lines are counted here rather than characters -- confirm
        # against utils.run_command_STDIN.
        self.run.info('Diamond blastp results',
                      '%d lines were returned from STDIN call' %
                      len(output.splitlines()))
        return output