Example #1
0
    def _precommand_initiation(self, input_fp, output_dir, working_dir,
                               params):
        if params['chimera_detection_method'] == 'blast_fragments':
            blast_db, db_files_to_remove = \
                build_blast_db_from_fasta_path(params['reference_seqs_fp'],
                                               output_dir=working_dir)
            self.files_to_remove += db_files_to_remove
            params['blast_db'] = blast_db
        elif params['chimera_detection_method'] == 'ChimeraSlayer':
            # copy the reference files to working dir
            # ChimeraSlayer creates an index file of the ref and
            # will crash without write permission in the ref seqs dir
            aligned_reference_seqs_fp = params['aligned_reference_seqs_fp']
            _, new_ref_filename = split(aligned_reference_seqs_fp)
            copy(aligned_reference_seqs_fp, working_dir)
            aligned_reference_seqs_fp = working_dir + "/" + new_ref_filename

            self.files_to_remove.append(aligned_reference_seqs_fp)
            params['aligned_reference_seqs_fp'] = aligned_reference_seqs_fp

            # if given, also copy the unaligned ref db
            reference_seqs_fp = params['reference_seqs_fp']
            if reference_seqs_fp:
                _, new_ref_filename = split(reference_seqs_fp)
                copy(reference_seqs_fp, working_dir)
                reference_seqs_fp = working_dir + "/" + new_ref_filename
            else:
                # otherwise create it
                reference_seqs_fp = write_degapped_fasta_to_file(
                    parse_fasta(open(aligned_reference_seqs_fp)),
                    tmp_dir=working_dir)
            # delete it afterwards
            self.files_to_remove.append(reference_seqs_fp)
            params['reference_seqs_fp'] = reference_seqs_fp

            # build blast db of reference, otherwise ChimeraSlayer will do it
            # and parallel jobs clash
            _, db_files_to_remove = \
                build_blast_db_from_fasta_path(reference_seqs_fp)
            self.files_to_remove += db_files_to_remove

            # make the index file globally
            # Reason: ChimeraSlayer first checks to see if the index file is
            # there. If not it tries to create it. This can lead to race
            # condition if several parallel jobs try to create it at the same
            # time.
            make_cidx_file(aligned_reference_seqs_fp)
            self.files_to_remove.append(aligned_reference_seqs_fp + ".cidx")
        else:
            raise ValueError("Unrecognized chimera detection method '%s'." %
                             params['chimera_detection_method'])
    def _precommand_initiation(self, input_fp, output_dir, working_dir,
                               params):
        if params['chimera_detection_method'] == 'blast_fragments':
            blast_db, db_files_to_remove = \
                build_blast_db_from_fasta_path(params['reference_seqs_fp'],
                                               output_dir=working_dir)
            self.files_to_remove += db_files_to_remove
            params['blast_db'] = blast_db
        elif params['chimera_detection_method'] == 'ChimeraSlayer':
            # copy the reference files to working dir
            # ChimeraSlayer creates an index file of the ref and
            # will crash without write permission in the ref seqs dir
            aligned_reference_seqs_fp = params['aligned_reference_seqs_fp']
            _, new_ref_filename = split(aligned_reference_seqs_fp)
            copy(aligned_reference_seqs_fp, working_dir)
            aligned_reference_seqs_fp = working_dir + "/" + new_ref_filename

            self.files_to_remove.append(aligned_reference_seqs_fp)
            params['aligned_reference_seqs_fp'] = aligned_reference_seqs_fp

            # if given, also copy the unaligned ref db
            reference_seqs_fp = params['reference_seqs_fp']
            if reference_seqs_fp:
                _, new_ref_filename = split(reference_seqs_fp)
                copy(reference_seqs_fp, working_dir)
                reference_seqs_fp = working_dir + "/" + new_ref_filename
            else:
                # otherwise create it
                reference_seqs_fp = write_degapped_fasta_to_file(
                    parse_fasta(open(aligned_reference_seqs_fp)),
                    tmp_dir=working_dir)
            # delete it afterwards
            self.files_to_remove.append(reference_seqs_fp)
            params['reference_seqs_fp'] = reference_seqs_fp

            # build blast db of reference, otherwise ChimeraSlayer will do it
            # and parallel jobs clash
            _, db_files_to_remove = \
                build_blast_db_from_fasta_path(reference_seqs_fp)
            self.files_to_remove += db_files_to_remove

            # make the index file globally
            # Reason: ChimeraSlayer first checks to see if the index file is
            # there. If not it tries to create it. This can lead to race
            # condition if several parallel jobs try to create it at the same
            # time.
            make_cidx_file(aligned_reference_seqs_fp)
            self.files_to_remove.append(aligned_reference_seqs_fp + ".cidx")
        else:
            raise ValueError("Unrecognized chimera detection method '%s'." %
                             params['chimera_detection_method'])
Example #3
0
 def _precommand_initiation(
         self, input_fp, output_dir, working_dir, params):
     if params['refseqs_path']:
         # Build the blast database from the refseqs_path -- all procs
         # will then access one db rather than create one per proc.
         blast_db, db_files_to_remove = \
             build_blast_db_from_fasta_path(params['refseqs_path'])
         self.files_to_remove += db_files_to_remove
         params['blast_db'] = blast_db
Example #4
0
 def _precommand_initiation(self, input_fp, output_dir, working_dir,
                            params):
     if not params['blast_db']:
         # Build the blast database from the reference_seqs_fp -- all procs
         # will then access one db rather than create one per proc
         blast_db, db_files_to_remove = \
             build_blast_db_from_fasta_path(params['refseqs_fp'])
         self.files_to_remove += db_files_to_remove
         params['blast_db'] = blast_db
Example #5
0
 def _precommand_initiation(self, input_fp, output_dir, working_dir, params):
     if not params["blast_db"]:
         # Build the blast database from the reference_seqs_fp -- all procs
         # will then access one db rather than create one per proc.
         blast_db, db_files_to_remove = build_blast_db_from_fasta_path(
             params["reference_seqs_fp"], output_dir=working_dir
         )
         self.files_to_remove += db_files_to_remove
         params["blast_db"] = blast_db
Example #6
0
    def _precommand_initiation(self, input_fp, output_dir, working_dir,
                               params):
        if not params['blast_db']:
            # Build the blast database from the reference_seqs_fp -- all procs
            # will then access one db rather than create one per proc
            blast_db, db_files_to_remove = \
                build_blast_db_from_fasta_path(params['template_fp'],
                                               output_dir=get_qiime_temp_dir())
            self.files_to_remove += db_files_to_remove
            params['blast_db'] = blast_db

        if params['min_length'] < 0:
            params['min_length'] = compute_min_alignment_length(
                open(input_fp, 'U'))
Example #7
0
    def _precommand_initiation(
            self, input_fp, output_dir, working_dir, params):
        if not params['blast_db']:
            # Build the blast database from the reference_seqs_fp -- all procs
            # will then access one db rather than create one per proc
            blast_db, db_files_to_remove = \
                build_blast_db_from_fasta_path(params['template_fp'],
                                               output_dir=get_qiime_temp_dir())
            self.files_to_remove += db_files_to_remove
            params['blast_db'] = blast_db

        if params['min_length'] < 0:
            params['min_length'] = compute_min_alignment_length(
                open(input_fp, 'U'))
Example #8
0
    def __init__(self, params):
        """Return new BlastFragmentsChimeraChecker object with specified params.

        """
        _params = {
            'max_e_value': 1e-30,
            'min_pct_id': 0.90,
            'num_fragments': 3,
            'taxonomy_depth': 4
        }
        _params.update(params)

        try:
            id_to_taxonomy_fp = params['id_to_taxonomy_fp']
        except KeyError:
            raise ValueError("id_to_taxonomy_filepath must be provided to %s" %
                             self.Name)

        # Create the blast database if it hasn't been provided
        if 'blast_db' not in params or params['blast_db'] is None:
            try:
                reference_seqs_fp = params['reference_seqs_fp']
            except KeyError:
                raise ValueError(
                    "refseqs_fp or blast_db must be provided to  %s" %
                    self.Name)
            blast_db, self._db_files_to_remove = \
                build_blast_db_from_fasta_path(reference_seqs_fp)
        else:
            blast_db = params['blast_db']
            self._db_files_to_remove = []

        self._taxon_assigner = BlastTaxonAssigner({
            'blast_db':
            blast_db,
            'id_to_taxonomy_filepath':
            id_to_taxonomy_fp,
            'Max E value':
            _params['max_e_value'],
            'Min percent identity':
            _params['min_pct_id']
        })

        ChimeraChecker.__init__(self, _params)
    def __init__(self, params):
        """Return new BlastFragmentsChimeraChecker object with specified params.

        """
        _params = {'max_e_value': 1e-30,
                   'min_pct_id': 0.90,
                   'num_fragments': 3,
                   'taxonomy_depth': 4}
        _params.update(params)

        try:
            id_to_taxonomy_fp = params['id_to_taxonomy_fp']
        except KeyError:
            raise ValueError(
                "id_to_taxonomy_filepath must be provided to %s" %
                self.Name)

        # Create the blast database if it hasn't been provided
        if 'blast_db' not in params or params['blast_db'] is None:
            try:
                reference_seqs_fp = params['reference_seqs_fp']
            except KeyError:
                raise ValueError(
                    "refseqs_fp or blast_db must be provided to  %s" %
                    self.Name)
            blast_db, self._db_files_to_remove = \
                build_blast_db_from_fasta_path(reference_seqs_fp)
        else:
            blast_db = params['blast_db']
            self._db_files_to_remove = []

        self._taxon_assigner = BlastTaxonAssigner(
            {'blast_db': blast_db,
             'id_to_taxonomy_filepath': id_to_taxonomy_fp,
             'Max E value': _params['max_e_value'],
             'Min percent identity': _params['min_pct_id']
             })

        ChimeraChecker.__init__(self, _params)
Example #10
0
    def test_build_blast_db_from_fasta_path_aln(self):
        """build_blast_db_from_fasta_path works with alignment as input
        """
        db_name, created_fps = build_blast_db_from_fasta_path(
            self.in_aln1_fp)

        # the db is named after the input file
        self.assertEqual(db_name, self.in_aln1_fp)

        # exactly these companion files are reported
        suffixes = ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']
        expected_fps = set(db_name + suffix for suffix in suffixes)
        self.assertEqual(set(created_fps), expected_fps)

        # result returned when blasting against new db
        hits = blastn(self.test_seq, blast_db=db_name, e_value=0.0)
        self.assertEqual(len(hits), 1)

        # every reported file is present on disk
        for created_fp in created_fps:
            self.assertTrue(exists(created_fp))

        # clean up the db files
        remove_files(created_fps)

        # and none of them is left behind afterwards
        for created_fp in created_fps:
            self.assertFalse(exists(created_fp))
Example #11
0
    def test_build_blast_db_from_fasta_path(self):
        """build_blast_db_from_fasta_path convenience function works as expected
        """
        db_name, created_fps = build_blast_db_from_fasta_path(
            self.in_seqs1_fp)

        # the db is named after the input file
        self.assertEqual(db_name, self.in_seqs1_fp)

        # exactly these companion files are reported
        suffixes = ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']
        expected_fps = set(self.in_seqs1_fp + suffix for suffix in suffixes)
        self.assertEqual(set(created_fps), expected_fps)

        # result returned when blasting against new db
        hits = blastn(self.test_seq, blast_db=db_name)
        self.assertEqual(len(hits), 1)

        # every reported file is present on disk
        for created_fp in created_fps:
            self.assertTrue(exists(created_fp))

        # clean up the db files
        remove_files(created_fps)

        # and none of them is left behind afterwards
        for created_fp in created_fps:
            self.assertFalse(exists(created_fp))