def _precommand_initiation(self, input_fp, output_dir, working_dir, params):
    """Stage shared reference resources before the parallel jobs are started.

    The work done depends on ``params['chimera_detection_method']``:

    * ``'blast_fragments'``: build a single BLAST database from
      ``params['reference_seqs_fp']`` inside ``working_dir`` and store its
      name in ``params['blast_db']``.
    * ``'ChimeraSlayer'``: copy the aligned reference (and the unaligned
      reference, when given) into ``working_dir``; create the unaligned
      reference from the aligned one when it was not given; then build the
      BLAST database and the ``.cidx`` index up front.

    ``params`` is modified in place so that it points at the staged files,
    and every file created here is registered in ``self.files_to_remove``.
    ``input_fp`` and ``output_dir`` are not used by this method.

    Raises:
        ValueError: if the chimera detection method is not recognized.
    """
    # Local import keeps this block self-contained; only `split` and `copy`
    # are known to be available at module level.
    from os.path import join

    method = params['chimera_detection_method']

    if method == 'blast_fragments':
        blast_db, db_files_to_remove = build_blast_db_from_fasta_path(
            params['reference_seqs_fp'], output_dir=working_dir)
        self.files_to_remove += db_files_to_remove
        params['blast_db'] = blast_db

    elif method == 'ChimeraSlayer':
        # Copy the reference files to the working dir: ChimeraSlayer creates
        # an index file of the reference and will crash without write
        # permission in the directory holding the reference sequences.
        original_aligned_fp = params['aligned_reference_seqs_fp']
        _, aligned_filename = split(original_aligned_fp)
        copy(original_aligned_fp, working_dir)
        aligned_reference_seqs_fp = join(working_dir, aligned_filename)
        self.files_to_remove.append(aligned_reference_seqs_fp)
        params['aligned_reference_seqs_fp'] = aligned_reference_seqs_fp

        reference_seqs_fp = params['reference_seqs_fp']
        if reference_seqs_fp:
            # An unaligned reference db was given: copy it as well.
            _, unaligned_filename = split(reference_seqs_fp)
            copy(reference_seqs_fp, working_dir)
            reference_seqs_fp = join(working_dir, unaligned_filename)
        else:
            # Otherwise create it by degapping the aligned reference. The
            # handle is closed as soon as the degapped file is written.
            with open(aligned_reference_seqs_fp) as aligned_f:
                reference_seqs_fp = write_degapped_fasta_to_file(
                    parse_fasta(aligned_f), tmp_dir=working_dir)
        # Either way the staged unaligned file is deleted afterwards.
        self.files_to_remove.append(reference_seqs_fp)
        params['reference_seqs_fp'] = reference_seqs_fp

        # Build the blast db of the reference here, otherwise ChimeraSlayer
        # will do it itself and parallel jobs clash.
        _, db_files_to_remove = \
            build_blast_db_from_fasta_path(reference_seqs_fp)
        self.files_to_remove += db_files_to_remove

        # Make the index file globally. ChimeraSlayer first checks whether
        # the index file is there and tries to create it if not, which can
        # lead to a race condition when several parallel jobs do so at the
        # same time.
        make_cidx_file(aligned_reference_seqs_fp)
        self.files_to_remove.append(aligned_reference_seqs_fp + ".cidx")

    else:
        raise ValueError("Unrecognized chimera detection method '%s'."
                         % method)
def _precommand_initiation(
        self, input_fp, output_dir, working_dir, params):
    """Build one shared BLAST database when reference sequences are given.

    When ``params['refseqs_path']`` is set, a BLAST database is built from
    it once, so that all processes access a single db instead of creating
    one each. The generated files are added to ``self.files_to_remove`` and
    the database name is stored in ``params['blast_db']``.

    ``input_fp``, ``output_dir`` and ``working_dir`` are not used here.
    """
    refseqs_path = params['refseqs_path']
    if not refseqs_path:
        # No reference sequences: nothing to prepare.
        return

    db_name, generated_files = build_blast_db_from_fasta_path(refseqs_path)
    self.files_to_remove += generated_files
    params['blast_db'] = db_name
def _precommand_initiation(self, input_fp, output_dir, working_dir, params):
    """Make sure ``params['blast_db']`` is populated before jobs start.

    If no BLAST database was supplied, one is built from
    ``params['refseqs_fp']`` so that all processes access a single db
    rather than creating one per process. The generated files are added to
    ``self.files_to_remove`` and the database name is written back to
    ``params['blast_db']``.

    ``input_fp``, ``output_dir`` and ``working_dir`` are not used here.
    """
    if params['blast_db']:
        # A database was already provided; nothing to build.
        return

    db_name, generated_files = \
        build_blast_db_from_fasta_path(params['refseqs_fp'])
    self.files_to_remove += generated_files
    params['blast_db'] = db_name
def _precommand_initiation(self, input_fp, output_dir, working_dir, params):
    """Make sure ``params["blast_db"]`` is populated before jobs start.

    If no BLAST database was supplied, one is built from
    ``params["reference_seqs_fp"]`` inside ``working_dir`` so that all
    processes access a single db rather than creating one per process. The
    generated files are added to ``self.files_to_remove`` and the database
    name is written back to ``params["blast_db"]``.

    ``input_fp`` and ``output_dir`` are not used here.
    """
    if params["blast_db"]:
        # A database was already provided; nothing to build.
        return

    reference_fp = params["reference_seqs_fp"]
    db_name, generated_files = build_blast_db_from_fasta_path(
        reference_fp, output_dir=working_dir
    )
    self.files_to_remove += generated_files
    params["blast_db"] = db_name
def _precommand_initiation(self, input_fp, output_dir, working_dir, params):
    """Prepare the shared BLAST database and the minimum length setting.

    * If ``params['blast_db']`` is not set, a BLAST database is built from
      ``params['template_fp']`` in the directory returned by
      ``get_qiime_temp_dir()``, so that all processes access one db rather
      than creating one per process. The generated files are added to
      ``self.files_to_remove`` and the db name is stored in
      ``params['blast_db']``.
    * If ``params['min_length']`` is negative, it is replaced by the value
      ``compute_min_alignment_length`` derives from the input sequences.

    ``output_dir`` and ``working_dir`` are not used by this method.
    """
    if not params['blast_db']:
        blast_db, db_files_to_remove = build_blast_db_from_fasta_path(
            params['template_fp'], output_dir=get_qiime_temp_dir())
        self.files_to_remove += db_files_to_remove
        params['blast_db'] = blast_db

    if params['min_length'] < 0:
        # Close the input handle once the length is computed instead of
        # leaving it to the garbage collector.
        # NOTE(review): the 'U' mode is kept unchanged for compatibility; it
        # was removed in Python 3.11, so revisit if this code is ported.
        with open(input_fp, 'U') as input_f:
            params['min_length'] = compute_min_alignment_length(input_f)
def _precommand_initiation(
        self, input_fp, output_dir, working_dir, params):
    """Prepare the shared BLAST database and the minimum length setting.

    When ``params['blast_db']`` is empty, a database is built once from
    ``params['template_fp']`` (in the directory returned by
    ``get_qiime_temp_dir()``) so that all processes access one db rather
    than creating one per process; its files are registered in
    ``self.files_to_remove`` and its name is stored in
    ``params['blast_db']``.

    When ``params['min_length']`` is negative, it is overwritten with the
    result of ``compute_min_alignment_length`` on the input file.

    ``output_dir`` and ``working_dir`` are not used by this method.
    """
    if not params['blast_db']:
        blast_db, db_files_to_remove = \
            build_blast_db_from_fasta_path(params['template_fp'],
                                           output_dir=get_qiime_temp_dir())
        self.files_to_remove += db_files_to_remove
        params['blast_db'] = blast_db

    if params['min_length'] < 0:
        # Use a context manager so the input file is closed deterministically
        # rather than being left open after the length has been computed.
        # NOTE(review): 'U' mode is preserved as-is; it no longer exists in
        # Python 3.11+, so it needs attention if this code is ported.
        with open(input_fp, 'U') as input_f:
            params['min_length'] = compute_min_alignment_length(input_f)
def __init__(self, params):
    """Return new BlastFragmentsChimeraChecker object with specified params.

    ``params`` is merged over these defaults: ``max_e_value`` (1e-30),
    ``min_pct_id`` (0.90), ``num_fragments`` (3), ``taxonomy_depth`` (4).

    Required keys in ``params``:
        id_to_taxonomy_fp: passed to the taxon assigner as its
            ``id_to_taxonomy_filepath``.
        blast_db or reference_seqs_fp: an existing BLAST database, or the
            sequences to build one from when ``blast_db`` is absent/None.

    Files created while building a database are recorded in
    ``self._db_files_to_remove`` (empty when ``blast_db`` was supplied).

    Raises:
        ValueError: if ``id_to_taxonomy_fp`` is missing, or if neither a
            ``blast_db`` nor a ``reference_seqs_fp`` is provided.
    """
    _params = {
        'max_e_value': 1e-30,
        'min_pct_id': 0.90,
        'num_fragments': 3,
        'taxonomy_depth': 4,
    }
    _params.update(params)

    try:
        id_to_taxonomy_fp = params['id_to_taxonomy_fp']
    except KeyError:
        # Name the key that is actually read so the caller can fix it.
        raise ValueError(
            "id_to_taxonomy_fp must be provided to %s" % self.Name)

    # _params holds every caller-supplied key, and none of the defaults
    # collide with 'blast_db' / 'reference_seqs_fp'.
    blast_db = _params.get('blast_db')
    if blast_db is None:
        # Create the blast database if it hasn't been provided. A value of
        # None for the reference sequences is treated like a missing key
        # rather than being handed to the database builder.
        reference_seqs_fp = _params.get('reference_seqs_fp')
        if reference_seqs_fp is None:
            raise ValueError(
                "reference_seqs_fp or blast_db must be provided to %s"
                % self.Name)
        blast_db, self._db_files_to_remove = \
            build_blast_db_from_fasta_path(reference_seqs_fp)
    else:
        self._db_files_to_remove = []

    self._taxon_assigner = BlastTaxonAssigner({
        'blast_db': blast_db,
        'id_to_taxonomy_filepath': id_to_taxonomy_fp,
        'Max E value': _params['max_e_value'],
        'Min percent identity': _params['min_pct_id'],
    })
    ChimeraChecker.__init__(self, _params)
def __init__(self, params):
    """Return new BlastFragmentsChimeraChecker object with specified params.

    Defaults (overridden by matching keys in ``params``): ``max_e_value``
    1e-30, ``min_pct_id`` 0.90, ``num_fragments`` 3, ``taxonomy_depth`` 4.

    ``params`` must contain ``id_to_taxonomy_fp`` and either ``blast_db``
    or ``reference_seqs_fp``. When ``blast_db`` is absent or None, a BLAST
    database is built from ``reference_seqs_fp`` and the files created are
    kept in ``self._db_files_to_remove``; otherwise that list is empty.

    Raises:
        ValueError: if ``id_to_taxonomy_fp`` is missing, or if neither a
            ``blast_db`` nor a ``reference_seqs_fp`` is provided.
    """
    _params = {'max_e_value': 1e-30,
               'min_pct_id': 0.90,
               'num_fragments': 3,
               'taxonomy_depth': 4}
    _params.update(params)

    try:
        id_to_taxonomy_fp = params['id_to_taxonomy_fp']
    except KeyError:
        # The message names the key this constructor really looks up.
        raise ValueError(
            "id_to_taxonomy_fp must be provided to %s" % self.Name)

    # Every key of params is also in _params, and the defaults above do not
    # define 'blast_db' or 'reference_seqs_fp'.
    blast_db = _params.get('blast_db')
    if blast_db is not None:
        self._db_files_to_remove = []
    else:
        # Create the blast database if it hasn't been provided. A None
        # reference path is rejected here instead of being passed on to
        # the database builder.
        reference_seqs_fp = _params.get('reference_seqs_fp')
        if reference_seqs_fp is None:
            raise ValueError(
                "reference_seqs_fp or blast_db must be provided to %s" %
                self.Name)
        blast_db, self._db_files_to_remove = \
            build_blast_db_from_fasta_path(reference_seqs_fp)

    self._taxon_assigner = BlastTaxonAssigner(
        {'blast_db': blast_db,
         'id_to_taxonomy_filepath': id_to_taxonomy_fp,
         'Max E value': _params['max_e_value'],
         'Min percent identity': _params['min_pct_id']
         })
    ChimeraChecker.__init__(self, _params)
def test_build_blast_db_from_fasta_path_aln(self):
    """build_blast_db_from_fasta_path accepts an alignment file as input"""
    db_name, created_files = build_blast_db_from_fasta_path(self.in_aln1_fp)
    self.assertEqual(db_name, self.in_aln1_fp)

    # The db is expected to consist of exactly these files.
    db_extensions = ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']
    expected_files = set(db_name + ext for ext in db_extensions)
    self.assertEqual(set(created_files), expected_files)

    # Blasting against the new db returns a result of length one.
    blast_result = blastn(self.test_seq, blast_db=db_name, e_value=0.0)
    self.assertEqual(len(blast_result), 1)

    # All db files exist before the clean-up ...
    for db_fp in created_files:
        self.assertTrue(exists(db_fp))

    remove_files(created_files)

    # ... and none of them is left afterwards.
    for db_fp in created_files:
        self.assertFalse(exists(db_fp))
def test_build_blast_db_from_fasta_path(self):
    """build_blast_db_from_fasta_path builds a usable db from a fasta file"""
    db_name, created_files = build_blast_db_from_fasta_path(self.in_seqs1_fp)
    self.assertEqual(db_name, self.in_seqs1_fp)

    # The db is expected to consist of exactly these files.
    db_extensions = ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']
    expected_files = set(self.in_seqs1_fp + ext for ext in db_extensions)
    self.assertEqual(set(created_files), expected_files)

    # Blasting against the new db returns a result of length one.
    blast_result = blastn(self.test_seq, blast_db=db_name)
    self.assertEqual(len(blast_result), 1)

    # All db files exist before the clean-up ...
    for db_fp in created_files:
        self.assertTrue(exists(db_fp))

    remove_files(created_files)

    # ... and none of them is left afterwards.
    for db_fp in created_files:
        self.assertFalse(exists(db_fp))