def initial_iteration(self):
    '''
    Runs BLAST in a modified way as the initial iteration.

    The BLAST result is written to "<output_base>.1<output_ext>" inside
    self.temp_dir and parsed. On the last entry of the parsed result,
    remove_overlapping_hsps() and set_combine_hsps() are called, and the
    entry is then written as PSI-BLAST alignment input to
    "<output_base>.1.aln" in the same directory.

    Side effects: sets self.current_runner, self.current_msa_output and
    self.current_iteration (to 1), and appends the alignment file name to
    self.msa_files.

    Returns the processed MSA object.
    '''
    temp_output = os.path.join(
        self.temp_dir, '%s.1%s' % (self.output_base, self.output_ext))
    runner = BLASTRunner(input=self.processed_input,
                         output=temp_output,
                         **self.kwargs)
    runner.run()

    blast = BLAST.BLAST(self.processed_input, runner.output)
    msa = blast[-1]

    #remove hits
    msa.remove_overlapping_hsps()
    msa.set_combine_hsps()

    msa_output = os.path.join(self.temp_dir,
                              '%s.1.aln' % (self.output_base))
    # "with" guarantees the handle is closed even if writing raises.
    with open(msa_output, 'w') as fp:
        msa.build_psiblast_alignment_input(fp)

    self.current_runner = runner
    self.current_msa_output = msa_output
    self.current_iteration = 1
    self.msa_files.append(msa_output)

    return msa
def next_iteration(self):
    '''
    Runs BLAST as a subsequent iteration after the initial iteration.

    Override this method in an inherited class to customise the iteration.

    The current runner and alignment file are first saved as the
    "previous" ones. BLAST is then run with the previous alignment file
    as input_alignment, writing to "<output_base>.<i><output_ext>" in
    self.temp_dir, where i is the incremented iteration number. The first
    entry of the parsed result is written as PSI-BLAST alignment input to
    "<output_base>.<i>.aln".

    Side effects: increments self.current_iteration, updates
    self.previous_runner, self.previous_msa_output, self.current_runner
    and self.current_msa_output, and appends the new alignment file name
    to self.msa_files.

    Returns the first entry of the parsed BLAST result (the MSA).
    '''
    self.current_iteration += 1
    self.previous_runner = self.current_runner
    self.previous_msa_output = self.current_msa_output

    i = self.current_iteration
    temp_output = os.path.join(
        self.temp_dir,
        '%s.%s%s' % (self.output_base, str(i), self.output_ext))

    runner = BLASTRunner(input=self.processed_input,
                         output=temp_output,
                         input_alignment=self.previous_msa_output,
                         **self.kwargs)
    runner.run()

    blast = BLAST.BLAST(self.processed_input, runner.output)
    # Look the MSA up once; it is both written out and returned.
    msa = blast[0]

    msa_output = os.path.join(self.temp_dir,
                              '%s.%s.aln' % (self.output_base, str(i)))
    # "with" guarantees the handle is closed even if writing raises.
    with open(msa_output, 'w') as fp:
        msa.build_psiblast_alignment_input(fp)

    #finally
    self.current_runner = runner
    self.current_msa_output = msa_output
    self.msa_files.append(msa_output)

    return msa
def initial_iteration(self):
    '''
    Runs BLAST in a modified way as the initial iteration.

    Override this method in an inherited class to customise the iteration.

    The BLAST result is written to "<output_base>.1<output_ext>" inside
    self.temp_dir and parsed. The first entry of the parsed result is
    written as PSI-BLAST alignment input to "<output_base>.1.aln" in the
    same directory.

    Side effects: sets self.current_runner, self.current_msa_output and
    self.current_iteration (to 1), and appends the alignment file name to
    self.msa_files.

    Returns the first entry of the parsed BLAST result (the MSA).
    '''
    temp_output = os.path.join(
        self.temp_dir, '%s.1%s' % (self.output_base, self.output_ext))
    runner = BLASTRunner(input=self.processed_input,
                         output=temp_output,
                         **self.kwargs)
    runner.run()

    blast = BLAST.BLAST(self.processed_input, runner.output)
    # Look the MSA up once; it is both written out and returned.
    msa = blast[0]

    msa_output = os.path.join(self.temp_dir,
                              '%s.1.aln' % (self.output_base))
    # "with" guarantees the handle is closed even if writing raises.
    with open(msa_output, 'w') as fp:
        msa.build_psiblast_alignment_input(fp)

    self.current_runner = runner
    self.current_msa_output = msa_output
    self.current_iteration = 1
    self.msa_files.append(msa_output)

    return msa
def parse(self):
    '''
    Parse the BLAST result and return it as a BLAST object.

    The parser is constructed with self.processed_inputfasta as the query
    and self.output as the result file name. For details of the returned
    object, refer to the evdblib.Utils.Parsers.BLAST package.
    '''
    return BLAST.BLAST(query=self.processed_inputfasta,
                       blast_result_fn=self.output)
def initial_iteration(self, echo=True):
    '''
    Run the initial BLAST and prepare its alignment output.

    echo: when true, a progress message is printed before the overlapping
        regions are purged.

    Steps:
      1. Run BLAST, writing to "<output_base>.1<output_ext>" in
         self.temp_dir, and parse the result.
      2. Take the last entry of the parsed result as the MSA and flag it
         with set_combine_hsps().
      3. If self.use_overlapping_purging is set, purge overlapping HSPs
         against a ScoreMat initialised with set_blosum_mat(); the
         multithreading variant is used when
         self.number_of_processors > 1.
      4. If self.use_backblast_purging is set, run a BackblastPurger for
         every MSA in self.neighboring_msas.
      5. Write the MSA as PSI-BLAST alignment input to the file name
         produced by build_profile_filename(..., 1, '.aln').

    Side effects: sets self.current_runner, self.current_msa_output,
    self.current_iteration (to 1) and self.current_parser, and appends the
    alignment file name to self.msa_files.

    Returns the processed MSA object.
    '''
    #run blast
    temp_output = os.path.join(
        self.temp_dir, '%s.1%s' % (self.output_base, self.output_ext))
    runner = BLASTRunner(input=self.processed_input,
                         output=temp_output,
                         **self.kwargs)
    runner.run()

    #parse blast output
    blast = BLAST.BLAST(self.processed_input, runner.output)
    msa = blast[-1]

    #flagging for combining HSPs for B input alignment.
    msa.set_combine_hsps()

    #purge the result
    if self.use_overlapping_purging:
        if echo:
            print('purging overlapping regions...')

        #build pssm
        #for initial iteration, BLOSUM 62 matrix is used!
        pssm = ScoreMat()
        pssm.set_blosum_mat()

        #start to purge the matrix
        if self.number_of_processors > 1:
            msa.purge_overlapping_hsps_multithreading(
                self.inserted_positions, pssm, self.number_of_processors)
        else:
            msa.purge_overlapping_hsps(self.inserted_positions, pssm)

    if self.use_backblast_purging:
        for neighbor_msa in self.neighboring_msas:
            # The purger is constructed only for its effect on msa; the
            # instance itself is not needed afterwards.
            BackblastPurger(msa, neighbor_msa, **self.kwargs)

    msa_output = build_profile_filename(self.temp_dir, self.output_base,
                                        1, '.aln')
    # "with" guarantees the handle is closed even if writing raises.
    with open(msa_output, 'w') as msa_output_fp:
        msa.build_psiblast_alignment_input(msa_output_fp)

    self.current_runner = runner
    self.current_msa_output = msa_output
    self.current_iteration = 1
    self.current_parser = blast
    self.msa_files.append(msa_output)

    return msa
def next_iteration(self, echo=True):
    '''
    Run a subsequent BLAST iteration and prepare its alignment output.

    echo: when true, a progress message is printed before the overlapping
        regions are purged.

    The current runner, alignment file and parser are first saved as the
    "previous" ones. BLAST is then run with the previous alignment file as
    input_alignment, writing to "<output_base>.<i><output_ext>" in
    self.temp_dir, where i is the incremented iteration number. The last
    entry of the parsed result is taken as the MSA and flagged with
    set_combine_hsps().

    If self.use_overlapping_purging is set, overlapping HSPs are purged
    against a ScoreMat built with build_pssm() from the last entry of the
    previous parser. If self.use_backblast_purging is set, a
    BackblastPurger is run for every MSA in self.neighboring_msas.

    Side effects: increments self.current_iteration, updates the
    previous_* and current_* runner / msa_output / parser attributes, and
    appends the new alignment file name to self.msa_files.

    Returns the processed MSA object.
    '''
    self.current_iteration += 1
    self.previous_runner = self.current_runner
    self.previous_msa_output = self.current_msa_output
    self.previous_parser = self.current_parser

    i = self.current_iteration
    temp_output = os.path.join(
        self.temp_dir,
        '%s.%s%s' % (self.output_base, str(i), self.output_ext))

    runner = BLASTRunner(input=self.processed_input,
                         output=temp_output,
                         input_alignment=self.previous_msa_output,
                         **self.kwargs)
    runner.run()

    blast = BLAST.BLAST(self.processed_input, runner.output)
    msa = blast[-1]
    msa.set_combine_hsps()

    # NOTE(review): unlike the echo-aware initial_iteration, this method
    # never uses the multithreading purge and does not forward
    # self.kwargs to BackblastPurger -- confirm whether that is
    # intentional before changing it.
    if self.use_overlapping_purging:
        if echo:
            print('purging overlapping regions...')

        pssm = ScoreMat()
        pssm.build_pssm(self.previous_parser[-1], **self.kwargs)
        msa.purge_overlapping_hsps(self.inserted_positions, pssm)

    if self.use_backblast_purging:
        for neighbor_msa in self.neighboring_msas:
            # The purger is constructed only for its effect on msa; the
            # instance itself is not needed afterwards.
            BackblastPurger(msa, neighbor_msa)

    #make msa output
    msa_output = build_profile_filename(self.temp_dir, self.output_base,
                                        i, '.aln')
    # "with" guarantees the handle is closed even if writing raises.
    with open(msa_output, 'w') as msa_output_fp:
        msa.build_psiblast_alignment_input(msa_output_fp)

    #finally
    self.current_runner = runner
    self.current_msa_output = msa_output
    self.current_parser = blast
    self.msa_files.append(msa_output)

    return msa
def next_iteration(self):
    '''
    Runs BLAST as a subsequent iteration after the initial iteration.

    The current runner and alignment file are first saved as the
    "previous" ones. BLAST is then run with the previous alignment file
    as input_alignment, writing to "<output_base>.<i><output_ext>" in
    self.temp_dir, where i is the incremented iteration number. On the
    last entry of the parsed result, remove_overlapping_hsps() and
    set_combine_hsps() are called, and the entry is written as PSI-BLAST
    alignment input to "<output_base>.<i>.aln".

    Side effects: increments self.current_iteration, updates
    self.previous_runner, self.previous_msa_output, self.current_runner
    and self.current_msa_output, and appends the new alignment file name
    to self.msa_files.

    Returns the processed MSA object.
    '''
    self.current_iteration += 1
    self.previous_runner = self.current_runner
    self.previous_msa_output = self.current_msa_output

    i = self.current_iteration
    temp_output = os.path.join(
        self.temp_dir,
        '%s.%s%s' % (self.output_base, str(i), self.output_ext))

    runner = BLASTRunner(input=self.processed_input,
                         output=temp_output,
                         input_alignment=self.previous_msa_output,
                         **self.kwargs)
    runner.run()

    blast = BLAST.BLAST(self.processed_input, runner.output)
    msa = blast[-1]

    #remove hits
    msa.remove_overlapping_hsps()
    msa.set_combine_hsps()

    msa_output = os.path.join(self.temp_dir,
                              '%s.%s.aln' % (self.output_base, str(i)))
    # "with" guarantees the handle is closed even if writing raises.
    with open(msa_output, 'w') as fp:
        msa.build_psiblast_alignment_input(fp)

    #finally
    self.current_runner = runner
    self.current_msa_output = msa_output
    self.msa_files.append(msa_output)

    return msa
def __init__(self, msa, neighboring_msa, echo=True, **kwargs):
    '''
    Builds PSIBLAST db for neighboring MSA and purge the alignment.

    This class is designed to be used to purge a single MSA
    using neighboring MSAs.
    For backblasting procedure, BackblastingPurgedBLASTRunner should be
    used.

    msa: the MSA to purge; it is modified through
        remove_overlapped_hit_regions[_multithreading]().
    neighboring_msa: the MSA used as query / input alignment for the
        back-BLAST run.
    echo: when true, a start message is printed.
    kwargs: forwarded to BLASTRunner after the input/output related keys
        (which are set explicitly here) have been dropped. If
        'number_of_processors' is present and greater than 1, the
        multithreading removal is used.
    '''
    self.del_names = []
    self.msa = msa
    self.neighboring_msa = neighboring_msa

    # The NamedTemporaryFile objects below stay referenced as locals for
    # the whole method so that their files exist while BLAST runs; only
    # their names are handed to the external tools.

    #build Hit DB
    hit_db_fp = tempfile.NamedTemporaryFile()
    hit_db = hit_db_fp.name
    self.build_hit_db(hit_db)
    self.del_names.append(hit_db)

    #build input MSA for blast
    # Text mode matches the open(..., 'w') handles that the other callers
    # in this file pass to build_psiblast_alignment_input; the default
    # binary mode rejects str writes on Python 3.
    input_alignment_fp = tempfile.NamedTemporaryFile(mode='w')
    input_alignment = input_alignment_fp.name
    self.neighboring_msa.build_psiblast_alignment_input(input_alignment_fp)
    # BLAST reads this file by name, so buffered data must be on disk.
    input_alignment_fp.flush()

    #prepare temporary output file for blast
    backblast_result_fp = tempfile.NamedTemporaryFile()
    backblast_result = backblast_result_fp.name

    # These options are supplied explicitly to BLASTRunner below (or must
    # not leak in from the caller), so drop them from the pass-through
    # kwargs.
    for key in ('input', 'input_string', 'database', 'output',
                'effective_database_length', 'input_alignment',
                'profile_output', 'range'):
        kwargs.pop(key, None)

    if echo:
        print("\nStarting Backblast Purging...")

    #run blast
    runner = BLASTRunner(input_string=str(self.neighboring_msa.query),
                         output=backblast_result,
                         database=hit_db,
                         input_alignment=input_alignment,
                         effective_database_length=5000,
                         **kwargs)
    runner.run()

    blast = BLAST.BLAST(query=self.neighboring_msa.query,
                        blast_result_fn=backblast_result)
    contaminant_msa = blast[-1]

    # A missing or falsy setting means single-threaded operation.
    nthreads = kwargs.get('number_of_processors') or 1

    if nthreads > 1:
        self.msa.remove_overlapped_hit_regions_multithreading(
            contaminant_msa, nthreads)
    else:
        self.msa.remove_overlapped_hit_regions(contaminant_msa)
def build_neighboring_msas(input_fasta=None,
                           neighboring_fastas=None,
                           max_iterations_for_neighbors=None,
                           echo=True,
                           **kwargs):
    '''
    Run PSIBLAST to get the MSA for neighboring positions.

    input_fasta: optional object providing extract_fastas(); the fastas
        it returns for split_fragments=True, inverse=True are added to
        the neighbors.
    neighboring_fastas: optional list of fastas to search with. The list
        passed in is not modified.
    max_iterations_for_neighbors: value for the 'max_iterations' runner
        option; when None, default_max_iterations_for_neighbors is used.
    echo: when true, a progress line is printed for every neighbor.
    kwargs: forwarded to PSIBLASTRunner after the input/output related
        options have been neutralised.

    Returns a list holding the last entry of the parsed BLAST result of
    every neighboring fasta; the list is empty when there is nothing to
    search with.
    '''
    if verbose:
        print("neighboring_fastas:", neighboring_fastas)
        print("input_fasta")
        print(input_fasta)
        # input_fasta defaults to None, which has no sequencerange.
        if input_fasta is not None:
            print("input_fasta range:", input_fasta.sequencerange)

    if not input_fasta and not neighboring_fastas:
        return []

    if input_fasta:
        extracted = input_fasta.extract_fastas(split_fragments=True,
                                               inverse=True)
        if neighboring_fastas:
            # Build a new list so the caller's argument is left untouched.
            neighboring_fastas = list(neighboring_fastas) + list(extracted)
        else:
            neighboring_fastas = extracted

    if verbose:
        print("neighboring_fastas:", neighboring_fastas)

    if not neighboring_fastas:
        return []

    ###############
    #remove all input and output settings possibly screw things up.
    #so remove them.!!
    for key in ('input', 'output', 'save_dir', 'range'):
        if kwargs.get(key):
            kwargs[key] = None
    kwargs.pop('input_string', None)
    ###############

    #################################
    #need to set up the arguments.
    if max_iterations_for_neighbors is not None:
        kwargs['max_iterations'] = max_iterations_for_neighbors
    else:
        kwargs['max_iterations'] = default_max_iterations_for_neighbors
    #################################

    neighboring_msas = []
    for i, fasta in enumerate(neighboring_fastas):
        if echo:
            print('Building neighboring MSA', i, '...')

        # Only the name is used: PSIBLAST writes its result to that path
        # and the parser reads it back. The object is kept referenced so
        # the file exists for the duration of the iteration.
        namedfp = tempfile.NamedTemporaryFile()
        tempout = namedfp.name

        runner = PSIBLASTRunner(input_string=str(fasta),
                                output=tempout,
                                **kwargs)
        runner.run()

        blast = BLAST.BLAST(query=fasta, blast_result_fn=tempout)
        neighboring_msas.append(blast[-1])

    return neighboring_msas