Exemple #1
0
    def initial_iteration(self):
        '''
        Run BLAST in a modified way as the initial iteration.

        A BLASTRunner is run on self.processed_input with its output going
        to "<output_base>.1<output_ext>" inside self.temp_dir.  The last
        element of the parsed result is used as the MSA: its overlapping
        HSPs are removed, set_combine_hsps() is called on it, and it is
        written as PSI-BLAST alignment input to "<output_base>.1.aln".

        Side effects: sets self.current_runner, self.current_msa_output and
        self.current_iteration (to 1), and appends the alignment file path
        to self.msa_files.

        Returns the MSA object (blast[-1]).
        '''
        temp_output = os.path.join(
            self.temp_dir, '%s.1%s' % (self.output_base, self.output_ext))
        runner = BLASTRunner(input=self.processed_input,
                             output=temp_output,
                             **self.kwargs)
        runner.run()

        blast = BLAST.BLAST(self.processed_input, runner.output)
        msa = blast[-1]

        # remove hits
        msa.remove_overlapping_hsps()
        msa.set_combine_hsps()

        msa_output = os.path.join(self.temp_dir,
                                  '%s.1.aln' % (self.output_base))
        # The context manager closes the file even if the writer raises.
        with open(msa_output, 'w') as fp:
            msa.build_psiblast_alignment_input(fp)

        # Only the runner and the alignment path are remembered; the parsed
        # BLAST result is not kept on self.
        self.current_runner = runner
        self.current_msa_output = msa_output
        self.current_iteration = 1

        self.msa_files.append(msa_output)
        return msa
Exemple #2
0
    def next_iteration(self):
        '''
        Run BLAST as a subsequent iteration after the initial iteration.
        Modify this function for inherited classes.

        The iteration counter is incremented and the current runner and
        alignment path become the "previous" ones.  A new BLASTRunner is then
        run with the previous alignment file as input_alignment, and the
        first element of the parsed result is written as PSI-BLAST alignment
        input to "<output_base>.<iteration>.aln" inside self.temp_dir.

        Side effects: updates self.current_iteration, self.previous_runner,
        self.previous_msa_output, self.current_runner and
        self.current_msa_output, and appends the new alignment path to
        self.msa_files.

        Returns the MSA object (blast[0]).
        '''
        self.current_iteration += 1
        self.previous_runner = self.current_runner
        self.previous_msa_output = self.current_msa_output

        i = self.current_iteration
        temp_output = os.path.join(
            self.temp_dir,
            '%s.%s%s' % (self.output_base, i, self.output_ext))
        runner = BLASTRunner(input=self.processed_input,
                             output=temp_output,
                             input_alignment=self.previous_msa_output,
                             **self.kwargs)
        runner.run()

        blast = BLAST.BLAST(self.processed_input, runner.output)
        # Index once so the object written to disk is the one returned.
        msa = blast[0]

        msa_output = os.path.join(self.temp_dir,
                                  '%s.%s.aln' % (self.output_base, i))
        # The context manager closes the file even if the writer raises.
        with open(msa_output, 'w') as fp:
            msa.build_psiblast_alignment_input(fp)

        # Finally publish the results of this iteration; the parsed BLAST
        # result is not kept on self.
        self.current_runner = runner
        self.current_msa_output = msa_output

        self.msa_files.append(msa_output)
        return msa
Exemple #3
0
    def initial_iteration(self):
        '''
        Run BLAST in a modified way as the initial iteration.
        Modify this function for inherited classes.

        A BLASTRunner is run on self.processed_input with its output going
        to "<output_base>.1<output_ext>" inside self.temp_dir.  The first
        element of the parsed result is written as PSI-BLAST alignment input
        to "<output_base>.1.aln".

        Side effects: sets self.current_runner, self.current_msa_output and
        self.current_iteration (to 1), and appends the alignment file path
        to self.msa_files.

        Returns the MSA object (blast[0]).
        '''
        temp_output = os.path.join(
            self.temp_dir, '%s.1%s' % (self.output_base, self.output_ext))
        runner = BLASTRunner(input=self.processed_input,
                             output=temp_output,
                             **self.kwargs)
        runner.run()

        blast = BLAST.BLAST(self.processed_input, runner.output)
        # Index once so the object written to disk is the one returned.
        msa = blast[0]

        msa_output = os.path.join(self.temp_dir,
                                  '%s.1.aln' % (self.output_base))
        # The context manager closes the file even if the writer raises.
        with open(msa_output, 'w') as fp:
            msa.build_psiblast_alignment_input(fp)

        # The parsed BLAST result is not kept on self.
        self.current_runner = runner
        self.current_msa_output = msa_output
        self.current_iteration = 1

        self.msa_files.append(msa_output)
        return msa
Exemple #4
0
    def parse(self):
        '''
        Parse the BLAST result and return it as a BLAST object.

        The object is built from self.processed_inputfasta (passed as the
        query) and self.output (passed as the result file name).

        For details of the BLAST object,
        refer to the evdblib.Utils.Parsers.BLAST package.
        '''
        return BLAST.BLAST(query=self.processed_inputfasta,
                           blast_result_fn=self.output)
Exemple #5
0
    def initial_iteration(self, echo=True):
        '''
        Run the initial BLAST, optionally purge the result, and write the
        first alignment file.

        echo: when true, a progress message is printed before the
              overlapping-region purge.

        Steps:
          1. Run BLASTRunner on self.processed_input, writing to
             "<output_base>.1<output_ext>" inside self.temp_dir.
          2. Take the last element of the parsed result as the MSA and call
             set_combine_hsps() on it.
          3. If self.use_overlapping_purging is set, purge overlapping HSPs
             using a ScoreMat initialised with set_blosum_mat(); the
             multithreading variant is used when
             self.number_of_processors > 1.
          4. If self.use_backblast_purging is set, construct a
             BackblastPurger for every MSA in self.neighboring_msas.
          5. Write the MSA as PSI-BLAST alignment input to the path returned
             by build_profile_filename(temp_dir, output_base, 1, '.aln').

        Side effects: sets self.current_runner, self.current_msa_output,
        self.current_iteration (to 1) and self.current_parser, and appends
        the alignment path to self.msa_files.

        Returns the MSA object (blast[-1]).
        '''
        # run blast
        temp_output = os.path.join(
            self.temp_dir, '%s.1%s' % (self.output_base, self.output_ext))
        runner = BLASTRunner(input=self.processed_input,
                             output=temp_output,
                             **self.kwargs)
        runner.run()

        # parse blast output
        blast = BLAST.BLAST(self.processed_input, runner.output)
        msa = blast[-1]
        # flagging for combining HSPs for B input alignment.
        msa.set_combine_hsps()

        # purge the result
        if self.use_overlapping_purging:
            if echo:
                print('purging overlapping regions...')

            # No profile exists yet in the initial iteration, so the
            # BLOSUM 62 matrix is used as the scoring matrix.
            pssm = ScoreMat()
            pssm.set_blosum_mat()
            if self.number_of_processors > 1:
                msa.purge_overlapping_hsps_multithreading(
                    self.inserted_positions, pssm, self.number_of_processors)
            else:
                msa.purge_overlapping_hsps(self.inserted_positions, pssm)

        if self.use_backblast_purging:
            for neighbor_msa in self.neighboring_msas:
                # The purger is constructed only for its effect on msa;
                # the instance itself is not needed afterwards.
                BackblastPurger(msa, neighbor_msa, **self.kwargs)

        msa_output = build_profile_filename(self.temp_dir, self.output_base, 1,
                                            '.aln')
        # The context manager closes the file even if the writer raises.
        with open(msa_output, 'w') as msa_output_fp:
            msa.build_psiblast_alignment_input(msa_output_fp)

        self.current_runner = runner
        self.current_msa_output = msa_output
        self.current_iteration = 1
        self.current_parser = blast

        self.msa_files.append(msa_output)
        return msa
Exemple #6
0
    def next_iteration(self, echo=True):
        '''
        Run one more BLAST iteration, optionally purge the result, and write
        the next alignment file.

        echo: when true, a progress message is printed before the
              overlapping-region purge.

        The iteration counter is incremented and the current runner,
        alignment path and parser become the "previous" ones.  BLASTRunner
        is then run with the previous alignment file as input_alignment.
        The last element of the parsed result is taken as the MSA and
        set_combine_hsps() is called on it.  When
        self.use_overlapping_purging is set, a ScoreMat is built with
        build_pssm() from the last element of the previous parser and used
        to purge overlapping HSPs.  When self.use_backblast_purging is set,
        a BackblastPurger is constructed for every MSA in
        self.neighboring_msas.  The MSA is finally written as PSI-BLAST
        alignment input to the path returned by
        build_profile_filename(temp_dir, output_base, iteration, '.aln').

        Side effects: updates self.current_iteration, the previous_* and
        current_* runner / msa_output / parser attributes, and appends the
        new alignment path to self.msa_files.

        Returns the MSA object (blast[-1]).
        '''
        self.current_iteration += 1
        self.previous_runner = self.current_runner
        self.previous_msa_output = self.current_msa_output
        self.previous_parser = self.current_parser

        i = self.current_iteration
        temp_output = os.path.join(
            self.temp_dir,
            '%s.%s%s' % (self.output_base, i, self.output_ext))
        runner = BLASTRunner(input=self.processed_input,
                             output=temp_output,
                             input_alignment=self.previous_msa_output,
                             **self.kwargs)
        runner.run()

        blast = BLAST.BLAST(self.processed_input, runner.output)
        msa = blast[-1]
        msa.set_combine_hsps()

        if self.use_overlapping_purging:
            if echo:
                print('purging overlapping regions...')
            # The scoring matrix is derived from the previous iteration's
            # alignment.
            pssm = ScoreMat()
            pssm.build_pssm(self.previous_parser[-1], **self.kwargs)
            msa.purge_overlapping_hsps(self.inserted_positions, pssm)

        if self.use_backblast_purging:
            for neighbor_msa in self.neighboring_msas:
                # The purger is constructed only for its effect on msa;
                # the instance itself is not needed afterwards.
                # NOTE(review): no **self.kwargs are forwarded here, so the
                # purger runs with its defaults - confirm this is intended.
                BackblastPurger(msa, neighbor_msa)

        # make msa output
        msa_output = build_profile_filename(self.temp_dir, self.output_base, i,
                                            '.aln')
        # The context manager closes the file even if the writer raises.
        with open(msa_output, 'w') as msa_output_fp:
            msa.build_psiblast_alignment_input(msa_output_fp)

        # finally publish the results of this iteration
        self.current_runner = runner
        self.current_msa_output = msa_output
        self.current_parser = blast

        self.msa_files.append(msa_output)

        return msa
Exemple #7
0
    def next_iteration(self):
        '''
        Run BLAST as a subsequent iteration after the initial iteration.

        The iteration counter is incremented and the current runner and
        alignment path become the "previous" ones.  A new BLASTRunner is then
        run with the previous alignment file as input_alignment.  The last
        element of the parsed result is used as the MSA: its overlapping
        HSPs are removed, set_combine_hsps() is called on it, and it is
        written as PSI-BLAST alignment input to
        "<output_base>.<iteration>.aln" inside self.temp_dir.

        Side effects: updates self.current_iteration, self.previous_runner,
        self.previous_msa_output, self.current_runner and
        self.current_msa_output, and appends the new alignment path to
        self.msa_files.

        Returns the MSA object (blast[-1]).
        '''
        self.current_iteration += 1
        self.previous_runner = self.current_runner
        self.previous_msa_output = self.current_msa_output

        i = self.current_iteration
        temp_output = os.path.join(
            self.temp_dir,
            '%s.%s%s' % (self.output_base, i, self.output_ext))
        runner = BLASTRunner(input=self.processed_input,
                             output=temp_output,
                             input_alignment=self.previous_msa_output,
                             **self.kwargs)
        runner.run()

        blast = BLAST.BLAST(self.processed_input, runner.output)
        msa = blast[-1]

        # remove hits
        msa.remove_overlapping_hsps()
        msa.set_combine_hsps()

        msa_output = os.path.join(self.temp_dir,
                                  '%s.%s.aln' % (self.output_base, i))
        # The context manager closes the file even if the writer raises.
        with open(msa_output, 'w') as fp:
            msa.build_psiblast_alignment_input(fp)

        # Finally publish the results of this iteration; the parsed BLAST
        # result is not kept on self.
        self.current_runner = runner
        self.current_msa_output = msa_output

        self.msa_files.append(msa_output)
        return msa
Exemple #8
0
    def __init__(self, msa, neighboring_msa, echo=True, **kwargs):
        '''
        Build a PSIBLAST db for the neighboring MSA
        and purge the alignment.

        This class is designed to be used
        to purge a single MSA using neighboring MSAs.

        For the backblasting procedure,
        BackblastingPurgedBLASTRunner should be used.

        msa:             the MSA to purge; it is modified through its
                         remove_overlapped_hit_regions* methods.
        neighboring_msa: the MSA whose query and alignment drive the
                         back-BLAST run.
        echo:            when true, a start message is printed.
        kwargs:          extra options forwarded to BLASTRunner after the
                         input/output related keys have been stripped.  A
                         truthy 'number_of_processors' also selects the
                         multithreading purge when it is greater than 1.
        '''

        self.del_names = []

        self.msa = msa
        self.neighboring_msa = neighboring_msa

        # The NamedTemporaryFile objects below are kept in locals on purpose:
        # the files are used by name and must not go away while in use.

        # build Hit DB
        hit_db_fp = tempfile.NamedTemporaryFile()
        hit_db = hit_db_fp.name
        self.build_hit_db(hit_db)
        self.del_names.append(hit_db)

        # build input MSA for blast
        # Text mode matches the plain open(..., 'w') handles this writer is
        # given elsewhere; the default binary mode rejects str on Python 3.
        input_alignment_fp = tempfile.NamedTemporaryFile(mode='w')
        input_alignment = input_alignment_fp.name
        self.neighboring_msa.build_psiblast_alignment_input(input_alignment_fp)
        # The runner is given this file by name, so buffered data must be
        # on disk before it runs.
        input_alignment_fp.flush()

        # prepare temporary output file for blast
        backblast_result_fp = tempfile.NamedTemporaryFile()
        backblast_result = backblast_result_fp.name

        # Drop caller-supplied input/output options; the ones this run needs
        # are passed explicitly to BLASTRunner below.
        for key in ('input', 'input_string', 'database', 'output',
                    'effective_database_length', 'input_alignment',
                    'profile_output', 'range'):
            kwargs.pop(key, None)

        if echo:
            print("\nStarting Backblast Purging...")

        # run blast
        runner = BLASTRunner(input_string=str(self.neighboring_msa.query),
                             output=backblast_result,
                             database=hit_db,
                             input_alignment=input_alignment,
                             effective_database_length=5000,
                             **kwargs)
        runner.run()

        blast = BLAST.BLAST(query=self.neighboring_msa.query,
                            blast_result_fn=backblast_result)
        contaminant_msa = blast[-1]

        # A missing or falsy thread count means single-threaded purging.
        nthreads = kwargs.get('number_of_processors') or 1

        if nthreads > 1:
            self.msa.remove_overlapped_hit_regions_multithreading(
                contaminant_msa, nthreads)
        else:
            self.msa.remove_overlapped_hit_regions(contaminant_msa)
Exemple #9
0
def build_neighboring_msas(input_fasta=None,
                           neighboring_fastas=None,
                           max_iterations_for_neighbors=None,
                           echo=True,
                           **kwargs):
    '''
    Run PSIBLAST to get the MSA for neighboring positions.

    input_fasta:        optional fasta object.  When given (and truthy), the
                        result of its extract_fastas(split_fragments=True,
                        inverse=True) is added to the neighboring fastas.
    neighboring_fastas: optional list of fasta objects to build MSAs for.
                        The caller's list is never modified.
    max_iterations_for_neighbors:
                        value for the 'max_iterations' runner option; the
                        module-level default_max_iterations_for_neighbors is
                        used when this is None.
    echo:               when true, a progress line is printed per fasta.
    kwargs:             options forwarded to PSIBLASTRunner.  Truthy
                        'input', 'output', 'save_dir' and 'range' values are
                        reset to None and 'input_string' is removed first.

    Returns a list holding the last element of each parsed PSIBLAST result,
    one per neighboring fasta, or an empty list when there is nothing to
    run.
    '''

    if verbose:
        print("neighboring_fastas:", neighboring_fastas)
        print("input_fasta")
        print(input_fasta)
        # input_fasta is optional, so only report its range when it exists.
        if input_fasta is not None:
            print("input_fasta range:", input_fasta.sequencerange)

    if not input_fasta and not neighboring_fastas:
        return []

    if input_fasta:
        extracted = input_fasta.extract_fastas(split_fragments=True,
                                               inverse=True)
        if neighboring_fastas:
            # Build a new list instead of extending the caller's list.
            neighboring_fastas = list(neighboring_fastas) + list(extracted)
        else:
            neighboring_fastas = extracted

    if verbose:
        print("neighboring_fastas:", neighboring_fastas)

    if not neighboring_fastas:
        return []

    neighboring_msas = []

    # Input and output settings inherited from the caller could interfere
    # with the per-neighbor runs, so they are neutralised here.
    for key in ('input', 'output', 'save_dir', 'range'):
        if kwargs.get(key):
            kwargs[key] = None
    kwargs.pop('input_string', None)

    # Set up the iteration limit for the neighbor runs.
    if max_iterations_for_neighbors is not None:
        kwargs['max_iterations'] = max_iterations_for_neighbors
    else:
        kwargs['max_iterations'] = default_max_iterations_for_neighbors

    for i, fasta in enumerate(neighboring_fastas):

        if echo:
            print('Building neighboring MSA', i, '...')

        # The temporary file object stays referenced while the runner and
        # the parser use the file by name.
        namedfp = tempfile.NamedTemporaryFile()
        tempout = namedfp.name
        runner = PSIBLASTRunner(input_string=str(fasta),
                                output=tempout,
                                **kwargs)
        runner.run()

        blast = BLAST.BLAST(query=fasta, blast_result_fn=tempout)
        neighboring_msas.append(blast[-1])

    return neighboring_msas