Beispiel #1
0
    def _precommand_initiation(self, input_fp, output_dir, working_dir,
                               params):
        if params['chimera_detection_method'] == 'blast_fragments':
            blast_db, db_files_to_remove = \
                build_blast_db_from_fasta_path(params['reference_seqs_fp'],
                                               output_dir=working_dir)
            self.files_to_remove += db_files_to_remove
            params['blast_db'] = blast_db
        elif params['chimera_detection_method'] == 'ChimeraSlayer':
            # copy the reference files to working dir
            # ChimeraSlayer creates an index file of the ref and
            # will crash without write permission in the ref seqs dir
            aligned_reference_seqs_fp = params['aligned_reference_seqs_fp']
            _, new_ref_filename = split(aligned_reference_seqs_fp)
            copy(aligned_reference_seqs_fp, working_dir)
            aligned_reference_seqs_fp = working_dir + "/" + new_ref_filename

            self.files_to_remove.append(aligned_reference_seqs_fp)
            params['aligned_reference_seqs_fp'] = aligned_reference_seqs_fp

            # if given, also copy the unaligned ref db
            reference_seqs_fp = params['reference_seqs_fp']
            if reference_seqs_fp:
                _, new_ref_filename = split(reference_seqs_fp)
                copy(reference_seqs_fp, working_dir)
                reference_seqs_fp = working_dir + "/" + new_ref_filename
            else:
                # otherwise create it
                reference_seqs_fp = write_degapped_fasta_to_file(
                    parse_fasta(open(aligned_reference_seqs_fp)),
                    tmp_dir=working_dir)
            # delete it afterwards
            self.files_to_remove.append(reference_seqs_fp)
            params['reference_seqs_fp'] = reference_seqs_fp

            # build blast db of reference, otherwise ChimeraSlayer will do it
            # and parallel jobs clash
            _, db_files_to_remove = \
                build_blast_db_from_fasta_path(reference_seqs_fp)
            self.files_to_remove += db_files_to_remove

            # make the index file globally
            # Reason: ChimeraSlayer first checks to see if the index file is
            # there. If not it tries to create it. This can lead to race
            # condition if several parallel jobs try to create it at the same
            # time.
            make_cidx_file(aligned_reference_seqs_fp)
            self.files_to_remove.append(aligned_reference_seqs_fp + ".cidx")
        else:
            raise ValueError("Unrecognized chimera detection method '%s'." %
                             params['chimera_detection_method'])
    def _precommand_initiation(self, input_fp, output_dir, working_dir,
                               params):
        if params['chimera_detection_method'] == 'blast_fragments':
            blast_db, db_files_to_remove = \
                 build_blast_db_from_fasta_path(params['reference_seqs_fp'],
                                                output_dir=working_dir)
            self.files_to_remove += db_files_to_remove
            params['blast_db'] = blast_db
        elif params['chimera_detection_method'] == 'ChimeraSlayer':
            #copy the reference files to working dir
            #ChimeraSlayer creates an index file of the ref and
            #will crash without write permission in the ref seqs dir
            aligned_reference_seqs_fp = params['aligned_reference_seqs_fp']
            _, new_ref_filename = split(aligned_reference_seqs_fp)
            copy(aligned_reference_seqs_fp, working_dir)
            aligned_reference_seqs_fp = working_dir + "/" + new_ref_filename

            self.files_to_remove.append(aligned_reference_seqs_fp)
            params['aligned_reference_seqs_fp'] = aligned_reference_seqs_fp
     
            #if given, also copy the unaligned ref db
            reference_seqs_fp = params['reference_seqs_fp']
            if reference_seqs_fp:
                _, new_ref_filename = split(reference_seqs_fp)
                copy(reference_seqs_fp, working_dir)
                reference_seqs_fp = working_dir + "/" + new_ref_filename
            else:
                #otherwise create it
                reference_seqs_fp = write_degapped_fasta_to_file(
                        MinimalFastaParser(open(aligned_reference_seqs_fp)),
                                           tmp_dir=working_dir)
            #delete it afterwards
            self.files_to_remove.append(reference_seqs_fp)
            params['reference_seqs_fp'] = reference_seqs_fp

            #build blast db of reference, otherwise ChimeraSlayer will do it
            #and parallel jobs clash
            _, db_files_to_remove = \
                 build_blast_db_from_fasta_path(reference_seqs_fp)
            self.files_to_remove += db_files_to_remove

            #make the index file globally
            #Reason: ChimeraSlayer first checks to see if the index file is
            #there. If not it tries to create it. This can lead to race
            #condition if several parallel jobs try to create it at the same
            #time.
            make_cidx_file(aligned_reference_seqs_fp)
            self.files_to_remove.append(aligned_reference_seqs_fp + ".cidx")
        else:
            raise ValueError("Unrecognized chimera detection method '%s'." %
                             params['chimera_detection_method'])
def get_chimeras_from_Nast_aligned(seqs_fp,
                                   ref_db_aligned_fp=None,
                                   ref_db_fasta_fp=None,
                                   HALT_EXEC=False,
                                   min_div_ratio=None,
                                   keep_intermediates=False):
    """Run ChimeraSlayer on seqs_fp and return the parsed CPS output.

    seqs_fp: a filepath with the seqs to check in the file. Converted with
        str() and stripped of surrounding double quotes.
    ref_db_aligned_fp: fp to (pynast) aligned reference sequences.
    ref_db_fasta_fp: same seqs as above, just unaligned. Will be computed
        on the fly from ref_db_aligned_fp if not provided.
    HALT_EXEC: stop execution if true (passed to the ChimeraSlayer app).
    min_div_ratio: passed to ChimeraSlayer as '-R' when not None.
    keep_intermediates: if false, the app's intermediate files and any
        degapped reference file created here are removed before returning.

    If both reference arguments are None, no db options are passed and
    ChimeraSlayer uses its default db. Otherwise ref_db_aligned_fp is
    required: abspath() is applied to it unconditionally.

    Returns whatever parse_CPS_file produces for the app's 'CPS' result.
    """
    files_to_remove = []
    # might come in as FilePath object with quotes
    seqs_fp = str(seqs_fp).strip('"')

    seqs_dir, new_seqs_fp = split(seqs_fp)

    # if fp is in current dir, we fake a dir change
    if seqs_dir == "":
        seqs_dir = "./"

    # Chimera Slayer puts some temp files in current dir and some in dir of
    # input file. Use exec_dir to change to dir of input file, so to have
    # all tmp files in one place.
    params = {'--query_NAST': new_seqs_fp, '--exec_dir': seqs_dir}

    # With neither reference given we fall through and use the default db,
    # whose relative position to the ChimeraSlayer binary is hardcoded.
    if ref_db_aligned_fp is not None or ref_db_fasta_fp is not None:
        if not ref_db_fasta_fp:
            # make degapped reference file; close the aligned file as soon
            # as the degapped copy has been written.
            # NOTE(review): assumes write_degapped_fasta_to_file consumes
            # the parser completely before returning -- confirm.
            with open(ref_db_aligned_fp) as aligned_f:
                ref_db_fasta_fp = write_degapped_fasta_to_file(
                    MinimalFastaParser(aligned_f))
            files_to_remove.append(ref_db_fasta_fp)
        # use user db
        params.update({
            '--db_NAST': abspath(ref_db_aligned_fp),
            '--db_FASTA': abspath(ref_db_fasta_fp)
        })

    if min_div_ratio is not None:
        params.update({'-R': min_div_ratio})

    app = ChimeraSlayer(params=params, HALT_EXEC=HALT_EXEC)
    app_results = app()

    # NOTE: per the original author, app_results['CPS'] is a FilePath object
    # in case of success; there is currently no check for a failed run
    # (e.g. a missing output file) before parsing.
    chimeras = parse_CPS_file((app_results['CPS']))
    if not keep_intermediates:
        app.remove_intermediate_files()
        remove_files(files_to_remove)

    return chimeras
def get_chimeras_from_Nast_aligned(seqs_fp, ref_db_aligned_fp=None,
                                   ref_db_fasta_fp=None,
                                   HALT_EXEC=False, min_div_ratio=None,
                                   keep_intermediates=False):
    """Detect chimeras in seqs_fp using ChimeraSlayer.

    seqs_fp: a filepath with the seqs to check in the file; surrounding
        double quotes are stripped after str() conversion.
    ref_db_aligned_fp: fp to (pynast) aligned reference sequences.
    ref_db_fasta_fp: same seqs as above, just unaligned. Will be computed
        on the fly from ref_db_aligned_fp if not provided.
    HALT_EXEC: stop execution if true (forwarded to the ChimeraSlayer app).
    min_div_ratio: forwarded to ChimeraSlayer as '-R' unless None.
    keep_intermediates: when false, intermediate files of the app and the
        degapped reference file generated here (if any) are deleted.

    When both reference arguments are None the default db of ChimeraSlayer
    is used. In every other case ref_db_aligned_fp must be a valid path,
    because it is passed to abspath().

    Returns the result of parse_CPS_file on the app's 'CPS' output.
    """
    files_to_remove = []
    # might come in as FilePath object with quotes
    seqs_fp = str(seqs_fp).strip('"')

    seqs_dir, new_seqs_fp = split(seqs_fp)

    # if fp is in current dir, we fake a dir change
    if seqs_dir == "":
        seqs_dir = "./"

    # Chimera Slayer puts some temp files in current dir and some in dir of
    # input file. Use exec_dir to change to dir of input file, so to have
    # all tmp files in one place.
    params = {'--query_NAST': new_seqs_fp,
              '--exec_dir': seqs_dir}

    use_default_db = ref_db_aligned_fp is None and ref_db_fasta_fp is None
    if not use_default_db:
        # (the default db's position relative to the ChimeraSlayer binary
        # is hardcoded, so nothing needs to be passed in that case)
        if not ref_db_fasta_fp:
            # make degapped reference file, closing the aligned input once
            # it has been written.
            # NOTE(review): relies on write_degapped_fasta_to_file reading
            # the whole parser before it returns -- confirm.
            with open(ref_db_aligned_fp) as aligned_f:
                ref_db_fasta_fp = write_degapped_fasta_to_file(
                    MinimalFastaParser(aligned_f))
            files_to_remove.append(ref_db_fasta_fp)
        # use user db
        params.update({'--db_NAST': abspath(ref_db_aligned_fp),
                       '--db_FASTA': abspath(ref_db_fasta_fp)})

    if min_div_ratio is not None:
        params.update({'-R': min_div_ratio})

    app = ChimeraSlayer(params=params, HALT_EXEC=HALT_EXEC)
    app_results = app()

    # NOTE: per the original author, app_results['CPS'] is a FilePath object
    # in case of success; a failed run (no output file) is not detected
    # before parsing.
    chimeras = parse_CPS_file((app_results['CPS']))
    if not keep_intermediates:
        app.remove_intermediate_files()
        remove_files(files_to_remove)

    return chimeras