Python get_clusters_from_fasta_filepath 예제들, bfillings.uclust.get_clusters_from_fasta_filepath Python 예제들

예제 #1

0

파일 보기

파일: decontaminate_unitary.py 프로젝트: tanaes/decontaminate

def pick_ref_contaminants(queries, ref_db_fp, input_fasta_fp, contaminant_similarity, output_dir):
    """Return the query IDs that uclust did not report as failures.

    ``input_fasta_fp`` is clustered through
    ``get_clusters_from_fasta_filepath`` with ``ref_db_fp`` as the subject
    database and creation of new clusters suppressed. The failures returned
    by that call are then subtracted from ``queries``.

    Parameters
    ----------
    queries : iterable
        Candidate sequence identifiers (must be hashable).
    ref_db_fp : str
        Passed as ``subject_fasta_filepath``.
    input_fasta_fp : str
        Fasta file to search; passed as both leading positional arguments.
    contaminant_similarity : float
        Passed as ``percent_ID``.
    output_dir : str
        Passed as ``output_dir``.

    Returns
    -------
    set
        Members of ``queries`` absent from the returned failures —
        presumably the sequences that matched the reference at the
        requested identity; confirm against the uclust wrapper.
    """
    uclust_options = dict(
        percent_ID=contaminant_similarity,
        max_accepts=1,
        max_rejects=8,
        stepwords=8,
        word_length=8,
        optimal=False,
        exact=False,
        suppress_sort=False,
        output_dir=output_dir,
        enable_rev_strand_matching=False,
        subject_fasta_filepath=ref_db_fp,
        suppress_new_clusters=True,
        return_cluster_maps=True,
        stable_sort=False,
        save_uc_files=True,
        HALT_EXEC=False,
    )

    # Only the middle element of the result (the failures) is used below.
    _clusters, unmatched, _seeds = get_clusters_from_fasta_filepath(
        input_fasta_fp, input_fasta_fp, **uclust_options)

    query_ids = set(queries)
    failed_ids = set(unmatched)
    return query_ids - failed_ids

예제 #2

0

파일 보기

파일: test_uclust.py 프로젝트: biocore/burrito-fillings

    def test_get_clusters_from_fasta_filepath_rev_strand_match(self):
        """Check clustering of a sequence and its reverse complement.

        get_clusters_from_fasta_filepath is run on
        self.tmp_raw_dna_seqs_rc_filepath once with
        enable_rev_strand_matching=False and once with True; each sorted
        result is compared with the expected clusters, failures and seeds.
        """
        scenarios = [
            # Reverse-strand matching off: the sequence and its reverse
            # complement are expected in separate clusters, each a new seed.
            (False,
             [['uclust_test_seqs_0'], ['uclust_test_seqs_0_rc']],
             [],
             ['uclust_test_seqs_0', 'uclust_test_seqs_0_rc']),
            # Reverse-strand matching on: both are expected in one cluster
            # seeded by the forward sequence.
            (True,
             [['uclust_test_seqs_0', 'uclust_test_seqs_0_rc']],
             [],
             ['uclust_test_seqs_0']),
        ]

        for rev_strand, want_clusters, want_failures, want_seeds in scenarios:
            observed = get_clusters_from_fasta_filepath(
                self.tmp_raw_dna_seqs_rc_filepath,
                original_fasta_path=None,
                save_uc_files=False,
                percent_ID=0.90,
                enable_rev_strand_matching=rev_strand)

            # Sort the top-level lists on both sides, in place, before
            # comparing.
            for wanted in (want_clusters, want_failures, want_seeds):
                wanted.sort()
            for position in range(3):
                observed[position].sort()

            self.assertEqual(
                observed, (want_clusters, want_failures, want_seeds))

예제 #3

0

파일 보기

파일: test_uclust.py 프로젝트: biocore/burrito-fillings

    def test_get_clusters_from_fasta_filepath(self):
        """Check the OTU lists returned for the unsorted test fasta file.

        The expected_* lists are not defined inside this method; they are
        presumably module-level fixtures — TODO confirm.
        """
        observed = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            percent_ID=0.90,
            save_uc_files=False)

        expected_res = (expected_cluster_list,
                        expected_failure_list,
                        expected_new_seed_list)

        # Sort the top-level lists on both sides, in place, before comparing.
        for wanted in expected_res:
            wanted.sort()
        for position in range(3):
            observed[position].sort()

        self.assertEqual(observed, expected_res)

예제 #4

0

파일 보기

파일: test_uclust.py 프로젝트: biocore/burrito-fillings

    def test_get_clusters_from_fasta_filepath_optimal(self):
        """Check the OTU lists returned when optimal=True is passed.

        The expected_* lists are not defined inside this method; they are
        presumably module-level fixtures — TODO confirm.
        """
        # A small data set on which `optimal` actually changes the outcome
        # still needs to be compiled; for now this only verifies that
        # passing optimal=True does not cause a failure.
        observed = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            save_uc_files=False,
            percent_ID=0.90,
            optimal=True)

        expected_res = (expected_cluster_list,
                        expected_failure_list,
                        expected_new_seed_list)

        # Sort the top-level lists on both sides, in place, before comparing.
        for wanted in expected_res:
            wanted.sort()
        for position in range(3):
            observed[position].sort()

        self.assertEqual(observed, expected_res)

예제 #5

0

파일 보기

파일: test_uclust.py 프로젝트: biocore/burrito-fillings

    def test_get_clusters_from_fasta_filepath_suppress_sort(self):
        """Check the OTU lists returned when suppress_sort=True is passed.

        The expected_* lists are not defined inside this method; they are
        presumably module-level fixtures — TODO confirm.
        """
        # NOTE(review): `expected` is built here but never used by the
        # assertion below — confirm whether it was meant to be compared.
        expected = [['uclust_test_seqs_0'], ['uclust_test_seqs_1'],
                    ['uclust_test_seqs_2'], ['uclust_test_seqs_3'],
                    ['uclust_test_seqs_4'], ['uclust_test_seqs_5'],
                    ['uclust_test_seqs_6', 'uclust_test_seqs_8'],
                    ['uclust_test_seqs_7'], ['uclust_test_seqs_9']]

        observed = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            percent_ID=0.90,
            suppress_sort=True,
            save_uc_files=False)

        expected_res = (expected_cluster_list,
                        expected_failure_list,
                        expected_new_seed_list)

        # Sort the top-level lists on both sides, in place, before comparing.
        for wanted in expected_res:
            wanted.sort()
        for position in range(3):
            observed[position].sort()

        self.assertEqual(observed, expected_res)

예제 #6

0

파일 보기

파일: test_uclust.py 프로젝트: biocore/burrito-fillings

    def test_get_clusters_from_fasta_filepath_extending_reference_db(self):
        """Check clusters when clustering against a db with new clusters allowed.

        Uses self.ref_dna_seqs_fp as the subject database with
        suppress_new_clusters=False and reverse-strand matching enabled, and
        compares against the self.ref_test_*2 fixtures.
        """
        observed = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            max_accepts=7,
            max_rejects=12,
            percent_ID=0.90,
            subject_fasta_filepath=self.ref_dna_seqs_fp,
            suppress_new_clusters=False,
            enable_rev_strand_matching=True,
            HALT_EXEC=False,
            save_uc_files=False)

        # Sort the fixtures and the result lists in place before comparing.
        self.ref_test_clusters2.sort()
        self.ref_test_failures2.sort()
        self.ref_test_new_seeds2.sort()
        for position in range(3):
            observed[position].sort()

        expected_res = (self.ref_test_clusters2,
                        self.ref_test_failures2,
                        self.ref_test_new_seeds2)
        self.assertEqual(observed, expected_res)

예제 #7

0

파일 보기

파일: test_uclust.py 프로젝트: biocore/burrito-fillings

    def test_get_clusters_from_fasta_filepath_reference_db_only(self):
        """Check clusters when clustering against a database only.

        Uses self.ref_dna_seqs_fp as the subject database with
        suppress_new_clusters=True and compares against the
        self.ref_test_*1 fixtures.
        """
        observed = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            save_uc_files=False,
            max_accepts=7,
            max_rejects=12,
            percent_ID=0.90,
            subject_fasta_filepath=self.ref_dna_seqs_fp,
            suppress_new_clusters=True,
            HALT_EXEC=False)

        # Sort the fixtures and the result lists in place before comparing.
        self.ref_test_clusters1.sort()
        self.ref_test_failures1.sort()
        self.ref_test_new_seeds1.sort()
        for position in range(3):
            observed[position].sort()

        expected_res = (self.ref_test_clusters1,
                        self.ref_test_failures1,
                        self.ref_test_new_seeds1)
        self.assertEqual(observed, expected_res)

예제 #8

0

파일 보기

파일: split_libraries_lea_seq.py 프로젝트: DSWallach/qiime

def select_unique_rand_bcs(rand_bcs, unique_threshold):
    """
    Attempts to select true barcodes from a set of barcodes,
    i.e. removes barcodes that might be artifacts
    due to sequencing errors.
    The de-duplicated barcodes are written to a temporary fasta file
    (each barcode is used as both label and sequence) and clustered with
    uclust at ``unique_threshold``; the third value returned by
    get_clusters_from_fasta_filepath is returned as a set.
    Parameters
    ----------
    rand_bcs: list
        random barcodes; duplicates are dropped before clustering.
    unique_threshold: float
        passed to uclust as percent_ID.
    Returns
    ----------
    unique_rand_bcs: set
        set of unique random barcodes.
    """
    # Local import: needed only to wrap the descriptor returned by mkstemp.
    import os

    distinct_bcs = set(rand_bcs)
    temp_dir = get_qiime_temp_dir()
    fasta_fd, fasta_tempfile_name = mkstemp(dir=temp_dir,
                                            prefix='tmp',
                                            suffix='.fas')
    try:
        # mkstemp hands back an already-open descriptor; writing through it
        # (instead of re-opening the path) ensures it gets closed.
        with os.fdopen(fasta_fd, 'w') as fasta_tempfile:
            for rand_bc in distinct_bcs:
                fasta_tempfile.write(">{}\n{}\n".format(rand_bc, rand_bc))

        _, _, unique_rand_bcs = get_clusters_from_fasta_filepath(
            fasta_tempfile_name,
            original_fasta_path=None,
            percent_ID=unique_threshold,
            save_uc_files=False,
            output_dir=temp_dir)
    finally:
        # Remove the temporary fasta file even if writing or clustering fails.
        remove_files([fasta_tempfile_name])

    return set(unique_rand_bcs)

예제 #9

0

파일 보기

파일: split_libraries_lea_seq.py 프로젝트: Springbudder/qiime

def select_unique_rand_bcs(rand_bcs, unique_threshold):
    """
    Attempts to select true barcodes from a set of barcodes,
    i.e. removes barcodes that might be artifacts
    due to sequencing errors.
    The de-duplicated barcodes are written to a temporary fasta file
    (each barcode is used as both label and sequence) and clustered with
    uclust at ``unique_threshold``; the third value returned by
    get_clusters_from_fasta_filepath is returned as a set.
    Parameters
    ----------
    rand_bcs: list
        random barcodes; duplicates are dropped before clustering.
    unique_threshold: float
        passed to uclust as percent_ID.
    Returns
    ----------
    unique_rand_bcs: set
        set of unique random barcodes.
    """
    # Local import: needed only to wrap the descriptor returned by mkstemp.
    import os

    distinct_bcs = set(rand_bcs)
    temp_dir = get_qiime_temp_dir()
    fasta_fd, fasta_tempfile_name = mkstemp(
        dir=temp_dir, prefix='tmp', suffix='.fas')
    try:
        # mkstemp hands back an already-open descriptor; writing through it
        # (instead of re-opening the path) ensures it gets closed.
        with os.fdopen(fasta_fd, 'w') as fasta_tempfile:
            for rand_bc in distinct_bcs:
                fasta_tempfile.write(">{}\n{}\n".format(rand_bc, rand_bc))

        _, _, unique_rand_bcs = get_clusters_from_fasta_filepath(
            fasta_tempfile_name,
            original_fasta_path=None,
            percent_ID=unique_threshold,
            save_uc_files=False,
            output_dir=temp_dir)
    finally:
        # Remove the temporary fasta file even if writing or clustering fails.
        remove_files([fasta_tempfile_name])

    return set(unique_rand_bcs)