Ejemplo n.º 1
0
    def test_search_attc_uniq(self):
        """
        Check that search_attc returns one single attC array when the infernal table
        holds 3 attC sites on the same strand, each separated by less than 4kb.
        """
        result_dir = os.path.join("tests", "data", "Results_Integron_Finder_" + self.rep_name,
                                  "other")
        infernal_path = os.path.join(result_dir, self.rep_name + "_attc_table.res")
        # Build the attC dataframe from the infernal output file
        hits = integron_finder.read_infernal(infernal_path)

        # Search the attC arrays, keeping palindromes.
        # Two attC sites belong to the same array when they are on the same strand
        # and less than 4kb apart.
        arrays = integron_finder.search_attc(hits, True)
        self.assertEqual(len(arrays), 1)

        def site(pos_beg, pos_end, evalue):
            # One expected attC hit; only the positions and the e-value differ between rows.
            return {"Accession_number": self.rep_name, "cm_attC": "attC_4",
                    "cm_debut": 1, "cm_fin": 47, "pos_beg": pos_beg,
                    "pos_end": pos_end, "sens": "-", "evalue": evalue}

        # Expected content of the single array, built row by row
        columns = ["Accession_number", "cm_attC", "cm_debut", "cm_fin",
                   "pos_beg", "pos_end", "sens", "evalue"]
        expected = pd.DataFrame(columns=columns, dtype='int')
        for row in (site(17825, 17884, 1e-9),
                    site(19080, 19149, 1e-4),
                    site(19618, 19726, 1.1e-7)):
            expected = expected.append(row, ignore_index=True)
        # Cast the model and sequence positions to int
        int_columns = ["cm_debut", "cm_fin", "pos_beg", "pos_end"]
        expected[int_columns] = expected[int_columns].astype(int)
        pdt.assert_frame_equal(expected, arrays[0])
Ejemplo n.º 2
0
 def test_filter_evalue_thres(self):
     """
     Check the e-value filter of read_infernal: when called with evalue=1e-8,
     the returned dataframe is expected to hold a single hit, whose e-value is 1e-9.
     """
     infernal_path = os.path.join("tests", "data",
                                  "Results_Integron_Finder_" + self.rep_name,
                                  "other", self.rep_name + "_attc_table.res")
     observed = integron_finder.read_infernal(infernal_path, evalue=1e-8)

     # The only hit expected to remain after filtering
     kept_hit = {
         "Accession_number": self.rep_name,
         "cm_attC": "attC_4",
         "cm_debut": 1,
         "cm_fin": 47,
         "pos_beg": 17825,
         "pos_end": 17884,
         "sens": "-",
         "evalue": 1e-9
     }
     columns = ["Accession_number", "cm_attC", "cm_debut", "cm_fin",
                "pos_beg", "pos_end", "sens", "evalue"]
     expected = pd.DataFrame(columns=columns).append(kept_hit, ignore_index=True)
     # Cast the model and sequence positions to int
     int_columns = ["cm_debut", "cm_fin", "pos_beg", "pos_end"]
     expected[int_columns] = expected[int_columns].astype(int)
     pdt.assert_frame_equal(observed, expected)
Ejemplo n.º 3
0
 def test_no_total_cm_match_strandm(self):
     """
     Check the start and end positions returned by read_infernal when the model
     only partially matched the sequence for some hits. Every hit is on strand -.
     """
     infernal_path = os.path.join("tests", "data",
                                  "Results_Integron_Finder_" + self.rep_name,
                                  "other",
                                  self.rep_name + "_attc_table-partialm.res")
     observed = integron_finder.read_infernal(infernal_path)

     def hit(cm_debut, cm_fin, pos_beg, pos_end, evalue):
         # One expected row; accession, model name and strand are shared by all hits.
         return {
             "Accession_number": self.rep_name,
             "cm_attC": "attC_4",
             "cm_debut": cm_debut,
             "cm_fin": cm_fin,
             "pos_beg": pos_beg,
             "pos_end": pos_end,
             "sens": "-",
             "evalue": evalue
         }

     columns = ["Accession_number", "cm_attC", "cm_debut", "cm_fin",
                "pos_beg", "pos_end", "sens", "evalue"]
     expected = pd.DataFrame(columns=columns)
     for row in (hit(1, 40, 17818, 17884, 1e-9),
                 hit(1, 47, 19080, 19149, 1e-4),
                 hit(10, 47, 19618, 19735, 1.1e-7)):
         expected = expected.append(row, ignore_index=True)
     # Cast the model and sequence positions to int
     int_columns = ["cm_debut", "cm_fin", "pos_beg", "pos_end"]
     expected[int_columns] = expected[int_columns].astype(int)
     pdt.assert_frame_equal(observed, expected)
Ejemplo n.º 4
0
 def test_generate_df(self):
     """
     Check that read_infernal returns the dataframe matching the content of an
     existing infernal file which contains hits.
     """
     infernal_path = os.path.join("tests", "data",
                                  "Results_Integron_Finder_" + self.rep_name,
                                  "other", self.rep_name + "_attc_table.res")
     observed = integron_finder.read_infernal(infernal_path)

     def hit(pos_beg, pos_end, evalue):
         # One expected row; only the positions and the e-value differ between hits.
         return {
             "Accession_number": self.rep_name,
             "cm_attC": "attC_4",
             "cm_debut": 1,
             "cm_fin": 47,
             "pos_beg": pos_beg,
             "pos_end": pos_end,
             "sens": "-",
             "evalue": evalue
         }

     columns = ["Accession_number", "cm_attC", "cm_debut", "cm_fin",
                "pos_beg", "pos_end", "sens", "evalue"]
     expected = pd.DataFrame(columns=columns)
     for row in (hit(17825, 17884, 1e-9),
                 hit(19080, 19149, 1e-4),
                 hit(19618, 19726, 1.1e-7)):
         expected = expected.append(row, ignore_index=True)
     # Cast the model and sequence positions to int
     int_columns = ["cm_debut", "cm_fin", "pos_beg", "pos_end"]
     expected[int_columns] = expected[int_columns].astype(int)
     pdt.assert_frame_equal(observed, expected)
Ejemplo n.º 5
0
 def test_search_attc_empty(self):
     """
     Check that search_attc returns an empty list when no attC site was detected.
     """
     result_dir = os.path.join("tests", "data", "Results_Integron_Finder_" + self.rep_name,
                               "other")
     infernal_path = os.path.join(result_dir, self.rep_name + "_attc_table-empty.res")
     # Build the attC dataframe from the infernal output file
     hits = integron_finder.read_infernal(infernal_path)
     arrays = integron_finder.search_attc(hits, True)
     # No array at all: check the length first, then the value itself
     self.assertEqual(len(arrays), 0)
     self.assertEqual(arrays, [])
Ejemplo n.º 6
0
    def test_nofile(self):
        """
        Check that read_infernal gives back an empty dataframe, with the expected
        columns, when the infernal file it is given does not exist.
        """
        columns = ["Accession_number", "cm_attC", "cm_debut", "cm_fin",
                   "pos_beg", "pos_end", "sens", "evalue"]
        observed = integron_finder.read_infernal("infernal.txt")
        expected = pd.DataFrame(columns=columns)
        pdt.assert_frame_equal(observed, expected)
Ejemplo n.º 7
0
 def test_evalue_thres(self):
     """
     Check that read_infernal returns an empty dataframe when the infernal file
     exists and contains hits, but the given e-value threshold (1e-10) is smaller
     than the e-value of every hit, so that none is kept.
     """
     result_dir = os.path.join("tests", "data",
                               "Results_Integron_Finder_" + self.rep_name,
                               "other")
     infernal_path = os.path.join(result_dir, self.rep_name + "_attc_table.res")
     observed = integron_finder.read_infernal(infernal_path, evalue=1e-10)

     # Empty frame holding only the expected column names
     columns = ["Accession_number", "cm_attC", "cm_debut", "cm_fin",
                "pos_beg", "pos_end", "sens", "evalue"]
     expected = pd.DataFrame(columns=columns)
     pdt.assert_frame_equal(observed, expected)
Ejemplo n.º 8
0
 def test_nohit(self):
     """
     Check that read_infernal returns an empty dataframe when the infernal file
     exists but does not contain any hit.
     """
     result_dir = os.path.join("tests", "data",
                               "Results_Integron_Finder_" + self.rep_name,
                               "other")
     infernal_path = os.path.join(result_dir, self.rep_name + "_attc_table-empty.res")
     observed = integron_finder.read_infernal(infernal_path)

     # Empty frame holding only the expected column names
     columns = ["Accession_number", "cm_attC", "cm_debut", "cm_fin",
                "pos_beg", "pos_end", "sens", "evalue"]
     expected = pd.DataFrame(columns=columns)
     pdt.assert_frame_equal(observed, expected)
    def test_find_integron_attC_is_df(self):
        replicon_name = 'acba.007.p01.13'
        replicon_path = os.path.join(self._data_dir, 'Replicons',
                                     replicon_name + '.fst')
        attc_file = os.path.join(
            self._data_dir,
            'Results_Integron_Finder_acba.007.p01.13/other/acba.007.p01.13_attc_table.res'
        )

        intI_file = os.path.join(
            self._data_dir,
            'Results_Integron_Finder_acba.007.p01.13/other/acba.007.p01.13_intI.res'
        )
        phageI_file = os.path.join(
            self._data_dir,
            'Results_Integron_Finder_acba.007.p01.13/other/acba.007.p01.13_phage_int.res'
        )
        args = argparse.Namespace
        args.no_proteins = True
        args.keep_palindromes = True

        integron_finder.replicon_name = replicon_name
        integron_finder.SEQUENCE = SeqIO.read(
            replicon_path, "fasta", alphabet=Seq.IUPAC.unambiguous_dna)
        integron_finder.SIZE_REPLICON = len(integron_finder.SEQUENCE)
        integron_finder.args = args
        integron_finder.evalue_attc = 1.
        integron_finder.max_attc_size = 200
        integron_finder.min_attc_size = 40
        integron_finder.length_cm = 47  # length in 'CLEN' (value for model attc_4.cm)
        integron_finder.DISTANCE_THRESHOLD = 4000  # (4kb at least between 2 different arrays)
        integron_finder.model_attc_name = integron_finder.MODEL_attc.split(
            "/")[-1].split(".cm")[0]

        attc_file = integron_finder.read_infernal(
            attc_file,
            evalue=integron_finder.evalue_attc,
            size_max_attc=integron_finder.max_attc_size,
            size_min_attc=integron_finder.min_attc_size)

        with self.catch_output() as (out, err):
            integrons = integron_finder.find_integron(replicon_name, attc_file,
                                                      intI_file, phageI_file)

        self.assertEqual(err.getvalue().strip(), "")
        self.assertEqual(
            out.getvalue().strip(), """In replicon acba.007.p01.13, there are:
- 0 complete integron(s) found with a total 0 attC site(s)
- 1 CALIN element(s) found with a total of 3 attC site(s)
- 0 In0 element(s) found with a total of 0 attC site""")

        self.assertEqual(len(integrons), 1)
        integron = integrons[0]
        self.assertEqual(integron.ID_replicon, replicon_name)

        exp = pd.DataFrame(
            {
                'annotation': ['attC'] * 3,
                'distance_2attC': [np.nan, 1196.0, 469.0],
                'evalue': [1.000000e-09, 1.000000e-04, 1.100000e-07],
                'model': ['attc_4'] * 3,
                'pos_beg': [17825, 19080, 19618],
                'pos_end': [17884, 19149, 19726],
                'strand': [-1, -1, -1],
                'type_elt': 'attC'
            },
            columns=self.columns,
            index=['attc_001', 'attc_002', 'attc_003'])
        pdt.assert_frame_equal(integron.attC, exp)

        exp = pd.DataFrame(columns=self.columns)

        exp = exp.astype(dtype=self.dtype)

        pdt.assert_frame_equal(integron.integrase, exp)
        pdt.assert_frame_equal(integron.promoter, exp)
        pdt.assert_frame_equal(integron.attI, exp)
        pdt.assert_frame_equal(integron.proteins, exp)
Ejemplo n.º 10
0
    def test_search_attc_dist_diff_strand(self):
        """
        Check that search_attc returns 3 arrays, one of them of size 3, when given a table with:
        - 3 attC sites on the same strand (-) and separated by less than 4 kb
        - 2 other attC sites separated by less than 4kb but on the other strand (+)
        - 1 other attC site, also on strand +, but separated by more than 4kb.
        """
        def site(pos_beg, pos_end, sens, evalue):
            # One attC row; accession, model name and model coordinates are shared.
            return {"Accession_number": self.rep_name, "cm_attC": "attC_4",
                    "cm_debut": 1, "cm_fin": 47, "pos_beg": pos_beg,
                    "pos_end": pos_end, "sens": sens, "evalue": evalue}

        def expected_frame(rows):
            # Build an expected array from its rows, then cast the positions to int.
            frame = pd.DataFrame(columns=["Accession_number", "cm_attC", "cm_debut", "cm_fin",
                                          "pos_beg", "pos_end", "sens", "evalue"])
            for row in rows:
                frame = frame.append(row, ignore_index=True)
            int_columns = ["cm_debut", "cm_fin", "pos_beg", "pos_end"]
            frame[int_columns] = frame[int_columns].astype(int)
            return frame

        infernal_path = os.path.join("tests", "data",
                                     "Results_Integron_Finder_" + self.rep_name,
                                     "other", self.rep_name + "_attc_table.res")
        # Build the attC dataframe from the infernal output file
        hits = integron_finder.read_infernal(infernal_path)
        # Add three more attC sites, all on strand +
        for extra in (site(15800, 16000, "+", 1e-3),
                      site(12000, 12500, "+", 1e-3),
                      site(7100, 8200, "+", 1e-3)):
            hits = hits.append(extra, ignore_index=True)
        hits.sort_values(["Accession_number", "pos_beg", "evalue"], inplace=True)

        # Search the attC arrays, keeping palindromes.
        # Two attC sites belong to the same array when they are on the same strand
        # and less than 4kb apart.
        arrays = integron_finder.search_attc(hits, True)
        self.assertEqual(len(arrays), 3)

        # Expected arrays: the 3 sites on strand -, then the two groups on strand +
        minus_array = expected_frame([site(17825, 17884, "-", 1e-9),
                                      site(19080, 19149, "-", 1e-4),
                                      site(19618, 19726, "-", 1.1e-7)])
        plus_pair = expected_frame([site(12000, 12500, "+", 1e-03),
                                    site(15800, 16000, "+", 1e-03)])
        plus_single = expected_frame([site(7100, 8200, "+", 1e-03)])

        pdt.assert_frame_equal(minus_array, arrays[2])
        pdt.assert_frame_equal(plus_pair, arrays[1])
        pdt.assert_frame_equal(plus_single, arrays[0])