예제 #1
0
 def test_chain(self):
     """Check StringChain over word label and genre with value " " on df0."""
     chain = StringChain(
         columns=["coq_word_label_1", "coq_source_genre_1"], value=" ")
     result = FunctionList([chain]).lapply(df0, session=None)
     # The function's output lives in the column named by its id.
     chained = result[chain.get_id()].tolist()
     expected = ["abc SPOK", "abc NEWS", "abc NEWS", "x SPOK", "x NEWS"]
     self.assertListEqual(chained, expected)
예제 #2
0
 def test_freq(self):
     """Check Freq over every df0 column not prefixed coquery_invisible."""
     frame = pd.DataFrame(df0)
     visible = [name for name in frame.columns
                if not name.startswith("coquery_invisible")]
     freq = Freq(columns=visible)
     result = FunctionList([freq]).lapply(frame, session=None)
     self.assertListEqual(result[freq.get_id()].tolist(), [1, 2, 2, 1, 1])
예제 #3
0
 def test_chain(self):
     """Check the StringChain output for two df0 columns and value " "."""
     source_columns = ["coq_word_label_1", "coq_source_genre_1"]
     joiner = StringChain(columns=source_columns, value=" ")
     applied = FunctionList([joiner]).lapply(df0, session=None)
     # Compare the column produced by the function with the expected rows.
     self.assertListEqual(
         applied[joiner.get_id()].tolist(),
         ["abc SPOK", "abc NEWS", "abc NEWS", "x SPOK", "x NEWS"],
     )
예제 #4
0
 def test_lower_multi(self):
     """Check StringLower applied to two columns at once.

     The last two columns of the result are expected to hold the
     lower-cased values of columns "a" and "b", in that order.
     """
     df = pd.DataFrame({"a": list("ABCDEFGHIJ"), "b": list("ABABABABAB")})
     func = StringLower(columns=["a", "b"])
     val = FunctionList([func]).lapply(df, session=None)
     # Select by position with .iloc: ``val[[-2]]`` is a label lookup and
     # raises KeyError on current pandas because no column is named -2.
     self.assertListEqual(val.iloc[:, -2].tolist(), list("abcdefghij"))
     self.assertListEqual(val.iloc[:, -1].tolist(), list("ababababab"))
예제 #5
0
    def test_count_with_nan(self):
        """Check that Freq agrees with and without a StringCount column."""
        # Variant A: add a StringCount column first, then compute Freq over
        # every column whose name does not start with "coquery_invisible".
        counted = pd.DataFrame(df1)
        counter = StringCount(
            columns=["db_celex_coq_phonoword_phoncvbr_1"], value="[")
        counted = FunctionList([counter]).lapply(counted, session=None)
        visible = [name for name in counted.columns
                   if not name.startswith("coquery_invisible")]
        freq_a = Freq(columns=visible)
        applied_a = FunctionList([freq_a]).lapply(counted, session=None)
        val_a = applied_a[freq_a.get_id()]

        # Variant B: compute Freq over only the "coq_"-prefixed columns of
        # a fresh frame built from the same data.
        plain = pd.DataFrame(df1)
        plain = plain[[name for name in plain if name.startswith("coq_")]]
        freq_b = Freq(columns=plain.columns)
        applied_b = FunctionList([freq_b]).lapply(plain, session=None)
        val_b = applied_b[freq_b.get_id()]

        self.assertListEqual(val_a.tolist(), val_b.tolist())
예제 #6
0
 def test_upper(self):
     """Check StringUpper on column "a".

     The last column of the result is expected to hold the upper-cased
     values of column "a".
     """
     df = pd.DataFrame({
         "a": ["abx"] * 5 + ["a"] * 5 + ["bx"] * 5,
         "b": [""] * 10 + ["yyannxzzz"] * 5
     })
     func = StringUpper(columns=["a"])
     # Select the last column by position with .iloc: ``[[-1]]`` is a label
     # lookup and raises KeyError on current pandas (no column is named -1).
     val = FunctionList([func]).lapply(df, session=None).iloc[:, -1]
     self.assertListEqual(val.tolist(),
                          ["ABX"] * 5 + ["A"] * 5 + ["BX"] * 5)
예제 #7
0
 def test_extract_groups(self):
     """
     Tests issue #255

     A StringExtract pattern with two capture groups is expected to give
     two trailing result columns, one per group, with "" where the
     pattern does not match.
     """
     df = pd.DataFrame({
         "a": ["abx"] * 5 + ["a"] * 5 + ["bx"] * 5,
         "b": [""] * 10 + ["yyannxzzz"] * 5
     })
     func = StringExtract(columns=["a"], value="(a).*(x)")
     val = FunctionList([func]).lapply(df, session=None)
     # Select by position with .iloc: ``val[[-2]]`` is a label lookup and
     # raises KeyError on current pandas because no column is named -2.
     self.assertListEqual(val.iloc[:, -2].tolist(),
                          ["a"] * 5 + [""] * 10)
     self.assertListEqual(val.iloc[:, -1].tolist(),
                          ["x"] * 5 + [""] * 10)
예제 #8
0
    def test_translate_header_multicolumn_functions(self):
        """Check translated headers for a two-group StringExtract.

        After processing, the columns are expected to translate to "Word"
        followed by the function label with "(match 1)" and "(match 2)"
        suffixes.
        """
        frame = pd.DataFrame(
            {"coq_word_label_1": ["abx"] * 5 + ["a"] * 5 + ["bx"] * 5})
        extract = StringExtract(columns=["coq_word_label_1"],
                                value="(a).*(x)")
        self.session.column_functions = FunctionList([extract])
        self.manager.set_column_order(frame.columns)
        frame = self.manager.process(frame, self.session)

        translated = [self.session.translate_header(col)
                      for col in frame.columns]
        # The label is queried once per expected match column.
        first_match = "{} (match 1)".format(
            extract.get_label(self.session, self.manager))
        second_match = "{} (match 2)".format(
            extract.get_label(self.session, self.manager))
        self.assertListEqual(translated,
                             ["Word", first_match, second_match])
예제 #9
0
    def test_count_with_nan(self):
        """Compare Freq after adding a StringCount column with plain Freq."""
        # With the extra StringCount column: Freq runs over all columns
        # except those prefixed "coquery_invisible".
        with_count = pd.DataFrame(df1)
        bracket_count = StringCount(
            columns=["db_celex_coq_phonoword_phoncvbr_1"], value="[")
        with_count = FunctionList([bracket_count]).lapply(
            with_count, session=None)
        freq_columns = [
            col for col in with_count.columns
            if not col.startswith("coquery_invisible")
        ]
        freq = Freq(columns=freq_columns)
        val_a = FunctionList([freq]).lapply(
            with_count, session=None)[freq.get_id()]

        # Without it: Freq runs over the "coq_"-prefixed columns only.
        baseline = pd.DataFrame(df1)
        baseline = baseline[[col for col in baseline
                             if col.startswith("coq_")]]
        freq = Freq(columns=baseline.columns)
        val_b = FunctionList([freq]).lapply(
            baseline, session=None)[freq.get_id()]

        self.assertListEqual(val_a.tolist(), val_b.tolist())
예제 #10
0
 def test_freq_with_nan1(self):
     """Check Freq over two columns when one of them contains NaN values.

     Expected frequencies for the five rows of df0 are [2, 1, 2, 1, 1].
     """
     # ``pd.np`` was deprecated in pandas 1.0 and removed in 2.0, where it
     # raises AttributeError. A plain float NaN is the same missing-value
     # marker; one shared object is reused for every missing cell.
     nan = float("nan")
     df = pd.DataFrame(df0)
     df["coq_test_label_1"] = [nan, "A", nan, "B", nan]
     func = Freq(columns=["coq_word_label_1", "coq_test_label_1"])
     val = FunctionList([func]).lapply(df, session=None)[func.get_id()]
     self.assertListEqual(val.tolist(), [2, 1, 2, 1, 1])
예제 #11
0
 def test_freq(self):
     """Check Freq on df0, skipping columns prefixed coquery_invisible."""
     data = pd.DataFrame(df0)
     counted_columns = [
         col for col in data.columns
         if not col.startswith("coquery_invisible")
     ]
     freq_func = Freq(columns=counted_columns)
     applied = FunctionList([freq_func]).lapply(data, session=None)
     frequencies = applied[freq_func.get_id()].tolist()
     self.assertListEqual(frequencies, [1, 2, 2, 1, 1])
예제 #12
0
 def test_match_null(self):
     """Check StringMatch with "[a]" on coq_word_label_2 of df0."""
     matcher = StringMatch(columns=["coq_word_label_2"], value="[a]")
     applied = FunctionList([matcher]).lapply(df0, session=None)
     matches = applied[matcher.get_id()].tolist()
     # Only the last row is expected not to match.
     self.assertListEqual(matches, [True, True, True, True, False])
예제 #13
0
 def test_length(self):
     """Check StringLength on coq_word_label_1 of df0."""
     measure = StringLength(columns=["coq_word_label_1"])
     applied = FunctionList([measure]).lapply(df0, session=None)
     lengths = applied[measure.get_id()].tolist()
     self.assertListEqual(lengths, [3, 3, 3, 1, 1])
예제 #14
0
 def test_count(self):
     """Check StringCount of "x" in coq_word_label_1 of df0."""
     counter = StringCount(columns=["coq_word_label_1"], value="x")
     applied = FunctionList([counter]).lapply(df0, session=None)
     counts = applied[counter.get_id()].tolist()
     self.assertListEqual(counts, [0, 0, 0, 1, 1])
예제 #15
0
 def assert_result(self, func_class, df, columns, expected, value=None):
     """Apply a single function to ``df`` and compare its output column.

     ``func_class`` is instantiated with ``columns`` and ``value``, run
     through a one-element FunctionList, and the values of the column
     named by the function's id are compared with ``expected``.
     """
     instance = func_class(columns=columns, value=value)
     applied = FunctionList([instance]).lapply(df, session=None)
     actual = applied[instance.get_id()].values
     npt.assert_equal(actual, expected)
예제 #16
0
 def test_freq_with_nan1(self):
     """Check Freq over a column pair in which one column has NaN cells.

     The five rows of df0 are expected to yield [2, 1, 2, 1, 1].
     """
     # ``pd.np`` is not available on pandas 2.0+ (deprecated since 1.0),
     # so build the missing value directly. The same float object is
     # reused for all missing cells.
     missing = float("nan")
     df = pd.DataFrame(df0)
     df["coq_test_label_1"] = [missing, "A", missing, "B", missing]
     func = Freq(columns=["coq_word_label_1", "coq_test_label_1"])
     val = FunctionList([func]).lapply(df, session=None)[func.get_id()]
     self.assertListEqual(val.tolist(), [2, 1, 2, 1, 1])
예제 #17
0
 def test_extract(self):
     """Check StringExtract with "[abx]*" on coq_word_label_1 of df0.

     The last column of the result is expected to hold the extracted
     substrings.
     """
     func = StringExtract(columns=["coq_word_label_1"], value="[abx]*")
     val = FunctionList([func]).lapply(df0, session=None)
     # Select the last column by position with .iloc: ``val[[-1]]`` is a
     # label lookup and raises KeyError on current pandas.
     self.assertListEqual(val.iloc[:, -1].tolist(),
                          ["ab", "ab", "ab", "x", "x"])
예제 #18
0
 def test_match_null(self):
     """Check the StringMatch result for "[a]" on coq_word_label_2."""
     match_func = StringMatch(columns=["coq_word_label_2"], value="[a]")
     result = FunctionList([match_func]).lapply(df0, session=None)
     expected = [True, True, True, True, False]
     self.assertListEqual(result[match_func.get_id()].tolist(), expected)
예제 #19
0
 def test_count(self):
     """Check the StringCount result for "x" on coq_word_label_1."""
     count_func = StringCount(columns=["coq_word_label_1"], value="x")
     result = FunctionList([count_func]).lapply(df0, session=None)
     expected = [0, 0, 0, 1, 1]
     self.assertListEqual(result[count_func.get_id()].tolist(), expected)
예제 #20
0
 def test_length(self):
     """Check the StringLength result for coq_word_label_1."""
     length_func = StringLength(columns=["coq_word_label_1"])
     result = FunctionList([length_func]).lapply(df0, session=None)
     expected = [3, 3, 3, 1, 1]
     self.assertListEqual(result[length_func.get_id()].tolist(), expected)