def test_chain(self):
    """
    StringChain over the word-label and genre columns, with " " as
    value, must produce the expected space-separated strings.
    """
    chain = StringChain(
        columns=["coq_word_label_1", "coq_source_genre_1"],
        value=" ")
    result = FunctionList([chain]).lapply(df0, session=None)
    expected = ["abc SPOK", "abc NEWS", "abc NEWS", "x SPOK", "x NEWS"]
    self.assertListEqual(result[chain.get_id()].tolist(), expected)
def test_freq(self):
    """
    Freq over every column of df0 whose name does not start with
    "coquery_invisible" must yield the expected per-row counts.
    """
    frame = pd.DataFrame(df0)
    visible = [col for col in frame.columns
               if not col.startswith("coquery_invisible")]
    freq = Freq(columns=visible)
    result = FunctionList([freq]).lapply(frame, session=None)
    counts = result[freq.get_id()]
    self.assertListEqual(counts.tolist(), [1, 2, 2, 1, 1])
def test_chain(self):
    """
    Check the output column of a StringChain built from two columns
    and the value " " against the expected list of strings.
    """
    columns = ["coq_word_label_1", "coq_source_genre_1"]
    joiner = StringChain(columns=columns, value=" ")
    applied = FunctionList([joiner]).lapply(df0, session=None)
    chained = applied[joiner.get_id()]
    self.assertListEqual(
        chained.tolist(),
        ["abc SPOK", "abc NEWS", "abc NEWS", "x SPOK", "x NEWS"])
def test_lower_multi(self):
    """
    StringLower applied to two columns must lower-case both of them.

    The two result columns are expected to be the last two columns of
    the data frame returned by lapply(), in the order of the input
    columns.
    """
    df = pd.DataFrame({"a": list("ABCDEFGHIJ"),
                       "b": list("ABABABABAB")})
    func = StringLower(columns=["a", "b"])
    val = FunctionList([func]).lapply(df, session=None)

    # Select the result columns by position with .iloc. A plain
    # ``val[[-2]]`` is a label lookup and raises a KeyError on current
    # pandas versions, because no column is labelled -2.
    self.assertListEqual(val.iloc[:, -2].tolist(),
                         list("abcdefghij"))
    self.assertListEqual(val.iloc[:, -1].tolist(),
                         list("ababababab"))
def test_count_with_nan(self):
    """
    Compare two Freq results computed from df1.

    Variant A adds a StringCount column (counting "[" in the CELEX
    phonology column) and then takes Freq over all columns that do not
    start with "coquery_invisible". Variant B takes Freq over the
    "coq_" columns of a fresh copy only. Both must give the same
    frequencies.
    """
    # Variant A: frequencies after adding the StringCount column
    frame_a = pd.DataFrame(df1)
    counter = StringCount(columns=["db_celex_coq_phonoword_phoncvbr_1"],
                          value="[")
    frame_a = FunctionList([counter]).lapply(frame_a, session=None)
    freq_a = Freq(columns=[col for col in frame_a.columns
                           if not col.startswith("coquery_invisible")])
    val_a = FunctionList([freq_a]).lapply(
        frame_a, session=None)[freq_a.get_id()]

    # Variant B: frequencies over the "coq_" columns only
    frame_b = pd.DataFrame(df1)
    frame_b = frame_b[[col for col in frame_b if col.startswith("coq_")]]
    freq_b = Freq(columns=frame_b.columns)
    val_b = FunctionList([freq_b]).lapply(
        frame_b, session=None)[freq_b.get_id()]

    self.assertListEqual(val_a.tolist(), val_b.tolist())
def test_upper(self):
    """
    StringUpper applied to column "a" must upper-case its values.

    The result column is expected to be the last column of the data
    frame returned by lapply().
    """
    df = pd.DataFrame({
        "a": ["abx"] * 5 + ["a"] * 5 + ["bx"] * 5,
        "b": [""] * 10 + ["yyannxzzz"] * 5,
    })
    func = StringUpper(columns=["a"])

    # Select the result column by position with .iloc. A plain
    # ``[[-1]]`` is a label lookup and raises a KeyError on current
    # pandas versions, because no column is labelled -1.
    val = FunctionList([func]).lapply(df, session=None).iloc[:, -1]
    self.assertListEqual(val.tolist(),
                         ["ABX"] * 5 + ["A"] * 5 + ["BX"] * 5)
def test_extract_groups(self):
    """
    Tests issue #255

    StringExtract with a regular expression that contains two groups
    must produce one result column per group. Rows that do not match
    the expression are expected to contain empty strings.
    """
    df = pd.DataFrame({
        "a": ["abx"] * 5 + ["a"] * 5 + ["bx"] * 5,
        "b": [""] * 10 + ["yyannxzzz"] * 5,
    })
    func = StringExtract(columns=["a"], value="(a).*(x)")
    val = FunctionList([func]).lapply(df, session=None)

    # Select the two group columns by position with .iloc. A plain
    # ``val[[-2]]`` is a label lookup and raises a KeyError on current
    # pandas versions, because no column is labelled -2.
    self.assertListEqual(val.iloc[:, -2].tolist(),
                         ["a"] * 5 + [""] * 10)
    self.assertListEqual(val.iloc[:, -1].tolist(),
                         ["x"] * 5 + [""] * 10)
def test_translate_header_multicolumn_functions(self):
    """
    Headers of a two-group StringExtract must be translated to the
    function label followed by " (match 1)" and " (match 2)".
    """
    frame = pd.DataFrame(
        {"coq_word_label_1": ["abx"] * 5 + ["a"] * 5 + ["bx"] * 5})
    extract = StringExtract(columns=["coq_word_label_1"],
                            value="(a).*(x)")

    # Register the function with the session and let the manager
    # process the data frame.
    self.session.column_functions = FunctionList([extract])
    self.manager.set_column_order(frame.columns)
    processed = self.manager.process(frame, self.session)

    headers = [self.session.translate_header(col)
               for col in processed.columns]
    expected = [
        "Word",
        "{} (match 1)".format(
            extract.get_label(self.session, self.manager)),
        "{} (match 2)".format(
            extract.get_label(self.session, self.manager)),
    ]
    self.assertListEqual(headers, expected)
def test_count_with_nan(self):
    """
    Freq computed after a StringCount column was added to df1 must
    equal Freq computed over the "coq_" columns of df1 alone.
    """
    def visible(name):
        # Columns with this prefix are left out of the first Freq call.
        return not name.startswith("coquery_invisible")

    # First pass: add the StringCount column, then take frequencies
    # over all visible columns.
    data = pd.DataFrame(df1)
    count_func = StringCount(
        columns=["db_celex_coq_phonoword_phoncvbr_1"], value="[")
    data = FunctionList([count_func]).lapply(data, session=None)
    freq_func = Freq(columns=[name for name in data.columns
                              if visible(name)])
    freq_list = FunctionList([freq_func])
    val_a = freq_list.lapply(data, session=None)[freq_func.get_id()]

    # Second pass: take frequencies over the "coq_" columns only.
    data = pd.DataFrame(df1)
    coq_columns = [name for name in data if name.startswith("coq_")]
    data = data[coq_columns]
    freq_func = Freq(columns=data.columns)
    freq_list = FunctionList([freq_func])
    val_b = freq_list.lapply(data, session=None)[freq_func.get_id()]

    self.assertListEqual(val_a.tolist(), val_b.tolist())
def test_freq_with_nan1(self):
    """
    Freq over two columns, one of which contains missing values.

    A column with NaN in rows 0, 2 and 4 is added to a data frame
    built from df0, and Freq is computed over the word-label column
    and the new column.
    """
    # ``pd.np`` was deprecated in pandas 1.0 and removed in pandas 2.0.
    # A single float NaN object is used for all missing cells instead,
    # which is what ``pd.np.nan`` used to provide.
    nan = float("nan")

    df = pd.DataFrame(df0)
    df["coq_test_label_1"] = [nan, "A", nan, "B", nan]
    func = Freq(columns=["coq_word_label_1", "coq_test_label_1"])
    val = FunctionList([func]).lapply(df, session=None)[func.get_id()]
    self.assertListEqual(val.tolist(), [2, 1, 2, 1, 1])
def test_freq(self):
    """
    Check the Freq column computed over all columns of df0 except
    those whose name starts with "coquery_invisible".
    """
    data = pd.DataFrame(df0)
    selected = []
    for name in data.columns:
        if name.startswith("coquery_invisible"):
            continue
        selected.append(name)

    freq_func = Freq(columns=selected)
    applied = FunctionList([freq_func]).lapply(data, session=None)
    self.assertListEqual(applied[freq_func.get_id()].tolist(),
                         [1, 2, 2, 1, 1])
def test_match_null(self):
    """
    StringMatch with the expression "[a]" on the second word-label
    column must be True for the first four rows and False for the last.
    """
    # NOTE(review): presumably the last row of coq_word_label_2 in df0
    # is a null value, as the test name suggests -- confirm against df0.
    matcher = StringMatch(columns=["coq_word_label_2"], value="[a]")
    result = FunctionList([matcher]).lapply(df0, session=None)
    expected = [True, True, True, True, False]
    self.assertListEqual(result[matcher.get_id()].tolist(), expected)
def test_length(self):
    """
    StringLength on the word-label column of df0 must return the
    expected length for each row.
    """
    length_func = StringLength(columns=["coq_word_label_1"])
    result = FunctionList([length_func]).lapply(df0, session=None)
    lengths = result[length_func.get_id()]
    self.assertListEqual(lengths.tolist(), [3, 3, 3, 1, 1])
def test_count(self):
    """
    StringCount with value "x" on the word-label column of df0 must
    return the expected count for each row.
    """
    count_func = StringCount(columns=["coq_word_label_1"], value="x")
    result = FunctionList([count_func]).lapply(df0, session=None)
    counts = result[count_func.get_id()]
    self.assertListEqual(counts.tolist(), [0, 0, 0, 1, 1])
def assert_result(self, func_class, df, columns, expected, value=None):
    """
    Apply one function to a data frame and check its result column.

    Parameters
    ----------
    func_class : the function class to instantiate; it is called with
        the keyword arguments ``columns`` and ``value``
    df : the data frame passed to FunctionList.lapply()
    columns : the columns passed to the function
    expected : the values that the result column is compared to by
        npt.assert_equal()
    value : the value passed to the function (default: None)
    """
    function = func_class(columns=columns, value=value)
    applied = FunctionList([function]).lapply(df, session=None)
    actual = applied[function.get_id()].values
    npt.assert_equal(actual, expected)
def test_extract(self):
    """
    StringExtract with the expression "[abx]*" on the word-label
    column of df0 must return the expected matches.

    The result column is expected to be the last column of the data
    frame returned by lapply().
    """
    func = StringExtract(columns=["coq_word_label_1"], value="[abx]*")
    val = FunctionList([func]).lapply(df0, session=None)

    # Select the result column by position with .iloc. A plain
    # ``val[[-1]]`` is a label lookup and raises a KeyError on current
    # pandas versions, because no column is labelled -1.
    self.assertListEqual(val.iloc[:, -1].tolist(),
                         ["ab", "ab", "ab", "x", "x"])
def test_match_null(self):
    """
    Check the StringMatch result for the expression "[a]" on the
    second word-label column of df0: only the last row must be False.
    """
    match_func = StringMatch(columns=["coq_word_label_2"], value="[a]")
    functions = FunctionList([match_func])
    matches = functions.lapply(df0, session=None)[match_func.get_id()]
    self.assertListEqual(matches.tolist(),
                         [True] * 4 + [False])