def test_convert_teach_id_row(test_corp):
    """Check the mark vectors returned by ``mark.convert_teach_id_row``.

    Every case pairs a tagged token sequence with the tag sets expected at
    the leading positions of the result. Each expected entry is built with
    ``mark.convert_types_to_vec``; only the listed positions are compared,
    the overall length of the result is not asserted.
    """
    cases = [
        # Two spans that follow one another without overlapping.
        (
            ["<bos>", "<sj>", "james", "</sj>", "<v>", "is", "</v>", "<eos>"],
            [
                [],
                ['<sj>'],  # james
                ['<v>'],  # is
                [],
            ],
        ),
        # A <v> span fully nested inside a <sj> span.
        (
            ["<bos>", "<sj>", "A", "<v>", "B", "</v>", "</sj>", "<eos>"],
            [
                [],
                ['<sj>'],  # A
                ['<sj>', '<v>'],  # B
                [],
            ],
        ),
        # Crossing spans: <sj> is closed while <v> is still open.
        (
            ["<bos>", "<sj>", "james", "<v>", "is", "</sj>", "men", "</v>", "<eos>"],
            [
                [],
                ['<sj>'],  # james
                ['<sj>', '<v>'],  # is
                ['<v>'],  # men
                [],
            ],
        ),
    ]

    for tokens, expected_types in cases:
        id_row = test_corp.tokens_to_ids(tokens)
        mark_vec = mark.convert_teach_id_row(id_row, test_corp)
        for position, types in enumerate(expected_types):
            assert mark_vec[position] == mark.convert_types_to_vec(types)
def test_convert_teach_id_row(test_corp):
    """Verify ``mark.convert_teach_id_row`` on three tagged sentences.

    The scenarios cover sequential spans, a nested span and crossing spans.
    For each one, the first few entries of the returned row are compared
    against vectors produced by ``mark.convert_types_to_vec``.
    """

    def check(tokens, *expected_type_sets):
        # Convert once, then compare position by position in order.
        ids = test_corp.tokens_to_ids(tokens)
        mark_vec = mark.convert_teach_id_row(ids, test_corp)
        for index, type_set in enumerate(expected_type_sets):
            assert mark_vec[index] == mark.convert_types_to_vec(type_set)

    # <sj> closes before <v> opens.
    check(
        ["<bos>", "<sj>", "james", "</sj>", "<v>", "is", "</v>", "<eos>"],
        [],
        ['<sj>'],  # james
        ['<v>'],  # is
        [],
    )

    # <v> is opened and closed inside <sj>.
    check(
        ["<bos>", "<sj>", "A", "<v>", "B", "</v>", "</sj>", "<eos>"],
        [],
        ['<sj>'],  # A
        ['<sj>', '<v>'],  # B
        [],
    )

    # <sj> closes while <v> is still open.
    check(
        ["<bos>", "<sj>", "james", "<v>", "is", "</sj>", "men", "</v>", "<eos>"],
        [],
        ['<sj>'],  # james
        ['<sj>', '<v>'],  # is
        ['<v>'],  # men
        [],
    )
def convert_teach_id_rows(self, id_rows):
    """Convert each id row to a mark row and pad the collection.

    Every row in ``id_rows`` is passed to ``mark.convert_teach_id_row``
    together with ``self.corpus``. The converted rows are then handed to
    ``self.fill_pad`` with ``mark.padding()`` as the second argument, and
    that call's result is returned unchanged.
    """
    converted = []
    for id_row in id_rows:
        converted.append(mark.convert_teach_id_row(id_row, self.corpus))
    pad_value = mark.padding()
    return self.fill_pad(converted, pad_value)