def test_decode_one_sequence_label_to_span():
    """
    Check that one BIO tag sequence is decoded into a list of span dicts.

    Every case pairs a tag sequence with the spans expected back from
    ``BIO.decode_one_sequence_label_to_span``. In the expectations the
    ``end`` value is one past the last tag of the span.
    :return:
    """
    cases = [
        # Span closed by a tag that is neither B- nor I-.
        (["B-T", "I-T", "O-T"],
         [{"label": "T", "begin": 0, "end": 2}]),
        # Span running to the end of the sequence.
        (["B-T", "I-T", "I-T"],
         [{"label": "T", "begin": 0, "end": 3}]),
        # A new B- tag closes the previous span and opens another one.
        (["B-T", "I-T", "I-T", "B-T"],
         [{"label": "T", "begin": 0, "end": 3},
          {"label": "T", "begin": 3, "end": 4}]),
    ]

    for tags, expected_spans in cases:
        decoded = BIO.decode_one_sequence_label_to_span(tags)
        ASSERT.assertListEqual(expected_spans, decoded)
def test_bmes_to_bio():
    """
    Check the conversion of a BMES tag sequence into the BIO schema.
    :return:
    """
    source_tags = ["B-T", "M-T", "E-T", "O", "S-T", "B-T", "E-T"]
    wanted_tags = ["B-T", "I-T", "I-T", "O", "B-T", "B-T", "I-T"]

    converted = bio_schema.bmes_to_bio(source_tags)

    ASSERT.assertListEqual(wanted_tags, converted)
def test_crf_label_index_decoder_with_constraint(crf_data):
    """
    Check CRFLabelIndexDecoder when it is built on the constrained CRF.

    :param crf_data: object supplying ``constraint_crf``, ``label_vocabulary``
        and ``logits`` (defined outside this block)
    :return:
    """
    vocabulary = crf_data.label_vocabulary
    decoder = CRFLabelIndexDecoder(crf=crf_data.constraint_crf,
                                   label_vocabulary=vocabulary)

    # The last position of the second sequence is masked out.
    mask = torch.tensor([[1, 1, 1],
                         [1, 1, 0]], dtype=torch.uint8)
    decoded = decoder(logits=crf_data.logits, mask=mask)

    # The masked position is expected to carry the padding index.
    pad = vocabulary.padding_index
    ASSERT.assertListEqual([[2, 3, 3], [2, 3, pad]], decoded.tolist())
def test_decode_decode_label_index_to_span():
    """
    Check decoding of a batch of golden label indices into spans.
    :return:
    """
    vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                 padding=LabelVocabulary.PADDING)

    # Pin the label -> index mapping that the index tensor below relies on.
    for wanted_index, label in enumerate(("B-T", "I-T", "O")):
        ASSERT.assertEqual(wanted_index, vocabulary.index(label))

    golden_labels = torch.tensor([[0, 1, 2, 0],
                                  [2, 0, 1, 1]])
    expected_spans = [
        [{"label": "T", "begin": 0, "end": 2},
         {"label": "T", "begin": 3, "end": 4}],
        [{"label": "T", "begin": 1, "end": 4}],
    ]

    decoded = BIO.decode_label_index_to_span(
        batch_sequence_label_index=golden_labels,
        mask=None,
        vocabulary=vocabulary)

    ASSERT.assertListEqual(expected_spans, decoded)
def __call__(self, instances: Iterable[Instance]) -> ModelInputs:
    """
    Collate instances into one batch of model inputs.

    The ``"x"`` value of each instance becomes a one-element float tensor.
    The label is 1 when ``x - 50`` is positive and 0 otherwise.

    :param instances: instances to batch; each is indexed with ``"x"``
    :return: ModelInputs holding the batch size, the stacked ``x`` tensor
        and the label tensor
    """
    features = []
    targets = []
    for instance in instances:
        value = instance["x"]
        features.append(torch.tensor([value], dtype=torch.float))
        targets.append(1 if value - 50 > 0 else 0)

    x = torch.stack(features)
    batch_size = x.size(0)

    # Sanity check: the stacked features must be (batch_size, 1).
    ASSERT.assertEqual(x.dim(), 2)
    ASSERT.assertListEqual([batch_size, 1], [x.size(0), x.size(1)])

    labels = torch.tensor(targets)

    # Sanity check: one label per instance.
    ASSERT.assertEqual(labels.dim(), 1)
    ASSERT.assertEqual(batch_size, labels.size(0))

    return ModelInputs(batch_size=batch_size,
                       model_inputs={"x": x},
                       labels=labels)
def test_decode():
    """
    Check decoding of a batch of model logits into spans.
    :return:
    """
    vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                 padding=LabelVocabulary.PADDING)

    # The logits below are laid out for B-T=0, I-T=1, O=2; pin that mapping.
    for wanted_index, label in enumerate(("B-T", "I-T", "O")):
        ASSERT.assertEqual(wanted_index, vocabulary.index(label))

    # Per-position argmax reads: [[O, B, I], [B, B, I], [B, I, I], [B, I, O]]
    batch_sequence_logits = torch.tensor(
        [[[0.2, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
         [[0.8, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
         [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.1]],
         [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.5]]],
        dtype=torch.float)

    expected_spans = [
        [{"label": "T", "begin": 1, "end": 3}],
        [{"label": "T", "begin": 0, "end": 1},
         {"label": "T", "begin": 1, "end": 3}],
        [{"label": "T", "begin": 0, "end": 3}],
        [{"label": "T", "begin": 0, "end": 2}],
    ]

    decoded = BIO.decode(batch_sequence_logits=batch_sequence_logits,
                         mask=None,
                         vocabulary=vocabulary)

    ASSERT.assertListEqual(expected_spans, decoded)
def test_acc_metric():
    """
    Check AccMetric for a single batch and for its value accumulated over
    two batches.
    :return:
    """
    metric = AccMetric()

    # First batch: argmax gives [1, 1, 0, 1]; 1 of 4 agrees with gold.
    first_logits = torch.tensor([[1., 2.], [3., 4.], [5, 4.], [3., 7.]],
                                dtype=torch.float)
    first_gold = torch.tensor([0, 1, 1, 0], dtype=torch.long)
    first_result = metric(prediction_labels=torch.argmax(first_logits, dim=-1),
                          gold_labels=first_gold,
                          mask=None)
    ASSERT.assertAlmostEqual(1/4, first_result[metric.ACC])
    ASSERT.assertAlmostEqual(1/4, metric.metric[metric.ACC])

    # Second batch: argmax gives [0, 1, 0, 1]; 2 of 4 agree with gold.
    second_logits = torch.tensor([[3., 2.], [4., 6.], [5, 4.], [3., 7.]],
                                 dtype=torch.float)
    second_gold = torch.tensor([0, 1, 1, 0], dtype=torch.long)
    second_result = metric(prediction_labels=torch.argmax(second_logits, dim=-1),
                           gold_labels=second_gold,
                           mask=None)
    ASSERT.assertAlmostEqual(2 / 4, second_result[metric.ACC])

    # The stored metric combines both calls: 3 correct out of 8.
    ASSERT.assertAlmostEqual((1+2)/(4+4), metric.metric[metric.ACC])
def test_is_tensor_equal():
    """
    Check ``tensor_util.is_tensor_equal`` for exact and tolerance-based
    comparison.
    :return:
    """
    # Identical integer tensors, zero tolerance.
    ASSERT.assertTrue(
        tensor_util.is_tensor_equal(tensor1=torch.tensor([1, 2, 3]),
                                    tensor2=torch.tensor([1, 2, 3]),
                                    epsilon=0))

    # One differing element, zero tolerance.
    ASSERT.assertFalse(
        tensor_util.is_tensor_equal(tensor1=torch.tensor([1, 2, 3]),
                                    tensor2=torch.tensor([2, 2, 3]),
                                    epsilon=0))

    # Float tensors about 1e-4 apart: equal at 1e-3, not equal at 1e-4.
    shifted = torch.tensor([1.0001, 2.0001, 3.0001])
    base = torch.tensor([1., 2., 3.])
    ASSERT.assertTrue(
        tensor_util.is_tensor_equal(tensor1=shifted, tensor2=base, epsilon=1e-3))
    ASSERT.assertFalse(
        tensor_util.is_tensor_equal(tensor1=shifted, tensor2=base, epsilon=1e-4))
def test_max_label_index_decoder():
    """
    Check that MaxLabelIndexDecoder returns the index of the largest logit
    in each row.
    :return:
    """
    logits = torch.tensor([[0.1, 0.9],
                           [0.3, 0.7],
                           [0.8, 0.2]])

    decoded = MaxLabelIndexDecoder()(logits=logits)

    ASSERT.assertListEqual([1, 1, 0], decoded.tolist())
def test_masked_softmax():
    """
    Check ``masked_softmax`` against a softmax computed with numpy.
    :return:
    """
    vector = torch.FloatTensor([[1., 2., 3.],
                                [4., 5., 6.]])
    mask = torch.ByteTensor([[1, 1, 0],
                             [1, 1, 1]])
    result = masked_softmax(vector=vector, mask=mask)

    # Row 0: softmax over the two unmasked values, then 0 for the masked one.
    first_exp = np.exp(np.array([1., 2.]))
    first_expected = np.concatenate(
        [first_exp / np.sum(first_exp), np.array([0.])], axis=-1).tolist()
    first_actual = result[0].tolist()
    ASSERT.assertEqual(len(first_expected), len(first_actual))
    for wanted, got in zip(first_expected, first_actual):
        ASSERT.assertAlmostEqual(wanted, got)

    # Row 1: nothing masked, so a plain softmax over all three values.
    second_exp = np.exp(np.array([4., 5., 6.]))
    second_expected = (second_exp / np.sum(second_exp)).tolist()
    second_actual = result[1].tolist()
    ASSERT.assertEqual(len(second_expected), len(second_actual))
    for wanted, got in zip(second_expected, second_actual):
        ASSERT.assertAlmostEqual(wanted, got)
def test_sequence_max_label_index_decoder():
    """
    Check SequenceMaxLabelIndexDecoder on a batch of sequence logits with
    no mask.
    :return:
    """
    label_vocabulary = LabelVocabulary(
        [["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
        padding=LabelVocabulary.PADDING)

    # Pin the label -> index mapping the logits are laid out for.
    for wanted_index, label in enumerate(("B-T", "I-T", "O")):
        ASSERT.assertEqual(wanted_index, label_vocabulary.index(label))

    # Per-position argmax reads: [[O, B, I], [B, B, I], [B, I, I], [B, I, O]]
    batch_sequence_logits = torch.tensor(
        [[[0.2, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
         [[0.8, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
         [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.1]],
         [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.5]]],
        dtype=torch.float)

    expected_labels = [["O", "B-T", "I-T"],
                       ["B-T", "B-T", "I-T"],
                       ["B-T", "I-T", "I-T"],
                       ["B-T", "I-T", "O"]]
    expected_indices = [
        [label_vocabulary.index(label) for label in row]
        for row in expected_labels
    ]

    decoder = SequenceMaxLabelIndexDecoder(label_vocabulary=label_vocabulary)
    decoded = decoder(logits=batch_sequence_logits, mask=None)

    ASSERT.assertEqual(expected_indices, decoded.tolist())
def test_allowed_transitions():
    """
    Check the (from, to) index pairs returned by ``BIO.allowed_transitions``.

    In this test ``label_size`` stands for START and ``label_size + 1`` for
    STOP; every other index is a label of the vocabulary.
    :return:
    """
    label_vocabulary = LabelVocabulary(
        labels=[["B-L1", "I-L1", "B-L2", "I-L2", "O"]],
        padding=LabelVocabulary.PADDING)

    allowed_pairs = BIO.allowed_transitions(label_vocabulary=label_vocabulary)

    # Dump the pairs in a readable form (same text the test always printed).
    for from_idx, to_idx in allowed_pairs:
        from_label = ("START" if from_idx == label_vocabulary.label_size
                      else label_vocabulary.token(from_idx))
        to_label = ("STOP" if to_idx == label_vocabulary.label_size + 1
                    else label_vocabulary.token(to_idx))
        print(f"(\"{from_label}\", \"{to_label}\"),")

    expected_transitions = [
        ("B-L1", "B-L1"), ("B-L1", "I-L1"), ("B-L1", "B-L2"),
        ("B-L1", "O"), ("B-L1", "STOP"),
        ("I-L1", "B-L1"), ("I-L1", "I-L1"), ("I-L1", "B-L2"),
        ("I-L1", "O"), ("I-L1", "STOP"),
        ("B-L2", "B-L1"), ("B-L2", "B-L2"), ("B-L2", "I-L2"),
        ("B-L2", "O"), ("B-L2", "STOP"),
        ("I-L2", "B-L1"), ("I-L2", "B-L2"), ("I-L2", "I-L2"),
        ("I-L2", "O"), ("I-L2", "STOP"),
        ("O", "B-L1"), ("O", "B-L2"), ("O", "O"), ("O", "STOP"),
        ("START", "B-L1"), ("START", "B-L2"), ("START", "O"),
    ]

    # Translate the readable pairs back into index pairs.
    expected_pairs = []
    for source, target in expected_transitions:
        source_idx = (label_vocabulary.label_size if source == "START"
                      else label_vocabulary.index(source))
        target_idx = (label_vocabulary.label_size + 1 if target == "STOP"
                      else label_vocabulary.index(target))
        expected_pairs.append((source_idx, target_idx))

    ASSERT.assertSetEqual(set(expected_pairs), set(allowed_pairs))
def event_type_vocabulary():
    """
    Build the event-type vocabulary and check its layout.

    The vocabulary has no padding token and uses ``"Negative"`` as unk,
    placed first.
    :return: the checked vocabulary
    """
    vocabulary = Vocabulary(tokens=[["A", "B", "C"], ["A", "B"], ["A"]],
                            padding="",
                            unk="Negative",
                            special_first=True)

    ASSERT.assertEqual(4, vocabulary.size)
    # unk takes index 0; the event types follow from index 1.
    ASSERT.assertEqual(0, vocabulary.index(vocabulary.unk))
    for wanted_index, event_type in enumerate(("A", "B", "C"), start=1):
        ASSERT.assertEqual(wanted_index, vocabulary.index(event_type))

    return vocabulary
def test_cnn_seq2vec_output_dim():
    """
    Check the output size of CnnSeq2Vec: batch size by
    ``num_filters * number of kernel sizes``.
    :return:
    """
    kernel_size = (1, 2, 3, 4, 5)
    encoder = CnnSeq2Vec(embedding_dim=7,
                         num_filters=13,
                         kernel_sizes=kernel_size)

    vector = encoder(sequence=torch.rand(4, 8, 7), mask=None)

    expected_size = (4, 13 * len(kernel_size))
    ASSERT.assertEqual(expected_size, vector.size())
def test_crf_label_index_decoder(crf_data):
    """
    Check CRFLabelIndexDecoder when it is built on the plain CRF of
    ``crf_data``.

    :param crf_data: object supplying ``crf``, ``label_vocabulary`` and
        ``logits`` (defined outside this block)
    :return:
    """
    vocabulary = crf_data.label_vocabulary
    decoder = CRFLabelIndexDecoder(crf=crf_data.crf,
                                   label_vocabulary=vocabulary)

    # The last position of the second sequence is masked out.
    mask = torch.tensor([[1, 1, 1],
                         [1, 1, 0]], dtype=torch.long)
    decoded = decoder(logits=crf_data.logits, mask=mask)

    # The masked position is expected to carry the padding index.
    pad = vocabulary.padding_index
    ASSERT.assertListEqual([[2, 4, 3], [4, 2, pad]], decoded.tolist())
def test_component_evaluate_factory():
    """
    Check that ComponentFactory in evaluation mode (``is_training=False``)
    builds ``my_component`` from ``training.json`` with value
    ``"evaluate_3"``.
    :return:
    """
    Registry().clear_objects()

    config_path = os.path.join(
        ROOT_PATH, "data/easytext/tests/component/training.json")
    with open(config_path, encoding="utf-8") as config_file:
        # OrderedDict keeps the key order of the JSON file.
        config = json.load(config_file, object_pairs_hook=OrderedDict)

    components = ComponentFactory(is_training=False).create(config=config)

    ASSERT.assertEqual("evaluate_3", components["my_component"].value)
def test_glove_loader():
    """
    Check that GloveLoader reads the 3-dimensional sample embedding file
    into a token -> vector dict.
    :return:
    """
    sample_path = os.path.join(
        ROOT_PATH,
        "data/easytext/tests/pretrained/word_embedding_sample.3d.txt")
    loader = GloveLoader(embedding_dim=3, pretrained_file_path=sample_path)

    loaded = loader.load()

    wanted = {
        "a": [1.0, 2.0, 3.0],
        "b": [4.0, 5.0, 6.0],
        "美丽": [7.0, 8.0, 9.0],
    }
    ASSERT.assertDictEqual(wanted, loaded)
    ASSERT.assertEqual(loader.embedding_dim, 3)
def test_span_f1_measure_with_mask():
    """
    Check SpanF1Metric with a mask applied to predictions and gold labels.

    Predictions are built by looking each label up in ``VOCAB``; gold is
    written as raw indices (presumably matching B-T=0, I-T=1, O=2 in
    ``VOCAB`` -- it is defined outside this block, confirm there). Every
    precision, recall and F1 value is expected to be 1.
    :return:
    """
    batch_sequence_labels = [["O", "B-T", "I-T"],
                             ["B-T", "B-T", "I-T"],
                             ["B-T", "I-T", "I-T"],
                             ["B-T", "I-T", "O"]]
    sequence_label_indices = torch.tensor(
        [[VOCAB.index(label) for label in sequence_label]
         for sequence_label in batch_sequence_labels],
        dtype=torch.long)

    gold = torch.tensor([
        [2, 0, 1],
        [0, 0, 1],
        [0, 1, 1],
        [0, 1, 2]
    ])

    f1 = SpanF1Metric(label_vocabulary=VOCAB)

    # Rows 0 and 2 are cut short by the mask.
    mask = torch.tensor([
        [1, 1, 0],
        [1, 1, 1],
        [1, 0, 0],
        [1, 1, 1]
    ], dtype=torch.long)

    f1(prediction_labels=sequence_label_indices, gold_labels=gold, mask=mask)

    metrics = f1.metric
    print(f"metrics: {json.dumps(metrics)}")

    expect = {f"{SpanF1Metric.PRECISION}-T": 1.,
              f"{SpanF1Metric.RECALL}-T": 1.,
              f"{SpanF1Metric.F1}-T": 1.,
              f"{SpanF1Metric.PRECISION_OVERALL}": 1.,
              f"{SpanF1Metric.RECALL_OVERALL}": 1.,
              f"{SpanF1Metric.F1_OVERALL}": 1.}

    for key, expected_value in expect.items():
        ASSERT.assertAlmostEqual(expected_value, metrics[key])
def test_decode_one_sequence_logits_to_label():
    """
    Check decoding of one sequence of logits into labels and label indices.
    :return:
    """
    vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                 padding=LabelVocabulary.PADDING)

    # Pin the label -> index mapping the logits are laid out for.
    for wanted_index, label in enumerate(("B-T", "I-T", "O")):
        ASSERT.assertEqual(wanted_index, vocabulary.index(label))

    cases = [
        # O B I: the regular case
        (torch.tensor([[0.2, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                      dtype=torch.float),
         ["O", "B-T", "I-T"]),
        (torch.tensor([[0.9, 0.3, 0.4], [0.2, 0.8, 0.3], [0.2, 0.3, 0.1]],
                      dtype=torch.float),
         ["B-T", "I-T", "I-T"]),
        (torch.tensor([[0.9, 0.3, 0.4], [0.2, 0.8, 0.3], [0.2, 0.3, 0.9]],
                      dtype=torch.float),
         ["B-T", "I-T", "O"]),
    ]

    for logits, wanted_labels in cases:
        labels, label_indices = BIO.decode_one_sequence_logits_to_label(
            sequence_logits=logits,
            vocabulary=vocabulary)

        ASSERT.assertListEqual(labels, wanted_labels)

        wanted_indices = [vocabulary.index(label) for label in wanted_labels]
        ASSERT.assertListEqual(label_indices, wanted_indices)
def test_label_vocabulary():
    """
    Check LabelVocabulary size and indexing with and without a padding
    token.
    :return:
    """
    labels = [["A", "B", "C"], ["D", "E"]]

    # Empty padding: only the five labels are counted.
    without_padding = LabelVocabulary(labels, padding="")
    ASSERT.assertEqual(without_padding.size, 5)

    # With padding: one extra entry, indexed after the labels.
    vocabulary = LabelVocabulary(labels, padding=LabelVocabulary.PADDING)
    ASSERT.assertEqual(vocabulary.size, 6)
    ASSERT.assertEqual(vocabulary.label_size, 5)
    ASSERT.assertEqual(vocabulary.index(vocabulary.padding), 5)

    for wanted_index, label in enumerate(["A", "B", "C", "D", "E"]):
        ASSERT.assertEqual(vocabulary.index(label), wanted_index)
def test_component_factory():
    """
    Check that ComponentFactory in training mode builds the model described
    in ``model.json``, with a linear layer of 2 inputs and 4 outputs.
    :return:
    """
    Registry().clear_objects()

    model_path = os.path.join(
        ROOT_PATH, "data/easytext/tests/component/model.json")
    with open(model_path, encoding="utf-8") as model_file:
        # OrderedDict keeps the key order of the JSON file.
        config = json.load(model_file, object_pairs_hook=OrderedDict)

    components = ComponentFactory(is_training=True).create(config=config)
    model = components["model"]

    ASSERT.assertTrue(model.linear is not None)
    linear_shape = (model.linear.in_features, model.linear.out_features)
    ASSERT.assertEqual((2, 4), linear_shape)
def test_ibo1_to_bio():
    """
    Check the conversion of an IBO1 tag sequence into the BIO schema.
    :return:
    """
    source_tags = [
        "I-L1", "I-L1", "O",
        "I-L1", "I-L2", "O",
        "I-L1", "I-L1", "I-L1", "B-L1", "I-L1", "O",
        "B-L1", "I-L1", "O",
    ]
    wanted_tags = [
        "B-L1", "I-L1", "O",
        "B-L1", "B-L2", "O",
        "B-L1", "I-L1", "I-L1", "B-L1", "I-L1", "O",
        "B-L1", "I-L1", "O",
    ]

    converted = bio_schema.ibo1_to_bio(source_tags)

    ASSERT.assertListEqual(wanted_tags, converted)
def test_default_typename():
    """
    Check component construction when one of its parameters is an object.

    The ``default_typename`` component built from
    ``default_typename.json`` is expected to have value 10.
    :return:
    """
    Registry().clear_objects()

    config_path = os.path.join(
        ROOT_PATH, "data/easytext/tests/component/default_typename.json")
    with open(config_path, encoding="utf-8") as config_file:
        # OrderedDict keeps the key order of the JSON file.
        config = json.load(config_file, object_pairs_hook=OrderedDict)

    components = ComponentFactory(is_training=False).create(config=config)

    ASSERT.assertEqual(10, components["default_typename"].value)
def test_vocabulary_speical_first():
    """
    Check a Vocabulary built with ``special_first=True``: padding takes
    index 0 and unk takes index 1.
    :return:
    """
    vocabulary = Vocabulary([["我", "和", "你"], ["在", "我"]],
                            padding=Vocabulary.PADDING,
                            unk=Vocabulary.UNK,
                            special_first=True,
                            min_frequency=1,
                            max_size=None)

    # Four distinct tokens plus padding and unk.
    ASSERT.assertEqual(vocabulary.size, 6)

    ASSERT.assertEqual(vocabulary.padding, vocabulary.PADDING)
    ASSERT.assertEqual(vocabulary.unk, vocabulary.UNK)

    ASSERT.assertEqual(vocabulary.index(vocabulary.padding), 0)
    ASSERT.assertEqual(vocabulary.index(vocabulary.unk), 1)
def test_fill():
    """
    Check that ``BIO.fill`` writes a ``B-`` tag at ``begin_index`` and
    ``I-`` tags on the following positions before ``end_index``.
    :return:
    """
    tag = "Test"

    for begin, end in [(1, 2), (2, 4)]:
        labels = ["O"] * 10

        BIO.fill(sequence_label=labels,
                 begin_index=begin,
                 end_index=end,
                 tag=tag)

        for position in range(begin, end):
            prefix = "B" if position == begin else "I"
            ASSERT.assertEqual(labels[position], f"{prefix}-{tag}")
def test_config():
    """
    Check that Config built from ``config.json`` in training mode exposes
    a model and an optimizer of the expected classes.
    :return:
    """
    config_path = os.path.join(
        ROOT_PATH, "data/easytext/tests/config/config.json")
    config = Config(is_training=True, config_file_path=config_path)

    # Model: present and of the expected class.
    ASSERT.assertTrue(config.model is not None)
    ASSERT.assertTrue(isinstance(config.model, _MyModel))

    # Optimizer: present and of the expected class.
    ASSERT.assertTrue(config.optimizer is not None)
    ASSERT.assertTrue(isinstance(config.optimizer, _MyOpitmizer))
def test_gat_without_hidden():
    """
    Check the GAT output when no hidden size is configured
    (``hidden_size=None``).

    The seeds are fixed before the model is built so the output can be
    compared with hard-coded values.
    :return:
    """
    torch.manual_seed(7)
    torch.cuda.manual_seed_all(7)

    in_features = 2
    out_features = 4
    gat = GAT(in_features=in_features,
              out_features=out_features,
              dropout=0.,
              alpha=0.1,
              num_heads=3,
              hidden_size=None)

    nodes = torch.tensor(
        [[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
         [[0.7, 0.8], [0.9, 0.10], [0.11, 0.12]]],
        dtype=torch.float)
    adj = torch.tensor(
        [[[0, 1, 0], [1, 0, 0], [0, 0, 0]],
         [[0, 1, 1], [1, 0, 1], [1, 1, 0]]],
        dtype=torch.long)

    output_nodes: torch.Tensor = gat(nodes=nodes, adj=adj)

    # The output keeps the first two sizes of nodes; the last is out_features.
    wanted_size = (nodes.size(0), nodes.size(1), out_features)
    ASSERT.assertEqual(wanted_size, output_nodes.size())

    wanted_values = torch.tensor(
        [[[-1.6478, -0.3935, -2.6613, -2.7653],
          [-1.3204, -0.8394, -1.8519, -1.9375],
          [-1.6478, -0.3935, -2.6613, -2.7653]],
         [[-1.9897, -0.4203, -2.4447, -2.1232],
          [-2.1944, -0.1897, -3.4053, -3.5697],
          [-2.9364, -0.0878, -4.1695, -4.1617]]],
        dtype=torch.float)
    ASSERT.assertTrue(
        tensor_util.is_tensor_equal(wanted_values, output_nodes, epsilon=1e-4))
def test_gat_with_hidden():
    """
    Check the GAT output when a hidden size is configured
    (``hidden_size=3``).

    The seeds are fixed before the model is built so the output can be
    compared with hard-coded values.
    :return:
    """
    torch.manual_seed(7)
    torch.cuda.manual_seed_all(7)

    in_features = 2
    out_features = 4
    gat = GAT(in_features=in_features,
              out_features=out_features,
              dropout=0.,
              alpha=0.1,
              num_heads=3,
              hidden_size=3)

    nodes = torch.tensor(
        [[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
         [[0.7, 0.8], [0.9, 0.10], [0.11, 0.12]]],
        dtype=torch.float)
    adj = torch.tensor(
        [[[0, 1, 0], [1, 0, 0], [0, 0, 0]],
         [[0, 1, 1], [1, 0, 1], [1, 1, 0]]],
        dtype=torch.long)

    output_nodes: torch.Tensor = gat(nodes=nodes, adj=adj)

    # The output keeps the first two sizes of nodes; the last is out_features.
    wanted_size = (nodes.size(0), nodes.size(1), out_features)
    ASSERT.assertEqual(wanted_size, output_nodes.size())

    wanted_values = torch.tensor(
        [[[-1.3835, -1.4764, -1.2033, -1.5113],
          [-1.3316, -1.5785, -1.1564, -1.5368],
          [-1.3475, -1.5467, -1.1706, -1.5279]],
         [[-1.3388, -1.6693, -1.4427, -1.1610],
          [-1.4288, -1.6525, -1.6607, -0.9707],
          [-1.4320, -1.4422, -1.6465, -1.1025]]])
    ASSERT.assertTrue(
        tensor_util.is_tensor_equal(wanted_values, output_nodes, epsilon=1e-4))
def test_cnn_seq2vec():
    """
    Check the values CnnSeq2Vec produces when every parameter is set to 1.
    :return:
    """
    encoder = CnnSeq2Vec(embedding_dim=2, num_filters=1, kernel_sizes=(1, 2))

    # Make the output predictable: overwrite every parameter with 1.
    for _, parameter in encoder.named_parameters():
        parameter.data.fill_(1.)

    tokens = torch.FloatTensor([[[0.7, 0.8], [0.1, 1.5]]])
    actual = encoder(sequence=tokens, mask=None).view(-1).tolist()

    # One value per kernel size, written as sums of input values plus 1
    # (presumably the bias term -- confirm against CnnSeq2Vec). Built via
    # torch so both sides go through the same float conversion.
    wanted = torch.tensor(
        [[0.1 + 1.5 + 1., 0.7 + 0.8 + 0.1 + 1.5 + 1.]]).view(-1).tolist()

    ASSERT.assertEqual(len(wanted), len(actual))
    for wanted_value, actual_value in zip(wanted, actual):
        ASSERT.assertAlmostEqual(wanted_value, actual_value)
def test_vocabulary():
    """
    Check a Vocabulary built with empty padding and unk: only the four
    distinct tokens are counted and indexing starts at 0.
    :return:
    """
    vocabulary = Vocabulary([["我", "和", "你"], ["在", "我"]],
                            padding="",
                            unk="",
                            special_first=True,
                            min_frequency=1,
                            max_size=None)

    ASSERT.assertEqual(vocabulary.size, 4)

    # Both special tokens were given as empty strings, so they stay falsy.
    ASSERT.assertTrue(not vocabulary.padding)
    ASSERT.assertTrue(not vocabulary.unk)

    ASSERT.assertEqual(vocabulary.index("我"), 0)
    ASSERT.assertEqual(vocabulary.index("和"), 1)