コード例 #1
0
 def test_creating_buffered_context_check_labels_shape(self):
     """Expect ``ValueError`` when three label entries meet two-entry context lists."""

     def empty_parameters():
         # A fresh ConvertParameters (with its own empty dict) on every call,
         # mirroring the separate inline instances used for each argument.
         return ConvertParameters(0, False, {})

     with self.assertRaises(ValueError):
         # Inputs are built inside the block so that a ValueError raised at
         # any point of the construction is also captured by the assertion.
         label_input = ([[], [], []], empty_parameters())
         context_inputs = {
             FROM_TOKEN: ([[], []], empty_parameters()),
             PATH_TYPES: ([[], []], empty_parameters()),
             TO_TOKEN: ([[], []], empty_parameters()),
         }
         BufferedPathContext.create_from_lists(label_input, context_inputs)
コード例 #2
0
def _convert_raw_buffer(convert_args: Tuple[List[str], PreprocessingConfig, Vocabulary, str, int]):
    """Convert one buffer of raw text lines to ids and store it on disk.

    ``convert_args`` is a single tuple ``(lines, config, vocab, output_folder,
    buffer_id)``. Every line is split on whitespace: the first field is the
    label, all remaining fields are path contexts.

    Side effects:
        * appends a ``buffer_id,file name,n_samples,n_paths`` row to the
          description file inside ``output_folder``;
        * dumps the resulting ``BufferedPathContext`` to the buffer file
          inside ``output_folder``.
    """
    lines, config, vocab, output_folder, buffer_id = convert_args

    labels = []
    from_tokens = []
    path_types = []
    to_tokens = []
    for raw_line in lines:
        raw_label, *raw_contexts = raw_line.split()
        label_parts = _parse_token(raw_label, config.split_target)
        # Label parts missing from the vocabulary fall back to the UNK id.
        labels.append([vocab.label_to_id.get(part, vocab.label_to_id[UNK]) for part in label_parts])

        id_triples = [
            _convert_path_context_to_ids(config.split_names, raw_context, vocab) for raw_context in raw_contexts
        ]
        # Each converted context is indexed as (from token, path type, to token).
        from_tokens.append([triple[0] for triple in id_triples])
        path_types.append([triple[1] for triple in id_triples])
        to_tokens.append([triple[2] for triple in id_triples])

    label_input = (labels, ConvertParameters(config.max_target_parts, config.wrap_target, vocab.label_to_id))
    context_inputs = {
        FROM_TOKEN: (from_tokens, ConvertParameters(config.max_name_parts, config.wrap_name, vocab.token_to_id)),
        PATH_TYPES: (path_types, ConvertParameters(config.max_path_length, config.wrap_path, vocab.type_to_id)),
        TO_TOKEN: (to_tokens, ConvertParameters(config.max_name_parts, config.wrap_name, vocab.token_to_id)),
    }
    bpc = BufferedPathContext.create_from_lists(label_input, context_inputs)

    # The description row is appended before the buffer itself is dumped.
    with open(path.join(output_folder, DESCRIPTION_FILE), "a") as description:
        sample_count = len(bpc.contexts_per_label)
        path_count = sum(bpc.contexts_per_label)
        buffer_file_name = BUFFERED_PATH_TEMPLATE.format(buffer_id)
        description.write(f"{buffer_id},{buffer_file_name},{sample_count},{path_count}\n")
    bpc.dump(path.join(output_folder, buffer_file_name))
コード例 #3
0
    def test_creating_standard_path_context(self):
        """Compare ``create_from_lists`` output with hand-written arrays for three samples."""
        token_to_id = {SOS: 0, EOS: 1, PAD: 2}
        type_to_id = {SOS: 1, EOS: 2, PAD: 0}
        label_to_id = {SOS: 2, EOS: 0, PAD: 1}

        # One outer entry per sample; the middle sample has an empty label
        # and three empty path contexts.
        labels = [[4], [], [4, 5, 6]]
        from_tokens = [[[4], [5, 6]], [[], [], []], [[6, 5, 4]]]
        path_types = [[[4, 5], [6]], [[], [], []], [[6, 5, 4]]]
        to_tokens = [[[6], [4, 5]], [[], [], []], [[4, 6, 4]]]

        label_input = (labels, ConvertParameters(3, True, label_to_id))
        context_inputs = {
            FROM_TOKEN: (from_tokens, ConvertParameters(3, False, token_to_id)),
            PATH_TYPES: (path_types, ConvertParameters(3, True, type_to_id)),
            TO_TOKEN: (to_tokens, ConvertParameters(3, False, token_to_id)),
        }
        result = BufferedPathContext.create_from_lists(label_input, context_inputs)

        expected_labels = numpy.array([[2, 2, 2], [4, 0, 4], [0, 1, 5], [1, 1, 6]])
        # Kept as ordered pairs so the context checks run from-token, path-type, to-token.
        expected_contexts = [
            (
                FROM_TOKEN,
                numpy.array([[4, 5, 2, 2, 2, 6], [2, 6, 2, 2, 2, 5], [2, 2, 2, 2, 2, 4]]),
            ),
            (
                PATH_TYPES,
                numpy.array(
                    [
                        [1, 1, 1, 1, 1, 1],
                        [4, 6, 2, 2, 2, 6],
                        [5, 2, 0, 0, 0, 5],
                        [2, 0, 0, 0, 0, 4],
                    ]
                ),
            ),
            (
                TO_TOKEN,
                numpy.array([[6, 4, 2, 2, 2, 4], [2, 5, 2, 2, 2, 6], [2, 2, 2, 2, 2, 4]]),
            ),
        ]

        # Number of path contexts supplied for each of the three samples.
        self.assertListEqual([2, 3, 1], result.contexts_per_label)
        numpy.testing.assert_array_equal(expected_labels, result.labels)
        for context_key, expected_array in expected_contexts:
            numpy.testing.assert_array_equal(expected_array, result.contexts[context_key])