Exemple #1
0
        def make_tensor_fn(data):
            """Collate a batch of (column, question, data_idx) triples.

            Transposes each feature group (keeping raw "text" untouched)
            and forwards features plus the data-index labels to the collator.
            """
            columns, questions, indices = zip(*data)

            features = {
                name: utils.transpose(values, skip_keys=["text"])
                for name, values in (("column", columns),
                                     ("question", questions))
            }
            return collator(features, {"data_idx": indices})
Exemple #2
0
        def make_tensor_fn(data):
            """Collate a classification batch of BERT inputs.

            Unzips (data_idx, bert_input, token_type, class_idx) tuples,
            transposes the two input groups and passes class/data indices
            through as labels.
            """
            indices, bert_inputs, token_types, classes = zip(*data)

            features = {
                name: utils.transpose(values, skip_keys=["text"])
                for name, values in (("bert_input", bert_inputs),
                                     ("token_type", token_types))
            }
            labels = {"class_idx": classes, "data_idx": indices}
            return collator(features, labels)
Exemple #3
0
        def make_tensor_fn(data):
            """Collate a reading-comprehension batch.

            Unzips context/question features together with answer-span
            labels (start, end, answerable) and the example indices, then
            hands everything to the collator.
            """
            (contexts, questions, indices,
             starts, ends, answerable_flags) = zip(*data)

            features = {
                name: utils.transpose(values, skip_keys=["text"])
                for name, values in (("context", contexts),
                                     ("question", questions))
            }
            labels = {
                "answer_idx": indices,
                "answer_start_idx": starts,
                "answer_end_idx": ends,
                "answerable": answerable_flags,
            }
            return collator(features, labels)
Exemple #4
0
        def make_tensor_fn(data):
            bert_input_idxs, token_type_idxs, data_idxs, answer_starts, answer_ends, answerables = zip(
                *data)

            features = {
                "bert_input": utils.transpose(bert_input_idxs,
                                              skip_keys=["text"]),
                "token_type": utils.transpose(token_type_idxs,
                                              skip_keys=["text"]),
            }
            labels = {
                "answer_idx": data_idxs,
                "answer_start_idx": answer_starts,
                "answer_end_idx": answer_ends,
                "answerable": answerables,
            }
            return collator(features, labels)
Exemple #5
0
        def make_tensor_fn(data):
            """Collate a sequence-classification batch.

            Unzips (data_idx, sequence, class_idx) tuples, transposes the
            sequence features and passes class/data indices as labels.
            """
            indices, sequences, classes = zip(*data)

            transposed = utils.transpose(sequences, skip_keys=["text"])
            labels = {"class_idx": classes, "data_idx": indices}
            return collator({"sequence": transposed}, labels)
Exemple #6
0
        def make_tensor_fn(data):
            """Collate a token-tagging batch.

            Transposes each feature group and asks the collator to pad the
            "tag_idxs" labels with the instance's ignore index so padded
            positions are skipped by the loss.
            """
            (indices, bert_inputs, token_types,
             tagged_tokens, token_counts, tag_seqs) = zip(*data)

            features = {
                name: utils.transpose(values, skip_keys=["text"])
                for name, values in (
                    ("bert_input", bert_inputs),
                    ("token_type", token_types),
                    ("tagged_sub_token_idxs", tagged_tokens),
                    ("num_tokens", token_counts),
                )
            }
            labels = {"tag_idxs": tag_seqs, "data_idx": indices}
            return collator(
                features,
                labels,
                apply_pad_labels=["tag_idxs"],
                apply_pad_values=[self.ignore_tag_idx],
            )
Exemple #7
0
        def raw_to_tensor(inputs):
            is_one = True  # batch_size 1 flag
            feature, _helper = data_reader.read_one_example(inputs)

            nonlocal helper
            helper.update(_helper)

            if type(feature) == list:
                is_one = False
                features = feature
            else:
                features = [feature]

            self._index_features(features,
                                 data_reader.text_columns,
                                 suppress_tqdm=True)

            if is_one:
                indexed_features = features[0]
            else:  # when features > 1, need to transpose (dict_of_list -> list_of_dict)
                indexed_features = {}
                for key in features[0]:
                    feature_with_key = [feature[key] for feature in features]
                    indexed_features[key] = transpose(feature_with_key,
                                                      skip_keys=["text"])

            for key in indexed_features:
                for token_name in self.token_makers:
                    if token_name not in indexed_features[key]:
                        continue

                    indexed_values = indexed_features[key][token_name]
                    if is_one:
                        indexed_values = [indexed_values]

                    tensor = padding_tokens(indexed_values,
                                            token_name=token_name)
                    if cuda_device is not None and type(tensor) != list:
                        tensor = tensor.cuda(cuda_device)
                    indexed_features[key][token_name] = tensor

            for key in indexed_features:
                if "text" in indexed_features[key]:
                    del indexed_features[key]["text"]

            return indexed_features, helper