def make_tensor_fn(data):
    """Collate a batch of (column_idxs, question_idxs, data_idx) examples.

    Unzips the batch, transposes each indexed feature into the
    dict-of-lists layout the shared ``collator`` expects (dropping the
    raw "text" entries, which are not tensorizable), and returns the
    collated (features, labels) tensors.
    """
    cols, questions, idxs = zip(*data)

    def _t(indexed):
        # dict_of_list layout for the collator; "text" is kept out of tensors
        return utils.transpose(indexed, skip_keys=["text"])

    features = {"column": _t(cols), "question": _t(questions)}
    labels = {"data_idx": idxs}
    return collator(features, labels)
def make_tensor_fn(data):
    """Collate (data_idx, bert_input, token_type, class_idx) examples.

    Produces BERT-style classification inputs: transposed
    ``bert_input``/``token_type`` features (raw "text" dropped) plus
    ``class_idx``/``data_idx`` labels, handed to the shared ``collator``.
    """
    idxs, bert_inputs, token_types, classes = zip(*data)

    def _t(indexed):
        # dict_of_list layout for the collator; "text" is not a tensor field
        return utils.transpose(indexed, skip_keys=["text"])

    features = {"bert_input": _t(bert_inputs), "token_type": _t(token_types)}
    labels = {"class_idx": classes, "data_idx": idxs}
    return collator(features, labels)
def make_tensor_fn(data):
    """Collate reading-comprehension examples with answer-span labels.

    Each item is (context_idxs, question_idxs, data_idx, answer_start,
    answer_end, answerable). Context/question features are transposed
    (raw "text" dropped); span boundaries and the answerable flag become
    labels for the shared ``collator``.
    """
    (contexts, questions, idxs,
     starts, ends, answerable_flags) = zip(*data)

    def _t(indexed):
        # dict_of_list layout for the collator; "text" is not a tensor field
        return utils.transpose(indexed, skip_keys=["text"])

    features = {"context": _t(contexts), "question": _t(questions)}
    labels = {
        "answer_idx": idxs,
        "answer_start_idx": starts,
        "answer_end_idx": ends,
        "answerable": answerable_flags,
    }
    return collator(features, labels)
def make_tensor_fn(data):
    """Collate BERT-style QA examples with answer-span labels.

    Each item is (bert_input_idxs, token_type_idxs, data_idx,
    answer_start, answer_end, answerable). Transposed
    ``bert_input``/``token_type`` features (raw "text" dropped) go in as
    features; span boundaries and the answerable flag as labels.
    """
    (bert_inputs, token_types, idxs,
     starts, ends, answerable_flags) = zip(*data)

    def _t(indexed):
        # dict_of_list layout for the collator; "text" is not a tensor field
        return utils.transpose(indexed, skip_keys=["text"])

    features = {"bert_input": _t(bert_inputs), "token_type": _t(token_types)}
    labels = {
        "answer_idx": idxs,
        "answer_start_idx": starts,
        "answer_end_idx": ends,
        "answerable": answerable_flags,
    }
    return collator(features, labels)
def make_tensor_fn(data):
    """Collate (data_idx, sequence_idxs, class_idx) examples.

    Transposes the indexed ``sequence`` feature into the dict-of-lists
    layout expected by the shared ``collator`` (dropping raw "text") and
    passes class/data indices through as labels.
    """
    idxs, sequences, classes = zip(*data)
    features = {"sequence": utils.transpose(sequences, skip_keys=["text"])}
    labels = {"class_idx": classes, "data_idx": idxs}
    return collator(features, labels)
def make_tensor_fn(data):
    """Collate BERT token-tagging examples.

    Each item is (data_idx, bert_input_idxs, token_type_idxs,
    tagged_token_idxs, num_tokens, tag_idxs). All indexed features are
    transposed to the collator's dict-of-lists layout (raw "text"
    dropped); per-token ``tag_idxs`` labels are padded by the collator
    with ``self.ignore_tag_idx`` — NOTE(review): presumably so the loss
    ignores padded positions; confirm against the collator.
    """
    (idxs, bert_inputs, token_types,
     tagged_sub_tokens, token_counts, tag_idx_rows) = zip(*data)

    def _t(indexed):
        # dict_of_list layout for the collator; "text" is not a tensor field
        return utils.transpose(indexed, skip_keys=["text"])

    features = {
        "bert_input": _t(bert_inputs),
        "token_type": _t(token_types),
        "tagged_sub_token_idxs": _t(tagged_sub_tokens),
        "num_tokens": _t(token_counts),
    }
    labels = {"tag_idxs": tag_idx_rows, "data_idx": idxs}
    return collator(
        features,
        labels,
        apply_pad_labels=["tag_idxs"],
        apply_pad_values=[self.ignore_tag_idx],
    )
def raw_to_tensor(inputs): is_one = True # batch_size 1 flag feature, _helper = data_reader.read_one_example(inputs) nonlocal helper helper.update(_helper) if type(feature) == list: is_one = False features = feature else: features = [feature] self._index_features(features, data_reader.text_columns, suppress_tqdm=True) if is_one: indexed_features = features[0] else: # when features > 1, need to transpose (dict_of_list -> list_of_dict) indexed_features = {} for key in features[0]: feature_with_key = [feature[key] for feature in features] indexed_features[key] = transpose(feature_with_key, skip_keys=["text"]) for key in indexed_features: for token_name in self.token_makers: if token_name not in indexed_features[key]: continue indexed_values = indexed_features[key][token_name] if is_one: indexed_values = [indexed_values] tensor = padding_tokens(indexed_values, token_name=token_name) if cuda_device is not None and type(tensor) != list: tensor = tensor.cuda(cuda_device) indexed_features[key][token_name] = tensor for key in indexed_features: if "text" in indexed_features[key]: del indexed_features[key]["text"] return indexed_features, helper