def get_features(tokenizer, sentences, labels):
    """Encode sentences into fixed-length features and wrap them in a tf Dataset.

    Args:
        tokenizer: HuggingFace-style tokenizer exposing ``encode_plus``,
            ``max_len``, ``padding_side``, ``pad_token_id`` and
            ``pad_token_type_id``.
        sentences: iterable of raw text strings.
        labels: sequence of labels parallel to *sentences*; each is coerced
            with ``int()``.

    Returns:
        A ``tf.data.Dataset`` (via the module-level ``data`` alias) yielding
        ``({'input_ids', 'attention_mask', 'token_type_ids'}, label)`` pairs,
        every sequence padded to exactly ``tokenizer.max_len``.

    Raises:
        ValueError: if a padded sequence does not come out at exactly
            ``tokenizer.max_len`` (e.g. ``encode_plus`` returned an
            over-long encoding).
    """
    max_len = tokenizer.max_len  # hoisted: used several times per sentence
    features = []
    for i, sentence in enumerate(sentences):
        inputs = tokenizer.encode_plus(
            sentence, add_special_tokens=True, max_length=max_len)
        input_ids = inputs['input_ids']
        token_type_ids = inputs['token_type_ids']

        # Build the pad segments once; only their placement depends on side.
        padding_length = max_len - len(input_ids)
        attention_mask = [1] * len(input_ids)
        pad_mask = [0] * padding_length
        pad_ids = [tokenizer.pad_token_id] * padding_length
        pad_types = [tokenizer.pad_token_type_id] * padding_length
        if tokenizer.padding_side == 'right':
            input_ids = input_ids + pad_ids
            token_type_ids = token_type_ids + pad_types
            attention_mask = attention_mask + pad_mask
        else:
            input_ids = pad_ids + input_ids
            token_type_ids = pad_types + token_type_ids
            attention_mask = pad_mask + attention_mask

        # Explicit raise instead of `assert`: asserts are stripped under -O.
        if not (max_len == len(attention_mask) == len(input_ids)
                == len(token_type_ids)):
            raise ValueError(
                f'{max_len}, {len(attention_mask)}, {len(input_ids)}, '
                f'{len(token_type_ids)}')

        features.append({
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'token_type_ids': token_type_ids,
            'label': int(labels[i]),
        })

    def gen():
        # Re-shape each feature dict into the (inputs, label) pair expected
        # by Dataset.from_generator.
        for feature in features:
            yield (
                {
                    'input_ids': feature['input_ids'],
                    'attention_mask': feature['attention_mask'],
                    'token_type_ids': feature['token_type_ids'],
                },
                feature['label'],
            )

    return data.Dataset.from_generator(
        gen,
        ({
            'input_ids': int32,
            'attention_mask': int32,
            'token_type_ids': int32,
        }, int64),
        (
            {
                'input_ids': TensorShape([None]),
                'attention_mask': TensorShape([None]),
                'token_type_ids': TensorShape([None]),
            },
            TensorShape([]),
        ),
    )
def _assert_compatible_shape(shape: tf.TensorShape, example_shape): if not shape.is_compatible_with(example_shape): raise ValueError(f"example shape {example_shape} is incompatible with " f"feature shape {shape}")
def _hash_dtype_and_shape(dtype: tf.DType, shape: tf.TensorShape) -> int: if shape.rank is not None: # as_list is not defined on unknown tensorshapes return hash((dtype.name, tuple(shape.as_list()))) return hash((dtype.name, None))
def _hash_dtype_and_shape(dtype: tf.DType, shape: tf.TensorShape) -> int: return hash((dtype.name, tuple(shape.as_list())))
def compute_output_shape(self, input_shape):
    """Output shape after this layer.

    Spatial dimensions shrink by ``self.stride`` (integer division);
    the channel count becomes ``self.filters * self.k``.
    """
    batch = input_shape[0]
    height = input_shape[1] // self.stride
    width = input_shape[2] // self.stride
    channels = self.filters * self.k
    return TensorShape((batch, height, width, channels))
def compute_output_shape(self, input_shape):
    """Output shape: (batch, number_of_classes)."""
    batch_size = input_shape[0]
    return TensorShape((batch_size, self.number_of_classes))