def _build_bert_input(self, input_sentences):
    """Convert raw sentences into a BERT input_fn plus a feature lookup table.

    Args:
        input_sentences: Iterable of raw text strings; one single-sentence
            example is built per string.

    Returns:
        Tuple of (input_fn, input_features_dict) where input_features_dict
        maps each feature's unique_id to its InputFeature, for use in the
        predict loop.
    """
    # Wrap each sentence in a single-sentence InputExample (text_b unused,
    # since we want one embedding per sentence).
    examples = [
        InputExample(unique_id=idx, text_a=text, text_b=None)
        for idx, text in enumerate(input_sentences)
    ]

    # Tokenize and pad the examples into model-ready InputFeature objects.
    features = convert_examples_to_features(
        examples=examples,
        seq_length=self.max_seq_length,
        tokenizer=self.tokenizer,
    )

    # input_fn feeds the features to the BERT estimator.
    input_fn = input_fn_builder(
        features=features,
        seq_length=self.max_seq_length,
    )

    # Index the per-example info by unique_id for retrieval after predict.
    input_features_dict = {f.unique_id: f for f in features}
    return input_fn, input_features_dict
def extract(self, sentence):
    """Extract per-token, per-layer activations for a single sentence.

    Args:
        sentence: Raw text string to embed.

    Returns:
        OrderedDict with a "features" list: one entry per token, each an
        OrderedDict holding the token string and, for every selected layer,
        its index and rounded activation values.
    """
    example = [InputExample(unique_id=0, text_a=sentence, text_b=None)]
    # NOTE(review): _max_seq_length and _layers are read as module-level
    # names (not self attributes) — confirm they exist at module scope.
    features = convert_examples_to_features(
        examples=example,
        seq_length=_max_seq_length,
        tokenizer=self._tokenizer,
    )
    input_fn = input_fn_builder(features=features, seq_length=_max_seq_length)
    outputs = []
    for output in self._estimator.predict(input_fn):
        feature = features[0]
        output_dict = collections.OrderedDict()
        all_features = []
        for (i, token) in enumerate(feature.tokens):
            all_layers = []
            for (j, layer_index) in enumerate(_layers):
                layer_output = output["layer_output_%d" % j]
                layers = collections.OrderedDict()
                layers["index"] = layer_index
                # Round to 6 decimals to keep the output compact.
                layers["values"] = [
                    round(float(x), 6) for x in layer_output[i:(i + 1)].flat
                ]
                all_layers.append(layers)
            # Bug fix: the original rebound the name `features` here,
            # clobbering the outer features list; `features[0]` above would
            # then raise KeyError on any second prediction.
            token_features = collections.OrderedDict()
            token_features["token"] = token
            token_features["layers"] = all_layers
            all_features.append(token_features)
        output_dict["features"] = all_features
        outputs.append(output_dict)
    # Only one example was submitted, so return its single output.
    return outputs[0]
def extract_v1(sentence, estimator, tokenizer, sen_len=15):
    """Run the estimator on one sentence and return its first prediction.

    Args:
        sentence: Raw text string to embed.
        estimator: Estimator whose predict() yields model outputs.
        tokenizer: Tokenizer passed through to feature conversion.
        sen_len: Maximum sequence length for padding/truncation.

    Returns:
        The first prediction emitted for the single input example.
    """
    examples = [InputExample(unique_id=0, text_a=sentence, text_b=None)]
    feats = convert_examples_to_features_1(
        examples=examples, seq_length=sen_len, tokenizer=tokenizer)
    input_fn = input_fn_builder(features=feats, seq_length=sen_len)
    # Drain the prediction generator; only one example was submitted.
    predictions = list(estimator.predict(input_fn))
    return predictions[0]
def extract_v1(self, sentence):
    """Embed a single sentence and return the raw first prediction.

    Args:
        sentence: Raw text string to embed.

    Returns:
        The first output yielded by the estimator for this sentence.
    """
    # NOTE(review): _max_seq_length is read as a module-level name here
    # (not a self attribute) — confirm it exists at module scope.
    examples = [InputExample(unique_id=0, text_a=sentence, text_b=None)]
    feats = convert_examples_to_features(
        examples=examples,
        seq_length=_max_seq_length,
        tokenizer=self._tokenizer,
    )
    input_fn = input_fn_builder(features=feats, seq_length=_max_seq_length)
    # Drain the generator; a single example yields a single prediction.
    predictions = list(self._estimator.predict(input_fn))
    return predictions[0]
def extracts_v1(sentences, estimator, tokenizer, sen_len=15):
    """Embed a batch of sentences, one prediction per input.

    Args:
        sentences: Iterable of raw text strings.
        estimator: Estimator whose predict() yields model outputs.
        tokenizer: Tokenizer passed through to feature conversion.
        sen_len: Maximum sequence length for padding/truncation.

    Returns:
        List of predictions, in the same order as the input sentences.
    """
    examples = [
        InputExample(unique_id=idx, text_a=text, text_b=None)
        for idx, text in enumerate(sentences)
    ]
    feats = convert_examples_to_features(
        examples=examples, seq_length=sen_len, tokenizer=tokenizer)  #, get_cls = get_cls)
    input_fn = input_fn_builder(features=feats, seq_length=sen_len)
    return list(estimator.predict(input_fn))
def extracts_pad(add_n, estimator, select_layers, sen_len=15):
    """Run the estimator over `add_n` zero-valued [PAD] features.

    Args:
        add_n: Number of pad features to generate and predict on.
        estimator: Estimator whose predict() yields model outputs.
        select_layers: Layers whose outputs are concatenated per prediction.
        sen_len: Sequence length passed to the input_fn builder.

    Returns:
        List of concatenated layer outputs, one per pad feature.
    """
    # NOTE(review): unique_id/input_ids/input_mask/input_type_ids are
    # length-1 lists here rather than sen_len-sized — confirm this is the
    # intended padding shape.
    pad_features = [
        InputFeatures(
            unique_id=[0],
            tokens='[PAD]',
            input_ids=[0],
            input_mask=[0],
            input_type_ids=[0],
        )
        for _ in range(add_n)
    ]
    input_fn = input_fn_builder(features=pad_features, seq_length=sen_len)
    n_layers = len(select_layers)
    return [
        concat_layers(output, n_layers)
        for output in estimator.predict(input_fn)
    ]
def _make_examples(self, texts):
    """Creates BERT examples and input_fn to iterate over them.

    Args:
        texts: List of strings. One example will be created per string.

    Returns:
        Tuple of (unique_id_to_feature, input_fn): a dictionary mapping
        each unique example ID to its feature object, and the input_fn
        that feeds those features to the estimator.
    """
    # Doc fix: the original docstring claimed a bare dictionary return,
    # but this method has always returned a (dict, input_fn) tuple.
    print('MAKING EXAMPLES')  # debug trace; consider the logging module
    examples = [
        extract_features.InputExample(i, text, None)
        for i, text in enumerate(texts)
    ]
    features = extract_features.convert_examples_to_features(
        examples, self._max_seq_len, self._tokenizer)
    unique_id_to_feature = {}
    for feature in features:
        unique_id_to_feature[feature.unique_id] = feature
    input_fn = extract_features.input_fn_builder(
        features=features, seq_length=self._max_seq_len)
    return unique_id_to_feature, input_fn