def load_input_data(genre, level, data_type, input_config, add_features, scale_features):
    """Assemble the model inputs for the requested ``input_config``.

    For every config except ``'bert'`` the result is a dict with ``'x'`` (a
    list of inputs) and ``'y'`` (the labels).  For ``'bert'`` the result is a
    list of ``run_classifier.InputExample`` objects.

    When ``add_features`` is truthy and the config is not ``'bert'``, the
    output of ``load_features(genre, data_type, scale_features)`` is appended
    to ``'x'``.

    Raises:
        ValueError: if ``input_config`` is not one of the handled values.
    """

    def _as_column(texts):
        # Object array with a trailing axis of length one.
        return np.array(texts, dtype=object)[:, np.newaxis]

    if input_config == 'bert':
        # Prepare input examples for the BERT model; extra features are never
        # attached in this mode.
        text = load_processed_text_data(genre, data_type)
        if text['label'] is None:
            return [
                run_classifier.InputExample(guid=None, text_a=prem, text_b=hyp)
                for prem, hyp in zip(text['premise'], text['hypothesis'])
            ]
        return [
            run_classifier.InputExample(guid=None, text_a=prem, text_b=hyp, label=tag)
            for prem, hyp, tag in zip(text['premise'], text['hypothesis'], text['label'])
        ]

    if input_config in ('token', 'elmo_id', 'token_combine_elmo_id'):
        processed = load_processed_data(genre, level, data_type)
        inputs = [processed['premise'], processed['hypothesis']]
        targets = processed['label']
    elif input_config == 'elmo_s':
        text = load_processed_text_data(genre, data_type)
        text['premise'] = _as_column(text['premise'])
        text['hypothesis'] = _as_column(text['hypothesis'])
        # Labels are taken from the processed data set.
        text['label'] = load_processed_data(genre, level, data_type)['label']
        inputs = [text['premise'], text['hypothesis']]
        targets = text['label']
    elif input_config == 'token_combine_elmo_s':
        processed = load_processed_data(genre, level, data_type)
        text = load_processed_text_data(genre, data_type)
        # NOTE(review): unlike 'elmo_s', each text array is wrapped in a
        # one-element list here — confirm this is intended.
        text['premise'] = [_as_column(text['premise'])]
        text['hypothesis'] = [_as_column(text['hypothesis'])]
        inputs = [processed['premise'], processed['hypothesis'],
                  text['premise'], text['hypothesis']]
        targets = processed['label']
    else:
        raise ValueError('input config Not Understood: {}'.format(input_config))

    input_data = {'x': inputs, 'y': targets}
    if add_features:
        input_data['x'].append(load_features(genre, data_type, scale_features))
    return input_data
def train_and_evaluate(train_sents, test_sents, labels_train, labels_test):
    """Train the module-level ``estimator`` and evaluate it on the test split.

    Args:
        train_sents: training sentences.
        test_sents: evaluation sentences.
        labels_train: labels paired element-wise with ``train_sents``.
        labels_test: labels paired element-wise with ``test_sents``.

    Returns:
        Whatever ``estimator.evaluate`` returns for the test split.  The
        original implementation discarded this value, so the metrics were
        unreachable for callers.
    """

    def _make_input_fn(sentences, labels, is_training):
        # Shared pipeline: sentences -> InputExamples -> features -> input_fn.
        examples = [
            run_classifier.InputExample(guid=None, text_a=sentence, text_b=None, label=label)
            for sentence, label in zip(sentences, labels)
        ]
        features = convert_examples_to_features_RE(
            examples, label_list, MAX_SEQUENCE_LENGTH, tokenizer)
        return run_classifier.input_fn_builder(
            features=features,
            seq_length=MAX_SEQUENCE_LENGTH,
            is_training=is_training,
            drop_remainder=False)

    train_input_fn = _make_input_fn(train_sents, labels_train, is_training=True)
    print("############ Beginning of training #####################")
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print("############ Ending of training #####################")

    test_input_fn = _make_input_fn(test_sents, labels_test, is_training=False)
    return estimator.evaluate(input_fn=test_input_fn, steps=None)
def do_eval(self):
    """Evaluate ``self.estimator`` on ``self.test`` and print every metric.

    Reads the ``sentence`` and ``label`` columns of ``self.test``, converts
    the rows to BERT features and prints the evaluation result entries in
    sorted key order.
    """

    def _row_to_example(row):
        return run_classifier.InputExample(
            guid=None, text_a=row['sentence'], text_b=None, label=row['label'])

    print("[INFO] Started working on evaluation...")

    print("[INFO] Preparing test InputExample...")
    examples = self.test.apply(_row_to_example, axis=1)
    print("[INFO] Done preparing test InputExample...\n")

    # Labels are represented by their positions in self.labels.
    class_ids = list(range(len(self.labels)))

    print("[INFO] Preparing test features...")
    features = run_classifier.convert_examples_to_features(
        examples, class_ids, self.max_seq_length, self.tokenizer)
    print("[INFO] Done preparing test features...\n")

    eval_input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=self.max_seq_length,
        is_training=False,
        drop_remainder=False)

    print('[INFO] Begin evaluating...!')
    metrics = self.estimator.evaluate(input_fn=eval_input_fn, steps=None)
    print("[INFO] Done evaluating...\n")

    for name in sorted(metrics):
        print(f"[INFO] {name} = {metrics[name]}")
def getPrediction(in_sentences, type_output="features"):
    """Run the module-level ``estimator`` on ``in_sentences``.

    Args:
        in_sentences: sentences to run through the model.
        type_output: ``"features"`` (default) returns each prediction's
            ``'pooled_output'``; any other value returns one
            ``(sentence, probabilities, label_id, label)`` tuple per sentence.

    Returns:
        A list, shaped as described for ``type_output``.
    """
    # guid and label are placeholder values for these prediction inputs.
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
        for x in in_sentences
    ]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

    # Predicting the classes
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)

    if type_output == "features":
        return [prediction['pooled_output'] for prediction in predictions]

    # Maps predicted label ids back to the label values found in the training
    # data; only computed on the path that actually needs it.
    labels = np.unique(train['label'])
    return [
        (sentence, prediction['probabilities'], prediction['labels'],
         labels[prediction['labels']])
        for sentence, prediction in zip(in_sentences, predictions)
    ]
def getPrediction(in_sentences):
    """Classify ``in_sentences`` as "Negative" or "Positive".

    Returns:
        A list of ``(sentence, probabilities, label_name)`` tuples, one per
        input sentence.
    """
    label_names = ["Negative", "Positive"]

    # guid and label are placeholder values for these prediction inputs.
    examples = []
    for text in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="", text_a=text, text_b=None, label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    results = []
    for sentence, outcome in zip(in_sentences, estimator.predict(input_fn)):
        results.append(
            (sentence, outcome['probabilities'], label_names[outcome['labels']]))
    return results
def getRatings(self, in_sentences):
    """Return the raw ``self.estimator.predict`` output for ``in_sentences``.

    The sentences are wrapped in single-sentence ``InputExample`` objects,
    converted to features with ``self.tokenizer`` and fed to the estimator.
    """
    # Step 1: wrap each sentence; guid "" and label "1" are placeholder values.
    examples = []
    for sentence in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label="1"))

    # Step 2: turn the examples into feature objects.
    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, self.tokenizer)

    # Step 3: build the input function over those features.
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    # Step 4: hand the result of predict straight back to the caller.
    return self.estimator.predict(input_fn=input_fn)
def getPrediction(in_sentences):
    """Predict a class for each sentence and one-hot encode it.

    Prints ``"<index>###<total>"`` after each prediction as a progress trace.

    Returns:
        A list of ``(sentence, probabilities, one_hot_label)`` tuples, where
        ``one_hot_label`` is the first row of ``label_binarize`` over the
        classes ``[0, 1, 2]``.
    """
    labels = [0, 1, 2]
    # guid and label are placeholder values for these prediction inputs.
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
        for x in in_sentences
    ]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)

    # The original carried an `except` clause whose `try` had been commented
    # out (plus a stray triple quote), which is a syntax error.  The disabled
    # handler is dropped, so failures during prediction propagate.
    total = len(in_sentences)
    resultado = []
    for indice, prediction in enumerate(predictions):
        one_hot = label_binarize([labels[prediction['labels']]], classes=labels)[0]
        resultado.append((in_sentences[indice], prediction['probabilities'], one_hot))
        print(str(indice) + "###" + str(total))
    return resultado
def predict(sentences, predict_fn):
    """Run ``predict_fn`` on ``sentences`` and pair each with its result.

    Args:
        sentences: sentences to classify.
        predict_fn: callable that takes a dict with the keys ``input_ids``,
            ``input_mask``, ``segment_ids`` and ``label_ids`` and returns a
            mapping with ``'probabilities'`` and ``'labels'``.

    Returns:
        A list of ``(sentence, probabilities, label)`` tuples.
    """
    labels = [0, 1]
    # guid and label are placeholder values for these prediction inputs.
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
        for x in sentences
    ]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, labels, MAX_SEQ_LENGTH, tokenizer)

    pred_dict = {
        'input_ids': [feature.input_ids for feature in input_features],
        'input_mask': [feature.input_mask for feature in input_features],
        'segment_ids': [feature.segment_ids for feature in input_features],
        'label_ids': [feature.label_id for feature in input_features],
    }
    predictions = predict_fn(pred_dict)

    # Pair results with the `sentences` argument.  The original zipped over
    # `pred_sentences`, a name that is not defined in this function.
    return list(zip(sentences, predictions['probabilities'], predictions['labels']))
def create_bert_input_example(row: pd.Series) -> run_classifier.InputExample:
    """Wrap one data row in a single-sentence ``InputExample``.

    The text is read from ``row[DATA_COLUMN]`` and the label from
    ``row[LABEL_COLUMN]``.
    """
    text = row[DATA_COLUMN]
    target = row[LABEL_COLUMN]
    # guid is a bookkeeping ID that is left unset here.
    return run_classifier.InputExample(guid=None, text_a=text, text_b=None, label=target)
def get_final_predictions(in_contexts,
                          in_last_sentences,
                          tokenizer,
                          estimator: tf.estimator.Estimator,
                          label_list):
    """
    Return the ``'probabilities'`` entry of every prediction made for the
    (story context, proposed ending) pairs.

    Parameters
    ----------
    in_contexts: story contexts, paired element-wise with ``in_last_sentences``
    in_last_sentences: proposed last sentences
    tokenizer: bert tokenizer
    estimator: tf.estimator
    label_list: possible values
    """
    # Context goes in text_a and the ending in text_b; guid and label are
    # placeholder values.
    examples = []
    for context, ending in zip(in_contexts, in_last_sentences):
        examples.append(
            run_classifier.InputExample(guid="", text_a=context, text_b=ending, label=0))

    max_len = flags.max_seq_length
    features = run_classifier.convert_examples_to_features(
        examples, label_list, max_len, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=flags.max_seq_length,
        is_training=False,
        drop_remainder=False)

    return [outcome['probabilities'] for outcome in estimator.predict(input_fn)]
def getPrediction(in_sentences):
    """Classify ``in_sentences`` as "Non-Sensitive" or "Sensitive".

    The estimator is obtained from ``run()`` on every call, after the input
    function has been built.

    Returns:
        A list of ``(sentence, probabilities, label_name)`` tuples.
    """
    label_names = ["Non-Sensitive", "Sensitive"]

    def _to_example(text):
        # guid and label are placeholder values for these prediction inputs.
        return run_classifier.InputExample(guid="", text_a=text, text_b=None, label=0)

    examples = [_to_example(text) for text in in_sentences]
    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    estimator = run()
    outcomes = estimator.predict(input_fn)
    return [
        (sentence, outcome['probabilities'], label_names[outcome['labels']])
        for sentence, outcome in zip(in_sentences, outcomes)
    ]


# Example usage (kept disabled):
# pred_sentences = [
#     "He drinks apple",
#     "He drinks milk",
#     "A mosquito stings me",
#     "I sting a mosquito",
#     "A niece is a person.",
#     "A giraffe is a person.",
#     "I like to ride my chocolate",
#     "I like to ride my bike",
#     "he put elephant into the jug",
# ]
# predictions = getPrediction(pred_sentences)
# print(predictions,"predictions")
def getListPrediction(self, in_sentences):
    """Return the raw ``self.estimator.predict`` output for ``in_sentences``."""

    def _wrap(text):
        # guid "" and label "0" are placeholder values.
        return run_classifier.InputExample(guid="", text_a=text, text_b=None, label="0")

    features = run_classifier.convert_examples_to_features(
        [_wrap(text) for text in in_sentences],
        label_list,
        MAX_SEQ_LENGTH,
        self.tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    return self.estimator.predict(input_fn=input_fn)
def main():
    """Classify every submitted form field through TensorFlow Serving.

    Each value of ``globals.request.form`` is converted to BERT features and
    sent to the model ``model_name`` at ``server``.  For every form key, the
    response holds the highest-scoring labels (at most three) in descending
    score order.

    Returns:
        A ``Response`` with a JSON body of the form
        ``{key: [{"label": ..., "score": "<str>"}, ...]}``.
    """
    MAX_SEQ_LENGTH, LABELS_LIST, VOCAB_FILE_PATH = get_config("cn")

    # The sentences to classify, keyed by form field name.
    sentences = globals.request.form.to_dict()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=VOCAB_FILE_PATH, do_lower_case=True)

    # One request object is reused; its inputs are overwritten per sentence.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = 'serving_default'

    batch_shape = (1, MAX_SEQ_LENGTH)
    results = {}

    channel = grpc.insecure_channel(server)
    try:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

        for key, sentence in sentences.items():
            example = run_classifier.InputExample(
                guid="test-0",
                text_a=tokenization.convert_to_unicode(sentence),
                text_b=None,
                label=LABELS_LIST[0])  # placeholder label
            feature = run_classifier.convert_single_example(
                0, example, LABELS_LIST, MAX_SEQ_LENGTH, tokenizer)

            # All three sequence inputs use the same (1, MAX_SEQ_LENGTH) shape
            # that is declared to make_tensor_proto below.
            input_ids = np.reshape([feature.input_ids], batch_shape)
            input_mask = np.reshape([feature.input_mask], batch_shape)
            segment_ids = np.reshape([feature.segment_ids], batch_shape)
            label_ids = [feature.label_id]

            # Package the inputs into the request, following the model's format.
            request.inputs['input_ids'].CopyFrom(
                tf.contrib.util.make_tensor_proto(
                    input_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
            request.inputs['input_mask'].CopyFrom(
                tf.contrib.util.make_tensor_proto(
                    input_mask, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
            request.inputs['label_ids'].CopyFrom(
                tf.contrib.util.make_tensor_proto(
                    label_ids, shape=[1], dtype=tf.int32))
            request.inputs['segment_ids'].CopyFrom(
                tf.contrib.util.make_tensor_proto(
                    segment_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))

            result = stub.Predict(request, 10.0)  # 10 secs timeout

            probabilities_np = np.array(
                list(result.outputs["probabilities"].float_val))
            # Highest scores first.  Iterating over the indices themselves
            # (rather than a fixed range(3)) avoids an IndexError when fewer
            # than three probabilities come back.
            top_indices = probabilities_np.argsort()[-3:][::-1]
            results[key] = [
                {"label": LABELS_LIST[int(idx)],
                 "score": str(probabilities_np[idx])}
                for idx in top_indices
            ]
    finally:
        # Release the connection even if a prediction call fails.
        channel.close()

    return Response(json.dumps(results), mimetype='application/json')
def getPrediction(in_sentences):
    """Classify sentence pairs as "Not an error" or "Is an error".

    Each element of ``in_sentences`` supplies ``text_a`` at index 0 and
    ``text_b`` at index 1.  Predictions are requested with
    ``yield_single_examples=False`` from a fixed checkpoint; a prediction whose
    ``"labels"`` has a ``__len__`` is unpacked element by element, otherwise it
    is treated as a single example.

    Returns:
        A list of ``(input_pair, probabilities, label_name)`` tuples.
    """
    label_names = ["Not an error", "Is an error"]

    # guid and label are placeholder values for these prediction inputs.
    examples = []
    for pair in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="", text_a=pair[0], text_b=pair[1], label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    outcomes = estimator.predict(
        input_fn,
        checkpoint_path="./" + OUTPUT_DIR + "/model.ckpt-56165",
        yield_single_examples=False)

    collected = []
    cursor = 0  # position of the next unconsumed input pair
    for outcome in outcomes:
        predicted = outcome["labels"]
        if not hasattr(predicted, '__len__'):
            collected.append(
                (in_sentences[cursor], outcome["probabilities"], label_names[predicted]))
            cursor += 1
            continue
        for offset in range(len(predicted)):
            collected.append(
                (in_sentences[cursor],
                 outcome["probabilities"][offset],
                 label_names[predicted[offset]]))
            cursor += 1
    return collected
def pre_inference(payload, signature, metadata):
    """Turn ``payload["review"]`` into the model's ``input_ids`` input.

    ``signature`` and ``metadata`` are accepted but not used.

    Returns:
        ``{"input_ids": [ids]}`` where ``ids`` comes from converting the
        review with label list ``[0, 1]`` and a maximum length of 128.
    """
    review_text = payload["review"]
    # guid and label are placeholder values.
    example = run_classifier.InputExample(guid="", text_a=review_text, label=0)
    feature = run_classifier.convert_single_example(0, example, [0, 1], 128, tokenizer)
    token_ids = feature.input_ids
    return {"input_ids": [token_ids]}
def predict(self):
    """Predict and store the predicted label in ``self.predicted_class``.

    Does nothing when ``self.empty`` is truthy.  Otherwise ``self.clean_text``
    is converted to features (kept in ``self.input_features``) and run through
    the shared ``SinglePredict`` session.
    """
    if self.empty:
        return

    # See run_classifier.py from the BERT repository for more details.
    # guid "" and the first label are placeholder values.
    example = run_classifier.InputExample(
        guid="", text_a=self.clean_text, text_b=None, label=self.label_list[0])
    self.input_features = run_classifier.convert_examples_to_features(
        [example], self.label_list, self.max_seq_length, self.tokenizer)

    feature = self.input_features[0]
    row_shape = (-1, self.max_seq_length)
    feed = {
        SinglePredict.tensor_input_ids: np.array(feature.input_ids).reshape(*row_shape),
        SinglePredict.tensor_input_mask: np.array(feature.input_mask).reshape(*row_shape),
        SinglePredict.tensor_label_ids: np.array(feature.label_id).reshape(-1, ),
        SinglePredict.tensor_segment_ids: np.array(feature.segment_ids).reshape(*row_shape),
    }
    outcome = SinglePredict.sess.run(SinglePredict.tensor_outputs, feed_dict=feed)
    self.predicted_class = self.label_list[outcome]
def data_processor():
    """Load (or create) the sentiment splits and convert them to BERT features.

    If ``sentiment_data/train.csv`` exists, the train and test CSVs are read
    as-is.  Otherwise the raw ``simplifyweibo_4_moods.csv`` is loaded, its
    numeric labels are replaced by mood names, and it is split into train,
    test and dev CSVs that are written next to it.

    Returns:
        ``(train_features, test_features)``.
    """
    train_csv = "sentiment_data/train.csv"
    test_csv = "sentiment_data/test.csv"

    if os.path.exists(train_csv):
        x_train = pd.read_csv(train_csv)
        x_test = pd.read_csv(test_csv)
    else:
        data = pd.read_csv("sentiment_data/simplifyweibo_4_moods.csv")
        moods = data["label"]
        for code, mood in ((0, "happy"), (1, "angry"), (2, "disgust"), (3, "sad")):
            moods = moods.replace(code, mood)
        data["label"] = moods

        x_train, x_test, _, y_test = train_test_split(
            data, data["label"], test_size=0.4, random_state=0)
        # The held-out part is split again into test and dev.
        x_test, x_dev, _, _ = train_test_split(
            x_test, y_test, test_size=0.4, random_state=0)

        x_train.to_csv(train_csv, index=False)
        x_test.to_csv(test_csv, index=False)
        x_dev.to_csv("sentiment_data/dev.csv", index=False)

    def _row_to_example(row):
        # guid is a bookkeeping ID that is left unset here.
        return run_classifier.InputExample(
            guid=None,
            text_a=row[args.DATA_COLUMN],
            text_b=None,
            label=row[args.LABEL_COLUMN])

    train_InputExamples = x_train.apply(_row_to_example, axis=1)
    test_InputExamples = x_test.apply(_row_to_example, axis=1)

    # Build the tokenizer.
    tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file,
                                           do_lower_case=True)

    # Build the features.
    to_features = bert.run_classifier.convert_examples_to_features
    train_features = to_features(
        train_InputExamples, args.label_list, args.MAX_SEQ_LENGTH, tokenizer)
    test_features = to_features(
        test_InputExamples, args.label_list, args.MAX_SEQ_LENGTH, tokenizer)
    return train_features, test_features
def embed(self,
          sequences: Union[str, Iterable[str]],
          per_token: bool = _DEFAULT_PER_TOKEN_EMBEDDING) -> np.ndarray:
    """
    Embeds a sequence or multiple sequences using the BERT model

    Args:
        sequences (Union[str, Iterable[str]]): the sequence(s) to embed
        per_token (bool): whether to produce an embedding per token or a
            pooled embedding for the whole sequence

    Returns:
        a numpy array with the embedding(s) of the sequence(s)
    """
    single_input = isinstance(sequences, str)
    batch = [sequences] if single_input else sequences

    # Convert the sequences into BERT input features; label 0 and the
    # one-element label list are placeholder values.
    examples = [
        run_classifier.InputExample(guid=None, text_a=text, text_b=None, label=0)
        for text in batch
    ]
    features = run_classifier.convert_examples_to_features(
        examples, [0], self._input_ids.shape[1], self._tokenizer)

    # The fetch and the feed dict are the same whichever session runs them.
    fetch = self._outputs["sequence_output" if per_token else "pooled_output"]
    feed = {
        self._input_ids: [item.input_ids for item in features],
        self._input_mask: [item.input_mask for item in features],
        self._segment_ids: [item.segment_ids for item in features],
    }

    if self._session is None:
        # No session is held: open a temporary one on the stored graph and
        # run the global variable initializer before fetching.
        with tf.compat.v1.Session(graph=self._graph) as session:
            session.run(tf.global_variables_initializer())
            output = session.run(fetch, feed_dict=feed)
    else:
        output = self._session.run(fetch, feed_dict=feed)

    if single_input:
        # Drop the leading axis that was added for the single input.
        output = output.reshape(output.shape[1:])
    return output
def predict(self, payload):
    """Classify ``payload["review"]`` and return the matching ``labels`` entry.

    The review is converted with label list ``[0, 1]`` and a maximum length of
    128; only its ``input_ids`` are sent to ``self.client.predict``.
    """
    review_text = payload["review"]
    # guid and label are placeholder values.
    example = run_classifier.InputExample(guid="", text_a=review_text, label=0)
    feature = run_classifier.convert_single_example(
        0, example, [0, 1], 128, self._tokenizer)

    response = self.client.predict({"input_ids": [feature.input_ids]})
    predicted_id = response["labels"][0]
    return labels[predicted_id]
def data_prep(stressors_list, label_list, MAX_SEQ_LENGTH, tokenizer):
    """Convert every entry of ``stressors_list`` into BERT input features.

    Each entry becomes a single-sentence ``InputExample`` with the placeholder
    values ``guid=""`` and ``label=1``.
    """
    examples = []
    for stressor in stressors_list:
        examples.append(
            run_classifier.InputExample(guid="", text_a=stressor, text_b=None, label=1))
    return run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
def predict(in_sentences):
    """Predict the output relation of each sentence.

    Returns:
        A list of ``(sentence, probabilities, label_id)`` tuples.
    """
    # guid and label are placeholder values for these prediction inputs.
    examples = []
    for sentence in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))

    features = convert_examples_to_features_RE(
        examples, label_list, MAX_SEQUENCE_LENGTH, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQUENCE_LENGTH,
        is_training=False,
        drop_remainder=False)
    outcomes = estimator.predict(input_fn, yield_single_examples=True)

    paired = []
    for sentence, outcome in zip(in_sentences, outcomes):
        paired.append((sentence, outcome['probabilities'], outcome['labels']))
    return paired
def get_bert_features(text):
    """Convert the sentences in ``text`` into BERT input features.

    When ``text`` holds exactly one sentence, the filler sentence ``'aha'`` is
    added so that two examples are converted.
    """
    in_sentences = [text[0], 'aha'] if len(text) == 1 else text

    # guid and label are placeholder values.
    examples = []
    for sentence in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))
    return run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets.

    Every line yields one example: the text comes from column 1, the label
    from column 0, and the guid is ``"<set_type>-<line index>"``.
    """
    to_unicode = bert.tokenization.convert_to_unicode
    return [
        brc.InputExample(
            guid="%s-%s" % (set_type, index),
            text_a=to_unicode(row[1]),
            text_b=None,
            label=to_unicode(row[0]))
        for index, row in enumerate(lines)
    ]
def predict_serve(in_sentences):
    """Predict the output relation of each sentence through ``predict_fn``.

    Returns:
        A list of ``(sentence, probabilities, label)`` tuples.
    """
    # guid and label are placeholder values; every example is converted with
    # example index 0.
    features = []
    for sentence in in_sentences:
        example = run_classifier.InputExample(
            guid="", text_a=sentence, text_b=None, label=0)
        features.append(
            convert_single_example_RE(
                0, example, label_list, MAX_SEQUENCE_LENGTH, tokenizer))

    model_inputs = {
        'input_ids': [item.input_ids for item in features],
        'label_ids': [item.label_id for item in features],
        'input_mask': [item.input_mask for item in features],
        'segment_ids': [item.segment_ids for item in features],
    }
    outcome = predict_fn(model_inputs)

    paired = []
    for sentence, probs, label in zip(
            in_sentences, outcome['probabilities'], outcome['labels']):
        paired.append((sentence, probs, label))
    return paired
def convert_sent_to_vec(tokenizer, dataset, DATA_COLUMN='text_1', DATA_COLUMN2='text_2'):
    """Convert sentence pairs from ``dataset`` into BERT input features.

    Args:
        tokenizer: tokenizer passed through to feature conversion.
        dataset: object whose ``apply(fn, axis=1)`` visits each row.
        DATA_COLUMN: column holding the first sentence (``text_a``).
        DATA_COLUMN2: column holding the second sentence (``text_b``).

    Returns:
        The result of ``run_classifier.convert_examples_to_features`` using
        ``cfg.label_list`` and ``cfg.MAX_SEQ_LENGTH``.
    """
    # The original also defined a local `label_list = [1, 0]` that was never
    # used; the label list that is actually passed is cfg.label_list.
    input_example = dataset.apply(
        lambda x: run_classifier.InputExample(
            guid=None,
            text_a=x[DATA_COLUMN],
            text_b=x[DATA_COLUMN2],
            label=1),  # placeholder label
        axis=1)
    features = run_classifier.convert_examples_to_features(
        input_example, cfg.label_list, cfg.MAX_SEQ_LENGTH, tokenizer)
    return features
def getPrediction(in_sentences):
    """Classify ``in_sentences`` into one of the labels "0" to "3".

    Returns:
        A list of ``(sentence, probabilities, label_id, label_name)`` tuples.
    """
    # Maps predicted label ids to their string names.
    label_names = ["0", "1", "2", "3"]

    def _wrap(text):
        # guid and label are placeholder values.
        return run_classifier.InputExample(guid="", text_a=text, text_b=None, label=0)

    # Test data -> InputExamples -> input features.
    features = run_classifier.convert_examples_to_features(
        [_wrap(text) for text in in_sentences], label_list, MAX_SEQ_LENGTH, tokenizer)

    # Predict the classes.
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    outcomes = estimator.predict(input_fn)

    rows = []
    for sentence, outcome in zip(in_sentences, outcomes):
        label_id = outcome['labels']
        rows.append((sentence, outcome['probabilities'], label_id, label_names[label_id]))
    return rows
def create_bert_examples(lines, set_type, labels=None):
    """Generate ``InputExample`` objects for the BERT model.

    When ``set_type`` formats to ``'train'``, each line is paired with the
    string form of its label from ``labels``; otherwise every line gets the
    label ``'0'``.  The guid of every example is the formatted ``set_type``.
    """
    guid = f'{set_type}'

    if guid == 'train':
        labelled = ((text, str(tag)) for text, tag in zip(lines, labels))
    else:
        labelled = ((text, '0') for text in lines)

    return [
        run_classifier.InputExample(guid=guid, text_a=text, text_b=None, label=tag)
        for text, tag in labelled
    ]
def fit(self, X, y):
    """Train a new estimator on ``X`` / ``y`` and store it in ``self.estimator_``.

    The sorted distinct values of ``y`` are stored in ``self.labels_`` and used
    as the label list.

    Returns:
        ``self``.
    """
    self.labels_ = sorted(set(y))

    # Wrap every (text, label) pair, then convert to BERT input features.
    examples = []
    for text, target in zip(X, y):
        examples.append(
            run_classifier.InputExample(guid=None, text_a=text, text_b=None, label=target))
    train_features = bert.run_classifier.convert_examples_to_features(
        examples, self.labels_, self.max_seq_length, self.tokenizer)

    # Derive the number of train and warmup steps from the batch size.
    steps_per_epoch = len(train_features) / self.batch_size
    num_train_steps = int(steps_per_epoch * self.num_train_epochs)
    num_warmup_steps = int(num_train_steps * self.warmup_proportion)

    # Output directory and checkpoint/summary frequency.
    run_config = tf.estimator.RunConfig(
        model_dir=self.model_dir,
        save_summary_steps=self.save_summary_steps,
        save_checkpoints_steps=self.save_checkpoints_steps)

    self.estimator_ = tf.estimator.Estimator(
        model_fn=model_fn_builder(
            num_labels=len(self.labels_),
            learning_rate=self.learning_rate,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps),
        config=run_config,
        params={"batch_size": self.batch_size})

    # Input function for training.
    train_input_fn = bert.run_classifier.input_fn_builder(
        features=train_features,
        seq_length=self.max_seq_length,
        is_training=True,
        drop_remainder=False)

    print('Beginning Training!')
    started_at = datetime.now()
    self.estimator_.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print("Training took time ", datetime.now() - started_at)
    return self
def getPrediction(estimator, in_sentences, labels, label_list, max_seq_len, tokenizer):
    """Classify ``in_sentences`` with ``estimator``.

    Args:
        estimator: object whose ``predict`` is called with the input function.
        in_sentences: sentences to classify.
        labels: sequence indexed by each prediction's ``'labels'`` value.
        label_list: label list passed to feature conversion.
        max_seq_len: sequence length used for conversion and the input function.
        tokenizer: tokenizer passed to feature conversion.

    Returns:
        A list of ``(sentence, probabilities, label)`` tuples.
    """
    # guid and label are placeholder values.
    examples = []
    for text in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="", text_a=text, text_b=None, label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, max_seq_len, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=max_seq_len,
        is_training=False,
        drop_remainder=False)

    results = []
    for sentence, outcome in zip(in_sentences, estimator.predict(input_fn)):
        results.append((sentence, outcome['probabilities'], labels[outcome['labels']]))
    return results
def do_train(self):
    """Build a fresh estimator and train it on ``self.train``.

    Reads the ``sentence`` and ``label`` columns of ``self.train``, converts
    the rows to BERT features, derives the step counts, stores the estimator
    from ``get_estimator`` in ``self.estimator`` and trains it.  Progress is
    reported through ``[INFO]`` prints.
    """

    def _row_to_example(row):
        return run_classifier.InputExample(
            guid=None, text_a=row['sentence'], text_b=None, label=row['label'])

    print("[INFO] Preparing train InputExample...")
    examples = self.train.apply(_row_to_example, axis=1)
    print("[INFO] Done preparing train InputExample...\n")

    # Labels are represented by their positions in self.labels.
    class_ids = list(range(len(self.labels)))

    print("[INFO] Preparing train features...")
    features = run_classifier.convert_examples_to_features(
        examples, class_ids, self.max_seq_length, self.tokenizer)
    print("[INFO] Done preparing train features...\n")

    train_input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=self.max_seq_length,
        is_training=True,
        drop_remainder=False)

    steps_per_epoch = len(features) / self.train_batch_size
    num_train_steps = int(steps_per_epoch * self.num_train_epochs)
    num_warmup_steps = int(num_train_steps * self.warmup_proportion)
    print(f"[INFO] No. of train steps: {num_train_steps}")
    print(f"[INFO] No. of warmup steps: {num_warmup_steps}")

    self.estimator = get_estimator(
        self.init_checkpoint,
        self.bert_config_file,
        self.labels,
        self.train_batch_size,
        self.model_dir,
        save_summary_steps=self.save_summary_steps,
        save_checkpoints_steps=self.save_checkpoints_steps,
        learning_rate=self.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps)

    print('[INFO] Begin Training...!')
    started_at = datetime.now()
    self.estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    elapsed = datetime.now() - started_at
    print(f"[INFO] Training took time {elapsed} sec..!\n")