def predict_window(text_predict, number_predict, window_size):
    """
    Generate a sequence of number_predict characters continuing text_predict.

    :param text_predict: Initial text
    :param number_predict: Number of characters to generate
    :param window_size: The same window size that was used in training
    :return: text_predict followed by number_predict generated characters
    """
    # Get the dictionaries used in training
    chars_to_indices, indices_to_chars = load_coded_dictionaries()
    number_chars = len(chars_to_indices)

    # Clean the input
    input_clean = clean_text(text_predict.lower())

    # Get the stub for the prediction service listening on port 9000
    channel = grpc.insecure_channel(IP + str(9000))
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    # Call the service once per character to generate
    for i in range(number_predict):
        d = predict_one(input_clean[i:], stub, window_size, number_chars,
                        chars_to_indices, indices_to_chars)
        input_clean += d
    return input_clean
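# predict_one is not shown in these snippets; the following is a minimal sketch of
# what such a helper could look like for the TensorFlow Serving gRPC path, assuming
# the served model takes a one-hot encoded window named 'input' and returns
# per-character probabilities named 'output'. The model name, signature name,
# tensor names and timeout below are assumptions, not taken from the original code.
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2


def predict_one_sketch(text, stub, window_size, number_chars,
                       chars_to_indices, indices_to_chars):
    # One-hot encode the last window_size characters of the input
    x = np.zeros((1, window_size, number_chars), dtype=np.float32)
    for t, char in enumerate(text[-window_size:]):
        x[0, t, chars_to_indices[char]] = 1.0

    # Build the gRPC request for the served model (names are assumptions)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'beiras_gru'
    request.model_spec.signature_name = 'serving_default'
    request.inputs['input'].CopyFrom(tf.make_tensor_proto(x))

    # Call TensorFlow Serving and pick the most probable next character
    result = stub.Predict(request, 10.0)
    probabilities = np.array(result.outputs['output'].float_val)
    return indices_to_chars[int(np.argmax(probabilities))]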
def predict(sentence, number_predict, window_size):
    """
    Return a text sequence predicted by the GRU network continuing
    the input sentence.

    :param sentence: Input sentence
    :param number_predict: Number of characters to generate
    :param window_size: The same window size that was used in training
    :return: text sequence
    """
    chars_to_indices, indices_to_chars = load_coded_dictionaries()
    return predict_window(sentence, number_predict, window_size)
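# Minimal usage sketch for the three-argument predict above. The sample sentence,
# the number of characters (100) and the window size (40) are illustrative
# assumptions and must match whatever the network was actually trained with.
if __name__ == '__main__':
    generated = predict('non hai futuro sen memoria', 100, 40)
    print(generated)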
def predict(sentence):
    """
    Return a text sequence predicted by the GRU network continuing
    the input sentence.

    :param sentence: Input sentence
    :return: text sequence
    """
    chars_to_indices, indices_to_chars = load_coded_dictionaries()
    model = create_gru_model(chars_to_indices)
    model.load_weights(
        '../model_weights/best_beiras_gru_textdata_weights.hdf5')
    # window_size is expected to be defined at module level and to match training
    return predict_next_chars(model, sentence, window_size,
                              chars_to_indices, indices_to_chars)
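# predict_next_chars is not defined in these snippets; this is a minimal sketch of
# how it could work with the Keras model loaded above, assuming the model takes a
# one-hot encoded window and outputs one probability per character. The greedy
# argmax decoding and the default of 100 generated characters are assumptions.
import numpy as np


def predict_next_chars_sketch(model, sentence, window_size,
                              chars_to_indices, indices_to_chars,
                              num_to_predict=100):
    predicted_chars = ''
    for i in range(num_to_predict):
        # One-hot encode the current window of characters
        window = (sentence + predicted_chars)[-window_size:]
        x = np.zeros((1, window_size, len(chars_to_indices)), dtype=np.float32)
        for t, char in enumerate(window):
            x[0, t, chars_to_indices[char]] = 1.0

        # Predict the next character and append it greedily
        probabilities = model.predict(x, verbose=0)[0]
        predicted_chars += indices_to_chars[int(np.argmax(probabilities))]
    return sentence + predicted_chars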
def predict_window(text_predict, number_predict, window_size,
                   lproject, lmodel, lversion):
    """
    Generate a sequence of number_predict characters continuing text_predict,
    calling a model deployed on Cloud ML Engine.

    :param text_predict: Initial text
    :param number_predict: Number of characters to generate
    :param window_size: The same window size that was used in training
    :param lproject: Google Cloud project
    :param lmodel: Deployed model name
    :param lversion: Model version (may be None)
    :return: text_predict followed by number_predict generated characters
    """
    # Get dictionaries
    chars_to_indices, indices_to_chars = load_coded_dictionaries()

    # Build the service client and the fully qualified model name
    service = googleapiclient.discovery.build('ml', 'v1')
    name = 'projects/{}/models/{}'.format(lproject, lmodel)
    if lversion is not None:
        name += '/versions/{}'.format(lversion)
    print(name)

    input_clean = text_predict
    # Call the server once per character to generate
    for i in range(number_predict):
        d = predict_one(input_clean[i:], service, name, window_size,
                        chars_to_indices, indices_to_chars)
        input_clean += d
    return input_clean
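# As with the gRPC variant, predict_one is not shown for the Cloud ML Engine path;
# this is a minimal sketch assuming the deployed model accepts a one-hot encoded
# window as a nested list under 'instances' and returns per-character
# probabilities under a 'prediction' key, which is an assumption.
import numpy as np


def predict_one_mlengine_sketch(text, service, name, window_size,
                                chars_to_indices, indices_to_chars):
    # One-hot encode the last window_size characters as plain Python lists
    x = np.zeros((window_size, len(chars_to_indices)))
    for t, char in enumerate(text[-window_size:]):
        x[t, chars_to_indices[char]] = 1.0

    # Online prediction request against the deployed model/version
    response = service.projects().predict(
        name=name,
        body={'instances': [x.tolist()]}
    ).execute()
    if 'error' in response:
        raise RuntimeError(response['error'])

    # Pick the most probable next character (the output key is an assumption)
    probabilities = response['predictions'][0]['prediction']
    return indices_to_chars[int(np.argmax(probabilities))]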
class TestPreproccess(unittest.TestCase):

    def test_load(self):
        chars_to_indices_new, indices_to_chars_new = load_coded_dictionaries()
        text_clean = load_text_clean('../data/Beiras.txt', chars_to_indices_new)
        text_to_csv(text_clean, "train.unittest", "test.unittest",
                    chars_to_indices_new, PERCENT_TRAIN)
        text_read = csv_to_text("train.unittest", indices_to_chars_new)
        self.assertEqual(text_clean[:101], text_read[0])


if __name__ == "__main__":
    """
    Generate FILE_OUTPUT_TRAIN and FILE_OUTPUT_TEST from '../data/Beiras.txt'
    """
    # Load dictionaries to convert char to index and index to char
    chars_to_indices_new, indices_to_chars_new = load_coded_dictionaries()
    # Get a string with the clean text
    text_clean = load_text_clean('../data/Beiras.txt', chars_to_indices_new)
    # Generate the files
    text_to_csv(text_clean, FILE_OUTPUT_TRAIN, FILE_OUTPUT_TEST,
                chars_to_indices_new, PERCENT_TRAIN)
    # As a check, read the train file back and print its sentences
    text_read = csv_to_text(FILE_OUTPUT_TRAIN, indices_to_chars_new)
    for sentence in text_read:
        print(sentence)