def test_text_generation(self):
    seq, last_state = generate_text(self.rnn, self.goblet)
    print(seq)
    self.assertEqual(last_state.size, self.rnn.state_size)
    seq, last_state = generate_text(
        self.rnn, self.goblet, self.goblet.encode('A'),
        np.random.random(self.rnn.state_size), 100)
    print(seq)
    self.assertEqual(last_state.size, self.rnn.state_size)

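# --- Hedged sketch: the generate_text helper the test above assumes. ---
# The signature (rnn, data, first_char=None, initial_state=None, length=...)
# is inferred from the two calls; rnn.predict and data.decode are assumptions,
# not the project's actual API.
import numpy as np

def generate_text(rnn, data, first_char=None, initial_state=None, length=200):
    state = np.zeros(rnn.state_size) if initial_state is None else initial_state
    x = first_char if first_char is not None else np.eye(rnn.input_size)[0]
    sampled = []
    for _ in range(length):
        probs, state = rnn.predict(x, state)        # assumed: one forward step
        idx = np.random.choice(rnn.input_size, p=probs)
        sampled.append(idx)
        x = np.zeros(rnn.input_size)
        x[idx] = 1                                  # feed the sample back one-hot
    return data.decode(sampled), state              # assumed: indices back to text
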
def print_text(rnn, last_probs, last_state, data, length):
    first_char = np.zeros_like(last_probs)
    first_char[np.random.choice(rnn.input_size, p=last_probs)] = 1
    text, _ = generate_text(rnn, data, first_char=first_char,
                            initial_state=last_state, length=length)
    print(text)

def results():
    form = TextForm(request.form)
    if request.method == 'POST' and form.validate():
        textx = request.form['textx']
        numberx = int(request.form['numberx'])
        text_rnn = generate_text(new_model, textx, numberx)
        return render_template('results.html', content=textx, text_rnn=text_rnn)
    return render_template('text-form.html', form=form)

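# --- Hedged sketch: a TextForm matching the view above. ---
# The field names textx/numberx mirror the request lookups; the labels and
# validators are assumptions.
from wtforms import Form, StringField, IntegerField, validators

class TextForm(Form):
    textx = StringField('Seed text', [validators.InputRequired()])
    numberx = IntegerField('Characters to generate',
                           [validators.NumberRange(min=1, max=1000)])
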
def callback(opt, last_probs, last_state, data, start):
    first_char = np.zeros_like(last_probs)
    first_char[np.random.choice(opt.rnn.input_size, p=last_probs)] = 1
    text, _ = generate_text(opt.rnn, data, first_char=first_char,
                            initial_state=last_state)
    print('Sequences {} cost {:.3f} learning rate {:.2e} elapsed {:.0f}s:\n{}\n'
          .format(opt.steps, opt.smooth_costs[-1], opt.learning_rates[-1],
                  time.time() - start, text))
    plt.plot(opt.smooth_costs, 'b-')
    plt.pause(.05)

def handle_callback(bot, update):
    cb_query = update.callback_query
    message = cb_query.message.message_id
    user = cb_query.message.chat.id
    bot.answer_callback_query(cb_query.id)
    if cb_query.data[:3] == 'del':
        new_categories = utils.remove_category(cb_query.data[4:], user)
    else:
        new_categories = utils.append_category(cb_query.data[4:], user)
    # Edit the message that originated the query so it reflects the new category list.
    bot.edit_message_text(chat_id=user, message_id=message,
                          text=utils.generate_text(new_categories))
    bot.edit_message_reply_markup(user, message,
                                  reply_markup=utils.generate_keyboard(new_categories))
    utils.update_categories(user, new_categories)

async def main():
    prev_update_time = datetime.now() - timedelta(minutes=1)
    while True:
        if time_has_changed(prev_update_time):
            bts = generate_time_image_bytes(
                datetime.now(args.tz).replace(tzinfo=None))
            await client(
                DeletePhotosRequest(await client.get_profile_photos('me')))
            file = await client.upload_file(bts)
            await client(UploadProfilePhotoRequest(file))
            await client(
                UpdateProfileRequest(about=generate_text(
                    datetime.now(args.tz).replace(tzinfo=None))))
            prev_update_time = datetime.now()
        # Use a non-blocking sleep (requires `import asyncio`); the original
        # time.sleep(1) would stall the event loop for the whole second.
        await asyncio.sleep(1)

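# --- Hedged sketch: starting the update loop above with Telethon. ---
# Assumes `client` is the TelegramClient the coroutine uses; api_id/api_hash
# are placeholders, since the snippet itself does not show its entry point.
from telethon import TelegramClient

client = TelegramClient('profile-clock', api_id, api_hash)
with client:
    client.loop.run_until_complete(main())
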
def main(save_filename=None, load_filename="class_model_weights.h5", do_train=False):
    """Entry point."""
    if do_train:
        print("Training and saving model...")
        model, vocab = train_model(file_name=save_filename)
        ids_from_chars = preprocessing.StringLookup(vocabulary=list(vocab))
        vocab_size = len(ids_from_chars.get_vocabulary())
    else:
        if load_filename is None:
            print("ERROR: no load file name provided and the training flag is False; no model can be used")
            return 1
        # TODO: the vocab should be accessible without re-reading and processing this data
        data = open('./archive/drake_lyrics.txt').read()
        print('Length of text: {} characters'.format(len(data)))
        vocab = sorted(set(data))
        ids_from_chars = preprocessing.StringLookup(vocabulary=list(vocab))
        vocab_size = len(ids_from_chars.get_vocabulary())
        print("Loading model from disk...")
        model = DrakeGRUSequential(vocab_size, embedding_dim)
        utils.load_weights(load_filename, model, tf.TensorShape([1, vocab_size]))

    print("Generating Bars...please wait")
    seed_texts = ["[Verse]", "you", "love", "boy",
                  "I love", "I love you", "Kiki, ", "Swanging"]
    for seed in seed_texts:
        num_chars = 400
        output_text = utils.generate_text(seed, model, seq_length, ids_from_chars,
                                          chars_to_gen=num_chars)
        print(">>>>>>>>>>>>>>>>>>>>")
        print("Input seed: %s" % seed)
        print("%d character generated sequence:\n%s\n" % (num_chars, output_text))
        print("End of output for seed: %s" % seed)
        print("<<<<<<<<<<<<<<<<<<<<")
    # Hope you enjoyed :)
    return 0

end_idx = tokenizer.word_index[config.end_token]
unknown_idx = tokenizer.word_index[config.unknown_token]
blocked_idxs = [unknown_idx, config.pad_idx]

if args.beginning_list is not None:
    beginning = args.beginning_list
elif args.beginning_string is not None:
    beginning = args.beginning_string
else:
    beginning = None

model, _ = Gated_Transformer_XL.build_from_config(
    config=config, checkpoint_path=args.checkpoint_path)

generated_features, _ = generate_text(model=model,
                                      seq_len=config.seq_len,
                                      mem_len=config.mem_len,
                                      max_len=args.gen_len,
                                      tokenizer=tokenizer,
                                      start_idx=start_idx,
                                      end_idx=end_idx,
                                      blocked_idxs=blocked_idxs,
                                      batch_size=args.n_samples,
                                      beginning=beginning,
                                      top_k=args.top_k,
                                      temp=args.temp)

generated_texts = scam_parser.features_to_text(features=generated_features,
                                               tokenizer=tokenizer,
                                               stored_tokens=stored_tokens)

delimiter = '\n' * 4 + ('#' * 80 + '\n') * 4 + '\n' * 4
generated_texts = delimiter.join(generated_texts)
with open(args.dst_path, 'w', encoding='ISO-8859-1') as file:
    file.write(generated_texts)

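# --- Hedged sketch: the top-k / temperature sampling implied by the top_k and
# temp arguments above. Illustrative only, not the project's implementation. ---
import numpy as np

def sample_next_token(logits, top_k=40, temp=1.0, blocked_idxs=()):
    logits = np.asarray(logits, dtype=np.float64) / temp   # temperature scaling
    logits[list(blocked_idxs)] = -np.inf                   # never sample blocked tokens
    keep = np.argpartition(logits, -top_k)[-top_k:]        # indices of the k largest logits
    masked = np.full_like(logits, -np.inf)
    masked[keep] = logits[keep]                            # zero out everything outside top-k
    probs = np.exp(masked - masked.max())
    probs /= probs.sum()
    return np.random.choice(len(logits), p=probs)
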
def text_gen(data):
    """Text generation cloud function."""
    return {'text': generate_text(data)}

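# --- Hedged smoke test for the cloud function above; assumes generate_text
# accepts the parsed request payload as-is. ---
if __name__ == '__main__':
    print(text_gen({'seed': 'once upon a time'}))
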
def main(feature_type: str, language: str, domain: str, main_dir: str,
         seq_len: int, batch_size: int, lstm_dim: int,
         character_level: bool = False):
    """
    Parameters
    ----------
    feature_type: the name of the feature
    language: language of the text
    domain: text domain, used to select texts and name output files
    main_dir: base directory
    seq_len: sequence length
    batch_size: batch size
    lstm_dim: LSTM hidden dimension
    character_level: whether the tokenizer should be on character level
    """
    texts = get_texts(main_dir, language, feature_type, character_level, domain)
    tokenizer = Tokenizer(texts.values(), character_level=character_level)

    samples = {}
    for book in texts:
        logger.debug(f"Text length for {book}: {len(texts[book])}")
        len_text = len(texts[book]) if character_level else len(texts[book].split())
        if len_text < seq_len:
            logger.warning(f"Requested seq_len larger than text length: {len_text} / {seq_len} "
                           f"for {book} and feature type {feature_type}.")
            continue
        rand_idx = np.random.randint(0, len_text - seq_len, batch_size)
        if character_level:
            samples[book] = tokenizer.encode([texts[book][i: i + seq_len] for i in rand_idx])
        else:
            split_text = texts[book].split()
            samples[book] = tokenizer.encode(
                [" ".join(split_text[i: i + seq_len]) for i in rand_idx]
            )

    test_generator = DataGenerator(tokenizer, tokenizer.full_text, seq_len=seq_len,
                                   batch_size=batch_size, with_embedding=True, train=False)
    sample_batch = next(iter(test_generator))
    logger.info(f"X batch shape: {sample_batch[0].shape}, y batch shape: {sample_batch[1].shape}")
    logger.info(f"Sample batch text: {tokenizer.decode(sample_batch[0][0])}")

    file_path = os.path.join(main_dir, 'models', f'{feature_type}_{language}_lstm_{lstm_dim}')
    if domain:
        file_path += '_' + domain
    if character_level:
        file_path += '_character_level'
    file_path += '.h5'
    logger.info(f"Loading {file_path}")

    prediction_model = lstm_model(num_words=tokenizer.num_words, lstm_dim=lstm_dim,
                                  seq_len=1, batch_size=batch_size,
                                  stateful=True, return_state=True)
    prediction_model.load_weights(file_path)

    hiddens = {}
    seeds = {}
    predictions = {}
    for book in samples:
        seed = np.stack(samples[book])
        logger.debug(f"Seed shape: {seed.shape}")
        hf, preds = generate_text(prediction_model, tokenizer, seed, get_hidden=True)
        logger.debug(f"Hidden states shape: {hf.shape}")
        hiddens[book] = hf
        seeds[book] = seed
        preds = [tokenizer.ix_to_word[pred] for pred in preds]
        predictions[book] = preds

    file_name = f'{feature_type}_{language}_lstm_{lstm_dim}_seq_len_{seq_len}'
    if domain:
        file_name += '_' + domain
    if character_level:
        file_name += '_character-level'
    file_name += '.pkl'

    path_out = os.path.join('data', 'hidden_states', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(hiddens, f)
    logger.info(f"Successfully saved hidden states to {path_out}")

    path_out = os.path.join('data', 'seeds', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(seeds, f)
    logger.info(f"Successfully saved seeds to {path_out}")

    path_out = os.path.join('data', 'predictions', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(predictions, f)
    logger.info(f"Successfully saved predictions to {path_out}")

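# --- Hedged example: reading a saved artifact back. The file name is
# hypothetical and merely follows the naming pattern built above. ---
import os
import pickle

file_name = 'pos_english_lstm_128_seq_len_64.pkl'
with open(os.path.join('data', 'hidden_states', file_name), 'rb') as f:
    hiddens = pickle.load(f)
for book, hf in hiddens.items():
    print(book, getattr(hf, 'shape', None))
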
def handle_start(bot, update):
    user = update.message.chat.id
    if len(utils.get_user(user)) == 0:
        # New user: add them to the users list and suggest choosing categories.
        utils.add_user(user)
        bot.send_message(user, 'start_text')  # TODO: move the start text into the config
        bot.send_message(user, utils.generate_text(user),
                         reply_markup=utils.generate_keyboard(user))

def handle_edit(bot, update):
    # Show the current category list and suggest editing it.
    user = update.message.chat.id
    bot.send_message(user, utils.generate_text(user),
                     reply_markup=utils.generate_keyboard(user))

def main(epochs=60, sentences=None, generate=400,
         temperature=[0.2, 0.5, 0.8, 1.0], verbose=False):
    # Create log directory for this job run
    job_start_time = time.strftime("%Y%m%d_%H%M%S")
    data_directory = "output_data_" + job_start_time
    utils.nice_mk_dir(data_directory)

    # Define the loggers
    config_logger = setup_logger('config_logger', data_directory + '/config.log')
    training_logger = setup_logger('training_logger', data_directory + '/training.log')
    testing_logger = setup_logger('testing_logger', data_directory + '/testing.log')
    console_logger = setup_logger('console_logger')

    # Apply the command-line arguments
    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    num_epochs = epochs
    create_str_len = generate
    my_data = utils.Nietzche_Data()
    mdl = utils.sl_lstm_model(my_data.chars, my_data.maxlen)

    if sentences is not None:
        if sentences > my_data.len_sentences:
            config_logger.error(
                'Optional argument {} was set to {}. However, this is outside of the range 0 - {}.'
                .format('Sentences', sentences, my_data.len_sentences))
            data_size = my_data.len_sentences
            config_logger.info('Optional argument {} was set to {}'.format(
                'Sentences', sentences))
        else:
            config_logger.info('Optional argument {} has been set. The value is: {}'.format(
                'Sentences', sentences))
            data_size = sentences
    else:
        data_size = my_data.len_sentences

    # Note: this overrides the `temperature` argument; the training log header
    # below assumes exactly these four values.
    temperature = [0.2, 0.5, 1.0, 1.2]

    mem_bytes = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
    mem_gib = mem_bytes / (1024. ** 3)
    config_logger.info('Number of Epochs: {}'.format(epochs))
    console_logger.info('Number of Epochs: {}'.format(epochs))
    config_logger.info('Data Size: {}'.format(data_size))
    console_logger.info('Data Size: {}'.format(data_size))
    config_logger.info('String Length to create: {}'.format(create_str_len))
    console_logger.info('String Length to create: {}'.format(create_str_len))
    config_logger.info('CPU Count: {}'.format(multiprocessing.cpu_count()))
    console_logger.info('CPU Count: {}'.format(multiprocessing.cpu_count()))
    config_logger.info('Memory: {}'.format(round(mem_gib, 2)))
    console_logger.info('Memory: {}'.format(round(mem_gib, 2)))
    training_logger.info(['Job_Start_Time', 'Create_String_Len', 'Data_Size',
                          'Epoch_Num', 'Epoch_tm', 'Model_tm', 'SeedGen_tm',
                          'temp0.2_tm', 'temp0.5_tm', 'temp1.0_tm', 'temp1.2_tm'])

    # Set up which epochs get a text-generation 'test'
    gen_after_epoch_num = 5
    epoch_num_list = range(0, num_epochs - 1)
    epochs_to_test = list(
        filter(lambda x: x % (gen_after_epoch_num - 1) == 0, epoch_num_list))
    config_logger.info('Will generate text after each of the following epochs: {}'.format(
        epochs_to_test))
    console_logger.info('Will generate text after each of the following epochs: {}'.format(
        epochs_to_test))

    for epoch in range(num_epochs):
        training_logger.info('Training Epoch number: {}'.format(epoch))
        console_logger.info('--------Training Epoch number: {}------'.format(epoch))
        callbacks_list = [
            keras.callbacks.ModelCheckpoint(
                filepath=data_directory + '/my_model_{epoch}.h5'.format(epoch=epoch))
        ]
        mdl.fit(my_data.x[0:data_size], my_data.y[0:data_size],
                batch_size=128, epochs=1, callbacks=callbacks_list, verbose=0)

        if epoch in epochs_to_test:
            # Generate seed text
            seed_text = utils.get_seed_text(my_data.text, my_data.maxlen)
            testing_logger.info('Seed Text: {}'.format(seed_text))
            console_logger.info('Seed Text: {}'.format(seed_text))
            # Generate text at each sampling temperature
            for temp in temperature:
                generated_text = utils.generate_text(mdl, my_data.maxlen,
                                                     my_data.chars, my_data.char_indices,
                                                     seed_text, temp, create_str_len)
                testing_logger.info('Generated Text: [Temp: {0}] {1}'.format(
                    temp, generated_text))
                console_logger.info('Generated Text: [Temp: {0}] {1}'.format(
                    temp, generated_text))

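# --- Hedged sketch: the temperature reweighting utils.generate_text likely
# applies per character, following the classic Keras character-LSTM recipe.
# Illustrative only, not the project's implementation. ---
import numpy as np

def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-8) / temperature   # higher temp => flatter distribution
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    return int(np.argmax(np.random.multinomial(1, preds, 1)))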