Example #1
    def test_text_generation(self):
        seq, last_state = generate_text(self.rnn, self.goblet)
        print(seq)
        self.assertEqual(last_state.size, self.rnn.state_size)

        seq, last_state = generate_text(
            self.rnn, self.goblet, self.goblet.encode('A'),
            np.random.random(self.rnn.state_size), 100)
        print(seq)
        self.assertEqual(last_state.size, self.rnn.state_size)
Example #2
def print_text(rnn, last_probs, last_state, data, length):
    # sample the first character (one-hot) from the network's last output
    # distribution, then let generate_text continue from the given state
    first_char = np.zeros_like(last_probs)
    first_char[np.random.choice(rnn.input_size, p=last_probs)] = 1
    text, _ = generate_text(rnn,
                            data,
                            first_char=first_char,
                            initial_state=last_state,
                            length=length)
    print(text)
Example #3
def results():
    form = TextForm(request.form)
    if request.method == 'POST' and form.validate():
        textx = request.form['textx']
        numberx = int(request.form['numberx'])
        text_rnn = generate_text(new_model, textx, numberx)
        return render_template('results.html',
                               content=textx,
                               text_rnn=text_rnn)
    return render_template('text-form.html', form=form)
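The view above assumes a WTForms TextForm whose field names match the request.form keys it reads. A minimal sketch of such a form (labels and validators are assumptions, not from the source):

from wtforms import Form, StringField, IntegerField, validators

class TextForm(Form):
    # field names must match the 'textx' / 'numberx' keys read in results()
    textx = StringField('Seed text', [validators.InputRequired()])
    numberx = IntegerField('Characters to generate', [validators.InputRequired()])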
Example #4
def callback(opt, last_probs, last_state, data, start):
    first_char = np.zeros_like(last_probs)
    first_char[np.random.choice(opt.rnn.input_size, p=last_probs)] = 1
    text, _ = generate_text(opt.rnn,
                            data,
                            first_char=first_char,
                            initial_state=last_state)
    print(
        'Sequences {} cost {:.3f} learning rate {:.2e} elapsed {:.0f}s:\n{}\n'.
        format(opt.steps, opt.smooth_costs[-1], opt.learning_rates[-1],
               time.time() - start, text))
    plt.plot(opt.smooth_costs, 'b-')
    plt.pause(.05)
Example #5
def handle_callback(bot, update):
    cb_query = update.callback_query
    message = cb_query.message.message_id
    user = cb_query.message.chat.id
    bot.answer_callback_query(cb_query.id)
    if cb_query.data[:3] == 'del':
        new_categories = utils.remove_category(cb_query.data[4:], user)
    else:
        new_categories = utils.append_category(cb_query.data[4:], user)
    bot.edit_message_text(chat_id=user, message_id=message,
                          text=utils.generate_text(new_categories))  # editing the message which originated the query
    bot.edit_message_reply_markup(user, message,
                                  reply_markup=utils.generate_keyboard(new_categories))  # according to new category list
    utils.update_categories(user, new_categories)
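This handler uses the pre-v12 python-telegram-bot callback signature (bot, update). A minimal registration sketch under that assumption (the token is a placeholder):

from telegram.ext import Updater, CallbackQueryHandler

updater = Updater(token='BOT_TOKEN')  # placeholder
# route every inline-keyboard callback query to handle_callback
updater.dispatcher.add_handler(CallbackQueryHandler(handle_callback))
updater.start_polling()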
Example #6
async def main():
    prev_update_time = datetime.now() - timedelta(minutes=1)

    while True:
        if time_has_changed(prev_update_time):
            bts = generate_time_image_bytes(
                datetime.now(args.tz).replace(tzinfo=None))
            await client(
                DeletePhotosRequest(await client.get_profile_photos('me')))
            file = await client.upload_file(bts)
            await client(UploadProfilePhotoRequest(file))
            await client(
                UpdateProfileRequest(about=generate_text(
                    datetime.now(args.tz).replace(tzinfo=None))))
            prev_update_time = datetime.now()
        await asyncio.sleep(1)  # yield to the event loop each pass; time.sleep would block it
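The loop presupposes a connected Telethon client and an args namespace with a tz attribute. A minimal entry-point sketch under those assumptions (session name and credentials are placeholders):

import asyncio  # also needed by main() above for asyncio.sleep
from telethon import TelegramClient

client = TelegramClient('clock_session', api_id=12345, api_hash='0123456789abcdef')  # placeholders

with client:  # starts the client, then runs the update loop on its event loop
    client.loop.run_until_complete(main())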
Example #7
def main(save_filename=None,
         load_filename="class_model_weights.h5",
         do_train=False):
    """ Entry point """
    if do_train:
        print("Training and saving model...")
        (model, vocab) = train_model(file_name=save_filename)
        ids_from_chars = preprocessing.StringLookup(vocabulary=list(vocab))
        vocab_size = len(ids_from_chars.get_vocabulary())
    else:
        if load_filename is None:
            print(
                "ERROR: load file name not provided and do_train is False; "
                "no model can be used"
            )
            return 1
        # TODO Somehow this vocab should be accessible without needing to read and process this data
        with open('./archive/drake_lyrics.txt') as f:
            data = f.read()
        print('Length of text: {} characters'.format(len(data)))
        vocab = sorted(set(data))
        ids_from_chars = preprocessing.StringLookup(vocabulary=list(vocab))
        vocab_size = len(ids_from_chars.get_vocabulary())
        print("Loading model from disk...")
        model = DrakeGRUSequential(vocab_size, embedding_dim)
        utils.load_weights(load_filename, model,
                           tf.TensorShape([1, vocab_size]))
    print("Generating Bars...please wait")
    seed_texts = [
        "[Verse]", "you", "love", "boy", "I love", "I love you", "Kiki, ",
        "Swanging"
    ]
    for seed in seed_texts:
        num_chars = 400
        output_text = utils.generate_text(seed,
                                          model,
                                          seq_length,
                                          ids_from_chars,
                                          chars_to_gen=num_chars)
        print(">>>>>>>>>>>>>>>>>>>>")
        print("Input seed: %s" % (seed))
        print("%d character generated sequence:\n%s\n" %
              (num_chars, output_text))
        print("End of output for seed: %s" % (seed))
        print("<<<<<<<<<<<<<<<<<<<<")
    #Hope you enjoyed :)
    return 0
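For reference, the StringLookup layer built above reserves extra ids (mask and OOV tokens), which is why vocab_size comes from get_vocabulary() rather than from vocab itself. A small illustration with a toy vocabulary (the import path matches the TF 2.3-2.5 era experimental module used here):

import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing

lookup = preprocessing.StringLookup(vocabulary=list('abc'))
chars = tf.strings.unicode_split('cab', input_encoding='UTF-8')
print(lookup(chars))                 # integer ids, offset by the reserved tokens
print(len(lookup.get_vocabulary()))  # larger than len('abc')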
Example #8
    end_idx = tokenizer.word_index[config.end_token]
    unknown_idx = tokenizer.word_index[config.unknown_token]
    blocked_idxs = [unknown_idx, config.pad_idx]

    if args.beginning_list is not None:
        beginning = args.beginning_list
    elif args.beginning_string is not None:
        beginning = args.beginning_string
    else:
        beginning = None

    model, _ = Gated_Transformer_XL.build_from_config(
        config=config, checkpoint_path=args.checkpoint_path)

    generated_features, _ = generate_text(model=model, seq_len=config.seq_len,
                                          mem_len=config.mem_len, max_len=args.gen_len,
                                          tokenizer=tokenizer, start_idx=start_idx,
                                          end_idx=end_idx, blocked_idxs=blocked_idxs,
                                          batch_size=args.n_samples, beginning=beginning,
                                          top_k=args.top_k, temp=args.temp)

    generated_texts = scam_parser.features_to_text(features=generated_features,
                                                   tokenizer=tokenizer,
                                                   stored_tokens=stored_tokens)

    delimiter = '\n' * 4 + ('#'*80 + '\n') * 4 + '\n' * 4
    generated_texts = delimiter.join(generated_texts)

    with open(args.dst_path, 'w', encoding='ISO-8859-1') as file:
        file.write(generated_texts)
Example #9
def text_gen(data):
    "Text generation cloud function."
    return {'text': generate_text(data)}
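A quick local smoke test for the wrapper (the seed string is an assumption; data is passed straight through to generate_text):

if __name__ == '__main__':
    print(text_gen('Once upon a time'))  # -> {'text': '...generated continuation...'}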
Example #10
def main(feature_type: str, language: str, domain: str, main_dir: str, seq_len: int,
         batch_size: int, lstm_dim: int, character_level: bool = False):
    """
    Parameters
    ----------
    feature_type: the name of the feature.
    language: language of the text.
    domain: optional domain suffix used in model and output file names.
    main_dir: base directory.
    seq_len: sequence length.
    batch_size: batch size.
    lstm_dim: lstm hidden dimension.
    character_level: whether the tokenizer should operate on character level.
    """

    texts = get_texts(main_dir, language, feature_type, character_level, domain)

    tokenizer = Tokenizer(texts.values(), character_level=character_level)

    samples = {}

    for book in texts:
        print(len(texts[book]))
        len_text = len(texts[book]) if character_level else len(texts[book].split())

        if len_text < seq_len:
            logger.warn(f"Requested seq_len larger than text length: {len_text} / {seq_len} "
                             f"for {book} and feature type {feature_type}.")
            continue
        rand_idx = np.random.randint(0, len_text - seq_len, batch_size)

        if character_level:
            samples[book] = tokenizer.encode([texts[book][i: i + seq_len] for i in rand_idx])

        else:
            split_text = texts[book].split()
            samples[book] = tokenizer.encode(
                [" ".join(split_text[i: i + seq_len]) for i in rand_idx]
            )

    test_generator = DataGenerator(tokenizer,
                                   tokenizer.full_text,
                                   seq_len=seq_len,
                                   batch_size=batch_size,
                                   with_embedding=True,
                                   train=False)

    sample_batch = next(iter(test_generator))

    logger.info(f"X batch shape: {sample_batch[0].shape}, y batch shape: {sample_batch[1].shape}")
    logger.info(f"Sample batch text: {tokenizer.decode(sample_batch[0][0])}")

    file_path = os.path.join(main_dir, 'models',
                             f'{feature_type}_{language}_lstm_{lstm_dim}')

    if domain:
        file_path += '_' + domain

    if character_level:
        file_path += '_character_level'

    file_path += '.h5'

    logger.info(f"Loading {file_path}")

    prediction_model = lstm_model(num_words=tokenizer.num_words,
                                  lstm_dim=lstm_dim,
                                  seq_len=1,
                                  batch_size=batch_size,
                                  stateful=True,
                                  return_state=True)

    prediction_model.load_weights(file_path)

    hiddens = {}
    seeds = {}
    predictions = {}

    for book in samples:
        seed = np.stack(samples[book])
        print(seed.shape)
        hf, preds = generate_text(prediction_model, tokenizer, seed, get_hidden=True)
        print(hf.shape)
        hiddens[book] = hf
        seeds[book] = seed
        preds = [tokenizer.ix_to_word[pred] for pred in preds]
        predictions[book] = preds

    file_name = f'{feature_type}_{language}_lstm_{lstm_dim}_seq_len_{seq_len}'

    if domain:
        file_name += '_' + domain

    if character_level:
        file_name += '_character-level'
    file_name += '.pkl'

    path_out = os.path.join('data', 'hidden_states', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(hiddens, f)

    logger.info(f"Succesfully saved hidden dimensions to {path_out}")

    path_out = os.path.join('data', 'seeds', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(seeds, f)
    logger.info(f"Succesfully saved seeds to {path_out}")

    path_out = os.path.join('data', 'predictions', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(predictions, f)

    logger.info(f"Succesfully saved predictions to {path_out}")
Example #11
def handle_start(bot, update):
    user = update.message.chat.id
    if len(utils.get_user(user)) == 0:  # if new user - add it to the users list and suggest choosing categories
        utils.add_user(user)
        bot.send_message(user, 'start_text')  # TODO: add the start text to the config
        bot.send_message(user, utils.generate_text(user), reply_markup=utils.generate_keyboard(user))
Example #12
def handle_edit(bot, update):  # shows current category list and suggests editing it
    user = update.message.chat.id
    bot.send_message(user, utils.generate_text(user), reply_markup=utils.generate_keyboard(user))
Example #13
def main(epochs=60,
         sentences=None,
         generate=400,
         temperature=[0.2, 0.5, 0.8, 1.0],
         verbose=False):
    #Create log directory for job run
    job_start_time = time.strftime("%Y%m%d_%H%M%S")
    data_directory = "output_data_" + job_start_time
    utils.nice_mk_dir(data_directory)

    #Define the loggers
    config_logger = setup_logger('config_logger',
                                 data_directory + '/config.log')
    training_logger = setup_logger('training_logger',
                                   data_directory + '/training.log')
    testing_logger = setup_logger('testing_logger',
                                  data_directory + '/testing.log')
    console_logger = setup_logger('console_logger')

    #Set the command line arguments
    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    num_epochs = epochs
    create_str_len = generate
    my_data = utils.Nietzche_Data()
    mdl = utils.sl_lstm_model(my_data.chars, my_data.maxlen)

    if sentences is not None:
        if sentences > my_data.len_sentences:
            config_logger.error(
                'Optional argument {} was set to {}. However, this is outside the range 0 - {}.'
                .format('Sentences', sentences, my_data.len_sentences))
            data_size = my_data.len_sentences
            config_logger.info('Falling back to the full data size: {}'.format(
                my_data.len_sentences))
        else:
            config_logger.info(
                'Optional argument {} has been set. The value is: {}'.format(
                    'Sentences', sentences))
            data_size = sentences
    else:
        data_size = my_data.len_sentences

    temperature = [0.2, 0.5, 1.0, 1.2]  # NOTE: overrides the temperature argument above

    mem_bytes = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
    mem_gib = mem_bytes / (1024.**3)
    config_logger.info('Number of Epochs: {}'.format(str(epochs)))
    console_logger.info('Number of Epochs: {}'.format(str(epochs)))
    config_logger.info('Data Size: {}'.format(str(data_size)))
    console_logger.info('Data Size: {}'.format(str(data_size)))
    config_logger.info('String Length to create: {}'.format(
        str(create_str_len)))
    console_logger.info('String Length to create: {}'.format(
        str(create_str_len)))
    config_logger.info('CPU Count: {}'.format(
        str(round(multiprocessing.cpu_count(), 2))))
    console_logger.info('CPU Count: {}'.format(
        str(round(multiprocessing.cpu_count(), 2))))
    config_logger.info('Memory: {}'.format(str(round(mem_gib, 2))))
    console_logger.info('Memory: {}'.format(str(round(mem_gib, 2))))

    training_logger.info(['Job_Start_Time', 'Create_String_Len', 'Data_Size', 'Epoch_Num','Epoch_tm', 'Model_tm', \
        'SeedGen_tm', 'temp0.2_tm','temp0.5_tm','temp1.0_tm', 'temp1.2_tm'])

    #Setup the number of 'tests' to generate text
    gen_after_epoch_num = 5
    epoch_num_list = range(0, num_epochs - 1)
    epochs_to_test = list(
        filter(lambda x: x % (gen_after_epoch_num - 1) == 0, epoch_num_list))
    config_logger.info(
        'Will generate text after each of the following epochs: {}'.format(
            epochs_to_test))
    console_logger.info(
        'Will generate text after each of the following epochs: {}'.format(
            epochs_to_test))

    for epoch in range(num_epochs):
        training_logger.info('Training Epoch number: {}'.format(str(epoch)))
        console_logger.info('--------Training Epoch number: {}------'.format(
            str(epoch)))
        callbacks_list = [
            keras.callbacks.ModelCheckpoint(
                filepath=data_directory +
                '/my_model_{epoch}.h5'.format(epoch=epoch))
        ]
        mdl.fit(my_data.x[0:data_size],
                my_data.y[0:data_size],
                batch_size=128,
                epochs=1,
                callbacks=callbacks_list,
                verbose=0)
        if epoch in epochs_to_test:
            #Generate Seed Text
            seed_text = utils.get_seed_text(my_data.text, my_data.maxlen)
            testing_logger.info('Seed Text: {}'.format(seed_text))
            console_logger.info('Seed Text: {}'.format(seed_text))
            #Generate Text
            for temp in temperature:
                generated_text = utils.generate_text(mdl, my_data.maxlen,
                                                     my_data.chars,
                                                     my_data.char_indices,
                                                     seed_text, temp,
                                                     create_str_len)
                testing_logger.info('Generated Text: [Temp: {0}] {1}'.format(
                    temp, generated_text))
                console_logger.info('Generated Text: [Temp: {0}] {1}'.format(
                    temp, generated_text))
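The temperature sweep above rescales the model's output distribution before sampling. A common implementation of that step, which utils.generate_text presumably performs internally (a sketch, not the source's code):

import numpy as np

def sample(preds, temperature=1.0):
    # temperature < 1 sharpens the distribution, > 1 flattens it
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    # draw one character index from the rescaled distribution
    return np.argmax(np.random.multinomial(1, preds, 1))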