import os

# prompt_toolkit 1.x API; AbortAction/AcceptAction and several of the prompt()
# keywords used below were removed in 2.x.
from prompt_toolkit import AbortAction, prompt
from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
from prompt_toolkit.filters import Always
from prompt_toolkit.history import FileHistory
from prompt_toolkit.interface import AcceptAction

# DocumentStyle, Completer, TextUtils, get_bottom_toolbar_tokens, manager and
# Slack are defined elsewhere in this module.


def main():
    """Start the Slack client."""
    # Splash banner; requires the figlet and lolcat shell tools.
    os.system("clear; figlet 'Slack Gitsin' | lolcat")
    history = FileHistory(os.path.expanduser("~/.slackHistory"))
    while True:
        text = prompt("slack> ",
                      history=history,
                      auto_suggest=AutoSuggestFromHistory(),
                      on_abort=AbortAction.RETRY,
                      style=DocumentStyle,
                      completer=Completer(fuzzy_match=False,
                                          text_utils=TextUtils()),
                      complete_while_typing=Always(),
                      get_bottom_toolbar_tokens=get_bottom_toolbar_tokens,
                      key_bindings_registry=manager.registry,
                      accept_action=AcceptAction.RETURN_DOCUMENT)
        slack = Slack(text)
        slack.run_command()
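With `on_abort=AbortAction.RETRY`, Ctrl-C simply redraws the prompt, but Ctrl-D still raises `EOFError` under the prompt_toolkit 1.x defaults. A minimal entry-point sketch that turns that into a clean exit (the guard is an addition, not part of the original):

```python
if __name__ == "__main__":
    try:
        main()
    except EOFError:
        # Ctrl-D at the prompt raises EOFError in prompt_toolkit 1.x;
        # treat it as the user asking to leave the REPL loop.
        print("Bye!")
```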
def make_fold(train_df, test_df, save_model_class, save_model_deep):
    text_util = TextUtils()

    # Preprocess and tokenize both sentence columns of each split.
    train_spa_sent_1_df = train_df['spa_sent_1'].tolist()
    train_spa_sent_2_df = train_df['spa_sent_2'].tolist()
    test_spa_sent_1_df = test_df['spa_sent_1'].tolist()
    test_spa_sent_2_df = test_df['spa_sent_2'].tolist()
    train_spa_tokens_1 = text_util.tokenize(sentences=train_spa_sent_1_df,
                                            language=text_util.spanish)
    train_spa_tokens_2 = text_util.tokenize(sentences=train_spa_sent_2_df,
                                            language=text_util.spanish)
    test_spa_tokens_1 = text_util.tokenize(sentences=test_spa_sent_1_df,
                                           language=text_util.spanish)
    test_spa_tokens_2 = text_util.tokenize(sentences=test_spa_sent_2_df,
                                           language=text_util.spanish)

    # Build the vocabulary from the training split only, seeding word vectors
    # from a pretrained fastText file.
    train_spa_tokens = train_spa_tokens_1 + train_spa_tokens_2
    train_label_df = train_df['label'].tolist()
    (spa_id2word, spa_word2id), spa_E_by_id = text_util.create_word_vocab(
        lst_tokens=train_spa_tokens,
        word_dim=300,
        fasttext_path='./data/new/pretrained/mine.wiki.es.vec')
    (id2label, label2id) = text_util.create_label_vocab(labels=train_label_df)

    # Build the datasets, i.e. convert tokens and labels to their corresponding ids.
    train_dataset = text_util.create_dataset(lst_tokens_1=train_spa_tokens_1,
                                             lst_tokens_2=train_spa_tokens_2,
                                             labels=train_label_df,
                                             label2id=label2id,
                                             word2id_1=spa_word2id,
                                             word2id_2=spa_word2id)
    test_dataset = text_util.create_dataset(lst_tokens_1=test_spa_tokens_1,
                                            lst_tokens_2=test_spa_tokens_2,
                                            labels=test_df['label'].tolist(),
                                            label2id=label2id,
                                            word2id_1=spa_word2id,
                                            word2id_2=spa_word2id)

    # Create mini-batches (batch_size, like n_epoch, init_lr and init_keep_prob
    # below, is a module-level setting defined elsewhere in this script).
    train_batches = text_util.create_batch(dataset=train_dataset, batch_size=batch_size)
    test_batches = text_util.create_batch(dataset=test_dataset, batch_size=batch_size)

    # Train; the `train_batchs`/`test_batchs` keyword spelling follows the
    # signature of train().
    train_score = train(train_batchs=train_batches,
                        test_batchs=test_batches,
                        n_epoch=n_epoch,
                        init_lr=init_lr,
                        init_keep_prob=init_keep_prob,
                        init_word_emb=spa_E_by_id,
                        text_util=text_util,
                        save_model_class=save_model_class,
                        save_model_deep=save_model_deep,
                        word2id=spa_word2id,
                        label2id=label2id)
    return train_score
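`make_fold` handles a single train/test split, so the cross-validation loop lives outside it. A sketch of such a driver, assuming a pandas DataFrame with the three columns used above and scikit-learn's `KFold`; the input path and per-fold save paths are hypothetical:

```python
import pandas as pd
from sklearn.model_selection import KFold

# Hypothetical training file with the columns make_fold() expects.
df = pd.read_csv('./data/train.txt', sep='\t', header=None,
                 names=['spa_sent_1', 'spa_sent_2', 'label'])

scores = []
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, test_idx) in enumerate(kf.split(df)):
    score = make_fold(train_df=df.iloc[train_idx],
                      test_df=df.iloc[test_idx],
                      save_model_class='./saved/fold%d/model.pkl' % fold,
                      save_model_deep='./saved/fold%d/model.ckpt' % fold)
    scores.append(score)
print('mean CV score:', sum(scores) / len(scores))
```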
import pandas as pd
import tensorflow as tf
import _pickle as cPickle

# Model must be importable for cPickle to reconstruct the saved object.
from nn.similarNN import Model
from utils import TextUtils

save_model_class = './saved/24072018/model.pkl'
save_model_deep = './saved/24072018/model.ckpt'
data_file_path = './data/cikm_test_a_20180516.txt'
data_file_headers = ['spa_sent_1', 'spa_sent_2']

if __name__ == '__main__':
    text_util = TextUtils()
    text_util.pad_id = 1
    text_util.unk_id = 0

    """ Restore model """
    with open(save_model_class, 'rb') as f:
        model = cPickle.load(f)
    model.build(build_session=True, init_word_embedding=None)
    model.restore(save_model_deep)

    """ Load data """
    data_df = pd.read_csv(data_file_path, sep='\t', header=None,
                          names=data_file_headers)

    """
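The restore sequence implies a two-file save convention: `model.pkl` holds the picklable `Model` configuration, while `model.ckpt` holds the TensorFlow variables. A sketch of the save-side counterpart under that assumption; the `sess` attribute name and the graph handling are guesses, and the project's actual save logic lives in its training code:

```python
import _pickle as cPickle
import tensorflow as tf

def save_model(model, save_model_class, save_model_deep):
    # Write the TensorFlow weights from the graph that owns them
    # ('sess' attribute name is an assumption).
    with model.sess.graph.as_default():
        tf.train.Saver().save(model.sess, save_model_deep)
    # Pickle the Python-side configuration without its unpicklable TF handles.
    sess, model.sess = model.sess, None
    with open(save_model_class, 'wb') as f:
        cPickle.dump(model, f)
    model.sess = sess  # reattach so the caller can keep using the model
```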