import os

# prompt_toolkit 1.x API; AbortAction/AcceptAction and several of the prompt()
# keywords used below were removed in 2.x.
from prompt_toolkit import AbortAction, prompt
from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
from prompt_toolkit.filters import Always
from prompt_toolkit.history import FileHistory
from prompt_toolkit.interface import AcceptAction

# DocumentStyle, Completer, TextUtils, get_bottom_toolbar_tokens, manager and
# Slack are defined elsewhere in this module.


def main():
    """Start the Slack client."""
    # Splash banner; requires the figlet and lolcat shell tools.
    os.system("clear; figlet 'Slack Gitsin' | lolcat")
    history = FileHistory(os.path.expanduser("~/.slackHistory"))
    while True:
        text = prompt("slack> ",
                      history=history,
                      auto_suggest=AutoSuggestFromHistory(),
                      on_abort=AbortAction.RETRY,
                      style=DocumentStyle,
                      completer=Completer(fuzzy_match=False,
                                          text_utils=TextUtils()),
                      complete_while_typing=Always(),
                      get_bottom_toolbar_tokens=get_bottom_toolbar_tokens,
                      key_bindings_registry=manager.registry,
                      accept_action=AcceptAction.RETURN_DOCUMENT)
        slack = Slack(text)
        slack.run_command()
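With `on_abort=AbortAction.RETRY`, Ctrl-C simply redraws the prompt, but Ctrl-D still raises `EOFError` under the prompt_toolkit 1.x defaults. A minimal entry-point sketch that turns that into a clean exit (the guard is an addition, not part of the original):

```python
if __name__ == "__main__":
    try:
        main()
    except EOFError:
        # Ctrl-D at the prompt raises EOFError in prompt_toolkit 1.x;
        # treat it as the user asking to leave the REPL loop.
        print("Bye!")
```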
def make_fold(train_df, test_df, save_model_class, save_model_deep):
    text_util = TextUtils()

    # Preprocess and tokenize both sentence columns of each split.
    train_spa_sent_1_df = train_df['spa_sent_1'].tolist()
    train_spa_sent_2_df = train_df['spa_sent_2'].tolist()
    test_spa_sent_1_df = test_df['spa_sent_1'].tolist()
    test_spa_sent_2_df = test_df['spa_sent_2'].tolist()
    train_spa_tokens_1 = text_util.tokenize(sentences=train_spa_sent_1_df,
                                            language=text_util.spanish)
    train_spa_tokens_2 = text_util.tokenize(sentences=train_spa_sent_2_df,
                                            language=text_util.spanish)
    test_spa_tokens_1 = text_util.tokenize(sentences=test_spa_sent_1_df,
                                           language=text_util.spanish)
    test_spa_tokens_2 = text_util.tokenize(sentences=test_spa_sent_2_df,
                                           language=text_util.spanish)

    # Build the vocabulary from the training split only, seeding word vectors
    # from a pretrained fastText file.
    train_spa_tokens = train_spa_tokens_1 + train_spa_tokens_2
    train_label_df = train_df['label'].tolist()
    (spa_id2word, spa_word2id), spa_E_by_id = text_util.create_word_vocab(
        lst_tokens=train_spa_tokens,
        word_dim=300,
        fasttext_path='./data/new/pretrained/mine.wiki.es.vec')
    (id2label, label2id) = text_util.create_label_vocab(labels=train_label_df)

    # Build the datasets, i.e. convert tokens and labels to their corresponding ids.
    train_dataset = text_util.create_dataset(lst_tokens_1=train_spa_tokens_1,
                                             lst_tokens_2=train_spa_tokens_2,
                                             labels=train_label_df,
                                             label2id=label2id,
                                             word2id_1=spa_word2id,
                                             word2id_2=spa_word2id)
    test_dataset = text_util.create_dataset(lst_tokens_1=test_spa_tokens_1,
                                            lst_tokens_2=test_spa_tokens_2,
                                            labels=test_df['label'].tolist(),
                                            label2id=label2id,
                                            word2id_1=spa_word2id,
                                            word2id_2=spa_word2id)

    # Create mini-batches (batch_size, like n_epoch, init_lr and init_keep_prob
    # below, is a module-level setting defined elsewhere in this script).
    train_batches = text_util.create_batch(dataset=train_dataset, batch_size=batch_size)
    test_batches = text_util.create_batch(dataset=test_dataset, batch_size=batch_size)

    # Train; the `train_batchs`/`test_batchs` keyword spelling follows the
    # signature of train().
    train_score = train(train_batchs=train_batches,
                        test_batchs=test_batches,
                        n_epoch=n_epoch,
                        init_lr=init_lr,
                        init_keep_prob=init_keep_prob,
                        init_word_emb=spa_E_by_id,
                        text_util=text_util,
                        save_model_class=save_model_class,
                        save_model_deep=save_model_deep,
                        word2id=spa_word2id,
                        label2id=label2id)
    return train_score
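`make_fold` handles a single train/test split, so the cross-validation loop lives outside it. A sketch of such a driver, assuming a pandas DataFrame with the three columns used above and scikit-learn's `KFold`; the input path and per-fold save paths are hypothetical:

```python
import pandas as pd
from sklearn.model_selection import KFold

# Hypothetical training file with the columns make_fold() expects.
df = pd.read_csv('./data/train.txt', sep='\t', header=None,
                 names=['spa_sent_1', 'spa_sent_2', 'label'])

scores = []
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, test_idx) in enumerate(kf.split(df)):
    score = make_fold(train_df=df.iloc[train_idx],
                      test_df=df.iloc[test_idx],
                      save_model_class='./saved/fold%d/model.pkl' % fold,
                      save_model_deep='./saved/fold%d/model.ckpt' % fold)
    scores.append(score)
print('mean CV score:', sum(scores) / len(scores))
```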
import pandas as pd
import tensorflow as tf
import _pickle as cPickle

# Model must be importable for cPickle to reconstruct the saved object.
from nn.similarNN import Model
from utils import TextUtils

save_model_class = './saved/24072018/model.pkl'
save_model_deep = './saved/24072018/model.ckpt'
data_file_path = './data/cikm_test_a_20180516.txt'
data_file_headers = ['spa_sent_1', 'spa_sent_2']

if __name__ == '__main__':
    text_util = TextUtils()
    text_util.pad_id = 1
    text_util.unk_id = 0

    """ Restore model """
    with open(save_model_class, 'rb') as f:
        model = cPickle.load(f)
    model.build(build_session=True, init_word_embedding=None)
    model.restore(save_model_deep)

    """ Load data """
    data_df = pd.read_csv(data_file_path, sep='\t', header=None,
                          names=data_file_headers)

    """
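The restore sequence implies a two-file save convention: `model.pkl` holds the picklable `Model` configuration, while `model.ckpt` holds the TensorFlow variables. A sketch of the save-side counterpart under that assumption; the `sess` attribute name and the graph handling are guesses, and the project's actual save logic lives in its training code:

```python
import _pickle as cPickle
import tensorflow as tf

def save_model(model, save_model_class, save_model_deep):
    # Write the TensorFlow weights from the graph that owns them
    # ('sess' attribute name is an assumption).
    with model.sess.graph.as_default():
        tf.train.Saver().save(model.sess, save_model_deep)
    # Pickle the Python-side configuration without its unpicklable TF handles.
    sess, model.sess = model.sess, None
    with open(save_model_class, 'wb') as f:
        cPickle.dump(model, f)
    model.sess = sess  # reattach so the caller can keep using the model
```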