import sys

import fasttext
import tensorflow as tf


def decode():
    with tf.Session(config=get_session_configs()) as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab, rev_vocab = read_vocabulary_from_file(paths['vocab_path_context'])

        # Load vocabulary vectors.
        vocab_vectors = load_pickle_file(paths['vocab_vectors_context'])

        # Load the FastText model used for preprocessing.
        print("Load existing FastText model...")
        fast_text_model = fasttext.load_model(paths['fast_text_model_context'], encoding='utf-8')

        # Decode from standard input.
        sys.stdout.write("Human: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
        context = ""
        while sentence:
            # Prepend the previous turn so the model sees one turn of context.
            context_sentence = context + sentence
            output = decode_sentence(context_sentence, vocab, rev_vocab, model, sess)
            print("Ola: " + " ".join(output))
            print("Human: ", end="")
            context = sentence  # or context = output
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
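# A minimal sketch of what `preprocess_input` above could look like. This is an
# assumption, not the repository's actual helper: it guesses that the FastText
# model and the precomputed `vocab_vectors` mapping are used to replace
# out-of-vocabulary words with their nearest in-vocabulary neighbour by cosine
# similarity. All names below are hypothetical.
import numpy as np

def preprocess_input_sketch(sentence, fast_text_model, vocab_vectors):
    cleaned = []
    for token in sentence.strip().lower().split():
        if token in vocab_vectors:
            cleaned.append(token)
            continue
        # Embed the unknown token (the old fasttext package exposes word
        # vectors via indexing) and fall back to the closest known word.
        vector = np.asarray(fast_text_model[token], dtype=float)
        norm = np.linalg.norm(vector)
        best_word, best_score = token, -1.0
        for word, vocab_vector in vocab_vectors.items():
            denom = norm * np.linalg.norm(vocab_vector)
            score = float(np.dot(vector, vocab_vector)) / denom if denom else -1.0
            if score > best_score:
                best_word, best_score = word, score
        cleaned.append(best_word)
    return " ".join(cleaned)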
import sys

import fasttext
import numpy as np
import tensorflow as tf


def decode():
    # Avoid allocating all of the GPU memory.
    config = get_session_configs()
    with tf.Session(config=config) as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab, rev_vocab = read_vocabulary_from_file(paths['vocab_path'])

        # Load vocabulary vectors.
        vocab_vectors = load_pickle_file(paths['vocab_vectors'])

        # Load the FastText model used for preprocessing.
        print("Load existing FastText model...")
        fast_text_model = fasttext.load_model(paths['fast_text_model'], encoding='utf-8')

        # Decode from standard input.
        print("To reset states, type '*reset*'")
        sys.stdout.write("Human: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)

        # Initial state.
        if FLAGS.use_lstm:
            initial_state = np.zeros((num_layers, 2, model.batch_size, size))
        else:
            initial_state = np.zeros((num_layers, model.batch_size, size))
        states = initial_state

        while sentence:
            output, states = decode_stateful_sentence(sentence, vocab, rev_vocab, model, sess, states)
            output = " ".join(output)
            output = get_sliced_output(output, 1)
            print("Vinyals_Stateful: " + output.strip())
            print("Human: ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            if sentence.strip() == "*reset*":
                # Drop the carried decoder states and start a fresh conversation.
                states = initial_state
                print("States were successfully reset.")
                print("Human: ", end="")
                sys.stdout.flush()
                sentence = sys.stdin.readline()
            sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
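# A hedged illustration of the state layout assumed by the stateful decoder
# above: with LSTM cells each layer carries a (cell, hidden) pair, hence the
# extra axis of size 2; GRU-style cells keep a single hidden state per layer.
# The hyperparameters below are made-up stand-ins for the module-level
# `num_layers` and `size`.
import numpy as np

num_layers, batch_size, size = 3, 1, 512

lstm_states = np.zeros((num_layers, 2, batch_size, size))
gru_states = np.zeros((num_layers, batch_size, size))

for layer in range(num_layers):
    c, h = lstm_states[layer]  # cell state and hidden state for this layer
    assert c.shape == h.shape == (batch_size, size)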
def __init__(self, train, ytrain, metric, test=None, ytest=None,
             features=[], features_to_encode=[], split_by=None,
             stratify_folds=False, n_splits=5, split_seed=0):
    """
    metric - function that accepts (true_values, predicted_values) and
        returns a float.
    train, ytrain (pd.DataFrame/numpy.array/scipy.csr_matrix) - train data.
    test (pd.DataFrame/numpy.array/scipy.csr_matrix) - data to predict.
        If not given, you can still get the OOF prediction for the train data.
    ytest - if given, the stacker is evaluated on the test data.
    features - list of pandas column names to train on. If not given, or if
        train is not a pd.DataFrame, all columns are used as features.
    features_to_encode (not implemented) - features for target encoding.
        Encoding uses the target from train folds only.
    split_by (str) - column name whose unique values should each be kept
        within a single fold. Used to avoid overfitting or leakage; for
        example, you may want to put all events corresponding to the same
        `user_id` into a single fold. Ignored if None or if train is not a
        pd.DataFrame.
    stratify_folds (bool) - used if split_by is None.
    n_splits - number of splits for train data. To get one OOF prediction,
        the model must be fitted n_splits times.
    split_seed - seed for the folds.
    """
    train, ytrain, features, split_by, test, ytest = preprocess_input(
        train, ytrain, features, split_by, test, ytest)
    self._train = train
    self.ytrain = ytrain
    self.metric = metric
    self._test = test
    self.ytest = ytest
    self.features = features
    self.features_to_encode = features_to_encode
    self.split_by = split_by
    self.stratify_folds = stratify_folds
    self.n_splits = n_splits
    self.split_seed = split_seed
    # Current level of fitting. 1 means fitting on source features,
    # 2 means fitting on meta features.
    self.level = 1
    # Meta dataframes - for storing level 1 predictions.
    self.train_meta = pd.DataFrame()
    self.test_meta = pd.DataFrame()
    # Result dataframes - for storing level 2 predictions.
    self.train_result = pd.DataFrame()
    self.test_result = pd.DataFrame()
    self.folds = self.get_folds()
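# A minimal sketch of how `get_folds` could honour `split_by` and
# `stratify_folds` with scikit-learn splitters. This is inferred from the
# docstring above, not the class's actual implementation; the function name
# is hypothetical.
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold

def get_folds_sketch(train, ytrain, split_by=None, stratify_folds=False,
                     n_splits=5, split_seed=0):
    """Return a list of (train_idx, valid_idx) index pairs."""
    if split_by is not None:
        # All rows sharing one `split_by` value (e.g. the same user_id) land
        # in a single fold, so no identity leaks between train and validation.
        return list(GroupKFold(n_splits=n_splits)
                    .split(train, ytrain, groups=train[split_by]))
    cls = StratifiedKFold if stratify_folds else KFold
    splitter = cls(n_splits=n_splits, shuffle=True, random_state=split_seed)
    return list(splitter.split(train, ytrain))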
import sys

import fasttext
import tensorflow as tf


def decode():
    with tf.Session(config=get_session_configs()) as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab, rev_vocab = read_vocabulary_from_file(paths['vocab_path'])

        # Load vocabulary vectors.
        vocab_vectors = load_pickle_file(paths['vocab_vectors'])

        # Load the FastText model used for preprocessing.
        print("Load existing FastText model...")
        fast_text_model = fasttext.load_model(paths['fast_text_model'], encoding='utf-8')

        # Keep only one sentence of output for OpenSubtitles models, two otherwise.
        if FLAGS.open_subtitles:
            num_output_sentences = 1
        else:
            num_output_sentences = 2

        # Decode from standard input.
        sys.stdout.write("Human: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
        while sentence:
            output = decode_sentence(sentence, vocab, rev_vocab, model, sess)
            output = " ".join(output)
            output = get_sliced_output(output, num_output_sentences)
            print("Grid LSTM: " + output.strip())
            print("Human: ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            if FLAGS.context_full_turns:
                # Feed the previous answer back in as context for the next turn.
                sentence = preprocess_input(output.strip() + " " + sentence.strip(),
                                            fast_text_model, vocab_vectors)
            else:
                sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
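# A hedged sketch of the `get_sliced_output` helper used above, assuming it
# truncates the decoded string to the first `num_sentences` sentences based on
# simple end-of-sentence punctuation. The real helper may instead rely on the
# model's own end-of-sentence tokens.
import re

def get_sliced_output_sketch(output, num_sentences):
    # Keep the trailing punctuation attached to each sentence.
    sentences = re.findall(r"[^.!?]+[.!?]?", output)
    return " ".join(s.strip() for s in sentences[:num_sentences])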