def train_with_goblet_of_fire(results_path=None):
    book = dt.load_goblet_of_fire()
    np.random.seed(42)
    net = RNNet(m=100, K=book.K)
    # optimizer = RNNet.AdaGrad(net, eta=0.1)
    optimizer = RNNet.RMSProp(net, eta=0.001, gamma=0.9)
    config = {
        'epochs': 10,
        'output_folder': '../out',
        'optimizer': optimizer,
        'sequence_length': 25,
        'record_interval': 1_000,
        'test_length': 200
    }
    res = net.train(book, config)
    if results_path is not None:
        pickle(res, results_path)
    return res
def fine_tune_trump_tweets(output_dir, initial_model_path, results_path=None):
    np.random.seed(42)
    tweets = dt.load_trump_tweets()
    net = RNNet.import_model(initial_model_path)
    net.clear_grads()
    optimizer = RNNet.RMSProp(net, eta=0.0001, gamma=0.9)
    config = {
        'epochs': 5,
        'output_folder': output_dir,
        'optimizer': optimizer,
        'sequence_length': 70,
        'record_interval': 200,
        'test_length': 140
    }
    res = net.train(tweets, config)
    if results_path is not None:
        pickle(res, results_path)
    return res
def check_gradients():
    """Sanity-check analytical gradients against numerical estimates."""
    book = dt.load_goblet_of_fire()
    seq_len = 25
    m = 5
    X, Y, _ = book.get_labeled_data(0, seq_len)
    h0 = np.zeros((m, 1))
    np.random.seed(42)
    net = RNNet(m=m, K=book.K)
    print('===> Computing numerical gradients...')
    num_grads = compute_grads_numerical(X, Y, h0, net)
    print('===> Computing analytical gradients...')
    grads = net._backward(X, Y, h0, *net._forward(X, h0))
    errors = compare_grads(num_grads, grads, m, book.K)
    errors_v = vars(errors)
    for k, v in errors_v.items():
        print(
            f'MSEs for {k} -> max: {v.max()},\t avg: {v.mean()},\t std: {v.std()}'
        )
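# `compare_grads` is called above but not defined in this file. A minimal
# sketch of what it could look like, assuming both gradient sets are dict-like
# maps from parameter name to numpy array, and that the returned object only
# needs to expose per-parameter error arrays through vars(), as
# check_gradients() expects. The actual project implementation may differ.
from types import SimpleNamespace


def compare_grads(num_grads, grads, m, K):
    # m and K are accepted only for signature parity with the call site.
    # Element-wise squared error between numerical and analytical gradients,
    # one array per parameter, bundled so vars() yields {name: error_array}.
    return SimpleNamespace(**{
        name: (num_grads[name] - grads[name]) ** 2
        for name in num_grads
    })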
def compute_grads_numerical(X, Y, h0, net: RNNet, step_size=1e-5):
    """Estimate gradients numerically, temporarily swapping in a copy of theta."""
    old_theta = net.theta
    tmp_theta = old_theta.copy()
    m, K = net.m, net.K
    net.theta = tmp_theta

    def loss_fn(X_, Y_):
        return net.cross_entropy_loss(X_, Y_, h_prev=h0)

    grads = _compute_grads_numerical(X, Y, m, K, tmp_theta, loss_fn, step_size)
    net.theta = old_theta  # restore the original parameters
    return grads
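# `_compute_grads_numerical` is not shown in this file. A minimal sketch of a
# centered-difference implementation, assuming `theta` is a dict of numpy
# arrays keyed by parameter name (the project's actual container may differ):
def _compute_grads_numerical(X, Y, m, K, theta, loss_fn, step_size):
    grads = {}
    for name, param in theta.items():
        grad = np.zeros_like(param)
        for idx in np.ndindex(param.shape):
            original = param[idx]
            param[idx] = original + step_size  # f(theta + h)
            loss_plus = loss_fn(X, Y)
            param[idx] = original - step_size  # f(theta - h)
            loss_minus = loss_fn(X, Y)
            param[idx] = original              # restore before moving on
            grad[idx] = (loss_plus - loss_minus) / (2 * step_size)
        grads[name] = grad
    return grads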
def synthesize_with_best_model():
    model_path = '../trained_models/2018-06-12-2205-e10.pkl'
    if exists(model_path):
        book = dt.load_goblet_of_fire()
        net = RNNet.import_model(model_path)
        np.random.seed(50)
        print(net.synthesize(1000, book.char_to_one_hot, book.index_to_char))
    else:
        eprint('Best trained model not found!')
def synthesize_trump(model_path):
    np.random.seed(42)
    tweets = dt.load_trump_tweets()
    net = RNNet.import_model(model_path)
    print(
        net.synthesize(140 * 100,
                       tweets.char_to_one_hot,
                       tweets.index_to_char,
                       separator=tweets.separator,
                       stop_at_separator=False))
def train_with_trump_tweets(output_dir, results_path=None):
    np.random.seed(42)
    tweets = dt.load_trump_tweets()
    net = RNNet(m=100, K=tweets.K)
    optimizer = RNNet.RMSProp(net, eta=0.0008, gamma=0.9)
    config = {
        'epochs': 5,
        'output_folder': output_dir,
        'optimizer': optimizer,
        'sequence_length': 70,
        'record_interval': 200,
        'test_length': 140
    }
    res = net.train(tweets, config)
    if results_path is not None:
        pickle(res, results_path)
    return res
def synthesize_trump_special_seq(model_path, n=100, x0='#'):
    np.random.seed(42)
    tweets = dt.load_trump_tweets()
    net = RNNet.import_model(model_path)
    # joblib accepts n_jobs=-1 to mean "all cores", but the chunking below
    # needs a concrete positive worker count (n // -1 and range(-1) would
    # silently produce no work), so resolve it explicitly.
    from multiprocessing import cpu_count
    n_jobs = cpu_count()
    parallel = Parallel(n_jobs=n_jobs, verbose=5, backend='multiprocessing')
    # Note: integer division may undershoot n by up to n_jobs - 1 sequences.
    res = parallel(
        delayed(synthesize_special_seq_worker)(n // n_jobs, net, tweets, x0,
                                               42 + i)
        for i in range(n_jobs))
    res = sum(res, [])
    res.sort()
    return res
def synthesize_special_seq(net: RNNet,
                           dataset: dt.Dataset,
                           x0,
                           random_state=None,
                           max_len: int = 40,
                           min_len: int = 3) -> str:
    while True:
        synth = net.synthesize(max_len,
                               dataset.char_to_one_hot,
                               dataset.index_to_char,
                               x0=x0,
                               separator=dataset.separator,
                               random_state=random_state)
        single_piece = synth.split(' ')[0]
        valid_words = re.findall(r'(\w+)', single_piece)
        if len(valid_words) > 0 and len(valid_words[0]) >= min_len:
            return x0 + valid_words[0]
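# `synthesize_special_seq_worker`, dispatched by synthesize_trump_special_seq
# above, is not defined in this file. A minimal sketch, assuming each worker
# seeds the global RNG once with its per-worker seed and then draws `n`
# sequences; feeding the fixed seed straight into synthesize_special_seq's
# retry loop as random_state would repeat the identical draw on every attempt.
def synthesize_special_seq_worker(n, net, dataset, x0, seed):
    np.random.seed(seed)
    return [
        synthesize_special_seq(net, dataset, x0, random_state=None)
        for _ in range(n)
    ]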