def test_training_performance(self): n = NoOutputLstm(100, 80) training_data = [] for _ in range(30): xs = np.asarray([np.random.uniform(size=100), np.random.uniform(size=100), np.random.uniform(size=100), np.random.uniform(size=100), np.random.uniform(size=100), np.random.uniform(size=100), np.random.uniform(size=100), np.random.uniform(size=100)]) h0 = np.random.uniform(size=80) t = np.random.uniform(size=80) training_data.append((xs, h0, t)) epochs = 100 start = time.time() for _ in range(epochs): for xs, h0, t in training_data: intermediate_results = {} h_last = n.forward_prop(xs, h0, intermediate_results) n.back_prop(ce_err_prime(h_last, t), intermediate_results) n.train(0.1, intermediate_results) end = time.time() time_taken = end - start print(str(epochs) + " training epochs took " + str(time_taken) + " seconds")
def test_learn_word_vectors_from_char_vector_sequence(self): text = "please learn how to infer word vectors from sequences of character vectors" index_to_word = list(set(text.split())) index_to_char = list(set(text)) word_to_index = { word: index for index, word in enumerate(index_to_word) } char_to_index = { word: index for index, word in enumerate(index_to_char) } def to_char_vector_sequence(word): sequence = [] for char in word: vector = np.ones(len(char_to_index)) * -1 vector[char_to_index[char]] = 1 sequence.append(vector) sequence.append(np.zeros(len(char_to_index))) return np.asarray(sequence) def to_word_vector(word): vector = np.ones(len(word_to_index)) * -1 vector[word_to_index[word]] = 1 return vector training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()] n = NoOutputLstm(len(index_to_char), len(index_to_word)) for i in range(1000): for char_vectors, word_vector in training_data: intermediate_results = {} h_last = n.forward_prop(char_vectors, np.zeros(len(index_to_word)), intermediate_results) n.back_prop(ce_err_prime(h_last, word_vector), intermediate_results) n.train(0.1, intermediate_results) if i % 200 == 0: total_err = 0 for char_vectors, word_vector in training_data: h = n.activate(char_vectors, np.zeros(len(index_to_word))) total_err += mathutils.mean_squared_error(h, word_vector) print((total_err / len(training_data))) result = n.activate(to_char_vector_sequence("infer"), np.zeros(len(index_to_word))) self.assertEquals("infer", index_to_word[np.argmax(result)])
def test_multi_step_gradients(self): t = 1e-4 x_size = 4 h_size = 5 xs = np.random.uniform(size=(10, x_size)) h0 = np.random.uniform(size=h_size) n = NoOutputLstm(x_size, h_size) intermediate_results = {} h_last = n.forward_prop(xs, h0, intermediate_results) dh0 = n.back_prop(derr(h_last), intermediate_results) def grad_check(attribute, numerical_gradient): for i in np.ndindex(numerical_gradient.shape): plus_n = n.clone() getattr(plus_n, attribute)[i] += t neg_n = n.clone() getattr(neg_n, attribute)[i] -= t plus_h_last = plus_n.forward_prop(xs, h0, {}) neg_h_last = neg_n.forward_prop(xs, h0, {}) exp_grad = (err(plus_h_last) - err(neg_h_last)) / (2 * t) num_grad = numerical_gradient[i] self.assertTrue(abs(exp_grad - num_grad) < 0.01, "{}: {} not within threshold of {}".format(attribute, numerical_gradient[i], exp_grad)) checks = { "w_xf_g": intermediate_results["dw_xf_g"], "w_hf_g": intermediate_results["dw_hf_g"], "b_f_g": intermediate_results["db_f_g"], "w_xi_g": intermediate_results["dw_xi_g"], "w_hi_g": intermediate_results["dw_hi_g"], "b_i_g": intermediate_results["db_i_g"], "w_xc": intermediate_results["dw_xc"], "w_hc": intermediate_results["dw_hc"], "b_c": intermediate_results["db_c"] } for attr, numerical_grad in checks.items(): grad_check(attr, numerical_grad) for i in np.ndindex(dh0.shape): plus_h0 = np.copy(h0) plus_h0[i] += t neg_h0 = np.copy(h0) neg_h0[i] -= t plus_h_last = n.forward_prop(xs, plus_h0, {}) neg_h_last = n.forward_prop(xs, neg_h0, {}) exp_grad = (err(plus_h_last) - err(neg_h_last)) / (2 * t) num_grad = dh0[i] self.assertTrue(abs(exp_grad - num_grad) < 0.01, "h0: {} not within threshold of {}".format(dh0[i], exp_grad))
def test_single_step_gradients(self): t = 1e-4 input_size = 4 hidden_size = 5 xs = [np.random.uniform(size=input_size)] h0 = np.random.uniform(size=hidden_size) n = NoOutputLstm(input_size, hidden_size) intermediate_results = {} h_next = n.forward_prop(xs, h0, intermediate_results) dh0 = n.back_prop(derr(h_next), intermediate_results) def grad_check(attribute, numerical_gradient): for i in np.ndindex(numerical_gradient.shape): plus_n = n.clone() getattr(plus_n, attribute)[i] += t neg_n = n.clone() getattr(neg_n, attribute)[i] -= t plus_h_next = plus_n.forward_prop(xs, h0, {}) neg_h_next = neg_n.forward_prop(xs, h0, {}) exp_grad = (err(plus_h_next) - err(neg_h_next)) / (2 * t) self.assertTrue(abs(exp_grad - numerical_gradient[i]) < 0.01, "{}: {} not within threshold of {}".format(attribute, numerical_gradient[i], exp_grad)) checks = { "w_xf_g": intermediate_results["dw_xf_g"], "w_hf_g": intermediate_results["dw_hf_g"], "b_f_g": intermediate_results["db_f_g"], "w_xi_g": intermediate_results["dw_xi_g"], "w_hi_g": intermediate_results["dw_hi_g"], "b_i_g": intermediate_results["db_i_g"], "w_xc": intermediate_results["dw_xc"], "w_hc": intermediate_results["dw_hc"], "b_c": intermediate_results["db_c"] } for attr, numerical_grad in checks.items(): grad_check(attr, numerical_grad) for i in np.ndindex(dh0.shape): h0_plus = np.copy(h0) h0_plus[i] += t h0_minus = np.copy(h0) h0_minus[i] -= t plus_h1 = n.forward_prop(xs, h0_plus, {}) neg_h1 = n.forward_prop(xs, h0_minus, {}) exp_dh0 = ((err(plus_h1) - err(neg_h1)) / (2 * t)) self.assertTrue(abs(exp_dh0 - dh0[i]) < 0.01, "dh_prev: {} not within threshold of {}".format(dh0[i], exp_dh0))
def test_learn_word_vectors_from_char_vector_sequence(self): text = "please learn how to infer word vectors from sequences of character vectors" index_to_word = list(set(text.split())) index_to_char = list(set(text)) word_to_index = {word: index for index, word in enumerate(index_to_word)} char_to_index = {word: index for index, word in enumerate(index_to_char)} def to_char_vector_sequence(word): sequence = [] for char in word: vector = np.ones(len(char_to_index)) * -1 vector[char_to_index[char]] = 1 sequence.append(vector) sequence.append(np.zeros(len(char_to_index))) return np.asarray(sequence) def to_word_vector(word): vector = np.ones(len(word_to_index)) * -1 vector[word_to_index[word]] = 1 return vector training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()] n = NoOutputLstm(len(index_to_char), len(index_to_word)) for i in range(1000): for char_vectors, word_vector in training_data: intermediate_results = {} h_last = n.forward_prop(char_vectors, np.zeros(len(index_to_word)), intermediate_results) n.back_prop(ce_err_prime(h_last, word_vector), intermediate_results) n.train(0.1, intermediate_results) if i % 200 == 0: total_err = 0 for char_vectors, word_vector in training_data: h = n.activate(char_vectors, np.zeros(len(index_to_word))) total_err += mathutils.mean_squared_error(h, word_vector) print(total_err/len(training_data)) result = n.activate(to_char_vector_sequence("infer"), np.zeros(len(index_to_word))) self.assertEquals("infer", index_to_word[np.argmax(result)])
def test_learn_word_vectors_from_char_vector_sequence_2(self): text = "please learn how to infer word vectors from sequences of character vectors" \ "giving it more words to try and confuse it" \ "how evil" \ "much diabolical" \ "many genius" \ "the doge of venice gives his regards" index_to_word = list(set(text.split())) index_to_char = list(set(text)) word_to_index = {word: index for index, word in enumerate(index_to_word)} char_to_index = {word: index for index, word in enumerate(index_to_char)} def to_char_vector_sequence(word): sequence = [] for char in word: vector = np.ones(len(char_to_index)) * -1 vector[char_to_index[char]] = 1 sequence.append(vector) sequence.append(np.zeros(len(char_to_index))) return np.asarray(sequence) def to_word_vector(word): vector = np.ones(len(word_to_index)) * -1 vector[word_to_index[word]] = 1 return vector hidden_size = 50 training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()] lstm = NoOutputLstm(len(index_to_char), hidden_size) ffn = FeedForwardNetwork([hidden_size, len(index_to_word)]) h0 = np.random.uniform(-1, 1, size=hidden_size) learning_rate = 5 for i in range(2000): for char_vectors, word_vector in training_data: hs, f_gs, i_gs, cs, h = lstm.forward_prop(char_vectors, h0) res = {} y = ffn.forward_prop(h, res) dy = mathutils.mean_squared_error(y, word_vector) dx = ffn.dx(h, dy, res) ffn.train(learning_rate, h, dy, res) dh = dx dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop(char_vectors, hs, f_gs, i_gs, cs, dh) lstm.w_xf_g -= dw_xf_g * learning_rate lstm.w_hf_g -= dw_hf_g * learning_rate lstm.b_f_g -= db_f_g * learning_rate lstm.w_xi_g -= dw_xi_g * learning_rate lstm.w_hi_g -= dw_hi_g * learning_rate lstm.b_i_g -= db_i_g * learning_rate lstm.w_xc -= dw_xc * learning_rate lstm.w_hc -= dw_hc * learning_rate lstm.b_c -= db_c * learning_rate if i % 200 == 0: total_err = 0 for char_vectors, word_vector in training_data: h = lstm.activate(char_vectors, h0) y = ffn.forward_prop(h, {}) total_err += mathutils.mean_squared_error(y, word_vector) print(total_err/len(training_data)) h = lstm.activate(to_char_vector_sequence("infer"), h0) y = ffn.forward_prop(h, {}) self.assertEquals("infer", index_to_word[np.argmax(y)])
def test_learn_word_vectors_from_char_vector_sequence(self): text = "please learn how to infer word vectors from sequences of character vectors" index_to_word = list(set(text.split())) index_to_char = list(set(text)) word_to_index = {word: index for index, word in enumerate(index_to_word)} char_to_index = {word: index for index, word in enumerate(index_to_char)} def to_char_vector_sequence(word): sequence = [] for char in word: vector = np.ones(len(char_to_index)) * -1 vector[char_to_index[char]] = 1 sequence.append(vector) sequence.append(np.zeros(len(char_to_index))) return np.asarray(sequence) def to_word_vector(word): vector = np.ones(len(word_to_index)) * -1 vector[word_to_index[word]] = 1 return vector training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()] # hidden_size = 100 hidden_size = len(index_to_word) lstm = NoOutputLstm(len(index_to_char), hidden_size) ffn = FeedForwardNetwork([hidden_size, 50, 20, len(index_to_word)]) h0 = np.random.uniform(-1, 1, size=hidden_size) learning_rate = 0.5 for i in range(1000): for char_vectors, word_vector in training_data: hs, f_gs, i_gs, cs, lstm_output = lstm.forward_prop(char_vectors, h0) res = {} y = ffn.forward_prop(lstm_output, res) # dy = mathutils.mean_squared_error_prime(y, word_vector) dy = mathutils.mean_squared_error_prime(lstm_output, word_vector) dx = ffn.dx(lstm_output, dy, res) ffn.train(learning_rate, lstm_output, dy, res) # dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop(char_vectors, hs, f_gs, i_gs, cs, dx) dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop(char_vectors, hs, f_gs, i_gs, cs, dy) lstm.w_xf_g -= dw_xf_g * learning_rate lstm.w_hf_g -= dw_hf_g * learning_rate lstm.b_f_g -= db_f_g * learning_rate lstm.w_xi_g -= dw_xi_g * learning_rate lstm.w_hi_g -= dw_hi_g * learning_rate lstm.b_i_g -= db_i_g * learning_rate lstm.w_xc -= dw_xc * learning_rate lstm.w_hc -= dw_hc * learning_rate lstm.b_c -= db_c * learning_rate if i % 200 == 0: total_err = 0 for char_vectors, word_vector in training_data: h = lstm.activate(char_vectors, h0) output_vector = ffn.forward_prop(h[-1], {}) total_err += mathutils.mean_squared_error(output_vector, word_vector) print(total_err/len(training_data)) lstm_out = lstm.activate(to_char_vector_sequence("infer"), h0) result = ffn.forward_prop(lstm_out, {}) self.assertEquals("infer", index_to_word[np.argmax(result)])
def test_learn_word_vectors_from_char_vector_sequence_2(self): text = "please learn how to infer word vectors from sequences of character vectors" \ "giving it more words to try and confuse it" \ "how evil" \ "much diabolical" \ "many genius" \ "the doge of venice gives his regards" index_to_word = list(set(text.split())) index_to_char = list(set(text)) word_to_index = { word: index for index, word in enumerate(index_to_word) } char_to_index = { word: index for index, word in enumerate(index_to_char) } def to_char_vector_sequence(word): sequence = [] for char in word: vector = np.ones(len(char_to_index)) * -1 vector[char_to_index[char]] = 1 sequence.append(vector) sequence.append(np.zeros(len(char_to_index))) return np.asarray(sequence) def to_word_vector(word): vector = np.ones(len(word_to_index)) * -1 vector[word_to_index[word]] = 1 return vector hidden_size = 50 training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()] lstm = NoOutputLstm(len(index_to_char), hidden_size) ffn = FeedForwardNetwork([hidden_size, len(index_to_word)]) h0 = np.random.uniform(-1, 1, size=hidden_size) learning_rate = 5 for i in range(2000): for char_vectors, word_vector in training_data: hs, f_gs, i_gs, cs, h = lstm.forward_prop(char_vectors, h0) res = {} y = ffn.forward_prop(h, res) dy = mathutils.mean_squared_error(y, word_vector) dx = ffn.dx(h, dy, res) ffn.train(learning_rate, h, dy, res) dh = dx dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop( char_vectors, hs, f_gs, i_gs, cs, dh) lstm.w_xf_g -= dw_xf_g * learning_rate lstm.w_hf_g -= dw_hf_g * learning_rate lstm.b_f_g -= db_f_g * learning_rate lstm.w_xi_g -= dw_xi_g * learning_rate lstm.w_hi_g -= dw_hi_g * learning_rate lstm.b_i_g -= db_i_g * learning_rate lstm.w_xc -= dw_xc * learning_rate lstm.w_hc -= dw_hc * learning_rate lstm.b_c -= db_c * learning_rate if i % 200 == 0: total_err = 0 for char_vectors, word_vector in training_data: h = lstm.activate(char_vectors, h0) y = ffn.forward_prop(h, {}) total_err += mathutils.mean_squared_error(y, word_vector) print(total_err / len(training_data)) h = lstm.activate(to_char_vector_sequence("infer"), h0) y = ffn.forward_prop(h, {}) self.assertEquals("infer", index_to_word[np.argmax(y)])
def test_learn_word_vectors_from_char_vector_sequence(self): text = "please learn how to infer word vectors from sequences of character vectors" index_to_word = list(set(text.split())) index_to_char = list(set(text)) word_to_index = { word: index for index, word in enumerate(index_to_word) } char_to_index = { word: index for index, word in enumerate(index_to_char) } def to_char_vector_sequence(word): sequence = [] for char in word: vector = np.ones(len(char_to_index)) * -1 vector[char_to_index[char]] = 1 sequence.append(vector) sequence.append(np.zeros(len(char_to_index))) return np.asarray(sequence) def to_word_vector(word): vector = np.ones(len(word_to_index)) * -1 vector[word_to_index[word]] = 1 return vector training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()] # hidden_size = 100 hidden_size = len(index_to_word) lstm = NoOutputLstm(len(index_to_char), hidden_size) ffn = FeedForwardNetwork([hidden_size, 50, 20, len(index_to_word)]) h0 = np.random.uniform(-1, 1, size=hidden_size) learning_rate = 0.5 for i in range(1000): for char_vectors, word_vector in training_data: hs, f_gs, i_gs, cs, lstm_output = lstm.forward_prop( char_vectors, h0) res = {} y = ffn.forward_prop(lstm_output, res) # dy = mathutils.mean_squared_error_prime(y, word_vector) dy = mathutils.mean_squared_error_prime( lstm_output, word_vector) dx = ffn.dx(lstm_output, dy, res) ffn.train(learning_rate, lstm_output, dy, res) # dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop(char_vectors, hs, f_gs, i_gs, cs, dx) dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop( char_vectors, hs, f_gs, i_gs, cs, dy) lstm.w_xf_g -= dw_xf_g * learning_rate lstm.w_hf_g -= dw_hf_g * learning_rate lstm.b_f_g -= db_f_g * learning_rate lstm.w_xi_g -= dw_xi_g * learning_rate lstm.w_hi_g -= dw_hi_g * learning_rate lstm.b_i_g -= db_i_g * learning_rate lstm.w_xc -= dw_xc * learning_rate lstm.w_hc -= dw_hc * learning_rate lstm.b_c -= db_c * learning_rate if i % 200 == 0: total_err = 0 for char_vectors, word_vector in training_data: h = lstm.activate(char_vectors, h0) output_vector = ffn.forward_prop(h[-1], {}) total_err += mathutils.mean_squared_error( output_vector, word_vector) print(total_err / len(training_data)) lstm_out = lstm.activate(to_char_vector_sequence("infer"), h0) result = ffn.forward_prop(lstm_out, {}) self.assertEquals("infer", index_to_word[np.argmax(result)])