def testSystem():
    t = Tokenizer()
    xTrain, yTrain = t.getData()
    np.random.seed(10)
    model = RNN(15000)

    # forward-propagate a single training example and inspect the outputs
    o, s = model.forwardPropagation(xTrain[30])
    predictions = model.predict(xTrain[30])
    print(o.shape)
    print(o)
    print(predictions.shape)
    print(predictions)

    # with random weights the loss should be close to ln(vocab)
    print("Expected Loss: \n" + str(np.log(model.vocab)))
    print("Actual Loss:")
    print(model.calculateLoss(xTrain[:100], yTrain[:100]))
# Training loop
for k in range(8):
    for i in tqdm(range(0, len(all_data) - 25, seq_length)):
        # select batch: Y is X shifted one character to the right
        X_chars = all_data[i:i + seq_length]
        X = rnn.mapper[X_chars]
        Y_chars = all_data[i + 1:i + seq_length + 1]
        Y = rnn.mapper[Y_chars]
        # display loss every 100th update
        if np.mod(ll, 100) == 0:
            print(loss)
        # synthesize text every 500th update ...
        if np.mod(ll, 500) == 0:
            txt = rnn.predict(x=X[:, 0], h=rnn.h, n=250)
            # ... and append it to file every 1000th update
            if np.mod(ll, 1000) == 0:
                with open('trump_synthezed/out_trumpp2.txt', 'a') as f:
                    print("\n*iter =*" + str(ll) + "*, smooth_loss=*" + str(loss) + "\n", file=f)
                    print("".join(rnn.word(txt)), file=f)
            print("**iter =**" + str(ll) + "**, smooth_loss=**" + str(loss) + "\n")
            print("".join(rnn.word(txt)))
        # reset initial state if new epoch
        if i == 0:
            rnn.h = None
        # update loss
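# A minimal standalone sketch (toy string and values, not from the script
# above) of how the sliding-window batching works: the target sequence Y is
# simply the input X shifted one character right, i.e. next-character
# prediction.
data = "the quick brown fox"
seq_length = 5
for i in range(0, len(data) - seq_length, seq_length):
    X_chars = data[i:i + seq_length]          # e.g. "the q"
    Y_chars = data[i + 1:i + seq_length + 1]  # e.g. "he qu"
    print(X_chars, "->", Y_chars)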
print("\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0]) # Create the training data X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences]) y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences]) np.random.seed(10) model = RNN(vocabulary_size) o, s = model.forward_propagation(X_train[10]) print(o.shape) print(o) predictions = model.predict(X_train[10]) print(predictions.shape) print(predictions) print("-------------------------------------------") ''' Cross Entropy loss is L(y, o) = -\cfrac{1}{N}\sum_{n\in N} y_n \log o_n ''' E_loss = np.log(vocabulary_size) print('Expected loss for random predictions is {}'.format(E_loss)) Actual_loos = model.calc_loss(X_train[:1000], y_train[:1000]) print('Actual loss for random predictions is {}'.format(Actual_loos)) losses = train_with_sgd(model, X_train[:100],
old_valid_error = 0.0
result = []  # list for saving all predictions made by the network
# file = 'training_pred.pickle.gz'
for k in range(n_epoch):
    correct_number_train = 0.0
    correct_number_valid = 0.0
    class_occurrence_train = np.zeros(n_classes)
    class_occurrence_valid = np.zeros(n_classes)
    confusion_matrix_train = np.zeros((n_classes, n_classes))
    confusion_matrix_valid = np.zeros((n_classes, n_classes))
    n_data = 0
    for i in range(n_batches):
        cost, output, gradient, hidden_act, t, x_1, wout = rnn.train(input[i], mask[i], label[i], lrate)
        rnn.save(filesave)
        train_cost.append(cost)
        prob = rnn.predict(input[i], mask[i])
        for jj in range(prob.shape[1]):
            for kk in range(prob.shape[0]):
                prediction = (-prob[kk, jj, :]).argsort()
                label_sorted = (-label[i][kk, jj, :]).argsort()
                if any(label[i][kk, jj, :]):
                    n_data += 1
                    if prediction[0] == label_sorted[0]:
                        correct_number_train += 1
                    confusion_matrix_train[prediction[0], label_sorted[0]] += 1
                    class_occurrence_train[label_sorted[0]] += 1
    log('> epoch ' + str(k) + ', training cost: ' + str(100 * np.mean(train_cost)))
    confusion_matrix_train = 100 * confusion_matrix_train / class_occurrence_train
    train_error_rate = 100 * (1.0 - correct_number_train / n_data)
    log('Error rate on training set is ' + str(train_error_rate) + ' (%)')
    log('Confusion Matrix on training set is \n\n ' + str(np.around(confusion_matrix_train, 2)) + ' (%)\n')
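# Note on the ranking above: only the top-ranked class is compared, so taking
# the first element of (-prob).argsort() is equivalent to prob.argmax(). A
# quick standalone check with made-up numbers:
import numpy as np

prob = np.array([0.1, 0.7, 0.2])
assert (-prob).argsort()[0] == prob.argmax() == 1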
            temp = [0] * index
            temp[y[i][j]] = 1  # one-hot encode the target label
            y[i][j] = temp
    return [x, y, index, dict]


if __name__ == '__main__':
    X, Y, index, dicts = read('1168.txt')
    print("read finished")
    print(index)
    rnn = RNN(index, 20)
    rnn.train(X, Y)
    # seed the prediction with the one-hot vector for token 1 ...
    s = [0] * index
    s[1] = 1
    # ... followed by a few random one-hot vectors
    predict_x = []
    for i in range(5):
        temp = [0] * index
        p = random.randint(2, index - 1)
        temp[p] = 1
        predict_x.append(temp)
    # index = rnn.predict(np.array([s, predict_x[0], predict_x[1], predict_x[2], predict_x[3], predict_x[4]]))
    index = rnn.predict(np.array([s, predict_x[0], predict_x[1], predict_x[2]]))  # note: shadows the vocabulary size
    print(index)
    # reverse lookup: map each predicted id back to its token
    for i in index:
        for j in dicts.keys():
            if dicts[j] == i:
                print(j)
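# A cheaper alternative to the linear scan over dicts.keys() above: invert the
# mapping once and index directly. A sketch, assuming dicts maps tokens to
# integer ids as in read():
index_to_token = {v: k for k, v in dicts.items()}
for i in index:
    print(index_to_token[i])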
    # load PCA parameters (mask, mu, sigma, V) into the local namespace
    locals().update(cPickle.load(f))
with open(os.path.expanduser(SCALINGFACTORS), 'rb') as f:
    w, b = cPickle.load(f)
pca = lambda x: ((x[:, mask] - mu) / sigma).dot(V) * w + b

# Predict for each recording
for filename in os.listdir(INPUT_DIR):
    conf = {}
    print "Filename {}".format(filename)
    id, ext = os.path.splitext(filename)
    if ext != '.htk':
        continue
    print "Predicting for {} ...".format(id)
    feature = pca(readHtk(os.path.join(INPUT_DIR, filename))).astype('float32')
    x = feature.reshape((1,) + feature.shape)
    m = numpy.ones(x.shape[:-1], dtype='int32')
    conf[id] = net.predict(x, m)[0]

    # Save predictions
    with smart_open(os.path.join(OUTPUT_DIR, id + '.confidence.pkl.gz'), 'wb') as f:
        cPickle.dump(conf, f)
    savemat(os.path.join(OUTPUT_DIR, id + '.confidence.mat'), conf)

    # Merge classes 1 and 2 (English speech and non-English speech)
    # into a single "speech" class
    result_ = conf[id]
    result = numpy.zeros((result_.shape[0], result_.shape[1] - 1))
    result[:, 0] = result_[:, 0]
    result[:, 1] = result_[:, 1] + result_[:, 2]
    result[:, 2:] = result_[:, 3:]
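# A tiny worked trace of the merge above, with made-up probabilities for four
# classes (only classes 1 and 2 are named in the script; the rest are
# hypothetical): columns 1 and 2 collapse into one "speech" column.
import numpy
demo = numpy.array([[0.1, 0.3, 0.4, 0.2]])
merged = numpy.zeros((demo.shape[0], demo.shape[1] - 1))
merged[:, 0] = demo[:, 0]
merged[:, 1] = demo[:, 1] + demo[:, 2]
merged[:, 2:] = demo[:, 3:]
print(merged)  # -> [[0.1 0.7 0.2]]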
old_valid_error = 0.0
result = []  # list for saving all predictions made by the network
# file = 'training_pred.pickle.gz'
for k in range(n_epoch):
    correct_number_train = 0.0
    correct_number_valid = 0.0
    class_occurrence_train = np.zeros(n_classes)
    class_occurrence_valid = np.zeros(n_classes)
    confusion_matrix_train = np.zeros((n_classes, n_classes))
    confusion_matrix_valid = np.zeros((n_classes, n_classes))
    n_data = 0
    for i in range(n_batches):
        cost, output = rnn.train(input[i], mask[i], label[i], lrate)
        rnn.save(filesave)
        train_cost.append(cost)
        prob = rnn.predict(input[i], mask[i])
        if multi_label == "false":
            for jj in range(prob.shape[1]):
                for kk in range(prob.shape[0]):
                    prediction = (-prob[kk, jj, :]).argsort()
                    label_sorted = (-label[i][kk, jj, :]).argsort()
                    if any(label[i][kk, jj, :]):
                        n_data += 1
                        if prediction[0] == label_sorted[0]:
                            correct_number_train += 1
                        confusion_matrix_train[prediction[0], label_sorted[0]] += 1
                        class_occurrence_train[label_sorted[0]] += 1
        if multi_label == "true":
            train_error_rate = format_results(prob, label[i], multi_label, n_classes)
# preparing dataset
size = 8
int2binary = {}
largest_number = pow(2, size)
# 8-bit binary encodings of every integer 0 .. 2**size - 1, one row each
binary = np.unpackbits(np.array([range(largest_number)], dtype=np.uint8).T, axis=1)
for i in range(largest_number):
    int2binary[i] = binary[i]

X = []
for j in range(10000):
    a_int = np.random.randint(largest_number // 2)  # int version
    a = int2binary[a_int]                           # binary encoding
    b_int = np.random.randint(largest_number // 2)  # int version
    b = int2binary[b_int]                           # binary encoding
    c_int = a_int + b_int                           # true answer
    c = int2binary[c_int]
    problem = BinaryAddition(a, b)
    problem.set_output(c)
    X.append(problem)

# 2 inputs (one bit from each addend), 16 hidden units, 1 output bit
rnn = RNN(2, 16, 1)
rnn.train(X)

a = int2binary[np.random.randint(largest_number // 2)]  # binary encoding
b = int2binary[np.random.randint(largest_number // 2)]  # binary encoding
problem = BinaryAddition(a, b)
rnn.predict(problem)
problem.print()
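# A standalone sanity check of the unpackbits encoding used above: each row of
# `binary` is the 8-bit big-endian representation of its row index.
import numpy as np

binary = np.unpackbits(np.array([range(256)], dtype=np.uint8).T, axis=1)
assert list(binary[3]) == [0, 0, 0, 0, 0, 0, 1, 1]    # 3   = 00000011
assert list(binary[130]) == [1, 0, 0, 0, 0, 0, 1, 0]  # 130 = 10000010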