def test_networks(self):
    """ Test the networks saved by run(). Save results to JSON """
    data = self.__get_test_data()
    result = {}
    for (min_speakers, max_speakers) in [[1, 10], [1, 20]]:
        result_for_trainset = {}
        for feature_type in self.feature_options:
            result_for_feature = {}
            # Load best performing model
            network = RNN()
            name = f'./trained_networks_with_augmentation/rnn_train_{min_speakers}_{max_speakers}/{feature_type}'
            network.load_from_file(name)
            # Test performance
            for test_name, test_data_current in data.items():
                x, y = test_data_current['x'], test_data_current['y']
                result_for_feature[test_name] = self.__test_net(
                    network, x, y, feature_type)
            result_for_trainset[feature_type] = result_for_feature
        result[f'train_{min_speakers}_{max_speakers}'] = result_for_trainset
    with open('experiment_networks_tested.json', 'w+') as fp:
        json.dump(result, fp)
    return result
def main():
    patterns = loadData('pict.dat')  # Patterns 1-11
    patterns_1_3 = [patterns[index, :].reshape(1, 1024) for index in range(3)]
    patterns_4_11 = [
        patterns[3 + index, :].reshape(1, 1024) for index in range(8)
    ]

    network = RNN(size=1024, sequential=False, random=False)
    network.init_weights(patterns_1_3)

    noises = np.arange(0, 100, 5)
    averages = 1000
    for i, pattern in enumerate(patterns_1_3):
        OGpattern = pattern.copy()
        nCorrect = np.zeros((noises.shape[0], 1))
        for k, noise in enumerate(noises):
            for j in range(averages):
                patternD = distort(OGpattern, noise)
                x_output = network.train(patternD)
                nCorrect[k][0] += (np.count_nonzero(x_output == OGpattern)
                                   / patternD.shape[1]) * 100
        nCorrect = nCorrect / averages
        plt.plot(noises, nCorrect, label="Pattern " + str(i + 1))
    plt.legend()
    plt.show()
def train(train_id_data, num_vocabs, num_target_class):
    max_epoch = 200
    model_dir = r"E:\Pycharm Project\FYP\RNN\Trained_models\save_models.ckpt"

    hps = RNN.get_default_hparams()
    hps.update(batch_size=150,
               num_steps=120,
               emb_size=100,
               enc_dim=150,
               vocab_size=num_vocabs + 1,
               num_target_class=num_target_class)

    with tf.variable_scope("model"):
        model = RNN(hps, "train")

    sv = tf.train.Supervisor(is_chief=True,
                             logdir=model_dir,
                             summary_op=None,
                             global_step=model.global_step)

    # Let TF place ops on CPU when no compatible GPU kernel is available
    tf_config = tf.ConfigProto(allow_soft_placement=True)
    with sv.managed_session(config=tf_config) as sess:
        local_step = 0
        prev_global_step = sess.run(model.global_step)

        train_data_set = SentimentDataset(train_id_data, hps.batch_size,
                                          hps.num_steps)
        losses = []
        while not sv.should_stop():
            fetches = [model.global_step, model.loss, model.train_op]
            a_batch_data = next(train_data_set.iterator)
            y, x, w = a_batch_data
            fetched = sess.run(fetches, {
                model.x: x,
                model.y: y,
                model.w: w,
                model.keep_prob: hps.keep_prob
            })

            local_step += 1
            _global_step = fetched[0]
            _loss = fetched[1]
            losses.append(_loss)

            if local_step < 10 or local_step % 10 == 0:
                epoch = train_data_set.get_epoch_num()
                print("Epoch = {:3d} Step = {:7d} loss = {:5.3f}".format(
                    epoch, _global_step, np.mean(losses)))
                losses = []  # reset the running-loss window after reporting
                if epoch >= max_epoch:
                    break

    print("Training is done.")
    sv.stop()

    # Freeze the graph with its parameters into protobuf format
    # (outputs: model.out_pred, model.out_probs)
    freeze_graph(model_dir, "model/out_pred,model/out_probs",
                 "Final_graph.tf.pb")
def train_and_test_network():
    """
    Train a neural network and test it. Can also train on other feature
    types, or run the experimenter to try different configurations.
    """
    min_speakers = 1
    max_speakers = 10

    # Load data from the filesystem
    data_loader = DataLoader(train_dir, test_src_dr, test_dest_dir)
    data_loader.force_recreate = False
    data_loader.min_speakers = min_speakers
    data_loader.max_speakers = max_speakers

    train, (test_x, test_y) = data_loader.load_data()
    libri_x, libri_y = data_loader.load_libricount(libri_dir)

    # Train the network: the best model is saved to `file` during training,
    # then reloaded before testing
    file = 'testing_rnn'
    net = RNN()
    net.save_to_file(file)
    net.train(train, min_speakers, max_speakers, FEATURE_TYPE)
    net.load_from_file(file)
    timit_results = net.test(test_x, test_y, FEATURE_TYPE)
    libri_results = net.test(libri_x, libri_y, FEATURE_TYPE)
    return timit_results, libri_results
def btn_clk_train(self):
    if os.path.isfile('params.pkl'):
        os.remove('params.pkl')
        time.sleep(2)
    self.rnn = RNN()
    self.rnn.train()
    QMessageBox.information(self, "RNN", "train finished")
def translate():
    # data = LanguageLoader(en_path, fr_path, vocab_size, max_length)
    # rnn = RNN(data.input_size, data.output_size)
    model = RNN(data.input_size, data.output_size)
    model.load_state_dict(torch.load('models/baseline.module'))

    vecs = data.sentence_to_vec("Madam president<EOS>")
    print("in translate -- ", vecs)
    # `eval` here is this project's own inference method, not nn.Module.eval()
    translation = model.eval(vecs)
    print("final result ", data.vec_to_sentence(translation))
def __init__(self, hidden_size: int, embedding_size: int, num_layers: int,
             bidirectional: bool):
    self.hidden_size = hidden_size
    self.embedding_size = embedding_size
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.encoder = RNN(self.hidden_size, self.embedding_size,
                       self.num_layers, self.bidirectional)
    self.decoder = RNN(self.hidden_size, self.embedding_size,
                       self.num_layers, self.bidirectional)
def __init__(self, TRAIN_CONFIGS, GRU_CONFIGS, FFN_CONFIGS=None):
    self.TRAIN_CONFIGS = TRAIN_CONFIGS
    self.GRU_CONFIGS = self._process_gru_configs(GRU_CONFIGS)
    self.model = RNN(target=TRAIN_CONFIGS['target'],
                     **self.GRU_CONFIGS,
                     FFN_CONFIGS=FFN_CONFIGS)
    self.epochs_trained = 0
    self.trained = False

    # Storage for later
    self.loss = self.val_loss = None
    self.train_y_hat = self.train_y_true = None
    self.val_y_hat = self.val_y_true = None
def compare_gradients():
    tRNN = RNN(K, m, eta, seq_length, init='normal')
    for X_chars, Y_chars in get_batch():
        num_grads = numerical_gradients(tRNN, X_chars, Y_chars, h)
        tRNN.train(X_chars, Y_chars, clip=False)
        for k in tRNN.weights:
            error = relative_error(tRNN.gradients[k], num_grads[k])
            print("\n%s error:" % k)
            print(error)
        exit()  # only check the first batch
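# `relative_error` above is defined elsewhere in the source. A minimal
# sketch of a common definition used for gradient checking (an assumption,
# not the original implementation):
import numpy as np

def relative_error(analytic, numeric, eps=1e-8):
    """Elementwise relative error between analytic and numerical gradients.

    Values near machine precision (~1e-7 or smaller) usually indicate a
    correct analytic gradient.
    """
    num = np.abs(analytic - numeric)
    den = np.maximum(eps, np.abs(analytic) + np.abs(numeric))
    return num / den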
def test_rnn(model: RNN, dataset, padding_tag_idx=1):
    """
    Test the model.

    Args:
        model: the model to test
        dataset: the dataset to test on
        padding_tag_idx: tag index ignored by the loss
    """
    iterator = data.BucketIterator(dataset, batch_size=128, device=device)
    criterion = nn.CrossEntropyLoss(ignore_index=padding_tag_idx)
    model = model.to(device)
    criterion = criterion.to(device)
    loss, acc = model.evaluate(iterator, criterion, padding_tag_idx)
    print(f'loss:{loss:.4f} acc:{acc:.4f}')
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    int_to_vocab, vocab_to_int, n_vocab, in_text, out_text = get_data_from_file(
        values.train_file, values.batch_size, values.seq_size)

    net = RNN(n_vocab, values.embedding_size, values.lstm_size)
    net = net.to(device)

    criterion, optimizer = get_loss_and_train_op(net, 0.001)
    net = train(net, criterion, optimizer, n_vocab, in_text, out_text,
                vocab_to_int, int_to_vocab, device)
    torch.save(net, '/data/myNet.pt')
def testSystem():
    t = Tokenizer()
    xTrain, yTrain = t.getData()
    np.random.seed(10)
    model = RNN(15000)
    o, s = model.forwardPropagation(xTrain[30])
    predictions = model.predict(xTrain[30])
    print(o.shape)
    print(o)
    print(predictions.shape)
    print(predictions)
    # An untrained model should have a loss close to log(vocab_size)
    print("Expected Loss: \n" + str(np.log(model.vocab)))
    print("Actual Loss:")
    print(model.calculateLoss(xTrain[:100], yTrain[:100]))
def synthesize(X_seq):
    x0 = X_seq[:, 0:1]  # first one-hot column as the seed character
    synth = RNN.synthesize(x0, 200)
    text = ""
    for column in synth.T:
        text += ind_to_char[np.argmax(column)]
    return text
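# `ind_to_char` above is a module-level lookup built elsewhere in the
# source. A hypothetical construction, assuming the vocabulary is the set
# of characters in some training text `book_data` (an assumed name):
chars = sorted(set(book_data))
char_to_ind = {c: i for i, c in enumerate(chars)}
ind_to_char = {i: c for i, c in enumerate(chars)}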
def __init__(self, embed_mat, opt):
    super(Boost, self).__init__()
    # Earlier ensemble members, kept for reference:
    # self.model1 = model1 = TextCNN1(embed_mat, opt)
    # self.model1 = load_model(model1, model_dir=opt['model_dir'], model_name='TextCNN1',
    #                          name="layer_5_finetune_epoch_6_2017-08-15#15:22:03.params")
    # self.model2 = model2 = TextCNN1(embed_mat, opt)
    # self.model2 = load_model(model2, model_dir=opt['model_dir'], model_name='TextCNN1',
    #                          name="layer_2_epoch_5_2017-08-02#11:25:22_0.4095.params")
    # self.model3 = model3 = TextCNN1(embed_mat, opt)
    # self.model3 = load_model(model3, model_dir=opt['model_dir'], model_name='TextCNN1',
    #                          name="layer_3_finetune_epoch_6_2017-08-14#04:07:52.params")
    # self.model4 = model4 = TextCNN1(embed_mat, opt)
    # self.model4 = load_model(model4, model_dir=opt['model_dir'], model_name='TextCNN1',
    #                          name="layer_4_finetune_epoch_6_2017-08-14#07:28:16.params")
    # self.model5 = model5 = TextCNN(embed_mat, opt)
    # self.model5 = load_model(model5, model_dir=opt['model_dir'], model_name='TextCNN',
    #                          name="layer_5_epoch_5_2017-08-12#19:10:02_0.4102.params")
    # self.model6 = model6 = TextCNN(embed_mat, opt)
    # self.model6 = load_model(model6, model_dir=opt['model_dir'], model_name='TextCNN',
    #                          name="layer_6_finetune_top1_char_epoch_6_2017-08-13#01:16:15.params")
    # self.model7 = model7 = TextCNN(embed_mat, opt)
    # self.model7 = load_model(model7, model_dir=opt['model_dir'], model_name='TextCNN',
    #                          name="layer_7_finetune_top1_char_epoch_6_2017-08-13#02:52:58.params")
    # self.model8 = model8 = TextCNN(embed_mat, opt)
    # self.model8 = load_model(model8, model_dir=opt['model_dir'], model_name='TextCNN',
    #                          name="layer_8_finetune_top1_char_epoch_6_2017-08-13#04:29:34.params")
    # self.model9 = model9 = TextCNN(embed_mat, opt)
    # self.model9 = load_model(model9, model_dir=opt['model_dir'], model_name='TextCNN',
    #                          name="layer_9_finetune_top1_char_epoch_6_2017-08-13#10:34:04.params")
    # self.model10 = model10 = TextCNN(embed_mat, opt)
    # self.model10 = load_model(model10, model_dir=opt['model_dir'], model_name='TextCNN',
    #                          name="layer_10_finetune_top1_char_epoch_6_2017-08-13#12:11:21.params")
    self.model1 = model1 = RNN(embed_mat, opt)
    self.model1 = load_model(
        model1,
        model_dir=opt['model_dir'],
        model_name='RNN',
        name="layer_1_char_epoch_6_2017-08-15#15:27:18.params")
def __train_net(self, files: np.ndarray, min_speakers: int,
                max_speakers: int, feature_type: str, save_to: str):
    """
    Train a network
    :param files: The train files
    :param min_speakers: The min number of speakers to generate files for
    :param max_speakers: The max number of speakers to generate files for
    :param feature_type: The feature type to use
    :param save_to: Location to save the best performing model to
    :return: RNN, history
    """
    network = RNN()
    network.save_to_file(save_to)
    _, history = network.train(files, min_speakers, max_speakers, feature_type)
    return network, history
def test(option="lstm", file_desc=""):
    if FLAGS.seq is None:
        # Random binary sequence with FLAGS.val ones
        ones = np.random.choice(np.arange(FLAGS.seqlen), FLAGS.val,
                                replace=False)
        seq = np.zeros(FLAGS.seqlen)
        seq[ones] = 1
    else:
        seq = np.array(FLAGS.seq).astype(np.float32)
    seq = np.expand_dims(seq, axis=1)

    sess = tf.Session()
    if option == "lstm":
        lstm = LSTM(sess, FLAGS.hidden, FLAGS.seqlen)
    elif option == "rnn":
        lstm = RNN(sess, FLAGS.hidden, FLAGS.seqlen)
    else:
        raise ValueError("option must be 'lstm' or 'rnn'")
    sess.run(tf.global_variables_initializer())

    print("\n\nLoading model/{}_lstm.pkl...".format(file_desc))
    with open("model/{}_lstm.pkl".format(file_desc), 'rb') as file:
        lstm_weights = pickle.load(file)
    print("\n\nLoading model/{}_dense.pkl...\n\n".format(file_desc))
    with open("model/{}_dense.pkl".format(file_desc), 'rb') as file:
        dense_weights = pickle.load(file)
    lstm.load_weights(lstm_weights, dense_weights)

    print(seq.reshape(-1))
    predictions = lstm.test(seq)
    print(np.argmax(predictions))
def getModel(tokenized_sentences, word_to_index):
    x_train = get_x_train(tokenized_sentences, word_to_index)
    y_train = get_y_train(tokenized_sentences, word_to_index)
    model = RNN(_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM)

    # Time a single SGD step as a rough performance estimate
    t1 = time.time()
    model.sgd_step(x_train[10], y_train[10], _LEARNING_RATE)
    t2 = time.time()
    print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))

    # if _MODEL_FILE is not None:
    #     load_model_parameters_theano(_MODEL_FILE, model)
    train_with_sgd(model, x_train, y_train, nepoch=_NEPOCH,
                   learning_rate=_LEARNING_RATE)
    return model
def __init__(self, embed_mat, opt):
    super(Boost_RNN1_char, self).__init__()
    self.model1 = model1 = RNN(embed_mat, opt)
    self.model1 = load_model(
        model1,
        model_dir=opt['model_dir'],
        model_name='RNN',
        name="layer_1_finetune_char_epoch_6_2017-08-15#15:27:18")
def main(_):
    check_dir()
    print_config()

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    run_option = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(config=run_option) as sess:
        rnn = RNN(config=FLAGS, sess=sess)
        rnn.build_model()
        if FLAGS.is_training:
            rnn.train_model()
        if FLAGS.is_testing:
            rnn.test_model()
def __init__(self, embed_mat, opt):
    super(Emsemble, self).__init__()
    self.model1 = model1 = RNN(embed_mat, opt)
    self.model1 = load_model(model1, model_dir=opt['model_dir'],
                             model_name='RNN')
    self.model2 = model2 = TextCNN(embed_mat, opt)
    self.model2 = load_model(model2, model_dir=opt['model_dir'],
                             model_name='TextCNN')
def getModel(tokenized_sentences, word_to_index):
    x_train = get_x_train(tokenized_sentences, word_to_index)
    y_train = get_y_train(tokenized_sentences, word_to_index)
    model = RNN(_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM)
    train_with_sgd(model, x_train, y_train, nepoch=_NEPOCH,
                   learning_rate=_LEARNING_RATE)
    return model
def __test_net(self, network: RNN, x: np.ndarray, y: np.ndarray,
               feature_type: str):
    """
    Test a trained network
    :param network: The trained network
    :param x: The test files (pre-merged)
    :param y: The corresponding labels
    :param feature_type: The feature type, must be the same as used for training
    :return: MAE on different levels
    """
    return network.test(x, y, feature_type)
def run():
    l, V = pl.load_words()
    # `l` is a list of sentences split into words
    # `V` is a dict mapping each word to its index in the vocabulary

    # Convert words to their respective indices
    for i in range(len(l)):
        for j in range(len(l[i])):
            l[i][j] = V[l[i][j]]

    # Generate training data: predict the next word at each position
    training_data = []
    for sent in l:
        training_data.append((sent[:-1], sent[1:]))

    # Initialize an RNN with a hidden state of dimension 20x1
    rnet = RNN(20, len(V))
    rnet.train(training_data[:25],
               learning_rate=3.0,
               bptt_step=10,
               transform=lambda sent: [pl.one_hot(len(V), x) for x in sent])
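# `pl.one_hot` is provided by the preprocessing module and is not shown.
# A minimal stand-in, assuming it returns a one-hot column vector:
import numpy as np

def one_hot(size, index):
    # One-hot column vector of shape (size, 1) with a 1 at `index`
    v = np.zeros((size, 1))
    v[index] = 1.0
    return v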
def select_network(net_type, inp_size, hid_size, nonlin, rinit, iinit, cuda,
                   lastk, rsize):
    if net_type == 'RNN':
        rnn = RNN(inp_size, hid_size, nonlin, bias=True, cuda=cuda,
                  r_initializer=rinit, i_initializer=iinit)
    elif net_type == 'MemRNN':
        rnn = MemRNN(inp_size, hid_size, nonlin, bias=True, cuda=cuda,
                     r_initializer=rinit, i_initializer=iinit)
    elif net_type == 'RelMemRNN':
        rnn = RelMemRNN(inp_size, hid_size, lastk, rsize, nonlin, bias=True,
                        cuda=cuda, r_initializer=rinit, i_initializer=iinit)
    elif net_type == 'LSTM':
        rnn = LSTM(inp_size, hid_size, cuda)
    elif net_type == 'RelLSTM':
        rnn = RelLSTM(inp_size, hid_size, lastk, rsize, cuda)
    else:
        raise ValueError("Unknown net_type: {}".format(net_type))
    return rnn
def __init__(self):
    super().__init__()
    self.setupUi(self)

    self.trayIcon = QSystemTrayIcon(self)
    self.trayIcon.setIcon(QIcon('ui/icon.png'))
    self.trayIcon.activated.connect(self.restore_window)

    self.WM = WindowManager()
    self.pre_window = self.WM.get_fore_window()
    self.rnn = RNN()
    self.runState = False

    self.startButton.clicked.connect(self.btn_clk_start)
    self.startState = True
    self.trainButton.clicked.connect(self.btn_clk_train)
    self.helpButton.clicked.connect(self.btn_clk_help)
    self.helpState = True

    self.timer = QTimer(self)
    self.timer.start(200)
    self.timer.timeout.connect(self.run)
def fitness(self):
    uow = UnitOfWork()
    genoWithSegSiz = [geno for geno in self._genomes
                      if geno._genName == 'segment_size']
    if genoWithSegSiz == []:
        self._shelveDataFile = uow._dataSet().PreparingData()
    else:
        segment_size = genoWithSegSiz[0]._value
        self._shelveDataFile = uow._dataSet.PreparingData(segment_size)
    # Build and evaluate the candidate network
    rnn = RNN(self._shelveDataFile, self._genomes)
    self._accuracy = rnn.RunAndAccuracy()
    return self._accuracy
def MyRNN_H256(self, data, test_set=None):
    input_sizes, output_size, train_set, valid_set = data
    model = nn.Sequential(
        Squeeze,  # assumed to be a pre-instantiated squeeze module
        RNN(input_sizes[0], output_size, hidden_size=256, cuda=True))
    network = ANN("MyRNN_H256", model, cuda=True)
    network.train(train_set,
                  epochs=60,
                  batch_size=20,
                  criterion=nn.NLLLoss(),
                  optimizer=optim.Adam(model.parameters(), lr=0.01),
                  valid_set=valid_set)
    return network
class Generator:
    def __init__(self, fileName):
        t = Tokenizer()
        self.wordToInd = t.getWordToInd()
        self.indexToWord = t.getIndToWord()
        self.model = RNN(t.getVocabSize())
        load(fileName, self.model)

    def postParse(self, sentence):
        # Drop bracket tokens (the original `not v == "[" or v == "]"`
        # was always true for "]")
        sentence = [v for v in sentence if v not in ("[", "]")]
        out = str(sentence[0][0].upper() + sentence[0][1:])
        for i in sentence[1:]:
            if i in (",", ".", ":", ";", "?", "!"):
                out += i
            elif i == "i":
                out += " I"
            else:
                out += " " + i
        return out

    def generateSentence(self):
        newSent = [self.wordToInd["SENTENCE_START"]]
        while not newSent[-1] == self.wordToInd["SENTENCE_END"]:
            nextWordProbs = self.model.forwardPropagation(newSent)[0]
            sampled = self.wordToInd["UNKNOWN_TOKEN"]
            while sampled == self.wordToInd["UNKNOWN_TOKEN"]:
                samples = np.random.multinomial(1, nextWordProbs[-1])
                sampled = np.argmax(samples)
            newSent.append(sampled)
        sentence = [self.indexToWord[x] for x in newSent[1:-1]]
        # print(sentence)
        return self.postParse(sentence)

    def curateSentence(self):
        # Re-sample until the sentence is tweet-sized and bracket-free
        sentence = self.generateSentence()
        while (not (len(sentence) > 80 and len(sentence) < 270)
               or "[" in sentence or "]" in sentence):
            sentence = self.generateSentence()
        if len(sentence) > 140:
            # Split at the first space after position 130 so both halves
            # fit within 140 characters
            ind = 0
            for i in range(130, len(sentence)):
                if sentence[i] == " ":
                    ind = i
                    break
            split = [sentence[:ind + 1], sentence[ind + 1:]]
            if len(split[0]) > 140 or len(split[1]) > 140:
                return self.curateSentence()
            else:
                return 2, [split[1], split[0]]
        else:
            return 1, [sentence]
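# Hypothetical usage of Generator, assuming a checkpoint such as the
# Data/Fakespeare.npz written by the training snippet below:
g = Generator("Data/Fakespeare.npz")
n_parts, tweets = g.curateSentence()
for tweet in tweets:
    print(tweet)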
def testTrain():
    print("Starting Test")
    np.random.seed(10)

    print("Starting Tokenization")
    t = Tokenizer(vocabSize=15000)
    print("Tokenizer Complete")
    vocabSize = t.getVocabSize()
    print("Vocab Size: " + str(vocabSize))
    xTrain, yTrain = t.getData()

    print("Constructing Model")
    model = RNN(vocabSize)

    # Time a single SGD step
    print("Starting Timer")
    start = time.perf_counter()
    model.sgdStep(xTrain[10], yTrain[10], .005)
    end = time.perf_counter()
    print("One Step Time: " + str(end - start))

    print("Starting Training")
    # Clear the log file before training
    with open("Data/Log.txt", "w") as reset:
        reset.write("")
    losses = trainWithSGD(model, xTrain, yTrain, cycles=50, evalAfterLoss=1)
    save("Data/Fakespeare.npz", model)
def main(args):
    new_model = args.new_model
    rnn = RNN()

    if not new_model:
        try:
            rnn.set_weights(config.rnn_weight)
        except Exception:
            print("Either set --new_model or ensure {} exists".format(
                config.rnn_weight))
            raise

    rnn_input = []
    rnn_output = []
    for i in range(130):
        # print('Building {}th...'.format(i))
        input = np.load('./rnn_data/rnn_input_' + str(i) + '.npy')
        output = np.load('./rnn_data/rnn_output_' + str(i) + '.npy')
        # Sequence pre-processing: for training the LSTM, rnn_input must be
        # (samples/episodes, time steps, features)
        input = pad_sequences(input, maxlen=40, dtype='float32',
                              padding='post', truncating='post')
        output = pad_sequences(output, maxlen=40, dtype='float32',
                               padding='post', truncating='post')
        rnn_input.append(input)
        rnn_output.append(output)

    # Stack all episodes into single arrays
    input = np.concatenate(rnn_input, axis=0)
    output = np.concatenate(rnn_output, axis=0)
    print(input.shape)
    print(output.shape)

    rnn.train(input, output)
    rnn.plot_loss()
def train(option="lstm", file_desc=""):
    epochs = FLAGS.epochs
    batchsize = FLAGS.batchsize
    # Two RNG streams with the same seed so x and y are shuffled identically
    shuffle_x = np.random.RandomState(42)
    shuffle_y = np.random.RandomState(42)

    task = CountingGame2()
    x, y = task.generate(length=FLAGS.seqlen, samples=FLAGS.samples)
    test_x, test_y = task.generate(length=FLAGS.seqlen, samples=1)

    sess = tf.Session()
    if option == "lstm":
        lstm = LSTM(sess, FLAGS.hidden, FLAGS.seqlen)
    elif option == "rnn":
        lstm = RNN(sess, FLAGS.hidden, FLAGS.seqlen)
    sess.run(tf.global_variables_initializer())
    lstm_weights = sess.run(lstm.cells[0].lstm_weights)
    lstm.load_weights(lstm_weights)

    n_iters = len(x) // batchsize
    for i in np.arange(epochs):
        shuffle_x.shuffle(x)
        shuffle_y.shuffle(y)
        for j in np.arange(n_iters):
            start = int(j * batchsize)
            end = int(start + batchsize)
            loss, lstm_gradients = lstm.fit(x[start:end], y[start:end])
            lstm_gradients = utils.average_gradients(lstm_gradients)
            # Manual SGD step on the recurrent weights
            lstm_weights = [w - FLAGS.lr * grad
                            for w, grad in zip(lstm_weights, lstm_gradients)]
            dense_weights = sess.run(lstm.dense_weights)
            lstm.load_weights(lstm_weights)
        if i % 5 == 0:
            print("\nEpoch #{} Loss: {}".format(i, loss))
            print(test_x[0])
            predictions = lstm.test(test_x[0])
            print(np.argmax(predictions))

    with open("model/{}_lstm.pkl".format(file_desc), 'wb') as file:
        pickle.dump(lstm_weights, file)
    with open("model/{}_dense.pkl".format(file_desc), 'wb') as file:
        pickle.dump(dense_weights, file)
fr.close()

j = 0
fr = open(target_language + "/" + target_language + "_train_data.txt", 'r')
for line in fr:
    if j in testset:
        test2_.append(line)
    j += 1
fr.close()

N1 = len(dic_lang1)
N2 = len(dic_lang2)

######################### Vagueness Detection on English data set ###########
print("Detecting vagueness in the provided English data set")
rnn = RNN(N1, win, D, H, C, eta, lamb, we1, wx, wh, bh, w, b, h0)
op = open("Outputs/english_op.txt", 'w')
start_time = time.clock()
for i in range(len(test1_) // 100):
    lin_ind = [search_dictionary(dic_lang1, x) for x in test1_[i].split()]
    cline = contextwin(lin_ind, win)
    y_pred = rnn.test(cline)
    op.write(test1_[i])
    op.write("Vague Words: ")
    for j in range(len(y_pred)):
        if y_pred[j]:
            op.write(test1_[i].split()[j])
            op.write(" ")
    op.write("\n\n")
end_time = time.clock()
op.close()
print("*****Building Model*****") #Create all of the layers each with a unique starting random state. rng = np.random.RandomState(np.random.randint(low=10, high=10000)) FeatureLayerIn = ANN(Xc=3 , Hc=6, rng=rng) rng = np.random.RandomState(np.random.randint(low=10, high=10000)) FeatureLayerOut = ANN(Xc=6 , Hc=3, rng=rng) rng = np.random.RandomState(np.random.randint(low=10, high=10000)) ContextIn = RNN(Xc=3 , Hc=6, rng=rng) rng = np.random.RandomState(np.random.randint(low=10, high=10000)) ContextOut = RNN(Xc=6 , Hc=2, rng=rng) rng = np.random.RandomState(np.random.randint(low=10, high=10000)) AnalysisLayer = ANN(Xc=5 , Hc=2, rng=rng) epoch = 0 #Iteration counter display = 1000 #Number of iterations between displays epoch_list = list(range(len(data))) #list of used for training order print('*****Training Model*****') print() while epoch < 1000000:
class SentenceCompletion(object):
    """ Read raw data from """

    def __init__(self, n_in, n_hidden, n_out, learning_rate=0.01,
                 learning_rate_decay=1, L2_reg=0.00, n_epochs=100):
        """ Initialise basic variables """
        self.n_in = int(n_in)
        self.n_hidden = int(n_hidden)
        self.n_out = int(n_out)
        self.learning_rate = float(learning_rate)
        self.learning_rate_decay = float(learning_rate_decay)
        self.L2_reg = float(L2_reg)
        self.epochs = int(n_epochs)
        self.ready()

    def ready(self):
        """ Load all inputs and parameters to train the RNN """
        # input sentence
        self.x = T.matrix(name="x", dtype=theano.config.floatX)
        # target
        # self.y = T.matrix(name="y", dtype=theano.config.floatX)
        self.y = T.vector(name="y", dtype="int32")
        # initial hidden state of the RNN
        self.h0 = T.vector()
        # learning rate
        self.lr = T.scalar()
        self.rnn = RNN(input=self.x, n_in=self.n_in,
                       n_hidden=self.n_hidden, n_out=self.n_out)

    def fit(self, word2vec, vocab, samples, X_train, Y_train,
            X_test=None, Y_test=None, validation=10000):
        """ Fit the model.

        Pass in X_test, Y_test to compute and report the test error
        during training.
        """
        # train_set_x, train_set_y = self.shared_dataset((X_train, Y_train))
        # n_train = train_set_x.get_value(borrow=True).shape[0]
        n_train = len(X_train)

        #####################
        #    Build model    #
        #####################
        # index = T.lscalar("index")
        train_set_x = T.matrix()
        # train_set_y = T.matrix(dtype=theano.config.floatX)
        train_set_y = T.vector(dtype="int32")
        l_r = T.scalar("l_r", dtype=theano.config.floatX)

        cost = self.rnn.loss(self.y) + self.L2_reg * self.rnn.L2_sqr
        compute_train_error = theano.function(
            inputs=[train_set_x, train_set_y],
            outputs=self.rnn.loss(self.y),
            givens={self.x: train_set_x, self.y: train_set_y},
            mode=mode)

        # test config
        n_test = len(X_test)
        test_set_x = T.matrix()
        test_set_y = T.vector(dtype="int32")
        compute_test_error = theano.function(
            inputs=[test_set_x, test_set_y],
            outputs=self.rnn.loss(self.y),
            givens={self.x: test_set_x, self.y: test_set_y},
            mode=mode)

        # Compute the gradient of the cost with respect to
        # theta = (W, W_in, W_out, h0, bh, by) using BPTT
        updates = []
        for param in self.rnn.params:
            gparam = T.grad(cost, param)
            updates.append((param, param - l_r * gparam))

        # Compile a Theano function `train_model` that returns the cost
        # and, at the same time, updates the model parameters according
        # to the rules defined in `updates`
        train_model = theano.function(
            inputs=[train_set_x, train_set_y, l_r],
            outputs=cost,
            updates=updates,
            givens={self.x: train_set_x, self.y: train_set_y},
            mode=mode)

        ###############
        # Train model #
        ###############
        epoch = 0
        while epoch < self.epochs:
            epoch += 1
            for idx in range(n_train):
                train_model(X_train[idx], Y_train[idx], self.learning_rate)

                # validate learnt weights on the training set
                iter = (epoch - 1) * n_train + idx + 1
                if iter % validation == 0:
                    train_losses = [compute_train_error(X_train[i], Y_train[i])
                                    for i in sample(range(n_train), samples)]
                    this_train_loss = np.mean(train_losses)
                    test_losses = [compute_test_error(X_test[i], Y_test[i])
                                   for i in range(n_test)]
                    this_test_loss = np.mean(test_losses)
                    fmt = ("epoch %i, seq %i/%i, train loss %f, "
                           "test loss %f, lr: %f")
                    logging.debug(fmt % (epoch, idx + 1, n_train,
                                         this_train_loss, this_test_loss,
                                         self.learning_rate))

            self.learning_rate *= self.learning_rate_decay
            if epoch % 10 == 0:
                filename = "rnn-100_%e-%d.npz" % (self.L2_reg, epoch)
                np.savez(filename,
                         W=self.rnn.W.get_value(),
                         W_in=self.rnn.W_in.get_value(),
                         W_out=self.rnn.W_out.get_value(),
                         h0=self.rnn.h0.get_value(),
                         bh=self.rnn.bh.get_value(),
                         by=self.rnn.by.get_value())
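# Hypothetical usage of SentenceCompletion; the dimensions and the
# word2vec/vocab/X_train/Y_train objects are placeholders that the rest
# of the project is assumed to provide.
sc = SentenceCompletion(n_in=100, n_hidden=50, n_out=10000, n_epochs=100)
sc.fit(word2vec, vocab, samples=100,
       X_train=X_train, Y_train=Y_train,
       X_test=X_test, Y_test=Y_test)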
def search_dictionary(dictionary, item):
    # (function header inferred from its call sites; the snippet begins
    # mid-function in the source)
    if item in dictionary:
        return dictionary[item]
    else:
        return -1


def get_ae_input(i):
    l1 = numpy.zeros(N1 + 1)
    l2 = numpy.zeros(N2 + 1)
    for wrd in train1_[i].split():
        l1[search_dictionary(dic_lang1, wrd)] = 1
    for wrd in train2_[i].split():
        l2[search_dictionary(dic_lang2, wrd)] = 1
    return numpy.concatenate([l1, l2])


######################### Training on English Train Set #####################
print("\nTraining the Vagueness Detector on English train set")
rnn = RNN(N1, win, D, H, C, eta, lamb)
avg_cost, prev_cost = 1.0, 2.0
costs = []
epoch = 0
while (epoch < 2 * n_epochs) and (avg_cost > 0) and (avg_cost != prev_cost):
    epoch += 1
    prev_cost = avg_cost
    start_time = time.clock()
    for i in range(len(test1_)):
        lin_ind = [search_dictionary(dic_lang1, x) for x in test1_[i].split()]
        cline = contextwin(lin_ind, win)
        costs = rnn.train(cline, test1_labels[i])
    end_time = time.clock()
    avg_cost = numpy.mean(costs)
    print("Training of epoch %i took %.2f m" % (epoch,
                                                (end_time - start_time) / 60.0))
    print('Average cost for epoch %i is %f' % (epoch, avg_cost))
valid, label_valid, mask_valid = read(valid_data_spec)

n_streams = np.shape(label)[1]
len_batch = np.shape(label)[2]
n_classes = np.shape(label)[3]
n_batches = len(input)
n_test = len(test)
n_valid = len(valid)
error = np.zeros((n_test, n_streams, len_batch, n_classes))

# ############################ Create instance of class RNN #########################
var = np.random.RandomState()
seed = var.randint(90000)
if os.path.exists(filesave):
    filename = filesave
    log('...found previous configuration...')
rnn = RNN(Nlayers, Ndirs, Nx, Nh, n_classes, Ah, Ay, predictPer, loss,
          L1reg, L2reg, momentum, seed, frontEnd, filename, initParams)

# ################################ TRAIN THE RNN #############################
train_cost = []
delta_train = 5.0
delta_valid = 10.0
old_training_error = 0.0
old_valid_error = 0.0
result = []  # list for saving all predictions made by the network
# file = 'training_pred.pickle.gz'
for k in range(n_epoch):
    correct_number_train = 0.0
    correct_number_valid = 0.0
    class_occurrence_train = np.zeros(n_classes)
    class_occurrence_valid = np.zeros(n_classes)
    confusion_matrix_train = np.zeros((n_classes, n_classes))
        X_trees_test.append(Tree(s, t, labels))
    elif k == 3:
        X_trees_dev.append(Tree(s, t, labels))
    else:
        raise Exception('Error in the train/test/dev parsing')

# Already done in the RNN's __init__:
# vocab = {}
# for i, w in enumerate(lexicon):
#     vocab[w] = i

from RNN import RNN

curve = []
if args.reg:
    # Sweep the regularisation strength
    for reg in np.logspace(-2, 3, 10):
        model = RNN(vocab=lexicon, reg=reg)
        l1, l2 = model.train(X_trees_train, max_iter=1000,
                             val_set=X_trees_dev, strat='AdaGrad',
                             mini_batch_size=30)
        curve.append(l2)
    np.save('reg_curve', curve)
elif args.mb:
    # Sweep the mini-batch size
    for mb in np.linspace(20, 50, 10):
        model = RNN(vocab=lexicon, reg=1)
        l1, l2 = model.train(X_trees_train, max_iter=1000,
                             val_set=X_trees_dev, strat='AdaGrad',
                             mini_batch_size=mb)
        curve.append(l2)
    np.save('mb_curve', curve)
else:
    # Sweep the learning rate
    for lr in np.logspace(-2, 3, 10):
        model = RNN(vocab=lexicon, reg=1)
        l1, l2 = model.train(X_trees_train, max_iter=1000,
                             val_set=X_trees_dev,
# Preparing the dataset: a lookup table from integers to 8-bit binary vectors
size = 8
int2binary = {}
largest_number = pow(2, size)
binary = np.unpackbits(
    np.array([range(largest_number)], dtype=np.uint8).T, axis=1)
for i in range(largest_number):
    int2binary[i] = binary[i]

# Generate 10,000 random addition problems a + b = c
X = []
for j in range(10000):
    a_int = np.random.randint(largest_number // 2)  # int version
    a = int2binary[a_int]  # binary encoding
    b_int = np.random.randint(largest_number // 2)  # int version
    b = int2binary[b_int]  # binary encoding
    c_int = a_int + b_int  # true answer
    c = int2binary[c_int]
    problem = BinaryAddition(a, b)
    problem.set_output(c)
    X.append(problem)

# 2 input bits per step (one from each operand), 16 hidden units, 1 output bit
rnn = RNN(2, 16, 1)
rnn.train(X)

# Try the trained network on a fresh problem
a = int2binary[np.random.randint(largest_number // 2)]  # binary encoding
b = int2binary[np.random.randint(largest_number // 2)]  # binary encoding
problem = BinaryAddition(a, b)
rnn.predict(problem)
problem.print()
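# A small helper (an assumption, not part of the original) to decode an
# 8-bit row produced by np.unpackbits back into an integer, e.g. to check
# a prediction against a_int + b_int:
def binary_to_int(bits):
    return int(np.packbits(np.asarray(bits, dtype=np.uint8))[0])

assert binary_to_int(np.unpackbits(np.array([42], dtype=np.uint8))) == 42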