# Evaluate on held-out data; keep the best model and stop early once
# validation perplexity stops improving for `patience` consecutive checks.
test_cost = test(test_stream)
print "Perplexity:", test_cost
if test_cost < improvement_threshold * best_cost:
    best_cost = test_cost
    P.save(output_file)
    increase_count = 0
else:
    increase_count += 1
    if increase_count > patience:
        break

# Run training: shuffle the stream, sort within a window by sequence length
# so each batch holds similarly-sized sequences, then shuffle the batches.
data_stream = data_io.randomise(data_stream, buffer_size=1024)
data_stream = data_io.sortify(data_stream, key=lambda x: len(x), buffer_size=512)
batch_data_stream = data_io.batch(data_stream, batch_size=batch_size)
batch_data_stream = data_io.randomise(batch_data_stream)
for batch in batch_data_stream:
    avg_cost = train(batch)
    if np.isnan(avg_cost):
        # Dump the offending batch decoded back to characters before bailing out.
        pprint([''.join(id2char[c] for c in l[1:]) for l in batch])
        exit(1)
    print avg_cost,
    seen += len(batch)
    if seen > checkpoint:
        print "Saving...",
        P.save(tmp_file)
        seen = 0
        print
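# A minimal sketch of the shuffle-sort-batch pattern used above, assuming
# data_io's helpers behave roughly like the generators below. These are
# illustrative stand-ins, not the project's actual data_io implementations:
# sorting within a bounded window keeps sequences of similar length adjacent
# (so padding waste stays low) while the buffered shuffles keep the stream
# approximately random.
import random

def randomise(stream, buffer_size=1024):
    # Approximate shuffle: fill a buffer, shuffle it, drain it, repeat.
    buf = []
    for item in stream:
        buf.append(item)
        if len(buf) >= buffer_size:
            random.shuffle(buf)
            for x in buf:
                yield x
            buf = []
    random.shuffle(buf)
    for x in buf:
        yield x

def sortify(stream, key, buffer_size=512):
    # Sort within a sliding window so nearby items have similar keys
    # (here, sequence lengths).
    buf = []
    for item in stream:
        buf.append(item)
        if len(buf) >= buffer_size:
            buf.sort(key=key)
            for x in buf:
                yield x
            buf = []
    buf.sort(key=key)
    for x in buf:
        yield x

def batch(stream, batch_size=32):
    # Group consecutive (already length-sorted) items into fixed-size batches.
    buf = []
    for item in stream:
        buf.append(item)
        if len(buf) == batch_size:
            yield buf
            buf = []
    if buf:
        yield buf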
    # (inside the epoch loop: this branch runs when validation error improved,
    #  widening the story-length curriculum by two tokens)
    best_error = errors
    length_limit += 2
else:
    # learning_rate = learning_rate / 2
    # batch_size = max(1, batch_size // 2)
    # print "Learning rate:", learning_rate
    P.save('tmp.model.pkl')

buffer_size = 256 / batch_size
# Rebuild the training stream each epoch: shuffle the grouped answers, drop
# stories longer than the current curriculum limit, sort by story length,
# and batch together stories whose lengths differ by at most 2 tokens.
train_group_answers = data_io.randomise(group_answers)
training_data = data_io.story_question_answer_idx(train_group_answers, vocab_in)
training_data = (x for x in training_data if x[1].shape[0] <= length_limit)
training_data = data_io.sortify(training_data, key=lambda x: x[1].shape[0])
batched_training_data = data_io.batch(
    training_data,
    batch_size=batch_size,
    criteria=lambda x, x_: abs(x[1].shape[0] - x_[1].shape[0]) <= 2
)
batched_training_data = data_io.randomise(batched_training_data, buffer_size=buffer_size)
group_count = 0
for batch in batched_training_data:
    loss = 0
    count = 0
    for input_data, idxs, question_data, ans_w, ans_evds in batch:
        print idxs.shape[0],
        curr_loss = np.array(acc(input_data, idxs, question_data, ans_w, ans_evds))
        if np.isnan(curr_loss).any():
            print curr_loss
            exit()
        loss += curr_loss
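# A hedged sketch of batching with a pairwise `criteria` check, as used in
# the data_io.batch call above. `batch_with_criteria` is a hypothetical
# stand-in, and it assumes each candidate item is compared against the first
# item of the current batch; the real data_io.batch may compare differently
# (e.g. against the most recent item). The effect either way: a batch is cut
# whenever the next story's length strays too far from the batch's lengths.
def batch_with_criteria(stream, batch_size, criteria):
    buf = []
    for item in stream:
        # Start a new batch when full, or when the candidate fails the
        # similarity test against the current batch.
        if buf and (len(buf) == batch_size or not criteria(buf[0], item)):
            yield buf
            buf = []
        buf.append(item)
    if buf:
        yield buf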