import datetime
import os
import random
import time
from random import randint

import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing.sequence import pad_sequences

import rnn_models
import rnn_test
import rnn_utils
# `score` (caption metrics; returns a dict with keys such as "METEOR") and
# `get_val_stats` are assumed to come from elsewhere in this project; their
# defining modules are not shown in this file.


def get_training_stats(model_id, model, one_hot_words, one_hot_dictionary):
    # NOTE: sentence_path and feature_path are read from module scope here;
    # train() below takes them as explicit parameters.
    vis_model_id, vis_layer_id, fs = rnn_models.get_vis_model_from_id(model_id)
    max_feat_len, max_sent_len = rnn_models.get_max_lengths(model_id)
    ref = {}
    hypo = {}

    # Transform sentences into one-hot index sequences
    with open(sentence_path + 'sents_train_lc_nopunc.txt') as f:
        lines = f.readlines()  # read the lines into an array
    random.shuffle(lines)

    IDS = []
    X = []
    Y = []
    for line in lines:
        vid_id, sentence = line.rstrip().split('\t')[:2]
        IDS.append(vid_id)
        ref[vid_id] = [sentence, sentence]  # need at least two refs for BLEU
        X_sample = np.load(feature_path + vis_model_id + '/' + vis_layer_id + '/npy/' + vid_id + '.npy')
        Y_sample = [one_hot_words[word] for word in sentence.split()]
        Y_sample.append(one_hot_words['<eos>'])
        X.append(X_sample)
        Y.append(Y_sample)

    X = pad_sequences(X, maxlen=max_feat_len)
    Y = pad_sequences(Y, maxlen=max_sent_len)

    loss = model.evaluate(X, Y, batch_size=64, verbose=1)
    preds = model.predict_classes(X, batch_size=64, verbose=1)
    print(loss)

    # Decode predictions back to word strings, stopping at the first <eos>
    for i in range(len(IDS)):
        pred_str = ''
        for word in preds[i]:
            if one_hot_dictionary[word] == '<eos>':
                break
            pred_str += one_hot_dictionary[word] + ' '
        hypo[IDS[i]] = [pred_str]

    return loss, score(ref, hypo)["METEOR"]
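
# ---------------------------------------------------------------------------
# Illustration (an assumption, not part of the original pipeline): the sentence
# file read above is assumed to hold tab-separated "<video_id>\t<sentence>"
# lines, with one_hot_words / one_hot_dictionary as inverse word<->index maps.
# The sketch below, using a hypothetical four-word vocabulary, mirrors how
# get_training_stats() encodes a caption line into Y_sample and decodes
# predictions back into a hypothesis string.
def _encode_caption_example():
    one_hot_words = {'<eos>': 0, 'a': 1, 'dog': 2, 'runs': 3}
    one_hot_dictionary = {i: w for w, i in one_hot_words.items()}
    line = 'vid001\ta dog runs\n'
    vid_id, sentence = line.rstrip().split('\t')
    y_sample = [one_hot_words[w] for w in sentence.split()] + [one_hot_words['<eos>']]
    assert y_sample == [1, 2, 3, 0]
    # decoding mirrors the hypo-building loop: stop at the first <eos>
    decoded = ' '.join(one_hot_dictionary[i] for i in y_sample[:3])
    assert decoded == 'a dog runs'
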
def train(model_id, sentence_path, feature_path, nb_epoch, batch_size, model_path, load_epoch, extra_path=None):
    t_la = [[], []]  # per-epoch training loss
    t_l = [[], []]   # periodic training loss (from get_training_stats)
    t_a = [[], []]   # periodic training METEOR
    v_l = [[], []]   # periodic validation loss
    v_a = [[], []]   # periodic validation METEOR

    # load vocab
    vocab_size, one_hot_words, one_hot_dictionary = rnn_utils.get_vocab(sentence_path, extra_path)

    # load model
    model = rnn_models.get_model_from_id(model_id, vocab_size)  # maybe fix so 128 not hardcoded

    model_path = model_path + model_id
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    log = open(model_path + '/log.txt', "a")
    log.write('\n\n\nTraining initialised: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))

    if load_epoch == 0:
        print('\nTraining model from scratch...')
    else:
        if load_epoch is None or load_epoch < 0:  # find the latest saved checkpoint
            for i in range(100, -1, -1):
                if os.path.isfile(model_path + '/' + model_id + '_' + str(i) + '.h5'):
                    load_epoch = i
                    break
        if load_epoch is None or load_epoch < 0:  # nothing found: start fresh
            load_epoch = 0
        if load_epoch == 0:
            print('\nTraining model from scratch...')
        else:
            print('\nLoading past model to train from:')
            print('\n' + model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')
            # allow_pickle is required to load the list-of-lists stats object
            [t_la, t_l, t_a, v_l, v_a] = np.load(model_path + '/training_stats_' + str(load_epoch) + '.npy',
                                                 allow_pickle=True)
            model.load_weights(model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')

    model = rnn_models.compile_model(model_id, model)

    vis_model_id, vis_layer_id, fs = rnn_models.get_vis_model_from_id(model_id)
    max_feat_len, max_sent_len = rnn_models.get_max_lengths(model_id)

    # TRAIN LOOP
    start_time = time.time()  # wall-clock time for the ETAs (time.clock() was removed in Python 3.8)
    fig = None
    max_feat_val = -1
    best_model = None
    best_a = -1
    best_e = None
    for e in range(1, nb_epoch + 1):
        print("--------------------------------------------\nepoch %d\n--------------------------------------------" % e)
        log.write("\n--------------------------------------------\nepoch %d\n--------------------------------------------" % e)

        # get data: transform sentences into one-hot index sequences
        with open(sentence_path + 'sents_train_lc_nopunc.txt') as f:
            lines = f.readlines()  # read the lines into an array
        random.shuffle(lines)

        if extra_path is not None:
            if max_feat_val < 0:  # scan the real features once for their length and value ranges
                min_feat_len = 1000000
                min_feat_val = 1000000
                max_feat_val = -1
                for line in lines:
                    X_sample = np.load(feature_path + vis_model_id + '/' + vis_layer_id + '/npy/'
                                       + line.rstrip().split('\t')[0] + '.npy')
                    min_feat_len = min(min_feat_len, np.shape(X_sample)[0])
                    min_feat_val = min(min_feat_val, np.min(X_sample))
                    max_feat_val = max(max_feat_val, np.max(X_sample))

            with open(extra_path) as f:
                extra_lines = f.readlines()  # read the lines into an array
            random.shuffle(extra_lines)
            extra_lines = extra_lines[:1000]
            log.write('EXTRA LINES INCLUDED: ' + extra_path + '\n')
            print('EXTRA LINES INCLUDED: ' + extra_path)
            print('Will train on these first')

            X_batch = []
            Y_batch = []
            count = 0
            past = 0
            inner_count = 0
            sum_loss = 0
            start_time_inner = time.time()
            for line in extra_lines:
                count += 1
                # model_id character 8 selects how the synthetic visual input is built
                if int(model_id[8:9]) == 1:
                    # random X drawn from the same value range as the real features
                    X_sample = (np.random.rand(randint(min_feat_len, max_feat_len - 1), fs)
                                * (max_feat_val - min_feat_val) + min_feat_val)
                elif int(model_id[8:9]) == 2:
                    # all-zero X
                    X_sample = np.zeros((randint(min_feat_len, max_feat_len - 1), fs))
                Y_sample = [one_hot_words[word] for word in line.rstrip().split('\t')[1].split()]
                Y_sample.append(one_hot_words['<eos>'])

                X_batch.append(X_sample)
                Y_batch.append(Y_sample)

                if (count % batch_size == 0) or (count == len(extra_lines)):
                    X_batch = pad_sequences(X_batch, maxlen=max_feat_len, padding='pre')
                    Y_batch = pad_sequences(Y_batch, maxlen=max_sent_len, padding='post')
                    loss = model.train_on_batch(X_batch, Y_batch)
                    inner_count += 1
                    sum_loss += loss
                    X_batch = []
                    Y_batch = []

                # progress / time-remaining report roughly every 10%
                if (int((float(count) / len(extra_lines)) * 100) > past) or (count == len(extra_lines)):
                    tr = (len(extra_lines) - count) / (count / (time.time() - start_time_inner))
                    trt = ((nb_epoch - e + 1) * len(extra_lines) - count) / (
                        ((e - 1) * len(extra_lines) + count) / (time.time() - start_time))
                    msg = '(%d) [%.5f] Image: %d / %d; Epoch TR: %02d:%02d:%02d; Total TR: %02d:%02d:%02d;' % (
                        past, sum_loss / max(inner_count, 1), count, len(extra_lines),  # guard: report may fire before the first batch
                        int((tr / 60) / 60), int((tr / 60) % 60), int(tr % 60),
                        int((trt / 60) / 60), int((trt / 60) % 60), int(trt % 60))
                    print(msg)
                    log.write('\n' + msg)
                    past += 10
                    sum_loss = 0
                    inner_count = 0

            log.write('\n--------------------------------------------')
            print('\n--------------------------------------------')

        X_batch = []
        Y_batch = []
        count = 0
        past = 0
        inner_count = 0
        sum_loss = 0
        start_time_inner = time.time()
        for line in lines:
            count += 1
            X_sample = np.load(feature_path + vis_model_id + '/' + vis_layer_id + '/npy/'
                               + line.rstrip().split('\t')[0] + '.npy')
            Y_sample = [one_hot_words[word] for word in line.rstrip().split('\t')[1].split()]
            Y_sample.append(one_hot_words['<eos>'])
            # (sequence lengths are assumed to fit within max_feat_len / max_sent_len
            #  across the train, val and test splits)

            X_batch.append(X_sample)
            Y_batch.append(Y_sample)

            if (count % batch_size == 0) or (count == len(lines)):
                X_batch = pad_sequences(X_batch, maxlen=max_feat_len, padding='pre')
                Y_batch = pad_sequences(Y_batch, maxlen=max_sent_len, padding='post')
                loss = model.train_on_batch(X_batch, Y_batch)
                inner_count += 1
                sum_loss += loss
                X_batch = []
                Y_batch = []

            # progress / time-remaining report roughly every 10%
            if (int((float(count) / len(lines)) * 100) > past) or (count == len(lines)):
                tr = (len(lines) - count) / (count / (time.time() - start_time_inner))
                trt = ((nb_epoch - e + 1) * len(lines) - count) / (
                    ((e - 1) * len(lines) + count) / (time.time() - start_time))
                msg = '(%d) [%.5f] Image: %d / %d; Epoch TR: %02d:%02d:%02d; Total TR: %02d:%02d:%02d;' % (
                    past, sum_loss / max(inner_count, 1), count, len(lines),  # guard: report may fire before the first batch
                    int((tr / 60) / 60), int((tr / 60) % 60), int(tr % 60),
                    int((trt / 60) / 60), int((trt / 60) % 60), int(trt % 60))
                print(msg)
                log.write('\n' + msg)
                past += 10
                sum_loss = 0
                inner_count = 0

        t_la[0].append(e)
        t_la[1].append(loss)  # loss of the epoch's final batch

        tr = (nb_epoch - e) / (e / (time.time() - start_time))
        print('TR: %02d:%02d:%02d;' % (int((tr / 60) / 60), int((tr / 60) % 60), int(tr % 60)))

        ####################################################################
        # evaluate on train and val every 10 epochs
        if e % 10 == 0:
            l, a = get_training_stats(model_id, model, one_hot_words, one_hot_dictionary)
            t_l[0].append(e)
            t_l[1].append(l)
            t_a[0].append(e)
            t_a[1].append(a)

            l, a = get_val_stats(model_id, model, one_hot_words, one_hot_dictionary)
            v_l[0].append(e)
            v_l[1].append(l)
            v_a[0].append(e)
            v_a[1].append(a)

            log.write('\n -- Val: METEOR --')
            log.write('\n' + str(a))
            log.write('\n -----------------')

            if a > best_a:
                best_a = a
                best_e = e
                best_model = model  # NOTE: aliases the live model rather than copying it,
                                    # so the "best" weights are whatever the model holds when saved

            # graph it: training loss on the left axis, METEOR on the right
            if fig:
                plt.close()
            fig, ax1 = plt.subplots()
            ax1.plot(t_la[0], t_la[1], 'g-')  # training loss
            ax2 = ax1.twinx()
            ax2.plot(t_a[0], t_a[1], 'b--')   # train METEOR
            ax2.plot(v_a[0], v_a[1], 'r--')   # val METEOR
            plt.show(block=False)

        # checkpoint, plots, stats and per-split results every 20 epochs
        if e % 20 == 0:
            if not os.path.exists(model_path):
                os.makedirs(model_path)
            model.save_weights(model_path + '/' + model_id + '_' + str(e) + '.h5', overwrite=True)
            fig.savefig(model_path + '/training.png')
            fig.savefig(model_path + '/training.pdf')
            np.save(model_path + '/training_stats_' + str(e) + '.npy', [t_la, t_l, t_a, v_l, v_a])

            results_path = model_path + '/RESULTS/'
            if not os.path.exists(results_path):
                os.makedirs(results_path)
            for split in ['train', 'val', 'test']:
                print(split)
                loss, scores, ref, hypo = rnn_test.test(split, model_id, model, sentence_path, feature_path,
                                                        one_hot_words, one_hot_dictionary)
                output = "%f\n%s\n" % (loss, str(scores))
                for k in ref:
                    output += '________________________\n'
                    output += k + '\n'
                    output += ref[k][0] + '\n'
                    output += hypo[k][0] + '\n'
                with open(results_path + split + '_' + str(e) + '.txt', 'w') as f:
                    f.write(output)
                if e == nb_epoch:
                    print(output)

    # save best
    log.write('\nBest Epoch: ' + str(best_e) + ' with val METEOR of ' + str(best_a))
    best_model.save_weights(model_path + '/' + model_id + '_BEST_' + str(best_e) + '.h5', overwrite=True)

    log.close()
    return model
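
# ---------------------------------------------------------------------------
# Minimal usage sketch (all paths and hyperparameters below are hypothetical;
# the model id format is assumed from the int(model_id[8:9]) check above and
# the GRU naming used elsewhere in this project):
if __name__ == '__main__':
    train(model_id='GRU_01',                 # assumed model id
          sentence_path='/data/SENTENCES/',  # hypothetical: dir holding sents_*_lc_nopunc.txt
          feature_path='/data/FEATURES/',    # hypothetical: dir holding <vis_model>/<layer>/npy/*.npy
          nb_epoch=500,
          batch_size=64,
          model_path='/data/MODELS/',        # hypothetical: checkpoints and logs go here
          load_epoch=0,                      # 0 = from scratch; negative/None resumes from the latest checkpoint
          extra_path=None)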