def train(model_id, model_path, data_paths_path, feature_path, nb_epoch, batch_size, load_epoch):
    """Train the model identified by `model_id`, checkpointing every epoch.

    Loads (or resumes) the model, iterates `nb_epoch` epochs over the
    equalised training list, runs validation after each epoch, and writes
    per-epoch weights (`<id>_<e>.h5`), stats (`training_stats_<e>.npy`),
    a progress plot (`training.pdf`) and an append-only `log.txt` under
    `model_path + model_id`.

    Args:
        model_id: key understood by `models.get_model_from_id`.
        model_path: directory prefix; the model id is appended to it.
        data_paths_path: directory containing `train_paths_equalised.txt`
            and `val_paths_equalised.txt` (one "x y" pair per line).
        feature_path: passed through to `models.load_input`.
        nb_epoch: last epoch number to train (inclusive).
        batch_size: samples per training/validation batch.
        load_epoch: 0 = from scratch; negative/None = resume from the
            latest checkpoint on disk; otherwise resume from that epoch.

    Returns:
        The trained model, or None if `model_id` is unknown.
    """
    # NOTE(review): time.clock() is deprecated (removed in Py3.8); wall-clock
    # time appears to be the intent here — kept for compatibility with the
    # rest of this Py2 codebase.
    start_time = time.clock()
    t_la = [[], []]  # unused in visible code; kept for compatibility
    t_l = [[], []]   # training loss curve: [x = epoch fraction, y = mean loss]
    t_a = [[], []]   # unused in visible code; kept for compatibility
    v_l = [[], []]   # validation loss per epoch: [epoch, loss]
    v_a = [[], []]   # validation accuracy per epoch: [epoch, acc]
    fig = None

    model = models.get_model_from_id(model_id)
    if model is None:
        return
    model_path = model_path + model_id

    # Open the append-only log (creating the model directory first).
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    log = open(model_path + '/log.txt', "a")
    log.write('\n\n\nTraining initialised: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))

    if load_epoch == 0:
        print('Training model from scratch...')
        log.write('\nTraining model from scratch...')
    else:
        # FIX: test `is None` before `< 0` — the original order relied on the
        # Py2 quirk that None < 0, and would raise TypeError on Py3.
        if load_epoch is None or load_epoch < 0:
            # Resume from the latest checkpoint on disk (search 100 -> 0).
            for i in range(100, -1, -1):
                if os.path.isfile(model_path + '/' + model_id + '_' + str(i) + '.h5'):
                    load_epoch = i
                    break
            if load_epoch is None:
                load_epoch = 0
        if load_epoch == 0:
            log.write('\nTraining model from scratch...')
        else:
            print('Loading past model to train from:')
            print(model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')
            log.write('\nLoading past model to train from:')
            log.write('\n' + model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')
            # Restore the stat curves so the plot continues where it left off.
            [t_l, v_l, v_a] = np.load(model_path + '/training_stats_' + str(load_epoch) + '.npy')
            model.load_weights(model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')

    model = models.compile_model(model_id, model)

    for e in range(load_epoch + 1, nb_epoch + 1):
        print(
            "--------------------------------------------\nepoch %d\n--------------------------------------------" % e)
        log.write(
            "\n--------------------------------------------\nepoch %d\n--------------------------------------------" % e)

        # ---- training pass -------------------------------------------------
        with open(data_paths_path + 'train_paths_equalised.txt') as f:
            all_paths = f.readlines()
        random.shuffle(all_paths)  # randomise order every epoch!!
        all_paths = [line.split() for line in all_paths]  # split so x and y split

        X_batch = []
        Y_batch = []
        sum_loss = 0
        past = 0
        count = 0
        inner_count = 0
        start_time_inner = time.clock()
        for path in all_paths:
            count += 1
            x, y = models.load_input(model_id, feature_path, path)
            X_batch.append(x)
            Y_batch.append(y)
            # Train on a full batch, or on the final (possibly short) batch.
            if (count % batch_size == 0) or (count == len(all_paths)):
                # (original had a dead no-op `inner_count + 1` here; removed)
                Y_batch = np.squeeze(Y_batch)
                loss, acc = model.train_on_batch(X_batch, Y_batch)
                sum_loss += loss
                inner_count += 1
                # clear batch
                X_batch = []
                Y_batch = []
            # Progress report roughly every 10% (first tick at 1%).
            if (int((float(count) / len(all_paths)) * 100) > past) or (count == len(all_paths)):
                # tr  = time remaining for this epoch; trt = total time remaining
                tr = (len(all_paths) - count) / ((count) / (time.clock() - start_time_inner))
                trt = ((nb_epoch - e + 1) * len(all_paths) - count) / (
                    ((e - 1) * len(all_paths) + count) / (time.clock() - start_time))
                # FIX: guard against division by zero when the progress tick
                # fires before the first batch has been trained.
                avg_loss = sum_loss / inner_count if inner_count else 0.0
                print('(%d) [%.5f] Image: %d / %d; Epoch TR: %02d:%02d:%02d; Total TR: %02d:%02d:%02d;' % (
                    past, avg_loss, count, len(all_paths),
                    int((tr / 60) / 60), int((tr / 60) % 60), int(tr % 60),
                    int((trt / 60) / 60), int((trt / 60) % 60), int(trt % 60)))
                # Re-open the log so progress survives a crash mid-epoch.
                log.close()
                log = open(model_path + '/log.txt', "a")
                log.write('\n(%d) [%.5f] Image: %d / %d; Epoch TR: %02d:%02d:%02d; Total TR: %02d:%02d:%02d;' % (
                    past, avg_loss, count, len(all_paths),
                    int((tr / 60) / 60), int((tr / 60) % 60), int(tr % 60),
                    int((trt / 60) / 60), int((trt / 60) % 60), int(trt % 60)))
                t_l[0].append((e - 1) + past * .01)
                t_l[1].append(avg_loss)
                # Redraw the curves: training loss (green), val loss (blue),
                # val accuracy (red, right axis).
                if fig:
                    plt.close()
                fig, ax1 = plt.subplots()
                ax1.plot(t_l[0], t_l[1], 'g-')
                ax1.plot(v_l[0], v_l[1], 'b-')
                ax1.set_ylim(bottom=0)
                ax2 = ax1.twinx()
                ax2.plot(v_a[0], v_a[1], 'r-')
                ax2.set_ylim(top=1)
                past += 10
                sum_loss = 0
                inner_count = 0

        # ---- validation pass ------------------------------------------------
        print('--------------------------------------------')
        print('Validation results:')
        log.write('\n--------------------------------------------')
        log.write('\nValidation results:\n')
        with open(data_paths_path + 'val_paths_equalised.txt') as f:
            all_val_paths = f.readlines()
        random.shuffle(all_val_paths)  # randomise order every epoch!!
        all_val_paths = [line.split() for line in all_val_paths]  # split so x and y split
        X_val = []
        Y_val = []
        count = 0
        past = 0
        val_metrics = []
        for path in all_val_paths:
            count += 1
            x, y = models.load_input(model_id, feature_path, path)
            X_val.append(x)
            Y_val.append(y)
            # FIX: the final-batch test originally compared against
            # len(all_paths) (the TRAINING set size), dropping or misfiring
            # the last partial validation batch.
            if (count % batch_size == 0) or (count == len(all_val_paths)):
                Y_val = np.squeeze(Y_val)
                val_metrics.append(model.test_on_batch(X_val, Y_val))
                # clear batch
                X_val = []
                Y_val = []
            if int((float(count) / len(all_val_paths)) * 100) > past:
                print('.'),
                log.write('.')
                past += 10
        print('\n')
        val_results = np.average(val_metrics, axis=0)
        print(val_results)
        log.write('\n' + str(val_results))
        v_l[0].append(e)
        v_l[1].append(val_results[0])
        v_a[0].append(e)
        v_a[1].append(val_results[1])

        # ---- checkpoint (every epoch; raise the modulus to save less often) -
        if e % 1 == 0:
            if not os.path.exists(model_path):
                os.makedirs(model_path)
            model.save_weights(model_path + '/' + model_id + '_' + str(e) + '.h5', overwrite=True)
            # FIX: fig may still be None if no progress tick fired this epoch.
            if fig is not None:
                fig.savefig(model_path + '/training.pdf')
            np.save(model_path + '/training_stats_' + str(e) + '.npy', [t_l, v_l, v_a])

    tt = time.clock() - start_time
    print('Total Time Taken: %02d:%02d:%02d;' % (int((tt / 60) / 60), int((tt / 60) % 60), int(tt % 60)))
    log.write('\n\nTotal Time Taken: %02d:%02d:%02d;' % (int((tt / 60) / 60), int((tt / 60) % 60), int(tt % 60)))
    log.close()  # FIX: the log handle was leaked on return
    return model
def predict(model_id, model_path, data_paths_path, feature_path, split, batch_size=None,
            load_epoch=None, layers=None, save_path=None, equalised=False):
    """Run inference over a data split, optionally dumping per-image outputs.

    Loads the `load_epoch` checkpoint of `model_id`, then for each requested
    layer builds a sub-model truncated at that layer and predicts over every
    path in `<split>_paths[_equalised].txt`. If `save_path` is given, each
    prediction is written to
    `<save_path><model_id>_<load_epoch>/<layer>/npy/ind/<image>.npy`.

    Args:
        model_id: key understood by `models.get_model_from_id`.
        model_path: directory prefix; the model id is appended to it.
        data_paths_path: directory containing the split path lists.
        feature_path: passed through to `models.load_input`.
        split: split name, e.g. 'train'/'val'/'test'.
        batch_size: if None, predict one sample at a time.
        load_epoch: checkpoint epoch to load (required).
        layers: layer names to extract outputs from (default: ['pred']).
            FIX: was a mutable default argument.
        save_path: directory prefix for saved .npy outputs, or None.
        equalised: use the `_paths_equalised.txt` list instead of `_paths.txt`.

    Returns:
        (Y_gt, Y_pred) for the LAST layer in `layers`, or None on error.
    """
    if layers is None:
        layers = ['pred']
    start_time = time.clock()
    # NOTE(review): hard-coded class count — confirm it matches the dataset.
    output_classes = 7
    model = models.get_model_from_id(model_id)
    if model is None:
        return

    model_path = model_path + model_id
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    if load_epoch is not None:
        print('Loading model: ' + model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')
        model.load_weights(model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')
    else:
        print('ERROR: Need load_epoch number to load')
        return
    # no compile needed for prediction

    # get data
    if equalised:
        with open(data_paths_path + split + '_paths_equalised.txt') as f:
            all_paths = f.readlines()
    else:
        with open(data_paths_path + split + '_paths.txt') as f:
            all_paths = f.readlines()
    all_paths = [line.split() for line in all_paths]  # split so x and y split

    # FIX: keep a handle on the full network — the original reassigned `model`
    # to the truncated sub-model inside this loop, so a second layer name was
    # looked up in an already-pruned model.
    base_model = model
    for layer_name in layers:
        model = Model(input=base_model.input, output=base_model.get_layer(layer_name).output)

        X_batch = []
        Y_batch = []
        Y_gt = None
        Y_pred = None
        past = 0
        count = 0
        inner_count = 0
        for path in all_paths:
            count += 1
            # Repair stale absolute paths baked into the .txt lists by
            # re-rooting everything after '/DATASETS/' onto DRIVE.
            cor_path = DRIVE + path[0][path[0].find('/DATASETS/') + 1:]
            if path[0] != cor_path:
                path[0] = cor_path
            x, y = models.load_input(model_id, feature_path, path)
            X_batch.append(x)
            Y_batch.append(y)

            if batch_size is not None:
                if (count % batch_size == 0) or (count == len(all_paths)):
                    if Y_gt is None:
                        Y_pred = model.predict_on_batch(np.array(X_batch))
                        Y_gt = Y_batch
                    else:
                        Y_pred = np.append(Y_pred, model.predict_on_batch(np.array(X_batch)), axis=0)
                        Y_gt = np.append(Y_gt, Y_batch, axis=0)
                    inner_count += 1
                    # clear batches
                    X_batch, Y_batch = [], []
            else:
                # FIX: the original overwrote Y_pred/Y_gt on every iteration,
                # so only the final sample survived (breaking the save loop
                # below); accumulate instead.
                if Y_gt is None:
                    Y_pred, Y_gt = [], []
                Y_pred.append(model.predict(x))
                Y_gt.append(np.eye(output_classes)[y])

            # Progress report every 5%.
            if int((float(count) / len(all_paths)) * 100) > past:
                tr = (len(all_paths) - count) / ((count) / (time.clock() - start_time))
                print('(%d) Image: %d / %d; TR: %02d:%02d:%02d;' % (past, count, len(all_paths),
                                                                    int((tr / 60) / 60),
                                                                    int((tr / 60) % 60),
                                                                    int(tr % 60)))
                past += 5

        # save predictions to file
        all_array = {}
        if save_path is not None:
            print('\nSaving ....')
            # FIX: the original did `save_path += ...` inside the layer loop,
            # nesting the output directory deeper on every extra layer.
            run_save_path = save_path + model_id + '_' + str(load_epoch) + '/'
            print(run_save_path)
            out_dir = run_save_path + layer_name + '/npy/ind/'
            # FIX: hoisted out of the per-sample loop (was re-checked every p).
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            for p in range(len(Y_gt)):
                path = all_paths[p]
                image_name = path[0].split('/')[-1]
                # image_name[:-4] strips the 4-char extension (e.g. '.jpg')
                np.save(out_dir + image_name[:-4] + '.npy', np.squeeze(Y_pred[p]))
                all_array[image_name[:-4]] = [Y_gt[p], Y_pred[p]]
            # np.save(run_save_path + layer_name + '/npy/' + split + '.npy', all_array)

    return Y_gt, Y_pred