def test(modelParams):
    # import the corresponding model as model_cnn; the name is specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' + modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    _setupLogging(os.path.join(modelParams['testLogDir'], "genlog"))

    with tf.Graph().as_default():
        # Get images and transformation for model_cnn.
        images, pclA, pclB, tMatT, tfrecFileIDs = data_input.inputs(**modelParams)
        # Build a Graph that computes the HAB predictions from the inference model.
        tMatP = model_cnn.inference(images, **modelParams)
        # Calculate loss. 2 options:
        # use mask to get degrees significant
        loss = model_cnn.weighted_loss(tMatP, tMatT, **modelParams)
        # pcl based
        #loss = model_cnn.pcl_loss(pclA, tMatP, tMatT, **modelParams)
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)
        # Restore the latest training checkpoint.
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, modelParams['trainLogDir'] + '/model.ckpt-' +
                      str(modelParams['trainMaxSteps'] - 1))
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summaryWriter = tf.summary.FileWriter(modelParams['testLogDir'], sess.graph)

        lossValueSum = 0
        durationSum = 0
        durationSumAll = 0
        print('Warping images with batch size %d in %d steps' %
              (modelParams['activeBatchSize'], modelParams['maxSteps']))
        testValueSampleResults = list()
        stepFinal = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            evImages, evPclA, evPclB, evtMatT, evtMatP, evtfrecFileIDs, evlossValue = sess.run(
                [images, pclA, pclB, tMatT, tMatP, tfrecFileIDs, loss])
            duration = time.time() - startTime
            durationSum += duration
            lossValueSum += evlossValue
            #_write_to_csv(modelParams['testLogDir']+'/testRes'+jsonToRead.replace('.json', '_T.csv'), evtMatT)
            #_write_to_csv(modelParams['testLogDir']+'/testRes'+jsonToRead.replace('.json', '_P.csv'), evtMatP)
            # Write test outputs to tfrecords:
            #### put imageA, imageB warped by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
            #if (step == 0):
            #    data_output.output_with_test_image_files(evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, **modelParams)
            #else:
            data_output.output(evImages, evPclA, evPclB, evtMatT, evtMatP,
                               evtfrecFileIDs, **modelParams)
            duration = time.time() - startTime
            durationSumAll += duration
            # print out control outputs
            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch) avg_err_over_time = %.2f')
                logging.info(format_str % (datetime.now(), step, evlossValue,
                                           examplesPerSec, secPerBatch,
                                           lossValueSum / (step + 1)))
            # write summaries
            if (step % FLAGS.summaryWriteStep == 0) or ((step + 1) == modelParams['maxSteps']):
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Testing Completion in: %.2f mins' %
                      ((100 * step) / modelParams['maxSteps'], lossValueSum / (step + 1),
                       durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) / (step + 1)) / 60) - (durationSum / 60)))
                #print('Total Elapsed: %.2f mins, Testing Completion in: %.2f mins' %
                #      (durationSumAll/60, (((durationSumAll*stepsForOneDataRound)/(step+1))/60)-(durationSumAll/60)))
            stepFinal = step

        step = stepFinal + 1
        print('Average test error = %.2f - Average time per sample = %.2f s, Steps = %d, ex/sec = %.2f' %
              (lossValueSum / step,
               durationSum / (step * modelParams['activeBatchSize']),
               step,
               modelParams['numExamples'] / durationSum))
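# A minimal sketch of the _write_to_csv helper referenced (commented out) in the
# test loop above. The real helper is not shown in this file, so the name and
# behavior here are assumptions: it is taken to append one CSV row per sample
# of a batch matrix (e.g. a flattened tMat) to the given file.
import csv

def _write_to_csv(csvFilePath, batchMatrix):
    # Append each row of the batch (one sample per row) to the CSV file.
    with open(csvFilePath, 'a') as csvFile:
        writer = csv.writer(csvFile)
        for row in batchMatrix:
            writer.writerow(row)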
def test():
    _get_control_params()
    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    _setupLogging(os.path.join(modelParams['testLogDir'], "genlog"))

    with tf.Graph().as_default():
        # Get images and transformation for model_cnn.
        imagesOrig, images, pOrig, tHAB, tfrecFileIDs = data_input.inputs(**modelParams)
        # Build a Graph that computes the HAB predictions from the inference model.
        pHAB = model_cnn.inference(images, **modelParams)
        # Calculate loss.
        loss = model_cnn.loss(pHAB, tHAB, **modelParams)
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)
        # Restore the latest training checkpoint.
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, modelParams['trainLogDir'] + '/model.ckpt-' +
                      str(modelParams['trainMaxSteps'] - 1))
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summaryWriter = tf.summary.FileWriter(modelParams['testLogDir'], sess.graph)

        lossValueSum = 0
        durationSum = 0
        HABperPixelsum = 0
        maxErrbatchsum = 0
        print('Warping images with batch size %d in %d steps' %
              (modelParams['activeBatchSize'], modelParams['maxSteps']))
        testValueSampleResults = list()
        stepFinal = 0
        for step in xrange(modelParams['maxSteps']):
            # run and get inference
            startTime = time.time()
            evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, evlossValue = sess.run(
                [imagesOrig, images, pOrig, tHAB, pHAB, tfrecFileIDs, loss])
            duration = time.time() - startTime
            durationSum += duration
            # Calculate actual pixel errors for the current batch from the inference results.
            HABRES = evtHAB - evpHAB
            if step == 1:
                step = 0
            HABperPixel = 0
            maxErrbatch = 0
            for i in xrange(modelParams['activeBatchSize']):
                # 2x4 matrix of x/y corner offsets; error is the mean corner displacement
                H = np.asarray([[HABRES[i][0], HABRES[i][1], HABRES[i][2], HABRES[i][3]],
                                [HABRES[i][4], HABRES[i][5], HABRES[i][6], HABRES[i][7]]], np.float32)
                HABperPixel += np.sqrt((H * H).sum(axis=0)).mean()
                testValueSampleResults.append(HABperPixel)
                maxErr = np.asarray([[evtHAB[i][0], evtHAB[i][1], evtHAB[i][2], evtHAB[i][3]],
                                     [evtHAB[i][4], evtHAB[i][5], evtHAB[i][6], evtHAB[i][7]]], np.float32)
                maxErrbatch += np.sqrt((maxErr * maxErr).sum(axis=0)).mean()
            HABperPixel = HABperPixel / modelParams['activeBatchSize']
            maxErrbatch = maxErrbatch / modelParams['activeBatchSize']
            HABperPixelsum += HABperPixel
            maxErrbatchsum += maxErrbatch
            # print out control outputs
            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch) pixel_err_avg = %.2f max_err_avg = %.2f')
                logging.info(format_str % (datetime.now(), step, HABperPixel,
                                           examplesPerSec, secPerBatch,
                                           HABperPixelsum / (step + 1),
                                           maxErrbatchsum / (step + 1)))
            # write summaries
            if (step % FLAGS.summaryWriteStep == 0) or ((step + 1) == modelParams['maxSteps']):
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Testing Completion in: %.2f mins, max_err_avg = %.2f' %
                      ((100 * step) / modelParams['maxSteps'], HABperPixelsum / (step + 1),
                       durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) / (step + 1)) / 60) - (durationSum / 60),
                       maxErrbatchsum / (step + 1)))
            # Write test outputs to tfrecords:
            #### put imageA, imageB warped by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
            if (step == 0):
                data_output.output_with_test_image_files(evImagesOrig, evImages, evPOrig,
                                                         evtHAB, evpHAB, evtfrecFileIDs, **modelParams)
            else:
                data_output.output(evImagesOrig, evImages, evPOrig,
                                   evtHAB, evpHAB, evtfrecFileIDs, **modelParams)
            stepFinal = step
            break  # NOTE: exits after a single batch; looks like a debugging leftover

        step = stepFinal + 1
        print('Average test pixel error = %.2f - Average max pixel error = %.2f - Average time per sample = %.2f s, Steps = %d' %
              (HABperPixelsum / step, maxErrbatchsum / step,
               durationSum / (step * modelParams['activeBatchSize']), step))
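# Worked example of the per-sample HAB pixel error computed in the loop above:
# the 8-element HAB residual is reshaped into a 2x4 matrix (x and y offsets of
# the four patch corners), and the error is the mean Euclidean displacement
# over the four corners. Standalone sketch with made-up numbers, assuming the
# same 8-element layout as evtHAB/evpHAB.
def hab_pixel_error(habResidual8):
    H = np.asarray([habResidual8[0:4], habResidual8[4:8]], np.float32)  # rows: dx, dy
    return np.sqrt((H * H).sum(axis=0)).mean()  # mean corner displacement in pixels

# e.g. a residual of (3, 4) pixels on every corner gives an error of 5.0 pixels
assert abs(hab_pixel_error([3, 3, 3, 3, 4, 4, 4, 4]) - 5.0) < 1e-6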
def train(modelParams, epochNumber):
    # import the corresponding model as model_cnn; the name is specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' + modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    _setupLogging(os.path.join(modelParams['logDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images inputs for model_cnn.
        if modelParams['phase'] == 'v':
            filename, pngTemp, targetT = data_input.inputs_vali(**modelParams)
        else:
            filename, pngTemp, targetT = data_input.inputs(**modelParams)
        print('Input ready')
        #TEST### filenametest, pngTemptest, targetTtest = data_input.inputs_test(**modelParams)
        # Build a Graph that computes the HAB predictions from the inference model
        #targetP = model_cnn.inference(pngTemp, **modelParams)
        targetP, l2reg = model_cnn.inference_l2reg(pngTemp, **modelParams)
        #TEST### targetPtest = model_cnn.inference(pngTemptest, **modelParams)
        print(targetP.get_shape())
        # loss model
        if modelParams.get('classificationModel'):
            print('Classification model...')
            # loss on last tuple
            #loss = model_cnn.loss(targetP, targetT, **modelParams)
            loss = model_cnn.loss_l2reg(targetP, targetT, l2reg, **modelParams)
            #TEST### losstest = model_cnn.loss(targetPtest, targetTtest, **modelParams)
        else:
            print('Regression model...')
            # loss on last tuple
            loss = model_cnn.loss(targetP, targetT, **modelParams)
        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Testing ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver ready')
        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        print('MergeSummary ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        # init = tf.global_variables_initializer()
        #opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session ready')
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        # sess.run(init)

        # Restore a checkpoint.
        print('Loading Ex-Model with epoch number %d ...' % epochNumber)
        print(' ', modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber))
        saver.restore(sess, (modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber)))
        #saver.restore(sess, (modelParams['trainLogDir']+'_30k/model.ckpt-29000'))
        print('Ex-Model loaded')

        if True:  # if True: freeze graph
            tf.train.write_graph(sess.graph.as_graph_def(), '.',
                                 modelParams['trainLogDir'] + '_v/model.pbtxt', as_text=True)
            # Output nodes
            output_node_names = [n.name for n in tf.get_default_graph().as_graph_def().node]
            # Freeze the graph
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names)
            # Save the frozen graph
            with open(modelParams['trainLogDir'] + '_v/model.pb', 'wb') as f:
                f.write(frozen_graph_def.SerializeToString())

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner started')
        summaryWriter = tf.summary.FileWriter(modelParams['logDir'], sess.graph)
        summaryValiWriter = tf.summary.FileWriter(modelParams['logDir'] + '_v', sess.graph)
        #TEST### summaryValiWriter = tf.summary.FileWriter(modelParams['logDir']+'_test', sess.graph)
        print('Testing started')

        durationSum = 0
        durationSumAll = 0
        prevLoss = 99999
        prevValiSumLoss = 99999
        prevaccur = 0
        prevLossStep = 0
        prevStep = 21000
        #TEST### prevTestSumLoss = 99999
        prevStep = int(modelParams['maxSteps'] / 2)
        l = list()
        import cv2
        lossValueSum = 0
        l2regValueSum = 0

        # Count trainable parameters.
        total_parameters = 0
        for variable in tf.trainable_variables():
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            #print(shape)
            #print(len(shape))
            variable_parameters = 1
            for dim in shape:
                #print(dim)
                variable_parameters *= dim.value
            #print(variable_parameters)
            total_parameters += variable_parameters
        print('-----total parameters-------- ', total_parameters)

        for step in xrange(0, modelParams['maxSteps']):  #(0, 1000):
            startTime = time.time()
            #npfilename, npTargetP, npTargetT, lossValue, l2regValue, npPng = sess.run([filename, targetP, targetT, loss, l2reg, pngTemp])
            npfilename, npTargetP, npTargetT, lossValue, l2regValue = sess.run(
                [filename, targetP, targetT, loss, l2reg])
            duration = time.time() - startTime
            durationSum += duration  # accumulate elapsed time used by the progress report below
            if step != 0:
                l.append(duration)
            print(duration, step, modelParams['maxSteps'])
            lossValueSum += lossValue
            l2regValueSum += l2regValue
            #print(npfilename)
            #print(npTargetT)
            #print(npTargetP)
            ################# DEMO
            for ibx in range(modelParams['activeBatchSize']):
                #print('hello')
                stat = 'False'
                if np.argmax(npTargetT[ibx]) == np.argmax(npTargetP[ibx]):
                    stat = 'True'
                print(npfilename[ibx].decode('ascii'),
                      'Target:', np.argmax(npTargetT[ibx]),
                      'Estimate:', np.argmax(npTargetP[ibx]), stat)
                # npPng = cv2.imread('../Data/cold_wb/testpng352/'+npfilename[ibx].decode('ascii'), -1)
                # #npPng[npPng<24000] = 24000
                # #npPng[npPng>31000] = 31000
                # #hist,bins = np.histogram(npPng.flatten(),9000,[23000,32000])
                # #plt.plot(hist)
                # #plt.show()
                # #npPng.astype('float32')
                # npPng = (npPng-npPng.min())/(npPng.max()-npPng.min())
                # #print(npPng.shape, npPng.min(), npPng.max())
                # #print(npPng.shape, npPng.min(), npPng.max(), npPng.mean())
                # cv2.imshow('npPng', npPng)
                # #print(np.max(npPng[0,:,:,0]), np.max(npPng[0,:,:,1]), np.max(npPng[0,:,:,2]))
                # #print(np.mean(npPng[0,:,:,0]), np.mean(npPng[0,:,:,1]), np.mean(npPng[0,:,:,2]))
                # #p1 = npPng[0,:,:,1]
                # #p2 = npPng[0,:,:,2]
                # #p1 = (p1-np.min(p1)) / (np.max(p1)-np.min(p1))
                # #p2 = (p2-np.min(p2)) / (np.max(p2)-np.min(p2))
                # #cv2.imshow('npPng1', p1)
                # #cv2.imshow('npPng2', p2)
                # cv2.waitKey(0)
            #################
            #p1 = npPng[0,:,:,0]
            #p2 = npPng[0,:,:,1]
            #p1 = (p1-np.min(p1)) / (np.max(p1)-np.min(p1))
            #p2 = (p2-np.min(p2)) / (np.max(p2)-np.min(p2))
            #print(duration, step, modelParams['maxSteps'], 'regul', l2regValue)
            data_output.output(str(10000 + step), npfilename, npTargetP, npTargetT, **modelParams)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Progress: %.2f%%, Elapsed: %.2f mins, Testing Completion in: %.2f mins --- %s' %
                      ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) / (step + 1)) / 60) - (durationSum / 60),
                       datetime.now()))
            #if step == 128:
            #    modelParams['phase'] = 'train'
            #
            #if step == 130:
            #    modelParams['phase'] = 'test'

        print(np.array(l).mean())
        #l0 = np.array(l)
        #l1 = np.array(l[1:-1])
        #print(np.average(l0))
        #print(np.average(l1))
        print('----- maxsteps:', modelParams['maxSteps'],
              '--- loss avg:', lossValueSum / modelParams['maxSteps'],
              '--- l2regu avg:', l2regValueSum / modelParams['maxSteps'])
        print('----- train scaled loss:', (lossValueSum / modelParams['maxSteps']) * modelParams['trainBatchSize'])
        print('----- train scaled l2regu:', (l2regValueSum / modelParams['maxSteps']) * modelParams['trainBatchSize'])
        print(modelParams['outputDir'])
        sess.close()
        tf.reset_default_graph()
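# Usage sketch: loading the frozen model.pb written by the freeze-graph block
# above for standalone inference (TF1.x). The tensor names needed to feed and
# fetch are not shown in this file and must be looked up in the exported
# graph, so nothing here is specific to this model.
def load_frozen_graph(pbPath):
    # Read the serialized GraphDef and import it into a fresh graph.
    with tf.gfile.GFile(pbPath, 'rb') as f:
        graphDef = tf.GraphDef()
        graphDef.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graphDef, name='')
    return graph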
def train():
    _get_control_params()
    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    #meanImgFile = os.path.join(FLAGS.dataDir, "meta")
    #if not os.path.isfile(meanImgFile):
    #    raise ValueError("Warning, no meta file found at %s" % meanImgFile)
    #else:
    #    with open(meanImgFile, "r") as inMeanFile:
    #        meanInfo = json.load(inMeanFile)
    #    meanImg = meanInfo['mean']
    #    # also load the target output sizes
    #    params['targSz'] = meanInfo["targSz"]
    #_setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # BGR to RGB
        #params['meanImg'] = tf.constant(meanImg, dtype=tf.float32)
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images and transformation for model_cnn.
        imagesOrig, images, pOrig, tHAB, tfrecFileIDs = data_input.inputs(**modelParams)
        # Build a Graph that computes the HAB predictions from the inference model.
        pHAB = model_cnn.inference(images, **modelParams)
        # Calculate loss.
        loss = model_cnn.loss(pHAB, tHAB, **modelParams)
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)
        # Restore a checkpoint.
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, modelParams['trainLogDir'] + '/model.ckpt-89999')
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        lossValueSum = 0
        durationSum = 0

        ######### USE LATEST STATE TO WARP IMAGES
        if modelParams['writeWarpedImages']:
            lossValueSum = 0
            stepsForOneDataRound = int((modelParams['numExamples'] / modelParams['activeBatchSize'])) + 1
            print('Warping images with batch size %d in %d steps' %
                  (modelParams['activeBatchSize'], stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                startTime = time.time()
                evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, evlossValue = sess.run(
                    [imagesOrig, images, pOrig, tHAB, pHAB, tfrecFileIDs, loss])
                # 2.0 keeps the division in floating point (2/... truncates to 0 under Python 2)
                lossValueSum += np.sqrt(evlossValue * (2.0 / (modelParams['activeBatchSize'] * 8)))
                durationSum += (time.time() - startTime)
                #### put imageA, imageB warped by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
                data_output.output(evImagesOrig, evImages, evPOrig, evtHAB, evpHAB,
                                   evtfrecFileIDs, **modelParams)
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep) == 0) or (step + 1 == stepsForOneDataRound):
                    print('Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins' %
                          ((100 * step) / stepsForOneDataRound, lossValueSum / (step + 1),
                           durationSum / 60,
                           (((durationSum * stepsForOneDataRound) / (step + 1)) / 60) - (durationSum / 60)))
            print('Average training loss = %.2f - Average time per sample = %.2f s, Steps = %d' %
                  (lossValueSum / step,
                   durationSum / (step * modelParams['activeBatchSize']), step))
def train(modelParams, epochNumber):
    # import the corresponding model as model_cnn; the name is specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' + modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    _setupLogging(os.path.join(modelParams['logDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images inputs for model_cnn.
        filename, pngTemp, targetT = data_input.inputs(**modelParams)
        print('Input ready')
        # Build a Graph that computes the HAB predictions from the inference model
        #targetP = model_cnn.inference(pngTemp, **modelParams)
        targetP, l2reg = model_cnn.inference_l2reg(pngTemp, **modelParams)
        ##############################
        print('Inference ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session ready')
        sess.run(init)
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        # Restore a checkpoint.
        print('Loading Ex-Model with epoch number %d ...' % epochNumber)
        print(' ', modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber))
        saver.restore(sess, (modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber)))
        #print(' ', modelParams['trainLogDir']+'/model.ckpt-'+str(epochNumber))
        #saver.restore(sess, (modelParams['trainLogDir']+'/model.ckpt-'+str(epochNumber)))
        print('Ex-Model loaded')
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner started')
        print('Training started')

        durationSum = 0
        durationSumAll = 0
        l = list()
        import cv2
        for step in xrange(0, modelParams['maxSteps']):  #(0, 1000):
            startTime = time.time()
            #npfilename, npTargetP, npTargetT, npPng = sess.run([filename, targetP, targetT, pngTemp])
            npfilename, npTargetP, npTargetT = sess.run([filename, targetP, targetT])
            duration = time.time() - startTime
            durationSum += duration  # accumulate elapsed time used by the progress report below
            #l.append(duration)
            print(duration, step, modelParams['maxSteps'])
            #print(npfilename)
            #print(npTargetT)
            #print(npTargetP)
            #p1 = npPng[0,:,:,0]
            #p2 = npPng[0,:,:,1]
            #p1 = (p1-np.min(p1)) / (np.max(p1)-np.min(p1))
            #p2 = (p2-np.min(p2)) / (np.max(p2)-np.min(p2))
            #cv2.imshow('img0', p1)
            #cv2.imshow('img1', p2)
            #cv2.waitKey(0)
            #print(npfilename)
            data_output.output(str(10000 + step), npfilename, npTargetP, npTargetT, **modelParams)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins --- %s' %
                      ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) / (step + 1)) / 60) - (durationSum / 60),
                       datetime.now()))
            #if step == 128:
            #    modelParams['phase'] = 'train'
            #
            #if step == 130:
            #    modelParams['phase'] = 'test'
        #print(l)
        #l0 = np.array(l)
        #l1 = np.array(l[1:-1])
        #print(np.average(l0))
        #print(np.average(l1))
        sess.close()
        tf.reset_default_graph()
def train():
    _get_control_params()
    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images and transformation for model_cnn.
        images, pclA, pclB, targetT, tfrecFileIDs = data_input.inputs(**modelParams)
        print('Input ready')
        # Build a Graph that computes the HAB predictions from the inference model.
        targetP = model_cnn.inference(images, **modelParams)
        # Calculate loss. 2 options:
        # use mask to get degrees significant
        # What about adaptive mask to zoom into differences at each CNN stack !!!
        #loss = model_cnn.weighted_loss(targetP, targetT, **modelParams)
        loss = weighted_params_loss(targetP, targetT, **modelParams)
        # pcl based loss
        #loss = model_cnn.pcl_params_loss(pclA, targetP, targetT, **modelParams)
        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Training ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session ready')
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)
        # Restore the latest training checkpoint.
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, modelParams['trainLogDir'] + '/model.ckpt-' +
                      str(modelParams['trainMaxSteps'] - 1))
        print('Model loaded')
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner started')
        print('Write started')

        ######### USE LATEST STATE TO WARP IMAGES
        filesDictionaryAccum = {}
        durationSum = 0
        durationSumAll = 0
        if modelParams['writeWarpedImages']:
            outputDIR = modelParams['warpedOutputFolder'] + '/'
            print("Using final training state to output processed tfrecords\noutput folder: ", outputDIR)
            if tf.gfile.Exists(outputDIR):
                tf.gfile.DeleteRecursively(outputDIR)
            tf.gfile.MakeDirs(outputDIR)
            lossValueSum = 0
            stepsForOneDataRound = int((modelParams['numExamples'] / modelParams['activeBatchSize'])) + 1
            print('Warping %d images with batch size %d in %d steps' %
                  (modelParams['numExamples'], modelParams['activeBatchSize'], stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                startTime = time.time()
                evImages, evPclA, evPclB, evtargetT, evtargetP, evtfrecFileIDs, evlossValue = sess.run(
                    [images, pclA, pclB, targetT, targetP, tfrecFileIDs, loss])
                for fileIdx in range(modelParams['activeBatchSize']):
                    fileIDname = str(evtfrecFileIDs[fileIdx][0]) + "_" + \
                                 str(evtfrecFileIDs[fileIdx][1]) + "_" + \
                                 str(evtfrecFileIDs[fileIdx][2])
                    if (fileIDname in filesDictionaryAccum):
                        filesDictionaryAccum[fileIDname] += 1
                    else:
                        filesDictionaryAccum[fileIDname] = 1
                #### put imageA, imageB warped by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
                data_output.output(evImages, evPclA, evPclB, evtargetT, evtargetP,
                                   evtfrecFileIDs, **modelParams)
                duration = time.time() - startTime
                durationSum += duration
                durationSumAll += duration
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == stepsForOneDataRound):
                    print('Number of files used in training', len(filesDictionaryAccum))
                    print('Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins --- %s' %
                          ((100 * step) / stepsForOneDataRound, evlossValue / (step + 1),
                           durationSum / 60,
                           (((durationSum * stepsForOneDataRound) / (step + 1)) / 60) - (durationSum / 60),
                           datetime.now()))
                    #print('Total Elapsed: %.2f mins, Total Completion in: %.2f mins' %
                    #      (durationSumAll/60, (((durationSumAll*stepsForOneDataRound)/(step+1))/60)-(durationSumAll/60)))
            print('Number of files used in training', len(filesDictionaryAccum))
            filesAccum = np.array(list(filesDictionaryAccum.values()))
            print('Access statistics for each file, mean max min std',
                  np.mean(filesAccum), np.max(filesAccum), np.min(filesAccum), np.std(filesAccum))
            print('Average training loss = %.2f - Average time per sample = %.2f s, Steps = %d' %
                  (evlossValue / modelParams['activeBatchSize'],
                   durationSum / (step * modelParams['activeBatchSize']), step))
def train(modelParams, epochNumber):
    # import the corresponding model as model_cnn; the name is specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' + modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    _setupLogging(os.path.join(modelParams['logDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images inputs for model_cnn.
        filename, pngTemp, targetT = data_input.inputs(**modelParams)
        print('Input ready')
        # Build a Graph that computes the HAB predictions from the inference model
        targetP = model_cnn.inference(pngTemp, **modelParams)
        # loss model
        if modelParams.get('classificationModel'):
            print('Classification model...')
            # loss on last tuple
            loss = model_cnn.loss(targetP, targetT, **modelParams)
        else:
            print('Regression model...')
            # loss on last tuple
            loss = model_cnn.loss(targetP, targetT, **modelParams)
        #####
        ##### # Build a Graph that trains the model with one batch of examples and
        ##### # updates the model parameters.
        ##### opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Testing ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver ready')
        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        print('MergeSummary ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        #opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session ready')
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)
        # Restore a checkpoint.
        print('Loading Ex-Model with epoch number %d ...' % epochNumber)
        saver.restore(sess, (modelParams['trainLogDir'] + '/model.ckpt-' + str(epochNumber)))
        print('Ex-Model loaded')
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner started')
        summaryWriter = tf.summary.FileWriter(modelParams['logDir'], sess.graph)
        print('Testing started')

        durationSum = 0
        durationSumAll = 0
        for step in xrange(0, modelParams['maxSteps']):
            startTime = time.time()
            lossValue, npfilename, npTargetP, npTargetT = sess.run([loss, filename, targetP, targetT])
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'
            data_output.output(str(10000 + step), npfilename, npTargetP, npTargetT, **modelParams)
            print(step, modelParams['maxSteps'], modelParams['phase'])
            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch), loss/batch = %.2f')
                logging.info(format_str % (datetime.now(), step, lossValue,
                                           examplesPerSec, secPerBatch,
                                           lossValue / modelParams['activeBatchSize']))
            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)
            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['logDir'], 'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Progress: %.2f%%, Elapsed: %.2f mins, Testing Completion in: %.2f mins --- %s' %
                      ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) / (step + 1)) / 60) - (durationSum / 60),
                       datetime.now()))
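# The progress prints in these loops all estimate remaining time by linear
# extrapolation: elapsed * maxSteps / (step + 1) is the projected total, and
# subtracting the elapsed time gives the remainder. A small helper capturing
# the same arithmetic (a sketch only; the scripts above inline this expression).
def remaining_minutes(durationSum, step, maxSteps):
    projectedTotal = (durationSum * maxSteps) / (step + 1)  # seconds, extrapolated
    return (projectedTotal - durationSum) / 60              # minutes left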
def train():
    _get_control_params()
    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    _setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images and transformation for model_cnn.
        images, pclA, pclB, tMatT, tfrecFileIDs = data_input.inputs(**modelParams)
        # Build a Graph that computes the HAB predictions from the inference model.
        tMatP = model_cnn.inference(images, **modelParams)
        # Calculate loss. 2 options:
        # use mask to get degrees significant
        # What about adaptive mask to zoom into differences at each CNN stack !!!
        loss = model_cnn.weighted_loss(tMatP, tMatT, **modelParams)
        # pcl based
        #loss = model_cnn.pcl_loss(pclA, tMatP, tMatT, **modelParams)
        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summaryWriter = tf.summary.FileWriter(modelParams['trainLogDir'], sess.graph)

        durationSum = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            _, lossValue = sess.run([opTrain, loss])
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'
            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch), loss/batch = %.2f')
                logging.info(format_str % (datetime.now(), step, lossValue,
                                           examplesPerSec, secPerBatch,
                                           lossValue / modelParams['activeBatchSize']))
            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)
            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['trainLogDir'], 'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins' %
                      ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) / (step + 1)) / 60) - (durationSum / 60)))

        ######### USE LATEST STATE TO WARP IMAGES
        if modelParams['writeWarpedImages']:
            lossValueSum = 0
            stepsForOneDataRound = int((modelParams['numExamples'] / modelParams['activeBatchSize'])) + 1
            print('Warping images with batch size %d in %d steps' %
                  (modelParams['activeBatchSize'], stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                startTime = time.time()
                evImages, evPclA, evPclB, evtMatT, evtMatP, evtfrecFileIDs, evlossValue = sess.run(
                    [images, pclA, pclB, tMatT, tMatP, tfrecFileIDs, loss])
                duration = time.time() - startTime
                durationSum += duration
                #### put imageA, imageB warped by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
                data_output.output(evImages, evPclA, evPclB, evtMatT, evtMatP,
                                   evtfrecFileIDs, **modelParams)
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == stepsForOneDataRound):
                    print('Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins' %
                          ((100 * step) / stepsForOneDataRound, evlossValue / (step + 1),
                           durationSum / 60,
                           (((durationSum * stepsForOneDataRound) / (step + 1)) / 60) - (durationSum / 60)))
            print('Average training loss = %.2f - Average time per sample = %.2f s, Steps = %d' %
                  (evlossValue / modelParams['activeBatchSize'],
                   durationSum / (step * modelParams['activeBatchSize']), step))
def train():
    _get_control_params()
    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    _setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images and transformation for model_cnn.
        images, pclA, pclB, targetT, tfrecFileIDs = data_input.inputs(**modelParams)
        print('Input ready')
        # Build a Graph that computes the HAB predictions from the inference model.
        targetP = model_cnn.inference(images, **modelParams)
        # Calculate loss. 2 options:
        # use mask to get degrees significant
        # What about adaptive mask to zoom into differences at each CNN stack !!!
        ########## model_cnn.loss is called in the loss function
        #loss = weighted_loss(targetP, targetT, **modelParams)
        loss = weighted_params_loss(targetP, targetT, **modelParams)
        # pcl based loss
        #loss = pcl_params_loss(pclA, targetP, targetT, **modelParams)
        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Training ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver ready')
        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        print('MergeSummary ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session ready')
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)
        # restore a saver.
        #saver.restore(sess, (modelParams['trainLogDir'].replace('_B_2','_B_1'))+'/model.ckpt-'+str(modelParams['trainMaxSteps']-1))
        #print('Ex-Model loaded')
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner started')
        summaryWriter = tf.summary.FileWriter(modelParams['trainLogDir'], sess.graph)
        print('Training started')

        filesDictionaryAccumTrain = {}
        durationSum = 0
        durationSumAll = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            _, evtfrecFileIDs, lossValue = sess.run([opTrain, tfrecFileIDs, loss])
            for fileIdx in range(modelParams['activeBatchSize']):
                fileIDname = str(evtfrecFileIDs[fileIdx][0]) + "_" + \
                             str(evtfrecFileIDs[fileIdx][1]) + "_" + \
                             str(evtfrecFileIDs[fileIdx][2])
                if (fileIDname in filesDictionaryAccumTrain):
                    filesDictionaryAccumTrain[fileIDname] += 1
                else:
                    filesDictionaryAccumTrain[fileIDname] = 1
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'
            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch), loss/batch = %.2f')
                logging.info(format_str % (datetime.now(), step, lossValue,
                                           examplesPerSec, secPerBatch,
                                           lossValue / modelParams['activeBatchSize']))
            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)
            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['trainLogDir'], 'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Number of files used in training', len(filesDictionaryAccumTrain))
                print('Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins --- %s' %
                      ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) / (step + 1)) / 60) - (durationSum / 60),
                       datetime.now()))

        print('Number of files used in training', len(filesDictionaryAccumTrain))
        filesAccum = np.array(list(filesDictionaryAccumTrain.values()))
        print('Access statistics for each file, mean max min std',
              np.mean(filesAccum), np.max(filesAccum), np.min(filesAccum), np.std(filesAccum))
        print("\nTraining completed.....\n------------------------------\n"
              "------------------------------\n-------------------------------\n")

        ######### USE LATEST STATE TO WARP IMAGES
        #outputDirFileNum = len([name for name in os.listdir(outputDIR) if os.path.isfile(os.path.join(outputDIR, name))])
        #outputDirFileNum = 0
        filesDictionaryAccum = {}
        durationSum = 0
        durationSumAll = 0
        if modelParams['writeWarpedImages']:
            outputDIR = modelParams['warpedOutputFolder'] + '/'
            print("Using final training state to output processed tfrecords\noutput folder: ", outputDIR)
            if tf.gfile.Exists(outputDIR):
                tf.gfile.DeleteRecursively(outputDIR)
            tf.gfile.MakeDirs(outputDIR)
            lossValueSum = 0
            stepsForOneDataRound = int((modelParams['numExamples'] / modelParams['activeBatchSize']))
            print('Warping %d images with batch size %d in %d steps' %
                  (modelParams['numExamples'], modelParams['activeBatchSize'], stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                #step = 0
                #while outputDirFileNum != 20400:
                startTime = time.time()
                evImages, evPclA, evPclB, evtargetT, evtargetP, evtfrecFileIDs, evlossValue = sess.run(
                    [images, pclA, pclB, targetT, targetP, tfrecFileIDs, loss])
                for fileIdx in range(modelParams['activeBatchSize']):
                    fileIDname = str(evtfrecFileIDs[fileIdx][0]) + "_" + \
                                 str(evtfrecFileIDs[fileIdx][1]) + "_" + \
                                 str(evtfrecFileIDs[fileIdx][2])
                    if (fileIDname in filesDictionaryAccum):
                        filesDictionaryAccum[fileIDname] += 1
                    else:
                        filesDictionaryAccum[fileIDname] = 1
                #### put imageA, imageB warped by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
                data_output.output(evImages, evPclA, evPclB, evtargetT, evtargetP,
                                   evtfrecFileIDs, **modelParams)
                duration = time.time() - startTime
                durationSum += duration
                durationSumAll += duration
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportOutputWrite) == 0) or ((step + 1) == stepsForOneDataRound):
                    print('Number of files used in training', len(filesDictionaryAccum))
                    print('Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins' %
                          ((100 * step) / stepsForOneDataRound, evlossValue / (step + 1),
                           durationSum / 60,
                           (((durationSum * stepsForOneDataRound) / (step + 1)) / 60) - (durationSum / 60)))
                    #print('Total Elapsed: %.2f mins, Training Completion in: %.2f mins' %
                    #      (durationSumAll/60, (((durationSumAll*stepsForOneDataRound)/(step+1))/60)-(durationSumAll/60)))
                #outputDirFileNum = len([name for name in os.listdir(outputDIR) if os.path.isfile(os.path.join(outputDIR, name))])
                #step+=1
            print('Write steps, one round steps', step, stepsForOneDataRound)
            print('Number of files used in training', len(filesDictionaryAccumTrain))
            filesAccum = np.array(list(filesDictionaryAccumTrain.values()))
            print('Training access statistics for each file, mean max min std',
                  np.mean(filesAccum), np.max(filesAccum), np.min(filesAccum), np.std(filesAccum))
            print('Number of files used in training', len(filesDictionaryAccum))
            filesAccum = np.array(list(filesDictionaryAccum.values()))
            print('Write access statistics for each file, mean max min std',
                  np.mean(filesAccum), np.max(filesAccum), np.min(filesAccum), np.std(filesAccum))
            print('Average training loss = %.2f - Average time per sample = %.2f s, Steps = %d' %
                  (evlossValue / modelParams['activeBatchSize'],
                   durationSum / (step * modelParams['activeBatchSize']), step))
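# Helper sketch factoring out the tfrecord file-ID bookkeeping that the
# training and warping loops above both inline (hypothetical refactor; the
# scripts keep the expanded form). It counts how often each "id0_id1_id2"
# record is seen, which feeds the access-statistics prints.
def accumulate_file_ids(filesDictionaryAccum, evtfrecFileIDs, batchSize):
    for fileIdx in range(batchSize):
        fileIDname = '_'.join(str(evtfrecFileIDs[fileIdx][i]) for i in range(3))
        filesDictionaryAccum[fileIDname] = filesDictionaryAccum.get(fileIDname, 0) + 1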
def train():
    _get_control_params()
    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    #meanImgFile = os.path.join(FLAGS.dataDir, "meta")
    #if not os.path.isfile(meanImgFile):
    #    raise ValueError("Warning, no meta file found at %s" % meanImgFile)
    #else:
    #    with open(meanImgFile, "r") as inMeanFile:
    #        meanInfo = json.load(inMeanFile)
    #    meanImg = meanInfo['mean']
    #    # also load the target output sizes
    #    params['targSz'] = meanInfo["targSz"]
    _setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # BGR to RGB
        #params['meanImg'] = tf.constant(meanImg, dtype=tf.float32)
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images and transformation for model_cnn.
        imagesOrig, images, pOrig, tHAB, tfrecFileIDs = data_input.inputs(**modelParams)
        # Build a Graph that computes the HAB predictions from the inference model.
        pHAB = model_cnn.inference(images, **modelParams)
        # Calculate loss.
        loss = model_cnn.loss(pHAB, tHAB, **modelParams)
        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summaryWriter = tf.summary.FileWriter(modelParams['trainLogDir'], sess.graph)

        HABperPixelsum = 0
        durationSum = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            _, lossValue = sess.run([opTrain, loss])
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'
            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch), loss/batch = %.2f')
                logging.info(format_str % (datetime.now(), step, lossValue,
                                           examplesPerSec, secPerBatch,
                                           lossValue / modelParams['activeBatchSize']))
            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)
            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['trainLogDir'], 'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins' %
                      ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) / (step + 1)) / 60) - (durationSum / 60)))

        ######### USE LATEST STATE TO WARP IMAGES
        if modelParams['writeWarpedImages']:
            lossValueSum = 0
            stepsForOneDataRound = int((modelParams['numExamples'] / modelParams['activeBatchSize'])) + 1
            print('Warping images with batch size %d in %d steps' %
                  (modelParams['activeBatchSize'], stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                startTime = time.time()
                evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, evlossValue = sess.run(
                    [imagesOrig, images, pOrig, tHAB, pHAB, tfrecFileIDs, loss])
                duration = time.time() - startTime
                durationSum += duration
                HABRES = evtHAB - evpHAB
                HABperPixel = 0
                for i in xrange(modelParams['activeBatchSize']):
                    H = np.asarray([[HABRES[i][0], HABRES[i][1], HABRES[i][2], HABRES[i][3]],
                                    [HABRES[i][4], HABRES[i][5], HABRES[i][6], HABRES[i][7]]], np.float32)
                    HABperPixel += np.sqrt((H * H).sum(axis=0)).mean()
                HABperPixel = HABperPixel / modelParams['activeBatchSize']
                HABperPixelsum += HABperPixel
                #### put imageA, imageB warped by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
                data_output.output(evImagesOrig, evImages, evPOrig, evtHAB, evpHAB,
                                   evtfrecFileIDs, **modelParams)
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == stepsForOneDataRound):
                    print('Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins' %
                          ((100 * step) / stepsForOneDataRound, HABperPixelsum / (step + 1),
                           durationSum / 60,
                           (((durationSum * stepsForOneDataRound) / (step + 1)) / 60) - (durationSum / 60)))
            print('Average training loss = %.2f - Average time per sample = %.2f s, Steps = %d' %
                  (HABperPixelsum / step,
                   durationSum / (step * modelParams['activeBatchSize']), step))
def train(modelParams, epochNumber):
    # import the corresponding model as model_cnn; the name is specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' + modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])
    _setupLogging(os.path.join(modelParams['logDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images inputs for model_cnn.
        # NOTE: the 'v' branch still returns a tuple while the rest of this
        # function expects the input_data dict, so it would fail if phase == 'v'.
        if modelParams['phase'] == 'v':
            filename, pngTemp, targetT = data_input.inputs_vali(**modelParams)
        else:
            input_data = data_input.inputs(**modelParams)
        print('Input ready')
        #TEST### filenametest, pngTemptest, targetTtest = data_input.inputs_test(**modelParams)
        # Build a Graph that computes the HAB predictions from the inference model
        output_res = model_cnn.inference_l2reg(input_data['image'], **modelParams)
        # loss model
        #loss = model_cnn.loss_l2reg(output_res, input_data, **modelParams)
        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Testing ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver ready')
        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        print('MergeSummary ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        # init = tf.global_variables_initializer()
        #opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session ready')
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        # sess.run(init)
        # Restore a checkpoint.
        print('Loading Ex-Model with epoch number %d ...' % epochNumber)
        print(' ', modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber))
        saver.restore(sess, (modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber)))
        #saver.restore(sess, (modelParams['trainLogDir']+'_30k/model.ckpt-29000'))
        print('Ex-Model loaded')

        if True:  # if True: freeze graph
            tf.train.write_graph(sess.graph.as_graph_def(), '.',
                                 modelParams['trainLogDir'] + '_v/model.pbtxt', as_text=True)
            # Output nodes
            output_node_names = [n.name for n in tf.get_default_graph().as_graph_def().node]
            # Freeze the graph
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names)
            # Save the frozen graph
            with open(modelParams['trainLogDir'] + '_v/model.pb', 'wb') as f:
                f.write(frozen_graph_def.SerializeToString())

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner started')
        summaryWriter = tf.summary.FileWriter(modelParams['logDir'], sess.graph)
        summaryValiWriter = tf.summary.FileWriter(modelParams['logDir'] + '_v', sess.graph)
        #TEST### summaryValiWriter = tf.summary.FileWriter(modelParams['logDir']+'_test', sess.graph)
        print('Testing started')

        durationSum = 0
        durationSumAll = 0
        prevLoss = 99999
        prevValiSumLoss = 99999
        prevaccur = 0
        prevLossStep = 0
        prevStep = 21000
        #TEST### prevTestSumLoss = 99999
        prevStep = int(modelParams['maxSteps'] / 2)
        l = list()
        import cv2
        lossValueSum = 0
        l2regValueSum = 0

        # Count trainable parameters.
        total_parameters = 0
        for variable in tf.trainable_variables():
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            #print(shape)
            #print(len(shape))
            variable_parameters = 1
            for dim in shape:
                #print(dim)
                variable_parameters *= dim.value
            #print(variable_parameters)
            total_parameters += variable_parameters
        print('-----total parameters-------- ', total_parameters)

        true_counter = 0
        for step in xrange(0, modelParams['maxSteps']):  #(0, 1000):
            startTime = time.time()
            #npfilename, npTargetP, npTargetT, npPng = sess.run([filename, targetP, targetT, pngTemp])
            #np_out, np_inp = sess.run([output_res, input_data])
            #npfilename = np_inp['filename']
            #npTargetP = np_out['clsf']
            #npTargetT = np_inp['clsf']
            #npfilename, npTargetT, npTargetP = sess.run([input_data['filename'], input_data['clsf'], output_res['clsf']])
            npfilename, npTargetT, npTargetP, npImageP = sess.run(
                [input_data['filename'], input_data['clsf'],
                 output_res['clsf'], output_res['deconv']])
            #print(modelParams['outputDir']+str(step+10000)+'.jpg')
            # 1-mask imwrite
            cv2.imwrite(modelParams['outputDir'] + str(step + 10000) + '.jpg',
                        npImageP[0, :, :, 0])
            # 6-mask imwrite
            #for hemaps in range(modelParams['num_heatmap']):
            #    cv2.imwrite(modelParams['outputDir']+str(step+10000)+'_'+str(hemaps)+'.jpg', npImageP[0,:,:,hemaps])
            duration = time.time() - startTime
            durationSum += duration  # accumulate elapsed time used by the progress report below
            if step != 0:
                l.append(duration)
            print(duration, step, modelParams['maxSteps'])
            #lossValueSum += lossValue
            for i in range(modelParams['activeBatchSize']):
                if np.argmax(npTargetP[i, :]) == np.argmax(npTargetT[i, :]):
                    match = True
                    true_counter += 1
                else:
                    match = False
                print(np.argmax(npTargetP[i, :]), np.argmax(npTargetT[i, :]),
                      match, '----counter:', true_counter)
                #inp_out_img = np.concatenate((np_inp['deconv'][i,:,:], np_out['deconv'][i,:,:]), axis=0)
                #cv2.imshow('in --- out', cv2.resize(inp_out_img,(350,300)))
                #cv2.waitKey(0)
            #print(npfilename)
            #print(npTargetT)
            #print(npTargetP)
            #p1 = npPng[0,:,:,0]
            #p2 = npPng[0,:,:,1]
            #p1 = (p1-np.min(p1)) / (np.max(p1)-np.min(p1))
            #p2 = (p2-np.min(p2)) / (np.max(p2)-np.min(p2))
            #cv2.imshow('img0', p1)
            #cv2.imshow('img1', p2)
            #cv2.waitKey(0)
            #print(npfilename)
            print(duration, step, modelParams['maxSteps'])
            data_output.output(str(10000 + step), npfilename, npTargetP, npTargetT, **modelParams)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Progress: %.2f%%, Elapsed: %.2f mins, Testing Completion in: %.2f mins --- %s' %
                      ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) / (step + 1)) / 60) - (durationSum / 60),
                       datetime.now()))
            #if step == 128:
            #    modelParams['phase'] = 'train'
            #
            #if step == 130:
            #    modelParams['phase'] = 'test'

        print(np.array(l).mean())
        #l0 = np.array(l)
        #l1 = np.array(l[1:-1])
        #print(np.average(l0))
        #print(np.average(l1))
        print('----- maxsteps:', modelParams['maxSteps'], '--- step:', step)
        #print('----- maxsteps:', modelParams['maxSteps'], '--- loss avg:', lossValueSum/modelParams['maxSteps'])
        #print('----- train scaled loss:', (lossValueSum/modelParams['maxSteps'])*modelParams['trainBatchSize'])
        print(modelParams['outputDir'])
        sess.close()
        tf.reset_default_graph()
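# Helper sketch equivalent to the trainable-parameter count inlined in the
# test loops above (TF1.x graph mode assumed): the product of each variable's
# dimensions, summed over all trainable variables in the default graph.
def count_trainable_parameters():
    total = 0
    for variable in tf.trainable_variables():
        n = 1
        for dim in variable.get_shape():
            n *= dim.value
        total += n
    return total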