def train(self, conn, dataset, config, comment, isUseBestEpoch=True, tune=False, nbTuning=20, showPlots=True):
    """Train the model, optionally tuning beta/keep_prob and re-running at the best epoch.

    One DB run is created per pass.  With ``tune`` enabled, ``nbTuning`` passes are
    executed, each with a log-uniformly drawn ``beta`` and ``keep_prob``.  With
    ``isUseBestEpoch`` enabled, one extra final pass re-trains with the hyper
    parameters and epoch count of the pass that reported the best
    "best epoch DEV accuracy".

    Args:
        conn: DB connection handed to every ``db`` helper; committed here.
        dataset: dataset descriptor; ``dataset["id"]`` is read.
        config: configuration object; ``config.getHyperParams``, ``config["id"]``
            and ``config["structure"]`` are read, ``config["idHyperParams"]`` is
            written when a tuning round improves the DEV accuracy.
        comment: free text stored with each run.
        isUseBestEpoch: when true, track the best epoch and add a final rerun pass.
        tune: when true, draw random beta / keep_prob for each pass.
        nbTuning: number of tuning passes (forced to 1 when ``tune`` is false).
        showPlots: forwarded to ``optimizeModel`` (plots are disabled while tuning).
    """
    # Hyper parameters: work on a copy of the configured values
    confHyperParams = config.getHyperParams(conn, dataset)
    runHyperParams = {}
    runHyperParams.update(confHyperParams["hyperParameters"])

    # Best-epoch bookkeeping (-1 means "nothing recorded yet")
    maxBestNbEpoch, maxBestAccuracyDevEpoch = -1, -1
    initialNbEpoch = runHyperParams[const.KEY_NUM_EPOCHS]
    maxBestBeta = runHyperParams[const.KEY_BETA]
    maxBestKeep_prob = runHyperParams[const.KEY_KEEP_PROB]

    ## Prepare hyper params
    if tune:
        # Tune params: bounds of the log-uniform draws
        beta_min = 0.000000000000001
        beta_max = 0.5
        keep_prob_min = 0.5
        keep_prob_max = 1
        tuning = {}
        maxAccuracyDev = -9999999999999
        maxIdRun = -1
        # Set as soon as a tuning round improves maxAccuracyDev
        idMaxHp = None
    else:
        nbTuning = 1

    # Display hyper parameters info
    logger.info("Start Learning rate : " + str(runHyperParams[const.KEY_START_LEARNING_RATE]))
    logger.info("Num epoch : " + str(runHyperParams[const.KEY_NUM_EPOCHS]))
    logger.info("Minibatch size : " + str(runHyperParams[const.KEY_MINIBATCH_SIZE]))
    logger.info("Beta : " + str(runHyperParams[const.KEY_BETA]))
    logger.info("keep_prob : " + str(runHyperParams[const.KEY_KEEP_PROB]))
    logger.info("isLoadWeights : " + str(runHyperParams[const.KEY_USE_WEIGHTS]))

    # Start time
    tsGlobalStart = time.time()

    if tune:
        # Make sure the seed differs between executions.
        # The original passed the function object `time.time` (missing call).
        random.seed(time.time())

    # Nb of pass to run
    nbPass = nbTuning
    # Pass used to recalculate training with best epoch (-1: no such pass)
    bestEpochPass = -1
    if isUseBestEpoch:
        # Add one for best epoch rerun; it is the last pass
        nbPass += 1
        bestEpochPass = nbPass

    for j in range(1, nbPass + 1):
        isBestEpochPass = (j == bestEpochPass)

        if isBestEpochPass:
            if maxBestNbEpoch < 0:
                # No best epoch was recorded (still the -1 sentinel): training
                # with a negative epoch count is meaningless, so skip the rerun.
                logger.warning("No best epoch recorded, skipping best epoch run")
                break

            logger.info(
                "***************************************************************************"
            )
            logger.info(
                "Running interrupted gradient descent, nb epochs={0}".format(maxBestNbEpoch))
            logger.info(
                "***************************************************************************"
            )
            # select best epoch nb and the hyper parameters that produced it
            runHyperParams[const.KEY_BETA] = maxBestBeta
            runHyperParams[const.KEY_KEEP_PROB] = maxBestKeep_prob
            runHyperParams[const.KEY_NUM_EPOCHS] = maxBestNbEpoch
        elif tune:
            logger.info("*****************************")
            logger.info("Tune round " + str(j) + "/" + str(nbTuning))
            logger.info("*****************************")

            # calculate beta (log-uniform draw)
            logBeta = random.uniform(math.log10(beta_min), math.log10(beta_max))
            beta = math.pow(10, logBeta)
            logger.info("Beta = " + str(beta))

            # calculate keep_prob (log-uniform draw)
            logKeep_prob = random.uniform(math.log10(keep_prob_min), math.log10(keep_prob_max))
            keep_prob = math.pow(10, logKeep_prob)
            logger.info("keep_prob = " + str(keep_prob))

            # update hyper params
            runHyperParams[const.KEY_BETA] = beta
            runHyperParams[const.KEY_KEEP_PROB] = keep_prob

        # Create run
        self.idRun = db.createRun(conn, self.idDataset, config["id"], runHyperParams)

        # Update run before calling model
        db.updateRunBefore(conn, self.idRun, comment=comment,
                           system_info=self.systemInfo, data_info=self.dataInfo)

        # Run model and update DB run with extra info
        accuracyDev, accuracyTrain, bestNbEpoch, bestAccuracyDevEpoch = self.optimizeModel(
            conn, self.idRun, config["structure"], runHyperParams,
            isCalculateBestEpoch=isUseBestEpoch and not isBestEpochPass,
            show_plot=showPlots and not tune,
            extractImageErrors=not tune)

        # Print run
        run = db.getRun(conn, self.idRun)
        logger.info("Run stored in DB: " + str(run))

        # Update selected run
        db.updateHpRunSelectorForRun(conn, dataset["id"], config["id"], run["id"])
        conn.commit()

        # Manage best epoch params: only outside the finalization pass, and only
        # when this pass beats the best "best epoch DEV accuracy" seen so far
        if (isUseBestEpoch and not isBestEpochPass
                and bestAccuracyDevEpoch > maxBestAccuracyDevEpoch):
            maxBestAccuracyDevEpoch = bestAccuracyDevEpoch
            maxBestNbEpoch = bestNbEpoch
            maxBestBeta = runHyperParams[const.KEY_BETA]
            maxBestKeep_prob = runHyperParams[const.KEY_KEEP_PROB]

        if tune:
            # Store results.  Read beta / keep_prob from runHyperParams so the
            # best-epoch pass records the values it actually ran with, not the
            # leftovers of the last tuning round.
            tuning[j] = {
                "beta": runHyperParams[const.KEY_BETA],
                "keep_prob": runHyperParams[const.KEY_KEEP_PROB],
                "accuracyDev": accuracyDev,
                "accuracyTrain": accuracyTrain
            }

            # Max
            if accuracyDev > maxAccuracyDev:
                maxAccuracyDev = accuracyDev
                maxHyperParams = tuning[j]
                maxIdRun = self.idRun

                # get or create hyperparams
                idMaxHp = db.getOrCreateHyperParams(conn, runHyperParams)
                # Update and save config
                config["idHyperParams"] = idMaxHp
                db.updateConfig(conn, config)
                # Update selected run
                db.updateHpRunSelectorForHp(conn, dataset["id"], config["id"], idMaxHp)
                # Commit result
                conn.commit()

            # print max
            logger.info("Max DEV accuracy: " + str(maxAccuracyDev))
            logger.info("Max hyper params:")
            logger.info(maxHyperParams)
            logger.info("Max Best epoch DEV accuracy : " + str(maxBestAccuracyDevEpoch))
            logger.info("Max Best epoch nb : " + str(maxBestNbEpoch))
        else:
            # Not in tuning mode: if the best epoch is the nominal number of
            # epochs, the rerun would repeat the same training, so stop here
            if maxBestNbEpoch == initialNbEpoch:
                break

    # End of loops
    if tune:
        # Print tuning (placeholder added: extra args need a format spec)
        logger.info("Tuning: %s", tuning)
        logger.info("")
        logger.info("Max DEV accuracy : " + str(maxAccuracyDev))
        logger.info("Max Best epoch DEV accuracy : " + str(maxBestAccuracyDevEpoch))
        logger.info("Max Best epoch nb : " + str(maxBestNbEpoch))
        logger.info("Max hyper params idRun: " + str(maxIdRun))

        # Change selected hyper-parameters
        # Get max dev accuracy from all runs
        (_, absoluteMaxAccuracyDev, _) = db.getBestHyperParams(conn, dataset["id"], config["id"])
        # If our max dev accuracy is better, change current selected hps
        if idMaxHp is not None and absoluteMaxAccuracyDev <= maxAccuracyDev:
            db.updateHpRunSelectorForHp(conn, dataset["id"], config["id"], idMaxHp)
            # Commit result
            conn.commit()

    # End time
    tsGlobalEnd = time.time()
    globalElapsedSeconds = int(round(tsGlobalEnd - tsGlobalStart))

    logger.info("Finished in " + str(globalElapsedSeconds) + " seconds")
def optimizeModel(self, conn, idRun, structure, hyperParams, print_cost=True, show_plot=True, extractImageErrors=True):
    """Train the model with (mini-batch) gradient descent and record the run in the DB.

    Reads the hyper parameters from ``hyperParams``, builds the model through
    ``self.modelInit``, loops over epochs (adding up to 100 extra epochs when
    the last epoch cost is above the minimum cost seen), persists the model,
    evaluates TRN / DEV accuracy and stores the results with
    ``db.updateRunAfter``.

    Args:
        conn: DB connection passed to the ``db`` helpers.
        idRun: id of the run to persist the model for and to update.
        structure: model structure, forwarded to ``self.modelInit``.
        hyperParams: dict of hyper parameters keyed by ``const.KEY_*``.
        print_cost: when true, log costs and periodic DEV accuracy.
        show_plot: when true, plot costs and DEV accuracies at the end.
        extractImageErrors: when true, compute per-tag error statistics.

    Returns:
        Tuple ``(accuracyDev, accuracyTrain)``.
    """
    costs = []  # To keep track of the cost
    DEV_accuracies = []  # for DEV accuracy graph

    # Get hyper parameters from dico
    self.beta = hyperParams[const.KEY_BETA]
    self.keep_prob = hyperParams[const.KEY_KEEP_PROB]
    self.num_epochs = hyperParams[const.KEY_NUM_EPOCHS]
    self.minibatch_size = hyperParams[const.KEY_MINIBATCH_SIZE]
    self.start_learning_rate = hyperParams[const.KEY_START_LEARNING_RATE]
    self.learning_rate_decay_nb = hyperParams[const.KEY_LEARNING_RATE_DECAY_NB_EPOCH]
    self.learning_rate_decay_percent = hyperParams[const.KEY_LEARNING_RATE_DECAY_PERCENT]
    self.useBatchNormalization = hyperParams[const.KEY_USE_BATCH_NORMALIZATION]

    # Convert ( nbLines, dims... ) to ( None, dims... )
    X_shape = [None]
    X_shape.extend(self.dataInfo[const.KEY_TRN_X_SHAPE][1:])
    X_type = self.datasetTrn.X.dtype

    Y_shape = [None]
    Y_shape.extend(self.dataInfo[const.KEY_TRN_Y_SHAPE][1:])
    Y_type = self.datasetTrn.Y.dtype

    self.modelInit(structure, X_shape, X_type, Y_shape, Y_type, training=True)

    seed = 3  # to keep consistent results

    # Start the session to compute the tensorflow graph
    with self.getSession() as sess:

        # initialize session variables
        self.initSessionVariables(sess)

        # current iteration
        iteration = -1

        ## optimisation may overshoot locally
        ## To avoid returning an overshoot, we detect it and run extra epochs if needed
        finalizationMode = False
        current_num_epochs = hyperParams[const.KEY_NUM_EPOCHS]
        iEpoch = 0
        minCost = 99999999999999
        minCostFinalization = 99999999999999
        finished = False

        # Bound before the loop so the final cost log is safe even if the loop
        # body never runs (negative epoch count)
        epoch_cost = 0.

        # When do we display epoch stats.  Clamped to 1: with fewer than one
        # epoch the ceil() is 0 and `iEpoch % 0` would raise ZeroDivisionError.
        nbStatusEpoch = max(1, math.ceil(current_num_epochs / 20))

        # Interruption flag.  No SIGINT handler is installed here, so it only
        # changes if something else sets self.interrupted during training.
        self.interrupted = False

        self.initializeDataset(sess, self.datasetTrn)

        # Start time
        tsStart = time.time()

        # time to make sure we trace something each N seconds
        tsTraceStart = tsStart

        # Do the training loop
        while (not self.interrupted and not finished and (iEpoch <= current_num_epochs)):

            epoch_cost = 0.  # Defines a cost related to an epoch

            if self.minibatch_size < 0:

                # No mini-batch : do a gradient descent for whole data
                iteration += 1

                # NOTE(review): this call's argument order differs from the
                # mini-batch call below; only one of them can match the
                # runIteration signature -- confirm against its definition.
                epoch_cost = self.runIteration(
                    iEpoch,
                    1,
                    sess,
                    self.datasetTrn.X,
                    self.datasetTrn.Y,
                    self.keep_prob,
                )

            else:
                # Minibatch mode, non handled by data source
                m = self.dataInfo[const.KEY_TRN_X_SIZE]  # number of examples in the train set
                # number of minibatches of size minibatch_size in the train set
                num_minibatches = math.ceil(m / self.minibatch_size)
                # New seed each epoch so the shuffling differs but stays repeatable
                seed = seed + 1
                minibatches = self.random_mini_batches(
                    self.datasetTrn.X, self.datasetTrn.Y, self.minibatch_size, seed)

                iterationMinibatch = 0

                for minibatch in minibatches:

                    iteration += 1
                    iterationMinibatch += 1

                    # Select a minibatch
                    (minibatch_X, minibatch_Y) = minibatch

                    minibatch_cost = self.runIteration(
                        sess, (minibatch_X, minibatch_Y), iteration,
                        num_minibatches, self.keep_prob)

                    epoch_cost += minibatch_cost / num_minibatches

                    if print_cost and iteration == 0:
                        # Display iteration 0 to allow verify cost calculation across machines
                        logger.info("Current cost epoch %i; iteration %i; %f",
                                    iEpoch, iteration, epoch_cost)

                    # time to trace?
                    tsTraceNow = time.time()
                    tsTraceElapsed = tsTraceNow - tsTraceStart

                    # Each 60 seconds
                    if tsTraceElapsed >= 60:
                        logger.info("Current cost epoch %i; iteration %i; %f",
                                    iEpoch, iteration, epoch_cost)
                        # reset trace start
                        tsTraceStart = tsTraceNow

            if print_cost and iEpoch % nbStatusEpoch == 0:
                logger.info("Cost after epoch %i; iteration %i; %f",
                            iEpoch, iteration, epoch_cost)
                if iEpoch != 0:
                    # Performance counters
                    curElapsedSeconds, curPerfIndex = self.getPerfCounters(
                        tsStart, iEpoch, self.datasetTrn.X.shape)
                    # Placeholders added: extra logging args need a format spec
                    logger.info(" current: elapsedTime: %s perfIndex: %s",
                                curElapsedSeconds, curPerfIndex)

                # calculate DEV accuracy
                DEV_accuracy = self.accuracyEval(
                    (self.datasetDev.X, self.datasetDev.Y), "dev")
                logger.info(" current: DEV accuracy: %f", DEV_accuracy)
                DEV_accuracies.append(DEV_accuracy)

            if print_cost == True and iEpoch % 5 == 0:
                costs.append(epoch_cost)

            # Record min cost
            minCost = min(minCost, epoch_cost)

            # Next epoch
            iEpoch += 1
            self.var_numEpoch.load(iEpoch)

            # Close to finish?
            if not finalizationMode and (iEpoch > current_num_epochs):
                # Activate finalization mode
                finalizationMode = True
                # local overshoot?
                if epoch_cost > minCost:
                    # Yes, run some extra epochs
                    logger.warning(
                        "Local cost overshoot detected, adding maximum 100 epochs to leave local cost overshoot"
                    )
                    current_num_epochs += 100
                    minCostFinalization = minCost

            if finalizationMode:
                # Check overshoot is finished
                if epoch_cost <= minCostFinalization:
                    # finished
                    finished = True

        self.modelOptimizeEnd(sess)

        if self.interrupted:
            logger.info("Training has been interrupted by Ctrl-C")
            logger.info("Store current epoch number '" + str(iEpoch) + "' in run hyper parameters")
            # Get runs and hps
            run = db.getRun(conn, self.idRun)
            idRunHps = run["idHyperParams"]
            runHps = db.getHyperParams(conn, idRunHps)["hyperParameters"]
            # Modify num epochs
            runHps[const.KEY_NUM_EPOCHS] = iEpoch
            # update run
            db.updateRun(conn, self.idRun, runHps)

        # Final cost
        print("Parameters have been trained!")
        logger.info("Final cost: %s", epoch_cost)

        ## Elapsed (seconds)
        elapsedSeconds, perfIndex = self.getPerfCounters(
            tsStart, iEpoch, self.datasetTrn.X.shape)
        perfInfo = {}

        logger.info("Elapsed (s): %s", elapsedSeconds)
        logger.info("Perf index : %s", perfIndex)

        self.persistModel(sess, idRun)

        accuracyTrain = self.accuracyEval(
            (self.datasetTrn.X, self.datasetTrn.Y), "trn")
        print("Train Accuracy:", accuracyTrain)

        accuracyDev = self.accuracyEval(
            (self.datasetDev.X, self.datasetDev.Y), "dev")
        print("Dev Accuracy:", accuracyDev)

        if show_plot:
            # plot the cost
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations (per tens)')
            plt.title("Start learning rate =" + str(self.start_learning_rate))
            plt.show()

            # plot the accuracies
            plt.plot(np.squeeze(DEV_accuracies))
            plt.ylabel('DEV accuracy')
            plt.xlabel('iterations (100)')
            plt.title("Start learning rate =" + str(self.start_learning_rate))
            plt.show()

        ## Errors
        resultInfo = {}

        if extractImageErrors:

            # Lists of OK for training
            oks_train = self.correctPredictionEval(
                (self.datasetTrn.X, self.datasetTrn.Y))
            map1, map2 = self.statsExtractErrors("train",
                                                 dataset=self.datasetTrn,
                                                 oks=oks_train,
                                                 show_plot=show_plot)
            # Errors nb by data tag
            resultInfo[const.KEY_TRN_NB_ERROR_BY_TAG] = map1
            resultInfo[const.KEY_TRN_PC_ERROR_BY_TAG] = map2

            oks_dev = self.correctPredictionEval(
                (self.datasetDev.X, self.datasetDev.Y))
            map1, map2 = self.statsExtractErrors("dev",
                                                 dataset=self.datasetDev,
                                                 oks=oks_dev,
                                                 show_plot=show_plot)
            # Errors nb by data tag
            resultInfo[const.KEY_DEV_NB_ERROR_BY_TAG] = map1
            resultInfo[const.KEY_DEV_PC_ERROR_BY_TAG] = map2

        # Update DB run after execution, add extra info
        db.updateRunAfter(conn,
                          idRun,
                          perf_info=perfInfo,
                          result_info=resultInfo,
                          perf_index=perfIndex,
                          elapsed_second=elapsedSeconds,
                          train_accuracy=accuracyTrain.astype(float),
                          dev_accuracy=accuracyDev.astype(float))

        return accuracyDev, accuracyTrain
def optimizeModel( self, conn, idRun, structure, hyperParams, print_cost = True, show_plot = True, extractImageErrors = True, isCalculateBestEpoch = False ):
    """Train the model from tf.data input pipelines and record the run in the DB.

    Builds shuffled/batched TRN and batched DEV datasets (in-memory tensors or
    TFRecord files), runs mini-batch iterations until the TRN iterator is
    exhausted (``num_epochs`` repetitions) or ``self.interrupted`` is set,
    persists the model, evaluates TRN / DEV accuracy and stores the results
    with ``db.updateRunAfter``.

    Args:
        conn: DB connection passed to the ``db`` helpers.
        idRun: id of the run to persist the model for and to update.
        structure: model structure, forwarded to ``self.modelInit``.
        hyperParams: dict of hyper parameters keyed by ``const.KEY_*``.
        print_cost: when true, log costs and periodic DEV accuracy.
        show_plot: when true, plot costs and DEV accuracies at the end.
        extractImageErrors: when true, compute per-tag error statistics.
        isCalculateBestEpoch: when true, remember the epoch with the best DEV
            accuracy among the status epochs of the second half of training.

    Returns:
        Tuple ``(accuracyDev, accuracyTrain, maxBestNbEpoch, maxBestAccuracyDevEpoch)``;
        the last two stay at -1 when no best epoch was recorded.

    Raises:
        ValueError: if the mini-batch size is not strictly positive.
    """
    tf.reset_default_graph()   # Forget the past
    tf.set_random_seed( 1 )    # Repeatable operations

    costs = []                 # To keep track of the cost
    DEV_accuracies = []        # for DEV accuracy graph

    # Get hyper parameters from dico
    self.beta = hyperParams[ const.KEY_BETA ]
    self.keep_prob = hyperParams[ const.KEY_KEEP_PROB ]
    self.num_epochs = hyperParams[ const.KEY_NUM_EPOCHS ]
    self.minibatch_size = hyperParams[ const.KEY_MINIBATCH_SIZE ]

    # Validate before the size is used as a divisor below (0 would otherwise
    # surface as a ZeroDivisionError, and a negative size slipped through the
    # division before being rejected)
    if ( self.minibatch_size <= 0 ) :
        raise ValueError( "Mini-batch size is required" )

    # Minibatch mode, non handled by data source
    m = self.dataInfo[ const.KEY_TRN_X_SIZE ]   # m : number of examples in the train set
    # number of minibatches of size minibatch_size in the train set
    self.numMinibatches = math.ceil( m / self.minibatch_size )

    self.start_learning_rate = hyperParams[ const.KEY_START_LEARNING_RATE ]

    # Decay per epoch NB
    decayEpochNb = hyperParams[ const.KEY_LEARNING_RATE_DECAY_NB_EPOCH ]

    # Multiply by nb mini-batches by epoch to get decay by epoch
    self.learning_rate_decay_nb = decayEpochNb * self.numMinibatches
    self.learning_rate_decay_percent = hyperParams[ const.KEY_LEARNING_RATE_DECAY_PERCENT ]

    self.useBatchNormalization = hyperParams[ const.KEY_USE_BATCH_NORMALIZATION ]

    # Convert ( dims... ) to ( None, dims... )
    X_shape = [ None ]
    X_shape.extend( self.dataInfo[ const.KEY_TRN_X_SHAPE ] )
    X_type = tf.float32

    # Shape of one real mini-batch, used for performance counters
    X_real_shape = [ self.minibatch_size ]
    X_real_shape.extend( self.dataInfo[ const.KEY_TRN_X_SHAPE ] )

    Y_shape = [ None ]
    Y_shape.extend( self.dataInfo[ const.KEY_TRN_Y_SHAPE ] )
    Y_type = tf.float32

    # Init model
    self.modelInit( structure, X_shape, X_type, Y_shape, Y_type, training=True )

    # Prepare TRN reader
    if ( self.datasetTrn.inMemory ) :
        # In memory reader
        self.tfDatasetTrn = tf.data.Dataset.from_tensor_slices( ( self.datasetTrn.X, self.datasetTrn.Y, ) )
    else :
        # TF record file based reader
        self.tfDatasetTrn = tf.data.TFRecordDataset( self.datasetTrn.XY )

    # Shuffle data
    self.tfDatasetTrn = self.tfDatasetTrn.shuffle( buffer_size=100000, reshuffle_each_iteration=True, seed=1 )
    # Pre-fetch for performance
    self.tfDatasetTrn = self.tfDatasetTrn.prefetch( self.minibatch_size * 16 )
    # Data set, minibatch_size slices
    self.tfDatasetTrn = self.tfDatasetTrn.batch( self.minibatch_size )
    # Trn Data set, repeat num_epochs (fed through a placeholder at init time)
    self.tfDatasetTrn = self.tfDatasetTrn.repeat( self.phTrnNumEpochs )

    # Prepare DEV reader
    if ( self.datasetDev.inMemory ) :
        # In memory reader
        self.tfDatasetDev = tf.data.Dataset.from_tensor_slices( ( self.datasetDev.X, self.datasetDev.Y ) )
    else :
        # TF record file based reader
        self.tfDatasetDev = tf.data.TFRecordDataset( self.datasetDev.XY )

    # Pre-fetch and minibatch_size slices
    self.tfDatasetDev = self.tfDatasetDev.prefetch( self.minibatch_size * 16 ).batch( self.minibatch_size )

    trnIterator = self.tfDatasetTrn.make_initializable_iterator( shared_name="trnIterator" )
    devIterator = self.tfDatasetDev.make_initializable_iterator( shared_name="devIterator" )

    # Start the session to compute the tensorflow graph
    with self.getSession() as sess:

        self.initSessionVariables( sess )

        # initialise variables and iterators
        sess.run( tf.global_variables_initializer() )
        sess.run( [ trnIterator.initializer, devIterator.initializer ], { self.phTrnNumEpochs : self.num_epochs } )

        # The `Iterator.string_handle()` method returns a tensor that can be evaluated
        # and used to feed the `handle` placeholder.
        trnHandle = sess.run( trnIterator.string_handle() )
        devHandle = sess.run( devIterator.string_handle() )

        # Interruption flag.  No SIGINT handler is installed here, so it only
        # changes if something else sets self.interrupted during training.
        self.interrupted = False

        # Training loop state (epochs and iterations are 1-based)
        iEpoch = 1
        minibatch_cost = 0
        epoch_cost = 0.   # Defines a cost related to an epoch
        iteration = 1

        # Nb status epoch : if we reach it, calculate DEV efficiency.
        # Clamped to 1 so that `iEpoch % nbStatusEpoch` can never divide by zero.
        nbStatusEpoch = max( 1, math.ceil( self.num_epochs / 20 ) )

        # Start time
        tsStart = time.time()

        # time to make sure we write epoch status each N seconds
        tsStatusEpochStart = tsStart
        secStatusEpoch = 120   # Status epoch each 120 seconds

        # time to make sure we trace something each N seconds
        tsTraceStart = tsStart
        secTrace = 60          # trace each 60 seconds

        # Best epoch values (-1: nothing recorded)
        maxBestAccuracyDevEpoch = -1
        maxBestNbEpoch = -1

        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners( sess=sess, coord=coord )

        lastEpochCost = 0

        try :
            # The loop normally ends through OutOfRangeError once the TRN
            # iterator has delivered num_epochs repetitions of the data.
            while ( not self.interrupted ) :

                minibatch_cost = self.runIteration( sess, trnHandle, self.keep_prob, iteration, self.numMinibatches )
                epoch_cost += minibatch_cost / self.numMinibatches

                if ( print_cost and iteration == 1 ) :
                    # Display the first iteration to allow verifying cost calculation
                    # across machines (iterations start at 1, so the original
                    # `== 0` test never fired)
                    logger.info( "Current cost epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )

                # Tracing
                if ( print_cost and logger.isEnabledFor( logging.DEBUG ) ) :
                    logger.debug( "Current cost epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )

                # time to trace?
                tsTraceNow = time.time()
                tsTraceElapsed = tsTraceNow - tsTraceStart

                if ( tsTraceElapsed >= secTrace ) :
                    logger.info( "Current cost epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )
                    # reset trace start
                    tsTraceStart = tsTraceNow

                # Current epoch finished?
                if ( ( iteration % self.numMinibatches ) == 0 ) :

                    # time to status epoch?
                    tsEpochStatusNow = time.time()
                    tsEpochStatusElapsed = tsEpochStatusNow - tsStatusEpochStart

                    # print epoch cost
                    if print_cost and ( ( iEpoch % nbStatusEpoch ) == 0 or ( tsEpochStatusElapsed >= secStatusEpoch ) ) :

                        logger.info( "Cost after epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )

                        if ( iEpoch != 1 ) :
                            # Performance counters, for current batch, m data * nbStatus epochs
                            curElapsedSeconds, curPerfIndex = self.getPerfCounters( tsStart, iEpoch, X_real_shape, m * nbStatusEpoch )
                            logger.info( " current: elapsedTime; {0}; perfIndex; {1:.2f}".format( curElapsedSeconds, curPerfIndex ) )

                        # calculate DEV accuracy
                        # Rewind DEV iterator
                        sess.run( [ devIterator.initializer ] )
                        DEV_accuracy = self.accuracyEval( devHandle, "dev" )
                        logger.info( " current: DEV accuracy: {:.3%}".format( DEV_accuracy ) )
                        DEV_accuracies.append( DEV_accuracy )

                        # Update best epoch var, only in the second half of training
                        if ( isCalculateBestEpoch and ( iEpoch > ( self.num_epochs / 2 ) ) ) :
                            # max reached?
                            if ( DEV_accuracy > maxBestAccuracyDevEpoch ) :
                                maxBestAccuracyDevEpoch = DEV_accuracy
                                maxBestNbEpoch = iEpoch

                        # Reset status epoch timer
                        tsStatusEpochStart = tsEpochStatusNow

                    # Store cost for graph
                    if print_cost and iEpoch % 5 == 0 :
                        costs.append( epoch_cost )

                    # epoch changed
                    iEpoch += 1
                    lastEpochCost = epoch_cost
                    epoch_cost = 0

                iteration += 1

        except tf.errors.OutOfRangeError:
            # walk finished
            # decrement iteration and epoch that didn't happen
            iteration -= 1
            iEpoch -= 1
            epoch_cost = lastEpochCost

        finally :
            # When done, ask the threads to stop.
            coord.request_stop()

        # Wait for threads to finish.
        coord.join( threads )

        self.modelOptimizeEnd( sess )

        if ( self.interrupted ) :
            logger.info( "Training has been interrupted by Ctrl-C" )
            logger.info( "Store current epoch number '" + str( iEpoch ) + "' in run hyper parameters" )
            # Get runs and hps
            run = db.getRun( conn, self.idRun )
            idRunHps = run[ "idHyperParams" ]
            runHps = db.getHyperParams( conn, idRunHps )[ "hyperParameters" ]
            # Modify num epochs
            runHps[ const.KEY_NUM_EPOCHS ] = iEpoch
            # update run
            db.updateRun( conn, self.idRun, runHps )

        # Final cost
        logger.info( "Parameters have been trained!")
        logger.info( "Final cost after epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )

        ## Elapsed (seconds), for whole data set * nb epochs
        elapsedSeconds, perfIndex = self.getPerfCounters( tsStart, iEpoch, X_real_shape, m * self.num_epochs )
        perfInfo = {}

        logger.info( "Elapsed (s): {0}".format( elapsedSeconds ) )
        logger.info( "Perf index : {0:.2f}".format( perfIndex ) )

        self.persistModel( sess, idRun )

        # Rewind data sets, 1 epoch for TRN data set
        sess.run( [ trnIterator.initializer, devIterator.initializer ], { self.phTrnNumEpochs : 1 } )

        accuracyTrain = self.accuracyEval( trnHandle, "trn" )
        logger.info( "TRN Accuracy: {:.3%}".format( accuracyTrain ) )

        accuracyDev = self.accuracyEval( devHandle, "dev" )
        logger.info( "DEV Accuracy: {:.3%}".format( accuracyDev ) )

        if ( isCalculateBestEpoch ) :
            logger.info( "Best DEV nb epochs: {0}".format( maxBestNbEpoch ) )
            logger.info( "Best DEV Accuracy : {:.3%}".format( maxBestAccuracyDevEpoch ) )

        if ( show_plot ) :
            # plot the cost
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations (per tens)')
            plt.title("Start learning rate =" + str( self.start_learning_rate ) )
            plt.show()

            # plot the accuracies
            plt.plot( np.squeeze( DEV_accuracies ) )
            plt.ylabel('DEV accuracy')
            plt.xlabel('iterations (100)')
            plt.title("Start learning rate =" + str( self.start_learning_rate ) )
            plt.show()

        ## Errors
        resultInfo = {}

        if ( extractImageErrors ) :

            # Rewind data sets, 1 epoch for TRN data set
            sess.run( [ trnIterator.initializer, devIterator.initializer ], { self.phTrnNumEpochs : 1 } )

            # Lists of OK for training
            oks_train = self.correctPredictionEval( trnHandle )
            map1, map2 = self.statsExtractErrors( "train", dataset = self.datasetTrn, oks = oks_train, show_plot=show_plot )
            # Errors nb by data tag
            resultInfo[ const.KEY_TRN_NB_ERROR_BY_TAG ] = map1
            resultInfo[ const.KEY_TRN_PC_ERROR_BY_TAG ] = map2

            oks_dev = self.correctPredictionEval( devHandle )
            map1, map2 = self.statsExtractErrors( "dev", dataset = self.datasetDev, oks = oks_dev, show_plot=show_plot )
            # Errors nb by data tag
            resultInfo[ const.KEY_DEV_NB_ERROR_BY_TAG ] = map1
            resultInfo[ const.KEY_DEV_PC_ERROR_BY_TAG ] = map2

        # Update DB run after execution, add extra info
        db.updateRunAfter( conn, idRun,
            perf_info = perfInfo, result_info=resultInfo,
            perf_index=perfIndex,
            elapsed_second = elapsedSeconds,
            train_accuracy=accuracyTrain.astype( float ),
            dev_accuracy=accuracyDev.astype( float )
        )

        return accuracyDev, accuracyTrain, maxBestNbEpoch, maxBestAccuracyDevEpoch
# Get config hyper parameters hyperParams = db.getHyperParams( conn, idDataset, config[ "id" ] ) elif ( buttonClicked == "Predict" ) : # hyper parameters depend on choice choiceHp = predictParams[ "choiceHyperParams" ] if ( choiceHp == 1 ) : # Last idRun idRun = db.getRunIdLast( conn, config[ "id" ] ) # Config hyper params run = db.getRun( conn, idRun ) hyperParams = db.getHyperParamsById( conn, run[ "idHyperParams" ] ) elif ( choiceHp == 2 ) : # Get best hyper parameters ( hyperParams, _, idRun ) = db.getBestHyperParams( conn, idDataset, idConfig ) # Check run structure and pixel size match with conf run = db.getRun( conn, idRun ) runStructure = None if ( run[ "conf_saved_info" ] != None ) : runStructure = run[ "conf_saved_info" ][ "structure" ] # trim spaces runStructure = runStructure.strip()
def train( tune = True) :
    """Load the dataset, then train the model once or over random tuning rounds.

    With ``tune`` enabled, 20 rounds are run, each with a log-uniformly drawn
    ``beta`` and ``keep_prob``; otherwise a single run uses the hyper
    parameters hard-coded below.  Every round is stored as a run in the DB.
    The run comment is read interactively with ``input()``.

    Args:
        tune: when true, run the random hyper parameter search.
    """
    # hyper parameters
    hyperParams = {}

    # use tensorboard
    isUseTensorboard = False

    # system info
    systemInfo = getSystemInfo( tf.__version__ )

    # Settings tried previously, kept for reference:
    #   structure [ 1 ]             : minibatch 64, epochs 100, lr 0.003, beta 0, keep_prob 1
    #   structure [ 50, 24, 1 ]     : minibatch 64, epochs 2000, lr 0.0001,
    #                                 tuning run id=42: beta 2.4233061084214308e-15,
    #                                 keep_prob 10.646631549280114
    #   structure [ 50, 24, 12, 1 ] : epochs 1000, lr 0.0001, beta 0, keep_prob 1
    #   structure [ 100, 48, 1 ]    : beta 1.6980624617370184e-15, keep_prob 0.724123179663981
    #   structure [ 25, 12, 1 ]     : epochs 1500, lr 0.0001, beta 6.531654400821318e-14,
    #                                 keep_prob 0.8213956561201344

    ## Units of layers
    structure = [ 100, 48, 1 ]
    hyperParams[ const.KEY_MINIBATCH_SIZE ] = 64
    hyperParams[ const.KEY_NUM_EPOCHS ] = 2500
    hyperParams[ const.KEY_USE_WEIGHTS ] = False
    hyperParams[ const.KEY_START_LEARNING_RATE ] = 0.0001
    hyperParams[ const.KEY_BETA ] = 0
    hyperParams[ const.KEY_KEEP_PROB ] = 1

    if tune :
        # Tune params: bounds of the log-uniform draws
        beta_min = 0.000000000000001
        beta_max = 0.5
        keep_prob_min = 0.5
        keep_prob_max = 1

        nbTuning = 20
        tuning = {}

        maxAccuracyDev = -9999999999999
        maxIdRun = -1
    else :
        nbTuning = 1

    # Loading the dataset
    X_train_orig, Y_train_orig, PATH_train, TAG_train, WEIGHT_train, \
    X_dev_orig , Y_dev_orig, PATH_dev, TAG_dev= \
        load_dataset( hyperParams[ const.KEY_USE_WEIGHTS ] )

    # Flatten the training and test images: one column per example
    X_train_flatten = X_train_orig.reshape( X_train_orig.shape[0], -1 ).T
    X_dev_flatten = X_dev_orig.reshape( X_dev_orig.shape[0], -1 ).T
    # Normalize image vectors
    X_train = X_train_flatten / 255.
    X_dev = X_dev_flatten / 255.

    Y_train = Y_train_orig
    Y_dev = Y_dev_orig

    print( "Structure:", structure )
    print()
    print ("number of training examples = " + str(X_train.shape[1]))
    print ("number of test examples = " + str(X_dev.shape[1]))
    print ("X_train shape: " + str(X_train.shape))
    print ("Y_train shape: " + str(Y_train.shape))
    print ("X_test shape: " + str(X_dev.shape))
    print ("Y_test shape: " + str(Y_dev.shape))
    print ()
    print ("Start Learning rate :", str( hyperParams[ const.KEY_START_LEARNING_RATE ] ) )
    print ("Num epoch :", str( hyperParams[ const.KEY_NUM_EPOCHS ] ) )
    print ("Minibatch size :", str( hyperParams[ const.KEY_MINIBATCH_SIZE ] ) )
    print ("Beta :", str( hyperParams[ const.KEY_BETA ] ) )
    print ("keep_prob :", str( hyperParams[ const.KEY_KEEP_PROB ] ) )
    print ("isLoadWeights :", hyperParams[ const.KEY_USE_WEIGHTS ] )
    if ( hyperParams[ const.KEY_USE_WEIGHTS ] ) :
        print ( " Weights_train shape :", WEIGHT_train.shape )

    dataInfo = {
        const.KEY_TRN_SIZE : str( X_train.shape[1] ),
        const.KEY_DEV_SIZE : str( X_dev.shape[1] ),
        const.KEY_TRN_SHAPE : str( X_train.shape ),
        const.KEY_DEV_SHAPE : str( X_dev.shape ),
        # TRN label info comes from Y_train (the original copied Y_dev here)
        const.KEY_TRN_Y_SIZE : str( Y_train.shape[1] ),
        const.KEY_TRN_Y_SHAPE : str( Y_train.shape ),
        const.KEY_DEV_Y_SIZE : str( Y_dev.shape[1] ),
        const.KEY_DEV_Y_SHAPE : str( Y_dev.shape ),
    }

    print()
    comment = input( "Run comment: " )

    # Start time
    tsGlobalStart = time.time()

    # Init DB
    with db.initDb( APP_KEY, DB_DIR ) as conn:

        for j in range( 1, nbTuning + 1 ) :

            if tune:
                print( "*****************************" )
                print( "Tune round", str( j ), "/", str( nbTuning ) )
                print( "*****************************" )

                # calculate beta (log-uniform draw)
                logBeta = random.uniform( math.log10( beta_min ), math.log10( beta_max ) )
                beta = math.pow( 10, logBeta )
                print( "Beta = " + str( beta ))

                # calculate keep_prob (log-uniform draw)
                logKeep_prob = random.uniform( math.log10( keep_prob_min ), math.log10( keep_prob_max ) )
                keep_prob = math.pow( 10, logKeep_prob )
                print( "keep_prob = " + str( keep_prob ))

                # update hyper params
                hyperParams[ const.KEY_BETA ] = beta
                hyperParams[ const.KEY_KEEP_PROB ] = keep_prob

            # Create run
            idRun = db.createRun( conn )

            # Update run before calling model
            db.updateRunBefore(
                conn, idRun,
                structure=structure, comment=comment,
                system_info=systemInfo, hyper_params=hyperParams, data_info=dataInfo
            )

            # Run model and update DB run with extra info
            _, accuracyDev, accuracyTrain = model(
                conn, idRun, structure,
                X_train, Y_train, PATH_train, TAG_train, WEIGHT_train,
                X_dev, Y_dev, PATH_dev, TAG_dev,
                X_train_orig, X_dev_orig,
                hyperParams,
                isTensorboard = isUseTensorboard,
                show_plot = not tune, extractImageErrors = not tune
            )

            # Print run
            run = db.getRun( conn, idRun )
            print( "Run stored in DB:", str( run ) )

            if tune :
                # Store results
                tuning[ j ] = {
                    "beta": beta, "keep_prob": keep_prob,
                    "accuracyDev": accuracyDev, "accuracyTrain": accuracyTrain
                }

                # Max
                if ( accuracyDev > maxAccuracyDev ) :
                    maxAccuracyDev = accuracyDev
                    maxHyperParams = tuning[ j ]
                    maxIdRun = idRun

                # print max
                print( "Max DEV accuracy:", maxAccuracyDev )
                print( "Max hyper params:" )
                print( maxHyperParams )

        if tune :
            # Print tuning
            print( "Tuning:" , tuning )
            print()
            print( "Max DEV accuracy :", maxAccuracyDev )
            print( "Max hyper params idRun:", maxIdRun )

    # End time
    tsGlobalEnd = time.time()
    globalElapsedSeconds = int( round( tsGlobalEnd - tsGlobalStart ) )

    print( "Finished in", globalElapsedSeconds, "seconds" )