Example 1
def train():
    _get_control_params()

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images and transformation for model_cnn.
        images, pclA, pclB, targetT, tfrecFileIDs = data_input.inputs(
            **modelParams)
        print('Input        ready')
        # Build a Graph that computes the HAB predictions from the
        # inference model.
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session      ready')

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        #saver.restore(sess, (modelParams['trainLogDir'].replace('_B_2','_B_1'))+'/model.ckpt-'+str(modelParams['trainMaxSteps']-1))
        #print('Ex-Model     loaded')

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner  started')

        print('Training     started')
        durationSum = 0
        durationSumAll = 0
        for step in xrange(20):
            startTime = time.time()
            evalTfrecFileIDs = sess.run(tfrecFileIDs)
            duration = time.time() - startTime
            durationSum += duration
            print(evalTfrecFileIDs)
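
Note: these snippets come from a TensorFlow 1.x codebase written for Python 2 (xrange; substitute range on Python 3) and are listed without their module-level context. A minimal sketch of the imports and globals they assume follows; data_input, data_output, model_cnn, modelParams, and FLAGS are repository-specific names, not standard APIs.

import os
import time
import shutil
import logging
import importlib
from datetime import datetime

import numpy as np
import tensorflow as tf  # TensorFlow 1.x API (tf.ConfigProto, tf.Session, ...)

# Repository-specific modules and globals assumed by the snippets (sketch):
# import data_input, data_output      # input pipeline / tfrecords writers
# model_cnn = importlib.import_module('Model_Factory.' + modelParams['modelName'])
# modelParams = {...}                  # filled from a JSON config or _get_control_params()
# FLAGS = tf.app.flags.FLAGS           # printOutStep, summaryWriteStep, ProgressStepReportStep, ...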
Example 2
def test():
    _get_control_params()

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    _setupLogging(os.path.join(modelParams['testLogDir'], "genlog"))

    with tf.Graph().as_default():
        # Get images and transformation for model_cnn.
        imagesOrig, images, pOrig, tHAB, tfrecFileIDs = data_input.inputs(**modelParams)

        # Build a Graph that computes the HAB predictions from the
        # inference model.
        pHAB = model_cnn.inference(images, **modelParams)

        # Calculate loss.
        loss = model_cnn.loss(pHAB, tHAB, **modelParams)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, modelParams['trainLogDir']+'/model.ckpt-'+str(modelParams['trainMaxSteps']-1))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summaryWriter = tf.summary.FileWriter(modelParams['testLogDir'], sess.graph)

        lossValueSum = 0
        durationSum = 0
        HABperPixelsum = 0
        maxErrbatchsum = 0

        print('Warping images with batch size %d in %d steps' % (modelParams['activeBatchSize'], modelParams['maxSteps']))

        testValueSampleResults = list()
        stepFinal = 0
        for step in xrange(modelParams['maxSteps']):
            # run and get inference
            startTime = time.time()
            evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, evlossValue = sess.run([imagesOrig, images, pOrig, tHAB, pHAB, tfrecFileIDs, loss])
            duration = time.time() - startTime
            # Calculate actual pixel errors for the current batch with inference results 
            durationSum += duration
            HABRES = evtHAB-evpHAB
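            # NOTE: only the batch from step 1 is evaluated; step is reset to 0
            # and the loop exits through the break below, so the statistics
            # printed afterwards cover a single batch.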
            if step==1:
                step = 0
                HABperPixel = 0
                maxErrbatch = 0
                for i in xrange(modelParams['activeBatchSize']):
                    H = np.asarray([[HABRES[i][0], HABRES[i][1], HABRES[i][2], HABRES[i][3]],
                                    [HABRES[i][4], HABRES[i][5], HABRES[i][6], HABRES[i][7]]], np.float32)
                    HABperPixel += np.sqrt((H*H).sum(axis=0)).mean()
                    testValueSampleResults.append(HABperPixel)
                    maxErr = np.asarray([[evtHAB[i][0], evtHAB[i][1], evtHAB[i][2], evtHAB[i][3]],
                                         [evtHAB[i][4], evtHAB[i][5], evtHAB[i][6], evtHAB[i][7]]], np.float32)
                    maxErrbatch += np.sqrt((maxErr*maxErr).sum(axis=0)).mean()
                HABperPixel = HABperPixel/modelParams['activeBatchSize']
                maxErrbatch = maxErrbatch/modelParams['activeBatchSize']
                HABperPixelsum += HABperPixel
                maxErrbatchsum += maxErrbatch

                # print out control outputs 
                if step % FLAGS.printOutStep == 0:
                    numExamplesPerStep = modelParams['activeBatchSize']
                    examplesPerSec = numExamplesPerStep / duration
                    secPerBatch = float(duration)
                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch) pixel_err_avg = %.2f max_err_avg = %.2f')
                    logging.info(format_str % (datetime.now(), step, HABperPixel,
                                               examplesPerSec, secPerBatch,
                                               HABperPixelsum/(step+1), maxErrbatchsum/(step+1)))
                # write summaries
                if (step % FLAGS.summaryWriteStep == 0) or ((step+1) == modelParams['maxSteps']):
                    summaryStr = sess.run(summaryOp)
                    summaryWriter.add_summary(summaryStr, step)

                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step+1) == modelParams['maxSteps']):
                    print('Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Testing Completion in: %.2f mins, max_err_avg = %.2f' %
                            ((100*step)/modelParams['maxSteps'], HABperPixelsum/(step+1), durationSum/60,
                             (((durationSum*modelParams['maxSteps'])/(step+1))/60)-(durationSum/60), maxErrbatchsum/(step+1)))
                # Write test outputs tfrecords
                #### put imageA, warped imageB by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
                if (step == 0):
                    data_output.output_with_test_image_files(evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, **modelParams)
                else:
                    data_output.output(evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, **modelParams)
                stepFinal = step
                break
        step = stepFinal+1
        print('Average test pixel error = %.2f - Average max pixel error = %.2f - Average time per sample= %.2f s, Steps = %d' %
                        (HABperPixelsum/(step), maxErrbatchsum/(step), durationSum/(step*modelParams['activeBatchSize']), step))
Example 3
def train(modelParams, epochNumber):
    # import corresponding model name as model_cnn, specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' +
                                        modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    _setupLogging(os.path.join(modelParams['logDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images inputs for model_cnn.
        if modelParams['phase'] == 'v':
            filename, pngTemp, targetT = data_input.inputs_vali(**modelParams)
        else:
            filename, pngTemp, targetT = data_input.inputs(**modelParams)
        print('Input        ready')
        #TEST###        filenametest, pngTemptest, targetTtest = data_input.inputs_test(**modelParams)

        # Build a Graph that computes the HAB predictions from the
        # inference model
        #targetP = model_cnn.inference(pngTemp, **modelParams)
        targetP, l2reg = model_cnn.inference_l2reg(pngTemp, **modelParams)
        #TEST###        targetPtest = model_cnn.inference(pngTemptest, **modelParams)
        print(targetP.get_shape())
        # loss model
        if modelParams.get('classificationModel'):
            print('Classification model...')
            # loss on last tuple
            #loss = model_cnn.loss(targetP, targetT, **modelParams)
            loss = model_cnn.loss_l2reg(targetP, targetT, l2reg, **modelParams)


#TEST###            losstest = model_cnn.loss(targetPtest, targetTtest, **modelParams)
        else:
            print('Regression model...')
            # loss on last tuple
            loss = model_cnn.loss(targetP, targetT, **modelParams)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Training     ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver        ready')

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        print('MergeSummary ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        #opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session      ready')

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        if epochNumber > 0:
            print('Loading Ex-Model with epoch number %d ...' % epochNumber)
            saver.restore(sess, (modelParams['trainLogDir'] + '/model.ckpt-' +
                                 str(epochNumber)))
            #saver.restore(sess, (modelParams['trainLogDir']+'_30k/model.ckpt-29000'))
            print('Ex-Model     loaded')

        tf.train.write_graph(sess.graph.as_graph_def(),
                             '.',
                             modelParams['trainLogDir'] + '/model.pbtxt',
                             as_text=True)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner  started')

        summaryWriter = tf.summary.FileWriter(modelParams['logDir'],
                                              sess.graph)
        summaryValiWriter = tf.summary.FileWriter(modelParams['logDir'] + '_v',
                                                  sess.graph)
        #TEST###        summaryValiWriter = tf.summary.FileWriter(modelParams['logDir']+'_test', sess.graph)

        total_parameters = 0
        for variable in tf.trainable_variables():
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            #print(shape)
            #print(len(shape))
            variable_parameters = 1
            for dim in shape:
                #print(dim)
                variable_parameters *= dim.value
                #print(variable_parameters)
            total_parameters += variable_parameters
        print('-----total parameters-------- ', total_parameters)
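        # A compact equivalent of the parameter-count loop above (sketch, TF1 API):
        # total_parameters = int(sum(np.prod(v.get_shape().as_list())
        #                            for v in tf.trainable_variables()))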

        print('Training     started')
        durationSum = 0
        durationSumAll = 0
        prevLoss = 99999
        prevValiSumLoss = 99999
        prevaccur = 0
        prevLossStep = 0
        prevStep = 21000
        #TEST###        prevTestSumLoss = 99999
        prevStep = int(modelParams['maxSteps'] / 2)
        for step in xrange(epochNumber, modelParams['maxSteps']):
            startTime = time.time()
            #_, lossValue = sess.run([opTrain, loss])
            _, lossValue, l2regValue = sess.run([opTrain, loss, l2reg])
            #print(lossValue, l2regValue)
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'

            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch), loss/batch = %.2f, l2reg = %.2f')
                logging.info(format_str %
                             (datetime.now(), step, lossValue, examplesPerSec,
                              secPerBatch, lossValue /
                              modelParams['activeBatchSize'], l2regValue))

            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)
            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (
                    step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['logDir'],
                                              'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print(
                    'Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins --- %s'
                    %
                    ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                     (((durationSum * modelParams['maxSteps']) /
                       (step + 1)) / 60) - (durationSum / 60), datetime.now()))
Example 4
def test(modelParams):
    # import corresponding model name as model_cnn, specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' +
                                        modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    _setupLogging(os.path.join(modelParams['testLogDir'], "genlog"))

    with tf.Graph().as_default():
        # Get images and transformation for model_cnn.
        images, pclA, pclB, tMatT, tfrecFileIDs = data_input.inputs(
            **modelParams)
        # Build a Graph that computes the HAB predictions from the
        # inference model.
        tMatP = model_cnn.inference(images, **modelParams)

        # Calculate loss. 2 options:

        # use mask to get degrees significant
        loss = model_cnn.weighted_loss(tMatP, tMatT, **modelParams)

        # pcl based
        #loss = model_cnn.pcl_loss(pclA, tMatP, tMatT, **modelParams)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(
            sess, modelParams['trainLogDir'] + '/model.ckpt-' +
            str(modelParams['trainMaxSteps'] - 1))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summaryWriter = tf.summary.FileWriter(modelParams['testLogDir'],
                                              sess.graph)

        lossValueSum = 0
        durationSum = 0
        durationSumAll = 0
        print('Warping images with batch size %d in %d steps' %
              (modelParams['activeBatchSize'], modelParams['maxSteps']))

        testValueSampleResults = list()
        stepFinal = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            evImages, evPclA, evPclB, evtMatT, evtMatP, evtfrecFileIDs, evlossValue = sess.run(
                [images, pclA, pclB, tMatT, tMatP, tfrecFileIDs, loss])
            duration = time.time() - startTime
            durationSum += duration
            lossValueSum += evlossValue

            #_write_to_csv(modelParams['testLogDir']+'/testRes'+jsonToRead.replace('.json', '_T.csv'), evtMatT)
            #_write_to_csv(modelParams['testLogDir']+'/testRes'+jsonToRead.replace('.json', '_P.csv'), evtMatP)

            # Write test outputs tfrecords
            #### put imageA, warped imageB by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
            #if (step == 0):
            #    data_output.output_with_test_image_files(evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, **modelParams)
            #else:
            data_output.output(evImages, evPclA, evPclB, evtMatT, evtMatP,
                               evtfrecFileIDs, **modelParams)
            duration = time.time() - startTime
            durationSumAll += duration

            # print out control outputs
            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch) avg_err_over_time = %.2f')
                logging.info(format_str %
                             (datetime.now(), step, evlossValue,
                              examplesPerSec, secPerBatch, lossValueSum /
                              (step + 1)))
            # write summaries
            if (step % FLAGS.summaryWriteStep
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)

            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print(
                    'Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Testing Completion in: %.2f mins'
                    % ((100 * step) / modelParams['maxSteps'], lossValueSum /
                       (step + 1), durationSum / 60,
                       (((durationSum * modelParams['maxSteps']) /
                         (step + 1)) / 60) - (durationSum / 60)))
            # print('Total Elapsed: %.2f mins, Training Completion in: %.2f mins' %
            #             durationSumAll/60, (((durationSumAll*stepsForOneDataRound)/(step+1))/60)-(durationSumAll/60))
            stepFinal = step

        step = stepFinal + 1
        print(
            'Average test error = %.2f - Average time per sample= %.2f s, Steps = %d, ex/sec = %.2f'
            % (lossValueSum / (step), durationSum /
               (step * modelParams['activeBatchSize']), step,
               modelParams['numExamples'] / durationSum))
Example 5
def train():
    _get_control_params()

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    #meanImgFile = os.path.join(FLAGS.dataDir, "meta")
    #if not os.path.isfile(meanImgFile):
    #    raise ValueError("Warning, no meta file found at %s" % meanImgFile)
    #else:
    #    with open(meanImgFile, "r") as inMeanFile:
    #        meanInfo = json.load(inMeanFile)
    #
    #    meanImg = meanInfo['mean']
    #
    #    # also load the target output sizes
    #    params['targSz'] = meanInfo["targSz"]

    #_setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # BGR to RGB
        #params['meanImg'] = tf.constant(meanImg, dtype=tf.float32)

        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images and transformation for model_cnn.
        imagesOrig, images, pOrig, tHAB, tfrecFileIDs = data_input.inputs(
            **modelParams)

        # Build a Graph that computes the HAB predictions from the
        # inference model.
        pHAB = model_cnn.inference(images, **modelParams)

        # Calculate loss.
        loss = model_cnn.loss(pHAB, tHAB, **modelParams)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        opCheck = tf.add_check_numerics_ops()
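        # NOTE: opCheck is never passed to sess.run() below, so the numeric
        # checks added by tf.add_check_numerics_ops() are not evaluated.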
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, modelParams['trainLogDir'] + '/model.ckpt-89999')

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        lossValueSum = 0
        durationSum = 0

        ######### USE LATEST STATE TO WARP IMAGES
        if modelParams['writeWarpedImages']:
            lossValueSum = 0
            stepsForOneDataRound = int((modelParams['numExamples'] /
                                        modelParams['activeBatchSize'])) + 1
            print('Warping images with batch size %d in %d steps' %
                  (modelParams['activeBatchSize'], stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                startTime = time.time()
                evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, evlossValue = sess.run(
                    [
                        imagesOrig, images, pOrig, tHAB, pHAB, tfrecFileIDs,
                        loss
                    ])
                # 2.0 avoids Python 2 integer division (2 // (batchSize * 8)
                # would be 0 and zero out the accumulated loss)
                lossValueSum += np.sqrt(
                    evlossValue * (2.0 / (modelParams['activeBatchSize'] * 8)))
                durationSum += (time.time() - startTime)
                #### put imageA, warped imageB by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
                data_output.output(evImagesOrig, evImages, evPOrig, evtHAB,
                                   evpHAB, evtfrecFileIDs, **modelParams)
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep)
                        == 0) or (step + 1 == stepsForOneDataRound):
                    print(
                        'Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins'
                        % ((100 * step) / stepsForOneDataRound, lossValueSum /
                           (step + 1), durationSum / 60,
                           (((durationSum * stepsForOneDataRound) /
                             (step + 1)) / 60) - (durationSum / 60)))
            print(
                'Average training loss = %.2f - Average time per sample= %.2f s, Steps = %d'
                % (lossValueSum / step, durationSum /
                   (step * modelParams['activeBatchSize']), step))
Example 6
def train(modelParams, epochNumber):
    # import corresponding model name as model_cnn, specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' +
                                        modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    _setupLogging(os.path.join(modelParams['logDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)
        # Get images inputs for model_cnn.
        if modelParams['phase'] == 'v':
            filename, pngTemp, targetT = data_input.inputs_vali(**modelParams)
        else:
            filename, pngTemp, targetT = data_input.inputs(**modelParams)
        print('Input        ready')
        #TEST###        filenametest, pngTemptest, targetTtest = data_input.inputs_test(**modelParams)

        # Build a Graph that computes the HAB predictions from the
        # inference model
        #targetP = model_cnn.inference(pngTemp, **modelParams)
        targetP, l2reg = model_cnn.inference_l2reg(pngTemp, **modelParams)
        #TEST###        targetPtest = model_cnn.inference(pngTemptest, **modelParams)
        print(targetP.get_shape())
        # loss model
        if modelParams.get('classificationModel'):
            print('Classification model...')
            # loss on last tuple
            #loss = model_cnn.loss(targetP, targetT, **modelParams)
            loss = model_cnn.loss_l2reg(targetP, targetT, l2reg, **modelParams)


#TEST###            losstest = model_cnn.loss(targetPtest, targetTtest, **modelParams)
        else:
            print('Regression model...')
            # loss on last tuple
            loss = model_cnn.loss(targetP, targetT, **modelParams)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Testing     ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver        ready')

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        print('MergeSummary ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        #        init = tf.global_variables_initializer()

        #opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session      ready')

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        #        sess.run(init)

        # restore a saver.
        print('Loading Ex-Model with epoch number %d ...' % epochNumber)
        print('     ',
              modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber))
        saver.restore(
            sess,
            (modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber)))
        #saver.restore(sess, (modelParams['trainLogDir']+'_30k/model.ckpt-29000'))
        print('Ex-Model     loaded')

        if True:
            # if True: freeze graph
            tf.train.write_graph(sess.graph.as_graph_def(),
                                 '.',
                                 modelParams['trainLogDir'] + '_v/model.pbtxt',
                                 as_text=True)
            # Output nodes
            output_node_names = [
                n.name for n in tf.get_default_graph().as_graph_def().node
            ]
            # Freeze the graph
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names)
            # Save the frozen graph
            with open(modelParams['trainLogDir'] + '_v/model.pb', 'wb') as f:
                f.write(frozen_graph_def.SerializeToString())
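            # NOTE: listing every node as an output keeps the whole graph in
            # the frozen .pb; usually only the real inference outputs are
            # named, e.g. output_node_names = ['logits'] (hypothetical name).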

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner  started')

        summaryWriter = tf.summary.FileWriter(modelParams['logDir'],
                                              sess.graph)
        summaryValiWriter = tf.summary.FileWriter(modelParams['logDir'] + '_v',
                                                  sess.graph)
        #TEST###        summaryValiWriter = tf.summary.FileWriter(modelParams['logDir']+'_test', sess.graph)

        print('Testing     started')
        durationSum = 0
        durationSumAll = 0
        prevLoss = 99999
        prevValiSumLoss = 99999
        prevaccur = 0
        prevLossStep = 0
        prevStep = 21000
        #TEST###        prevTestSumLoss = 99999
        prevStep = int(modelParams['maxSteps'] / 2)
        l = list()
        import cv2  # only used by the commented-out visualization below
        lossValueSum = 0
        l2regValueSum = 0

        total_parameters = 0
        for variable in tf.trainable_variables():
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            #print(shape)
            #print(len(shape))
            variable_parameters = 1
            for dim in shape:
                #print(dim)
                variable_parameters *= dim.value
            #print(variable_parameters)
            total_parameters += variable_parameters
        print('-----total parameters-------- ', total_parameters)

        for step in xrange(0, modelParams['maxSteps']):  #(0, 1000):
            startTime = time.time()
            #npfilename, npTargetP, npTargetT, lossValue, l2regValue, npPng = sess.run([filename, targetP, targetT, loss, l2reg, pngTemp])
            npfilename, npTargetP, npTargetT, lossValue, l2regValue = sess.run(
                [filename, targetP, targetT, loss, l2reg])
            duration = time.time() - startTime
            durationSum += duration
            if step != 0:
                l.append(duration)
            print(duration, step, modelParams['maxSteps'])
            lossValueSum += lossValue
            l2regValueSum += l2regValue
            #print(npfilename)
            #print(npTargetT)
            #print(npTargetP)
            ################# DEMO
            for ibx in range(modelParams['activeBatchSize']):
                #print('hello')
                stat = 'False'
                if np.argmax(npTargetT[ibx]) == np.argmax(npTargetP[ibx]):
                    stat = 'True'
                print(npfilename[ibx].decode('ascii'), 'Target:',
                      np.argmax(npTargetT[ibx]), 'Estimate:',
                      np.argmax(npTargetP[ibx]), stat)
                # npPng = cv2.imread('../Data/cold_wb/testpng352/'+npfilename[ibx].decode('ascii'), -1)
                # #npPng[npPng<24000] = 24000
                # #npPng[npPng>31000] = 31000
                # #hist,bins = np.histogram(npPng.flatten(),9000,[23000,32000])
                # #plt.plot(hist)
                # #plt.show()
                # #npPng.astype('float32')
                # npPng = (npPng-npPng.min())/(npPng.max()-npPng.min())
                # #print(npPng.shape, npPng.min(), npPng.max())
                # #print(npPng.shape, npPng.min(), npPng.max(), npPng.mean())
                # cv2.imshow('npPng', npPng)
                # #print(np.max(npPng[0,:,:,0]), np.max(npPng[0,:,:,1]), np.max(npPng[0,:,:,2]))
                # #print(np.mean(npPng[0,:,:,0]), np.mean(npPng[0,:,:,1]), np.mean(npPng[0,:,:,2]))
                # #p1 = npPng[0,:,:,1]
                # #p2 = npPng[0,:,:,2]
                # #p1 = (p1-np.min(p1)) / (np.max(p1)-np.min(p1))
                # #p2 = (p2-np.min(p2)) / (np.max(p2)-np.min(p2))
                # #cv2.imshow('npPng1', p1)
                # #cv2.imshow('npPng2', p2)
                # cv2.waitKey(0)

            #################
            #p1 = npPng[0,:,:,0]
            #p2 = npPng[0,:,:,1]
            #p1 = (p1-np.min(p1)) / (np.max(p1)-np.min(p1))
            #p2 = (p2-np.min(p2)) / (np.max(p2)-np.min(p2))

            #print(duration, step, modelParams['maxSteps'], 'regul', l2regValue)
            data_output.output(str(10000 + step), npfilename, npTargetP,
                               npTargetT, **modelParams)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print(
                    'Progress: %.2f%%, Elapsed: %.2f mins, Testing Completion in: %.2f mins --- %s'
                    %
                    ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                     (((durationSum * modelParams['maxSteps']) /
                       (step + 1)) / 60) - (durationSum / 60), datetime.now()))
            #if step == 128:
            #    modelParams['phase'] = 'train'
            #
            #if step == 130:
            #    modelParams['phase'] = 'test'
        print(np.array(l).mean())
        #l0 = np.array(l)
        #l1 = np.array(l[1:-1])
        #print(np.average(l0))
        #print(np.average(l1))
        print('----- maxsteps:', modelParams['maxSteps'], '--- loss avg:',
              lossValueSum / modelParams['maxSteps'], '--- l2regu avg:',
              l2regValueSum / modelParams['maxSteps'])
        print('----- train scaled loss:',
              (lossValueSum / modelParams['maxSteps']) *
              modelParams['trainBatchSize'])
        print('----- train scaled l2regu:',
              (l2regValueSum / modelParams['maxSteps']) *
              modelParams['trainBatchSize'])
        print(modelParams['outputDir'])

        sess.close()
    tf.reset_default_graph()
Example 7
def train():
    _get_control_params()

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images and transformation for model_cnn.
        images, pclA, pclB, targetT, tfrecFileIDs = data_input.inputs(
            **modelParams)
        print('Input        ready')
        # Build a Graph that computes the HAB predictions from the
        # inference model.
        targetP = model_cnn.inference(images, **modelParams)

        # Calculate loss. 2 options:

        # use mask to get degrees significant
        # What about adaptive mask to zoom into differences at each CNN stack !!!
        #loss = model_cnn.weighted_loss(targetP, targetT, **modelParams)
        loss = weighted_params_loss(targetP, targetT, **modelParams)
        # pcl based loss
        #loss = model_cnn.pcl_params_loss(pclA, targetP, targetT, **modelParams)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Training     ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver        ready')

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session      ready')

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(
            sess, modelParams['trainLogDir'] + '/model.ckpt-' +
            str(modelParams['trainMaxSteps'] - 1))
        print('Model        loaded')
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner  started')

        print('Write        started')

        ######### USE LATEST STATE TO WARP IMAGES
        filesDictionaryAccum = {}
        durationSum = 0
        durationSumAll = 0
        if modelParams['writeWarpedImages']:
            outputDIR = modelParams['warpedOutputFolder'] + '/'
            print(
                "Using final training state to output processed tfrecords\noutput folder: ",
                outputDIR)
            if tf.gfile.Exists(outputDIR):
                tf.gfile.DeleteRecursively(outputDIR)
            tf.gfile.MakeDirs(outputDIR)
            lossValueSum = 0
            stepsForOneDataRound = int((modelParams['numExamples'] /
                                        modelParams['activeBatchSize'])) + 1
            print('Warping %d images with batch size %d in %d steps' %
                  (modelParams['numExamples'], modelParams['activeBatchSize'],
                   stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                startTime = time.time()
                evImages, evPclA, evPclB, evtargetT, evtargetP, evtfrecFileIDs, evlossValue = sess.run(
                    [images, pclA, pclB, targetT, targetP, tfrecFileIDs, loss])
                lossValueSum += evlossValue
                for fileIdx in range(modelParams['activeBatchSize']):
                    fileIDname = str(evtfrecFileIDs[fileIdx][0]) + "_" + str(
                        evtfrecFileIDs[fileIdx][1]) + "_" + str(
                            evtfrecFileIDs[fileIdx][2])
                    if (fileIDname in filesDictionaryAccum):
                        filesDictionaryAccum[fileIDname] += 1
                    else:
                        filesDictionaryAccum[fileIDname] = 1
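                    # The same bookkeeping could use the stdlib Counter:
                    #   filesDictionaryAccum = collections.Counter()
                    #   filesDictionaryAccum[fileIDname] += 1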
                #### put imageA, warped imageB by pHAB, HAB-pHAB as new HAB, changed file address tfrecFileIDs
                data_output.output(evImages, evPclA, evPclB, evtargetT,
                                   evtargetP, evtfrecFileIDs, **modelParams)
                duration = time.time() - startTime
                durationSum += duration
                durationSumAll += duration
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep)
                        == 0) or ((step + 1) == stepsForOneDataRound):
                    print('Number of files used in training',
                          len(filesDictionaryAccum))
                    print(
                        'Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins --- %s'
                        % ((100 * step) / stepsForOneDataRound, lossValueSum /
                           (step + 1), durationSum / 60,
                           (((durationSum * stepsForOneDataRound) /
                             (step + 1)) / 60) -
                           (durationSum / 60), datetime.now()))
                    #print('Total Elapsed: %.2f mins, Total Completion in: %.2f mins' % (durationSumAll/60), ((((durationSumAll*stepsForOneDataRound)/(step+1))/60)-(durationSumAll/60)) )
            print('Number of files used in training',
                  len(filesDictionaryAccum))
            filesAccum = np.array(list(filesDictionaryAccum.values()))
            print('Access statistics for each file, mean max min std',
                  np.mean(filesAccum), np.max(filesAccum), np.min(filesAccum),
                  np.std(filesAccum))
            print(
                'Average training loss = %.2f - Average time per sample= %.2f s, Steps = %d'
                % (lossValueSum / step, durationSum /
                   (step * modelParams['activeBatchSize']), step))
Example 8
def train(modelParams, epochNumber):
    # import corresponding model name as model_cnn, specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' +
                                        modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    _setupLogging(os.path.join(modelParams['logDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images inputs for model_cnn.
        filename, pngTemp, targetT = data_input.inputs(**modelParams)
        print('Input        ready')

        # Build a Graph that computes the HAB predictions from the
        # inference model
        #targetP = model_cnn.inference(pngTemp, **modelParams)
        targetP, l2reg = model_cnn.inference_l2reg(pngTemp, **modelParams)
        ##############################
        print('Inference    ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session      ready')

        sess.run(init)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        # restore a saver.
        print('Loading Ex-Model with epoch number %d ...' % epochNumber)
        print('     ',
              modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber))
        saver.restore(
            sess,
            (modelParams['trainLogDir'] + '_v/model.ckpt-' + str(epochNumber)))
        #print('     ', modelParams['trainLogDir']+'/model.ckpt-'+str(epochNumber))
        #saver.restore(sess, (modelParams['trainLogDir']+'/model.ckpt-'+str(epochNumber)))
        print('Ex-Model     loaded')

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner  started')

        print('Training     started')
        durationSum = 0
        durationSumAll = 0
        l = list()
        import cv2  # only used by the commented-out visualization below
        for step in xrange(0, modelParams['maxSteps']):  #(0, 1000):
            startTime = time.time()
            #npfilename, npTargetP, npTargetT, npPng = sess.run([filename, targetP, targetT, pngTemp])
            npfilename, npTargetP, npTargetT = sess.run(
                [filename, targetP, targetT])
            duration = time.time() - startTime
            durationSum += duration
            #l.append(duration)
            print(duration, step, modelParams['maxSteps'])

            #print(npfilename)
            #print(npTargetT)
            #print(npTargetP)

            #p1 = npPng[0,:,:,0]
            #p2 = npPng[0,:,:,1]
            #p1 = (p1-np.min(p1)) / (np.max(p1)-np.min(p1))
            #p2 = (p2-np.min(p2)) / (np.max(p2)-np.min(p2))
            #cv2.imshow('img0', p1)
            #cv2.imshow('img1', p2)
            #cv2.waitKey(0)
            #print(npfilename)
            data_output.output(str(10000 + step), npfilename, npTargetP,
                               npTargetT, **modelParams)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print(
                    'Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins --- %s'
                    %
                    ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                     (((durationSum * modelParams['maxSteps']) /
                       (step + 1)) / 60) - (durationSum / 60), datetime.now()))
            #if step == 128:
            #    modelParams['phase'] = 'train'
            #
            #if step == 130:
            #    modelParams['phase'] = 'test'
        #print(l)
        #l0 = np.array(l)
        #l1 = np.array(l[1:-1])
        #print(np.average(l0))
        #print(np.average(l1))
        sess.close()
    tf.reset_default_graph()
Example 9
def train(modelParams, epochNumber):
    # import corresponding model name as model_cnn, specified in the json file
    model_cnn = importlib.import_module('Model_Factory.' +
                                        modelParams['modelName'])

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    _setupLogging(os.path.join(modelParams['logDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images inputs for model_cnn.
        filename, pngTemp, targetT = data_input.inputs(**modelParams)
        print('Input        ready')
        filenamevali, pngTempvali, targetTvali = data_input.inputs_vali(
            **modelParams)
        #TEST###        filenametest, pngTemptest, targetTtest = data_input.inputs_test(**modelParams)

        # Build a Graph that computes the HAB predictions from the
        # inference model
        targetP = model_cnn.inference(pngTemp, **modelParams)
        targetPvali = model_cnn.inference(pngTempvali, **modelParams)
        #TEST###        targetPtest = model_cnn.inference(pngTemptest, **modelParams)
        print(targetP.get_shape())
        # loss model
        if modelParams.get('classificationModel'):
            print('Classification model...')
            # loss on last tuple
            loss = model_cnn.loss(targetP, targetT, **modelParams)
            lossvali = model_cnn.loss(targetPvali, targetTvali, **modelParams)
#TEST###            losstest = model_cnn.loss(targetPtest, targetTtest, **modelParams)
        else:
            print('Regression model...')
            # loss on last tuple
            loss = model_cnn.loss(targetP, targetT, **modelParams)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Training     ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver        ready')

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        print('MergeSummary ready')
        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        #opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session      ready')

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        if epochNumber > 0:
            print('Loading Ex-Model with epoch number %d ...' % epochNumber)
            saver.restore(sess, (modelParams['trainLogDir'] + '/model.ckpt-' +
                                 str(epochNumber)))
            #saver.restore(sess, (modelParams['trainLogDir']+'_30k/model.ckpt-29000'))
            print('Ex-Model     loaded')

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner  started')

        summaryWriter = tf.summary.FileWriter(modelParams['logDir'],
                                              sess.graph)
        summaryValiWriter = tf.summary.FileWriter(
            modelParams['logDir'] + '_validation', sess.graph)
        #TEST###        summaryValiWriter = tf.summary.FileWriter(modelParams['logDir']+'_test', sess.graph)

        print('Training     started')
        durationSum = 0
        durationSumAll = 0
        prevLoss = 99999
        prevValiSumLoss = 99999
        prevaccur = 0
        prevLossStep = 0
        prevStep = 21000
        #TEST###        prevTestSumLoss = 99999
        prevStep = int(modelParams['maxSteps'] / 2)
        for step in xrange(epochNumber, modelParams['maxSteps']):
            startTime = time.time()
            _, lossValue = sess.run([opTrain, loss])
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'

            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch), loss/batch = %.2f')
                logging.info(
                    format_str %
                    (datetime.now(), step, lossValue, examplesPerSec,
                     secPerBatch, lossValue / modelParams['activeBatchSize']))

            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)
            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (
                    step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['logDir'],
                                              'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)
            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print(
                    'Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins --- %s'
                    %
                    ((100 * step) / modelParams['maxSteps'], durationSum / 60,
                     (((durationSum * modelParams['maxSteps']) /
                       (step + 1)) / 60) - (durationSum / 60), datetime.now()))
            if step > prevStep and step % 1000 == 0:
                #if step % 1000 == 0:
                #prevLoss = lossValue
                prevStep = step
                print('     Validation Function in progress... step ', step)
                lossvalidationsum = 0
                for i in range(0, modelParams['testMaxSteps']):
                    lossvalsum, pvali, tvali = sess.run(
                        [lossvali, targetPvali, targetTvali])
                    lossvalidationsum += np.mean(np.array(lossvalsum))
#TEST###                print('     Average loss = ', lossvalidationsum/modelParams['valiSteps'])
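                # NOTE: pvali/tvali hold only the last validation batch, so the
                # accuracy computed below covers a single batch rather than the
                # whole validation set.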

                pos1 = 0
                neg1 = 0
                for jacc in range(pvali.shape[0]):
                    pidx = np.argmax(pvali[jacc])
                    tidx = np.argmax(tvali[jacc])
                    if tidx == pidx:
                        pos1 += 1
                    else:
                        neg1 += 1
                accur = 100.0 * pos1 / (pos1 + neg1)
                print('     Accuracy      = ', accur)
                print('     Prev Accuracy = ', prevaccur)
                print('     Average loss  = ',
                      lossvalidationsum / modelParams['testMaxSteps'])
                print('     Prev    loss  = ',
                      prevValiSumLoss / modelParams['testMaxSteps'],
                      '    prevLossStep = ', prevLossStep)
                if accur > prevaccur:
                    print('     Saving model')
                    shutil.copy(
                        modelParams['logDir'] + '/model.ckpt-' + str(step) +
                        '.data-00000-of-00001',
                        modelParams['logDir'] + '_validation/model.ckpt-' +
                        str(step) + '.data-00000-of-00001')
                    shutil.copy(
                        modelParams['logDir'] + '/model.ckpt-' + str(step) +
                        '.index', modelParams['logDir'] +
                        '_validation/model.ckpt-' + str(step) + '.index')
                    shutil.copy(
                        modelParams['logDir'] + '/model.ckpt-' + str(step) +
                        '.meta', modelParams['logDir'] +
                        '_validation/model.ckpt-' + str(step) + '.meta')
                    prevaccur = accur
                    prevValiSumLoss = lossvalidationsum
                    prevLossStep = step
                summaryStr = sess.run(summaryOp)
                summaryValiWriter.add_summary(summaryStr, step)
            if step > prevStep and step - prevStep > 1001:
                print('     ----------------SKIPPED')
                print('     ----------------SKIPPED')
Example 10
def train():
    _get_control_params()

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    #meanImgFile = os.path.join(FLAGS.dataDir, "meta")
    #if not os.path.isfile(meanImgFile):
    #    raise ValueError("Warning, no meta file found at %s" % meanImgFile)
    #else:
    #    with open(meanImgFile, "r") as inMeanFile:
    #        meanInfo = json.load(inMeanFile)
    #
    #    meanImg = meanInfo['mean']
    #
    #    # also load the target output sizes
    #    params['targSz'] = meanInfo["targSz"]

    _setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # BGR to RGB
        #params['meanImg'] = tf.constant(meanImg, dtype=tf.float32)

        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images and transformation for model_cnn.
        imagesOrig, images, pOrig, tHAB, prevPredHAB, tfrecFileIDs = data_input.inputs(
            **modelParams)

        # Build a Graph that computes the HAB predictions from the
        # inference model.
        pHAB = model_cnn.inference(images, **modelParams)

        # Calculate loss.
        loss = model_cnn.loss(pHAB, tHAB, **modelParams)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        #config.gpu_options.per_process_gpu_memory_fraction = 0.4
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        #saver = tf.train.Saver(tf.global_variables())
        #saver.restore(sess, modelParams['trainLogDir']+'/model.ckpt-9000')

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summaryWriter = tf.summary.FileWriter(modelParams['trainLogDir'],
                                              sess.graph)

        HABperPixelsum = 0
        durationSum = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            _, lossValue = sess.run([opTrain, loss])
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'

            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch), loss/batch = %.2f')
                logging.info(
                    format_str %
                    (datetime.now(), step, lossValue, examplesPerSec,
                     secPerBatch, lossValue / modelParams['activeBatchSize']))

            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)

            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (
                    step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['trainLogDir'],
                                              'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)

            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print(
                    'Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins'
                    %
                    ((100.0 * step) / modelParams['maxSteps'], durationSum / 60,
                     (((durationSum * modelParams['maxSteps']) /
                       (step + 1)) / 60) - (durationSum / 60)))
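
The completion estimate in the progress print extrapolates the average time per step over all steps and subtracts the time already spent. As a standalone helper (a sketch of the same arithmetic, not part of the original module):

def eta_minutes(durationSum, step, maxSteps):
    # average seconds per step so far, projected over all steps,
    # minus the elapsed time; result in minutes
    avgSecPerStep = durationSum / (step + 1)
    return (avgSecPerStep * maxSteps - durationSum) / 60.0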
Example n. 11
def train():
    _get_control_params()

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    _setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images and transformation for model_cnn.
        images, pclA, pclB, targetT, tfrecFileIDs = data_input.inputs(
            **modelParams)
        print('Input        ready')
        # Build a Graph that computes the HAB predictions from the
        # inference model.
        targetP = model_cnn.inference(images, **modelParams)

        # Calculate loss. Two options:

        # use a mask to weight the significant degrees of freedom
        # what about an adaptive mask that zooms into the differences at each CNN stack?
        ########## model_cnn.loss is called inside the loss function
        #loss = weighted_loss(targetP, targetT, **modelParams)
        loss = weighted_params_loss(targetP, targetT, **modelParams)
        # point-cloud based loss
        #loss = pcl_params_loss(pclA, targetP, targetT, **modelParams)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Training     ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver        ready')

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        print('MergeSummary ready')

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session      ready')

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        #saver.restore(sess, (modelParams['trainLogDir'].replace('_B_2','_B_1'))+'/model.ckpt-'+str(modelParams['trainMaxSteps']-1))
        #print('Ex-Model     loaded')

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner  started')

        summaryWriter = tf.summary.FileWriter(modelParams['trainLogDir'],
                                              sess.graph)

        print('Training     started')
        durationSum = 0
        durationSumAll = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            _, lossValue = sess.run([opTrain, loss])
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'

            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch), loss/batch = %.2f')
                logging.info(
                    format_str %
                    (datetime.now(), step, lossValue, examplesPerSec,
                     secPerBatch, lossValue / modelParams['activeBatchSize']))

            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)

            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (
                    step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['trainLogDir'],
                                              'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)

            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print(
                    'Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins --- %s'
                    %
                    ((100.0 * step) / modelParams['maxSteps'], durationSum / 60,
                     (((durationSum * modelParams['maxSteps']) /
                       (step + 1)) / 60) - (durationSum / 60), datetime.now()))
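
weighted_params_loss is not defined in this excerpt. One plausible shape for it is a per-parameter weighted L2 loss (a hedged sketch only: the helper name, the 'paramWeights' key, and the weighting scheme are assumptions, not the original implementation):

import tensorflow as tf

def weighted_params_loss_sketch(targetP, targetT, **kwargs):
    # per-parameter weights, e.g. to emphasize rotation terms over translation
    weights = tf.constant(kwargs['paramWeights'], dtype=tf.float32)
    # weighted squared error, summed over parameters, averaged over the batch
    sqErr = tf.square(targetP - targetT) * weights
    return tf.reduce_mean(tf.reduce_sum(sqErr, axis=1))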
Example n. 12
def train():
    _get_control_params()

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    _setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images and transformation for model_cnn.
        images, pclA, pclB, tMatT, tfrecFileIDs = data_input.inputs(
            **modelParams)
        # Build a Graph that computes the HAB predictions from the
        # inference model.
        tMatP = model_cnn.inference(images, **modelParams)

        # Calculate loss. Two options:

        # use a mask to weight the significant degrees of freedom
        # what about an adaptive mask that zooms into the differences at each CNN stack?
        loss = model_cnn.weighted_loss(tMatP, tMatT, **modelParams)

        # point-cloud based loss
        #loss = model_cnn.pcl_loss(pclA, tMatP, tMatT, **modelParams)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summaryWriter = tf.summary.FileWriter(modelParams['trainLogDir'],
                                              sess.graph)

        durationSum = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            _, lossValue = sess.run([opTrain, loss])
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'

            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch), loss/batch = %.2f')
                logging.info(
                    format_str %
                    (datetime.now(), step, lossValue, examplesPerSec,
                     secPerBatch, lossValue / modelParams['activeBatchSize']))

            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)

            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (
                    step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['trainLogDir'],
                                              'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)

            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print(
                    'Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins'
                    %
                    ((100.0 * step) / modelParams['maxSteps'], durationSum / 60,
                     (((durationSum * modelParams['maxSteps']) /
                       (step + 1)) / 60) - (durationSum / 60)))

        ######### USE LATEST STATE TO WARP IMAGES
        if modelParams['writeWarpedImages']:
            lossValueSum = 0
            stepsForOneDataRound = int((modelParams['numExamples'] /
                                        modelParams['activeBatchSize'])) + 1
            print('Warping images with batch size %d in %d steps' %
                  (modelParams['activeBatchSize'], stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                startTime = time.time()
                evImages, evPclA, evPclB, evtMatT, evtMatP, evtfrecFileIDs, evlossValue = sess.run(
                    [images, pclA, pclB, tMatT, tMatP, tfrecFileIDs, loss])
                duration = time.time() - startTime
                durationSum += duration
                # accumulate the loss so the running average below is meaningful
                lossValueSum += evlossValue
                #### write imageA, imageB warped by pHAB, HAB-pHAB as the new HAB, and the updated file address tfrecFileIDs
                data_output.output(evImages, evPclA, evPclB, evtMatT, evtMatP,
                                   evtfrecFileIDs, **modelParams)
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep)
                        == 0) or ((step + 1) == stepsForOneDataRound):
                    print(
                        'Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins'
                        % ((100.0 * step) / stepsForOneDataRound, lossValueSum /
                           (step + 1), durationSum / 60,
                           (((durationSum * stepsForOneDataRound) /
                             (step + 1)) / 60) - (durationSum / 60)))
            print(
                'Average training loss = %.2f - Average time per sample = %.2f s, Steps = %d'
                % (lossValueSum /
                   ((step + 1) * modelParams['activeBatchSize']), durationSum /
                   ((step + 1) * modelParams['activeBatchSize']), step + 1))
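
Note that stepsForOneDataRound is computed here as int(numExamples / activeBatchSize) + 1, which runs one step too many whenever numExamples is an exact multiple of the batch size, while the variant in Example n. 13 drops the + 1 and misses the remainder batch instead. Ceiling division covers both cases (a small sketch of the standard idiom):

def steps_for_one_round(numExamples, batchSize):
    # ceiling division: adds an extra step only when a partial batch remains
    return (numExamples + batchSize - 1) // batchSize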
Example n. 13
def train():
    _get_control_params()

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    _setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images and transformation for model_cnn.
        images, pclA, pclB, targetT, tfrecFileIDs = data_input.inputs(
            **modelParams)
        print('Input        ready')
        # Build a Graph that computes the HAB predictions from the
        # inference model.
        targetP = model_cnn.inference(images, **modelParams)

        # Calculate loss. Two options:

        # use a mask to weight the significant degrees of freedom
        # what about an adaptive mask that zooms into the differences at each CNN stack?
        ########## model_cnn.loss is called inside the loss function
        #loss = weighted_loss(targetP, targetT, **modelParams)
        loss = weighted_params_loss(targetP, targetT, **modelParams)
        # point-cloud based loss
        #loss = pcl_params_loss(pclA, targetP, targetT, **modelParams)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################
        print('Training     ready')
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        print('Saver        ready')

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()
        print('MergeSummary ready')

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        print('Session      ready')

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        #saver.restore(sess, (modelParams['trainLogDir'].replace('_B_2','_B_1'))+'/model.ckpt-'+str(modelParams['trainMaxSteps']-1))
        #print('Ex-Model     loaded')

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        print('QueueRunner  started')

        summaryWriter = tf.summary.FileWriter(modelParams['trainLogDir'],
                                              sess.graph)

        print('Training     started')
        filesDictionaryAccumTrain = {}
        durationSum = 0
        durationSumAll = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            _, evtfrecFileIDs, lossValue = sess.run(
                [opTrain, tfrecFileIDs, loss])
            for fileIdx in range(modelParams['activeBatchSize']):
                fileIDname = str(evtfrecFileIDs[fileIdx][0]) + "_" + str(
                    evtfrecFileIDs[fileIdx][1]) + "_" + str(
                        evtfrecFileIDs[fileIdx][2])
                if (fileIDname in filesDictionaryAccumTrain):
                    filesDictionaryAccumTrain[fileIDname] += 1
                else:
                    filesDictionaryAccumTrain[fileIDname] = 1
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'

            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch), loss/batch = %.2f')
                logging.info(
                    format_str %
                    (datetime.now(), step, lossValue, examplesPerSec,
                     secPerBatch, lossValue / modelParams['activeBatchSize']))

            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)

            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (
                    step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['trainLogDir'],
                                              'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)

            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print('Number of files used in training',
                      len(filesDictionaryAccumTrain))
                print(
                    'Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins --- %s'
                    %
                    ((100.0 * step) / modelParams['maxSteps'], durationSum / 60,
                     (((durationSum * modelParams['maxSteps']) /
                       (step + 1)) / 60) - (durationSum / 60), datetime.now()))
        print('Number of files used in training',
              len(filesDictionaryAccumTrain))
        filesAccum = np.array(list(filesDictionaryAccumTrain.values()))
        print('Access statistics for each file, mean max min std',
              np.mean(filesAccum), np.max(filesAccum), np.min(filesAccum),
              np.std(filesAccum))

        print(
            "\nTraining completed.....\n------------------------------\n------------------------------\n-------------------------------\n"
        )
        ######### USE LATEST STATE TO WARP IMAGES
        #outputDirFileNum = len([name for name in os.listdir(outputDIR) if os.path.isfile(os.path.join(outputDIR, name))])
        #outputDirFileNum = 0
        filesDictionaryAccum = {}
        durationSum = 0
        durationSumAll = 0
        if modelParams['writeWarpedImages']:
            outputDIR = modelParams['warpedOutputFolder'] + '/'
            print(
                "Using final training state to output processed tfrecords\noutput folder: ",
                outputDIR)
            if tf.gfile.Exists(outputDIR):
                tf.gfile.DeleteRecursively(outputDIR)
            tf.gfile.MakeDirs(outputDIR)
            lossValueSum = 0
            stepsForOneDataRound = int(
                (modelParams['numExamples'] / modelParams['activeBatchSize']))
            print('Warping %d images with batch size %d in %d steps' %
                  (modelParams['numExamples'], modelParams['activeBatchSize'],
                   stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                #step = 0
                #while outputDirFileNum != 20400:
                startTime = time.time()
                evImages, evPclA, evPclB, evtargetT, evtargetP, evtfrecFileIDs, evlossValue = sess.run(
                    [images, pclA, pclB, targetT, targetP, tfrecFileIDs, loss])
                for fileIdx in range(modelParams['activeBatchSize']):
                    fileIDname = str(evtfrecFileIDs[fileIdx][0]) + "_" + str(
                        evtfrecFileIDs[fileIdx][1]) + "_" + str(
                            evtfrecFileIDs[fileIdx][2])
                    if (fileIDname in filesDictionaryAccum):
                        filesDictionaryAccum[fileIDname] += 1
                    else:
                        filesDictionaryAccum[fileIDname] = 1
                #### write imageA, imageB warped by pHAB, HAB-pHAB as the new HAB, and the updated file address tfrecFileIDs
                data_output.output(evImages, evPclA, evPclB, evtargetT,
                                   evtargetP, evtfrecFileIDs, **modelParams)
                duration = time.time() - startTime
                durationSum += duration
                durationSumAll += duration
                # accumulate the loss so the running average below is meaningful
                lossValueSum += evlossValue
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportOutputWrite)
                        == 0) or ((step + 1) == stepsForOneDataRound):
                    print('Number of files used in output writing',
                          len(filesDictionaryAccum))
                    print(
                        'Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins'
                        % ((100.0 * step) / stepsForOneDataRound, lossValueSum /
                           (step + 1), durationSum / 60,
                           (((durationSum * stepsForOneDataRound) /
                             (step + 1)) / 60) - (durationSum / 60)))
                    #print('Total Elapsed: %.2f mins, Training Completion in: %.2f mins' %
                    #        durationSumAll/60, (((durationSumAll*stepsForOneDataRound)/(step+1))/60)-(durationSumAll/60))
                #outputDirFileNum = len([name for name in os.listdir(outputDIR) if os.path.isfile(os.path.join(outputDIR, name))])
                #step+=1
            print('Write steps, one round steps', step, stepsForOneDataRound)
            print('Number of files used in training',
                  len(filesDictionaryAccumTrain))
            filesAccum = np.array(list(filesDictionaryAccumTrain.values()))
            print('Training access statistics for each file, mean max min std',
                  np.mean(filesAccum), np.max(filesAccum), np.min(filesAccum),
                  np.std(filesAccum))
            print('Number of files used in output writing',
                  len(filesDictionaryAccum))
            filesAccum = np.array(list(filesDictionaryAccum.values()))
            print('Write access statistics for each file, mean max min std',
                  np.mean(filesAccum), np.max(filesAccum), np.min(filesAccum),
                  np.std(filesAccum))
            print(
                'Average training loss = %.2f - Average time per sample = %.2f s, Steps = %d'
                % (lossValueSum /
                   ((step + 1) * modelParams['activeBatchSize']), durationSum /
                   ((step + 1) * modelParams['activeBatchSize']), step + 1))
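
The file-usage dictionaries above hand-roll a counter. collections.Counter expresses the same bookkeeping more compactly (a sketch under the same assumption that each row of evtfrecFileIDs holds three ID components):

from collections import Counter

def accumulate_file_ids(counter, evtfrecFileIDs):
    # one "a_b_c" key per example in the batch; Counter absorbs the
    # increment-or-initialize branch the dictionaries spell out
    counter.update('_'.join(str(v) for v in row) for row in evtfrecFileIDs)

# usage inside the loop, replacing filesDictionaryAccumTrain:
#   filesCounter = Counter()
#   accumulate_file_ids(filesCounter, evtfrecFileIDs)
# the statistics then mirror the prints above:
#   counts = np.array(list(filesCounter.values()))
#   print(np.mean(counts), np.max(counts), np.min(counts), np.std(counts))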
Example n. 14
def train():
    _get_control_params()

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    #meanImgFile = os.path.join(FLAGS.dataDir, "meta")
    #if not os.path.isfile(meanImgFile):
    #    raise ValueError("Warning, no meta file found at %s" % meanImgFile)
    #else:
    #    with open(meanImgFile, "r") as inMeanFile:
    #        meanInfo = json.load(inMeanFile)
    #
    #    meanImg = meanInfo['mean']
    #
    #    # also load the target output sizes
    #    params['targSz'] = meanInfo["targSz"]

    _setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # BGR to RGB
        #params['meanImg'] = tf.constant(meanImg, dtype=tf.float32)

        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images and transformation for model_cnn.
        imagesOrig, images, pOrig, tHAB, tfrecFileIDs = data_input.inputs(
            **modelParams)

        # Build a Graph that computes the HAB predictions from the
        # inference model.
        pHAB = model_cnn.inference(images, **modelParams)

        # Calculate loss.
        loss = model_cnn.loss(pHAB, tHAB, **modelParams)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        opTrain = model_cnn.train(loss, globalStep, **modelParams)
        ##############################

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summaryOp = tf.summary.merge_all()

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summaryWriter = tf.summary.FileWriter(modelParams['trainLogDir'],
                                              sess.graph)

        HABperPixelsum = 0
        durationSum = 0
        for step in xrange(modelParams['maxSteps']):
            startTime = time.time()
            _, lossValue = sess.run([opTrain, loss])
            duration = time.time() - startTime
            durationSum += duration
            assert not np.isnan(lossValue), 'Model diverged with loss = NaN'

            if step % FLAGS.printOutStep == 0:
                numExamplesPerStep = modelParams['activeBatchSize']
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch), loss/batch = %.2f')
                logging.info(
                    format_str %
                    (datetime.now(), step, lossValue, examplesPerSec,
                     secPerBatch, lossValue / modelParams['activeBatchSize']))

            if step % FLAGS.summaryWriteStep == 0:
                summaryStr = sess.run(summaryOp)
                summaryWriter.add_summary(summaryStr, step)

            # Save the model checkpoint periodically.
            if step % FLAGS.modelCheckpointStep == 0 or (
                    step + 1) == modelParams['maxSteps']:
                checkpointPath = os.path.join(modelParams['trainLogDir'],
                                              'model.ckpt')
                saver.save(sess, checkpointPath, global_step=step)

            # Print Progress Info
            if ((step % FLAGS.ProgressStepReportStep)
                    == 0) or ((step + 1) == modelParams['maxSteps']):
                print(
                    'Progress: %.2f%%, Elapsed: %.2f mins, Training Completion in: %.2f mins'
                    %
                    ((100.0 * step) / modelParams['maxSteps'], durationSum / 60,
                     (((durationSum * modelParams['maxSteps']) /
                       (step + 1)) / 60) - (durationSum / 60)))

        ######### USE LATEST STATE TO WARP IMAGES
        if modelParams['writeWarpedImages']:
            stepsForOneDataRound = int((modelParams['numExamples'] /
                                        modelParams['activeBatchSize'])) + 1
            print('Warping images with batch size %d in %d steps' %
                  (modelParams['activeBatchSize'], stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                startTime = time.time()
                evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, evlossValue = sess.run(
                    [
                        imagesOrig, images, pOrig, tHAB, pHAB, tfrecFileIDs,
                        loss
                    ])
                duration = time.time() - startTime
                durationSum += duration
                HABRES = evtHAB - evpHAB
                HABperPixel = 0
                for i in xrange(modelParams['activeBatchSize']):
                    H = np.asarray([[
                        HABRES[i][0], HABRES[i][1], HABRES[i][2], HABRES[i][3]
                    ], [
                        HABRES[i][4], HABRES[i][5], HABRES[i][6], HABRES[i][7]
                    ]], np.float32)
                    HABperPixel += np.sqrt((H * H).sum(axis=0)).mean()
                HABperPixel = HABperPixel / modelParams['activeBatchSize']
                HABperPixelsum += HABperPixel
                #### write imageA, imageB warped by pHAB, HAB-pHAB as the new HAB, and the updated file address tfrecFileIDs
                data_output.output(evImagesOrig, evImages, evPOrig, evtHAB,
                                   evpHAB, evtfrecFileIDs, **modelParams)
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep)
                        == 0) or ((step + 1) == stepsForOneDataRound):
                    print(
                        'Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins'
                        %
                        ((100.0 * step) / stepsForOneDataRound, HABperPixelsum /
                         (step + 1), durationSum / 60,
                         (((durationSum * stepsForOneDataRound) /
                           (step + 1)) / 60) - (durationSum / 60)))
            print(
                'Average training loss = %.2f - Average time per sample = %.2f s, Steps = %d'
                % (HABperPixelsum / (step + 1), durationSum /
                   ((step + 1) * modelParams['activeBatchSize']), step + 1))
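
The per-pixel error above reshapes each 8-vector of corner residuals into a 2x4 matrix and averages the Euclidean distances of the four corners. The same quantity, vectorized over the batch (a sketch, assuming evtHAB and evpHAB are NumPy arrays of shape [batchSize, 8], as the indexing implies):

import numpy as np

def hab_per_pixel(evtHAB, evpHAB):
    # rows hold the (x, y) residuals of the 4 corners: norm over the xy axis,
    # then mean over corners and over the batch
    res = (evtHAB - evpHAB).reshape(-1, 2, 4).astype(np.float32)
    return np.sqrt((res * res).sum(axis=1)).mean()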