Example #1
def main(nb_epoch):
  # get Option
  GPU = Option.GPU
  batchSize = Option.batchSize
  pathLog = '../log/' + Option.Time + '(' + Option.Notes + ')' + '.txt'
  Log.Log(pathLog, 'w+', 1) # set log file
  print(time.strftime('%Y-%m-%d %X', time.localtime()), '\n')
  print(open('Option.py').read())

  # get data
  numThread = 4*len(GPU)
  assert batchSize % len(GPU) == 0, ('batchSize must be divisible by number of GPUs')

  with tf.device('/cpu:0'):
    batchTrainX,batchTrainY,batchTestX,batchTestY,numTrain,numTest,label = getData.loadData(Option.dataSet,batchSize,numThread)
  batchNumTrain = int(numTrain / batchSize)
  batchNumTest = int(numTest / 100)

  optimizer = Option.optimizer
  global_step = tf.get_variable('global_step', [], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False)
  Net = []


  # on my machine, alexnet does not fit multi-GPU training
  # for single GPU
  with tf.device('/gpu:%d' % GPU[0]):
    Net.append(NN.NN(batchTrainX, batchTrainY, training=True, global_step=global_step))
    lossTrainBatch, errorTrainBatch = Net[-1].build_graph()
    update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # batchnorm moving average update ops (not used now)

    # since we quantize W at the beginning and the update delta_W is quantized,
    # there is no need to quantize W every iteration
    # we just clip W after each iteration for speed
    update_op += Net[0].W_clip_op

    gradTrainBatch = optimizer.compute_gradients(lossTrainBatch)

    gradTrainBatch_quantize = quantizeGrads(gradTrainBatch)
    with tf.control_dependencies(update_op):
      train_op = optimizer.apply_gradients(gradTrainBatch_quantize, global_step=global_step)

    tf.get_variable_scope().reuse_variables()
    Net.append(NN.NN(batchTestX, batchTestY, training=False))
    _, errorTestBatch = Net[-1].build_graph()



  showVariable()

  # Build an initialization operation to run below.
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  config.allow_soft_placement = True
  config.log_device_placement = False
  sess = Option.sess = tf.InteractiveSession(config=config)
  sess.run(tf.global_variables_initializer())
  saver = tf.train.Saver(max_to_keep=None)
  # Start the queue runners.
  tf.train.start_queue_runners(sess=sess)


  def getErrorTest():
    errorTest = 0.
    for i in tqdm(range(batchNumTest),desc = 'Test', leave=False):
      errorTest += sess.run([errorTestBatch])[0]
    errorTest /= batchNumTest
    return errorTest

  if Option.loadModel is not None:
    print('Loading model from %s ...' % Option.loadModel, end=' ')
    saver.restore(sess, Option.loadModel)
    print('Finished', end=' ')
    errorTestBest = getErrorTest()
    print('Test:', errorTestBest)

  else:
    # at the beginning, we discretize W
    sess.run([Net[0].W_q_op])

  print('\nOptimization Start!\n')
  
  lossTotal_ = []
  errorTotal_ = []
  errorTest_ = []
  # ----------------------------------------------------------------
  gradH_ = []
  gradW_ = []
  gradWq_ = []
  
  for epoch in range(nb_epoch):
    # check lr_schedule
    if len(Option.lr_schedule) / 2:
      if epoch == Option.lr_schedule[0]:
        Option.lr_schedule.pop(0)
        lr_new = Option.lr_schedule.pop(0)
        if lr_new == 0:
          print('Optimization Ended!')
          exit(0)
        lr_old = sess.run(Option.lr)
        sess.run(Option.lr.assign(lr_new))
        print('lr: {} -> {}'.format(lr_old, lr_new))

    print('Epoch: {}'.format(epoch), end=' ')


    lossTotal = 0.
    errorTotal = 0
    t0 = time.time()
    for batchNum in tqdm(range(batchNumTrain), desc='Epoch: %03d' % epoch, leave=False, smoothing=0.1):
      if Option.debug is False:
        _, loss_delta, error_delta = sess.run([train_op, lossTrainBatch, errorTrainBatch])
      else:
        _, loss_delta, error_delta, H, W, W_q, gradH, gradW, gradW_q=\
        sess.run([train_op, lossTrainBatch, errorTrainBatch, Net[0].H, Net[0].W, Net[0].W_q, Net[0].gradsH, Net[0].gradsW, gradTrainBatch_quantize])

      lossTotal += loss_delta
      errorTotal += error_delta

    lossTotal /= batchNumTrain
    errorTotal /= batchNumTrain

    print('Loss: {} Train: {}'.format(lossTotal, errorTotal), end=' ')

    # get test error
    errorTest = getErrorTest()
    print('Test: {} FPS: {}'.format(errorTest, numTrain / (time.time() - t0)), end=' ')

    if epoch == 0:
      errorTestBest = errorTest
    if errorTest < errorTestBest:
      if Option.saveModel is not None:
        saver.save(sess, Option.saveModel)
        print('S', end=' ')
    if errorTest < errorTestBest:
      errorTestBest = errorTest
      print('BEST')

    lossTotal_.append(lossTotal)
    errorTotal_.append(errorTotal)
    errorTest_.append(errorTest)


    if epoch % 5 == 0:
      dd = {}
      dd['lossTotal'] = lossTotal_
      dd['errorTotal'] = errorTotal_
      dd['errorTest'] = errorTest_
      filename = '../savedata/temp_savedata.pkl'
      with open(filename, 'wb') as f:
        pickle.dump(dd, f)

  
  # _, loss_delta, error_delta, H, W, W_q, gradH, gradW, gradW_q=\
  # 	sess.run([train_op, lossTrainBatch, errorTrainBatch, Net[0].H, Net[0].W, Net[0].W_q, Net[0].gradsH, Net[0].gradsW, gradTrainBatch_quantize])

  return lossTotal_, errorTotal_, errorTest_ #,  H, W, W_q, gradH, gradW, gradW_q
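
Note: the example above passes the raw gradients through quantizeGrads before apply_gradients, but that helper is defined elsewhere in the repo and not shown here. The following is only a minimal sketch of what such a per-tensor gradient quantizer could look like, assuming simple stochastic rounding to a fixed bit width; the bit width, scaling scheme, and function name are assumptions, not the repo's actual code.

import tensorflow as tf

def quantize_grads_sketch(grads_and_vars, bits=8):
    # Hypothetical stand-in for quantizeGrads: stochastically round each dense
    # gradient tensor to `bits` bits; anything else is passed through untouched.
    levels = float(2 ** (bits - 1) - 1)
    out = []
    for g, v in grads_and_vars:
        if g is None or not isinstance(g, tf.Tensor):
            out.append((g, v))
            continue
        scale = tf.reduce_max(tf.abs(g)) + 1e-12          # per-tensor scale to [-1, 1]
        noise = tf.random_uniform(tf.shape(g), -0.5, 0.5)  # stochastic rounding noise
        g_q = tf.round(g / scale * levels + noise) / levels * scale
        out.append((g_q, v))
    return out
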
Example #2
def main():
    # get Option
    GPU = Option.GPU
    batchSize = Option.batchSize
    pathLog = '../log/' + Option.Time + '(' + Option.Notes + ')' + '.txt'
    Log.Log(pathLog, 'w+', 1)  # set log file
    print time.strftime('%Y-%m-%d %X', time.localtime()), '\n'
    print open('Option.py').read()

    # get data
    numThread = 4 * len(GPU)
    assert batchSize % len(GPU) == 0, (
        'batchSize must be divisible by number of GPUs')

    with tf.device('/cpu:0'):
        batchTrainX,batchTrainY,batchTestX,batchTestY,numTrain,numTest,label =\
            getData.loadData(Option.dataSet,batchSize,numThread,Option.validNum)

    batchNumTrain = numTrain / batchSize
    batchNumTest = numTest / 100

    optimizer = Option.optimizer
    global_step = tf.get_variable('global_step', [],
                                  dtype=tf.int32,
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    Net = []
    # for single GPU
    with tf.device('/gpu:%d' % GPU[0]):
        Net.append(
            NN.NN(batchTrainX,
                  batchTrainY,
                  training=True,
                  global_step=global_step))
        lossTrainBatch, errorTrainBatch = Net[-1].build_graph()
        update_op = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS)  # batchnorm moving average update ops
        update_op += Net[0].W_clip_op

        gradTrainBatch = quantizeGrads(
            optimizer.compute_gradients(lossTrainBatch))
        with tf.control_dependencies(update_op):
            train_op = optimizer.apply_gradients(gradTrainBatch,
                                                 global_step=global_step)
        tf.get_variable_scope().reuse_variables()
        Net.append(NN.NN(batchTestX, batchTestY, training=False))
        _, errorTestBatch = Net[-1].build_graph()

    # Build an initialization operation to run below.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.log_device_placement = False
    sess = Option.sess = tf.InteractiveSession(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    def getErrorTest():
        errorTest = 0.
        for i in tqdm(xrange(batchNumTest), desc='Test', leave=False):
            errorTest += sess.run([errorTestBatch])[0]
        errorTest /= batchNumTest
        return errorTest

    if Option.loadModel is not None:
        print 'Loading model from %s ...' % Option.loadModel,
        saver.restore(sess, Option.loadModel)
        print 'Finished',
        errorTestBest = getErrorTest()
        print 'Test: %.4f ' % (errorTestBest)

    sess.run([Net[0].W_q_op])
    print "\nOptimization Start!\n"
    for epoch in xrange(1000):

        # check lr_schedule
        if len(Option.lr_schedule) / 2:
            if epoch == Option.lr_schedule[0]:
                Option.lr_schedule.pop(0)
                lr_new = Option.lr_schedule.pop(0)
                if lr_new == 0:
                    print 'Optimization Ended!'
                    exit(0)
                lr_old = sess.run(Option.lr)
                sess.run(Option.lr.assign(lr_new))
                print 'lr: %f -> %f' % (lr_old, lr_new)

        print 'Epoch: %03d ' % (epoch),

        lossTotal = 0.
        errorTotal = 0
        t0 = time.time()
        for batchNum in tqdm(xrange(batchNumTrain),
                             desc='Epoch: %03d' % epoch,
                             leave=False,
                             smoothing=0.1):
            _, loss_delta, error_delta = sess.run(
                [train_op, lossTrainBatch, errorTrainBatch])
            # _, loss_delta, error_delta, H, W, W_q, gradsH, gradsW, gradW_q=\
            # sess.run([train_op, lossTrainBatch, errorTrainBatch, Net[0].H, Net[0].W, Net[0].W_q, Net[0].gradsH, Net[0].gradsW, gradTrainBatch])

            lossTotal += loss_delta
            errorTotal += error_delta

        lossTotal /= batchNumTrain
        errorTotal /= batchNumTrain

        print 'Loss: %.6f Train: %.4f' % (lossTotal, errorTotal),

        # get test error
        errorTest = getErrorTest()
        print 'Test: %.4f FPS: %d' % (errorTest, numTrain /
                                      (time.time() - t0)),

        if epoch == 0:
            errorTestBest = errorTest

        if errorTest < errorTestBest:
            if Option.saveModel is not None:
                saver.save(sess, Option.saveModel)
                print 'S',
        if errorTest < errorTestBest:
            errorTestBest = errorTest
            print 'BEST',

        print ''
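
Note: both examples so far consume Option.lr_schedule as a flat list of (epoch, learning-rate) pairs, where a learning rate of 0 acts as a stop sentinel. A small self-contained illustration of that consumption pattern; the schedule values below are made up for the sake of the example.

# Hypothetical schedule: switch the lr at epochs 100 and 150, stop at epoch 200.
lr_schedule = [100, 0.01, 150, 0.001, 200, 0]

for epoch in range(300):
    if len(lr_schedule) // 2:              # any (epoch, lr) pairs left?
        if epoch == lr_schedule[0]:
            lr_schedule.pop(0)             # drop the epoch marker
            lr_new = lr_schedule.pop(0)    # take the new learning rate
            if lr_new == 0:                # 0 means "end training here"
                print('stop training at epoch', epoch)
                break
            print('epoch', epoch, ': lr ->', lr_new)
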
Example #3
def main():
    # get Option
    GPU = Option.GPU
    batchSize = Option.batchSize
    pathLog = os.path.join('../log', Option.modelName)
    os.system('mkdir -p ' + pathLog)
    logFile = os.path.join(pathLog, 'train.txt')
    fLog = open(logFile, 'w+')

    sys.stdout = Log.Log(fLog, sys.stdout)  # set log file
    print('>>> Path to log file ' + pathLog)
    print('>>> Start at ' + time.strftime('%Y-%m-%d %X', time.localtime()) +
          '\n')
    print(open('Option.py').read())

    # get data
    numThread = 4 * len(GPU)
    assert batchSize % len(GPU) == 0, (
        'batchSize must be divisible by number of GPUs')

    with tf.device('/cpu:0'):
        batchTrainX,batchTrainY,batchTestX,batchTestY,numTrain,numTest,label =\
          getData.loadData(Option.dataSet, batchSize, numThread)

    batchNumTrain = numTrain / batchSize
    batchNumTest = numTest / 100

    optimizer = Option.optimizer
    global_step = tf.get_variable('global_step', [],
                                  dtype=tf.int32,
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    Net = []

    # on my machine, alexnet does not fit multi-GPU training
    # for single GPU
    with tf.device('/gpu:%d' % GPU[0]):
        Net.append(
            NN.NN(batchTrainX,
                  batchTrainY,
                  training=True,
                  global_step=global_step))
        lossTrainBatch, errorTrainBatch = Net[-1].build_graph()
        update_op = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS
        )  # batchnorm moving average update ops (not used now)

        # since we quantize W at the beginning and the update delta_W is quantized,
        # there is no need to quantize W every iteration
        # we just clip W after each iteration for speed
        update_op += Net[0].W_clip_op

        gradTrainBatch = optimizer.compute_gradients(lossTrainBatch)

        gradTrainBatch_quantize = quantizeGrads(gradTrainBatch)
        with tf.control_dependencies(update_op):
            train_op = optimizer.apply_gradients(gradTrainBatch_quantize,
                                                 global_step=global_step)

        tf.get_variable_scope().reuse_variables()

        Net.append(NN.NN(batchTestX, batchTestY, training=False))
        _, errorTestBatch = Net[-1].build_graph()
        w_max = []

    showVariable(debug=Option.debug)

    # Build an initialization operation to run below.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.log_device_placement = False
    sess = Option.sess = tf.InteractiveSession(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    #----
    def getErrorTest():
        errorTest = 0.
        for i in tqdm(range(int(batchNumTest)), desc='Test', leave=False):
            errorTest += sess.run([errorTestBatch])[0]
        errorTest /= batchNumTest
        return errorTest

    #---- Resume
    if Option.loadModel is not None:
        if os.path.isfile(Option.loadModel):
            print('>>> Loading model from %s ...' % Option.loadModel, end='')
            saver.restore(sess, Option.loadModel)
            print(' finished')
            errorTestBest = getErrorTest()
            print('>>> Test error  %.3f' % errorTestBest)
        else:
            print('>>> Model does not exist for restoring')
            sys.exit()
    else:
        # at the beginning, we discretize W
        sess.run([Net[0].W_q_op])

    #---- Derive LR
    print('>>> Optimization starting ...')
    for epoch in range(Option.epoches):
        # check lr_schedule
        if len(Option.lr_schedule) / 2:
            if epoch == Option.lr_schedule[0]:
                Option.lr_schedule.pop(0)
                lr_new = Option.lr_schedule.pop(0)
                if lr_new == 0:
                    print('  - optimization ended')
                    exit(0)
                lr_old = sess.run(Option.lr)
                sess.run(Option.lr.assign(lr_new))
                print('>>> Learning rate lr: %f -> %f' % (lr_old, lr_new))

        #--- Training
        lossTotal = 0.0
        errorTotal = 0.0
        t0 = time.time()
        for batchNum in tqdm(range(int(batchNumTrain)),
                             desc='Epoch: {:3d}'.format(epoch),
                             leave=False,
                             smoothing=0.1):
            if Option.debug is False:
                _, loss_delta, error_delta = sess.run(
                    [train_op, lossTrainBatch, errorTrainBatch])
            else:
                _, loss_delta, error_delta, H, W, W_q, gradH, gradW, gradW_q = \
                sess.run([train_op, lossTrainBatch, errorTrainBatch, Net[0].H, Net[0].W, Net[0].W_q, Net[0].gradsH, Net[0].gradsW, gradTrainBatch_quantize])
            lossTotal += loss_delta
            errorTotal += error_delta

        lossTotal /= batchNumTrain
        errorTotal /= batchNumTrain

        #for i in W_q:
        #  print i
        print('>>>> Epoch: %3d: ' % (epoch), end='')
        print('Total loss %12.4f, Train error %.4f, ' %
              (lossTotal, errorTotal),
              end='')

        # get test error
        errorTest = getErrorTest()
        print('Test error %.4f, FPS: %4d' % (errorTest, numTrain /
                                             (time.time() - t0)),
              end='')

        if epoch == 0:
            errorTestBest = errorTest

        if errorTest < errorTestBest:
            if Option.saveModel is not None:
                saver.save(sess, Option.saveModel)
                print(', Saved', end='')
        if errorTest < errorTestBest:
            errorTestBest = errorTest
            print(', Best test error %.4f' % errorTestBest, end='')

        print('')

    print('>>> Configuration')
    print('  - Dataset                 %s' % Option.dataSet)
    print('  - Number of epochs        %d' % Option.epoches)
    print('  - Batch size              %d' % Option.batchSize)
    print('  - batchNumTrain           %d' % batchNumTrain)
    print('  - batchNumTest            %d' % batchNumTest)
    print('  - modelName               %s' % Option.modelName)
    print('  - lr_schedule             %s' % str(Option.lr_schedule))
    print('  - loss function type      %s' % Option.lossFunc)

    print('  - Quantization')
    print('    + bit width of weights       %d' % Option.bitsW)
    print('    + bit width of activations   %d' % Option.bitsA)
    print('    + bit width of gradients     %d' % Option.bitsG)
    print('    + bit width of errors        %d' % Option.bitsE)
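
Note: the training examples add Net[0].W_clip_op to the batchnorm update ops so that weights are only clipped after each step rather than re-quantized. The clip op itself is built inside NN, which is not shown; below is a minimal sketch of how such a list of ops could be constructed, where the clip range of [-1, 1] is an assumption.

import tensorflow as tf

# Hypothetical construction of a W_clip_op-style list: one assign per weight.
weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
W_clip_op = [tf.assign(w, tf.clip_by_value(w, -1.0, 1.0)) for w in weights]

# Combined with the batchnorm update ops and used as a control dependency,
# exactly like update_op in the examples above.
update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + W_clip_op
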
Example #4
def main():
    # get Option
    GPU = Option.GPU
    batchSize = Option.batchSize
    pathLog = '../log/' + Option.Time + '(' + Option.Notes + ')' + '.txt'
    Log.Log(pathLog, 'w+', 1)  # set log file
    print time.strftime('%Y-%m-%d %X', time.localtime()), '\n'
    print open('Option.py').read()

    # get data
    numThread = 4 * len(GPU)
    assert batchSize % len(GPU) == 0, (
        'batchSize must be divisible by number of GPUs')

    with tf.device('/cpu:0'):
        batchTrainX,batchTrainY,batchTestX,batchTestY,numTrain,numTest,label =\
            getData.loadData(Option.dataSet,batchSize,numThread)

    batchNumTrain = numTrain / batchSize
    batchNumTest = numTest / 100

    optimizer = Option.optimizer
    global_step = tf.get_variable('global_step', [],
                                  dtype=tf.int32,
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    Net = []

    # on my machine, alexnet does not fit multi-GPU training
    # for single GPU
    with tf.device('/gpu:%d' % GPU[0]):
        Net.append(NN.NN(batchTestX, batchTestY, training=False))
        _, errorTestBatch = Net[-1].build_graph()

    showVariable()

    # Build an initialization operation to run below.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.log_device_placement = False
    sess = Option.sess = tf.InteractiveSession(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    def getErrorTest():
        errorTest = 0.
        for i in tqdm(xrange(batchNumTest), desc='Test', leave=False):
            errorTest += sess.run([errorTestBatch])[0]
        errorTest /= batchNumTest
        return errorTest

    if Option.loadModel is not None:
        print 'Loading model from %s ...' % Option.loadModel,
        saver.restore(sess, Option.loadModel)
        print 'Finished',
        errorTestBest = getErrorTest()
        print 'Test Error:', errorTestBest
        H, W = sess.run([Net[0].input_array, Net[0].W_b])
        hardware_estimation(H, W, Option.bitsW, Option.bitsA)

    else:
        print "No saved model"
Example #5
def main():
    # get Option
    GPU = Option.GPU
    batchSize = Option.batchSize
    pathLog = '../log/' + Option.Time + '(exp1)' + '.txt'
    # f = open(pathLog, 'w')
    # sys.stdout = f
    Log.Log(pathLog, 'w+', 1)  # set log file
    print(time.strftime('%Y-%m-%d %X', time.localtime()), '\n')
    print(open('Option.py').read())

    # get data
    numThread = 4 * len(GPU)
    assert batchSize % len(GPU) == 0, ('batchSize must be divisible by number of GPUs')

    with tf.device('/cpu:0'):
        batchTrainX, batchTrainY, batchTestX, batchTestY, numTrain, numTest, label = \
            getData.loadData(Option.dataSet, batchSize, numThread)

    batchNumTrain = numTrain // batchSize
    batchNumTest = numTest // 100

    optimizer = Option.optimizer
    global_step = tf.get_variable('global_step', [], dtype=tf.int32, initializer=tf.constant_initializer(0),
                                  trainable=False)
    Net = []

    # on my machine, alexnet does not fit multi-GPU training
    # for single GPU
    with tf.device('/gpu:%d' % GPU[0]):
        Net.append(ResNet.NN(batchTrainX, batchTrainY, training=True, global_step=global_step))
        lossTrainBatch, errorTrainBatch = Net[-1].build_graph()
        update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # batchnorm moving average update ops (not used now)

        # since we quantize W at the beginning and the update delta_W is quantized,
        # there is no need to quantize W every iteration
        # we just clip W after each iteration for speed
        update_op += Net[0].W_clip_op

        gradTrainBatch = optimizer.compute_gradients(lossTrainBatch)

        gradTrainBatch_quantize = quantizeGrads(gradTrainBatch)
        with tf.control_dependencies(update_op):
            train_op = optimizer.apply_gradients(gradTrainBatch_quantize, global_step=global_step)

        tf.get_variable_scope().reuse_variables()
        Net.append(ResNet.NN(batchTestX, batchTestY, training=False))
        _, errorTestBatch = Net[-1].build_graph()

    showVariable()

    # Build an initialization operation to run below.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.log_device_placement = False
    sess = Option.sess = tf.InteractiveSession(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    def getErrorTest():
        errorTest = 0.
        for i in tqdm(range(batchNumTest), desc='Test', leave=False):
            errorTest += sess.run([errorTestBatch])[0]
        errorTest /= batchNumTest
        return errorTest

    if Option.loadModel is not None:
        print('Loading model from %s ...' % Option.loadModel, end=' ')
        saver.restore(sess, Option.loadModel)
        print('Finished', end=' ')
        errorTestBest = getErrorTest()
        print('Test:', errorTestBest)

    else:
        # at the beginning, we discretize W
        sess.run([Net[0].W_q_op])

    print("\nOptimization Start!\n")
    tmp_lr_schedule = [0, 1./8, 5, 0]
    for epoch in range(1000):
        # check lr_schedule
        if len(Option.lr_schedule) / 2:

            if epoch == tmp_lr_schedule[0]:
                tmp_lr_schedule.pop(0)
                lr_new = tmp_lr_schedule.pop(0)
                if lr_new == 0:
                    print('Optimization Ended!')
                    break
                lr_old = sess.run(Option.lr)
                sess.run(Option.lr.assign(lr_new))
                print('lr: %f -> %f' % (lr_old, lr_new))

        print('Epoch: %03d ' % epoch, end=' ')

        lossTotal = 0.
        errorTotal = 0
        t0 = time.time()
        for batchNum in tqdm(range(batchNumTrain), desc='Epoch: %03d' % epoch, leave=False, smoothing=0.1):
            if Option.debug is False:
                _, loss_delta, error_delta = sess.run([train_op, lossTrainBatch, errorTrainBatch])
            else:
                _, loss_delta, error_delta, H, W, W_q, gradH, gradW, gradW_q = \
                    sess.run([train_op, lossTrainBatch, errorTrainBatch, Net[0].H, Net[0].W, Net[0].W_q, Net[0].gradsH,
                              Net[0].gradsW, gradTrainBatch_quantize])

            lossTotal += loss_delta
            errorTotal += error_delta

        lossTotal /= batchNumTrain
        errorTotal /= batchNumTrain

        print('Loss: %.4f Train: %.4f' % (lossTotal, errorTotal), end=' ')

        # get test error
        errorTest = getErrorTest()
        print('Test: %.4f FPS: %d' % (errorTest, numTrain / (time.time() - t0)), end=' ')

        if epoch == 0:
            errorTestBest = errorTest
        # if errorTest < errorTestBest:
        #     if Option.saveModel is not None:
        #         saver.save(sess, Option.saveModel)
        #         print('S', end=' ')
        if errorTest < errorTestBest:
            errorTestBest = errorTest
            print('BEST', end=' ')

        print('')



    save_np(sess)
    print('End of Training')
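
Note: Example #5 finishes by calling save_np(sess), whose definition is not included in the snippet. The sketch below is only a hypothetical stand-in that dumps all trainable variables to a NumPy archive; the function name, path, and format are assumptions.

import numpy as np
import tensorflow as tf

def save_np_sketch(sess, path='../savedata/weights.npz'):
    # Hypothetical helper: save every trainable variable into one .npz file.
    values = {v.op.name.replace('/', '_'): sess.run(v)
              for v in tf.trainable_variables()}
    np.savez(path, **values)
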
Example #6
def main():
  # get Option
  GPU = Option.GPU
  batchSize = Option.batchSize  #  batchSize = 128
  pathLog = '../log/' + Option.Time + '(' + Option.Notes + ')' + '.txt'
  Log.Log(pathLog, 'w+', 1) # set log file
  print time.strftime('%Y-%m-%d %X', time.localtime()), '\n'
  print open('Option.py').read()

  # get data
  numThread = 4*len(GPU)         # len(GPU) = 1
  assert batchSize % len(GPU) == 0, ('batchSize must be divisible by number of GPUs')  # 128 = 32 * 4
  with tf.device('/cpu:0'):  # use the first CPU.
    batchTrainX,batchTrainY,batchTestX,batchTestY,numTrain,numTest,label =\
        getData.loadData(Option.dataSet,batchSize,numThread)

  batchNumTrain = numTrain / batchSize   # batchSize = 128, numTrain = 
  batchNumTest = numTest / 100

  optimizer = Option.optimizer
  # create a variable named global_step, initialized to 0.
  global_step = tf.get_variable('global_step', [], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False)
  Net = []

  # if an op can run on either CPU or GPU, TF places it on the GPU first.
  # on my machine, alexnet does not fit multi-GPU training
  # for single GPU
  with tf.device('/gpu:%d' % GPU[0]):  # GPU[0] = 0
    Net.append(NN.NN(batchTrainX, batchTrainY, training=True, global_step=global_step))
    lossTrainBatch, errorTrainBatch = Net[-1].build_graph()
    update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # batchnorm moving average update ops (not used now)
    # batchnorm's moving_average and moving_var are needed at test time but must be
    # computed during training; since they are not referenced directly in the training
    # pass, their update ops have to be run explicitly alongside the training step.

    # since we quantize W at the beginning and the update delta_W is quantized,
    # there is no need to quantize W every iteration
    # we just clip W after each iteration for speed
    update_op += Net[0].W_clip_op

    gradTrainBatch = optimizer.compute_gradients(lossTrainBatch)

    gradTrainBatch_quantize = quantizeGrads(gradTrainBatch)
    # make train_op run only after the update_op list has finished.
    with tf.control_dependencies(update_op):
      train_op = optimizer.apply_gradients(gradTrainBatch_quantize, global_step=global_step)

    tf.get_variable_scope().reuse_variables()  # flag to reuse variables
    Net.append(NN.NN(batchTestX, batchTestY, training=False))
    _, errorTestBatch = Net[-1].build_graph()



  showVariable()
  # create, initialize, and save variables.
  # Build an initialization operation to run below.
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True  # allow GPU memory growth (allocate only as much memory as needed)
  config.allow_soft_placement = True  # let TF pick a device automatically if the specified one is unavailable.
  config.log_device_placement = False  # do not log device placement
  sess = Option.sess = tf.InteractiveSession(config=config)
  sess.run(tf.global_variables_initializer())
  saver = tf.train.Saver(max_to_keep=None)  # keep all checkpoints (no limit on saved models).
  # Start the queue runners.
  tf.train.start_queue_runners(sess=sess)



  def getErrorTest():
    errorTest = 0.
    for i in tqdm(xrange(batchNumTest),desc = 'Test', leave=False):
      errorTest += sess.run([errorTestBatch])[0]
    errorTest /= batchNumTest
    return errorTest

  if Option.loadModel is not None:
    print 'Loading model from %s ...' % Option.loadModel,
    saver.restore(sess, Option.loadModel)
    print 'Finished',
    errorTestBest = getErrorTest()
    print 'Test:', errorTestBest

  else:  # taken when no model is loaded
    # at the beginning, we discretize W
    sess.run([Net[0].W_q_op])

  print "\nOptimization Start!\n"
  for epoch in xrange(1000):
    # check lr_schedule
    if len(Option.lr_schedule) / 2:
      if epoch == Option.lr_schedule[0]: # if epoch matches the first entry, pop it off and
        Option.lr_schedule.pop(0)
        lr_new = Option.lr_schedule.pop(0)
        if lr_new == 0:
          print 'Optimization Ended!'
          exit(0)
        lr_old = sess.run(Option.lr)
        sess.run(Option.lr.assign(lr_new))
        print 'lr: %f -> %f' % (lr_old, lr_new)

    print 'Epoch: %03d ' % (epoch),


    lossTotal = 0.
    errorTotal = 0
    t0 = time.time()
    for batchNum in tqdm(xrange(batchNumTrain), desc='Epoch: %03d' % epoch, leave=False, smoothing=0.1):
      if Option.debug is False:
        _, loss_delta, error_delta = sess.run([train_op, lossTrainBatch, errorTrainBatch])
      else:
        _, loss_delta, error_delta, H, W, W_q, gradH, gradW, gradW_q=\
        sess.run([train_op, lossTrainBatch, errorTrainBatch, Net[0].H, Net[0].W, Net[0].W_q, Net[0].gradsH, Net[0].gradsW, gradTrainBatch_quantize])

      lossTotal += loss_delta
      errorTotal += error_delta

    lossTotal /= batchNumTrain
    errorTotal /= batchNumTrain

    print 'Loss: %.4f Train: %.4f' % (lossTotal, errorTotal),

    # get test error
    errorTest = getErrorTest()
    print 'Test: %.4f FPS: %d' % (errorTest, numTrain / (time.time() - t0)),

    if epoch == 0:
      errorTestBest = errorTest
    if errorTest < errorTestBest:
      if Option.saveModel is not None:
        saver.save(sess, Option.saveModel)
        print 'S',
    if errorTest < errorTestBest:
      errorTestBest = errorTest
      print 'BEST',

    print ''
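
Note: the training examples build the test graph by calling tf.get_variable_scope().reuse_variables() and then constructing a second NN.NN(..., training=False), so the test network shares the (quantized) training weights. A tiny self-contained illustration of that weight-sharing pattern; the two-layer model below is made up and only stands in for NN.NN.

import tensorflow as tf

def build(x):
    # Hypothetical two-layer model standing in for NN.NN(...).build_graph().
    h = tf.layers.dense(x, 64, activation=tf.nn.relu, name='fc1')
    return tf.layers.dense(h, 10, name='fc2')

x_train = tf.placeholder(tf.float32, [None, 32])
x_test = tf.placeholder(tf.float32, [None, 32])

with tf.variable_scope('model') as scope:
    logits_train = build(x_train)
    scope.reuse_variables()        # same effect as tf.get_variable_scope().reuse_variables()
    logits_test = build(x_test)    # reuses fc1/fc2 weights from the training graph
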