# Script excerpt: start of train_Donly.py (ST-GAN discriminator-only training).
# Only the imports, option parsing, output-directory setup and the first part
# of the graph definition are visible here.
import numpy as np
import time,os,sys
import util
# the banner is printed before TensorFlow is imported
print(util.toYellow("======================================================="))
print(util.toYellow("train_Donly.py (ST-GAN discriminator only)"))
print(util.toYellow("======================================================="))
import tensorflow as tf
import data
import graph,warp
import options
opt = options.set(training=True)
# this script refuses to run unless zero warps were requested
assert(opt.warpN==0)
# create directories for model output
# NOTE(review): the output root is a hard-coded Google Drive / Colab path
main_folder = "/content/gdrive/My Drive/Colab Notebooks/spatial-transformer-GAN/glasses/"
os.makedirs(main_folder + "models_{0}".format(opt.group), exist_ok=True)
print(util.toMagenta("building graph..."))
tf.reset_default_graph()
# build graph
with tf.device(opt.GPUdevice):
    # ------ define input data ------
    # real/fake data placeholders are batchSize x dataH x dataW x 3
    imageRealData = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.dataH,opt.dataW,3])
    imageBGfakeData = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.dataH,opt.dataW,3])
    # foreground placeholder is batchSize x H x W x 4 (presumably RGBA -- TODO confirm)
    imageFGfake = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.H,opt.W,4])
    # placeholder list; the order here is [BG fake, real, FG fake]
    PH = [imageBGfakeData,imageRealData,imageFGfake]
    # ------ generate perturbation ------
    imageReal = data.perturbBG(opt,imageRealData)
# Script excerpt: start of eval_STGAN.py (ST-GAN with homography, evaluation).
# Only the imports, option parsing and the first part of the graph definition
# are visible here.
import numpy as np
import time, os, sys
import util
print(util.toYellow("======================================================="))
print(util.toYellow("eval_STGAN.py (ST-GAN with homography)"))
print(util.toYellow("======================================================="))
import tensorflow as tf
import data
import graph, warp
import options
# options are parsed in evaluation mode
opt = options.set(training=False)
print(util.toMagenta("building graph..."))
tf.reset_default_graph()
# build graph
with tf.device(opt.GPUdevice):
    # ------ define input data ------
    # background is batchSize x H x W x 3, foreground is batchSize x H x W x 4
    imageBG = tf.placeholder(tf.float32, shape=[opt.batchSize, opt.H, opt.W, 3])
    imageFG = tf.placeholder(tf.float32, shape=[opt.batchSize, opt.H, opt.W, 4])
    PH = [imageBG, imageFG]
    # random normal warp perturbation of size batchSize x warpDim, scaled by opt.pertFG
    pPertFG = opt.pertFG * tf.random_normal([opt.batchSize, opt.warpDim])
    # ------ define GP and D ------
    geometric = graph.geometric_multires
    # ------ geometric predictor ------
    # only the first of the three returned values is used here
    imageFGwarpAll, _, _ = geometric(opt, imageBG, imageFG, pPertFG)
    # ------ composite image ------
# Script excerpt: start of train.py (joint 2D optimization with novel
# viewpoints). Only the imports, option parsing, output-directory setup and the
# first two input placeholders are visible here.
import numpy as np
import scipy.misc, scipy.io
import time, os, sys
import threading
import util
print(util.toYellow("======================================================="))
print(
    util.toYellow(
        "train.py (train with joint 2D optimization with novel viewpoints)"))
print(util.toYellow("======================================================="))
import tensorflow as tf
import data, graph, transform
import options
print(util.toMagenta("setting configurations..."))
opt = options.set(training=True)
# create directories for model output
util.mkdir("models_{0}".format(opt.group))
print(util.toMagenta("building graph..."))
tf.reset_default_graph()
# build graph
# NOTE(review): the device string is hard-coded here rather than read from opt
with tf.device("/gpu:0"):
    # ------ define input data ------
    # input images are batchSize x inH x inW x 3
    inputImage = tf.placeholder(tf.float32,
                                shape=[opt.batchSize, opt.inH, opt.inW, 3])
    # one 4-vector per novel view (presumably a rotation/transform
    # parameterization -- TODO confirm against the transform module)
    renderTrans = tf.placeholder(tf.float32,
                                 shape=[opt.batchSize, opt.novelN, 4])
# Script excerpt: start of pretrain_homo.py (pretrain STN with homography
# perturbation). Only the imports, option parsing, output-directory setup and
# the input placeholders are visible here.
import numpy as np
import scipy.misc, scipy.io
import time, os, sys
import util
print(util.toYellow("======================================================="))
print(
    util.toYellow(
        "pretrain_homo.py (pretrain STN with homography perturbation)"))
print(util.toYellow("======================================================="))
import tensorflow as tf
import data
import graph, warp
import options
opt = options.set(training=True)
# create directories for model output
# (relative to the current working directory; an existing directory is fine)
os.makedirs("models_{0}".format(opt.group), exist_ok=True)
print(util.toMagenta("building graph..."))
tf.reset_default_graph()
# build graph
with tf.device(opt.GPUdevice):
    # ------ define input data ------
    # background is batchSize x H x W x 3, foreground is batchSize x H x W x 4
    imageBGreal = tf.placeholder(tf.float32,
                                 shape=[opt.batchSize, opt.H, opt.W, 3])
    imageFGreal = tf.placeholder(tf.float32,
                                 shape=[opt.batchSize, opt.H, opt.W, 4])
    PH = [imageBGreal, imageFGreal]
def set(training):
    """Parse the command line and build the run configuration.

    Args:
        training: truthy registers the training-only flags; falsy registers
            the evaluation-only flags instead.

    Returns:
        The argparse namespace, extended with fixed constants and with values
        derived from the parsed flags. A configuration summary is printed as
        a side effect.
    """
    # flags available in both modes (registration order is preserved)
    shared_flags = [
        ("--group", dict(default="0", help="name for group")),
        ("--name", dict(default="test", help="name for model instance")),
        ("--loadGP", dict(default=None, help="load pretrained model (GP)")),
        ("--size", dict(default="128x128", help="resolution of foreground image")),
        ("--warpType", dict(default="affine", help="type of warp function on foreground image")),
        ("--warpN", dict(type=int, default=1, help="number of spatial transformations")),
        ("--stdGP", dict(type=float, default=0.01, help="initialization stddev (GP)")),
        ("--stdD", dict(type=float, default=0.01, help="initialization stddev (D)")),
    ]
    if training:
        mode_flags = [
            ("--loadD", dict(default=None, help="load pretrained model (D)")),
            ("--lrGP", dict(type=float, default=1e-5, help="base learning rate (GP)")),
            ("--lrGPdecay", dict(type=float, default=1.0, help="learning rate decay (GP)")),
            ("--lrGPstep", dict(type=int, default=20000, help="learning rate decay step size (GP)")),
            ("--lrD", dict(type=float, default=1e-5, help="base learning rate (D)")),
            ("--lrDdecay", dict(type=float, default=1.0, help="learning rate decay (D)")),
            ("--lrDstep", dict(type=int, default=20000, help="learning rate decay step size (D)")),
            ("--dplambda", dict(type=float, default=1.0, help="warp update norm penalty factor")),
            ("--gradlambda", dict(type=float, default=10.0, help="gradient penalty factor")),
            ("--updateD", dict(type=int, default=2, help="update N times (D)")),
            ("--updateGP", dict(type=int, default=1, help="update N times (GP)")),
            ("--batchSize", dict(type=int, default=20, help="batch size for SGD")),
            ("--histSize", dict(type=float, default=10, help="history size in batch")),
            ("--histQsize", dict(type=int, default=10000, help="history queue size for updating D")),
            ("--fromIt", dict(type=int, default=0, help="resume training from iteration number")),
            ("--toIt", dict(type=int, default=50000, help="run training to iteration number")),
            ("--pertFG", dict(type=float, default=0.1, help="scale of initial perturbation (bags)")),
            ("--pertBG", dict(type=float, default=0.1, help="scale of initial perturbation (face)")),
        ]
    else:
        mode_flags = [
            ("--batchSize", dict(type=int, default=10, help="batch size for evaluation")),
            ("--pertFG", dict(type=float, default=0.0, help="scale of initial perturbation (bags)")),
            ("--pertBG", dict(type=float, default=0.0, help="scale of initial perturbation (face)")),
            ("--loadImage", dict(default=None, help="load image to test")),
        ]

    parser = argparse.ArgumentParser()
    for flag, spec in shared_flags + mode_flags:
        parser.add_argument(flag, **spec)
    opt = parser.parse_args()

    # ------ fixed settings ------
    # data frame is 128x128 centred at (64,64); a 144x144 / (72,72) variant
    # for the original network was left commented out by the author
    opt.dataH, opt.dataW = 128, 128
    opt.centerY, opt.centerX = 64, 64
    # warp types other than these two leave warpDim as None
    opt.warpDim = {"homography": 8, "affine": 6}.get(opt.warpType)
    opt.warpApprox = 20
    opt.GPUdevice = "/gpu:0"

    # ------ derived settings ------
    opt.training = training
    opt.H, opt.W = [int(side) for side in opt.size.split("x")]
    if training:
        opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))
    opt.canon4pts = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]], dtype=np.float32)
    last_col, last_row = opt.W - 1, opt.H - 1
    opt.image4pts = np.array(
        [[0, 0], [0, last_row], [last_col, last_row], [last_col, 0]],
        dtype=np.float32)
    opt.refMtrx = warp.fit(Xsrc=opt.canon4pts, Xdst=opt.image4pts)
    # corners of the W x H window centred at (centerX, centerY)
    x_lo, x_hi = opt.centerX - opt.W // 2, opt.centerX + opt.W // 2
    y_lo, y_hi = opt.centerY - opt.H // 2, opt.centerY + opt.H // 2
    opt.image4pts_b = np.array(
        [[x_lo, y_lo], [x_lo, y_hi], [x_hi, y_hi], [x_hi, y_lo]],
        dtype=np.float32)
    opt.refMtrx_b = warp.fit(Xsrc=opt.canon4pts, Xdst=opt.image4pts_b)

    # ------ summary ------
    def green(value):
        return util.toGreen(format(value, ""))

    def yellow(value, spec=""):
        return util.toYellow(format(value, spec))

    rule = "------------------------------------------"
    print("({0}) {1}".format(green(opt.group), green(opt.name)))
    print(rule)
    print("batch size: {0}, warps: {1}".format(yellow(opt.batchSize),
                                               yellow(opt.warpN)))
    print("image size: {0}x{1}".format(yellow(opt.H), yellow(opt.W)))
    if training:
        print("[GP] stddev={3}, lr={0}, decay={1}, step={2}, update={4}".format(
            yellow(opt.lrGP, ".0e"), yellow(opt.lrGPdecay),
            yellow(opt.lrGPstep), yellow(opt.stdGP, ".0e"),
            yellow(opt.updateGP)))
        print("[D] stddev={3}, lr={0}, decay={1}, step={2}, update={4}".format(
            yellow(opt.lrD, ".0e"), yellow(opt.lrDdecay),
            yellow(opt.lrDstep), yellow(opt.stdD, ".0e"),
            yellow(opt.updateD)))
    print(rule)
    if training:
        print(util.toMagenta(
            "training model ({0}) {1}...".format(opt.group, opt.name)))
    return opt
# Script excerpt: start of evaluate_dist.py (evaluate average distance of
# generated point cloud). The definition of computeTestError is cut off after
# its first branch in this view.
import numpy as np
import scipy.misc, scipy.io
import time, os, sys
import threading
import util
print(util.toYellow("======================================================="))
print(
    util.toYellow(
        "evaluate_dist.py (evaluate average distance of generated point cloud)"
    ))
print(util.toYellow("======================================================="))
import tensorflow as tf
import data
import options
print(util.toMagenta("setting configurations..."))
opt = options.set(training=False)
with tf.device("/gpu:0"):
    # two float64 placeholders with a variable number of rows and 3 columns
    VsPH = tf.placeholder(tf.float64, [None, 3])
    VtPH = tf.placeholder(tf.float64, [None, 3])
    # only the second value returned by util.projection is kept
    _, minDist = util.projection(VsPH, VtPH)


# compute test error for one prediction
# NOTE(review): the parameter name `type` shadows the builtin; only the
# "pred->GT" branch is visible in this excerpt.
def computeTestError(Vs, Vt, type):
    VsN, VtN = len(Vs), len(Vt)
    if type == "pred->GT":
        # evaluate at most 200 source entries; batch sizes are 200 and 100000
        evalN, VsBatchSize, VtBatchSize = min(VsN, 200), 200, 100000
""" Training the Spatial Transformer GAN """
# Script excerpt: start of train_STGAN.py (ST-GAN with homography). Only the
# imports, option parsing, output-directory setup and the input placeholders
# are visible here.
import numpy as np
import time,os,sys
import util
print(util.toYellow("======================================================="))
print(util.toYellow("train_STGAN.py (ST-GAN with homography)"))
print(util.toYellow("======================================================="))
import tensorflow as tf
import data
import graph,warp
import options
opt = options.set(training=True)
# create directories for model output
util.mkdir("models_{0}".format(opt.group))
print(util.toMagenta("building graph..."))
tf.reset_default_graph()
# build graph
with tf.device(opt.GPUdevice):
    # ------ define input data ------
    # real/fake data placeholders are batchSize x dataH x dataW x 3
    imageRealData = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.dataH,opt.dataW,3])
    imageBGfakeData = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.dataH,opt.dataW,3])
    # foreground placeholder is batchSize x H x W x 4
    imageFGfake = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.H,opt.W,4])
    # placeholder list; the order here is [BG fake, real, FG fake]
    PH = [imageBGfakeData,imageRealData,imageFGfake]
# Script excerpt: start of pretrain.py (pretrain structure generator with
# fixed viewpoints). Only the imports, option parsing, output-directory setup
# and the first two input placeholders are visible here.
import numpy as np
import scipy.misc, scipy.io
import time, os, sys
import threading
import util
print(util.toYellow("======================================================="))
print(
    util.toYellow(
        "pretrain.py (pretrain structure generator with fixed viewpoints)"))
print(util.toYellow("======================================================="))
import tensorflow as tf
import data, graph
import options
print(util.toMagenta("setting configurations..."))
opt = options.set(training=True)
# create directories for model output
util.mkdir("models_{0}".format(opt.group))
print(util.toMagenta("building graph..."))
tf.reset_default_graph()
# build graph
# NOTE(review): the device string is hard-coded here rather than read from opt
with tf.device("/gpu:0"):
    # ------ define input data ------
    # input images are batchSize x inH x inW x 3
    inputImage = tf.placeholder(tf.float32,
                                shape=[opt.batchSize, opt.inH, opt.inW, 3])
    # ground-truth depth is batchSize x outH x outW x outViewN
    depthGT = tf.placeholder(
        tf.float32, shape=[opt.batchSize, opt.outH, opt.outW, opt.outViewN])
# Script excerpt: third Adam optimizer, data loading, saver/summary setup and
# the beginning of the training loop (the rest of the loop body is not visible
# in this excerpt).
with tf.name_scope("adam3"):
    # the learning rate is fed per step through the lrGP_PH placeholder
    optimGP3 = tf.train.AdamOptimizer(learning_rate=lrGP_PH).minimize(loss_GP,var_list=vars_all)
# load data
print(util.toMagenta("loading training data..."))
trainData = data.load()
# prepare model saver/summary writer
saver_GP = tf.train.Saver(max_to_keep=20)
summaryWriter = tf.summary.FileWriter("summary")
summary_op=tf.summary.merge_all()
print(util.toYellow("======= TRAINING START ======="))
timeStart = time.time()
# start session
tfConfig = tf.ConfigProto(allow_soft_placement=True)
tfConfig.gpu_options.allow_growth = True
with tf.Session(config=tfConfig) as sess:
    sess.run(tf.global_variables_initializer())
    summaryWriter.add_graph(sess.graph)
    # Keep the base learning rate fixed. The previous code re-multiplied the
    # already-decayed lrGP on every iteration, so the decay compounded per
    # step instead of following a staircase schedule.
    lrGP_base = lrGP
    # training loop
    for i in range(toIt):
        # staircase decay: base * decay ** (number of completed decay steps);
        # the name lrGP still holds the current rate for any later code
        lrGP = lrGP_base*lrGPdecay**(i//lrGPstep)
        batch = data.makeBatch(batchSize,trainData,PH)
        batch[lrGP_PH] = lrGP
# Script excerpt: build one Blender command line per entry of listFile, then
# run the commands through a worker pool and report the ones that fail.
print(util.toMagenta('=== Generating rendering commands...'))
for line in listFile:
    # entries containing CATEGORY use the path component after the first "/"
    # as the model name; any other entry is used as-is
    if CATEGORY in line.strip():
        MODEL = line.strip().split("/")[1]
    else:
        MODEL = line.strip()
    command = 'nice -n 10 blender %s -b -P render_depth_pair_lambert_func_continuous_persp_template.py -- %s %s %s %d %d'\
        %(BLENDER_FILE, CATEGORY, MODEL, NAME, RESOLUTION, model_index)
    #command = 'nice -n 10 blender %s -b -P render_depth_pair_lambert_func_continuous_template_persp.py -- %s %s %d %d %d'\
    #    %(BLENDER_FILE, CATEGORY, MODEL, RESOLUTION, VIEWS, model_index)
    commands.append(command)
    model_index += 1
    print(command)
    # for debug
    #if model_index >= 5:
    #    break
print(
    util.toMagenta(
        '=== Rendering %d commands on %d workers, it takes a long time...' %
        (len(commands), pool_num)))
pool = Pool(pool_num)
# each command string is executed through the shell (call(..., shell=True));
# NOTE(review): MODEL comes straight from listFile and is not quoted -- confirm
# the list file is trusted
for idx, return_code in enumerate(
        pool.imap(partial(call, shell=True), commands)):
    # print(util.toBlue('[%s] Rendering command %d of %d: %s' % (datetime.datetime.now().time(), idx, len(commands), commands[idx])))
    # a non-zero exit status is reported but does not stop the remaining commands
    if return_code != 0:
        print(
            util.toYellow('Rendering command %d of %d (\"%s\") failed' %
                          (idx, len(commands), commands[idx])))
# Script excerpt: build the evaluation graph, restore a trained model, run it
# on a single loaded image and save the input image.
with tf.device(GPUdevice):
    # ------ define input data ------
    # input placeholder is batchSize x dataH x dataW x 3
    WarpdData = tf.placeholder(tf.float32,shape=[batchSize,dataH,dataW,3])
    PH = [WarpdData]
    # random normal warp perturbation of size batchSize x warpDim, scaled by pert
    pPertFG = pert*tf.random_normal([batchSize,warpDim])
    # ------ define GP ------
    geometric = graph.combine
    # ------ geometric predictor ------
    imageWarped = geometric(WarpdData,stdGP,batchSize,dataH,dataW,pPertFG,warpN)
    # ------ optimizer ------
    #varsGP = [v for v in tf.global_variables() if "geometric" in v.name]
# prepare model saver/summary writer
saver_GP = tf.train.Saver()
print(util.toYellow("======= EVALUATION START ======="))
# start session
tfConfig = tf.ConfigProto(allow_soft_placement=True)
tfConfig.gpu_options.allow_growth = True
with tf.Session(config=tfConfig) as sess:
    # variables are initialized first and then overwritten by the restore below
    sess.run(tf.global_variables_initializer())
    #restore the model
    saver_GP.restore(sess,trained_model)
    print(util.toMagenta("start evaluation..."))
    testImage = util.imread(loadImage)
    batch = data.makeBatchEval_tps(batchSize,testImage,PH)
    # fetch the fed input and the warped output in a single run
    runList = [WarpdData,imageWarped]
    ic0,icf = sess.run(runList,feed_dict=batch)
    #print(ic0.shape,icf.shape)
    # the first input of the batch is saved inverted (1 - value)
    util.imsave("eval/image__input.png",1-ic0[0])
# Script excerpt: start of evaluation.py (evaluating on MNIST). Only the
# imports, option parsing and the first part of the graph definition are
# visible here.
import numpy as np
import time,os,sys
import argparse
import util
import pdb
import matplotlib.pyplot as plt
print(util.toYellow("======================================================="))
print(util.toYellow("evaluation.py (evaluating on MNIST)"))
print(util.toYellow("======================================================="))
import tensorflow as tf
# NOTE(review): util is imported a second time here
import data,graph,warp,util
import options
print(util.toMagenta("setting configurations..."))
# NOTE(review): options are parsed in training mode although this is an
# evaluation script -- confirm this is intentional
opt = options.set(training=True)
tf.reset_default_graph()
# build graph
with tf.device("/gpu:0"):
    # ------ define input data ------
    # the parsed batch size is overridden to 1
    opt.batchSize = 1
    image = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.H,opt.W])
    label = tf.placeholder(tf.int64,shape=[opt.batchSize])
    # ------ generate perturbation ------
    pInit = data.genPerturbations(opt)
    pInitMtrx = warp.vec2mtrx(opt,pInit)
    # ------ build network ------
    # a trailing axis is appended; from here on `image` names the expanded
    # tensor, not the placeholder created above
    image = tf.expand_dims(image,axis=-1)
    imagePert = warp.transformImage(opt,image,pInitMtrx)
# Script excerpt: start of evaluate.py (evaluate/generate point cloud). Only
# the imports, option parsing, output-directory setup and the input
# placeholders are visible here.
import numpy as np
import scipy.misc,scipy.io
import time,os,sys
import threading
import util
print(util.toYellow("======================================================="))
print(util.toYellow("evaluate.py (evaluate/generate point cloud)"))
print(util.toYellow("======================================================="))
import tensorflow as tf
import data,graph,transform
import options
print(util.toMagenta("setting configurations..."))
opt = options.set(training=False)
# the parsed batch size is replaced by the number of input views
opt.batchSize = opt.inputViewN
opt.chunkSize = 50
# create directories for evaluation output
util.mkdir("results_{0}/{1}".format(opt.group,opt.load))
print(util.toMagenta("building graph..."))
tf.reset_default_graph()
# build graph
with tf.device("/gpu:0"):
    # ------ define input data ------
    # input images are batchSize x inH x inW x 3
    inputImage = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.inH,opt.inW,3])
    renderTrans = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.novelN,4])
    # depth and mask ground truth share the shape batchSize x novelN x H x W x 1
    depthGT = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.novelN,opt.H,opt.W,1])
    maskGT = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.novelN,opt.H,opt.W,1])
# Script excerpt: start of train.py (training on MNIST, PyTorch). Only the
# imports, option parsing, output-directory setup and network selection are
# visible here.
import numpy as np
import time, os, sys
import argparse
import util
print(util.toYellow("======================================================="))
print(util.toYellow("train.py (training on MNIST)"))
print(util.toYellow("======================================================="))
import torch
# NOTE(review): util is imported a second time here
import data, graph, warp, util
import options
print(util.toMagenta("setting configurations..."))
opt = options.set(training=True)
# create directories for model output
util.mkdir("models_{0}".format(opt.group))
print(util.toMagenta("building network..."))
# everything below runs with CUDA device 0 selected
with torch.cuda.device(0):
    # ------ build network ------
    # opt.netType selects the (geometric, classifier) pair; no branch handles
    # any other value, so both names stay undefined in that case
    if opt.netType == "CNN":
        geometric = graph.Identity()
        classifier = graph.FullCNN(opt)
    elif opt.netType == "STN":
        geometric = graph.STN(opt)
        classifier = graph.CNN(opt)
    elif opt.netType == "IC-STN":
        geometric = graph.ICSTN(opt)
        classifier = graph.CNN(opt)
def set(training):
    """Parse the command line and build the run configuration.

    Args:
        training: truthy registers the training-only flags; falsy registers
            the evaluation-only flags instead.

    Returns:
        The argparse namespace, extended with fixed constants and with values
        derived from the parsed flags. A configuration summary is printed as
        a side effect.
    """
    # flags available in both modes (registration order is preserved)
    shared_flags = [
        ("--group", dict(default="0", help="name for group")),
        ("--name", dict(default="test", help="name for model instance")),
        ("--loadGP", dict(default=None, help="load pretrained model (GP)")),
        ("--gpu", dict(default="0", help="ID of GPU device (if there are multiple)")),
        ("--size", dict(default="120x160", help="resolution of background image")),
        ("--warpN", dict(type=int, default=1, help="number of spatial transformations")),
        ("--stdGP", dict(type=float, default=0.01, help="initialization stddev (GP)")),
        ("--stdD", dict(type=float, default=0.01, help="initialization stddev (D)")),
    ]
    if training:
        mode_flags = [
            ("--loadD", dict(default=None, help="load pretrained model (D)")),
            ("--lrGP", dict(type=float, default=1e-6, help="base learning rate (GP)")),
            ("--lrGPdecay", dict(type=float, default=1.0, help="learning rate decay (GP)")),
            ("--lrGPstep", dict(type=int, default=10000, help="learning rate decay step size (GP)")),
            ("--lrD", dict(type=float, default=1e-4, help="base learning rate (D)")),
            ("--lrDdecay", dict(type=float, default=1.0, help="learning rate decay (D)")),
            ("--lrDstep", dict(type=int, default=10000, help="learning rate decay step size (D)")),
            ("--unpaired", dict(action="store_true", help="feed unpaired samples to D")),
            ("--dplambda", dict(type=float, default=0.3, help="warp update norm penalty factor")),
            ("--gradlambda", dict(type=float, default=10.0, help="gradient penalty factor")),
            ("--updateD", dict(type=int, default=2, help="update N times (D)")),
            ("--updateGP", dict(type=int, default=1, help="update N times (GP)")),
            ("--batchSize", dict(type=int, default=20, help="batch size for SGD")),
            ("--fromIt", dict(type=int, default=0, help="resume training from iteration number")),
            ("--toIt", dict(type=int, default=40000, help="run training to iteration number")),
            ("--initPert", dict(type=float, default=0.1, help="scale of initial perturbation")),
            ("--homoPert", dict(type=float, default=0.1, help="scale of homography perturbation")),
        ]
    else:
        mode_flags = [
            ("--batchSize", dict(type=int, default=1, help="batch size for evaluation")),
            ("--initPert", dict(type=float, default=0.0, help="scale of initial perturbation")),
        ]

    parser = argparse.ArgumentParser()
    for flag, spec in shared_flags + mode_flags:
        parser.add_argument(flag, **spec)
    opt = parser.parse_args()

    # ------ fixed settings ------
    opt.warpType = "homography"
    opt.warpDim = 8
    opt.warpApprox = 20
    # NOTE(review): --gpu is only echoed in the summary below; the device
    # string is fixed here -- confirm how callers apply the GPU id
    opt.GPUdevice = "/gpu:0"

    # ------ derived settings ------
    opt.training = training
    opt.H, opt.W = [int(side) for side in opt.size.split("x")]
    if training:
        opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))
    opt.canon4pts = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]],
                             dtype=np.float32)
    last_col, last_row = opt.W - 1, opt.H - 1
    opt.image4pts = np.array(
        [[0, 0], [0, last_row], [last_col, last_row], [last_col, 0]],
        dtype=np.float32)
    opt.refMtrx = warp.fit(Xsrc=opt.canon4pts, Xdst=opt.image4pts)

    # ------ summary ------
    def green(value):
        return util.toGreen(format(value, ""))

    def yellow(value, spec=""):
        return util.toYellow(format(value, spec))

    rule = "------------------------------------------"
    print("({0}) {1}".format(green(opt.group), green(opt.name)))
    print(rule)
    print("GPU device: {0}, batch size: {1}, warps: {2}".format(
        yellow(opt.gpu), yellow(opt.batchSize), yellow(opt.warpN)))
    print("image size: {0}x{1}".format(yellow(opt.H), yellow(opt.W)))
    if training:
        print("[GP] stddev={3}, lr={0}, decay={1}, step={2}, update={4}".format(
            yellow(opt.lrGP, ".0e"), yellow(opt.lrGPdecay),
            yellow(opt.lrGPstep), yellow(opt.stdGP, ".0e"),
            yellow(opt.updateGP)))
        print("[D] stddev={3}, lr={0}, decay={1}, step={2}, update={4}".format(
            yellow(opt.lrD, ".0e"), yellow(opt.lrDdecay),
            yellow(opt.lrDstep), yellow(opt.stdD, ".0e"),
            yellow(opt.updateD)))
    print(rule)
    if training:
        print(util.toMagenta(
            "training model ({0}) {1}...".format(opt.group, opt.name)))
    return opt
def set(training):
    """Parse the command line and build the run configuration.

    Args:
        training: truthy registers the training-only flags; falsy registers
            the evaluation-only flag instead.

    Returns:
        The argparse namespace, extended with fixed constants, values derived
        from the parsed flags, and the array loaded from
        ``trans_fuse<outViewN>.npy`` (path relative to the working directory).
        A configuration summary is printed as a side effect.
    """
    # flags available in both modes (registration order is preserved)
    shared_flags = [
        ("--category", dict(default="03001627", help="category ID number")),
        ("--group", dict(default="0", help="name for group")),
        ("--model", dict(default="test", help="name for model instance")),
        ("--load", dict(default=None, help="load trained model to fine-tune/evaluate")),
        ("--std", dict(type=float, default=0.1, help="initialization standard deviation")),
        ("--outViewN", dict(type=int, default=8, help="number of fixed views (output)")),
        ("--inSize", dict(default="64x64", help="resolution of encoder input")),
        ("--outSize", dict(default="128x128", help="resolution of decoder output")),
        ("--predSize", dict(default="128x128", help="resolution of prediction")),
        ("--upscale", dict(type=int, default=5, help="upscaling factor for rendering")),
        ("--novelN", dict(type=int, default=5, help="number of novel views simultaneously")),
        ("--arch", dict(default=None)),
    ]
    if training:
        mode_flags = [
            ("--batchSize", dict(type=int, default=20, help="batch size for training")),
            ("--chunkSize", dict(type=int, default=100, help="data chunk size to load")),
            ("--itPerChunk", dict(type=int, default=50, help="training iterations per chunk")),
            ("--lr", dict(type=float, default=1e-4, help="base learning rate (AE)")),
            ("--lrDecay", dict(type=float, default=1.0, help="learning rate decay multiplier")),
            ("--lrStep", dict(type=int, default=20000, help="learning rate decay step size")),
            ("--lambdaDepth", dict(type=float, default=1.0, help="loss weight factor (depth)")),
            ("--fromIt", dict(type=int, default=0, help="resume training from iteration number")),
            ("--toIt", dict(type=int, default=100000, help="run training to iteration number")),
        ]
    else:
        mode_flags = [
            ("--batchSize", dict(type=int, default=1, help="batch size for evaluation")),
        ]

    parser = argparse.ArgumentParser()
    for flag, spec in shared_flags + mode_flags:
        parser.add_argument(flag, **spec)
    opt = parser.parse_args()

    # ------ fixed settings ------
    opt.sampleN = 100
    opt.renderDepth = 1.0
    opt.BNepsilon = 1e-5
    opt.BNdecay = 0.999
    opt.inputViewN = 24

    # ------ derived settings ------
    def parse_size(text):
        # "HxW" -> [H, W]
        return [int(side) for side in text.split("x")]

    def scale_matrix(width, height):
        # 4x4 float32 matrix built from a width/height pair
        return np.array([[width, 0, 0, width / 2],
                         [0, -height, 0, height / 2],
                         [0, 0, -1, 0],
                         [0, 0, 0, 1]], dtype=np.float32)

    opt.training = training
    opt.inH, opt.inW = parse_size(opt.inSize)
    opt.outH, opt.outW = parse_size(opt.outSize)
    opt.H, opt.W = parse_size(opt.predSize)
    opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))
    opt.Khom3Dto2D = scale_matrix(opt.W, opt.H)
    opt.Khom2Dto3D = scale_matrix(opt.outW, opt.outH)
    opt.fuseTrans = np.load("trans_fuse{0}.npy".format(opt.outViewN))

    # ------ summary ------
    def green(value):
        return util.toGreen(format(value, ""))

    def yellow(value, spec=""):
        return util.toYellow(format(value, spec))

    rule = "------------------------------------------"
    print("({0}) {1}".format(green(opt.group), green(opt.model)))
    print(rule)
    print("batch size: {0}, category: {1}".format(yellow(opt.batchSize),
                                                  yellow(opt.category)))
    print("size: {0}x{1}(in), {2}x{3}(out), {4}x{5}(pred)".format(
        yellow(opt.inH), yellow(opt.inW),
        yellow(opt.outH), yellow(opt.outW),
        yellow(opt.H), yellow(opt.W)))
    if training:
        print("learning rate: {0} (decay: {1}, step size: {2})".format(
            yellow(opt.lr, ".2e"), yellow(opt.lrDecay), yellow(opt.lrStep)))
        print("depth loss weight: {0}".format(yellow(opt.lambdaDepth)))
    print("viewN: {0}(out), upscale: {1}, novelN: {2}".format(
        yellow(opt.outViewN), yellow(opt.upscale), yellow(opt.novelN)))
    print(rule)
    if training:
        print(util.toMagenta(
            "training model ({0}) {1}...".format(opt.group, opt.model)))
    return opt
def set(training):
    """Parse the command line and build the run configuration.

    Args:
        training: truthy registers the training-only flags; falsy registers
            the evaluation-only flag instead.

    Returns:
        The argparse namespace, extended with fixed constants and with values
        derived from the parsed flags. A configuration summary is printed as
        a side effect.
    """
    # parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("netType", choices=["CNN", "STN", "IC-STN"],
                        help="type of network")
    parser.add_argument("--group", default="0", help="name for group")
    parser.add_argument("--model", default="test",
                        help="name for model instance")
    parser.add_argument("--size", default="36x36", help="image resolution")
    parser.add_argument("--sizeFull", default="50x50",
                        help="full image resolution")
    parser.add_argument(
        "--warpType", default="homography",
        help="type of warp function on images",
        choices=["translation", "similarity", "affine", "homography"])
    parser.add_argument(
        "--warpN", type=int, default=4,
        help="number of recurrent transformations (for IC-STN)")
    parser.add_argument("--stdC", type=float, default=0.01,
                        help="initialization stddev (classification network)")
    parser.add_argument("--stdGP", type=float, default=0.001,
                        help="initialization stddev (geometric predictor)")
    parser.add_argument("--pertScale", type=float, default=0.25,
                        help="initial perturbation scale")
    parser.add_argument("--transScale", type=float, default=0.25,
                        help="initial translation scale")
    if training:  # training
        parser.add_argument("--batchSize", type=int, default=100,
                            help="batch size for SGD")
        parser.add_argument("--lrC", type=float, default=1e-2,
                            help="learning rate (classification network)")
        parser.add_argument(
            "--lrCdecay", type=float, default=0.1,
            help="learning rate decay (classification network)")
        parser.add_argument(
            "--lrCstep", type=int, default=500000,
            help="learning rate decay step size (classification network)")
        parser.add_argument("--lrGP", type=float, default=None,
                            help="learning rate (geometric predictor)")
        parser.add_argument("--lrGPdecay", type=float, default=0.1,
                            help="learning rate decay (geometric predictor)")
        parser.add_argument(
            "--lrGPstep", type=int, default=500000,
            help="learning rate decay step size (geometric predictor)")
        parser.add_argument("--fromIt", type=int, default=0,
                            help="resume training from iteration number")
        parser.add_argument("--toIt", type=int, default=1000000,
                            help="run training to iteration number")
    else:  # evaluation
        parser.add_argument("--batchSize", type=int, default=1,
                            help="batch size for evaluation")
    opt = parser.parse_args()

    # --lrGP is only registered in training mode, so the attribute is absent
    # in evaluation mode; reading opt.lrGP directly raised AttributeError
    # there. getattr covers both the "absent" and the "not given" case, and
    # the per-network default is then filled in for either mode.
    if getattr(opt, "lrGP", None) is None:
        opt.lrGP = 0 if opt.netType == "CNN" else \
                   1e-3 if opt.netType == "STN" else \
                   3e-5 if opt.netType == "IC-STN" else None

    # --- below are automatically set ---
    opt.training = training
    opt.H, opt.W = [int(x) for x in opt.size.split("x")]
    opt.fullH, opt.fullW = [int(x) for x in opt.sizeFull.split("x")]
    opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))
    opt.warpDim = 2 if opt.warpType == "translation" else \
                  4 if opt.warpType == "similarity" else \
                  6 if opt.warpType == "affine" else \
                  8 if opt.warpType == "homography" else None
    opt.labelN = 43
    opt.canon4pts = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]],
                             dtype=np.float32)
    opt.image4pts = np.array(
        [[0, 0], [0, opt.H - 1], [opt.W - 1, opt.H - 1], [opt.W - 1, 0]],
        dtype=np.float32)
    # W x H box centred in the full image, as [x0, y0, x1, y1]
    opt.bbox = [
        int(opt.fullW / 2 - opt.W / 2),
        int(opt.fullH / 2 - opt.H / 2),
        int(opt.fullW / 2 + opt.W / 2),
        int(opt.fullH / 2 + opt.H / 2)
    ]
    opt.bbox4pts = np.array(
        [[opt.bbox[0], opt.bbox[1]], [opt.bbox[0], opt.bbox[3]],
         [opt.bbox[2], opt.bbox[3]], [opt.bbox[2], opt.bbox[1]]],
        dtype=np.float32)
    opt.refMtrx = warp.fit(Xsrc=opt.canon4pts, Xdst=opt.image4pts)
    opt.bboxRefMtrx = warp.fit(Xsrc=opt.canon4pts, Xdst=opt.bbox4pts)
    # a plain STN always uses a single warp, whatever --warpN says
    if opt.netType == "STN":
        opt.warpN = 1

    print("({0}) {1}".format(util.toGreen("{0}".format(opt.group)),
                             util.toGreen("{0}".format(opt.model))))
    print("------------------------------------------")
    print("network type: {0}, recurrent warps: {1}".format(
        util.toYellow("{0}".format(opt.netType)),
        util.toYellow(
            "{0}".format(opt.warpN if opt.netType == "IC-STN" else "X"))))
    print("batch size: {0}, image size: {1}x{2}".format(
        util.toYellow("{0}".format(opt.batchSize)),
        util.toYellow("{0}".format(opt.H)),
        util.toYellow("{0}".format(opt.W))))
    print("warpScale: (pert) {0} (trans) {1}".format(
        util.toYellow("{0}".format(opt.pertScale)),
        util.toYellow("{0}".format(opt.transScale))))
    if training:
        print("[geometric predictor] stddev={0}, lr={1}".format(
            util.toYellow("{0:.0e}".format(opt.stdGP)),
            util.toYellow("{0:.0e}".format(opt.lrGP))))
        print("[classification network] stddev={0}, lr={1}".format(
            util.toYellow("{0:.0e}".format(opt.stdC)),
            util.toYellow("{0:.0e}".format(opt.lrC))))
    print("------------------------------------------")
    if training:
        print(
            util.toMagenta("training model ({0}) {1}...".format(
                opt.group, opt.model)))
    return opt
def set(training):
    """Parse the command line and build the run configuration.

    Besides filling the namespace, this asserts that CUDA is available and
    switches torch's default tensor type to ``torch.cuda.FloatTensor``.

    Args:
        training: truthy registers the training-only flags; falsy registers
            the evaluation-only flag instead.

    Returns:
        The argparse namespace, extended with fixed constants and with values
        derived from the parsed flags. A configuration summary is printed as
        a side effect.

    Raises:
        AssertionError: if ``torch.cuda.is_available()`` is false.
    """
    # parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("netType", choices=["CNN", "STN", "IC-STN"],
                        help="type of network")
    parser.add_argument("--group", default="0", help="name for group")
    parser.add_argument("--model", default="test",
                        help="name for model instance")
    parser.add_argument("--size", default="28x28", help="image resolution")
    parser.add_argument(
        "--warpType", default="homography",
        help="type of warp function on images",
        choices=["translation", "similarity", "affine", "homography"])
    parser.add_argument(
        "--warpN", type=int, default=4,
        help="number of recurrent transformations (for IC-STN)")
    parser.add_argument("--stdC", type=float, default=0.1,
                        help="initialization stddev (classification network)")
    parser.add_argument("--stdGP", type=float, default=0.1,
                        help="initialization stddev (geometric predictor)")
    parser.add_argument("--pertScale", type=float, default=0.25,
                        help="initial perturbation scale")
    parser.add_argument("--transScale", type=float, default=0.25,
                        help="initial translation scale")
    if training:  # training
        parser.add_argument("--port", type=int, default=8097,
                            help="port number for visdom visualization")
        parser.add_argument("--batchSize", type=int, default=100,
                            help="batch size for SGD")
        parser.add_argument("--lrC", type=float, default=1e-2,
                            help="learning rate (classification network)")
        parser.add_argument("--lrGP", type=float, default=None,
                            help="learning rate (geometric predictor)")
        parser.add_argument("--lrDecay", type=float, default=1.0,
                            help="learning rate decay")
        parser.add_argument("--lrStep", type=int, default=100000,
                            help="learning rate decay step size")
        parser.add_argument("--fromIt", type=int, default=0,
                            help="resume training from iteration number")
        parser.add_argument("--toIt", type=int, default=500000,
                            help="run training to iteration number")
    else:  # evaluation
        parser.add_argument("--batchSize", type=int, default=1,
                            help="batch size for evaluation")
    opt = parser.parse_args()

    # --lrGP is only registered in training mode, so the attribute is absent
    # in evaluation mode; reading opt.lrGP directly raised AttributeError
    # there. getattr covers both the "absent" and the "not given" case, and
    # the per-network default is then filled in for either mode.
    if getattr(opt, "lrGP", None) is None:
        opt.lrGP = 0 if opt.netType == "CNN" else \
                   1e-2 if opt.netType == "STN" else \
                   1e-4 if opt.netType == "IC-STN" else None

    # --- below are automatically set ---
    assert (torch.cuda.is_available())  # support only training on GPU for now
    torch.set_default_tensor_type("torch.cuda.FloatTensor")
    opt.training = training
    opt.H, opt.W = [int(x) for x in opt.size.split("x")]
    opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))
    opt.warpDim = 2 if opt.warpType == "translation" else \
                  4 if opt.warpType == "similarity" else \
                  6 if opt.warpType == "affine" else \
                  8 if opt.warpType == "homography" else None
    opt.labelN = 10
    opt.canon4pts = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]],
                             dtype=np.float32)
    opt.image4pts = np.array(
        [[0, 0], [0, opt.H - 1], [opt.W - 1, opt.H - 1], [opt.W - 1, 0]],
        dtype=np.float32)
    # the reference matrix is the 3x3 identity here (no fitted mapping)
    opt.refMtrx = np.eye(3).astype(np.float32)
    # a plain STN always uses a single warp, whatever --warpN says
    if opt.netType == "STN":
        opt.warpN = 1

    print("({0}) {1}".format(util.toGreen("{0}".format(opt.group)),
                             util.toGreen("{0}".format(opt.model))))
    print("------------------------------------------")
    print("network type: {0}, recurrent warps: {1}".format(
        util.toYellow("{0}".format(opt.netType)),
        util.toYellow(
            "{0}".format(opt.warpN if opt.netType == "IC-STN" else "X"))))
    print("batch size: {0}, image size: {1}x{2}".format(
        util.toYellow("{0}".format(opt.batchSize)),
        util.toYellow("{0}".format(opt.H)),
        util.toYellow("{0}".format(opt.W))))
    print("warpScale: (pert) {0} (trans) {1}".format(
        util.toYellow("{0}".format(opt.pertScale)),
        util.toYellow("{0}".format(opt.transScale))))
    if training:
        print("[geometric predictor] stddev={0}, lr={1}".format(
            util.toYellow("{0:.0e}".format(opt.stdGP)),
            util.toYellow("{0:.0e}".format(opt.lrGP))))
        print("[classification network] stddev={0}, lr={1}".format(
            util.toYellow("{0:.0e}".format(opt.stdC)),
            util.toYellow("{0:.0e}".format(opt.lrC))))
    print("------------------------------------------")
    if training:
        print(
            util.toMagenta("training model ({0}) {1}...".format(
                opt.group, opt.model)))
    return opt