def __init__(self, name_scope, indim, latentdim, half=False):
    """Build a dense VAE (TF/Keras flavor).

    Args:
        name_scope: identifier stored on the instance (used for naming/scoping).
        indim: dimensionality of the flattened input; also the decoder's output width.
        latentdim: size of the latent code; hidden layers are twice this width.
        half: when True, skip the encoder half (encoder, posterior heads, KLD);
            only the decoder and sampler are constructed.
    """
    super(VAE, self).__init__()
    self.half = half
    self.name_scope = name_scope
    hidden = latentdim * 2
    # NOTE: keep the exact `is False` check — a truthy-but-non-bool `half`
    # would behave differently under plain `not half`.
    if self.half is False:
        # Encoder trunk: three Dense layers with ELU nonlinearities between.
        trunk = [
            tf.keras.layers.Dense(hidden),
            tf.keras.layers.Activation('elu'),
            tf.keras.layers.Dense(hidden),
            tf.keras.layers.Activation('elu'),
            tf.keras.layers.Dense(hidden),
        ]
        self.encoder = tf.keras.Sequential(trunk, name="encoder")
        # Posterior heads emitting the parameters of q(z | x).
        self.mean = tf.keras.layers.Dense(latentdim, name="mean")
        self.logvar = tf.keras.layers.Dense(latentdim, name="logvar")
        self.bikld = BiKLD()
    out_width = indim
    # Decoder mirrors the encoder trunk and maps back to the input width.
    stack = [
        tf.keras.layers.Dense(hidden),
        tf.keras.layers.Activation('elu'),
        tf.keras.layers.Dense(hidden),
        tf.keras.layers.Activation('elu'),
        tf.keras.layers.Dense(out_width),
    ]
    self.decoder = tf.keras.Sequential(stack, name="decoder")
    self.sampler = reparameterize()
def __init__(self, indim, latentdim, half=False):
    """Build a dense VAE (PyTorch flavor).

    Args:
        indim: dimensionality of the flattened input; also the decoder's output width.
        latentdim: size of the latent code; hidden layers are twice this width.
        half: when True, skip the encoder half (encoder, posterior heads, KLD);
            only the decoder and sampler are constructed.
    """
    super(VAE, self).__init__()
    self.half = half
    width = latentdim * 2
    # NOTE: keep the exact `is False` check — a truthy-but-non-bool `half`
    # would behave differently under plain `not half`.
    if self.half is False:
        # Encoder trunk: three Linear layers joined by in-place ELUs.
        self.encoder = nn.Sequential(
            nn.Linear(indim, width),
            nn.ELU(inplace=True),
            nn.Linear(width, width),
            nn.ELU(inplace=True),
            nn.Linear(width, width),
        )
        # Posterior heads emitting the parameters of q(z | x).
        self.mean = nn.Linear(width, latentdim)
        self.logvar = nn.Linear(width, latentdim)
        self.bikld = BiKLD()
    # Decoder mirrors the encoder trunk and maps back to the input width.
    self.decoder = nn.Sequential(
        nn.Linear(latentdim, width),
        nn.ELU(inplace=True),
        nn.Linear(width, width),
        nn.ELU(inplace=True),
        nn.Linear(width, indim),
    )
    self.sampler = reparameterize()
def __init__(self, hiddim=160, latentdim=12, word_size=[-1, 16, 16],
             pos_size=[4, 1, 1], nres=4, nlayers=1, nonlinear='elu',
             dictionary=None, op=['PROD', 'CAT'], lmap_size=0, downsample=2,
             gpu_ids=None, multigpu_full=False, lambdakl=-1, bg_bias=False,
             normalize='instance_norm', loss=None, debug_mode=True,
             batch_size=None):
    """Assemble the PNP-Net model (TF/Keras flavor).

    Wires up the reader/writer CNNs, concept mappers, neural modules
    (combine/describe/transform), the box/offset VAEs, the renderer, and the
    loss functions.

    Args:
        hiddim: hidden channel width of the reader/writer networks.
        latentdim: channel width of the latent feature map; overrides word_size[0].
        word_size: [C, H, W] template for visual-word latents (C is replaced).
        pos_size: [C, H, W] template for positional latents.
        nres, nlayers: depth hyper-parameters for the sub-networks.
        nonlinear: activation name (stored implicitly by sub-modules).
        dictionary: concept dictionary handed to downstream consumers.
        op: two-element list selecting the combine and describe fusion ops.
        lmap_size: spatial size of the latent map; image size is lmap_size * 2**downsample.
        downsample: number of 2x down-sampling stages in the reader/writer.
        gpu_ids, multigpu_full: device-placement options (gpu_ids unused here).
        lambdakl: if > 0, use the lambda-weighted KLD variant.
        bg_bias, normalize, debug_mode, batch_size: stored configuration flags.
        loss: 'l1' or 'l2' selects the pixel reconstruction criterion.
    """
    super(PNPNet, self).__init__()
    ## basic settings
    # BUGFIX: copy before mutating. `word_size` defaults to a shared mutable
    # list; writing word_size[0] in place would corrupt the default (and the
    # caller's list) for every later instantiation.
    word_size = list(word_size)
    word_size[0] = latentdim
    self.word_size = word_size
    self.latentdim = latentdim
    self.hiddim = hiddim
    self.downsample = downsample  # -> downsample times
    self.ds = 2**self.downsample  # -> downsample ratio
    self.nres = nres
    self.nlayers = nlayers
    self.lmap_size = lmap_size
    self.im_size = lmap_size * self.ds
    self.multigpu_full = multigpu_full
    self.bg_bias = bg_bias
    self.normalize = normalize
    self.debug_mode = debug_mode
    self.batch_size = batch_size
    # dictionary
    self.dictionary = dictionary
    ########## modules ##########
    # proposal networks: encode the image into a hiddim-channel feature map
    self.reader = Reader(indim=3,
                         hiddim=hiddim,
                         outdim=hiddim,
                         ds_times=self.downsample,
                         normalize=normalize,
                         nlayers=nlayers)
    # heads producing the posterior mean / variance feature maps
    self.h_mean = h_mean(latentdim)
    self.h_var = h_var(latentdim)
    # visual words: map concepts to latent distributions
    self.vis_dist = ConceptMapper(word_size)
    self.pos_dist = ConceptMapper(pos_size)
    # neural modules (op[0] drives combine, op[1] drives describe)
    self.combine_vis = Combine_Vis(hiddim_v=latentdim, op=op[0])
    self.combine_pos = Combine_Pos(hiddim_p=pos_size[0], op=op[0])
    self.describe_vis = Describe_Vis(hiddim_v=latentdim, op=op[1])
    self.describe_pos = Describe_Pos(hiddim_p=pos_size[0], op=op[1])
    self.transform = Transform(matrix='default')
    # small vaes for bounding boxes and offsets learning
    # box input: H, W
    self.box_vae = VAE("box_vae", indim=2, latentdim=pos_size[0])
    # offset input: [x0, y0, x1, y1] + condition:
    # [H0, W0, H1, W1, im_H, im_W, (im_H-H0), (im_W-W0), (im_H-H1), (im_W-W1)]
    self.offset_vae = VAE("offset_vae", indim=4, latentdim=pos_size[0])
    self.renderer = DistributionRender(hiddim=latentdim)
    # pixel writer: decode the latent map back to an RGB image
    self.writer = Writer(indim=latentdim,
                         hiddim=hiddim,
                         outdim=3,
                         ds_times=self.downsample,
                         normalize=normalize,
                         nlayers=nlayers)
    ## loss functions & sampler
    self.sampler = reparameterize()
    if lambdakl > 0:
        from lib.LambdaBiKLD import BiKLD
        self.bikld = BiKLD(lambda_t=lambdakl, k=None)
    else:
        from lib.BiKLD import BiKLD
        self.bikld = BiKLD()
    # NOTE(review): for any other `loss` value (including the default None),
    # self.pixelrecon_criterion is never set and a later access raises
    # AttributeError — presumably callers always pass 'l1' or 'l2'; verify.
    if loss == 'l1':
        self.pixelrecon_criterion = tf.losses.MeanAbsoluteError()
    elif loss == 'l2':
        self.pixelrecon_criterion = tf.losses.MeanSquaredError()
    self.pos_criterion = tf.losses.MeanSquaredError()
    ## biases
    # NHWC layout of the latent canvas (contrast with the NCHW torch port)
    self.latent_canvas_size = [
        1, self.lmap_size, self.lmap_size, self.latentdim
    ]
def __init__(self, hiddim=160, latentdim=12, word_size=[-1, 16, 16],
             pos_size=[4, 1, 1], nres=4, nlayers=1, nonlinear='elu',
             dictionary=None, op=['PROD', 'CAT'], lmap_size=0, downsample=2,
             gpu_ids=None, multigpu_full=False, lambdakl=-1, bg_bias=False,
             normalize='instance_norm', loss=None, debug_mode=True):
    """Assemble the PNP-Net model (PyTorch flavor).

    Wires up the reader/writer CNNs, the positional concept mapper, neural
    modules (combine/describe/transform), the box/offset VAEs, the renderer,
    the loss functions, and the learned background biases.

    Args:
        hiddim: hidden channel width of the reader/writer networks.
        latentdim: channel width of the latent feature map; overrides word_size[0].
        word_size: [C, H, W] template for visual-word latents (C is replaced).
        pos_size: [C, H, W] template for positional latents.
        nres, nlayers: depth hyper-parameters for the sub-networks.
        nonlinear: activation name (stored implicitly by sub-modules).
        dictionary: concept dictionary; its length sizes the ConceptMapper.
        op: two-element list selecting the combine and describe fusion ops.
        lmap_size: spatial size of the latent map; image size is lmap_size * 2**downsample.
        downsample: number of 2x down-sampling stages in the reader/writer.
        gpu_ids, multigpu_full: device-placement options (gpu_ids unused here).
        lambdakl: if > 0, use the lambda-weighted KLD variant.
        bg_bias, normalize, debug_mode: stored configuration flags.
        loss: 'l1' or 'l2' selects the pixel reconstruction criterion.
    """
    super(PNPNet, self).__init__()
    ## basic settings
    # BUGFIX: copy before mutating. `word_size` defaults to a shared mutable
    # list; writing word_size[0] in place would corrupt the default (and the
    # caller's list) for every later instantiation.
    word_size = list(word_size)
    word_size[0] = latentdim
    self.word_size = word_size
    self.latentdim = latentdim
    self.hiddim = hiddim
    self.downsample = downsample  # -> downsample times
    self.ds = 2**self.downsample  # -> downsample ratio
    self.nres = nres
    self.nlayers = nlayers
    self.lmap_size = lmap_size
    self.im_size = lmap_size * self.ds
    self.multigpu_full = multigpu_full
    self.bg_bias = bg_bias
    self.normalize = normalize
    self.debug_mode = debug_mode
    # CLEVR-style attribute vocabularies; gt order -> shape, material, color, size
    self.color = [
        'gray', 'green', 'blue', 'purple', 'yellow', 'cyan', 'brown', 'red'
    ]
    self.size = ['large', 'small']
    self.shape = ['sphere', 'cube', 'cylinder']
    self.material = ['rubber', 'metal']
    self.dictionaries = [self.color, self.size, self.shape, self.material]
    # total one-hot width over all attribute vocabularies
    self.dimension = sum(len(l) for l in self.dictionaries)
    # dictionary
    self.dictionary = dictionary
    ########## modules ##########
    # proposal networks: encode the image into a hiddim-channel feature map
    self.reader = Reader(indim=3,
                         hiddim=hiddim,
                         outdim=hiddim,
                         ds_times=self.downsample,
                         normalize=normalize,
                         nlayers=nlayers)
    # heads producing the posterior mean / variance feature maps
    self.h_mean = nn.Conv2d(hiddim, latentdim, 3, 1, 1)
    self.h_var = nn.Conv2d(hiddim, latentdim, 3, 1, 1)
    # pixel writer: decode the latent map back to an RGB image
    self.writer = Writer(indim=latentdim,
                         hiddim=hiddim,
                         outdim=3,
                         ds_times=self.downsample,
                         normalize=normalize,
                         nlayers=nlayers)
    # visual words
    # NOTE(review): with the default dictionary=None, len(dictionary) raises
    # TypeError — presumably callers always supply a dictionary; verify.
    self.pos_dist = ConceptMapper(pos_size, len(dictionary))
    self.renderer = DistributionRender(hiddim=latentdim)
    # neural modules (op[0] drives combine, op[1] drives describe)
    self.combine = Combine(hiddim_v=latentdim, hiddim_p=pos_size[0], op=op[0])
    self.describe = Describe(hiddim_v=latentdim, hiddim_p=pos_size[0], op=op[1])
    self.transform = Transform(matrix='default')
    # small vaes for bounding boxes and offsets learning
    # box input: H, W
    self.box_vae = VAE(indim=2, latentdim=pos_size[0])
    # offset input: [x0, y0, x1, y1] + condition:
    # [H0, W0, H1, W1, im_H, im_W, (im_H-H0), (im_W-W0), (im_H-H1), (im_W-W1)]
    self.offset_vae = VAE(indim=4, latentdim=pos_size[0])
    ## loss functions & sampler
    self.sampler = reparameterize()
    if lambdakl > 0:
        from lib.LambdaBiKLD import BiKLD
        self.bikld = BiKLD(lambda_t=lambdakl, k=None)
    else:
        from lib.BiKLD import BiKLD
        self.bikld = BiKLD()
    # NOTE(review): for any other `loss` value (including the default None),
    # self.pixelrecon_criterion is never set and the size_average assignment
    # below raises AttributeError — presumably callers always pass 'l1'/'l2'.
    # Also, `size_average` is a deprecated torch attribute; kept for parity
    # with the original training setup.
    if loss == 'l1':
        self.pixelrecon_criterion = nn.L1Loss()
    elif loss == 'l2':
        self.pixelrecon_criterion = nn.MSELoss()
    self.pixelrecon_criterion.size_average = False
    self.pos_criterion = nn.MSELoss()
    self.pos_criterion.size_average = False
    ## biases: learned background mean/var maps, driven by a constant input
    self.bias_mean = nn.Linear(1,
                               self.latentdim * self.lmap_size * self.lmap_size,
                               bias=False)
    self.bias_var = nn.Linear(1,
                              self.latentdim * self.lmap_size * self.lmap_size,
                              bias=False)
    # NCHW layout of the latent canvas (contrast with the NHWC TF port)
    self.latent_canvas_size = torch.Size(
        [1, self.latentdim, self.lmap_size, self.lmap_size])