def svhn_encoder(x, numHidden, labels, num_labels, mb_size, image_width):
    """Assemble the label-conditioned SVHN encoder.

    The input is transposed with axes ``(0, 3, 1, 2)``, passed through
    ``normalize`` and then through nine ``ConvPoolLayer`` instances followed
    by two ``HiddenLayer`` instances.  The result is concatenated with
    ``labels`` along axis 1 and fed through two further hidden layers.

    Args:
        x: Input tensor; it is transposed with ``(0, 3, 1, 2)`` before use
            (presumably batch/rows/columns/channels on entry -- TODO confirm
            against the caller).
        numHidden: Width of every fully connected layer built here.
        labels: Tensor concatenated with the encoder features on axis 1
            (presumably one-hot with ``num_labels`` columns -- verify).
        num_labels: Number of columns ``labels`` adds to the first
            label-conditioned hidden layer's input.
        mb_size: Accepted for interface compatibility; not used here.
        image_width: Accepted for interface compatibility; not used here.

    Returns:
        dict with keys ``'layers'`` (all thirteen layer objects in
        application order), ``'extra_params'`` (always an empty list) and
        ``'output'`` (output of the last hidden layer).
    """
    c = [3, 64, 128, 256, 256]

    # One entry per convolution: (in_channels, out_channels, extra kwargs).
    # ``stride`` is only forwarded for the layers that set it explicitly, so
    # every other layer keeps ConvPoolLayer's own default.
    strided = {'stride': 2}
    conv_specs = [
        (c[0], c[1], {}),
        (c[1], c[1], {}),
        (c[1], c[1], strided),
        (c[1], c[2], {}),
        (c[2], c[2], {}),
        (c[2], c[2], strided),
        (c[2], c[3], {}),
        (c[3], c[3], {}),
        (c[3], c[4], strided),
    ]
    encoder_layers = [
        ConvPoolLayer(in_channels=n_in, out_channels=n_out, kernel_len=5,
                      batch_norm=False, **extra)
        for n_in, n_out, extra in conv_specs
    ]

    # NOTE(review): the 4 * 4 spatial size is hard-coded; it looks like it
    # assumes a fixed input width rather than using image_width -- confirm.
    encoder_layers.append(HiddenLayer(num_in=4 * 4 * c[4], num_out=numHidden,
                                      flatten_input=True, batch_norm=False))
    encoder_layers.append(HiddenLayer(num_in=numHidden, num_out=numHidden,
                                      batch_norm=True))

    # Thread the activation through the stack; only the final value is needed.
    features = normalize(x.transpose(0, 3, 1, 2))
    for layer in encoder_layers:
        features = layer.output(features)

    h1 = HiddenLayer(num_in=numHidden + num_labels, num_out=numHidden,
                     batch_norm=True)
    h2 = HiddenLayer(num_in=numHidden, num_out=numHidden, batch_norm=True)

    conditioned = T.concatenate([features, labels], axis=1)
    h2_out = h2.output(h1.output(conditioned))

    return {
        'layers': encoder_layers + [h1, h2],
        'extra_params': [],
        'output': h2_out,
    }
def imagenet_encoder(x, numHidden, mb_size, image_width):
    """Put two ReLU hidden layers on top of the ``vgg_network`` output.

    Args:
        x: Input forwarded unchanged to ``vgg_network``.
        numHidden: Output width of both hidden layers.
        mb_size: Forwarded unchanged to ``vgg_network``.
        image_width: Forwarded unchanged to ``vgg_network``.

    Returns:
        dict with keys ``'layers'`` (the two hidden layers created here),
        ``'extra_params'`` (the ``'params'`` entry returned by
        ``vgg_network``) and ``'output'`` (output of the second hidden
        layer).
    """
    vgg = vgg_network(x, mb_size, image_width)

    # The first layer flattens its input; num_in is fixed at 4 * 4 * 512
    # (presumably the VGG feature-map size -- TODO confirm).
    fc_first = HiddenLayer(num_in=4 * 4 * 512, num_out=numHidden,
                           flatten_input=True, activation='relu',
                           batch_norm=True)
    fc_second = HiddenLayer(num_in=numHidden, num_out=numHidden,
                            activation='relu', batch_norm=True)

    encoded = fc_second.output(fc_first.output(vgg['output']))

    return {
        'layers': [fc_first, fc_second],
        'extra_params': vgg['params'],
        'output': encoded,
    }
def imagenet_decoder_1(z, z_sampled, numLatent, numHidden, mb_size):
    """Build the ImageNet decoder twice over one shared set of parameters.

    One copy of the network is fed ``z`` and the other ``z_sampled``.  The
    second copy creates no parameters of its own: every layer in it receives
    ``paramMap=<matching layer of the first copy>.getParams()``.

    Each copy consists of two ReLU hidden layers, a reshape to
    ``(mb_size, 4, 4, 512)`` followed by ``dimshuffle(0, 3, 1, 2)``, and six
    ``DeConvLayer`` instances with ``upsample_rate=2`` and channel counts
    512 -> 256 -> 128 -> 64 -> 32 -> 16 -> 3.  The last deconvolution has no
    activation and ``batch_norm=False``.

    Args:
        z: Input of the first copy.
        z_sampled: Input of the parameter-sharing second copy.
        numLatent: Width of ``z`` / ``z_sampled``.
        numHidden: Width of the first hidden layer.
        mb_size: Minibatch size.  Used for the feature-map reshape and as
            the ``batch_size`` of every ``DeConvLayer``.  The deconvolution
            layers previously received a hard-coded 100, which only agreed
            with the reshape when ``mb_size == 100``.

    Returns:
        dict with keys ``'layers'`` (name -> layer object for the copy fed
        by ``z``: ``h3``, ``h4``, ``o1``..``o5``, ``y``), ``'output'`` (final
        output of the ``z`` copy after ``dimshuffle(0, 2, 3, 1)``) and
        ``'output_generated'`` (the same for the ``z_sampled`` copy).
    """
    deconv_shapes = [512, 256, 128, 64, 32, 16, 3]
    deconv_names = ['o1', 'o2', 'o3', 'o4', 'o5', 'y']

    h3 = HiddenLayer(z, num_in=numLatent, num_out=numHidden,
                     initialization='xavier', name="h3", activation="relu")
    h3_generated = HiddenLayer(z_sampled, num_in=numLatent,
                               num_out=numHidden, initialization='xavier',
                               paramMap=h3.getParams(), name="h3",
                               activation="relu")

    h4 = HiddenLayer(h3.output, num_in=numHidden,
                     num_out=4 * 4 * deconv_shapes[0],
                     initialization='xavier', name="h4", activation="relu")
    h4_generated = HiddenLayer(h3_generated.output, num_in=numHidden,
                               num_out=4 * 4 * deconv_shapes[0],
                               initialization='xavier',
                               paramMap=h4.getParams(), name="h4",
                               activation="relu")

    def _as_feature_maps(flat):
        # Flat hidden output -> (mb_size, 4, 4, C), then the channel axis is
        # moved to position 1 for the deconvolution layers.
        shaped = flat.reshape((mb_size, 4, 4, deconv_shapes[0]))
        return shaped.dimshuffle(0, 3, 1, 2)

    def _deconv_stack(stack_input, shared_with=None):
        # Build the six deconvolution layers in order.  When shared_with is
        # given, each new layer reuses the parameters of its counterpart.
        built = []
        current = stack_input
        last = len(deconv_names) - 1
        for idx, layer_name in enumerate(deconv_names):
            side = 4 * 2 ** idx  # 4, 8, 16, 32, 64, 128
            # batch_norm is only passed explicitly for the final layer so
            # the other layers keep DeConvLayer's own default.
            extra = {'batch_norm': False} if idx == last else {}
            param_map = (None if shared_with is None
                         else shared_with[idx].getParams())
            layer = DeConvLayer(current,
                                in_channels=deconv_shapes[idx],
                                out_channels=deconv_shapes[idx + 1],
                                kernel_len=5,
                                in_rows=side,
                                in_columns=side,
                                batch_size=mb_size,
                                bias_init=0.0,
                                name=layer_name,
                                paramMap=param_map,
                                upsample_rate=2,
                                activation=None if idx == last else 'relu',
                                **extra)
            built.append(layer)
            current = layer.output
        return built

    h4_reshaped = _as_feature_maps(h4.output)
    h4_generated_reshaped = _as_feature_maps(h4_generated.output)

    # The parameter-owning stack must exist before the sharing stack.
    deconv_layers = _deconv_stack(h4_reshaped)
    deconv_layers_generated = _deconv_stack(h4_generated_reshaped,
                                            shared_with=deconv_layers)

    # Move the channel axis back to the last position for the caller.
    output = deconv_layers[-1].output.dimshuffle(0, 2, 3, 1)
    sample_output = deconv_layers_generated[-1].output.dimshuffle(0, 2, 3, 1)

    layers = {'h3': h3, 'h4': h4}
    layers.update(zip(deconv_names, deconv_layers))

    return {'layers': layers,
            'output': output,
            'output_generated': sample_output}
from Encoders.Svhn import svhn_encoder as encoder_class elif config['dataset'] == 'stl': from Encoders.Stl import encoder as encoder_class else: raise Exception() labels_reshaped = T.zeros(shape = (config['mb_size'], config['num_labels'])) labels_reshaped = T.set_subtensor(labels_reshaped[T.arange(config['mb_size']), labels], 1.0) encoder = encoder_class(x, numHidden, mb_size=config['mb_size'], image_width=config['image_width']) encoder_layers = encoder['layers'] encoder_output = encoder['output'] encoder_extra_params = encoder['extra_params'] z_mean_layer = HiddenLayer(num_in=numHidden, num_out=numLatent, activation=None) z_var_layer = HiddenLayer(num_in=numHidden, num_out=numLatent, activation='softplus') z_mean = z_mean_layer.output(encoder_output) z_var = T.maximum(1.0e-6, z_var_layer.output(encoder_output)) z_sampled = T.matrix() z_reconstruction = z_mean + z_sampled * T.sqrt(z_var) def join(a,b): return T.concatenate([a,b], axis = 1) if config["dataset"] == "imagenet": from Decoders.Imagenet import decoder
if config['dataset'] == 'imagenet': from Encoders.Imagenet import imagenet_encoder as encoder_class elif config['dataset'] == 'svhn' or config['dataset'] == 'cifar': from Encoders.Svhn import svhn_encoder as encoder_class elif config['dataset'] == 'stl': from Encoders.Stl import encoder as encoder_class else: raise Exception() encoder = encoder_class(x, numHidden, mb_size=config['mb_size'], image_width=config['image_width']) encoder_layers = encoder['layers'] encoder_output = encoder['output'] encoder_extra_params = encoder['extra_params'] z_mean_layer = HiddenLayer(num_in=numHidden, num_out=numLatent, activation=None) z_var_layer = HiddenLayer(num_in=numHidden, num_out=numLatent, activation='softplus') z_mean = z_mean_layer.output(encoder_output) z_var = T.maximum(z_var_layer.output(encoder_output), 1e-12) z_sampled = srng.normal(size=(config['mb_size'], numLatent)) z = z_sampled * T.sqrt(z_var) + z_mean if config["dataset"] == "imagenet": from Decoders.Imagenet import imagenet_decoder decoder = imagenet_decoder(z=z, z_sampled=z_sampled, numHidden=numHidden, numLatent=numLatent, mb_size=config['mb_size'], image_width=config['image_width']) elif config["dataset"] == "svhn" or config['dataset'] == 'cifar': from Decoders.Svhn import svhn_decoder