def inference(images):
    """Build the model.

    Args:
        images: Images returned from distorted_inputs() or inputs().

    Returns:
        Logits.
    """
    with tf.name_scope('inference'):
        model = vgg_model.Vgg16()
        model.build(images)
        return model.get_softmax_linear()
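# Hedged usage sketch for inference(), assuming TF 1.x. The placeholder
# shape is an assumption (vgg_model.Vgg16 is typically fed 224x224 RGB);
# `image_batch` is a hypothetical numpy array of matching shape.
images = tf.placeholder(tf.float32, [None, 224, 224, 3], name="images")
logits = inference(images)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # logits_val = sess.run(logits, feed_dict={images: image_batch})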
def __init__(self, hyperparameters):
    super(UNIT_Trainer, self).__init__()
    lr = hyperparameters['lr']

    # Initiate the networks
    self.gen_a = VAEGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
    self.gen_b = VAEGen(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
    self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
    self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.mse_loss = nn.MSELoss()

    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
    self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)

    # Network weight initialization
    self.apply(weights_init(hyperparameters['init']))
    self.dis_a.apply(weights_init('gaussian'))
    self.dis_b.apply(weights_init('gaussian'))

    # Load VGG model if needed
    if 'vgg_w' in hyperparameters and hyperparameters['vgg_w'] > 0:
        self.vgg = load_vgg16(hyperparameters['vgg_model_path'] + '/models')
        self.vgg.eval()
        for param in self.vgg.parameters():
            param.requires_grad = False
    if 'vgg_content' in hyperparameters and hyperparameters['vgg_content'] > 0:
        self.vgg_content = vgg.Vgg16()
        self.vgg_content.eval()
        for param in self.vgg_content.parameters():
            param.requires_grad = False
    self.add_noise = add_noise if 'add_noise' in hyperparameters and hyperparameters['add_noise'] else None
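# Hedged sketch: an illustrative hyperparameters dict for UNIT_Trainer.
# The key names mirror the lookups in __init__ above; the values and the
# empty sub-configs are assumptions, not a published configuration.
hyperparameters = {
    'lr': 1e-4,
    'beta1': 0.5,
    'beta2': 0.999,
    'weight_decay': 1e-4,
    'init': 'kaiming',
    'input_dim_a': 3,
    'input_dim_b': 3,
    'gen': {},   # fill with the generator config expected by VAEGen
    'dis': {},   # fill with the discriminator config expected by MsImageDis
    'vgg_w': 0,  # > 0 enables the perceptual-loss VGG branch
}
trainer = UNIT_Trainer(hyperparameters)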
def __init__(self, opt):
    self.vocab_size = opt.vocab_size
    self.input_encoding_size = opt.input_encoding_size
    self.rnn_size = opt.rnn_size
    self.num_layers = opt.num_layers
    self.drop_prob_lm = opt.drop_prob_lm
    self.seq_length = opt.seq_length
    self.seq_per_img = opt.seq_per_img
    self.att_hid_size = opt.att_hid_size
    self.opt = opt

    # Flag indicating training (True) or evaluation (False) mode
    self.training = tf.Variable(True, trainable=False, name="training")

    # Input variables
    self.images = tf.placeholder(tf.float32, [None, 224, 224, 3], name="images")
    self.labels = tf.placeholder(tf.int32, [None, self.seq_length + 2], name="labels")
    self.masks = tf.placeholder(tf.float32, [None, self.seq_length + 2], name="masks")

    # Build CNN; skip loading pretrained CNN weights when resuming from a checkpoint
    if vars(self.opt).get('start_from', None):
        cnn_weight = None
    else:
        cnn_weight = vars(self.opt).get('cnn_weight', None)
    if self.opt.cnn_model == 'vgg16':
        self.cnn = vgg.Vgg16(cnn_weight)
    if self.opt.cnn_model == 'vgg19':
        self.cnn = vgg.Vgg19(cnn_weight)
    with tf.variable_scope("cnn"):
        self.cnn.build(self.images)
    if self.opt.cnn_model == 'vgg16':
        self.context = self.cnn.conv5_3
    if self.opt.cnn_model == 'vgg19':
        self.context = self.cnn.conv5_4
    self.fc7 = self.cnn.drop7
    self.cnn_training = self.cnn.training

    # Variables in the language model
    with tf.variable_scope("rnnlm"):
        # Word embedding table
        self.Wemb = tf.Variable(
            tf.random_uniform([self.vocab_size + 1, self.input_encoding_size], -0.1, 0.1),
            name='Wemb')

        # RNN cell
        if opt.rnn_type == 'rnn':
            self.cell_fn = cell_fn = tf.contrib.rnn.BasicRNNCell
        elif opt.rnn_type == 'gru':
            self.cell_fn = cell_fn = tf.contrib.rnn.GRUCell
        elif opt.rnn_type == 'lstm':
            self.cell_fn = cell_fn = tf.contrib.rnn.LSTMCell
        else:
            raise Exception("RNN type not supported: {}".format(opt.rnn_type))

        # keep_prob is a function of the training flag
        self.keep_prob = tf.cond(
            self.training,
            lambda: tf.constant(1 - self.drop_prob_lm),
            lambda: tf.constant(1.0),
            name='keep_prob')

        # basic cell has a dropout wrapper
        self.basic_cell = cell = tf.contrib.rnn.DropoutWrapper(
            cell_fn(self.rnn_size), 1.0, self.keep_prob)
        # cell is the final cell of each timestep
        self.cell = tf.contrib.rnn.MultiRNNCell([cell] * opt.num_layers)
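# Hedged sketch: feeding the placeholders defined above, assuming TF 1.x.
# `model` is an instance of the class above; img_batch, label_batch and
# mask_batch are hypothetical numpy arrays shaped [N, 224, 224, 3],
# [N, seq_length + 2] and [N, seq_length + 2] respectively.
feed = {model.images: img_batch,
        model.labels: label_batch,
        model.masks: mask_batch}
# sess.run(train_op, feed_dict=feed)  # train_op is built elsewhere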
def __init__(self, opt):
    self.vocab_size = opt.vocab_size
    self.input_encoding_size = opt.input_encoding_size
    self.rnn_size = opt.rnn_size
    self.num_layers = opt.num_layers
    self.drop_prob_lm = opt.drop_prob_lm
    self.seq_length = opt.seq_length
    self.seq_per_img = opt.seq_per_img
    #self.batch_size = opt.batch_size
    self.opt = opt

    # Flag indicating training (True) or evaluation (False) mode
    self.training = tf.Variable(True, trainable=False, name="training")

    # Input variables
    self.images = tf.placeholder(tf.float32, [None, 224, 224, 3], name="images")
    self.labels = tf.placeholder(tf.int32, [None, self.seq_length + 2], name="labels")
    self.masks = tf.placeholder(tf.float32, [None, self.seq_length + 2], name="masks")

    # VGG 16; skip loading pretrained CNN weights when resuming from a checkpoint
    if self.opt.start_from is not None:
        cnn_weight = None
    else:
        cnn_weight = self.opt.cnn_weight
    if self.opt.cnn_model == 'vgg16':
        self.cnn = vgg.Vgg16(cnn_weight)
    if self.opt.cnn_model == 'vgg19':
        self.cnn = vgg.Vgg19(cnn_weight)
    with tf.variable_scope("cnn"):
        self.cnn.build(self.images)
    self.fc7 = self.cnn.drop7
    self.cnn_training = self.cnn.training

    """
    # Old model loading
    with open(self.opt.cnn_model) as f:
        fileContent = f.read()
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(fileContent)
        tf.import_graph_def(graph_def, input_map={"images": self.images}, name='vgg16')
        self.vgg16 = tf.get_default_graph()
        self.fc7 = self.vgg16.get_tensor_by_name("vgg16/Relu_1:0")
    """

    # Variables in the language model
    with tf.variable_scope("rnnlm"):
        # Word embedding table
        #with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(
            tf.random_uniform([self.vocab_size + 1, self.input_encoding_size], -0.1, 0.1),
            name='Wemb')

        # Output projection from the RNN state to the vocabulary
        self.embed_word_W = tf.Variable(
            tf.random_uniform([self.rnn_size, self.vocab_size + 1], -0.1, 0.1),
            name='embed_word_W')
        self.embed_word_b = self.init_bias(self.vocab_size + 1, name='embed_word_b')

        # RNN cell
        if opt.rnn_type == 'rnn':
            self.cell_fn = cell_fn = tf.nn.rnn_cell.BasicRNNCell
        elif opt.rnn_type == 'gru':
            self.cell_fn = cell_fn = tf.nn.rnn_cell.GRUCell
        elif opt.rnn_type == 'lstm':
            self.cell_fn = cell_fn = tf.nn.rnn_cell.LSTMCell
        else:
            raise Exception("RNN type not supported: {}".format(opt.rnn_type))

        # keep_prob is a function of the training flag
        self.keep_prob = tf.cond(
            self.training,
            lambda: tf.constant(1 - self.drop_prob_lm),
            lambda: tf.constant(1.0),
            name='keep_prob')

        # basic cell has a dropout wrapper
        self.basic_cell = cell = tf.nn.rnn_cell.DropoutWrapper(
            cell_fn(self.rnn_size, state_is_tuple=True), 1.0, self.keep_prob)
        self.cell = tf.nn.rnn_cell.MultiRNNCell([cell] * opt.num_layers, state_is_tuple=True)
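# Hedged sketch: how the embed_word_W / embed_word_b projection above is
# typically applied at each decode step; `output` is a hypothetical
# [batch, rnn_size] RNN output tensor, and the real decode loop lives
# elsewhere in this class.
logits = tf.nn.xw_plus_b(output, self.embed_word_W, self.embed_word_b)
probs = tf.nn.softmax(logits)  # distribution over the vocab_size + 1 words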
def __init__(self, opt):
    self.opt = opt
    self.vocab_size = opt.vocab_size
    self.input_encoding_size = opt.input_encoding_size
    self.drop_prob_lm = opt.drop_prob_lm
    self.seq_length = opt.seq_length
    self.rnn_size = opt.rnn_size
    self.seq_per_img = opt.seq_per_img
    self.batch_size = opt.batch_size
    self.att_size = opt.att_size
    self.num_boxes = opt.num_boxes

    # Flag indicating training (True) or evaluation (False) mode
    self.training = tf.Variable(True, trainable=False, name="training")
    self.num_layers = 1
    self.cap_iter = opt.cap_iter
    self.b = tf.placeholder(tf.float32, [None, self.num_boxes, 1, 1])

    if self.opt.cnn_model == "frcnn":
        print("using frcnn feature")
        self.cnn_dim = 2048
        self.images = tf.placeholder(tf.float32, [None, self.num_boxes, self.cnn_dim], name="features")
        self.context = self.images
    elif self.opt.cnn_model == 'vgg16' or self.opt.cnn_model == 'vgg19':
        print("using cnn model")
        self.images = tf.placeholder(tf.float32, [None, 224, 224, 3], name="images")
        cnn_weight = vars(self.opt).get('cnn_weight', None)
        if self.opt.cnn_model == 'vgg16':
            import vgg
            self.cnn = vgg.Vgg16(cnn_weight)
            self.cnn_dim = 512
        elif self.opt.cnn_model == 'vgg19':
            import vgg
            self.cnn = vgg.Vgg19(cnn_weight)
            self.cnn_dim = 512
        with tf.variable_scope("cnn"):
            self.cnn.build(self.images)
        if self.opt.cnn_model == 'vgg16':
            self.context = self.cnn.conv5_3
        elif self.opt.cnn_model == 'vgg19':
            self.context = self.cnn.conv5_4
        self.context = tf.reshape(self.context, [-1, self.num_boxes, self.cnn_dim])
    elif self.opt.cnn_model == 'resnet':
        print("using resnet feature")
        self.cnn_dim = 2048
        self.images = tf.placeholder(tf.float32, [None, self.num_boxes, self.cnn_dim], name="features")
        self.context = self.images

    self.labels = tf.placeholder(tf.int32, [None, self.seq_length + 2], name="labels")
    self.masks = tf.placeholder(tf.float32, [None, self.seq_length + 2], name="masks")

    with tf.variable_scope("rnnlm"):
        # L2-normalize the region features, then mean-pool them
        self.features = tf.nn.l2_normalize(self.context, axis=-1)
        #self.att_feat = slim.fully_connected(self.features, self.att_size, activation_fn=None, scope='att_feature_proj')
        self.avgFeat = tf.reduce_mean(self.features, axis=1, keep_dims=False)

        # Word embedding table
        self.Wemb = tf.Variable(
            tf.random_uniform([self.vocab_size, self.input_encoding_size], -0.1, 0.1),
            name='Wemb')

        # RNN cell
        if opt.rnn_type == 'rnn':
            self.cell_fn = cell_fn = tf.contrib.rnn.BasicRNNCell
        elif opt.rnn_type == 'gru':
            self.cell_fn = cell_fn = tf.contrib.rnn.GRUCell
        elif opt.rnn_type == 'lstm':
            self.cell_fn = cell_fn = tf.contrib.rnn.LSTMCell
        else:
            raise Exception("RNN type not supported: {}".format(opt.rnn_type))

        # keep_prob is a function of the training flag
        self.keep_prob = tf.cond(
            self.training,
            lambda: tf.constant(1 - self.drop_prob_lm),
            lambda: tf.constant(1.0),
            name='keep_prob')

        # basic cells have dropout wrappers
        self.basic_cell1 = cell1 = tf.contrib.rnn.DropoutWrapper(
            cell_fn(self.rnn_size), 1.0, self.keep_prob)
        self.basic_cell2 = cell2 = tf.contrib.rnn.DropoutWrapper(
            cell_fn(self.rnn_size), 1.0, self.keep_prob)
        # cells are the final cells of each timestep
        self.cell1 = tf.contrib.rnn.MultiRNNCell([cell1] * self.num_layers)
        self.cell2 = tf.contrib.rnn.MultiRNNCell([cell2] * self.num_layers)
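# Hedged sketch: with opt.cnn_model == "frcnn", the model consumes
# pre-extracted region features directly. `feat_batch` is a hypothetical
# numpy array of shape [N, num_boxes, 2048] matching the "features"
# placeholder; `box_mask` matches self.b ([N, num_boxes, 1, 1]).
feed = {model.images: feat_batch,
        model.b: box_mask,
        model.labels: label_batch,
        model.masks: mask_batch}
# sess.run(..., feed_dict=feed)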
parser.add_argument("--outputImg", default=None)
parser.add_argument("--vgg", default="models/vgg_normalised.pth")
parser.add_argument("--decoder", default="models/decoder.pth")
parser.add_argument("--cuda", default=False)
parser.add_argument("--alpha", default=1,
                    help="The weight that controls the degree of stylization. Should be between 0 and 1")
args = parser.parse_args()

# import vgg_normalised
# vgg = vgg_normalised.vgg_normalised
# vgg.load_state_dict(torch.load(args.vgg))
# vgg = nn.Sequential(*(vgg[i] for i in range(31)))
# vgg.cuda()
import vgg
vgg = vgg.Vgg16(requires_grad=False)

# decoder
from decoder import Decoder
decoder = Decoder()
decoder.model.load_state_dict(torch.load(args.decoder))

## load images
contentImg = Image.open(args.contentImg)
styleImg = Image.open(args.styleImg)

content_transform = transforms.Compose([
    transforms.Scale(512),  # Scale is the old torchvision name for Resize
    transforms.ToTensor(),
])
content = content_transform(contentImg)
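# Hedged sketch: the snippet ends before styleImg is transformed; reusing
# content_transform for the style image is an assumption, as is the added
# batch dimension an encoder/decoder pair would usually expect.
style = content_transform(styleImg)
content, style = content.unsqueeze(0), style.unsqueeze(0)
if args.cuda:
    content, style = content.cuda(), style.cuda()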