Example #1
def inference(images):
    """Build the model.

    Args:
        images: Images returned from distorted_inputs() or inputs().

    Returns:
        Logits.
    """
    with tf.name_scope('inference'):
        model = vgg_model.Vgg16()
        model.build(images)
        return model.get_softmax_linear()
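A minimal usage sketch for the helper above, assuming vgg_model.Vgg16 exposes build() and get_softmax_linear() as shown and that inputs() yields batched image and label tensors (both assumptions, not shown here):

images, labels = inputs()   # hypothetical input pipeline from the docstring
logits = inference(images)  # pre-softmax scores
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))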
Example #2
    def __init__(self, hyperparameters):
        super(UNIT_Trainer, self).__init__()
        lr = hyperparameters['lr']
        # Initialize the networks
        self.gen_a = VAEGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
        self.gen_b = VAEGen(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
        self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
        self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
        self.instancenorm = nn.InstanceNorm2d(512, affine=False)
        self.mse_loss = nn.MSELoss()

        # Setup the optimizers
        beta1 = hyperparameters['beta1']
        beta2 = hyperparameters['beta2']
        dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
        gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
        self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                        lr=lr, betas=(beta1, beta2), weight_decay=hyperparameters['weight_decay'])
        self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                        lr=lr, betas=(beta1, beta2), weight_decay=hyperparameters['weight_decay'])
        self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
        self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)

        # Network weight initialization
        self.apply(weights_init(hyperparameters['init']))
        self.dis_a.apply(weights_init('gaussian'))
        self.dis_b.apply(weights_init('gaussian'))

        # Load VGG model if needed
        if hyperparameters.get('vgg_w', 0) > 0:
            self.vgg = load_vgg16(hyperparameters['vgg_model_path'] + '/models')
            self.vgg.eval()
            for param in self.vgg.parameters():
                param.requires_grad = False

        if "vgg_content" in hyperparameters.keys() and hyperparameters["vgg_content"] > 0:
            self.vgg_content = vgg.Vgg16()
            self.vgg_content.eval()
            for param in self.vgg_content.parameters():
                param.requires_grad = False
        self.add_noise = add_noise if hyperparameters.get("add_noise") else None
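A hypothetical training-loop fragment built only on the attributes set up above; the loss computation and optimizer steps are assumptions, not part of the source:

trainer = UNIT_Trainer(hyperparameters)
for it in range(max_iter):                 # max_iter is an assumed config value
    # ... compute losses, then step trainer.dis_opt and trainer.gen_opt ...
    if trainer.dis_scheduler is not None:  # get_scheduler may return None
        trainer.dis_scheduler.step()
    if trainer.gen_scheduler is not None:
        trainer.gen_scheduler.step()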
Example #3
    def __init__(self, opt):
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        self.rnn_size = opt.rnn_size
        self.num_layers = opt.num_layers
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.seq_per_img = opt.seq_per_img
        self.att_hid_size = opt.att_hid_size

        self.opt = opt

        # Variable indicating whether we are in training or evaluation mode
        self.training = tf.Variable(True, trainable=False, name="training")

        # Input variables
        self.images = tf.placeholder(tf.float32, [None, 224, 224, 3],
                                     name="images")
        self.labels = tf.placeholder(tf.int32, [None, self.seq_length + 2],
                                     name="labels")
        self.masks = tf.placeholder(tf.float32, [None, self.seq_length + 2],
                                    name="masks")

        # Build CNN
        if vars(self.opt).get('start_from', None):
            cnn_weight = None
        else:
            cnn_weight = vars(self.opt).get('cnn_weight', None)
        if self.opt.cnn_model == 'vgg16':
            self.cnn = vgg.Vgg16(cnn_weight)
        if self.opt.cnn_model == 'vgg19':
            self.cnn = vgg.Vgg19(cnn_weight)

        with tf.variable_scope("cnn"):
            self.cnn.build(self.images)

        if self.opt.cnn_model == 'vgg16':
            self.context = self.cnn.conv5_3
        if self.opt.cnn_model == 'vgg19':
            self.context = self.cnn.conv5_4
        self.fc7 = self.cnn.drop7
        self.cnn_training = self.cnn.training

        # Variable in language model
        with tf.variable_scope("rnnlm"):
            # Word Embedding table
            self.Wemb = tf.Variable(tf.random_uniform(
                [self.vocab_size + 1, self.input_encoding_size], -0.1, 0.1),
                                    name='Wemb')

            # RNN cell
            if opt.rnn_type == 'rnn':
                self.cell_fn = cell_fn = tf.contrib.rnn.BasicRNNCell
            elif opt.rnn_type == 'gru':
                self.cell_fn = cell_fn = tf.contrib.rnn.GRUCell
            elif opt.rnn_type == 'lstm':
                self.cell_fn = cell_fn = tf.contrib.rnn.LSTMCell
            else:
                raise Exception("RNN type not supported: {}".format(
                    opt.rnn_type))

            # keep_prob is a function of the training flag
            self.keep_prob = tf.cond(
                self.training,
                lambda: tf.constant(1 - self.drop_prob_lm),
                lambda: tf.constant(1.0),
                name='keep_prob')

            # The basic cell wraps the RNN cell with output dropout
            self.basic_cell = cell = tf.contrib.rnn.DropoutWrapper(
                cell_fn(self.rnn_size), 1.0, self.keep_prob)
            # self.cell is the full multi-layer cell applied at each timestep
            self.cell = tf.contrib.rnn.MultiRNNCell([cell] * opt.num_layers)
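A quick sketch of toggling the dropout behaviour above at run time; Model stands in for the unnamed class this __init__ belongs to:

model = Model(opt)   # hypothetical class and opt object
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Switch to evaluation: the tf.cond above then yields keep_prob = 1.0
    sess.run(tf.assign(model.training, False))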
Example #4
    def __init__(self, opt):
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        self.rnn_size = opt.rnn_size
        self.num_layers = opt.num_layers
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.seq_per_img = opt.seq_per_img
        #self.batch_size = opt.batch_size

        self.opt = opt

        # Variable indicating whether we are in training or evaluation mode
        self.training = tf.Variable(True, trainable=False, name="training")

        # Input variables
        self.images = tf.placeholder(tf.float32, [None, 224, 224, 3],
                                     name="images")
        self.labels = tf.placeholder(tf.int32, [None, self.seq_length + 2],
                                     name="labels")
        self.masks = tf.placeholder(tf.float32, [None, self.seq_length + 2],
                                    name="masks")

        # VGG 16
        if self.opt.start_from is not None:
            cnn_weight = None
        else:
            cnn_weight = self.opt.cnn_weight
        if self.opt.cnn_model == 'vgg16':
            self.cnn = vgg.Vgg16(cnn_weight)
        if self.opt.cnn_model == 'vgg19':
            self.cnn = vgg.Vgg19(cnn_weight)

        with tf.variable_scope("cnn"):
            self.cnn.build(self.images)
        self.fc7 = self.cnn.drop7
        self.cnn_training = self.cnn.training
        """
        # Old model loading
        with open(self.opt.cnn_model) as f:
            fileContent = f.read()
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(fileContent)
            tf.import_graph_def(graph_def, input_map={"images": self.images}, name='vgg16')
            self.vgg16 = tf.get_default_graph()

        self.fc7 = self.vgg16.get_tensor_by_name("vgg16/Relu_1:0")
        """

        # Variable in language model
        with tf.variable_scope("rnnlm"):
            # Word Embedding table
            #with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform(
                [self.vocab_size + 1, self.input_encoding_size], -0.1, 0.1),
                                    name='Wemb')

            # Output projection from RNN state to vocabulary logits
            self.embed_word_W = tf.Variable(tf.random_uniform(
                [self.rnn_size, self.vocab_size + 1], -0.1, 0.1),
                                            name='embed_word_W')
            self.embed_word_b = self.init_bias(self.vocab_size + 1,
                                               name='embed_word_b')

            # RNN cell
            if opt.rnn_type == 'rnn':
                self.cell_fn = cell_fn = tf.nn.rnn_cell.BasicRNNCell
            elif opt.rnn_type == 'gru':
                self.cell_fn = cell_fn = tf.nn.rnn_cell.GRUCell
            elif opt.rnn_type == 'lstm':
                self.cell_fn = cell_fn = tf.nn.rnn_cell.LSTMCell
            else:
                raise Exception("RNN type not supported: {}".format(
                    opt.rnn_type))

            self.keep_prob = tf.cond(
                self.training,
                lambda: tf.constant(1 - self.drop_prob_lm),
                lambda: tf.constant(1.0),
                name='keep_prob')

            self.basic_cell = cell = tf.nn.rnn_cell.DropoutWrapper(
                cell_fn(self.rnn_size, state_is_tuple=True), 1.0,
                self.keep_prob)

            self.cell = tf.nn.rnn_cell.MultiRNNCell([cell] * opt.num_layers,
                                                    state_is_tuple=True)
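The placeholders above are fed one batch at a time; a hypothetical feed_dict (the batch arrays are assumptions):

feed = {model.images: image_batch,   # float32, (N, 224, 224, 3)
        model.labels: label_batch,   # int32,   (N, seq_length + 2)
        model.masks:  mask_batch}    # float32, (N, seq_length + 2)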
Example #5
    def __init__(self, opt):
        self.opt = opt
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.rnn_size = opt.rnn_size
        self.seq_per_img = opt.seq_per_img
        self.batch_size = opt.batch_size
        self.att_size = opt.att_size
        self.num_boxes = opt.num_boxes

        # Variable indicating whether we are in training or evaluation mode
        self.training = tf.Variable(True, trainable=False, name="training")
        self.num_layers = 1
        self.cap_iter = opt.cap_iter

        self.b = tf.placeholder(tf.float32, [None, self.num_boxes, 1, 1])
        if self.opt.cnn_model == "frcnn":
            print "using frcnn feature"
            self.cnn_dim = 2048
            self.images = tf.placeholder(tf.float32,
                                         [None, self.num_boxes, self.cnn_dim],
                                         name="features")
            self.context = self.images
        elif self.opt.cnn_model in ('vgg16', 'vgg19'):
            print("using cnn model")

            self.images = tf.placeholder(tf.float32, [None, 224, 224, 3],
                                         name="images")
            cnn_weight = vars(self.opt).get('cnn_weight', None)
            if self.opt.cnn_model == 'vgg16':
                import vgg
                self.cnn = vgg.Vgg16(cnn_weight)
                self.cnn_dim = 512
            elif self.opt.cnn_model == 'vgg19':
                import vgg
                self.cnn = vgg.Vgg19(cnn_weight)
                self.cnn_dim = 512
            with tf.variable_scope("cnn"):
                self.cnn.build(self.images)

            if self.opt.cnn_model == 'vgg16':
                self.context = self.cnn.conv5_3
            elif self.opt.cnn_model == 'vgg19':
                self.context = self.cnn.conv5_4
            self.context = tf.reshape(self.context,
                                      [-1, self.num_boxes, self.cnn_dim])
        elif self.opt.cnn_model == 'resnet':
            print "using resnet feature"
            self.cnn_dim = 2048
            self.images = tf.placeholder(tf.float32,
                                         [None, self.num_boxes, self.cnn_dim],
                                         name="features")
            self.context = self.images

        self.labels = tf.placeholder(tf.int32, [None, self.seq_length + 2],
                                     name="labels")

        self.masks = tf.placeholder(tf.float32, [None, self.seq_length + 2],
                                    name="masks")

        with tf.variable_scope("rnnlm"):
            # L2-normalize features along the channel dimension
            self.features = tf.nn.l2_normalize(self.context, axis=-1)

            #self.att_feat = slim.fully_connected(self.features, self.att_size,activation_fn=None, scope='att_feature_proj')

            self.avgFeat = tf.reduce_mean(self.features,
                                          axis=1,
                                          keep_dims=False)
            # Word Embedding table
            self.Wemb = tf.Variable(tf.random_uniform(
                [self.vocab_size, self.input_encoding_size], -0.1, 0.1),
                                    name='Wemb')
            # RNN cell

            if opt.rnn_type == 'rnn':
                self.cell_fn = cell_fn = tf.contrib.rnn.BasicRNNCell
            elif opt.rnn_type == 'gru':
                self.cell_fn = cell_fn = tf.contrib.rnn.GRUCell
            elif opt.rnn_type == 'lstm':
                self.cell_fn = cell_fn = tf.contrib.rnn.LSTMCell
            else:
                raise Exception("RNN type not supported: {}".format(
                    opt.rnn_type))

            self.keep_prob = tf.cond(
                self.training,
                lambda: tf.constant(1 - self.drop_prob_lm),
                lambda: tf.constant(1.0),
                name='keep_prob')

            # Each basic cell wraps the RNN cell with output dropout
            self.basic_cell1 = cell1 = tf.contrib.rnn.DropoutWrapper(
                cell_fn(self.rnn_size), 1.0, self.keep_prob)
            self.basic_cell2 = cell2 = tf.contrib.rnn.DropoutWrapper(
                cell_fn(self.rnn_size), 1.0, self.keep_prob)
            # cell1/cell2 are the full multi-layer cells applied at each timestep
            self.cell1 = tf.contrib.rnn.MultiRNNCell([cell1] * self.num_layers)
            self.cell2 = tf.contrib.rnn.MultiRNNCell([cell2] * self.num_layers)
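A toy check of the feature pooling used inside rnnlm above (shapes are illustrative only):

ctx = tf.random_uniform([2, 5, 512])       # (batch, num_boxes, cnn_dim)
feats = tf.nn.l2_normalize(ctx, axis=-1)   # unit L2 norm per box
avg = tf.reduce_mean(feats, axis=1)        # (2, 512) mean-pooled feature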
Example #6
import argparse

import torch
from PIL import Image
from torchvision import transforms

parser = argparse.ArgumentParser()
# contentImg/styleImg flags reconstructed from their use below
parser.add_argument("--contentImg", default=None)
parser.add_argument("--styleImg", default=None)
parser.add_argument("--outputImg", default=None)
parser.add_argument("--vgg", default="models/vgg_normalised.pth")
parser.add_argument("--decoder", default="models/decoder.pth")
parser.add_argument("--cuda", default=False)
parser.add_argument(
    "--alpha", default=1, type=float,
    help="The weight that controls the degree of stylization. Should be between 0 and 1")
args = parser.parse_args()

# import vgg_normalised
# vgg = vgg_normalised.vgg_normalised
# vgg.load_state_dict(torch.load(args.vgg))
# vgg = nn.Sequential(*(vgg[i] for i in range(31)))
# vgg.cuda.
import vgg
vgg = vgg.Vgg16(requires_grad=False)

# Load the decoder
from decoder import Decoder
decoder = Decoder()
decoder.model.load_state_dict(torch.load(args.decoder))

# Load images
contentImg = Image.open(args.contentImg)
styleImg = Image.open(args.styleImg)

content_transform = transforms.Compose([
    transforms.Scale(512),  # Scale is the older torchvision alias for Resize
    transforms.ToTensor(),
])
content = content_transform(contentImg)
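The snippet ends after transforming the content image; a hedged continuation under the same conventions (the actual stylization call is not shown in the source):

style = content_transform(styleImg)
content = content.unsqueeze(0)   # add batch dimension: (1, 3, H, W)
style = style.unsqueeze(0)
if args.cuda:
    content, style = content.cuda(), style.cuda()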