def __init__(self, inplace, patch_size=64, step=-1, nblock=6, drop_rate=0.5, backbone='vgg8'):
    """
    Parameters:
        @patch_size: the patch size of the local pathway
        @step: the step size of the sliding window over local patches
        @nblock: the number of blocks in the Global-Local Transformer
        @drop_rate: the dropout rate
        @backbone: the backbone used to extract features
    """
    super().__init__()
    self.patch_size = patch_size
    self.step = step
    self.nblock = nblock
    if self.step <= 0:
        self.step = int(patch_size // 2)

    if backbone == 'vgg8':
        self.global_feat = vnet.VGG8(inplace)
        self.local_feat = vnet.VGG8(inplace)
        hidden_size = 512
    elif backbone == 'vgg16':
        self.global_feat = vnet.VGG16(inplace)
        self.local_feat = vnet.VGG16(inplace)
        hidden_size = 512
    else:
        raise ValueError('%s model is not supported!' % backbone)

    self.attnlist = nn.ModuleList()
    self.fftlist = nn.ModuleList()
    for n in range(nblock):
        atten = GlobalAttention(
            transformer_num_heads=8,
            hidden_size=hidden_size,
            transformer_dropout_rate=drop_rate)
        self.attnlist.append(atten)
        fft = Feedforward(inplace=hidden_size * 2, outplace=hidden_size)
        self.fftlist.append(fft)

    self.avg = nn.AdaptiveAvgPool2d(1)
    out_hidden_size = hidden_size
    self.gloout = nn.Linear(out_hidden_size, 1)
    self.locout = nn.Linear(out_hidden_size, 1)
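# Minimal usage sketch (not part of the original source). It assumes the
# __init__ above belongs to an nn.Module subclass, hypothetically named
# GlobalLocalTransformer here, operating on single-channel 2D slices.
import torch

model = GlobalLocalTransformer(inplace=1, patch_size=64, step=32,
                               nblock=6, drop_rate=0.5, backbone='vgg8')
xinput = torch.rand(2, 1, 130, 170)  # (batch, channels, height, width)
outputs = model(xinput)  # global prediction plus one prediction per local patch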
def migrate_model(src, dst):
    tmp_path = "/tmp/migrate_vgg"
    src_weights = np.load(src)
    dst_model = vgg.VGG16()
    serializers.save_npz(tmp_path, dst_model)
    dst_weights = dict(np.load(tmp_path))
    for key, ary in src_weights.items():
        dst_weights[key] = ary
    np.savez(dst, **dst_weights)
    os.remove(tmp_path)
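# Illustrative invocation (the .npz paths are hypothetical): every weight
# array found in the source checkpoint overwrites the corresponding entry in
# a freshly serialized Chainer VGG16 checkpoint; keys absent from the source
# keep their initialized values.
migrate_model('vgg16_src_weights.npz', 'vgg16_dst_weights.npz')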
def evaluate():
    with tf.Graph().as_default():
        log_dir = './log/train/'
        test_dir = './data/cifar10_data/cifar-10-batches-bin'
        test_images, test_labels = input_data.read_cifar10(test_dir, False, BATCH_SIZE, False)
        logits = vgg.VGG16(test_images, 10, 1)
        correct = layers.correct_number(logits, test_labels)
        saver = tf.train.Saver(tf.global_variables())

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(log_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found!')
                return

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                print('########################## Start Validation ##########################')
                print('\nEvaluating......')
                num_step = int(math.floor(NUM_TEST / BATCH_SIZE))
                num_sample = num_step * BATCH_SIZE
                step = 0
                total_correct = 0
                while step < num_step and not coord.should_stop():
                    batch_correct = sess.run(correct)
                    total_correct += np.sum(batch_correct)
                    step += 1
                # ******************************** Information of Testing ********************************
                print('Total testing samples: %d' % num_sample)
                print('Total correct predictions: %d' % total_correct)
                print('Average accuracy: %.2f%%' % (100 * total_correct / num_sample))
            except Exception as e:
                coord.request_stop(e)
            finally:
                coord.request_stop()
                coord.join(threads)
def inference(file_path):
    im = Image.open(file_path)
    image = im.resize([IMG_H, IMG_W], Image.ANTIALIAS)
    # Normalize pixel values from [0, 255] to roughly [-0.5, 0.5]
    image_array = (np.array(image) - (255 / 2.0)) / 255

    with tf.Graph().as_default():
        # The image is baked into the graph as a constant, so no placeholder
        # or feed_dict is needed at run time
        image = tf.cast(image_array, tf.float32)
        image = tf.reshape(image, [1, IMG_H, IMG_W, 3])
        logit = vgg.VGG16(image, 5, 1)
        pred = tf.nn.softmax(logits=logit)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            print('Reading checkpoints...')
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found')
                return

            start_time = time.time()
            prediction = sess.run(pred)
            duration = time.time() - start_time

            label = np.loadtxt('./dr_class.txt', str, delimiter='\t')
            index = np.argmax(prediction)
            grade = label[index]
            print('########################### Parameters ###########################')
            print('The label is:', grade)
            print('The probability is: ', np.max(prediction))
            print('Time used: %.4f ms' % (duration * 1000))

            plt.imshow(im)
            plt.axis('off')
            plt.title('The grade of it is: %s' % index)
            plt.show()
def __init__(self, path_to_weight, path_to_data, beta, use_quaternion=True,
             resume_training=False, just_test=False):
    self.network_input_size = 224
    self.output_dim = 7 if use_quaternion else 6
    self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

    if resume_training:
        self.restore_network(path_to_weight)
    else:
        self.image_inputs = tf.placeholder(
            tf.float32,
            [None, self.network_input_size, self.network_input_size, 3])
        self.label_inputs = tf.placeholder(
            tf.float32, [None, self.output_dim])  # [X Y Z W P Q R]
        self.network = vgg.VGG16({'data': self.image_inputs})
        self.regen_regression_network()
        self.build_loss(beta)
        self.saver = tf.train.Saver()
        self.merged_summary = tf.summary.merge_all()
        now = datetime.now()
        self.summary_now = now.strftime("%Y%m%d-%H%M%S")
        self.train_writer = tf.summary.FileWriter(
            './summary/train' + self.summary_now + "/", self.sess.graph)
        self.test_writer = tf.summary.FileWriter(
            './summary/test' + now.strftime("%Y%m%d-%H%M%S") + "/")

    # Initialize
    if not just_test:
        self.init_data_handler(path_to_data)
    self.init_op = tf.global_variables_initializer()
    if not resume_training:
        self.sess.run(self.init_op)
        self.load_weight(path_to_weight)
    print("Model initialized")
def training():
    pretrained_weights = './pretrain/vgg16.npy'
    data_dir = './data/cifar10_data/cifar-10-batches-bin'
    train_log_dir = './log/train/'
    val_log_dir = './log/val/'

    with tf.name_scope('input'):
        images_train, labels_train = input_data.read_cifar10(data_dir, is_train=True,
                                                             batch_size=BATCH_SIZE, shuffle=True)
        images_val, labels_val = input_data.read_cifar10(data_dir, is_train=False,
                                                         batch_size=BATCH_SIZE, shuffle=False)

    image_holder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    label_holder = tf.placeholder(tf.int32, shape=[BATCH_SIZE, N_CLASSES])

    logits = vgg.VGG16(image_holder, N_CLASSES, 0.8)
    loss = layers.loss(logits, label_holder)
    accuracy = layers.accuracy(logits, label_holder)

    global_steps = tf.Variable(0, name='global_step', trainable=False)
    train_op = layers.optimize(loss, LEARNING_RATE, global_steps)

    saver = tf.train.Saver(tf.global_variables())
    # Reference: https://stackoverflow.com/questions/35413618/tensorflow-placeholder-error-when-using-tf-merge-all-summaries
    summary_op = tf.summary.merge_all()

    # The main thread
    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)

    print('########################## Start Training ##########################')
    # Load the pretrained VGG16 weights, skipping the fully-connected layers
    layers.load_with_skip(pretrained_weights, sess, ['fc6', 'fc7', 'fc8'])

    # Coordinate the relationship between threads
    # Reference: http://wiki.jikexueyuan.com/project/tensorflow-zh/how_tos/threading_and_queues.html
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir, graph=sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, graph=sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            train_images, train_labels = sess.run([images_train, labels_train])
            _, train_loss, train_acc, summary_str = sess.run(
                [train_op, loss, accuracy, summary_op],
                feed_dict={image_holder: train_images, label_holder: train_labels})
            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('step %d, loss = %.4f, accuracy = %.4f%%' % (step, train_loss, train_acc))
                train_summary_writer.add_summary(summary_str, step)
            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([images_val, labels_val])
                # Run summary_op on the validation batch so the val writer
                # does not just re-log the last training summary
                val_loss, val_acc, val_summary_str = sess.run(
                    [loss, accuracy, summary_op],
                    feed_dict={image_holder: val_images, label_holder: val_labels})
                print('step %d, val loss = %.2f, val accuracy = %.2f%%' % (step, val_loss, val_acc))
                val_summary_writer.add_summary(val_summary_str, step)
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        coord.request_stop()
    finally:
        coord.request_stop()
        coord.join(threads)
        sess.close()
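# layers.load_with_skip is project code not shown in this excerpt. A plausible
# definition consistent with the call above -- load a .npy dict of pretrained
# arrays and assign every layer except those in skip_layer, assuming variables
# are scoped as <layer>/weights and <layer>/biases -- is:
def load_with_skip(data_path, session, skip_layer):
    data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item()
    for key in data_dict:
        if key not in skip_layer:
            with tf.variable_scope(key, reuse=True):
                for subkey, data in zip(('weights', 'biases'), data_dict[key]):
                    session.run(tf.get_variable(subkey).assign(data))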
def training():
    pretrained_weights = './pretrain/vgg16.npy'
    train_log_dir = './log_dr50000/train/'
    val_log_dir = './log_dr50000/val/'

    with tf.name_scope('input'):
        images_train, labels_train = dr5_input.input_data(True, BATCH_SIZE)
        images_val, labels_val = dr5_input.input_data(False, BATCH_SIZE)

    image_holder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    label_holder = tf.placeholder(tf.int32, shape=[BATCH_SIZE, N_CLASSES])

    logits = vgg.VGG16(image_holder, N_CLASSES, 0.5)
    loss = layers.loss(logits, label_holder)
    accuracy = layers.accuracy(logits, label_holder)

    global_steps = tf.Variable(0, name='global_step', trainable=False)
    LEARNING_RATE = tf.train.exponential_decay(start_rate, global_steps,
                                               decay_steps, deacy_rate,
                                               staircase=True)
    train_op = layers.optimize(loss, LEARNING_RATE, global_steps)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    # The main thread
    init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess = tf.InteractiveSession()
    sess.run(init)

    print('########################## Start Training ##########################')
    # Load the pretrained VGG16 weights, skipping the fully-connected layers
    layers.load_with_skip(pretrained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir, graph=sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, graph=sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            train_images, train_labels = sess.run([images_train, labels_train])
            _, train_loss, train_acc, summary_str = sess.run(
                [train_op, loss, accuracy, summary_op],
                feed_dict={image_holder: train_images, label_holder: train_labels})
            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('step %d, loss = %.4f, accuracy = %.4f%%' % (step, train_loss, train_acc))
                train_summary_writer.add_summary(summary_str, step)
            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([images_val, labels_val])
                # Run summary_op on the validation batch so the val writer
                # does not just re-log the last training summary
                val_loss, val_acc, val_summary_str = sess.run(
                    [loss, accuracy, summary_op],
                    feed_dict={image_holder: val_images, label_holder: val_labels})
                print('step %d, val loss = %.2f, val accuracy = %.2f%%' % (step, val_loss, val_acc))
                val_summary_writer.add_summary(val_summary_str, step)
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                lr = sess.run(LEARNING_RATE)
                print("step %d, learning_rate = %f" % (step, lr))
    except tf.errors.OutOfRangeError:
        coord.request_stop()
    finally:
        coord.request_stop()
        coord.join(threads)
        sess.close()
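# For reference, tf.train.exponential_decay with staircase=True computes
# lr = start_rate * decay_rate ** (global_step // decay_steps). A plain
# sketch with illustrative constants:
start, dsteps, drate = 0.01, 2000, 0.9
for s in (0, 1999, 2000, 6000):
    print(s, start * drate ** (s // dsteps))
# -> 0.01, 0.01, 0.009, 0.00729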
    model = alex.Alex()
elif args.arch == 'alexbn':
    import alexbn
    model = alexbn.AlexBN()
elif args.arch == 'googlenet':
    import googlenet
    model = googlenet.GoogLeNet()
elif args.arch == 'googlenetbn':
    import googlenetbn
    model = googlenetbn.GoogLeNetBN()
elif args.arch == 'vgg11':
    import vgg
    model = vgg.VGG11()
elif args.arch == 'vgg16':
    import vgg
    model = vgg.VGG16()
elif args.arch == 'caffenet':
    import caffenet
    model = caffenet.CaffeNet()
else:
    raise ValueError('Invalid architecture name')

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()

# Setup optimizer
optimizer_class = optimizers.MomentumSGD
if args.compress_gradient:
    from optimizer_sgd_compress import MomentumSGDCompress
    optimizer_class = MomentumSGDCompress
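# The selected optimizer class is then instantiated and bound to the model;
# a short Chainer-style sketch (lr and momentum values are illustrative):
optimizer = optimizer_class(lr=0.01, momentum=0.9)
optimizer.setup(model)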
trainX = np.array(fh.get('Train/trainX'))
trainY = np.array(fh.get('Train/trainY'))
trainValX = np.array(fh.get('TrainVal/trainValX'))
trainValY = np.array(fh.get('TrainVal/trainValY'))
fh.close()
print('train:\t', trainX.shape, trainY.shape)
print('trainVal:', trainValX.shape, trainValY.shape)

if len(np.shape(trainX)) < 4:
    trainX = np.expand_dims(trainX, axis=3)
    trainValX = np.expand_dims(trainValX, axis=3)
    print('Expand the channel dimension for TensorFlow')

model = vgg.VGG16(7, drop_rate, 'Emotion_Recognition')
optimizer = tf.optimizers.Adam(learning_rate=0.001)

save_name = args.save_name
model_save_path = '{0}/saved_model/{1}'.format(current_path, save_name)
logdir = os.path.join('{0}/tensorboard/{1}'.format(current_path, save_name),
                      datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
log_writer = tf.summary.create_file_writer(logdir)
tf.summary.trace_on(graph=True, profiler=False)

iteration = args.iteration
one_hot = 'no'
batch_size = 128
train_loss = []
train_acc = []
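# The training loop itself is not part of this excerpt. A minimal TF2-style
# step consistent with the setup above (model returns logits, labels are
# integer class ids) might look like this sketch:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss_value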
def train():
    # Seeds
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)

    # Device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Dataset and Dataloader
    transform = transforms.Compose([
        transforms.Resize(TRAIN_IMAGE_SIZE),
        transforms.CenterCrop(TRAIN_IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255)),
    ])
    train_dataset = datasets.ImageFolder(DATASET_PATH, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    # Load networks
    TransformerNetwork = transformer.TransformerNetwork().to(device)
    VGG = vgg.VGG16().to(device)

    # Get Style Features
    imagenet_neg_mean = torch.tensor([-103.939, -116.779, -123.68],
                                     dtype=torch.float32).reshape(1, 3, 1, 1).to(device)
    style_image = utils.load_image(STYLE_IMAGE_PATH)
    style_tensor = utils.itot(style_image).to(device)
    style_tensor = style_tensor.add(imagenet_neg_mean)
    B, C, H, W = style_tensor.shape
    style_features = VGG(style_tensor.expand([BATCH_SIZE, C, H, W]))
    style_gram = {}
    for key, value in style_features.items():
        style_gram[key] = utils.gram(value)

    # Optimizer settings
    optimizer = optim.Adam(TransformerNetwork.parameters(), lr=ADAM_LR)

    # Loss function (created once, reused for every batch)
    MSELoss = nn.MSELoss().to(device)

    # Loss trackers
    content_loss_history = []
    style_loss_history = []
    total_loss_history = []
    batch_content_loss_sum = 0
    batch_style_loss_sum = 0
    batch_total_loss_sum = 0

    # Optimization/Training Loop
    batch_count = 1
    start_time = time.time()
    for epoch in range(NUM_EPOCHS):
        print("========Epoch {}/{}========".format(epoch + 1, NUM_EPOCHS))
        for content_batch, _ in train_loader:
            # Get current batch size in case of odd batch sizes
            curr_batch_size = content_batch.shape[0]

            # Free up unneeded cuda memory
            torch.cuda.empty_cache()

            # Zero out gradients
            optimizer.zero_grad()

            # Generate images and get features (RGB -> BGR for the VGG input convention)
            content_batch = content_batch[:, [2, 1, 0]].to(device)
            generated_batch = TransformerNetwork(content_batch)
            content_features = VGG(content_batch.add(imagenet_neg_mean))
            generated_features = VGG(generated_batch.add(imagenet_neg_mean))

            # Content Loss
            content_loss = CONTENT_WEIGHT * MSELoss(generated_features["relu2_2"],
                                                    content_features["relu2_2"])
            batch_content_loss_sum += content_loss.item()

            # Style Loss
            style_loss = 0
            for key, value in generated_features.items():
                s_loss = MSELoss(utils.gram(value), style_gram[key][:curr_batch_size])
                style_loss += s_loss
            style_loss *= STYLE_WEIGHT
            batch_style_loss_sum += style_loss.item()

            # Total Loss
            total_loss = content_loss + style_loss
            batch_total_loss_sum += total_loss.item()

            # Backprop and weight update
            total_loss.backward()
            optimizer.step()

            # Save model and print losses
            if ((batch_count - 1) % SAVE_MODEL_EVERY == 0) or (batch_count == NUM_EPOCHS * len(train_loader)):
                # Print losses
                print("========Iteration {}/{}========".format(batch_count, NUM_EPOCHS * len(train_loader)))
                print("\tContent Loss:\t{:.2f}".format(batch_content_loss_sum / batch_count))
                print("\tStyle Loss:\t{:.2f}".format(batch_style_loss_sum / batch_count))
                print("\tTotal Loss:\t{:.2f}".format(batch_total_loss_sum / batch_count))
                print("Time elapsed:\t{} seconds".format(time.time() - start_time))

                # Save model
                checkpoint_path = SAVE_MODEL_PATH + "checkpoint_" + str(batch_count - 1) + ".pth"
                torch.save(TransformerNetwork.state_dict(), checkpoint_path)
                print("Saved TransformerNetwork checkpoint file at {}".format(checkpoint_path))

                # Save sample generated image
                sample_tensor = generated_batch[0].clone().detach().unsqueeze(dim=0)
                sample_image = utils.ttoi(sample_tensor.clone().detach())
                sample_image_path = SAVE_IMAGE_PATH + "sample0_" + str(batch_count - 1) + ".png"
                utils.saveimg(sample_image, sample_image_path)
                print("Saved sample transformed image at {}".format(sample_image_path))

                # Save loss histories
                content_loss_history.append(batch_content_loss_sum / batch_count)
                style_loss_history.append(batch_style_loss_sum / batch_count)
                total_loss_history.append(batch_total_loss_sum / batch_count)

            # Iterate batch counter
            batch_count += 1

    stop_time = time.time()

    # Print loss histories
    print("Done Training the Transformer Network!")
    print("Training Time: {} seconds".format(stop_time - start_time))
    print("========Content Loss========")
    print(content_loss_history)
    print("========Style Loss========")
    print(style_loss_history)
    print("========Total Loss========")
    print(total_loss_history)

    # Save TransformerNetwork weights
    TransformerNetwork.eval()
    TransformerNetwork.cpu()
    final_path = SAVE_MODEL_PATH + "transformer_weight.pth"
    print("Saving TransformerNetwork weights at {}".format(final_path))
    torch.save(TransformerNetwork.state_dict(), final_path)
    print("Done saving final model")

    # Plot loss histories
    if PLOT_LOSS:
        utils.plot_loss_hist(content_loss_history, style_loss_history, total_loss_history)
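# utils.gram is external to this excerpt; a common definition consistent with
# how it is used above (batched Gram matrices of VGG feature maps) is:
def gram(tensor):
    # tensor: (B, C, H, W) feature maps -> (B, C, C) Gram matrices,
    # normalized by the number of elements per feature map
    B, C, H, W = tensor.shape
    x = tensor.view(B, C, H * W)
    return torch.bmm(x, x.transpose(1, 2)) / (C * H * W)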
from keras import callbacks
from keras import optimizers
from keras.datasets import cifar10
from keras.engine import Model
from keras.layers import Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras import applications
import vgg

img_width, img_height = 32, 32

base_model = vgg.VGG16(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

nb_epoch = 50
nb_classes = 10

(X_train, y_train), (X_test, y_test) = cifar10.load_data()
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

nb_train_samples = X_train.shape[0]
nb_validation_samples = X_test.shape[0]
print(nb_train_samples)

# Extract the last layer from third block of vgg16 model
train_loader = torch.utils.data.DataLoader(
    getCifar10Dataset(args.data_dir, isTrain=True),
    batch_size=args.batch_size, shuffle=True,
    num_workers=args.workers, pin_memory=True)
val_loader = torch.utils.data.DataLoader(
    getCifar10Dataset(args.data_dir, isTrain=False),
    batch_size=args.batch_size, shuffle=False,
    num_workers=args.workers, pin_memory=True)

if args.arch == "vgg":
    model = vgg.VGG16(input_size=(3, 32, 32), num_classes=10)
elif args.arch == "resnet":
    model = resnet.Resnet18(input_size=(3, 32, 32), num_classes=10)
else:
    raise RuntimeError("Unsupported model architecture")
model.cuda()

# Load model or initialize weights
if args.model:
    if os.path.isfile(args.model):
        checkpoint = torch.load(args.model)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        print("=> Load model '{}' @ epoch {}".format(args.model, args.start_epoch))
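# The matching save side for the checkpoint format loaded above (keys 'epoch',
# 'best_prec1', 'state_dict') would sit inside the epoch loop and look roughly
# like this sketch; the destination path is hypothetical:
torch.save({'epoch': epoch + 1,
            'best_prec1': best_prec1,
            'state_dict': model.state_dict()},
           'checkpoint.pth')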
    num_threads=1, capacity=1000 + 3 * 25, min_after_dequeue=1000)

x = tf.placeholder(tf.float32, [None, 28, 28, 1], name="x")
tf.summary.image('input', x, 3)

# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 19], name="labels")

# Build the graph for the deep net
y_conv = vgg.VGG16(x, 19, True)

with tf.name_scope('loss'):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy, name="loss")
tf.summary.scalar("loss", cross_entropy)

with tf.name_scope('adam_optimizer'):
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cross_entropy)

with tf.name_scope('accuracy'):
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)
tf.summary.scalar("accuracy", accuracy)
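# The scalar/image summaries declared above are typically merged and written
# during training; a minimal sketch (the log directory is illustrative):
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('./log/vgg16', tf.get_default_graph())
# inside the session loop:
#   summary = sess.run(merged, feed_dict={x: batch_x, y_: batch_y})
#   writer.add_summary(summary, step)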
if WANNAFASTTRAINING == 1:
    X_train = X_train[0:1000, :, :, :]
    y_train = y_train[0:1000]
    X_test = X_test[0:200, :, :, :]
    y_test = y_test[0:200]

Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

nb_train_samples = X_train.shape[0]
nb_validation_samples = X_test.shape[0]

if USEPREVIOUSTRAININGWEIGHTS == 0:
    base_model = vgg.VGG16(weights='imagenet', include_top=False,
                           input_shape=(img_width, img_height, dimensionx))
    # Extract the last layer from the third block of the VGG16 model
    last = base_model.get_layer('block3_pool').output
    # Add classification layers on top of it
    x = Flatten()(last)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    pred = Dense(nb_classes, activation='sigmoid')(x)
    model = Model(base_model.input, pred)
else:
    previouslytrainedModelpath = './trained_models/cifar10-vgg16_model_alllayers.h5'
    print('Loading previously trained model from path: ' + previouslytrainedModelpath)
    model = load_model(previouslytrainedModelpath)
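# When training only the new classification head, the pretrained VGG layers
# are often frozen first; an optional step not present in the original code:
for layer in base_model.layers:
    layer.trainable = False  # keep the ImageNet features fixed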
if isinstance(m, nn.Conv2d):
    if bias:
        yield m.bias
    else:
        yield m.weight
elif isinstance(m, nn.ConvTranspose2d):
    # weight is frozen because it is just a bilinear upsampling
    if bias:
        assert m.bias is None
elif isinstance(m, modules_skipped):
    continue
else:
    raise ValueError('Unexpected module: %s' % str(m))


if __name__ == "__main__":
    fcn8_at_once = FCN8sAtOnce(21)
    vgg16 = vgg.VGG16(True)
    fcn8_at_once.copy_params_from_vgg16(vgg16)
    data_pre = []
    for datas in fcn8_at_once.parameters():
        data_pre.append(datas)

    fcn8_at_once_none_pre = FCN8sAtOnce(21)
    vgg16_no = vgg.VGG16(False)
    fcn8_at_once_none_pre.copy_params_from_vgg16(vgg16_no)
    data_no_pre = []
    for datas in fcn8_at_once_none_pre.parameters():
        data_no_pre.append(datas)

    # Compare parameters of the pretrained vs. non-pretrained initializations
    for i, j in zip(data_pre, data_no_pre):
        print(torch.sum(i - j))
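# A typical consumer of a bias/weight generator like the one above is an
# optimizer with separate parameter groups (the usual FCN training recipe);
# get_parameters is the assumed name of the enclosing generator:
optim = torch.optim.SGD(
    [{'params': get_parameters(fcn8_at_once, bias=False)},
     {'params': get_parameters(fcn8_at_once, bias=True),
      'lr': 2e-10, 'weight_decay': 0}],
    lr=1e-10, momentum=0.99, weight_decay=0.0005)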
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, epochs=2, print_iterations=1000,
             batch_size=4, save_path='saver/fns.ckpt', slow=False,
             learning_rate=1e-3, debug=False):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    style_features = {}
    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1,) + style_target.shape
    print(style_shape)

    with tf.Graph().as_default(), tf.Session() as sess:
        with tf.name_scope('vgg'):
            vgg_model = vgg.VGG16(weights='imagenet', include_top=False)
            vgg_model.summary()

        style_layers_output = [vgg_model.get_layer(l).output for l in STYLE_LAYERS]
        style_model = Model(vgg_model.input, style_layers_output)
        style_image_pre = vgg.preprocess(np.float32(style_target)[None, ...])

        # Precompute style features
        style_feats = style_model.predict(style_image_pre)
        for layer, features in zip(STYLE_LAYERS, style_feats):
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

        # Set up the content layer model
        content_model = Model(vgg_model.input, vgg_model.get_layer(CONTENT_LAYER).output)
        X_content = tf.placeholder(tf.float32, shape=batch_shape, name="X_content")
        content_features_X = content_model(X_content)

        preds = transform.net(X_content / 255.)
        preds_pre = vgg.preprocess(preds)

        # Run preds_pre through the content model
        content_features_preds_pre = content_model(preds_pre)

        content_size = _tensor_size(content_features_X) * batch_size
        assert _tensor_size(content_features_X) == _tensor_size(content_features_preds_pre)
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(content_features_preds_pre - content_features_X) / content_size)

        style_feats_preds_pre = style_model(preds_pre)
        style_losses = []
        for style_layer, style_fmap in zip(STYLE_LAYERS, style_feats_preds_pre):
            bs, height, width, filters = map(lambda i: i.value, style_fmap.get_shape())
            size = height * width * filters
            feats = tf.reshape(style_fmap, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) / style_gram.size)
        style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size

        # Total variation denoising
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] - preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] - preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size + y_tv / tv_y_size) / batch_size

        loss = content_loss + style_loss + tv_loss

        # Gather trainable vars, excluding VGG
        train_vars = [v for v in tf.trainable_variables() if 'vgg' not in v.name]
        global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            loss, var_list=train_vars, global_step=global_step)

        # Initialize all vars except for VGG
        global_vars_novgg = [v for v in tf.global_variables() if 'vgg' not in v.name]
        sess.run(tf.variables_initializer(global_vars_novgg))

        # Create the Saver once, outside the loop, so save ops are not added
        # to the graph on every checkpoint
        saver = tf.train.Saver()

        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p, (256, 256, 3)).astype(np.float32)
                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {X_content: X_batch}
                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("Epoch: {} Batch: {} Time: {}".format(epoch, iterations, delta_time))

                is_print_iter = int(iterations) % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content: X_batch}
                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    saver.save(sess, save_path, global_step=global_step)
                    yield (_preds, losses, iterations, epoch)
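# _tensor_size and get_img are helpers defined elsewhere; a plausible
# definition of _tensor_size consistent with its use above (elements per
# example, excluding the batch dimension) is:
import functools
import operator

def _tensor_size(tensor):
    return functools.reduce(operator.mul,
                            (d.value for d in tensor.get_shape()[1:]), 1)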