def prepare_dataset(input_dir, output_dir, size, images_per_file, file_prefix):
    assert os.path.exists(input_dir), 'Input directory does not exist'
    assert os.path.isdir(input_dir), '%s is not a directory' % input_dir
    assert os.path.exists(output_dir), 'Output directory does not exist'
    assert os.path.isdir(output_dir), '%s is not a directory' % output_dir

    filenames = extract_image_names_recursive(input_dir)
    random.shuffle(filenames)
    print("%s images found in %s" % (len(filenames), input_dir))

    start = time.time()
    errors = 0
    rate = 0
    update_stat_every = 100
    writer = None
    for i, filename in enumerate(filenames):
        if i % images_per_file == 0:
            # Roll over to a new TFRecord shard.
            if writer:
                writer.close()
            output_file = '%s-%04i.tfrecords' % (file_prefix, i // images_per_file)
            output_path = os.path.join(output_dir, output_file)
            writer = tf.python_io.TFRecordWriter(output_path)
        try:
            image = load_image(filename, size)
            image = prepare_image(image, normalize=False)
            example = build_example(image)
            writer.write(example.SerializeToString())
        except (OSError, OverflowError, ValueError):
            errors += 1
        print(i, '\t', '%0.4f image/sec, %s errors' % (rate, errors), end='\r')
        if i % update_stat_every == 0:
            rate = i / (time.time() - start)
    if writer:
        writer.close()
    print('%s images processed at %0.4f image/sec. %s errors occurred.'
          % (len(filenames), rate, errors))
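# A hypothetical invocation, for illustration only -- the paths, image size,
# and shard size below are made up, not defaults from this repo:
#
#   prepare_dataset(input_dir='/data/images', output_dir='/data/tfrecords',
#                   size=256, images_per_file=10000, file_prefix='train')
#
# This shuffles every image found under /data/images and writes the results
# into sequentially numbered shards (train-0000.tfrecords, train-0001.tfrecords, ...).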
def style_transfer(content=None,
                   content_dir=None,
                   content_size=512,
                   style=None,
                   style_dir=None,
                   style_size=512,
                   crop=None,
                   preserve_color=None,
                   alpha=1.0,
                   style_interp_weights=None,
                   mask=None,
                   output_dir='/output',
                   save_ext='jpg',
                   gpu=0,
                   vgg_weights='/floyd_models/vgg19_weights_normalized.h5',
                   decoder_weights='/floyd_models/decoder_weights.h5',
                   tf_checkpoint_dir=None):
    assert bool(content) != bool(content_dir), 'Either content or content_dir should be given'
    assert bool(style) != bool(style_dir), 'Either style or style_dir should be given'

    if not os.path.exists(output_dir):
        print('Creating output dir at', output_dir)
        os.mkdir(output_dir)

    # Assume that it is either an h5 file or a name of a TensorFlow checkpoint
    # NOTE: For now, artificially switching off pretrained h5 weights
    decoder_in_h5 = False  # decoder_weights.endswith('.h5')

    if content:
        content_batch = [content]
    else:
        assert mask is None, 'For spatial control use the --content option'
        content_batch = extract_image_names_recursive(content_dir)

    if style:
        style = style.split(',')
        if mask:
            assert len(style) == 2, 'For spatial control provide two style images'
            style_batch = [style]
        elif len(style) > 1:  # Style blending
            if not style_interp_weights:
                # by default, all styles get equal weights
                style_interp_weights = np.array([1.0 / len(style)] * len(style))
            else:
                # normalize weights so that their sum equals one
                style_interp_weights = [float(w) for w in style_interp_weights.split(',')]
                style_interp_weights = np.array(style_interp_weights)
                style_interp_weights /= np.sum(style_interp_weights)
            assert len(style) == len(style_interp_weights), \
                '--style and --style_interp_weights must have the same number of elements'
            style_batch = [style]
        else:
            style_batch = style
    else:
        assert mask is None, 'For spatial control use the --style option'
        style_batch = extract_image_names_recursive(style_dir)

    print('Number of content images:', len(content_batch))
    print('Number of style images:', len(style_batch))

    if gpu >= 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
        data_format = 'channels_first'
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        data_format = 'channels_last'

    image, content, style, target, encoder, decoder, otherLayers = _build_graph(
        vgg_weights, decoder_weights if decoder_in_h5 else None,
        alpha, data_format=data_format)

    with tf.Session() as sess:
        if decoder_in_h5:
            sess.run(tf.global_variables_initializer())
        elif tf_checkpoint_dir is not None:
            # Some checkpoint was provided
            saver = tf.train.Saver()
            saver.restore(sess, os.path.join(tf_checkpoint_dir, 'adain-final'))

        for content_path, style_path in product(content_batch, style_batch):
            content_name = get_filename(content_path)
            content_image = load_image(content_path, content_size, crop=True)

            if isinstance(style_path, list):  # Style blending / spatial control
                style_paths = style_path
                style_name = '_'.join(map(get_filename, style_paths))

                # Gather all style images in one numpy array in order to get
                # their activations in one pass
                style_images = None
                for i, style_path in enumerate(style_paths):
                    style_image = load_image(style_path, style_size, crop)
                    if preserve_color:
                        style_image = coral(style_image, content_image)
                    style_image = prepare_image(style_image)
                    if style_images is None:
                        shape = tuple([len(style_paths)]) + style_image.shape
                        style_images = np.empty(shape)
                    assert style_images.shape[1:] == style_image.shape, \
                        'Style images must have the same shape'
                    style_images[i] = style_image
                style_features = sess.run(encoder, feed_dict={image: style_images})

                content_image = prepare_image(content_image)
                content_feature = sess.run(encoder, feed_dict={
                    image: content_image[np.newaxis, :]
                })

                if mask:
                    # For spatial control, extract foreground and background
                    # parts of the content using the corresponding masks,
                    # run them individually through AdaIN then combine
                    if data_format == 'channels_first':
                        _, c, h, w = content_feature.shape
                        content_view_shape = (c, -1)
                        mask_shape = lambda mask: (c, len(mask), 1)
                        mask_slice = lambda mask: (slice(None), mask)
                    else:
                        _, h, w, c = content_feature.shape
                        content_view_shape = (-1, c)
                        mask_shape = lambda mask: (1, len(mask), c)
                        mask_slice = lambda mask: (mask, slice(None))

                    mask = load_mask(mask, h, w).reshape(-1)
                    fg_mask = np.flatnonzero(mask == 1)
                    bg_mask = np.flatnonzero(mask == 0)

                    content_feat_view = content_feature.reshape(content_view_shape)
                    content_feat_fg = content_feat_view[mask_slice(fg_mask)].reshape(mask_shape(fg_mask))
                    content_feat_bg = content_feat_view[mask_slice(bg_mask)].reshape(mask_shape(bg_mask))

                    style_feature_fg = style_features[0]
                    style_feature_bg = style_features[1]

                    target_feature_fg = sess.run(target, feed_dict={
                        content: content_feat_fg[np.newaxis, :],
                        style: style_feature_fg[np.newaxis, :]
                    })
                    target_feature_fg = np.squeeze(target_feature_fg)
                    target_feature_bg = sess.run(target, feed_dict={
                        content: content_feat_bg[np.newaxis, :],
                        style: style_feature_bg[np.newaxis, :]
                    })
                    target_feature_bg = np.squeeze(target_feature_bg)

                    target_feature = np.zeros_like(content_feat_view)
                    target_feature[mask_slice(fg_mask)] = target_feature_fg
                    target_feature[mask_slice(bg_mask)] = target_feature_bg
                    target_feature = target_feature.reshape(content_feature.shape)
                else:
                    # For style blending, get activations for each style then
                    # take a weighted sum.
                    target_feature = np.zeros(content_feature.shape)
                    for style_feature, weight in zip(style_features, style_interp_weights):
                        target_feature += sess.run(target, feed_dict={
                            content: content_feature,
                            style: style_feature[np.newaxis, :]
                        }) * weight
            else:
                # NOTE: This is the part we care about, if only 1 style image is provided.
                style_name = get_filename(style_path)
                style_image = load_image(style_path, style_size, crop=True)  # This only gives us a square crop
                style_image = center_crop_np(style_image)  # Actually crop the center out
                if preserve_color:
                    style_image = coral(style_image, content_image)
                style_image = prepare_image(style_image, True, data_format)
                content_image = prepare_image(content_image, True, data_format)

                # Extract other layers
                conv3_1_layer, conv4_1_layer = otherLayers
                style_feature, conv3_1_out_style, conv4_1_out_style = sess.run(
                    [encoder, conv3_1_layer, conv4_1_layer],
                    feed_dict={image: style_image[np.newaxis, :]})
                content_feature = sess.run(encoder, feed_dict={
                    image: content_image[np.newaxis, :]
                })
                target_feature = sess.run(target, feed_dict={
                    content: content_feature,
                    style: style_feature
                })

            output = sess.run(decoder, feed_dict={
                content: content_feature,
                target: target_feature,
                style: style_feature
            })

            # Grab the relevant layer outputs to see what's being minimized.
            conv3_1_out_output, conv4_1_out_output = sess.run(
                [conv3_1_layer, conv4_1_layer], feed_dict={image: output})

            filename = '%s_stylized_%s.%s' % (content_name, style_name, save_ext)
            filename = os.path.join(output_dir, filename)
            save_image(filename, output[0], data_format=data_format)
            print('Output image saved at', filename)

            # TODO: Change these layers.
            layersToViz = [conv3_1_out_style, conv4_1_out_style,
                           conv3_1_out_output, conv4_1_out_output]
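# Hypothetical example calls, for illustration only (all paths are made up).
# Blending two styles 70/30 over a directory of content images:
#
#   style_transfer(content_dir='/input/content',
#                  style='mosaic.jpg,wave.jpg',
#                  style_interp_weights='0.7,0.3',
#                  output_dir='/output')
#
# Spatial control with a foreground/background mask (exactly two styles:
# the first is applied to the foreground, the second to the background):
#
#   style_transfer(content='portrait.jpg',
#                  style='fg_style.jpg,bg_style.jpg',
#                  mask='portrait_mask.png',
#                  output_dir='/output')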
def initialize_model():
    global vgg
    global encoder
    global decoder
    global target
    global weighted_target
    global image
    global content
    global style
    global persistent_session
    global data_format

    alpha = 1.0

    graph = tf.Graph()
    # build the detection model graph from the saved model protobuf
    with graph.as_default():
        image = tf.placeholder(shape=(None, 3, None, None), dtype=tf.float32)
        content = tf.placeholder(shape=(1, 512, None, None), dtype=tf.float32)
        style = tf.placeholder(shape=(1, 512, None, None), dtype=tf.float32)

        target = adain(content, style, data_format=data_format)
        weighted_target = target * alpha + (1 - alpha) * content

        with open_weights('models/vgg19_weights_normalized.h5') as w:
            vgg = build_vgg(image, w, data_format=data_format)
            encoder = vgg['conv4_1']

        with open_weights('models/decoder_weights.h5') as w:
            decoder = build_decoder(weighted_target, w, trainable=False,
                                    data_format=data_format)

        # the default session behavior is to consume the entire GPU RAM during inference!
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.12

        # the persistent session across function calls exposed to external code interfaces
        persistent_session = tf.Session(graph=graph, config=config)
        persistent_session.run(tf.global_variables_initializer())

    print('Initialized model')

    while True:
        with ai_integration.get_next_input(inputs_schema={
            "style": {"type": "image"},
            "content": {"type": "image"},
        }) as inputs_dict:
            # only update the negative fields if we reach the end of the function - then update successfully
            result_data = {
                "content-type": 'text/plain',
                "data": None,
                "success": False,
                "error": None
            }

            print('Starting inference')
            start = time.time()

            content_size = 512
            style_size = 512
            crop = False
            preserve_color = False

            content_image = load_image(io.BytesIO(inputs_dict['content']), content_size, crop)
            style_image = load_image(io.BytesIO(inputs_dict['style']), style_size, crop)

            if preserve_color:
                style_image = coral(style_image, content_image)
            style_image = prepare_image(style_image)
            content_image = prepare_image(content_image)

            style_feature = persistent_session.run(encoder, feed_dict={
                image: style_image[np.newaxis, :]
            })
            content_feature = persistent_session.run(encoder, feed_dict={
                image: content_image[np.newaxis, :]
            })
            target_feature = persistent_session.run(target, feed_dict={
                content: content_feature,
                style: style_feature
            })
            output = persistent_session.run(decoder, feed_dict={
                content: content_feature,
                target: target_feature
            })

            output_img_bytes = save_image_in_memory(output[0], data_format=data_format)

            result_data["content-type"] = 'image/jpeg'
            result_data["data"] = output_img_bytes
            result_data["success"] = True
            result_data["error"] = None

            print('Finished inference and it took ' + str(time.time() - start))
            ai_integration.send_result(result_data)
def _int64_feature(value):
    # Companion helper implied by the feature dict below.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


train_filename = './tfrecords/train.tfrecords'  # address to save the TFRecords file

# open the TFRecords file
writer = tf.python_io.TFRecordWriter(train_filename)

train_addrs = []  # List of filenames
train_labels = []

for i in range(len(train_addrs)):
    # print how many images are saved every 1000 images
    if not i % 1000:
        print('Train data: {}/{}'.format(i, len(train_addrs)))
        sys.stdout.flush()
    # Load the image
    img = load_image(train_addrs[i])
    label = train_labels[i]
    # Create a feature
    feature = {
        'train/label': _int64_feature(label),
        'train/image': _bytes_feature(tf.compat.as_bytes(img.tostring()))
    }
    # Create an example protocol buffer
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    # Serialize to string and write to the file
    writer.write(example.SerializeToString())

writer.close()
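# A minimal sketch of reading these records back (TF 1.x API). The decoded
# dtype and shape below are assumptions -- img.tostring() drops shape
# information, so the reshape must match whatever load_image() actually produced:
def read_train_record(serialized_example):
    features = tf.parse_single_example(serialized_example, features={
        'train/label': tf.FixedLenFeature([], tf.int64),
        'train/image': tf.FixedLenFeature([], tf.string),
    })
    img = tf.decode_raw(features['train/image'], tf.uint8)  # dtype assumed
    img = tf.reshape(img, [224, 224, 3])                    # shape assumed
    return img, features['train/label']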
def style_transfer(content_img=None,
                   content_size=512,
                   style_img=None,
                   style_size=512,
                   crop=None,
                   alpha=1.0,
                   content_dir='content',
                   style_dir='style',
                   output_dir='output',
                   vgg_weights='models/vgg19_weights_normalized.h5',
                   decoder_weights='models/decoder_weights.h5'):
    decoder_in_h5 = decoder_weights.endswith('.h5')

    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    data_format = 'channels_last'

    if not os.path.exists(content_dir):
        os.mkdir(content_dir)
    if not os.path.exists(style_dir):
        os.mkdir(style_dir)
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    image, content, style, target, encoder, decoder = init_graph(
        vgg_weights, decoder_weights if decoder_in_h5 else None,
        alpha, data_format=data_format)

    with tf.Session() as sess:
        if decoder_in_h5:
            sess.run(tf.global_variables_initializer())
        else:
            saver = tf.train.Saver()
            saver.restore(sess, decoder_weights)

        # Persist the uploaded file-like objects to disk before loading.
        content_name = content_img.filename
        style_name = style_img.filename
        content_path = os.path.join(content_dir, content_name)
        style_path = os.path.join(style_dir, style_name)
        with open(content_path, "wb") as f:
            f.write(content_img.read())
        with open(style_path, "wb") as f:
            f.write(style_img.read())

        content_image = load_image(content_path, content_size, crop)
        style_image = load_image(style_path, style_size, crop)
        style_image = prepare_image(style_image)
        content_image = prepare_image(content_image)

        style_feature = sess.run(encoder, feed_dict={image: style_image[np.newaxis, :]})
        content_feature = sess.run(encoder, feed_dict={image: content_image[np.newaxis, :]})
        target_feature = sess.run(target, feed_dict={
            content: content_feature,
            style: style_feature
        })
        output = sess.run(decoder, feed_dict={
            content: content_feature,
            target: target_feature
        })

        name = f"{content_name.split('.')[0]}_stylized_{style_name.split('.')[0]}.jpg"
        filename = os.path.join(output_dir, name)
        save_image(filename, output[0], data_format=data_format)
        return name.split('.')[0], filename
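# A minimal sketch of wiring style_transfer() to an upload endpoint. Flask and
# the route name are assumptions here -- the function itself only requires
# file-like objects exposing .filename and .read(), which is what werkzeug's
# FileStorage (the values in Flask's request.files) provides:

from flask import Flask, request, send_file

app = Flask(__name__)


@app.route('/stylize', methods=['POST'])
def stylize():
    # request.files carries the uploaded FileStorage objects.
    _, path = style_transfer(content_img=request.files['content'],
                             style_img=request.files['style'])
    return send_file(path, mimetype='image/jpeg')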
from adain.image import load_image, prepare_image
from scipy.misc import imread, imresize, imsave
import matplotlib.pyplot as plt
import numpy as np

imageName = "./images/content_dir/content_3.jpg"
newIm = load_image(imageName, size=100, crop=True)
newIm2 = load_image(imageName, size=100, crop=False)

plt.imshow(newIm)
plt.show()
plt.imshow(newIm2)
plt.show()

# Seems like, for the most part, the preprocessing step works well.
def style_transfer(style_path=None,
                   style_size=512,
                   crop=None,
                   preserve_color=None,
                   content_size=512,
                   alpha=1.0,
                   gpu=0,
                   vgg_weights='models/vgg19_weights_normalized.h5',
                   decoder_weights='models/decoder_weights.h5',
                   save_model_to=None,
                   load_model_from=None):
    # Assume that it is either an h5 file or a name of a TensorFlow checkpoint
    decoder_in_h5 = decoder_weights.endswith('.h5')

    if gpu >= 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
        data_format = CHANNELS_FIRST
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        data_format = CHANNELS_LAST

    if load_model_from is not None:
        graph = load_graph(load_model_from)
        content = graph.get_tensor_by_name('prefix/content:0')
        style = graph.get_tensor_by_name('prefix/style:0')
        decoder = graph.get_tensor_by_name('prefix/output:0')
    else:
        graph = tf.Graph()
        content, style, decoder = _build_graph(
            vgg_weights, decoder_weights if decoder_in_h5 else None,
            alpha, data_format=data_format)

    # TODO Why would this be needed????
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # out = cv2.VideoWriter('output.avi', fourcc, 15.0, (x_new, y_new))

    cap = WebcamVideoStream(resolution=content_size, src=0)
    # cap = cv2.VideoCapture(0)
    # Set resolution
    # if content_size is not None:
    #     x_length, y_length = content_size, content_size
    #     cap.set(3, x_length)  # 3 and 4 are OpenCV property IDs.
    #     cap.set(4, y_length)
    # x_new = int(cap.get(3))
    # y_new = int(cap.get(4))
    # print('Resolution is: {0} by {1}'.format(x_new, y_new))

    cv2.namedWindow('frame', cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty('frame', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    # Initial capture of style
    # import ipdb; ipdb.set_trace()
    if style_path is None:
        style_from_cam = True
        style_path = '/tmp/tmp_style_img.jpg'
        while True:
            original = cap.read()
            original = cv2.flip(original, 1)
            cv2.imshow('frame', original)
            if cv2.waitKey(30) & 0xFF == ord('n'):
                original = cap.read()
                print(original.shape)
                cv2.imwrite(style_path, original)
                break
    else:
        style_from_cam = False

    with tf.Session(graph=graph) as sess:
        print("Starting session")
        style_images = []
        style_images_show = []
        print(style_path)
        if not isinstance(style_path, list):
            style_paths = [style_path]
        else:
            style_paths = style_path
        for style_path in style_paths:
            style_image = load_image(style_path, style_size, crop)
            style_image, style_image_show = read_style_image(
                style_image, data_format=data_format)
            style_images_show.append(style_image_show)
            style_images.append(style_image)
        print('length', len(style_images))

        cycler = cycle(style_images)
        cycler_show = cycle(style_images_show)
        current_style_show = next(cycler_show)

        if decoder_in_h5:
            if load_model_from is not None:
                sess.run(tf.global_variables_initializer())
        else:
            saver = tf.train.Saver()
            saver.restore(sess, decoder_weights)

        if save_model_to is not None:
            tf.saved_model.simple_save(session=sess,
                                       export_dir=save_model_to,
                                       inputs={'style': style, 'content': content},
                                       outputs={'output': decoder})
            saver = tf.train.Saver()
            model_dir = '{}model'.format(save_model_to)
            os.makedirs(model_dir)
            saver.save(sess=sess, save_path='{}/model.ckpt'.format(model_dir))
            return

        while True:
            original = cap.read()
            content_image = original.astype(np.float32)
            content_image = cv2.cvtColor(content_image, cv2.COLOR_BGR2RGB)
            content_image = prepare_image(content_image, data_format=data_format)
            content_image = content_image[np.newaxis, ...]
            print(content_image.shape)
            img_out = sess.run(decoder, feed_dict={
                content: content_image,
                style: style_image
            })
            img_out = np.clip(img_out * 255, 0, 255)
            img_out = np.squeeze(img_out).astype(np.uint8)
            if data_format == CHANNELS_FIRST:
                img_out = img_out.transpose(1, 2, 0)
            img_out = cv2.cvtColor(img_out, cv2.COLOR_BGR2RGB)
            img_out = cv2.flip(img_out, 1)
            # Overlay a thumbnail of the current style in the top-left corner.
            img_out[:100, :100, :] = current_style_show
            cv2.imshow('frame', img_out)

            # if cv2.waitKey(1) & 0xFF == ord('q'):
            #     break
            if cv2.waitKey(1) & 0xFF == ord('n'):
                if style_from_cam:
                    cv2.imshow('frame', np.ones_like(img_out))
                    original = cap.read()
                    style_image, current_style_show = read_style_image(
                        original, from_cam=True, data_format=data_format)
                    sleep(1)
                else:
                    style_image = next(cycler)
                    current_style_show = next(cycler_show)
                print(style_image.shape)
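# A hypothetical entry point (the style path is illustrative): stylize the
# live webcam feed with a fixed style image; pressing 'n' cycles styles, as
# handled in the loop above. Passing style_path=None instead captures the
# style image from the camera.
if __name__ == '__main__':
    style_transfer(style_path='styles/wave.jpg', style_size=512, gpu=0)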