def build_model(self):
    print("build_model() ------------------------------------------>")
    if self.y_dim:
        self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='y')
    tf.set_random_seed(self.random_seed)
    # image_size = self.image_size
    self.feature_size_tile = self.params.chunk_size * self.params.chunk_num
    self.feature_size = self.feature_size_tile * NUM_TILES_L2_MIX

    ########################### STL-10 BEGIN
    def _parse(image, label):
        # as in [70]: "We randomly resize the images and extract 96 x 96 crops"
        # => randomly resize and extract 64 x 64 crops
        # the image augmentation is analogous to S. Jenni's code in the paper
        # "Self-Supervised Feature Learning by Learning to Spot Artifacts"
        image_processed = DCGAN.img_preprocessor.process(image)
        return image_processed, label

    self.images_plh = tf.placeholder(tf.float32, shape=[None, 96, 96, 3])
    self.labels_plh = tf.placeholder(tf.int32, shape=[None])

    dataset = tf.data.Dataset.from_tensor_slices(
        (self.images_plh, self.labels_plh)).repeat().shuffle(
            self.batch_size).batch(self.batch_size)
    self.dataset = dataset.map(_parse)
    self.iterator = self.dataset.make_initializable_iterator()
    images, labels = self.iterator.get_next()  # Notice: for both train + test images!!
    print("************************************", images)
    images = tf.reshape(images, [self.batch_size, self.image_size, self.image_size, 3])
    print("images: ", images)
    print("labels: ", labels)
    y = tf.one_hot(labels, 10, dtype=tf.int32)
    y_onehot = tf.reshape(y, [self.batch_size, 10])
    ########################### STL-10 END

    self.images_I_ref = images
    self.labels = labels
    self.labels_onehot = y_onehot

    if self.params.encoder_type == 'alexnet':
        with tf.variable_scope('alexnet'):
            self.I_ref_f = self.alexnet(self.images_I_ref)
    elif self.params.encoder_type == 'encoder':
        with tf.variable_scope('generator'):
            model = self.params.autoencoder_model
            coordConvLayer = True
            ####################
            print("using encoder for TL...")
            self.I_ref_f = encoder_dense(self.images_I_ref, self.batch_size, self.feature_size,
                                         dropout_p=0.0, preset_model=model, addCoordConv=coordConvLayer)
    else:
        raise ValueError(self.params.encoder_type + " not supported!")

    with tf.variable_scope('classifier'):
        print("self.I_ref_f: ", self.I_ref_f.shape)
        self.lin_cls_logits = self.linear_classifier(self.I_ref_f)

    with tf.variable_scope('classifier_loss'):
        self.cls_loss = tf.losses.softmax_cross_entropy(
            onehot_labels=self.labels_onehot,
            logits=self.lin_cls_logits,
            reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
        tf.summary.scalar('cls_loss', self.cls_loss)

    with tf.name_scope('accuracy'):
        correct = tf.equal(tf.argmax(self.lin_cls_logits, 1),
                           tf.argmax(self.labels_onehot, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        tf.summary.scalar('accuracy', self.accuracy)

    t_vars = tf.trainable_variables()
    save_vars = []

    if self.params.encoder_type == 'alexnet':
        self.enc_vars = [var for var in t_vars if 'alexnet' in var.name]  # just alexnet
        self.gen_vars = []
        save_vars.extend(self.enc_vars)
    elif self.params.encoder_type == 'encoder':
        self.gen_vars = [var for var in t_vars
                         if 'generator' in var.name and 'g_' in var.name]  # encoder (generator)
        print("gen_vars:", self.gen_vars)
        self.enc_vars = []
        save_vars.extend(self.gen_vars)
    else:
        raise ValueError(self.params.encoder_type + " not supported!")

    self.cls_vars = [var for var in t_vars if 'classifier' in var.name]
    save_vars.extend(self.cls_vars)

    all_vars = []
    all_vars.extend(self.gen_vars)
    all_vars.extend(self.cls_vars)
    all_vars.extend(self.enc_vars)
    assert collections.Counter(all_vars) == collections.Counter(t_vars)
    assert collections.Counter(save_vars) == collections.Counter(t_vars)

    print("parameters after print_model_params: ****************")
    self.print_model_params(t_vars)
    print("*****************************************************")

    # save encoder_type + CLS
    self.saver = tf.train.Saver(save_vars, max_to_keep=5)
    if self.params.is_train:
        self.saver_metrics = tf.train.Saver(save_vars, max_to_keep=None)

    print("build_model() ------------------------------------------<")

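# --- usage sketch (assumption, not part of the original source) ----------------
# Shows how the placeholder-backed STL-10 dataset above would typically be bound
# and how the classifier loss/accuracy ops could be run per step.
# `dcgan` (the model object), `stl10_images`/`stl10_labels` (numpy arrays of shape
# [N, 96, 96, 3] / [N]) and `train_op` (an optimizer step over dcgan.cls_vars,
# built elsewhere) are hypothetical names.
def run_linear_eval(dcgan, sess, train_op, stl10_images, stl10_labels, num_steps=1000):
    # bind the numpy data to the placeholders once; the dataset then repeats/shuffles it
    sess.run(dcgan.iterator.initializer,
             feed_dict={dcgan.images_plh: stl10_images,
                        dcgan.labels_plh: stl10_labels})
    for step in range(num_steps):
        _, loss, acc = sess.run([train_op, dcgan.cls_loss, dcgan.accuracy])
        if step % 100 == 0:
            print('step %d  cls_loss %.4f  accuracy %.3f' % (step, loss, acc))
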
def build_model(self):
    print("build_model() ------------------------------------------>")
    if self.y_dim:
        self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='y')
    tf.set_random_seed(self.random_seed)
    image_size = self.image_size

    isIdeRun = 'lz826' in os.path.realpath(sys.argv[0])
    file_train = self.params.tfrecords_path if not isIdeRun else '../data/train-00011-of-00060.tfrecords'

    ####################################################################################
    reader = tf.TFRecordReader()
    rrm_fn = lambda name: read_record_max(name, reader, image_size)
    filenames, train_images, \
        t1_10nn_ids, t1_10nn_subids, t1_10nn_L2, \
        t2_10nn_ids, t2_10nn_subids, t2_10nn_L2, \
        t3_10nn_ids, t3_10nn_subids, t3_10nn_L2, \
        t4_10nn_ids, t4_10nn_subids, t4_10nn_L2 = \
        get_pipeline(file_train, self.batch_size, self.epochs, rrm_fn)
    print('train_images.shape..:', train_images.shape)

    self.fnames_I_ref = filenames
    self.images_I_ref = train_images

    # create tiles for I_ref (only for logging purposes)
    tile_size = image_size / 2
    assert tile_size.is_integer()
    tile_size = int(tile_size)
    self.I_ref_t1 = tf.image.crop_to_bounding_box(self.images_I_ref, 0, 0, tile_size, tile_size)
    self.I_ref_t2 = tf.image.crop_to_bounding_box(self.images_I_ref, 0, tile_size, tile_size, tile_size)
    self.I_ref_t3 = tf.image.crop_to_bounding_box(self.images_I_ref, tile_size, 0, tile_size, tile_size)
    self.I_ref_t4 = tf.image.crop_to_bounding_box(self.images_I_ref, tile_size, tile_size, tile_size, tile_size)

    t1_10nn_ids = tf.reshape(tf.sparse.to_dense(t1_10nn_ids), (self.batch_size, -1))
    t2_10nn_ids = tf.reshape(tf.sparse.to_dense(t2_10nn_ids), (self.batch_size, -1))
    t3_10nn_ids = tf.reshape(tf.sparse.to_dense(t3_10nn_ids), (self.batch_size, -1))
    t4_10nn_ids = tf.reshape(tf.sparse.to_dense(t4_10nn_ids), (self.batch_size, -1))
    t1_10nn_subids = tf.reshape(tf.sparse.to_dense(t1_10nn_subids), (self.batch_size, -1))
    t2_10nn_subids = tf.reshape(tf.sparse.to_dense(t2_10nn_subids), (self.batch_size, -1))
    t3_10nn_subids = tf.reshape(tf.sparse.to_dense(t3_10nn_subids), (self.batch_size, -1))
    t4_10nn_subids = tf.reshape(tf.sparse.to_dense(t4_10nn_subids), (self.batch_size, -1))
    t1_10nn_L2 = tf.reshape(tf.sparse.to_dense(t1_10nn_L2), (self.batch_size, -1))
    t2_10nn_L2 = tf.reshape(tf.sparse.to_dense(t2_10nn_L2), (self.batch_size, -1))
    t3_10nn_L2 = tf.reshape(tf.sparse.to_dense(t3_10nn_L2), (self.batch_size, -1))
    t4_10nn_L2 = tf.reshape(tf.sparse.to_dense(t4_10nn_L2), (self.batch_size, -1))

    nn_id = tf.random_uniform([self.batch_size], 0, 9, dtype=tf.int32, seed=4285)

    path = self.params.full_imgs_path if not isIdeRun else 'D:\\learning-object-representations-by-mixing-scenes\\src\\datasets\\coco\\2017_training\\version\\v4\\full\\'
    path = tf.constant(path)
    filetype = tf.constant(".jpg")

    # kNN images of quadrant t1 ############################################################################################
    for id in range(self.batch_size):
        t1_10nn_ids_b = t1_10nn_ids[id]
        index = nn_id[id]
        t1_10nn_id = tf.gather(t1_10nn_ids_b, index)
        t1_10nn_id_str = tf.as_string(t1_10nn_id)
        t1_10nn_subids_b = t1_10nn_subids[id]
        t1_10nn_subid = tf.gather(t1_10nn_subids_b, index)
        t1_10nn_subid_str = tf.as_string(t1_10nn_subid)
        postfix = underscore + t1_10nn_subid_str + filetype
        fname = get_coco_filename(t1_10nn_id_str, postfix)
        t1_10nn_fnames = fname if id == 0 else tf.concat(axis=0, values=[t1_10nn_fnames, fname])

    with tf.control_dependencies([tf.assert_equal(self.batch_size, t1_10nn_fnames.shape[0]),
                                  tf.assert_equal(tf.strings.length(t1_10nn_fnames), 18)]):
        self.t1_fnames = t1_10nn_fnames
        t1_10nn_fnames = tf.strings.join([path, t1_10nn_fnames])

    for id in range(self.batch_size):
        file = tf.read_file(t1_10nn_fnames[id])
        file = tf.image.decode_jpeg(file)
        file = resize_img(file, image_size, self.batch_size)
        file = tf.expand_dims(file, 0)
        t1_10nn_images = file if id == 0 else tf.concat(axis=0, values=[t1_10nn_images, file])
    self.images_t1 = t1_10nn_images

    # create tile for I_t1 (only for logging purposes)
    self.I_t1_tile = tf.image.crop_to_bounding_box(self.images_t1, 0, 0, tile_size, tile_size)

    # kNN images of quadrant t2 ############################################################################################
    for id in range(self.batch_size):
        t2_10nn_ids_b = t2_10nn_ids[id]
        index = nn_id[id]
        t2_10nn_id = tf.gather(t2_10nn_ids_b, index)
        t2_10nn_id_str = tf.as_string(t2_10nn_id)
        t2_10nn_subids_b = t2_10nn_subids[id]
        t2_10nn_subid = tf.gather(t2_10nn_subids_b, index)
        t2_10nn_subid_str = tf.as_string(t2_10nn_subid)
        postfix = underscore + t2_10nn_subid_str + filetype
        fname = get_coco_filename(t2_10nn_id_str, postfix)
        t2_10nn_fnames = fname if id == 0 else tf.concat(axis=0, values=[t2_10nn_fnames, fname])

    with tf.control_dependencies([tf.assert_equal(self.batch_size, t2_10nn_fnames.shape[0]),
                                  tf.assert_equal(tf.strings.length(t2_10nn_fnames), 18)]):
        t2_10nn_fnames = tf.strings.join([path, t2_10nn_fnames])

    for id in range(self.batch_size):
        file = tf.read_file(t2_10nn_fnames[id])
        file = tf.image.decode_jpeg(file)
        file = resize_img(file, image_size, self.batch_size)
        file = tf.expand_dims(file, 0)
        t2_10nn_images = file if id == 0 else tf.concat(axis=0, values=[t2_10nn_images, file])
    self.images_t2 = t2_10nn_images

    # create tile for I_t2 (only for logging purposes)
    self.I_t2_tile = tf.image.crop_to_bounding_box(self.images_t2, 0, tile_size, tile_size, tile_size)

    # kNN images of quadrant t3 ############################################################################################
    for id in range(self.batch_size):
        t3_10nn_ids_b = t3_10nn_ids[id]
        index = nn_id[id]
        t3_10nn_id = tf.gather(t3_10nn_ids_b, index)
        t3_10nn_id_str = tf.as_string(t3_10nn_id)
        t3_10nn_subids_b = t3_10nn_subids[id]
        t3_10nn_subid = tf.gather(t3_10nn_subids_b, index)
        t3_10nn_subid_str = tf.as_string(t3_10nn_subid)
        postfix = underscore + t3_10nn_subid_str + filetype
        fname = get_coco_filename(t3_10nn_id_str, postfix)
        t3_10nn_fnames = fname if id == 0 else tf.concat(axis=0, values=[t3_10nn_fnames, fname])

    with tf.control_dependencies([tf.assert_equal(self.batch_size, t3_10nn_fnames.shape[0]),
                                  tf.assert_equal(tf.strings.length(t3_10nn_fnames), 18)]):
        t3_10nn_fnames = tf.strings.join([path, t3_10nn_fnames])

    for id in range(self.batch_size):
        file = tf.read_file(t3_10nn_fnames[id])
        file = tf.image.decode_jpeg(file)
        file = resize_img(file, image_size, self.batch_size)
        file = tf.expand_dims(file, 0)
        t3_10nn_images = file if id == 0 else tf.concat(axis=0, values=[t3_10nn_images, file])
    self.images_t3 = t3_10nn_images

    # create tile for I_t3 (only for logging purposes)
    self.I_t3_tile = tf.image.crop_to_bounding_box(self.images_t3, tile_size, 0, tile_size, tile_size)

    # kNN images of quadrant t4 ############################################################################################
    for id in range(self.batch_size):
        t4_10nn_ids_b = t4_10nn_ids[id]
        index = nn_id[id]
        t4_10nn_id = tf.gather(t4_10nn_ids_b, index)
        t4_10nn_id_str = tf.as_string(t4_10nn_id)
        t4_10nn_subids_b = t4_10nn_subids[id]
        t4_10nn_subid = tf.gather(t4_10nn_subids_b, index)
        t4_10nn_subid_str = tf.as_string(t4_10nn_subid)
        postfix = underscore + t4_10nn_subid_str + filetype
        fname = get_coco_filename(t4_10nn_id_str, postfix)
        t4_10nn_fnames = fname if id == 0 else tf.concat(axis=0, values=[t4_10nn_fnames, fname])

    with tf.control_dependencies([tf.assert_equal(self.batch_size, t4_10nn_fnames.shape[0]),
                                  tf.assert_equal(tf.strings.length(t4_10nn_fnames), 18)]):
        t4_10nn_fnames = tf.strings.join([path, t4_10nn_fnames])

    for id in range(self.batch_size):
        file = tf.read_file(t4_10nn_fnames[id])
        file = tf.image.decode_jpeg(file)
        file = resize_img(file, image_size, self.batch_size)
        file = tf.expand_dims(file, 0)
        t4_10nn_images = file if id == 0 else tf.concat(axis=0, values=[t4_10nn_images, file])
    self.images_t4 = t4_10nn_images

    # create tile for I_t4 (only for logging purposes)
    self.I_t4_tile = tf.image.crop_to_bounding_box(self.images_t4, tile_size, tile_size, tile_size, tile_size)

    # ###########################################################################################################
    # ###########################################################################################################

    # 12.11: currently leave scaling idea out and first focus on the core clustering idea
    self.chunk_num = self.params.chunk_num
    """ number of chunks: 8 """
    self.chunk_size = self.params.chunk_size
    """ size per chunk: 64 """
    self.feature_size_tile = self.chunk_size * self.chunk_num
    """ equals the size of all chunks from a single tile """
    self.feature_size = self.feature_size_tile * NUM_TILES_L2_MIX
    """ equals the size of the full image feature """

    # each tile chunk is initialized with 1's
    a_tile_chunk = tf.ones((self.batch_size, self.feature_size_tile), dtype=tf.int32)
    assert a_tile_chunk.shape[0] == self.batch_size
    assert a_tile_chunk.shape[1] == self.feature_size_tile

    with tf.variable_scope('generator') as scope_generator:
        # self.I_ref_f1 = self.encoder(self.I_ref_t1)
        # params for ENCODER
        model = self.params.autoencoder_model
        coordConvLayer = False
        ####################
        self.I_ref_f = encoder_dense(self.images_I_ref, self.batch_size, self.feature_size,
                                     dropout_p=0.0, preset_model=model, addCoordConv=coordConvLayer)

        # if model == 'FC-DenseNet-RF-46':
        #     (receptive_field_x, receptive_field_y, _, _, _, _) = receptive_field.compute_receptive_field_from_graph_def(
        #         self.sess.graph, "generator/g_1_enc/first_conv/Conv2D", "generator/g_1_enc/transitiondown-final/max_pool")
        #     assert receptive_field_x == receptive_field_y
        #     print('receptive field: %dx%d' % (receptive_field_x, receptive_field_y))
        # else:
        #     (receptive_field_x, receptive_field_y, _, _, _, _) = receptive_field.compute_receptive_field_from_graph_def(
        #         self.sess.graph, "generator/g_1_enc/first_conv/Conv2D", "generator/g_1_enc/logits/BiasAdd")
        #     assert receptive_field_x == receptive_field_y
        #     print('receptive field: %dx%d' % (receptive_field_x, receptive_field_y))

        feature_tile_shape = [self.batch_size, self.feature_size_tile]
        self.I_ref_f1 = tf.slice(self.I_ref_f, [0, self.feature_size_tile * 0], feature_tile_shape)
        self.I_ref_f2 = tf.slice(self.I_ref_f, [0, self.feature_size_tile * 1], feature_tile_shape)
        self.I_ref_f3 = tf.slice(self.I_ref_f, [0, self.feature_size_tile * 2], feature_tile_shape)
        self.I_ref_f4 = tf.slice(self.I_ref_f, [0, self.feature_size_tile * 3], feature_tile_shape)
        assert self.I_ref_f1.shape[0] == self.batch_size
        assert self.I_ref_f1.shape[1] == self.feature_size_tile
        assert self.I_ref_f1.shape == self.I_ref_f2.shape
        assert self.I_ref_f3.shape == self.I_ref_f4.shape

        self.f_I_ref_composite = tf.zeros((self.batch_size, self.feature_size))
        assert self.I_ref_f.shape == self.f_I_ref_composite.shape  # TODO remove self.f_I_ref_composite

        # this is used to build up graph nodes (variables) -> for later reuse_variables..
        decoder_dense(self.f_I_ref_composite, self.batch_size, self.feature_size,
                      preset_model=model, dropout_p=0.0)

        # Classifier
        # -> this is used to build up graph nodes (variables) -> for later reuse_variables..
        # __self.classifier(self.images_I_ref, self.images_I_ref, self.images_I_ref, self.images_I_ref, self.images_I_ref, self.images_I_ref)
        self.classifier_two_image(self.images_I_ref, self.images_I_ref)

        # to share the weights between the Encoders
        scope_generator.reuse_variables()

        self.I_t1_f = encoder_dense(self.images_t1, self.batch_size, self.feature_size,
                                    dropout_p=0.0, preset_model=model, addCoordConv=coordConvLayer)
        self.t1_f = tf.slice(self.I_t1_f, [0, self.feature_size_tile * 0], feature_tile_shape)
        self.I_t2_f = encoder_dense(self.images_t2, self.batch_size, self.feature_size,
                                    dropout_p=0.0, preset_model=model, addCoordConv=coordConvLayer)
        self.t2_f = tf.slice(self.I_t2_f, [0, self.feature_size_tile * 1], feature_tile_shape)
        self.I_t3_f = encoder_dense(self.images_t3, self.batch_size, self.feature_size,
                                    dropout_p=0.0, preset_model=model, addCoordConv=coordConvLayer)
        self.t3_f = tf.slice(self.I_t3_f, [0, self.feature_size_tile * 2], feature_tile_shape)
        self.I_t4_f = encoder_dense(self.images_t4, self.batch_size, self.feature_size,
                                    dropout_p=0.0, preset_model=model, addCoordConv=coordConvLayer)
        self.t4_f = tf.slice(self.I_t4_f, [0, self.feature_size_tile * 3], feature_tile_shape)

        # ###########################################################################################################
        # 1) replace tile w/ max L2 wrt I_ref w/ respective tile of I_ref
        # 2) replaces tiles t_i w/ I_ref where L2(t_i) > tau
        # 3) ensure tile t_i w/ min L2(t_i) is selected
        # ultimately, we want to construct f_Iref_I2_mix for generation of new image
        tau = self.params.threshold_L2
        for id in range(self.batch_size):
            index = nn_id[id]
            t1_10nn_L2_b = tf.gather(t1_10nn_L2[id], index)
            t2_10nn_L2_b = tf.gather(t2_10nn_L2[id], index)
            t3_10nn_L2_b = tf.gather(t3_10nn_L2[id], index)
            t4_10nn_L2_b = tf.gather(t4_10nn_L2[id], index)
            all_L2 = tf.stack(axis=0, values=[t1_10nn_L2_b, t2_10nn_L2_b, t3_10nn_L2_b, t4_10nn_L2_b])
            argmax_L2 = tf.argmax(tf.reshape(all_L2, [-1]), axis=0)
            argmin_L2 = tf.argmin(tf.reshape(all_L2, [-1]), axis=0)

            # pick I_ref_t1 IFF t1 is argmax L2 or L2 > TAU and t1 is not argmin L2
            is_t1_maxL2 = tf.equal(argmax_L2, 0)
            is_t1_minL2 = tf.equal(argmin_L2, 0)
            cond_Iref_t1 = tf.logical_and(tf.logical_or(is_t1_maxL2, tf.greater(t1_10nn_L2_b, tau)),
                                          tf.logical_not(is_t1_minL2))
            tile_1 = tf.expand_dims(tf.where(cond_Iref_t1, self.I_ref_t1[id], self.I_t1_tile[id]), 0)
            # for the assignment mask e.g. [0 1 1 0], of shape (4,)
            # 0 selects the corresponding tile from I_ref
            # 1 selects the corresponding tile from I_M
            assignment_1 = tf.where(cond_Iref_t1, 0, 1)
            self.J_1_tile = tile_1 if id == 0 else tf.concat(axis=0, values=[self.J_1_tile, tile_1])
            feature_1 = tf.expand_dims(tf.where(cond_Iref_t1, self.I_ref_f1[id], self.t1_f[id]), 0)

            is_t2_maxL2 = tf.equal(argmax_L2, 1)
            is_t2_minL2 = tf.equal(argmin_L2, 1)
            cond_Iref_t2 = tf.logical_and(tf.logical_or(is_t2_maxL2, tf.greater(t2_10nn_L2_b, tau)),
                                          tf.logical_not(is_t2_minL2))
            tile_2 = tf.expand_dims(tf.where(cond_Iref_t2, self.I_ref_t2[id], self.I_t2_tile[id]), 0)
            assignment_2 = tf.where(cond_Iref_t2, 0, 1)
            self.J_2_tile = tile_2 if id == 0 else tf.concat(axis=0, values=[self.J_2_tile, tile_2])
            feature_2 = tf.expand_dims(tf.where(cond_Iref_t2, self.I_ref_f2[id], self.t2_f[id]), 0)

            is_t3_maxL2 = tf.equal(argmax_L2, 2)
            is_t3_minL2 = tf.equal(argmin_L2, 2)
            cond_Iref_t3 = tf.logical_and(tf.logical_or(is_t3_maxL2, tf.greater(t3_10nn_L2_b, tau)),
                                          tf.logical_not(is_t3_minL2))
            tile_3 = tf.expand_dims(tf.where(cond_Iref_t3, self.I_ref_t3[id], self.I_t3_tile[id]), 0)
            assignment_3 = tf.where(cond_Iref_t3, 0, 1)
            self.J_3_tile = tile_3 if id == 0 else tf.concat(axis=0, values=[self.J_3_tile, tile_3])
            feature_3 = tf.expand_dims(tf.where(cond_Iref_t3, self.I_ref_f3[id], self.t3_f[id]), 0)

            is_t4_maxL2 = tf.equal(argmax_L2, 3)
            is_t4_minL2 = tf.equal(argmin_L2, 3)
            cond_Iref_t4 = tf.logical_and(tf.logical_or(is_t4_maxL2, tf.greater(t4_10nn_L2_b, tau)),
                                          tf.logical_not(is_t4_minL2))
            tile_4 = tf.expand_dims(tf.where(cond_Iref_t4, self.I_ref_t4[id], self.I_t4_tile[id]), 0)
            assignment_4 = tf.where(cond_Iref_t4, 0, 1)
            self.J_4_tile = tile_4 if id == 0 else tf.concat(axis=0, values=[self.J_4_tile, tile_4])
            feature_4 = tf.expand_dims(tf.where(cond_Iref_t4, self.I_ref_f4[id], self.t4_f[id]), 0)

            # only for logging purposes START
            assignments = tf.stack(axis=0, values=[assignment_1, assignment_2, assignment_3, assignment_4])
            assignments = tf.expand_dims(tf.reshape(assignments, [-1]), 0)
            self.assignments_actual = assignments if id == 0 else tf.concat(axis=0, values=[self.assignments_actual, assignments])  # or 'mask'
            # only for logging purposes END

            next_assignment = tf.stack(axis=0, values=[assignment_1, ZERO, ZERO, ZERO])
            next_assignment = tf.expand_dims(tf.reshape(next_assignment, [-1]), 0)
            self.assignments_actual_t1 = next_assignment if id == 0 else tf.concat(axis=0, values=[self.assignments_actual_t1, next_assignment])
            next_assignment = tf.stack(axis=0, values=[ZERO, assignment_2, ZERO, ZERO])
            next_assignment = tf.expand_dims(tf.reshape(next_assignment, [-1]), 0)
            self.assignments_actual_t2 = next_assignment if id == 0 else tf.concat(axis=0, values=[self.assignments_actual_t2, next_assignment])
            next_assignment = tf.stack(axis=0, values=[ZERO, ZERO, assignment_3, ZERO])
            next_assignment = tf.expand_dims(tf.reshape(next_assignment, [-1]), 0)
            self.assignments_actual_t3 = next_assignment if id == 0 else tf.concat(axis=0, values=[self.assignments_actual_t3, next_assignment])
            next_assignment = tf.stack(axis=0, values=[ZERO, ZERO, ZERO, assignment_4])
            next_assignment = tf.expand_dims(tf.reshape(next_assignment, [-1]), 0)
            self.assignments_actual_t4 = next_assignment if id == 0 else tf.concat(axis=0, values=[self.assignments_actual_t4, next_assignment])

            assert feature_1.shape[0] == 1
            assert feature_1.shape[1] == self.feature_size_tile
            assert feature_1.shape[0] == feature_2.shape[0] and feature_1.shape[1] == feature_2.shape[1]
            assert feature_2.shape[0] == feature_3.shape[0] and feature_2.shape[1] == feature_3.shape[1]
            assert feature_2.shape[0] == feature_4.shape[0] and feature_2.shape[1] == feature_4.shape[1]
            assert feature_1.shape[1] == a_tile_chunk.shape[1]

            f_features_selected = tf.concat(axis=0, values=[feature_1, feature_2, feature_3, feature_4])  # axis=1
            f_features_selected = tf.reshape(f_features_selected, [-1])
            f_features_selected = tf.expand_dims(f_features_selected, 0)
            self.f_I_ref_I_M_mix = f_features_selected if id == 0 else tf.concat(axis=0, values=[self.f_I_ref_I_M_mix, f_features_selected])

        assert self.assignments_actual_t1.shape[0] == self.batch_size
        assert self.assignments_actual_t1.shape[1] == NUM_TILES_L2_MIX
        assert self.assignments_actual_t1.shape == self.assignments_actual_t2.shape
        assert self.assignments_actual_t2.shape == self.assignments_actual_t3.shape
        assert self.assignments_actual_t3.shape == self.assignments_actual_t4.shape
        assert self.f_I_ref_I_M_mix.shape[0] == self.batch_size
        assert self.f_I_ref_I_M_mix.shape[1] == self.feature_size

        # just for logging purposes __start ###
        row1 = tf.concat([self.J_1_tile, self.J_3_tile], axis=1)
        row2 = tf.concat([self.J_2_tile, self.J_4_tile], axis=1)
        self.images_I_M_mix = tf.concat([row1, row2], axis=2)
        # just for logging purposes __end ###

    print("build_model() ------------------------------------------<")

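# --- illustration of the tile-assignment rule above (assumption, not original code) ---
# For one batch element: given the L2 distances of the four selected kNN tiles to
# I_ref and the threshold tau, a tile is taken from I_ref (assignment 0) iff it has
# the largest L2 or its L2 exceeds tau, unless it is the tile with the smallest L2,
# which is always taken from the kNN image (assignment 1). Plain numpy, for reading
# only; names here are hypothetical.
import numpy as np

def tile_assignments(l2_per_tile, tau):
    l2 = np.asarray(l2_per_tile, dtype=np.float32)      # shape (4,), one value per quadrant
    i_max, i_min = int(np.argmax(l2)), int(np.argmin(l2))
    assignments = []
    for i in range(4):
        take_from_ref = (i == i_max or l2[i] > tau) and i != i_min
        assignments.append(0 if take_from_ref else 1)
    return assignments

print(tile_assignments([0.9, 0.2, 0.5, 0.7], tau=0.6))   # -> [0, 1, 1, 0]
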
def build_model(self):
    print("build_model() ------------------------------------------>")
    if self.y_dim:
        self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='y')
    tf.set_random_seed(self.random_seed)
    # image_size = self.image_size
    self.feature_size_tile = self.params.chunk_size * self.params.chunk_num
    self.feature_size = self.feature_size_tile * NUM_TILES_L2_MIX

    ########################### PASCAL VOC BEGIN
    image_size = self.image_size
    isIdeRun = 'lz826' in os.path.realpath(sys.argv[0])
    file_train = self.params.dataset_path if not isIdeRun else '../data/pascal_voc_2012_trainval_100imgs.tfrecords'
    reader = tf.TFRecordReader()
    rrm_fn = lambda name: read_record(name, reader, image_size)
    train_images, multi_labels = get_pipeline(file_train, self.batch_size, self.epochs, rrm_fn)
    multi_labels = tf.reshape(tf.sparse.to_dense(multi_labels), (self.batch_size, self.params.number_of_classes))
    ########################### PASCAL VOC END

    self.images_I_ref = train_images
    self.labels = multi_labels

    with tf.variable_scope('generator'):
        model = self.params.autoencoder_model
        coordConvLayer = True
        ####################
        print("using encoder for TL...")
        self.I_ref_f = encoder_dense(self.images_I_ref, self.batch_size, self.feature_size,
                                     dropout_p=0.0, preset_model=model, addCoordConv=coordConvLayer)

    with tf.variable_scope('classifier'):
        print("self.I_ref_f: ", self.I_ref_f.shape)
        self.lin_cls_logits = self.linear_classifier(self.I_ref_f)

    with tf.variable_scope('classifier_loss'):
        # self.cls_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.lin_cls_logits, labels=tf.cast(self.labels, tf.float32)))
        # self.cls_loss = tf.losses.softmax_cross_entropy(onehot_labels=self.labels_onehot, logits=self.lin_cls_logits, reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
        self.cls_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=self.labels,
                                                        logits=self.lin_cls_logits,
                                                        reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)

    t_vars = tf.trainable_variables()
    self.gen_vars = [var for var in t_vars if 'generator' in var.name and 'g_' in var.name]  # encoder + decoder (generator)
    self.cls_vars = [var for var in t_vars if 'classifier' in var.name]

    self.print_model_params(t_vars)
    # print("dsc_vars:", self.dsc_vars)
    # print("cls_vars:", self.cls_vars)

    all_vars = []
    all_vars.extend(self.gen_vars)
    all_vars.extend(self.cls_vars)
    assert collections.Counter(all_vars) == collections.Counter(t_vars)

    # only save encoder
    self.saver = tf.train.Saver(self.gen_vars, max_to_keep=5)

    print("build_model() ------------------------------------------<")

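# --- usage sketch (assumption, not part of the original source) ----------------
# Shows one way the multi-label head above could be trained: restore the pretrained
# encoder, then minimize cls_loss either over cls_vars only (linear evaluation) or
# over gen_vars + cls_vars (fine-tuning). `dcgan`, `checkpoint_path`, the learning
# rate, and `fine_tune` are hypothetical; the queue runners are started because the
# TFRecord pipeline above is queue-based.
def train_voc_classifier(dcgan, sess, checkpoint_path, num_steps=1000, fine_tune=False):
    var_list = dcgan.cls_vars + (dcgan.gen_vars if fine_tune else [])
    train_op = tf.train.AdamOptimizer(1e-4).minimize(dcgan.cls_loss, var_list=var_list)
    sess.run(tf.global_variables_initializer())
    dcgan.saver.restore(sess, checkpoint_path)   # load the encoder weights saved elsewhere
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for step in range(num_steps):
        _, loss = sess.run([train_op, dcgan.cls_loss])
        if step % 100 == 0:
            print('step %d  cls_loss %.4f' % (step, loss))
    coord.request_stop()
    coord.join(threads)
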
def build_model(self):
    print("build_model() ------------------------------------------>")
    if self.y_dim:
        self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='y')
    tf.set_random_seed(self.random_seed)
    image_size = self.image_size

    # self.crop_shape = tf.parallel_stack([self.batch_size, image_size // 2, image_size // 2, 3])
    # self.crop_shape = [self.batch_size, image_size // 2, image_size // 2, 3]

    isIdeRun = 'lz826' in os.path.realpath(sys.argv[0])
    file_train = 'datasets/coco/2017_training/version/v1/final/' if not isIdeRun else 'data/train-00011-of-00060.tfrecords'
    file_test = 'datasets/coco/2017_val/version/v1/final/' if not isIdeRun else 'data/train-00011-of-00060.tfrecords'
    file_test_cherry = 'datasets/coco/2017_val/version/v4/final/' if not isIdeRun else 'data/train-00011-of-00060.tfrecords'

    cwd = os.getcwd()
    file_train = os.path.join(cwd, file_train)
    file_test = os.path.join(cwd, file_test)
    file_test_cherry = os.path.join(cwd, file_test_cherry)

    ####################################################################################
    reader = tf.TFRecordReader()
    rrm_fn = lambda name: read_record_max(name, reader, image_size)
    _, train_images, _, _, _, _, _, _, _, _, _, _, _, _ = \
        get_pipeline(file_train, self.batch_size, self.epochs, rrm_fn)
    print('train_images.shape..:', train_images.shape)
    self.images_I_ref = train_images
    # self.images_I_ref_crop = tf.random_crop(self.images_I_ref, self.crop_shape, seed=4285)

    reader_test = tf.TFRecordReader()
    rrm_fn = lambda name: read_record_max(name, reader_test, image_size)
    _, test_images, _, _, _, _, _, _, _, _, _, _, _, _ = \
        get_pipeline(file_test, self.batch_size, self.epochs * 10000, rrm_fn)
    print('test_images.shape..:', test_images.shape)
    self.images_I_test = test_images

    reader_test = tf.TFRecordReader()
    rrm_fn = lambda name: read_record_max(name, reader_test, image_size, crop=False)
    _, test_images_cherry, _, _, _, _, _, _, _, _, _, _, _, _ = \
        get_pipeline_cherry(file_test_cherry, self.batch_size_cherry, self.epochs * 100000, rrm_fn)
    print('test_images_cherry.shape..:', test_images_cherry.shape)
    self.images_I_test_cherry = test_images_cherry

    self.chunk_num = self.params.chunk_num
    """ number of chunks: 8 """
    self.chunk_size = self.params.chunk_size
    """ size per chunk: 64 """
    self.feature_size = self.chunk_size * self.chunk_num
    """ equals the size of all chunks from a single tile """

    with tf.variable_scope('generator') as scope_generator:
        model = 'FC-DenseNet103'
        # TODO: add spectral norm!
        self.I_ref_f = encoder_dense(self.images_I_ref, self.batch_size, self.feature_size,
                                     dropout_p=0.0, preset_model=model)
        # self.I_ref_f = self.encoder(self.images_I_ref, self.batch_size)
        assert self.I_ref_f.shape[0] == self.batch_size
        assert self.I_ref_f.shape[1] == self.feature_size

        # this is used to build up graph nodes (variables) -> for later reuse_variables..
        # self.decoder(self.f_I_ref_composite)
        self.images_I_ref_hat = decoder_dense(self.I_ref_f, self.batch_size, self.feature_size,
                                              preset_model=model, dropout_p=0.0, reuse=False)
        # self.images_I_ref_hat_crop = tf.random_crop(self.images_I_ref_hat, self.crop_shape, seed=4285)
        # self.images_I_ref_hat = self.decoder(self.I_ref_f, self.batch_size)

        # to share the weights between the Encoders
        scope_generator.reuse_variables()

        self.I_test_f = encoder_dense(self.images_I_test, self.batch_size, self.feature_size,
                                      dropout_p=0.0, preset_model=model)
        self.images_I_test_hat = decoder_dense(self.I_test_f, self.batch_size, self.feature_size,
                                               preset_model=model, dropout_p=0.0, reuse=True)
        # self.I_test_f = self.encoder(self.images_I_test, self.batch_size)
        # self.images_I_test_hat = self.decoder(self.I_test_f, self.batch_size)

        self.I_test_f_cherry = encoder_dense(self.images_I_test_cherry, self.batch_size_cherry, self.feature_size,
                                             dropout_p=0.0, preset_model=model)
        self.images_I_test_hat_cherry = decoder_dense(self.I_test_f_cherry, self.batch_size_cherry, self.feature_size,
                                                      preset_model=model, dropout_p=0.0, reuse=True)
        # self.I_test_f_cherry = self.encoder(self.images_I_test_cherry, self.batch_size_cherry)
        # self.images_I_test_hat_cherry = self.decoder(self.I_test_f_cherry, self.batch_size_cherry)

        self.images_I_ref_psnr = tf.reduce_mean(tf.image.psnr(self.images_I_ref, self.images_I_ref_hat, max_val=1.0))
        self.images_I_test_psnr = tf.reduce_mean(tf.image.psnr(self.images_I_test, self.images_I_test_hat, max_val=1.0))
        self.images_I_test_cherry_psnr = tf.reduce_mean(tf.image.psnr(self.images_I_test_cherry, self.images_I_test_hat_cherry, max_val=1.0))

    with tf.variable_scope('discriminator'):
        # Dsc for I1
        self.dsc_I_ref = self.discriminator(self.images_I_ref)
        # self.dsc_I_ref = self.discriminator_global_local(self.images_I_ref, self.images_I_ref_crop)
        # assert self.dsc_I_reftmp.shape == self.dsc_I_ref.shape
        """ dsc_I_ref: real/fake, of shape (64, 1) """
        # Dsc for I3
        self.dsc_I_ref_hat = self.discriminator(self.images_I_ref_hat, reuse=True)
        # self.dsc_I_ref_hat = self.discriminator_global_local(self.images_I_ref_hat, self.images_I_ref_hat_crop, reuse=True)

        # just for logging purposes:
        self.dsc_I_ref_mean = tf.reduce_mean(self.dsc_I_ref)
        self.dsc_I_ref_hat_mean = tf.reduce_mean(self.dsc_I_ref_hat)
        self.v_g_d = tf.reduce_mean(tf.log(self.dsc_I_ref) + tf.log(1 - self.dsc_I_ref_hat))
        """ dsc_I_ref_I_M_mix: real/fake, of shape (64, 1) """

    with tf.variable_scope('discriminator_loss'):
        # Dsc loss x1
        self.dsc_loss_real = binary_cross_entropy_with_logits(tf.ones_like(self.dsc_I_ref), self.dsc_I_ref)
        # Dsc loss x3
        # this is the max_D part of the minmax loss function
        self.dsc_loss_fake = binary_cross_entropy_with_logits(tf.zeros_like(self.dsc_I_ref_hat), self.dsc_I_ref_hat)
        self.dsc_loss = self.dsc_loss_real + self.dsc_loss_fake
        """ dsc_loss: a scalar, of shape () """

    with tf.variable_scope('generator_loss'):
        self.g_loss = binary_cross_entropy_with_logits(tf.ones_like(self.dsc_I_ref_hat), self.dsc_I_ref_hat)

    with tf.variable_scope('L1') as _:
        # Reconstruction loss L1 (mean absolute error) between I1 and I1' (to ensure the autoencoder works properly)
        self.rec_loss_I_ref_hat_I_ref = tf.reduce_mean(tf.abs(self.images_I_ref_hat - self.images_I_ref))

    self.bn_assigners = tf.group(*batch_norm.assigners)

    t_vars = tf.trainable_variables()
    # Tf stuff (tell variables how to train..)
    self.dsc_vars = [var for var in t_vars if 'discriminator' in var.name and 'd_' in var.name]  # discriminator
    self.gen_vars = [var for var in t_vars if 'generator' in var.name and 'g_' in var.name]  # encoder + decoder (generator)
    # self.cls_vars = [var for var in t_vars if 'c_' in var.name]  # classifier

    self.print_model_params(t_vars)

    # save the weights
    self.saver = tf.train.Saver(self.dsc_vars + self.gen_vars + batch_norm.shadow_variables, max_to_keep=5)

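# --- training-step sketch (assumption, not part of the original source) --------
# One way to alternate updates for the autoencoder/GAN graph above: a discriminator
# step on dsc_loss over dsc_vars, and a generator step on g_loss plus a weighted
# reconstruction term over gen_vars, grouped with bn_assigners so the batch-norm
# shadow variables are updated. `dcgan`, the learning rates, and `lambda_rec` are
# hypothetical.
def build_train_ops(dcgan, lr=2e-4, beta1=0.5, lambda_rec=10.0):
    d_optim = tf.train.AdamOptimizer(lr, beta1=beta1).minimize(
        dcgan.dsc_loss, var_list=dcgan.dsc_vars)
    g_total = dcgan.g_loss + lambda_rec * dcgan.rec_loss_I_ref_hat_I_ref
    g_optim = tf.train.AdamOptimizer(lr, beta1=beta1).minimize(
        g_total, var_list=dcgan.gen_vars)
    # run the batch-norm shadow-variable assigners together with the generator step
    g_optim = tf.group(g_optim, dcgan.bn_assigners)
    return d_optim, g_optim

# per iteration (sketch):
#   sess.run(d_optim)
#   sess.run(g_optim)
#   psnr = sess.run(dcgan.images_I_test_psnr)   # monitor reconstruction quality
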
def build_model(self):
    print("build_model() ------------------------------------------>")
    if self.y_dim:
        self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='y')
    tf.set_random_seed(self.random_seed)

    ####################################################################################
    # Load data
    ####################################################################################
    def _parse_function(file_Iref, file_Iobj):
        # print("_parse_function: ", ref_path)
        # image_string = tf.read_file(path_Iref[0])
        # file_Iref = tf.image.decode_jpeg(image_string, channels=3)
        # file_Iref = tf.image.decode_png(image_string, channels=3)
        # print("file_Iref: ", file_Iref)
        # file_Iref = crop_max(file_Iref)
        image_Iref_resized = tf.image.resize_images(file_Iref, [64, 64])  # , method=tf.image.ResizeMethod.AREA) for PNG?
        image_Iref_resized = tf.cast(image_Iref_resized, tf.float32) * (2. / 255) - 1

        # image_string = tf.read_file(path_Iobj[0])
        # file_Iobj = tf.image.decode_jpeg(image_string, channels=3)
        # file_Iobj = tf.image.decode_png(image_string, channels=3)
        # file_Iobj = crop_max(file_Iobj)
        image_Iobj_resized = tf.image.resize_images(file_Iobj, [64, 64])  # , method=tf.image.ResizeMethod.AREA) for PNG?
        image_Iobj_resized = tf.cast(image_Iobj_resized, tf.float32) * (2. / 255) - 1

        return image_Iref_resized, image_Iobj_resized

    # self.images_I_ref_plh = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    # self.images_I_obj_plh = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    print("self.params.image_ref_path: %s" % self.params.image_ref_path)
    print("self.params.image_obj_path: %s" % self.params.image_obj_path)

    # ref_path = tf.constant([self.params.image_ref_path], dtype=tf.string)
    self.images_I_ref_plh = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    # obj_path = tf.constant([self.params.image_obj_path], dtype=tf.string)
    self.images_I_obj_plh = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    # print(ref_path.shape)
    # print(obj_path.shape)

    dataset = tf.data.Dataset.from_tensor_slices((self.images_I_ref_plh, self.images_I_obj_plh))
    dataset = dataset.repeat().batch(self.batch_size)
    self.dataset = dataset.map(_parse_function)
    # self.iterator = dataset.make_one_shot_iterator()
    self.iterator = self.dataset.make_initializable_iterator()
    images_I_ref, images_I_obj = self.iterator.get_next()  # Notice: for both train + test images!!

    images_I_ref = tf.reshape(images_I_ref, [self.batch_size, 64, 64, 3])
    images_I_obj = tf.reshape(images_I_obj, [self.batch_size, 64, 64, 3])
    print("images_I_ref: %s" % images_I_ref)
    print("images_I_obj: %s" % images_I_obj)

    # images_I_ref = tf.cast(images_I_ref, tf.float32) * (2. / 255) - 1
    # images_I_ref = tf.reshape(images_I_ref, (self.image_size, self.image_size, 3))
    # images_I_ref = tf.expand_dims(images_I_ref, 0)
    # images_I_obj = tf.cast(images_I_obj, tf.float32) * (2. / 255) - 1
    # images_I_obj = tf.reshape(images_I_obj, (self.image_size, self.image_size, 3))
    # images_I_obj = tf.expand_dims(images_I_obj, 0)

    print("images_I_ref: ", images_I_ref)
    print("images_I_obj: ", images_I_obj)

    self.images_I_ref = images_I_ref
    self.images_I_obj = images_I_obj

    self.feature_mix = [int(s) for s in self.params.feature_mix.split(',')]
    print("self.feature_mix: %s" % str(self.feature_mix))

    # ###########################################################################################################
    self.chunk_num = self.params.chunk_num
    """ number of chunks: 8 """
    self.chunk_size = self.params.chunk_size
    """ size per chunk: 64 """
    self.feature_size_tile = self.chunk_size * self.chunk_num
    """ equals the size of all chunks from a single tile """
    self.feature_size = self.feature_size_tile * NUM_TILES_L2_MIX
    """ equals the size of the full image feature """

    with tf.variable_scope('generator') as scope_generator:
        # params for ENCODER
        model = self.params.autoencoder_model
        coordConvLayer = True
        ####################
        self.I_ref_f = encoder_dense(self.images_I_ref, self.batch_size, self.feature_size,
                                     dropout_p=0.0, preset_model=model, addCoordConv=coordConvLayer)

        feature_tile_shape = [self.batch_size, self.feature_size_tile]
        self.I_ref_f1 = tf.slice(self.I_ref_f, [0, self.feature_size_tile * 0], feature_tile_shape)
        self.I_ref_f2 = tf.slice(self.I_ref_f, [0, self.feature_size_tile * 1], feature_tile_shape)
        self.I_ref_f3 = tf.slice(self.I_ref_f, [0, self.feature_size_tile * 2], feature_tile_shape)
        self.I_ref_f4 = tf.slice(self.I_ref_f, [0, self.feature_size_tile * 3], feature_tile_shape)

        # this is used to build up graph nodes (variables) -> for later reuse_variables..
        self.decoder(self.I_ref_f, preset_model=model, dropout_p=0.0)

        # to share the weights between the Encoders
        scope_generator.reuse_variables()

        self.I_obj_f = encoder_dense(self.images_I_obj, self.batch_size, self.feature_size,
                                     dropout_p=0.0, preset_model=model, addCoordConv=coordConvLayer)
        self.I_obj_f1 = tf.slice(self.I_obj_f, [0, self.feature_size_tile * 0], feature_tile_shape)
        self.I_obj_f2 = tf.slice(self.I_obj_f, [0, self.feature_size_tile * 1], feature_tile_shape)
        self.I_obj_f3 = tf.slice(self.I_obj_f, [0, self.feature_size_tile * 2], feature_tile_shape)
        self.I_obj_f4 = tf.slice(self.I_obj_f, [0, self.feature_size_tile * 3], feature_tile_shape)

        for id in range(self.batch_size):
            cond_q1 = tf.equal(self.feature_mix[0], FROM_I_OBJ)
            feature_1 = tf.expand_dims(tf.where(cond_q1, self.I_obj_f1[id], self.I_ref_f1[id]), 0)
            cond_q2 = tf.equal(self.feature_mix[1], FROM_I_OBJ)
            feature_2 = tf.expand_dims(tf.where(cond_q2, self.I_obj_f2[id], self.I_ref_f2[id]), 0)
            cond_q3 = tf.equal(self.feature_mix[2], FROM_I_OBJ)
            feature_3 = tf.expand_dims(tf.where(cond_q3, self.I_obj_f3[id], self.I_ref_f3[id]), 0)
            cond_q4 = tf.equal(self.feature_mix[3], FROM_I_OBJ)
            feature_4 = tf.expand_dims(tf.where(cond_q4, self.I_obj_f4[id], self.I_ref_f4[id]), 0)

            f_features_selected = tf.concat(axis=0, values=[feature_1, feature_2, feature_3, feature_4])  # axis=1
            f_features_selected = tf.reshape(f_features_selected, [-1])
            f_features_selected = tf.expand_dims(f_features_selected, 0)
            self.f_I_ref_I_M_mix = f_features_selected if id == 0 else tf.concat(axis=0, values=[self.f_I_ref_I_M_mix, f_features_selected])

        self.images_I_mix = self.decoder(self.f_I_ref_I_M_mix, preset_model=model, dropout_p=0.0)

    t_vars = tf.trainable_variables()
    self.gen_vars = [var for var in t_vars if 'generator' in var.name and 'g_' in var.name]  # encoder + decoder (generator)

    self.print_model_params(t_vars)

    print("build_model() ------------------------------------------<")

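# --- inference sketch (assumption, not part of the original source) ------------
# Feeds one reference image and one object image into the placeholders above and
# fetches the mixed image decoded from f_I_ref_I_M_mix. The _parse_function above
# does the resizing and [-1, 1] scaling, so raw pixel values are fed here.
# `dcgan`, `checkpoint_path`, the image paths, and the use of imageio are hypothetical.
import numpy as np
import imageio

def mix_two_images(dcgan, sess, checkpoint_path, ref_path, obj_path):
    saver = tf.train.Saver(dcgan.gen_vars)
    saver.restore(sess, checkpoint_path)
    img_ref = imageio.imread(ref_path).astype(np.float32)[np.newaxis]   # [1, H, W, 3]
    img_obj = imageio.imread(obj_path).astype(np.float32)[np.newaxis]
    sess.run(dcgan.iterator.initializer,
             feed_dict={dcgan.images_I_ref_plh: img_ref,
                        dcgan.images_I_obj_plh: img_obj})
    return sess.run(dcgan.images_I_mix)   # mixed image(s), shape [batch_size, 64, 64, 3]
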