def make_dataset(features: np.ndarray,
                 labels: np.ndarray = None,
                 shuffle: bool = False):
    """Converts features and labels into a tf.data.Dataset object.

    Arguments:
        features {np.ndarray} -- NumPy array containing features.

    Keyword Arguments:
        labels {np.ndarray} -- NumPy array containing labels. (default: {None})
        shuffle {bool} -- Shuffle the dataset? (default: {False})

    Returns:
        tf.data.Dataset -- Dataset object.
    """
    if labels is not None:
        dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    else:
        dataset = tf.data.Dataset.from_tensor_slices(features)

    # Transform dataset.
    # dataset = dataset.batch(batch_size=args.batch_size)
    dataset = dataset.apply(batch_and_drop_remainder(args.batch_size))

    if shuffle:
        # Note: shuffling after batching permutes whole batches,
        # not individual examples.
        dataset = dataset.shuffle(buffer_size=args.buffer_size)

    return dataset
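All of these examples use `batch_and_drop_remainder` from `tf.contrib.data` (TensorFlow 1.x). Since TF 1.10 the same behavior is available directly on `Dataset.batch` via `drop_remainder=True`, and in TF 2.x (where contrib is gone) that flag is the only option. A minimal sketch of an equivalent pipeline, with assumed default values that are not taken from the example above; it also shuffles before batching so that individual examples, rather than whole batches, are permuted:

import numpy as np
import tensorflow as tf

def make_dataset_v2(features, labels=None, batch_size=32,
                    buffer_size=1000, shuffle=False):
    if labels is not None:
        dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    else:
        dataset = tf.data.Dataset.from_tensor_slices(features)
    if shuffle:
        # Shuffle individual examples before batching.
        dataset = dataset.shuffle(buffer_size=buffer_size)
    # Drops the final partial batch, like batch_and_drop_remainder.
    return dataset.batch(batch_size, drop_remainder=True)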
def build(input_reader_config, batch_size=None, transform_input_data_fn=None,
          input_context=None, reduce_to_frame_fn=None):
  """Builds a tf.data.Dataset.

  Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all
  records. Batches the resulting dataset, dropping any partial final batch.

  Args:
    input_reader_config: An input_reader_pb2.InputReader object.
    batch_size: Batch size. If batch size is None, no batching is performed.
    transform_input_data_fn: Function to apply transformation to all records,
      or None if no extra decoding is required.
    input_context: optional, a tf.distribute.InputContext object used to shard
      filenames and compute per-replica batch_size when this function is being
      called per-replica.
    reduce_to_frame_fn: Function that extracts frames from tf.SequenceExample
      type input data.

  Returns:
    A tf.data.Dataset based on the input_reader_config.

  Raises:
    ValueError: On invalid input reader proto.
    ValueError: If no input paths are specified.
  """
  if not isinstance(input_reader_config, input_reader_pb2.InputReader):
    raise ValueError('input_reader_config not of type '
                     'input_reader_pb2.InputReader.')

  decoder = decoder_builder.build(input_reader_config)

  if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
    config = input_reader_config.tf_record_input_reader
    if not config.input_path:
      raise ValueError('At least one input path must be specified in '
                       '`input_reader_config`.')

    shard_fn = shard_function_for_context(input_context)
    if input_context is not None:
      batch_size = input_context.get_per_replica_batch_size(batch_size)
    dataset = read_dataset(
        functools.partial(tf.data.TFRecordDataset,
                          buffer_size=8 * 1000 * 1000),
        config.input_path[:], input_reader_config, filename_shard_fn=shard_fn)
    if input_reader_config.sample_1_of_n_examples > 1:
      dataset = dataset.shard(input_reader_config.sample_1_of_n_examples, 0)
    # TODO(rathodv): make batch size a required argument once the old binaries
    # are deleted.
    dataset = dataset.map(decoder.decode, tf.data.experimental.AUTOTUNE)
    if reduce_to_frame_fn:
      dataset = reduce_to_frame_fn(dataset)
    if transform_input_data_fn is not None:
      dataset = dataset.map(transform_input_data_fn,
                            tf.data.experimental.AUTOTUNE)
    if batch_size:
      dataset = dataset.apply(tf_data.batch_and_drop_remainder(batch_size))
    dataset = dataset.prefetch(input_reader_config.num_prefetch_batches)
    return dataset

  raise ValueError('Unsupported input_reader_config.')
def processing_data(dir_B, dir_A, batch_size, h, w, ch, layers_num):
    trainA_dataset = glob(dir_A + '/*.*')
    trainB_dataset = glob(dir_B + '/*.*')
    dataset_num = max(len(trainA_dataset), len(trainB_dataset))

    Image_Data_Class = ImageData(h, w, ch)

    trainA = tf.data.Dataset.from_tensor_slices(trainA_dataset)
    trainB = tf.data.Dataset.from_tensor_slices(trainB_dataset)

    trainA = trainA.prefetch(batch_size).shuffle(dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(batch_size)).repeat()
    trainB = trainB.prefetch(batch_size).shuffle(dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(batch_size)).repeat()

    trainA_iterator = trainA.make_one_shot_iterator()
    trainB_iterator = trainB.make_one_shot_iterator()

    domain_A_info = trainA_iterator.get_next()
    domain_B_info = trainB_iterator.get_next()

    img_A, _ = domain_A_info
    img_B, _ = domain_B_info

    imgA_list = []
    imgB_list = []
    for layer in range(layers_num):
        if layer == layers_num - 1:
            imgA_list.append(img_A)
            imgB_list.append(img_B)
        else:
            img_temp_A, img_temp_B = processing_By_config(
                img_A, img_B, h, w, layers_num, layer)
            imgA_list.append(img_temp_A)
            imgB_list.append(img_temp_B)

    return imgA_list, imgB_list
def load_tfrecord(serialized_data, shape, batch_size=1, read_threads=4,
                  shuffle_buffer_size=1000, prefetch=1, distribute=(1, 0)):
    def parser(serialized_data):
        features = {
            "shape": tf.FixedLenFeature([3], tf.int64),
            "patch": tf.FixedLenFeature([], tf.string),
            "filename": tf.FixedLenFeature([], tf.string),
            "coordinate": tf.FixedLenFeature([2], tf.int64),
        }
        decoded = tf.parse_single_example(serialized_data, features)
        # NOTE: the stored patch data is tf.float64.
        patch = tf.reshape(tf.decode_raw(decoded["patch"], tf.float64),
                           decoded["shape"])
        # Randomly crop mini-patches from the data.
        patch = tf.random_crop(patch, shape)
        print(decoded["filename"], decoded["coordinate"], patch)
        return decoded["filename"], decoded["coordinate"], patch

    dataset = (tf.data.Dataset.list_files(serialized_data, shuffle=True)
               .shard(*distribute)
               .apply(parallel_interleave(
                   lambda f: tf.data.TFRecordDataset(f).map(parser),
                   cycle_length=read_threads,
                   sloppy=True,
               )))
    print(dataset)
    dataset = dataset.apply(
        batch_and_drop_remainder(batch_size)).prefetch(prefetch)
    return dataset
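`parallel_interleave` here also comes from `tf.contrib.data`. If this snippet were ported to the core API, the same read pattern could be expressed with `Dataset.interleave` plus `num_parallel_calls` (available in newer TF releases); a sketch under that assumption, reusing `parser`, `serialized_data`, and `read_threads` from the function above:

files = tf.data.Dataset.list_files(serialized_data, shuffle=True)
dataset = files.interleave(
    lambda f: tf.data.TFRecordDataset(f).map(parser),
    cycle_length=read_threads,
    num_parallel_calls=tf.data.experimental.AUTOTUNE)
# There is no direct sloppy=True argument; non-deterministic ordering
# is opted into through dataset options instead.
options = tf.data.Options()
options.experimental_deterministic = False
dataset = dataset.with_options(options)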
def build_model(self):
    self.lr = tf.placeholder(tf.float32, name='learning_rate')

    """ Input Image """
    Image_Data_Class = ImageData(self.img_h, self.img_w, self.img_ch,
                                 self.augment_flag)

    trainA = tf.data.Dataset.from_tensor_slices(self.trainA_dataset)
    trainB = tf.data.Dataset.from_tensor_slices(self.trainB_dataset)

    trainA = trainA.prefetch(self.batch_size).shuffle(self.dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(self.batch_size)).repeat()
    trainB = trainB.prefetch(self.batch_size).shuffle(self.dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(self.batch_size)).repeat()

    trainA_iterator = trainA.make_one_shot_iterator()
    trainB_iterator = trainB.make_one_shot_iterator()

    self.domain_A = trainA_iterator.get_next()
    self.domain_B = trainB_iterator.get_next()

    """ Define Encoder, Generator, Discriminator """
    self.style_a = tf.placeholder(
        tf.float32, shape=[self.batch_size, 1, 1, self.style_dim],
        name='style_a')
    self.style_b = tf.placeholder(
        tf.float32, shape=[self.batch_size, 1, 1, self.style_dim],
        name='style_b')

    # encode
    content_a, style_a_prime = self.Encoder_A(self.domain_A)
    content_b, style_b_prime = self.Encoder_B(self.domain_B)

    # decode (within domain)
    x_aa = self.Decoder_A(content_B=content_a, style_A=style_a_prime)
    x_bb = self.Decoder_B(content_A=content_b, style_B=style_b_prime)

    # decode (cross domain)
    x_ba = self.Decoder_A(content_B=content_b, style_A=self.style_a,
                          reuse=True)
    x_ab = self.Decoder_B(content_A=content_a, style_B=self.style_b,
                          reuse=True)

    # encode again
    content_b_, style_a_ = self.Encoder_A(x_ba, reuse=True)
    content_a_, style_b_ = self.Encoder_B(x_ab, reuse=True)

    # decode again (if needed)
    if self.recon_x_cyc_w > 0:
        x_aba = self.Decoder_A(content_B=content_a_, style_A=style_a_prime,
                               reuse=True)
        x_bab = self.Decoder_B(content_A=content_b_, style_B=style_b_prime,
                               reuse=True)
        cyc_recon_A = L1_loss(x_aba, self.domain_A)
        cyc_recon_B = L1_loss(x_bab, self.domain_B)
    else:
        cyc_recon_A = 0.0
        cyc_recon_B = 0.0

    real_A_logit, real_B_logit = self.discriminate_real(self.domain_A,
                                                        self.domain_B)
    fake_A_logit, fake_B_logit = self.discriminate_fake(x_ba, x_ab)

    """ Define Loss """
    G_ad_loss_a = generator_loss(self.gan_type, fake_A_logit)
    G_ad_loss_b = generator_loss(self.gan_type, fake_B_logit)

    D_ad_loss_a = discriminator_loss(self.gan_type, real_A_logit,
                                     fake_A_logit)
    D_ad_loss_b = discriminator_loss(self.gan_type, real_B_logit,
                                     fake_B_logit)

    recon_A = L1_loss(x_aa, self.domain_A)  # reconstruction
    recon_B = L1_loss(x_bb, self.domain_B)  # reconstruction

    # The style reconstruction loss encourages
    # diverse outputs given different style codes.
    recon_style_A = L1_loss(style_a_, self.style_a)
    recon_style_B = L1_loss(style_b_, self.style_b)

    # The content reconstruction loss encourages the translated image
    # to preserve the semantic content of the input image.
    recon_content_A = L1_loss(content_a_, content_a)
    recon_content_B = L1_loss(content_b_, content_b)

    Generator_A_loss = self.gan_w * G_ad_loss_a + \
                       self.recon_x_w * recon_A + \
                       self.recon_s_w * recon_style_A + \
                       self.recon_c_w * recon_content_A + \
                       self.recon_x_cyc_w * cyc_recon_A

    Generator_B_loss = self.gan_w * G_ad_loss_b + \
                       self.recon_x_w * recon_B + \
                       self.recon_s_w * recon_style_B + \
                       self.recon_c_w * recon_content_B + \
                       self.recon_x_cyc_w * cyc_recon_B

    Discriminator_A_loss = self.gan_w * D_ad_loss_a
    Discriminator_B_loss = self.gan_w * D_ad_loss_b

    self.Generator_loss = Generator_A_loss + Generator_B_loss
    self.Discriminator_loss = Discriminator_A_loss + Discriminator_B_loss

    """ Training """
    t_vars = tf.trainable_variables()
    G_vars = [var for var in t_vars
              if 'decoder' in var.name or 'encoder' in var.name]
    D_vars = [var for var in t_vars if 'discriminator' in var.name]

    self.G_optim = tf.train.AdamOptimizer(
        self.lr, beta1=0.5, beta2=0.999).minimize(self.Generator_loss,
                                                  var_list=G_vars)
    self.D_optim = tf.train.AdamOptimizer(
        self.lr, beta1=0.5, beta2=0.999).minimize(self.Discriminator_loss,
                                                  var_list=D_vars)

    """ Summary """
    self.all_G_loss = tf.summary.scalar("Generator_loss",
                                        self.Generator_loss)
    self.all_D_loss = tf.summary.scalar("Discriminator_loss",
                                        self.Discriminator_loss)
    self.G_A_loss = tf.summary.scalar("G_A_loss", Generator_A_loss)
    self.G_B_loss = tf.summary.scalar("G_B_loss", Generator_B_loss)
    self.D_A_loss = tf.summary.scalar("D_A_loss", Discriminator_A_loss)
    self.D_B_loss = tf.summary.scalar("D_B_loss", Discriminator_B_loss)

    self.G_loss = tf.summary.merge(
        [self.G_A_loss, self.G_B_loss, self.all_G_loss])
    self.D_loss = tf.summary.merge(
        [self.D_A_loss, self.D_B_loss, self.all_D_loss])

    """ Image """
    self.fake_A = x_ba
    self.fake_B = x_ab

    self.real_A = self.domain_A
    self.real_B = self.domain_B

    """ Test """
    self.test_image = tf.placeholder(
        tf.float32, [1, self.img_h, self.img_w, self.img_ch],
        name='test_image')
    self.test_style = tf.placeholder(tf.float32, [1, 1, 1, self.style_dim],
                                     name='test_style')

    test_content_a, _ = self.Encoder_A(self.test_image, reuse=True)
    test_content_b, _ = self.Encoder_B(self.test_image, reuse=True)

    self.test_fake_A = self.Decoder_A(content_B=test_content_b,
                                      style_A=self.test_style, reuse=True)
    self.test_fake_B = self.Decoder_B(content_A=test_content_a,
                                      style_B=self.test_style, reuse=True)

    """ Guided Image Translation """
    self.content_image = tf.placeholder(
        tf.float32, [1, self.img_h, self.img_w, self.img_ch],
        name='content_image')
    self.style_image = tf.placeholder(
        tf.float32, [1, self.img_h, self.img_w, self.img_ch],
        name='guide_style_image')

    if self.direction == 'a2b':
        guide_content_A, guide_style_A = self.Encoder_A(self.content_image,
                                                        reuse=True)
        guide_content_B, guide_style_B = self.Encoder_B(self.style_image,
                                                        reuse=True)
    else:
        guide_content_B, guide_style_B = self.Encoder_B(self.content_image,
                                                        reuse=True)
        guide_content_A, guide_style_A = self.Encoder_A(self.style_image,
                                                        reuse=True)

    self.guide_fake_A = self.Decoder_A(content_B=guide_content_B,
                                       style_A=guide_style_A, reuse=True)
    self.guide_fake_B = self.Decoder_B(content_A=guide_content_A,
                                       style_B=guide_style_B, reuse=True)
def build_model(self):
    self.lr = tf.placeholder(tf.float32, name='learning_rate')

    """ Input Image """
    Image_Data_Class = ImageData(self.img_size, self.img_ch,
                                 self.augment_flag)

    trainA = tf.data.Dataset.from_tensor_slices(self.trainA_dataset)
    trainB = tf.data.Dataset.from_tensor_slices(self.trainB_dataset)

    trainA = trainA.prefetch(self.batch_size).shuffle(self.dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(self.batch_size)).repeat()
    trainB = trainB.prefetch(self.batch_size).shuffle(self.dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(self.batch_size)).repeat()

    trainA_iterator = trainA.make_one_shot_iterator()
    trainB_iterator = trainB.make_one_shot_iterator()

    self.domain_A = trainA_iterator.get_next()
    self.domain_B = trainB_iterator.get_next()

    """ Define Encoder, Generator, Discriminator """
    x_aa, x_ba, x_ab, x_bb, shared = self.translation(self.domain_A,
                                                      self.domain_B)
    x_bab, shared_bab = self.generate_a2b(x_ba)
    x_aba, shared_aba = self.generate_b2a(x_ab)

    real_A_logit, real_B_logit = self.discriminate_real(self.domain_A,
                                                        self.domain_B)
    fake_A_logit, fake_B_logit = self.discriminate_fake(x_ba, x_ab)

    """ Define Loss """
    G_ad_loss_a = generator_loss(self.gan_type, fake_A_logit)
    G_ad_loss_b = generator_loss(self.gan_type, fake_B_logit)

    D_ad_loss_a = discriminator_loss(self.gan_type, real_A_logit,
                                     fake_A_logit)
    D_ad_loss_b = discriminator_loss(self.gan_type, real_B_logit,
                                     fake_B_logit)

    enc_loss = KL_divergence(shared)
    enc_bab_loss = KL_divergence(shared_bab)
    enc_aba_loss = KL_divergence(shared_aba)

    l1_loss_a = L1_loss(x_aa, self.domain_A)  # identity
    l1_loss_b = L1_loss(x_bb, self.domain_B)  # identity
    l1_loss_aba = L1_loss(x_aba, self.domain_A)  # reconstruction
    l1_loss_bab = L1_loss(x_bab, self.domain_B)  # reconstruction

    Generator_A_loss = self.GAN_weight * G_ad_loss_a + \
                       self.L1_weight * l1_loss_a + \
                       self.L1_cycle_weight * l1_loss_aba + \
                       self.KL_weight * enc_loss + \
                       self.KL_cycle_weight * enc_bab_loss

    Generator_B_loss = self.GAN_weight * G_ad_loss_b + \
                       self.L1_weight * l1_loss_b + \
                       self.L1_cycle_weight * l1_loss_bab + \
                       self.KL_weight * enc_loss + \
                       self.KL_cycle_weight * enc_aba_loss

    Discriminator_A_loss = self.GAN_weight * D_ad_loss_a
    Discriminator_B_loss = self.GAN_weight * D_ad_loss_b

    self.Generator_loss = Generator_A_loss + Generator_B_loss
    self.Discriminator_loss = Discriminator_A_loss + Discriminator_B_loss

    """ Training """
    t_vars = tf.trainable_variables()
    G_vars = [var for var in t_vars
              if 'generator' in var.name or 'encoder' in var.name]
    D_vars = [var for var in t_vars if 'discriminator' in var.name]

    self.G_optim = tf.train.AdamOptimizer(
        self.lr, beta1=0.5, beta2=0.999).minimize(self.Generator_loss,
                                                  var_list=G_vars)
    self.D_optim = tf.train.AdamOptimizer(
        self.lr, beta1=0.5, beta2=0.999).minimize(self.Discriminator_loss,
                                                  var_list=D_vars)

    """ Summary """
    self.all_G_loss = tf.summary.scalar("Generator_loss",
                                        self.Generator_loss)
    self.all_D_loss = tf.summary.scalar("Discriminator_loss",
                                        self.Discriminator_loss)
    self.G_A_loss = tf.summary.scalar("G_A_loss", Generator_A_loss)
    self.G_B_loss = tf.summary.scalar("G_B_loss", Generator_B_loss)
    self.D_A_loss = tf.summary.scalar("D_A_loss", Discriminator_A_loss)
    self.D_B_loss = tf.summary.scalar("D_B_loss", Discriminator_B_loss)

    self.G_loss = tf.summary.merge(
        [self.G_A_loss, self.G_B_loss, self.all_G_loss])
    self.D_loss = tf.summary.merge(
        [self.D_A_loss, self.D_B_loss, self.all_D_loss])

    """ Image """
    self.fake_A = x_ba
    self.fake_B = x_ab

    self.real_A = self.domain_A
    self.real_B = self.domain_B

    """ Test """
    self.test_image = tf.placeholder(
        tf.float32, [1, self.img_size, self.img_size, self.img_ch],
        name='test_image')
    self.test_fake_A, _ = self.generate_b2a(self.test_image)
    self.test_fake_B, _ = self.generate_a2b(self.test_image)
def build_model(self):
    self.lr = tf.placeholder(tf.float32, name='learning_rate')

    """ Input Image """
    Image_Data_Class = ImageData(self.img_h, self.img_w, self.img_ch,
                                 self.augment_flag)

    trainA = tf.data.Dataset.from_tensor_slices(self.trainA_dataset)
    trainB = tf.data.Dataset.from_tensor_slices(self.trainB_dataset)

    trainA = trainA.prefetch(self.batch_size).shuffle(self.dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(self.batch_size)).repeat()
    trainB = trainB.prefetch(self.batch_size).shuffle(self.dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(self.batch_size)).repeat()

    trainA_iterator = trainA.make_one_shot_iterator()
    trainB_iterator = trainB.make_one_shot_iterator()

    self.domain_A = trainA_iterator.get_next()
    self.domain_B = trainB_iterator.get_next()

    """ Define Encoder, Generator, Discriminator """
    # encode
    content_a = self.Encoder_A(self.domain_A)
    content_b = self.Encoder_B(self.domain_B, reuse=True)

    # decode (within domain and cross domain)
    x_aa, x_ba = self.Decoder_A(content_A=content_a, content_B=content_b)
    x_ab, x_bb = self.Decoder_B(content_A=content_a, content_B=content_b)

    # encode again
    content_aa_ = self.Encoder_A(x_aa, reuse=True)
    content_ab_ = self.Encoder_B(x_ab, reuse=True)
    content_ba_ = self.Encoder_A(x_ba, reuse=True)
    content_bb_ = self.Encoder_B(x_bb, reuse=True)

    real_A_logit, real_B_logit = self.discriminate_real(self.domain_A,
                                                        self.domain_B)
    fake_A_logit, fake_B_logit = self.discriminate_fake(x_ba, x_ab)

    """ Define Loss """
    if 'wgan' in self.gan_type or self.gan_type == 'dragan':
        GP_ba = self.gradient_panalty(real=self.domain_A, fake=x_ba,
                                      scope="discriminator_A")
        GP_ab = self.gradient_panalty(real=self.domain_B, fake=x_ab,
                                      scope="discriminator_B")
    else:
        GP_ba = GP_ab = 0

    G_ad_loss_a = generator_loss(self.gan_type, fake_A_logit)
    G_ad_loss_b = generator_loss(self.gan_type, fake_B_logit)

    D_ad_loss_a = discriminator_loss(self.gan_type, real_A_logit,
                                     fake_A_logit) + GP_ba
    D_ad_loss_b = discriminator_loss(self.gan_type, real_B_logit,
                                     fake_B_logit) + GP_ab

    recon_A = L1_loss(x_aa, self.domain_A)
    recon_B = L1_loss(x_bb, self.domain_B)

    # The style reconstruction loss encourages
    # diverse outputs given different style codes.
    # recon_style_A = L1_loss(style_a_, self.style_a)
    # recon_style_B = L1_loss(style_b_, self.style_b)

    # The content reconstruction loss encourages the translated image
    # to preserve the semantic content of the input image.
    recon_content_A = L1_loss(content_aa_, content_a) + \
                      L1_loss(content_ab_, content_a)
    recon_content_B = L1_loss(content_ba_, content_b) + \
                      L1_loss(content_bb_, content_b)

    Generator_A_loss = self.gan_w * G_ad_loss_a + \
                       self.recon_x_w * recon_A + \
                       self.recon_c_w * recon_content_A

    Generator_B_loss = self.gan_w * G_ad_loss_b + \
                       self.recon_x_w * recon_B + \
                       self.recon_c_w * recon_content_B

    Discriminator_A_loss = self.gan_w * D_ad_loss_a
    Discriminator_B_loss = self.gan_w * D_ad_loss_b

    self.Generator_loss = Generator_A_loss + Generator_B_loss
    self.Discriminator_loss = Discriminator_A_loss + Discriminator_B_loss

    """ Training """
    t_vars = tf.trainable_variables()
    G_vars = [var for var in t_vars
              if 'decoder' in var.name or 'encoder' in var.name]
    D_vars = [var for var in t_vars if 'discriminator' in var.name]

    self.G_optim = tf.train.AdamOptimizer(
        self.lr, beta1=0.5, beta2=0.9).minimize(self.Generator_loss,
                                                var_list=G_vars)
    self.D_optim = tf.train.AdamOptimizer(
        self.lr, beta1=0.5, beta2=0.9).minimize(self.Discriminator_loss,
                                                var_list=D_vars)

    """ Summary """
    self.all_G_loss = tf.summary.scalar("Generator_loss",
                                        self.Generator_loss)
    self.all_D_loss = tf.summary.scalar("Discriminator_loss",
                                        self.Discriminator_loss)
    self.G_A_loss = tf.summary.scalar("G_A_loss", Generator_A_loss)
    self.G_B_loss = tf.summary.scalar("G_B_loss", Generator_B_loss)
    self.D_A_loss = tf.summary.scalar("D_A_loss", Discriminator_A_loss)
    self.D_B_loss = tf.summary.scalar("D_B_loss", Discriminator_B_loss)

    self.G_loss = tf.summary.merge(
        [self.G_A_loss, self.G_B_loss, self.all_G_loss])
    self.D_loss = tf.summary.merge(
        [self.D_A_loss, self.D_B_loss, self.all_D_loss])

    """ Image """
    self.fake_BA = x_ba
    self.fake_AB = x_ab

    self.real_A = self.domain_A
    self.real_B = self.domain_B

    """ Test """
    self.test_image = tf.placeholder(
        tf.float32, [1, self.img_h, self.img_w, self.img_ch],
        name='test_image')

    test_content_a = self.Encoder_A(self.test_image, reuse=True)
    test_content_b = self.Encoder_B(self.test_image, reuse=True)

    self.test_fake_AA, self.test_fake_BA = self.Decoder_A(
        content_A=test_content_a, content_B=test_content_b, reuse=True)
    self.test_fake_AB, self.test_fake_BB = self.Decoder_B(
        content_A=test_content_a, content_B=test_content_b, reuse=True)
checkpoint_dir = "checkpoint" sample_dir = "sample" # dataset processing train_A_dataset = glob(train_A_dir + "/*.*") train_B_dataset = glob(train_B_dir + "/*.*") dataset_num = max(len(train_A_dataset), len(train_B_dataset)) Image_Data_Class = ImageData(img_size, img_ch) trainA = tf.data.Dataset.from_tensor_slices(train_A_dataset) trainB = tf.data.Dataset.from_tensor_slices(train_B_dataset) trainA = trainA.prefetch(batch_size).shuffle(dataset_num).map( Image_Data_Class.image_processing, num_parallel_calls=8).apply(batch_and_drop_remainder(batch_size)).repeat() trainB = trainB.prefetch(batch_size).shuffle(dataset_num).map( Image_Data_Class.image_processing, num_parallel_calls=8).apply(batch_and_drop_remainder(batch_size)).repeat() trainA_iterator = trainA.make_one_shot_iterator() trainB_iterator = trainB.make_one_shot_iterator() train_A = trainA_iterator.get_next() train_B = trainB_iterator.get_next() # build model G_ab = cyclegan.generate(train_A, scope="generate_B") G_ba = cyclegan.generate(train_B, scope="generate_A") G_aba = cyclegan.generate(G_ab, reuse=True, scope="generate_A")
def build(input_reader_config, batch_size=None, transform_input_data_fn=None,
          input_context=None):
  """Builds a tf.data.Dataset.

  Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all
  records. Batches the resulting dataset, dropping any partial final batch.

  Args:
    input_reader_config: An input_reader_pb2.InputReader object.
    batch_size: Batch size. If batch size is None, no batching is performed.
    transform_input_data_fn: Function to apply transformation to all records,
      or None if no extra decoding is required.
    input_context: optional, a tf.distribute.InputContext object used to shard
      filenames and compute per-replica batch_size when this function is being
      called per-replica.

  Returns:
    A tf.data.Dataset based on the input_reader_config.

  Raises:
    ValueError: On invalid input reader proto.
    ValueError: If no input paths are specified.
  """
  if not isinstance(input_reader_config, input_reader_pb2.InputReader):
    raise ValueError('input_reader_config not of type '
                     'input_reader_pb2.InputReader.')

  decoder = decoder_builder.build(input_reader_config)

  if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
    config = input_reader_config.tf_record_input_reader
    if not config.input_path:
      raise ValueError('At least one input path must be specified in '
                       '`input_reader_config`.')

    def process_fn(value):
      """Sets up tf graph that decodes, transforms and pads input data."""
      processed_tensors = decoder.decode(value)
      if transform_input_data_fn is not None:
        processed_tensors = transform_input_data_fn(processed_tensors)
      return processed_tensors

    shard_fn = shard_function_for_context(input_context)
    if input_context is not None:
      batch_size = input_context.get_per_replica_batch_size(batch_size)
    dataset = read_dataset(
        functools.partial(tf.data.TFRecordDataset,
                          buffer_size=8 * 1000 * 1000),
        config.input_path[:], input_reader_config, filename_shard_fn=shard_fn)
    if input_reader_config.sample_1_of_n_examples > 1:
      dataset = dataset.shard(input_reader_config.sample_1_of_n_examples, 0)
    # TODO(rathodv): make batch size a required argument once the old binaries
    # are deleted.
    if batch_size:
      num_parallel_calls = batch_size * input_reader_config.num_parallel_batches
    else:
      num_parallel_calls = input_reader_config.num_parallel_map_calls
    # TODO(b/123952794): Migrate to V2 function.
    if hasattr(dataset, 'map_with_legacy_function'):
      data_map_fn = dataset.map_with_legacy_function
    else:
      data_map_fn = dataset.map
    dataset = data_map_fn(process_fn, num_parallel_calls=num_parallel_calls)
    if batch_size:
      dataset = dataset.apply(tf_data.batch_and_drop_remainder(batch_size))
    dataset = dataset.prefetch(input_reader_config.num_prefetch_batches)
    return dataset

  raise ValueError('Unsupported input_reader_config.')
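`shard_function_for_context` is referenced but not defined in these examples. Based on how it is used together with `tf.distribute.InputContext`, a plausible sketch (an assumption, not the examples' actual helper) is a closure that shards the filename dataset across input pipelines:

def shard_function_for_context(input_context):
    """Returns a function that shards filenames based on the input context."""
    if input_context is None:
        return None

    def shard_fn(dataset):
        # Each input pipeline reads a disjoint slice of the files.
        return dataset.shard(input_context.num_input_pipelines,
                             input_context.input_pipeline_id)

    return shard_fn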
def dump_dataset(dataset):
    iterator = dataset.make_one_shot_iterator()
    features = iterator.get_next()
    with tf.Session() as sess:
        try:
            while True:
                print(sess.run(features))
        except tf.errors.OutOfRangeError:
            print("end!")


length = 32
components = np.array([[i] for i in range(length)], dtype=np.int64)
# print(components)
dataset = Dataset.from_tensor_slices(components)
dump_dataset(dataset)

window_size = 4
dataset = dataset.apply(batch_and_drop_remainder(window_size))
dump_dataset(dataset)
# [[0][1][2][3]]
# [[4][5][6][7]]
# [[8][9][10][11]]

# Group consecutive batches under a single key (the constant 3) and shuffle
# the batches within each window of two.
dataset1 = dataset.apply(
    group_by_window(key_func=lambda x: 3,
                    reduce_func=lambda k, d: d.shuffle(3),
                    window_size=2))
dump_dataset(dataset1)
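The `group_by_window` trick above shuffles fixed batches; truly overlapping windows are easier to build with `Dataset.window` (available in newer TF 1.x releases and TF 2.x), which accepts a shift smaller than the window size. A sketch reusing `components` and `dump_dataset` from the snippet above:

window_size = 4
shift = 2  # 50% overlap between consecutive windows
windowed = (Dataset.from_tensor_slices(components)
            .window(size=window_size, shift=shift, drop_remainder=True)
            .flat_map(lambda w: w.batch(window_size)))
dump_dataset(windowed)
# [[0][1][2][3]]
# [[2][3][4][5]]
# [[4][5][6][7]]
# ...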
def build_model(self):
    self.lr = tf.placeholder(tf.float32, name='learning_rate')

    """ Input Image """
    Image_Data_Class = ImageData(self.img_h, self.img_w, self.img_ch,
                                 self.augment_flag)

    trainA = tf.data.Dataset.from_tensor_slices(self.trainA_dataset)
    trainB = tf.data.Dataset.from_tensor_slices(self.trainB_dataset)

    trainA = trainA.prefetch(self.batch_size).shuffle(self.dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(self.batch_size)).repeat()
    trainB = trainB.prefetch(self.batch_size).shuffle(self.dataset_num).map(
        Image_Data_Class.image_processing, num_parallel_calls=8).apply(
            batch_and_drop_remainder(self.batch_size)).repeat()

    trainA_iterator = trainA.make_one_shot_iterator()
    trainB_iterator = trainB.make_one_shot_iterator()

    self.domain_A = trainA_iterator.get_next()
    self.domain_B = trainB_iterator.get_next()

    """ Define Encoder, Generator, Discriminator """
    # encode
    content_a, style_a = self.encoder_A(self.domain_A)
    content_b, style_b = self.encoder_B(self.domain_B)

    # decode (cross domain)
    x_ba, U_A = self.decoder_A(content_B=content_b, style_A=style_a)
    x_ab, U_B = self.decoder_B(content_A=content_a, style_B=style_b)

    # decode (within domain)
    x_aa, _ = self.decoder_A(content_B=content_a, style_A=style_a, reuse=True)
    x_bb, _ = self.decoder_B(content_A=content_b, style_B=style_b, reuse=True)

    # encode again
    content_ba, style_ba = self.encoder_A(x_ba, reuse=True)
    content_ab, style_ab = self.encoder_B(x_ab, reuse=True)

    # decode again (if needed)
    x_aba, _ = self.decoder_A(content_B=content_ab, style_A=style_ba,
                              reuse=True)
    x_bab, _ = self.decoder_B(content_A=content_ba, style_B=style_ab,
                              reuse=True)

    real_A_logit, real_B_logit = self.discriminate_real(self.domain_A,
                                                        self.domain_B)
    fake_A_logit, fake_B_logit = self.discriminate_fake(x_ba, x_ab)

    """ Define Loss """
    G_adv_A = self.gan_w * generator_loss(self.gan_type, fake_A_logit)
    G_adv_B = self.gan_w * generator_loss(self.gan_type, fake_B_logit)

    D_adv_A = self.gan_w * discriminator_loss(self.gan_type, real_A_logit,
                                              fake_A_logit)
    D_adv_B = self.gan_w * discriminator_loss(self.gan_type, real_B_logit,
                                              fake_B_logit)

    recon_style_A = self.recon_s_w * L1_loss(style_ba, style_a)
    recon_style_B = self.recon_s_w * L1_loss(style_ab, style_b)

    recon_content_A = self.recon_c_w * L1_loss(content_ab, content_a)
    recon_content_B = self.recon_c_w * L1_loss(content_ba, content_b)

    cyc_recon_A = self.recon_x_cyc_w * L1_loss(x_aba, self.domain_A)
    cyc_recon_B = self.recon_x_cyc_w * L1_loss(x_bab, self.domain_B)

    recon_A = self.recon_x_w * L1_loss(x_aa, self.domain_A)  # reconstruction
    recon_B = self.recon_x_w * L1_loss(x_bb, self.domain_B)  # reconstruction

    whitening_A, coloring_A = group_wise_regularization(
        deep_whitening_transform(content_a), U_A, self.group_num)
    whitening_B, coloring_B = group_wise_regularization(
        deep_whitening_transform(content_b), U_B, self.group_num)

    whitening_A = self.lambda_w * whitening_A
    whitening_B = self.lambda_w * whitening_B

    coloring_A = self.lambda_c * coloring_A
    coloring_B = self.lambda_c * coloring_B

    G_reg_A = regularization_loss('decoder_A') + \
              regularization_loss('encoder_A')
    G_reg_B = regularization_loss('decoder_B') + \
              regularization_loss('encoder_B')

    D_reg_A = regularization_loss('discriminator_A')
    D_reg_B = regularization_loss('discriminator_B')

    Generator_A_loss = G_adv_A + \
                       recon_A + \
                       recon_style_A + \
                       recon_content_A + \
                       cyc_recon_B + \
                       whitening_A + \
                       coloring_A + \
                       G_reg_A

    Generator_B_loss = G_adv_B + \
                       recon_B + \
                       recon_style_B + \
                       recon_content_B + \
                       cyc_recon_A + \
                       whitening_B + \
                       coloring_B + \
                       G_reg_B

    Discriminator_A_loss = D_adv_A + D_reg_A
    Discriminator_B_loss = D_adv_B + D_reg_B

    self.Generator_loss = Generator_A_loss + Generator_B_loss
    self.Discriminator_loss = Discriminator_A_loss + Discriminator_B_loss

    """ Training """
    t_vars = tf.trainable_variables()
    G_vars = [var for var in t_vars
              if 'decoder' in var.name or 'encoder' in var.name]
    D_vars = [var for var in t_vars if 'discriminator' in var.name]

    self.G_optim = tf.train.AdamOptimizer(
        self.lr, beta1=0.5, beta2=0.999).minimize(self.Generator_loss,
                                                  var_list=G_vars)
    self.D_optim = tf.train.AdamOptimizer(
        self.lr, beta1=0.5, beta2=0.999).minimize(self.Discriminator_loss,
                                                  var_list=D_vars)

    """ Summary """
    self.all_G_loss = tf.summary.scalar("Generator_loss",
                                        self.Generator_loss)
    self.all_D_loss = tf.summary.scalar("Discriminator_loss",
                                        self.Discriminator_loss)
    self.G_A_loss = tf.summary.scalar("G_A_loss", Generator_A_loss)
    self.G_B_loss = tf.summary.scalar("G_B_loss", Generator_B_loss)
    self.D_A_loss = tf.summary.scalar("D_A_loss", Discriminator_A_loss)
    self.D_B_loss = tf.summary.scalar("D_B_loss", Discriminator_B_loss)

    self.G_A_adv_loss = tf.summary.scalar("G_A_adv_loss", G_adv_A)
    self.G_A_style_loss = tf.summary.scalar("G_A_style_loss", recon_style_A)
    self.G_A_content_loss = tf.summary.scalar("G_A_content_loss",
                                              recon_content_A)
    self.G_A_cyc_loss = tf.summary.scalar("G_A_cyc_loss", cyc_recon_A)
    self.G_A_identity_loss = tf.summary.scalar("G_A_identity_loss", recon_A)
    self.G_A_whitening_loss = tf.summary.scalar("G_A_whitening_loss",
                                                whitening_A)
    self.G_A_coloring_loss = tf.summary.scalar("G_A_coloring_loss",
                                               coloring_A)

    self.G_B_adv_loss = tf.summary.scalar("G_B_adv_loss", G_adv_B)
    self.G_B_style_loss = tf.summary.scalar("G_B_style_loss", recon_style_B)
    self.G_B_content_loss = tf.summary.scalar("G_B_content_loss",
                                              recon_content_B)
    self.G_B_cyc_loss = tf.summary.scalar("G_B_cyc_loss", cyc_recon_B)
    self.G_B_identity_loss = tf.summary.scalar("G_B_identity_loss", recon_B)
    self.G_B_whitening_loss = tf.summary.scalar("G_B_whitening_loss",
                                                whitening_B)
    self.G_B_coloring_loss = tf.summary.scalar("G_B_coloring_loss",
                                               coloring_B)

    self.alpha_var = []
    for var in tf.trainable_variables():
        if 'alpha' in var.name:
            self.alpha_var.append(tf.summary.histogram(var.name, var))
            self.alpha_var.append(tf.summary.scalar(var.name,
                                                    tf.reduce_max(var)))

    G_summary_list = [self.G_A_adv_loss, self.G_A_style_loss,
                      self.G_A_content_loss, self.G_A_cyc_loss,
                      self.G_A_identity_loss, self.G_A_whitening_loss,
                      self.G_A_coloring_loss, self.G_A_loss,
                      self.G_B_adv_loss, self.G_B_style_loss,
                      self.G_B_content_loss, self.G_B_cyc_loss,
                      self.G_B_identity_loss, self.G_B_whitening_loss,
                      self.G_B_coloring_loss, self.G_B_loss,
                      self.all_G_loss]
    G_summary_list.extend(self.alpha_var)

    self.G_loss = tf.summary.merge(G_summary_list)
    self.D_loss = tf.summary.merge(
        [self.D_A_loss, self.D_B_loss, self.all_D_loss])

    """ Image """
    self.fake_A = x_ba
    self.fake_B = x_ab

    self.real_A = self.domain_A
    self.real_B = self.domain_B

    """ Test """

    """ Guided Image Translation """
    self.content_image = tf.placeholder(
        tf.float32, [1, self.img_h, self.img_w, self.img_ch],
        name='content_image')
    self.style_image = tf.placeholder(
        tf.float32, [1, self.img_h, self.img_w, self.img_ch],
        name='guide_style_image')

    if self.direction == 'a2b':
        guide_content_A, _ = self.encoder_A(self.content_image, reuse=True)
        _, guide_style_B = self.encoder_B(self.style_image, reuse=True)
        self.guide_fake_B, _ = self.decoder_B(content_A=guide_content_A,
                                              style_B=guide_style_B,
                                              reuse=True)
    else:
        guide_content_B, _ = self.encoder_B(self.content_image, reuse=True)
        _, guide_style_A = self.encoder_A(self.style_image, reuse=True)
        self.guide_fake_A, _ = self.decoder_A(content_B=guide_content_B,
                                              style_A=guide_style_A,
                                              reuse=True)
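The `reuse=True` arguments that recur throughout these `build_model` functions are the TF 1.x variable-sharing pattern: the first call to an encoder or decoder creates the variables inside a named `tf.variable_scope`, and later calls pass `reuse=True` to bind to the same weights. A minimal sketch of the pattern; the layer shapes here are assumptions, not the examples' real networks:

def encoder(x, scope='encoder_A', reuse=False):
    # reuse=False creates the variables; reuse=True reattaches to them.
    with tf.variable_scope(scope, reuse=reuse):
        content = tf.layers.conv2d(x, filters=64, kernel_size=7,
                                   padding='same', name='content_conv')
        style = tf.layers.conv2d(x, filters=8, kernel_size=7,
                                 padding='same', name='style_conv')
    return content, style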
def __init__(self, batch_size=128, num_thread=16, num_given=sys.maxsize,
             use_length=12, raw_input=False):
    '''
    initial target: self.iterator.initializer
    feed_dict: file_names_placeholder
    '''
    start_time = time.time()
    self.raw_len = np.load(config.encode_embedding_len_file)
    self.batch_size = batch_size
    # TODO(tommy8054): Raw input from numpy array. Is it necessary to
    # implement this?
    if raw_input:
        # Filter instances by length.
        length = np.load(config.encode_embedding_len_file)
        vec = np.load(config.encode_embedding_vec_file)[length <= use_length]
        word = np.load(config.encode_embedding_key_file)[length <= use_length]
        length = length[length <= use_length]

        vec = vec[:min(len(vec), num_given)]
        word = word[:min(len(word), num_given)]
        length = length[:min(len(length), num_given)]

        # Build up the input pipeline.
        self.load = {'vec': vec, 'word': word, 'len': length}
        self.vec, self.word, self.len = \
            tf.placeholder(tf.float32, [None, embedding_size], 'vec'), \
            tf.placeholder(tf.int64, [None, args.max_length], 'word'), \
            tf.placeholder(tf.int64, [None], 'length')
        self.vec_temp, self.word_temp, self.len_temp = \
            tf.placeholder(tf.float32, [None, embedding_size], 'vec_temp'), \
            tf.placeholder(tf.int64, [None, args.max_length], 'word_temp'), \
            tf.placeholder(tf.int64, [None], 'length_temp')
        self.dataset = tf_data.Dataset.from_tensor_slices(
            (self.vec_temp, self.word_temp, self.len_temp)) \
            .prefetch(num_thread * batch_size * 4) \
            .shuffle(buffer_size=num_thread * batch_size * 8) \
            .apply(tf_data.batch_and_drop_remainder(batch_size))
        self.iterator = self.dataset.make_initializable_iterator()
        self.input = self.iterator.get_next()
    else:
        self.num_each_len = [
            np.sum(self.raw_len == (i + 1), dtype=np.int64)
            for i in range(args.max_length)
        ]
        self.num_example = min(len(self.raw_len), num_given)
        # Use floor instead of ceil because we drop the last batch.
        self.total_step = int(math.floor(self.num_example / self.batch_size))
        self.file_names = glob(self.RECORD_FILE_PATTERN_ % 'length_')
        self.file_names_placeholder = tf.placeholder(tf.string, shape=[None])
        self.dataset = tf.data.TFRecordDataset(self.file_names_placeholder) \
            .shuffle(buffer_size=160) \
            .map(feature_parser, num_parallel_calls=num_thread) \
            .prefetch(2000) \
            .shuffle(buffer_size=1000) \
            .apply(tf_data.batch_and_drop_remainder(batch_size)) \
            .repeat()
        self.iterator = self.dataset.make_initializable_iterator()
        self.vec, self.word, self.len = self.iterator.get_next()
    self.vocab = du.json_load(config.char_vocab_file)
    self.vocab_size = len(self.vocab)
    print('Data Loading Finished with %.3f s.' % (time.time() - start_time))
def train():
    # Load maritime data.
    train_features = np.load(
        '/workspace/01_feature_extraction/res5c_features/Y_train.npy'
    ).astype(np.float32)
    train_labels = np.load(
        '/workspace/01_feature_extraction/res5c_features/label_train.npy'
    ).astype(np.int)
    test_features = np.load(
        '/workspace/01_feature_extraction/res5c_features/Y_test.npy'
    ).astype(np.float32)
    test_labels = np.load(
        '/workspace/01_feature_extraction/res5c_features/label_test.npy'
    ).astype(np.int)
    """
    train_features = np.load('/workspace/01_feature_extraction/pool5_features/Y_train.npy').astype(np.float32)
    train_labels = np.load('/workspace/01_feature_extraction/pool5_features/label_train.npy').astype(np.int)
    test_features = np.load('/workspace/01_feature_extraction/pool5_features/Y_test.npy').astype(np.float32)
    test_labels = np.load('/workspace/01_feature_extraction/pool5_features/label_test.npy').astype(np.int)
    """

    # Assume that each row of `features` corresponds to the same row of
    # `labels`.
    assert train_features.shape[0] == train_labels.shape[0]

    train_origin_shape = train_features.shape
    test_origin_shape = test_features.shape

    """
    # normalize
    train_features = np.reshape(train_features, (train_origin_shape[0], train_origin_shape[2] * train_origin_shape[3] * train_origin_shape[4]))
    test_features = np.reshape(test_features, (test_origin_shape[0], test_origin_shape[2] * test_origin_shape[3] * test_origin_shape[4]))
    scaler = StandardScaler()
    scaler.fit(train_features)
    test_features = scaler.transform(test_features)
    print(train_features.shape)
    print(train_labels.shape)
    """

    # reshape
    train_features = np.reshape(
        train_features,
        (train_origin_shape[0], train_origin_shape[2],
         train_origin_shape[3], train_origin_shape[4]))
    test_features = np.reshape(
        test_features,
        (test_origin_shape[0], test_origin_shape[2],
         test_origin_shape[3], test_origin_shape[4]))

    # transpose
    train_features = np.transpose(train_features, (0, 2, 3, 1))
    test_features = np.transpose(test_features, (0, 2, 3, 1))

    print(train_features.shape)
    print(train_labels.shape)
    # print(train_labels[0], train_labels[1])
    # print(test_features.shape)
    # print(test_labels.shape)

    # training parameters
    train_dataset_num = train_features.shape[0]
    test_dataset_num = test_features.shape[0]
    # batch_size = 32
    # batch_size = 5
    batch_size = 1
    training_epochs = 500
    display_step = 1

    # Need to convert to TFRecord files if the dataset is too big to load
    # into memory.

    #####################################
    # with one-shot iterator
    # train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
    # test_dataset = tf.data.Dataset.from_tensor_slices((test_features, test_labels))
    # train_dataset = train_dataset.apply(shuffle_and_repeat(dataset_num)).apply(batch_and_drop_remainder(batch_size))
    # create iterator
    # iter = train_dataset.make_one_shot_iterator()
    # batch_x, batch_y = iter.get_next()
    # batch_y = tf.cast(batch_y, tf.int32)

    #####################################
    # with initializable iterator
    placeholder_X = tf.placeholder(tf.float32, [None, 7, 7, 2048])
    # pool5 features
    # placeholder_X = tf.placeholder(tf.float32, [None, 1, 1, 2048])
    placeholder_y = tf.placeholder(tf.int32, [None])

    dataset = tf.data.Dataset.from_tensor_slices(
        (placeholder_X, placeholder_y))
    dataset = dataset.apply(shuffle_and_repeat(train_dataset_num)).apply(
        batch_and_drop_remainder(batch_size))

    # create iterator
    iter = dataset.make_initializable_iterator()
    batch_x, batch_y = iter.get_next()
    batch_y = tf.cast(batch_y, tf.int32)

    # our self-attention model
    model = SelfAttentionModel(batch_x, batch_y)

    # open session
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    config.gpu_options.allow_growth = True

    # initialize all variables
    init = tf.global_variables_initializer()

    # saver
    saver = tf.train.Saver()
    save_step = 5
    checkpoint_dir = './checkpoints/'
    model_name = 'sa_vessel'

    with tf.Session(config=config) as sess:
        # initialize
        sess.run(init)

        for epoch in range(training_epochs):
            train_avg_cost = 0.
            train_avg_acc = 0.
            test_avg_cost = 0.
            test_avg_acc = 0.
            tt = 0

            # Initialize iterator with training data.
            sess.run(iter.initializer,
                     feed_dict={placeholder_X: train_features,
                                placeholder_y: train_labels})

            total_batch = int(train_dataset_num / batch_size)
            # print(total_batch)
            for i in range(total_batch):
                # Run optimization op (backprop) and cost op (to get the
                # loss value).
                accuracy, _, c = sess.run(
                    [model.accuracy, model.optimizer, model.loss])
                # Accumulate loss and accuracy.
                train_avg_cost += c
                train_avg_acc += accuracy
            train_avg_cost /= train_dataset_num
            train_avg_acc /= train_dataset_num

            # Initialize iterator with test data.
            sess.run(iter.initializer,
                     feed_dict={placeholder_X: test_features,
                                placeholder_y: test_labels})

            total_batch = int(test_dataset_num / batch_size)
            # print(total_batch)
            for i in range(total_batch):
                # Evaluate the cost op only (no backprop on test data).
                accuracy, c = sess.run([model.accuracy, model.loss])
                # Accumulate loss and accuracy.
                test_avg_cost += c
                test_avg_acc += accuracy
                tt += 1
                print(test_avg_acc, tt)
            test_avg_cost /= test_dataset_num
            test_avg_acc /= test_dataset_num

            # Display logs per epoch step.
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch + 1),
                      "training_accuracy={:.9f}".format(train_avg_acc),
                      "train_cost={:.9f}".format(train_avg_cost))
                print("Epoch:", '%04d' % (epoch + 1),
                      "test_accuracy={:.9f}".format(test_avg_acc),
                      "test_cost={:.9f}".format(test_avg_cost))

            # Save checkpoints.
            if epoch % save_step == 0:
                saver.save(sess,
                           os.path.join(checkpoint_dir,
                                        model_name + '.model'),
                           global_step=epoch)
                print("Epoch:", '%04d' % (epoch + 1), "saving checkpoint")
def load_n_test():
    print("load trained model and run test")

    # Load maritime data (res5c_features); here we extract feature maps from
    # the training set.
    test_features = np.load(
        '/workspace/01_feature_extraction/res5c_features/Y_train.npy'
    ).astype(np.float32)
    test_labels = np.load(
        '/workspace/01_feature_extraction/res5c_features/label_train.npy'
    ).astype(np.int)
    # test_features = np.load('/workspace/01_feature_extraction/res5c_features/Y_test.npy').astype(np.float32)
    # test_labels = np.load('/workspace/01_feature_extraction/res5c_features/label_test.npy').astype(np.int)

    # Assume that each row of `features` corresponds to the same row of
    # `labels`.
    assert test_features.shape[0] == test_labels.shape[0]

    test_origin_shape = test_features.shape
    test_dataset_num = test_features.shape[0]

    # reshape
    test_features = np.reshape(
        test_features,
        (test_origin_shape[0], test_origin_shape[2], test_origin_shape[3],
         test_origin_shape[4]))

    # transpose
    test_features = np.transpose(test_features, (0, 2, 3, 1))

    # test parameters
    batch_size = 1

    #####################################
    # with initializable iterator
    placeholder_X = tf.placeholder(tf.float32, [None, 7, 7, 2048])
    # pool5 features
    # placeholder_X = tf.placeholder(tf.float32, [None, 1, 1, 2048])
    placeholder_y = tf.placeholder(tf.int32, [None])

    dataset = tf.data.Dataset.from_tensor_slices(
        (placeholder_X, placeholder_y))
    # dataset = dataset.apply(batch_and_drop_remainder(batch_size))
    dataset = dataset.apply(shuffle_and_repeat(test_dataset_num)).apply(
        batch_and_drop_remainder(batch_size))

    # create iterator
    iter = dataset.make_initializable_iterator()
    batch_x, batch_y = iter.get_next()
    batch_y = tf.cast(batch_y, tf.int32)

    # our self-attention model
    model = SelfAttentionModel(batch_x, batch_y)

    # open session
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    config.gpu_options.allow_growth = True

    # initialize all variables
    init = tf.global_variables_initializer()

    # saver
    saver = tf.train.Saver()
    checkpoint_dir = './checkpoints/'

    with tf.Session(config=config) as sess:
        # initialize
        sess.run(init)

        # Load a checkpoint.
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            saver.restore(sess, os.path.join(checkpoint_dir, ckpt_name))
            print(" [*] Successfully read {}".format(ckpt_name))

        # Initialize iterator with test data.
        sess.run(iter.initializer,
                 feed_dict={placeholder_X: test_features,
                            placeholder_y: test_labels})

        total_batch = int(test_dataset_num / batch_size)
        test_avg_cost = 0.
        test_avg_acc = 0.
        Y_test = []
        label_test = []
        # print(total_batch)
        for i in range(total_batch):
            # Evaluate the cost op and fetch the feature maps.
            # accuracy, c = sess.run([model.accuracy, model.loss])
            accuracy, c, feature_map, gt_label = sess.run(
                [model.accuracy, model.loss, model.feature_map,
                 model.gt_label])
            print(feature_map.shape)
            print(gt_label)
            Y_test.append(feature_map)
            label_test.append(gt_label)
            # Accumulate loss and accuracy.
            test_avg_cost += c
            test_avg_acc += accuracy
        test_avg_cost /= test_dataset_num
        test_avg_acc /= test_dataset_num

        # Show the experimental result.
        print("On test set:", "test_accuracy={:.9f}".format(test_avg_acc),
              "test_cost={:.9f}".format(test_avg_cost))
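`shuffle_and_repeat` used above is another `tf.contrib.data` fusion. In the core API the equivalent is simply chaining the two transformations, and `batch_and_drop_remainder` becomes the `drop_remainder` flag; a sketch reusing the placeholders and sizes from the function above:

dataset = tf.data.Dataset.from_tensor_slices((placeholder_X, placeholder_y))
dataset = (dataset.shuffle(buffer_size=test_dataset_num)
           .repeat()
           .batch(batch_size, drop_remainder=True))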
def main():
    if FLAGS.dataset == "fashion_iq":
        trainset = fashion_iq.fashion_iq(path=FLAGS.data_path,
                                         split=FLAGS.data_split,
                                         subset=FLAGS.subset)
    elif FLAGS.dataset == "shoes":
        trainset = shoes.shoes(path=FLAGS.data_path, split=FLAGS.data_split)
    else:
        raise ValueError("dataset must be fashion_iq or shoes")

    # Initialize the relations between source and target.
    if FLAGS.dataset == "fashion_iq":
        trainset.generate_queries_(subset=FLAGS.subset)
        all_texts = trainset.get_all_texts(subset=FLAGS.subset)
    else:
        trainset.generate_queries_()
        all_texts = trainset.get_all_texts()
    num_modif = trainset.num_modifiable_imgs
    max_steps = FLAGS.train_length

    vocab = vocabulary.SimpleVocab()
    for text in all_texts:
        vocab.add_text_to_vocab(text)  # add words not yet in the vocab
    if FLAGS.remove_rare_words:
        print('Remove rare words')
        vocab.threshold_rare_words()  # drop rare words
    vocab_size = vocab.get_size()
    print("Number of samples = {}. Number of words = {}.".format(
        num_modif, vocab_size))

    with tf.Graph().as_default():
        dataset = tf.data.Dataset.from_tensor_slices(
            (trainset.source_files, trainset.target_files,
             trainset.modify_texts))
        dataset = dataset.prefetch(FLAGS.batch_size).shuffle(num_modif).map(
            train_pair_image_parse_function,
            num_parallel_calls=FLAGS.threads).apply(
                batch_and_drop_remainder(FLAGS.batch_size)).repeat()
        data_iterator = dataset.make_one_shot_iterator()
        batch_source_image, batch_target_image, batch_text = \
            data_iterator.get_next()

        source_images_placeholder = tf.placeholder(
            tf.float32,
            shape=(FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, 3))
        target_images_placeholder = tf.placeholder(
            tf.float32,
            shape=(FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, 3))
        modify_texts_placeholder = tf.placeholder(
            tf.int32, shape=(FLAGS.batch_size, None))
        seqlengths_placeholder = tf.placeholder(tf.int32,
                                                shape=(FLAGS.batch_size))

        global_step = tf.train.get_or_create_global_step()
        if FLAGS.constant_lr:
            lr = FLAGS.init_learning_rate
        else:
            boundaries = [int(max_steps * 0.5)]
            values = [FLAGS.init_learning_rate,
                      FLAGS.init_learning_rate * 0.1]
            print('boundaries = %s, values = %s' % (boundaries, values))
            lr = tf.train.piecewise_constant(global_step, boundaries, values)
        opt = tf.train.AdamOptimizer(learning_rate=lr)

        with tf.variable_scope(tf.get_variable_scope()):
            # Build the model.
            total_loss, matching_loss = _build_model(
                source_images_placeholder, target_images_placeholder,
                modify_texts_placeholder, seqlengths_placeholder, vocab_size)

        train_vars = tf.trainable_variables()
        batchnorm = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # Do not train the logits layer.
        train_vars = [var for var in train_vars if not "ogits" in var.name]
        batchnorm = [var for var in batchnorm if not "ogits" in var.name]
        batchnorm_op = tf.group(*batchnorm)
        updates_op = tf.assign(global_step, global_step + 1)

        if FLAGS.moving_average_decay:
            ema_op = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay).apply(train_vars)
            with tf.control_dependencies([batchnorm_op, updates_op, ema_op]):
                train_op = opt.minimize(
                    loss=total_loss,
                    global_step=tf.train.get_global_step(),
                    var_list=train_vars)
        else:
            # train_op must still be defined when EMA is disabled.
            with tf.control_dependencies([batchnorm_op, updates_op]):
                train_op = opt.minimize(
                    loss=total_loss,
                    global_step=tf.train.get_global_step(),
                    var_list=train_vars)

        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
        summary_op = tf.summary.merge(summaries)
        saver = tf.train.Saver(max_to_keep=6)
        init_op = tf.global_variables_initializer()

        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(init_op)

        # Fine-tune from a stage-1 checkpoint?
        if FLAGS.checkpoint_dir_stage1:
            load_checkpoint = tf.train.latest_checkpoint(
                FLAGS.checkpoint_dir_stage1)
            print("Fine tuning from checkpoint: {}".format(load_checkpoint))
            vars_to_load = optimistic_restore_vars(load_checkpoint)
            finetuning_restorer = tf.train.Saver(var_list=vars_to_load)
            finetuning_restorer.restore(sess, load_checkpoint)
        # Fine-tune from a pretrained checkpoint?
        elif FLAGS.pretrain_checkpoint_dir:
            print("Fine tuning from pretrained checkpoint: {}".format(
                FLAGS.pretrain_checkpoint_dir))
            checkpoint_vars = tf.train.list_variables(
                FLAGS.pretrain_checkpoint_dir)
            checkpoint_vars = [v[0] for v in checkpoint_vars]
            vars_can_be_load = []
            all_vars = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
            for i in range(len(all_vars)):
                var_name = all_vars[i].name.replace(":0", "")
                if (var_name in checkpoint_vars) and (
                        not var_name == "global_step") and (
                        not "ogits" in var_name):
                    vars_can_be_load.append(all_vars[i])
            pretrain_restorer = tf.train.Saver(var_list=vars_can_be_load)
            pretrain_restorer.restore(sess, FLAGS.pretrain_checkpoint_dir)

        summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_dir,
                                               graph=sess.graph)
        feed_dict = {
            source_images_placeholder: np.zeros(
                (FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, 3)),
            target_images_placeholder: np.zeros(
                (FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, 3)),
            modify_texts_placeholder: np.zeros((FLAGS.batch_size, 10),
                                               dtype=int),
            seqlengths_placeholder: np.zeros((FLAGS.batch_size), dtype=int)
        }
        tf.train.start_queue_runners(sess=sess)
        start_time = time.time()

        # Start training.
        while True:
            source_image_array, target_image_array, raw_text, step = sess.run(
                [batch_source_image, batch_target_image, batch_text,
                 global_step],
                feed_dict=feed_dict)
            text_array, lengths = vocab.encode_text2id_batch(raw_text)
            if FLAGS.max_length is not None:
                lengths = np.minimum(lengths, FLAGS.max_length)
                max_length = FLAGS.max_length
            else:
                max_length = max(lengths)
            feed_dict = {
                source_images_placeholder: source_image_array,
                target_images_placeholder: target_image_array,
                modify_texts_placeholder: text_array[:, 0:max_length],
                seqlengths_placeholder: lengths
            }
            _, loss_value, matching_loss_value, step = sess.run(
                [train_op, total_loss, matching_loss, global_step],
                feed_dict=feed_dict)

            if step % FLAGS.print_span == 0:
                duration = time.time() - start_time
                start_time = time.time()
                print("step = %d, total_loss = %.4f, matching_loss = %s, "
                      "time = %.4f" % (step, loss_value,
                                       str(matching_loss_value), duration))
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)

            if step > 0 and (step % FLAGS.save_length == 0
                             or step == max_steps):
                checkpoint_path = os.path.join(FLAGS.checkpoint_dir,
                                               'model.ckpt')
                # Save the model.
                saver.save(sess, checkpoint_path, global_step=step)

            if step >= max_steps:
                break