def train(max_iter=60000):
    """Train the VAE on MNIST and return the monitor output directory."""
    # Data iterators: one over the training split, one over the test split.
    train_iter = I.data_iterator_mnist(batch_size, True)
    valid_iter = I.data_iterator_mnist(batch_size, False)

    # Build the train- and test-mode graphs over a shared input variable.
    image_shape = (1, 28, 28)
    latent_shape = (50, )
    image = nn.Variable((batch_size, ) + image_shape)
    train_loss = I.vae(image, latent_shape, test=False)
    valid_loss = I.vae(image, latent_shape, test=True)

    # Optimizer over every parameter created by the graphs above.
    solver = S.Adam(learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Logging: series monitors for both losses, plus elapsed time.
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    loss_monitor_train = M.MonitorSeries("train_loss", monitor, interval=600)
    loss_monitor_valid = M.MonitorSeries("val_loss", monitor, interval=600)
    time_monitor = M.MonitorTimeElapsed("time", monitor, interval=600)

    for step in range(max_iter):
        # One optimization step on a training batch.
        solver.zero_grad()
        image.d, _ = train_iter.next()
        train_loss.forward(clear_no_need_grad=True)
        train_loss.backward(clear_buffer=True)
        solver.weight_decay(weight_decay)
        solver.update()

        # Forward-only pass of the test-mode graph on a validation batch.
        image.d, _ = valid_iter.next()
        valid_loss.forward(clear_no_need_grad=True)

        # Record statistics for this step.
        loss_monitor_train.add(step, train_loss.d.copy())
        loss_monitor_valid.add(step, valid_loss.d.copy())
        time_monitor.add(step)
    return path
def train(max_iter=5000, learning_rate=0.001, weight_decay=0):
    """Train the network and return the monitor output directory.

    The learned parameters are saved to ``params.h5`` under that directory.
    """
    # Build train- and test-mode nets (avoid shadowing this function's name).
    train_net = create_net(False)
    valid_net = create_net(True)

    # Create the solver.
    solver = S.Adam(learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create the monitors.
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    monitor_loss_train = M.MonitorSeries("training_loss", monitor, interval=100)
    monitor_time = M.MonitorTimeElapsed("time", monitor, interval=100)
    monitor_loss_val = M.MonitorSeries("val_loss", monitor, interval=100)

    # Run the training loop.
    for step in range(max_iter):
        # Every 100 steps, log the validation loss averaged over 10 batches.
        if (step + 1) % 100 == 0:
            total = 0.0
            n_val_batches = 10
            for _ in range(n_val_batches):
                valid_net.image0.d, valid_net.image1.d, valid_net.label.d = valid_net.data.next()
                valid_net.loss.forward(clear_buffer=True)
                total += valid_net.loss.d
            monitor_loss_val.add(step, total / n_val_batches)

        # One optimization step on a training batch.
        train_net.image0.d, train_net.image1.d, train_net.label.d = train_net.data.next()
        solver.zero_grad()
        train_net.loss.forward(clear_no_need_grad=True)
        train_net.loss.backward(clear_buffer=True)
        solver.weight_decay(weight_decay)
        solver.update()

        monitor_loss_train.add(step, train_net.loss.d.copy())
        monitor_time.add(step)

    nn.save_parameters(os.path.join(path, "params.h5"))
    return path
# NOTE(review): this chunk begins mid-statement — the line below is the tail of
# a truncated call wrapping loss(model, ...); its opening is outside this view.
loss(model, training_inputs, training_outputs)))
print("W = {}, B = {}".format(model.W.numpy(), model.B.numpy()))
# ## Use objects for state during eager execution
# ### Variables are objects
if tf.test.is_gpu_available():
    with tf.device("gpu:0"):
        v = tf.Variable(tf.random.normal([1000, 1000]))
        v = None  # v no longer takes up GPU memory
# ### Object-based saving
x = tf.Variable(10.0)
checkpoint = tf.train.Checkpoint(x=x)
x.assign(2.0)  # type: ignore
checkpoint_path = cache_dir("tensorflow/eager")
checkpoint.save(os.path.join(checkpoint_path, "ckpt"))
# -
x.assign(11.0)  # type: ignore  # Change the variable after saving.
# Restore values from the checkpoint
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_path))
print(x)  # => 2.0 (the saved value overwrites the later assignment)
# -
# NOTE(review): the Sequential literal below is cut off at the end of this
# chunk — the closing bracket and any further layers are not visible.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, [3, 3], activation="relu"),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10),
# NOTE(review): chunk begins mid-script; `model`, `x_train`, `y_train` come
# from earlier in the file, outside this view.
model.add(Dense(1, activation="sigmoid"))
model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["acc"])
model.summary()
history = model.fit(x_train, y_train, epochs=10, batch_size=32,
                    validation_split=0.2)
# ### Putting it all together: from raw text to word embeddings
# #### Download the IMDB data as raw text
url = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
base_dir = cache_dir("keras/ch6/data")
path = os.path.join(base_dir, os.path.basename(url))
# Stream the archive to disk in 1 KiB chunks, then unpack it.
res = requests.get(url, stream=True)
if res.status_code == 200:
    with open(path, "wb") as f:
        for chunk in res.iter_content(chunk_size=1024):
            f.write(chunk)
    # NOTE(review): extraction placed under the status check — extracting only
    # after a successful download looks intended; confirm the original layout.
    with tarfile.open(path, "r:gz") as tarf:
        tarf.extractall(path=base_dir)
# -
imdb_dir = os.path.join(base_dir, "aclImdb")
train_dir = os.path.join(imdb_dir, "train")
labels = []
def train(max_iter=24000):
    """Semi-supervised MNIST training with Virtual Adversarial Training (VAT).

    Combines a supervised cross-entropy loss on small labeled batches with a
    smoothness loss on unlabeled batches, where the adversarial perturbation
    direction is estimated by power iteration. Returns the monitor directory.
    """
    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST Dataset
    from mnist_data import load_mnist, data_iterator_mnist
    images, labels = load_mnist(train=True)
    rng = np.random.RandomState(706)  # fixed seed: reproducible permutation
    inds = rng.permutation(len(images))

    # NOTE(review): both feeders are identical; the labeled iterator is
    # restricted to `args.n_labeled` samples only via its size argument below.
    def feed_labeled(i):
        j = inds[i]
        return images[j], labels[j]

    def feed_unlabeled(i):
        j = inds[i]
        return images[j], labels[j]

    di_l = I.data_iterator_simple(
        feed_labeled,
        args.n_labeled,
        args.batchsize_l,
        shuffle=True,
        rng=rng,
        with_file_cache=False,
    )
    di_u = I.data_iterator_simple(
        feed_unlabeled,
        args.n_train,
        args.batchsize_u,
        shuffle=True,
        rng=rng,
        with_file_cache=False,
    )
    di_v = data_iterator_mnist(args.batchsize_v, train=False)

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return I.mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    yl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(yl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    yu = forward(xu, test=False)
    # Unlinked copy: the clean prediction acts as a constant target, so no
    # gradient flows back through it.
    y1 = yu.get_unlinked_variable()
    y1.need_grad = False

    # r is the L2-normalized perturbation direction; persistent so its buffer
    # survives the forward/backward passes of the power iteration below.
    noise = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    r = noise / (F.sum(noise ** 2, [1, 2, 3], keepdims=True)) ** 0.5
    r.persistent = True
    y2 = forward(xu + args.xi_for_vat * r, test=False)
    y3 = forward(xu + args.eps_for_vat * r, test=False)
    loss_k = F.mean(I.distance(y1, y2))  # used only to refine r (power method)
    loss_u = F.mean(I.distance(y1, y3))  # the VAT loss actually optimized

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)
    err = F.mean(F.top_n_error(hv, tv, n=1))

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    monitor_verr = M.MonitorSeries("val_error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("time", monitor, interval=240)

    # Training Loop.
    for i in range(max_iter):
        # Validation Test
        if i % args.val_interval == 0:
            valid_error = I.calc_validation_error(di_v, xv, tv, err, args.val_iter)
            monitor_verr.add(i, valid_error)

        # forward, backward and update (supervised loss on the labeled batch)
        xl.d, tl.d = di_l.next()
        xl.d = xl.d / 255  # scale pixels to [0, 1]
        solver.zero_grad()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Calculate y without noise, only once.
        xu.d, _ = di_u.next()
        xu.d = xu.d / 255
        yu.forward(clear_buffer=True)

        # Do power method iteration: repeatedly replace the noise with the
        # gradient of the divergence w.r.t. r to approximate the most
        # adversarial direction.
        noise.d = np.random.normal(size=xu.shape).astype(np.float32)
        for k in range(args.n_iter_for_power_method):
            r.grad.zero()
            loss_k.forward(clear_no_need_grad=True)
            loss_k.backward(clear_buffer=True)
            noise.data.copy_from(r.grad)

        # forward, backward and update (VAT loss on the unlabeled batch)
        solver.zero_grad()
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Decay the learning rate once per epoch.
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = I.calc_validation_error(di_v, xv, tv, err, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)
    return path
from tensorflow.keras.applications import VGG16
from ivory.utils.path import cache_dir
from ivory.utils.keras.history import history_to_dataframe

# ### Feature extraction
# Pre-trained VGG16 convolutional base (no classifier head), 150x150 RGB input.
conv_base = VGG16(weights="imagenet", include_top=False, input_shape=(150, 150, 3))
conv_base.summary()
# -
base = "keras/ch5/cats_and_dogs_small"
dirs = {}
for dataset in ["train", "validation", "test"]:
    dirs[dataset] = cache_dir(base, dataset)
# NOTE(review): ImageDataGenerator is presumably imported earlier in the file.
datagen = ImageDataGenerator(rescale=1.0 / 255)
batch_size = 20


def extract_features(directory, sample_count):
    # Run the VGG16 base over `sample_count` images from `directory`, batch by
    # batch; (4, 4, 512) is presumably the base's output feature-map shape for
    # 150x150 inputs — confirm against conv_base.summary().
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    labels = np.zeros(shape=(sample_count))
    generator = datagen.flow_from_directory(directory, target_size=(150, 150),
                                            batch_size=batch_size,
                                            class_mode="binary")
    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        features[i * batch_size:(i + 1) * batch_size] = features_batch
        # NOTE(review): function truncated at the end of this chunk — the
        # labels assignment, loop termination, and return are not visible.
# NOTE(review): chunk begins inside a truncated `create_model` factory — only
# its final compile/return is visible here.
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model


# -
# !Create a basic model instance
model = create_model()
model.summary()
# ## Save checkpoints during training
# ### Checkpoint callback usage
checkpoint_dir = cache_dir("tensorflow/ml_basics/training_1")
checkpoint_path = os.path.join(checkpoint_dir, "cp.ckpt")
# !Create checkpoint callback (weights only; verbose=1 logs each save)
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
model.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
    callbacks=[cp_callback],
)
# ## Define the optimizer and the loss function optimizer = tf.keras.optimizers.Adam() loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) def loss_function(real, pred): mask = tf.math.logical_not(tf.math.equal(real, 0)) loss_ = loss_object(real, pred) mask = tf.cast(mask, dtype=loss_.dtype) loss_ *= mask return tf.reduce_mean(loss_) # ## Checkpoints (Object-based saving) checkpoint_dir = cache_dir( "tensorflow/nmn_with_attention/training_checkpoints") checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder) # ## Training @tf.function def train_step(inp, targ, enc_hidden): loss = 0 with tf.GradientTape() as tape: enc_output, enc_hidden = encoder(inp, enc_hidden) dec_hidden = enc_hidden dec_input = tf.expand_dims([targ_lang.word_index["<start>"]] *
# NOTE(review): chunk begins inside a truncated discriminator-loss function —
# only its tail (fake-term and sum) is visible here.
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss


# ### Generator loss
def generator_loss(fake_output):
    # The generator succeeds when the discriminator scores fakes as real (ones).
    return cross_entropy(tf.ones_like(fake_output), fake_output)


# -
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)
# ### Save checkpoints
checkpoint_dir = cache_dir("tensorflow/dcgan/training_checkpoints")
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(
    generator_optimizer=generator_optimizer,
    discriminator_optimizer=discriminator_optimizer,
    generator=generator,
    discriminator=discriminator,
)
# ## Define the training loop
EPOCHS = 50
noise_dim = 100
num_examples_to_generate = 16
# !We will reuse this seed overtime (so it's easier)
# !to visualize progress in the animated GIF)
# NOTE(review): chunk begins mid-script; `model` and the training data come
# from earlier in the file, outside this view.
model.add(layers.Conv1D(32, 7, activation="relu"))
model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dense(1))
model.summary()
# -
model.compile(optimizer=RMSprop(lr=1e-4),
              loss="binary_crossentropy",
              metrics=["acc"])
history = model.fit(x_train, y_train, epochs=10, batch_size=128,
                    validation_split=0.2)
# -
plot_history(history, "acc") | plot_history(history, "loss")
# ### Combining CNNs and RNNs to process long sequences
# Load every CSV in the weather directory and concatenate into one frame.
base_dir = cache_dir("keras/ch6/data/weather/zip")
dfs = []
for name in os.listdir(os.path.join(base_dir)):
    df = pd.read_csv(os.path.join(base_dir, name), encoding="cp932")
    dfs.append(df)
df = pd.concat(dfs)
float_data = df.iloc[:, 1:].values
# Standardize using statistics from the first 200000 rows only.
mean = float_data[:200000].mean(axis=0)
float_data -= mean
std = float_data[:200000].std(axis=0)
float_data /= std


# NOTE(review): only the signature is visible — the generator body is cut off
# at the end of this chunk.
def generator(
    data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6
):
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras import backend as K
from tensorflow.keras import models
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import (decode_predictions,
                                                 preprocess_input)
from tensorflow.keras.models import load_model
from ivory.utils.path import cache_dir

# ### Visualizing intermediate activations
# Load the previously trained cats-vs-dogs model from the cache directory.
model = load_model(
    os.path.join(cache_dir("keras/ch5"), "cats_and_dogs_small_2.h5"))
model.summary()  # As a reminder.
# -
img_path = os.path.join(cache_dir("keras/ch5/cats_and_dogs_small/test/cat"),
                        "cat.1700.jpg")
# !We preprocess the image into a 4D tensor
img = image.load_img(img_path, target_size=(150, 150))
img_tensor = image.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0)
# !Remember that the model was trained on inputs that were preprocessed in the
# !following way:
img_tensor /= 255.0
# !Its shape is (1, 150, 150, 3)
print(img_tensor.shape)
# - optimizer = tf.keras.optimizers.Adam() loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) def loss_function(real, pred): mask = tf.math.logical_not(tf.math.equal(real, 0)) loss_ = loss_object(real, pred) mask = tf.cast(mask, dtype=loss_.dtype) loss_ *= mask return tf.reduce_mean(loss_) # ## Checkpoint checkpoint_path = cache_dir( "tensorflow/sequences/image_captioning/checkpoints/train") ckpt = tf.train.Checkpoint(encoder=encoder, decoder=decoder, optimizer=optimizer) ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5) # - start_epoch = 0 if ckpt_manager.latest_checkpoint: start_epoch = int(ckpt_manager.latest_checkpoint.split("-")[-1]) # ## Training # !adding this in a separate cell because if you run the training cell many times, # !the loss_plot array will be reset loss_plot = []
def get_monitor_path(net_name: str) -> str:
    """Return the cache directory holding monitor output for *net_name*."""
    monitor_dir = os.path.join(cl.name, net_name)
    return cache_dir(monitor_dir)
# ### How to train your DCGAN # !Load CIFAR10 data (x_train, y_train), (_, _) = keras.datasets.cifar10.load_data() # !Select frog images (class 6) x_train = x_train[y_train.flatten() == 6] # !Normalize data x_train = (x_train.reshape((x_train.shape[0], ) + (height, width, channels)).astype("float32") / 255.0) iterations = 10000 batch_size = 20 save_dir = cache_dir("keras/ch8/gan_images") # !Start training loop start = 0 for step in range(iterations): # Sample random points in the latent space random_latent_vectors = np.random.normal(size=(batch_size, latent_dim)) # Decode them to fake images generated_images = generator.predict(random_latent_vectors) # Combine them with real images stop = start + batch_size real_images = x_train[start:stop] combined_images = np.concatenate([generated_images, real_images])
optimizer.apply_gradients(zip(gradients, variables)) # ## Generate Images epochs = 100 latent_dim = 50 num_examples_to_generate = 16 # !keeping the random vector constant for generation (prediction) so # !it will be easier to see the improvement. random_vector_for_generation = tf.random.normal( shape=[num_examples_to_generate, latent_dim]) model = CVAE(latent_dim) # - directory = cache_dir("tensorflow/cvae") def generate_and_save_images(model, epoch, test_input): predictions = model.sample(test_input) plt.figure(figsize=(4, 4)) for i in range(predictions.shape[0]): plt.subplot(4, 4, i + 1) plt.imshow(predictions[i, :, :, 0], cmap="gray") plt.axis("off") # tight_layout minimizes the overlap between 2 sub-plots plt.savefig( os.path.join(directory, "image_at_epoch_{:04d}.png".format(epoch))) plt.show()
# One Adam solver (beta1=0.5) per sub-network, each bound only to the
# parameters inside its scope.
solver_gen = S.Adam(learning_rate, beta1=0.5)
solver_dis = S.Adam(learning_rate, beta1=0.5)
with nn.parameter_scope("gen"):
    solver_gen.set_parameters(nn.get_parameters())
with nn.parameter_scope("dis"):
    solver_dis.set_parameters(nn.get_parameters())
# Take a look at how parameter scopes behave.
print(len(nn.get_parameters()))
with nn.parameter_scope("gen"):
    print(len(nn.get_parameters()))
# Inside a parameter scope, the parameters returned by `get_parameters()` are
# filtered down to that scope.
# Monitor setup
path = cache_dir(os.path.join(I.name, "monitor"))
monitor = M.Monitor(path)
monitor_loss_gen = M.MonitorSeries("generator_loss", monitor, interval=100)
monitor_loss_dis = M.MonitorSeries("discriminator_loss", monitor, interval=100)
monitor_time = M.MonitorTimeElapsed("time", monitor, interval=100)
monitor_fake = M.MonitorImageTile("Fake images", monitor,
                                  normalize_method=lambda x: (x + 1) / 2.0)


# Define a parameter-saving helper.
def save_parameters(i):
    # Save generator and discriminator parameters to separate, step-numbered
    # files under the monitor directory.
    with nn.parameter_scope("gen"):
        nn.save_parameters(os.path.join(path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        # NOTE(review): call truncated at the end of this chunk.
        nn.save_parameters(
gan_loss = loss_object(tf.ones_like(disc_generated_output), disc_generated_output) # mean absolute error l1_loss = tf.reduce_mean(tf.abs(target - gen_output)) total_gen_loss = gan_loss + (LAMBDA * l1_loss) return total_gen_loss generator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5) discriminator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5) # ## Checkpoints (Object-based saving) checkpoint_dir = cache_dir("tensorflow/pix2pix/training_checkpoints") checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") checkpoint = tf.train.Checkpoint( generator_optimizer=generator_optimizer, discriminator_optimizer=discriminator_optimizer, generator=generator, discriminator=discriminator, ) # ## Generate Images EPOCHS = 200 def generate_images(model, test_input, tar): # the training=True is intentional here since # we want the batch statistics while running the model