def train_cnn(nets, placeholders, sess, graph, train_inputs, train_outputs, batch_size, hypers):
    aux_ind = 0
    predictions = {}
    with graph.as_default():
        out = nets["n0"].building(placeholders["in"]["i0"], graph, hypers["skip"])
        # We need to flatten the output of the CNN before feeding it to the MLP
        out = tf.layers.dense(tf.layers.flatten(out), 20)
        predictions["n0"] = out
        out = nets["n1"].building(predictions["n0"], graph)
        predictions["n1"] = out

        lf = tf.losses.softmax_cross_entropy(placeholders["out"]["o1"], predictions["n1"])
        opt = optimizers[hypers["optimizer"]](learning_rate=hypers["lrate"]).minimize(lf)

        sess.run(tf.global_variables_initializer())
        for i in range(10):
            # As the input of n1 is the output of n0, these two placeholders need no feeding
            _, loss = sess.run(
                [opt, lf],
                feed_dict={
                    placeholders["in"]["i0"]: evolution.batch(train_inputs["i0"], batch_size, aux_ind),
                    placeholders["out"]["o1"]: evolution.batch(train_outputs["o0"], batch_size, aux_ind)
                })
            aux_ind = (aux_ind + batch_size) % train_inputs["i0"].shape[0]
    return predictions
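
# The `optimizers` collection indexed above with hypers["optimizer"] is defined elsewhere in the
# framework. As a hedged illustration only (the real entries, and whether it is a list or a dict,
# are assumptions, not the framework's definition), it presumably maps the evolved value to a TF1
# optimizer class:
example_optimizers = [
    tf.train.AdamOptimizer,      # selected when hypers["optimizer"] is 0 (assumption)
    tf.train.RMSPropOptimizer,   # ... 1
    tf.train.AdagradOptimizer,   # ... 2
]
# example_optimizers[hypers["optimizer"]](learning_rate=hypers["lrate"]) would then build the optimizer.
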
def train_cnn_ae(nets, placeholders, sess, graph, train_inputs, _, batch_size, hypers):
    aux_ind = 0
    predictions = {}
    with graph.as_default():
        out = nets["n0"].building(placeholders["in"]["i0"], graph)  # We take the output of the CNN
        out = tf.layers.flatten(out)  # and flatten it
        out = tf.layers.dense(out, 49)  # before transforming it to the desired dimension
        predictions["n0"] = tf.reshape(out, (-1, 7, 7, 1))  # Then we reshape it so that the TCNN can take it

        out = nets["n1"].building(predictions["n0"], graph)
        # Take the piece of data we're interested in (for reconstruction)
        predictions["n1"] = out[:, :28, :28, :3]  # as the TCNN could provide more than that

        # Common training
        lf = tf.losses.mean_squared_error(placeholders["in"]["i0"], predictions["n1"])
        opt = optimizers[hypers["optimizer"]](learning_rate=hypers["lrate"]).minimize(lf)

        sess.run(tf.global_variables_initializer())
        for i in range(10):
            # As the input of n1 is the output of n0, the intermediate placeholders need no feeding;
            # the loss is computed against the input itself
            __, loss = sess.run([opt, lf],
                                feed_dict={placeholders["in"]["i0"]: batch(train_inputs["i0"], batch_size, aux_ind)})
            if np.isnan(loss):
                break
            aux_ind = (aux_ind + batch_size) % train_inputs["i0"].shape[0]
    return predictions
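
# The `batch` helper used throughout these examples is provided by the framework. The sketch below
# is only a hedged guess at the behaviour the training loops rely on (slice `size` consecutive
# samples starting at index `i`, wrapping around the end of the data); the real implementation may
# differ, and the name `example_batch` is hypothetical.
def example_batch(data, size, i):
    if i + size <= data.shape[0]:
        return data[i:i + size]
    # Wrap around so that the returned batch always contains `size` samples
    return np.concatenate((data[i:], data[:(i + size) - data.shape[0]]), axis=0)
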
def gan_train(nets, placeholders, sess, graph, train_inputs, _, batch_size, __):
    aux_ind = 0
    predictions = {}
    with graph.as_default():
        # We define the special GAN structure
        out = nets["n1"].building(placeholders["in"]["i1"], graph, _)
        predictions["gen"] = tf.nn.sigmoid(out)

        real_logits = nets["n0"].building(placeholders["in"]["i0"], graph, _)
        predictions["realDisc"] = tf.nn.sigmoid(real_logits)

        fake_logits = nets["n0"].building(predictions["gen"], graph, _)
        predictions["fakeDisc"] = tf.nn.sigmoid(fake_logits)

        # Loss function and optimizer (the cross-entropies are computed on the pre-sigmoid logits)
        d_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=real_logits, labels=tf.ones_like(real_logits)) +
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=fake_logits, labels=tf.zeros_like(fake_logits)))
        g_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=fake_logits, labels=tf.ones_like(fake_logits)))

        g_solver = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(
            g_loss, var_list=[nets["n1"].List_weights, nets["n1"].List_bias])
        d_solver = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(
            d_loss, var_list=[nets["n0"].List_weights, nets["n0"].List_bias])

        sess.run(tf.global_variables_initializer())
        # epochs and z_size are expected to be defined at module level
        for it in range(epochs):  # Train the model
            x_mb = batch(train_inputs["i0"], batch_size, aux_ind)
            aux_ind = (aux_ind + batch_size) % train_inputs["i0"].shape[0]

            # The generator is updated three times for each discriminator update
            z_mb = np.random.uniform(size=(batch_size, z_size))
            _, b = sess.run([g_solver, g_loss], feed_dict={placeholders["in"]["i1"]: z_mb})
            z_mb = np.random.uniform(size=(batch_size, z_size))
            _, b = sess.run([g_solver, g_loss], feed_dict={placeholders["in"]["i1"]: z_mb})
            z_mb = np.random.uniform(size=(batch_size, z_size))
            _, b = sess.run([g_solver, g_loss], feed_dict={placeholders["in"]["i1"]: z_mb})

            _, a = sess.run([d_solver, d_loss], feed_dict={
                placeholders["in"]["i0"]: x_mb,
                placeholders["in"]["i1"]: z_mb
            })

            # if it % 50 == 0: print(a, b)
            # samples = sess.run(predictions["gen"], feed_dict={placeholders["in"]["i1"]: np.random.uniform(size=(1000, z_size))})
            # plt.plot(x_mb[:, 0], x_mb[:, 1], "o")
            # plt.show()
    return predictions
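
# Hedged usage sketch (not part of the framework): once gan_train has returned, the generator can be
# sampled by feeding noise to its input placeholder, much like the commented-out debug code above.
# The helper name and the `n_samples`/`z_size` defaults are assumptions made for illustration.
def sample_generator(predictions, placeholders, sess, n_samples=1000, z_size=10):
    z = np.random.uniform(size=(n_samples, z_size))  # same noise distribution as used during training
    # predictions["gen"] already applies a sigmoid, so samples are bounded in [0, 1]
    return sess.run(predictions["gen"], feed_dict={placeholders["in"]["i1"]: z})
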
def train(nets, placeholders, sess, graph, train_inputs, train_outputs, batch_size, _):
    aux_ind = 0
    predictions = {}
    with graph.as_default():
        # Both networks are created separately, using their own placeholders. They are not connected in any way
        out = nets["n1"].building(tf.layers.flatten(placeholders["in"]["i1"]), graph)
        predictions["n1"] = out
        out = nets["n0"].building(tf.layers.flatten(placeholders["in"]["i0"]), graph)
        predictions["n0"] = out

        loss = tf.losses.softmax_cross_entropy(placeholders["out"]["o1"], predictions["n1"]) + \
               tf.losses.softmax_cross_entropy(placeholders["out"]["o0"], predictions["n0"])

        solver = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
            loss, var_list=[nets["n1"].List_weights, nets["n1"].List_bias,
                            nets["n0"].List_weights, nets["n0"].List_bias])

        sess.run(tf.global_variables_initializer())
        for it in range(10):
            aux_ind = (aux_ind + batch_size) % train_inputs["i0"].shape[0]
            # Here all placeholders need to be fed, unlike in the previous examples, as both networks work on their own
            _ = sess.run(
                [solver],
                feed_dict={
                    placeholders["in"]["i0"]: batch(train_inputs["i0"], batch_size, aux_ind),
                    placeholders["in"]["i1"]: batch(train_inputs["i1"], batch_size, aux_ind),
                    placeholders["out"]["o0"]: batch(train_outputs["o0"], batch_size, aux_ind),
                    placeholders["out"]["o1"]: batch(train_outputs["o1"], batch_size, aux_ind)
                })
    return predictions
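
# Hedged evaluation sketch for the two independently trained networks above. `test_inputs` and
# `test_outputs` are assumed to follow the same dictionary layout as the training data; the helper
# itself is illustrative and not part of the framework.
def evaluate_independent(predictions, placeholders, sess, test_inputs, test_outputs):
    logits_n0, logits_n1 = sess.run(
        [predictions["n0"], predictions["n1"]],
        feed_dict={placeholders["in"]["i0"]: test_inputs["i0"],
                   placeholders["in"]["i1"]: test_inputs["i1"]})
    # Per-network accuracy against one-hot labels
    acc_n0 = np.mean(np.argmax(logits_n0, axis=1) == np.argmax(test_outputs["o0"], axis=1))
    acc_n1 = np.mean(np.argmax(logits_n1, axis=1) == np.argmax(test_outputs["o1"], axis=1))
    return acc_n0, acc_n1
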
def gan_train(nets, placeholders, sess, graph, train_inputs, _, batch_size, __):
    aux_ind = 0
    predictions = {}
    with graph.as_default():
        # We define the special GAN structure
        out = nets["n1"].building(placeholders["in"]["i1"], graph, _)
        predictions["gen"] = out
        out = nets["n0"].building(tf.layers.flatten(placeholders["in"]["i0"]), graph, _)
        predictions["realDisc"] = out
        out = nets["n0"].building(predictions["gen"], graph, _)
        predictions["fakeDisc"] = out

        # Loss function and optimizer (Wasserstein-style critic losses)
        d_loss = -tf.reduce_mean(predictions["realDisc"]) + tf.reduce_mean(predictions["fakeDisc"])
        g_loss = -tf.reduce_mean(predictions["fakeDisc"])

        g_solver = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
            g_loss, var_list=[nets["n1"].List_weights, nets["n1"].List_bias])
        d_solver = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
            d_loss, var_list=[nets["n0"].List_weights, nets["n0"].List_bias])

        sess.run(tf.global_variables_initializer())
        for it in range(10):  # Train the model
            z_mb = np.random.normal(size=(150, 10))
            x_mb = batch(train_inputs["i0"], batch_size, aux_ind)
            aux_ind = (aux_ind + batch_size) % train_inputs["i0"].shape[0]

            _ = sess.run([d_solver], feed_dict={
                placeholders["in"]["i0"]: x_mb,
                placeholders["in"]["i1"]: z_mb
            })
            _ = sess.run([g_solver], feed_dict={placeholders["in"]["i1"]: z_mb})
    return predictions
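
# Hedged monitoring sketch for the Wasserstein-style model above: compare the critic's mean score on
# a real batch against a noise batch. The helper and `real_batch` are illustrative assumptions; the
# (150, 10) noise shape mirrors what this gan_train feeds during training.
def critic_gap(predictions, placeholders, sess, real_batch):
    z = np.random.normal(size=(150, 10))
    real_scores, fake_scores = sess.run(
        [predictions["realDisc"], predictions["fakeDisc"]],
        feed_dict={placeholders["in"]["i0"]: real_batch,
                   placeholders["in"]["i1"]: z})
    # A larger positive gap means the critic separates real from generated data more clearly
    return float(np.mean(real_scores) - np.mean(fake_scores))
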
def train_sequential(nets, placeholders, sess, graph, train_inputs, train_outputs, batch_size, hypers):
    """
    This function takes care of arranging the model and training it. It is used internally by the
    evolutionary algorithm and is always provided with the same parameters.

    :param nets: Dictionary with the networks ("n0", "n1", ..., "nm", in the same order as they
                 have been requested in the *desc_list* parameter)
    :param placeholders: Dictionary with the input ("in" -> "i0", "i1", ..., "im") and output
                         ("out" -> "o0", "o1", ..., "om") placeholders for each network
    :param sess: tf session to be used when training
    :param graph: tf graph to be used when training
    :param train_inputs: Data to be used for training
    :param train_outputs: Data to be used for training
    :param batch_size: Batch size to be used when training. Using it is not mandatory
    :param hypers: Optional hyperparameters being evolved, in case they were defined for evolution
                   (in this case we also evolve the optimizer selection and the learning rate)
    :return: A dictionary with the tf layers that make the predictions
    """
    aux_ind = 0
    predictions = {}
    with graph.as_default():
        # The following four lines define the model layout
        out = nets["n0"].building(tf.layers.flatten(placeholders["in"]["i0"]), graph)
        predictions["n0"] = out  # We construct n0 over its input placeholder, "in" -> "i0"
        out = nets["n1"].building(predictions["n0"], graph)
        predictions["n1"] = out  # We construct n1 over n0 because they are supposed to be sequential

        # Define the loss function and optimizer with the output of n1, which is the final output of the model
        lf = tf.losses.softmax_cross_entropy(placeholders["out"]["o1"], predictions["n1"])
        opt = optimizers[hypers["optimizer"]](learning_rate=hypers["lrate"]).minimize(lf)

        # The rest is typical training
        sess.run(tf.global_variables_initializer())
        for i in range(10):
            # As the input of n1 is the output of n0, the intermediate placeholders need no feeding
            _, loss = sess.run([opt, lf],
                               feed_dict={placeholders["in"]["i0"]: batch(train_inputs["i0"], batch_size, aux_ind),
                                          placeholders["out"]["o1"]: batch(train_outputs["o0"], batch_size, aux_ind)})
            aux_ind = (aux_ind + batch_size) % train_inputs["i0"].shape[0]
    return predictions
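
# A plausible shape for the `hypers` dictionary that train_sequential (and train_cnn / train_cnn_ae
# above) receives when the optimizer selection and learning rate are being evolved. The concrete
# values and key names below are assumptions for illustration only, derived from how the functions
# above index into `hypers`.
example_hypers = {
    "optimizer": 0,   # index/key into the module-level `optimizers` mapping
    "lrate": 0.001,   # learning rate handed to the selected optimizer class
    "skip": 1,        # only train_cnn above uses this entry (assumed here, passed to building())
}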