Example #1
    def evaluate(self, X_test, Y_test, sess):
        # Evaluate the dev set. Used inside a session.
        m = X_test.shape[0]
        model = self.test_model
        accuracy = model.accuracy
        logits = model.logits
        cost = model.cost     

        minibatches = random_mini_batches(X_test, Y_test, self.params.test_batch_size)
        minibatch_cost = 0.
        minibatch_accuracy = 0.
        num_minibatches = (m + self.params.test_batch_size - 1) // self.params.test_batch_size

        count_batch = 0
        for minibatch in minibatches:
            # Select a minibatch
            (minibatch_X, minibatch_Y) = minibatch
            temp_cost, temp_accuracy = sess.run([cost, accuracy], feed_dict={model.X: minibatch_X, model.Y: minibatch_Y})
            
            # compute dev cost
            minibatch_cost += temp_cost / num_minibatches
            minibatch_accuracy += temp_accuracy / num_minibatches

            # Print result
            #if (count_batch % 10) == 0:
            print("dev_count_batch",count_batch,"dev_temp_cost:", temp_cost, "dev_temp_accuracy:", temp_accuracy)
            count_batch += 1

        return minibatch_cost, minibatch_accuracy
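
All of the examples on this page rely on a random_mini_batches helper that is never shown, and its exact signature varies between them (some pass a seed, Example #10 passes only X). A minimal sketch for reference, offered as an illustration rather than any example's actual helper; it assumes examples are stacked along the first axis, as in Example #1, and that the optional seed only controls the shuffle:

import numpy as np

def random_mini_batches(X, Y, mini_batch_size=64, seed=None):
    """Shuffle (X, Y) together and split them into mini-batches of size mini_batch_size."""
    if seed is not None:
        np.random.seed(seed)
    m = X.shape[0]                           # number of examples
    permutation = np.random.permutation(m)   # shuffled example indices
    shuffled_X, shuffled_Y = X[permutation], Y[permutation]

    mini_batches = []
    for start in range(0, m, mini_batch_size):
        mini_batches.append((shuffled_X[start:start + mini_batch_size],
                             shuffled_Y[start:start + mini_batch_size]))
    return mini_batches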
Example #2
	def fit(self, X, y):
		"""
    	Fit the coeffs_ and intercepts_ to the training data X, y
    	
    	Arguments:
    	X -- input dataset, numpy array of shape (input size, number of examples)
    	y -- "true" labels, numpy array of shape (number of classes, number of examples)
    	
    	Returns:
    	None
    	"""

		n_x, n_m = X.shape
		n_y = y.shape[0]
		print_cost = True
		costs = []

		seed = self.seed_

		#Define the tensorflow graph
		X_tf, y_tf = self.create_placeholders(n_x, n_y)
		self.initialize_parameters(n_x, n_y)

		print(self.coeffs_)
		print(self.intercepts_)

		Z = self.forward_propagation(X_tf)
		cost = self.compute_cost(Z, y_tf)

		optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate_init).minimize(cost)
		init = tf.global_variables_initializer()

		with tf.Session() as sess:
			sess.run(init)

			#do the training loop
			for epoch in range(self.num_epochs):

				epoch_cost = 0
				num_minibatches = int(n_m/self.minibatch_size)

				if self.seed_ is not None:
					seed = seed+1
				
				minibatches = utils.random_mini_batches(X, y, self.minibatch_size, seed)

				for minibatch in minibatches:

					(minibatch_X, minibatch_y) = minibatch
					_, minibatch_cost = sess.run([optimizer, cost], feed_dict={X_tf:minibatch_X, y_tf:minibatch_y})

					epoch_cost += minibatch_cost / num_minibatches

				# Print the cost every epoch
				if print_cost == True and epoch % 100 == 0:
					print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
				if print_cost == True and epoch % 5 == 0:
					costs.append(epoch_cost)
Example #3
    def fit(self, X, Y):
        seed = 10
        t = 0
        self.costs = []

        for epoch in range(self.n_epochs):

            seed = seed + 1
            minibatches = random_mini_batches(X, Y, self.mini_batch_size, seed)

            for minibatch in minibatches:

                minibatch_X, minibatch_Y = minibatch

                # Step 1) forward propagation
                if self.keep_prob == 1:
                    AL = self.forward(minibatch_X)
                elif self.keep_prob < 1:
                    AL = self.forward_with_dropout(minibatch_X, self.keep_prob)

                # Step 2) compute cost
                if self.lambd == 0:
                    cost = self.compute_cost(AL, minibatch_Y)
                else:
                    cost = self.compute_cost_with_regularization(AL, minibatch_Y, self.lambd)

                # Step 3) backward
                if self.lambd == 0 and self.keep_prob == 1:
                    self.backward(AL, minibatch_Y)
                elif self.lambd != 0:
                    self.backward_with_regularization(AL, minibatch_Y, self.lambd)
                elif self.keep_prob < 1:
                    self.backward_with_dropout(AL, minibatch_Y, self.keep_prob)

                # Step 4) update parameters
                if self.optimizer == 'gd':
                    self.update_parameters_gd()
                elif self.optimizer == 'momentum':
                    self.update_parameters_with_momentum()
                elif self.optimizer == 'adam':
                    t = t + 1
                    self.update_parameters_with_adam(t)

            # log
            if epoch % self.step_size == 0:
                print("{}, cost = {:.6f}".format(epoch, cost))
                self.costs.append(cost)
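
Example #3 dispatches to parameter-update helpers that it does not show. A sketch of what the plain gradient-descent branch typically computes (the parameters/grads dictionaries and their "W1"/"dW1"-style keys are assumptions made here for illustration, not names taken from the example):

def update_parameters_gd(parameters, grads, learning_rate):
    """Vanilla gradient descent: theta := theta - learning_rate * d_theta, layer by layer."""
    num_layers = len(parameters) // 2  # parameters holds one W and one b per layer
    for l in range(1, num_layers + 1):
        parameters["W" + str(l)] = parameters["W" + str(l)] - learning_rate * grads["dW" + str(l)]
        parameters["b" + str(l)] = parameters["b" + str(l)] - learning_rate * grads["db" + str(l)]
    return parameters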
Example #4
def neural_network(X_train,
                   Y_train,
                   X_test,
                   Y_test,
                   layers,
                   learning_rate=0.0001,
                   num_epochs=1000,
                   minibatch_size=32,
                   print_cost=False):
    ops.reset_default_graph()
    tf.set_random_seed(1)
    seed = 3
    n_x, m = X_train.shape
    n_y = Y_train.shape[0]
    #costs = []

    X, Y = create_placeholders(n_x, n_y)
    parameters = initialize_parameters(layers)
    Z = forward_propagation(X, parameters)
    cost = compute_cost(Z, Y)

    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(num_epochs):
            epoch_cost = 0.
            seed += 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size,
                                              seed)
            num_minibatches = len(minibatches)

            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                _, minibatch_cost = sess.run([optimizer, cost],
                                             feed_dict={
                                                 X: minibatch_X,
                                                 Y: minibatch_Y
                                             })

                epoch_cost += minibatch_cost / num_minibatches

            if print_cost == True and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))

        parameters = sess.run(parameters)

        correct_prediction = tf.equal(tf.argmax(Z, axis=0), tf.argmax(Y,
                                                                      axis=0))
        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        print("Parameters have been trained!")

    return parameters
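
Several of the TensorFlow 1.x examples (for instance #2, #4, #9, and #14) call create_placeholders without showing it. A minimal sketch under the column-per-example layout those examples use (features along axis 0, examples along axis 1); treat it as an illustration rather than the original helper:

import tensorflow as tf

def create_placeholders(n_x, n_y):
    """Placeholders for inputs of shape (n_x, m) and one-hot labels of shape (n_y, m)."""
    X = tf.placeholder(tf.float32, shape=(n_x, None), name="X")
    Y = tf.placeholder(tf.float32, shape=(n_y, None), name="Y")
    return X, Y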
Example #5
def main(args):
    """Initialize and train deep equilibrium net."""
    # Set the seed for replicable results
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # Helper variables
    eps = 0.00001  # Small epsilon value

    # ####################################################################### #
    #                             Neural network                              #
    # ####################################################################### #
    # Neural network ----------------------------------------------------------
    # We create a placeholder for X, the input data for the neural network,
    # which corresponds to the state.
    X = tf.placeholder(tf.float32, shape=(None, num_input_nodes))
    # Get the number of samples
    m = tf.shape(X)[0]

    # We create all of the neural network weights and biases. The weights are
    # matrices that connect the layers of the neural network. For example, W1
    # connects the input layer to the first hidden layer
    W1 = initialize_nn_weight([num_input_nodes, num_hidden_nodes[0]])
    W2 = initialize_nn_weight([num_hidden_nodes[0], num_hidden_nodes[1]])
    W3 = initialize_nn_weight([num_hidden_nodes[1], num_output_nodes])

    # The biases are extra (shift) terms that are added to each node in the
    # neural network.
    b1 = initialize_nn_weight([num_hidden_nodes[0]])
    b2 = initialize_nn_weight([num_hidden_nodes[1]])
    b3 = initialize_nn_weight([num_output_nodes])

    # Then, we create a function, to which we pass X, that generates a
    # prediction based on the current neural network weights. Note that the
    # hidden layers are ReLU activated. The output layer is not activated
    # (i.e., it is activated with the linear function).
    def nn_predict(X):
        """Generate prediction using neural network.

        Args:
            X: state: [z, k]

        """
        hidden_layer1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))
        hidden_layer2 = tf.nn.relu(tf.add(tf.matmul(hidden_layer1, W2), b2))
        output_layer = tf.add(tf.matmul(hidden_layer2, W3), b3)
        return output_layer

    # ####################################################################### #
    #                             Economic model                              #
    # ####################################################################### #
    # Current period ##########################################################
    # Today's extended state:
    z = X[:, 0]  # exogenous shock
    tfp = X[:, 1]  # total factor productivity
    depr = X[:, 2]  # depreciation
    K = X[:, 3]  # aggregate capital
    L = X[:, 4]  # aggregate labor
    r = X[:, 5]  # return on capital
    w = X[:, 6]  # wage
    Y = X[:, 7]  # aggregate production
    k = X[:, 8:8 + A]  # distribution of capital
    fw = X[:, 8 + A:8 + 2 * A]  # distribution of financial wealth
    linc = X[:, 8 + 2 * A:8 + 3 * A]  # distribution of labor income
    inc = X[:, 8 + 3 * A:8 + 4 * A]  # distribution of total income

    # Today's assets: How much the agents save
    # Get today's assets by executing the neural network
    a = nn_predict(X)
    # The last agent consumes everything they own
    a_all = tf.concat([a, tf.zeros([m, 1])], axis=1)

    # c_orig: the original consumption predicted by the neural network. However,
    #     the network can predict negative values before it learns not to. We
    #     ensure that the network learns itself out of a bad region by
    #     penalizing negative consumption (see the punishment term on the
    #     original consumption in the cost function below).
    # c: the corrected version of c_orig, in which all negative consumption
    #     values are set to ~0. If none of the consumption values are negative,
    #     then c_orig == c.
    c_orig = inc - a_all
    c = tf.maximum(c_orig, tf.ones_like(c_orig) * eps)

    # Today's savings become tomorrow's capital holding, but the first agent
    # is born without a capital endowment.
    k_prime = tf.concat([tf.zeros([m, 1]), a], axis=1)

    # Tomorrow's aggregate capital
    K_prime_orig = tf.reduce_sum(k_prime, axis=1, keepdims=True)
    K_prime = tf.maximum(K_prime_orig, tf.ones_like(K_prime_orig) * eps)

    # Tomorrow's labor
    l_prime = tf.tile(labor_endow, [m, 1])
    L_prime = tf.ones_like(K_prime)

    # Next period #############################################################
    # Shock 1 -----------------------------------------------------------------
    # 1) Get remaining parts of tomorrow's extended state
    # Exogenous shock
    z_prime_1 = 0 * tf.ones_like(z)

    # TFP and depreciation
    tfp_prime_1, depr_prime_1 = shocks(z_prime_1, eta, delta)

    # Return on capital, wage and aggregate production
    r_prime_1, w_prime_1, Y_prime_1 = firm(K_prime, tfp_prime_1, alpha,
                                           depr_prime_1)
    R_prime_1 = r_prime_1 * tf.ones([1, A])
    W_prime_1 = w_prime_1 * tf.ones([1, A])

    # Distribution of financial wealth, labor income, and total income
    fw_prime_1, linc_prime_1, inc_prime_1 = wealth(k_prime, R_prime_1, l_prime,
                                                   W_prime_1)

    # Tomorrow's state: Concatenate the parts together
    x_prime_1 = tf.concat([
        tf.expand_dims(z_prime_1, -1), tfp_prime_1, depr_prime_1, K_prime,
        L_prime, r_prime_1, w_prime_1, Y_prime_1, k_prime, fw_prime_1,
        linc_prime_1, inc_prime_1
    ],
                          axis=1)

    # 2) Get tomorrow's policy
    # Tomorrow's capital: capital holding at beginning of period and how much
    # they save
    a_prime_1 = nn_predict(x_prime_1)
    a_prime_all_1 = tf.concat([a_prime_1, tf.zeros([m, 1])], axis=1)

    # 3) Tomorrow's consumption
    c_orig_prime_1 = inc_prime_1 - a_prime_all_1
    c_prime_1 = tf.maximum(c_orig_prime_1, tf.ones_like(c_orig_prime_1) * eps)

    # Shock 2 -----------------------------------------------------------------
    # 1) Get remaining parts of tomorrow's extended state
    # Exogenous shock
    z_prime_2 = 1 * tf.ones_like(z)

    # TFP and depreciation
    tfp_prime_2, depr_prime_2 = shocks(z_prime_2, eta, delta)

    # Return on capital, wage and aggregate production
    r_prime_2, w_prime_2, Y_prime_2 = firm(K_prime, tfp_prime_2, alpha,
                                           depr_prime_2)
    R_prime_2 = r_prime_2 * tf.ones([1, A])
    W_prime_2 = w_prime_2 * tf.ones([1, A])

    # Distribution of financial wealth, labor income, and total income
    fw_prime_2, linc_prime_2, inc_prime_2 = wealth(k_prime, R_prime_2, l_prime,
                                                   W_prime_2)

    # Tomorrow's state: Concatenate the parts together
    x_prime_2 = tf.concat([
        tf.expand_dims(z_prime_2, -1), tfp_prime_2, depr_prime_2, K_prime,
        L_prime, r_prime_2, w_prime_2, Y_prime_2, k_prime, fw_prime_2,
        linc_prime_2, inc_prime_2
    ],
                          axis=1)

    # 2) Get tomorrow's policy
    a_prime_2 = nn_predict(x_prime_2)
    a_prime_all_2 = tf.concat([a_prime_2, tf.zeros([m, 1])], axis=1)

    # 3) Tomorrow's consumption
    c_orig_prime_2 = inc_prime_2 - a_prime_all_2
    c_prime_2 = tf.maximum(c_orig_prime_2, tf.ones_like(c_orig_prime_2) * eps)

    # Shock 3 -----------------------------------------------------------------
    # 1) Get remaining parts of tomorrow's extended state
    # Exogenous shock
    z_prime_3 = 2 * tf.ones_like(z)

    # TFP and depreciation
    tfp_prime_3, depr_prime_3 = shocks(z_prime_3, eta, delta)

    # Return on capital, wage and aggregate production
    r_prime_3, w_prime_3, Y_prime_3 = firm(K_prime, tfp_prime_3, alpha,
                                           depr_prime_3)
    R_prime_3 = r_prime_3 * tf.ones([1, A])
    W_prime_3 = w_prime_3 * tf.ones([1, A])

    # Distribution of financial wealth, labor income, and total income
    fw_prime_3, linc_prime_3, inc_prime_3 = wealth(k_prime, R_prime_3, l_prime,
                                                   W_prime_3)

    # Tomorrow's state: Concatenate the parts together
    x_prime_3 = tf.concat([
        tf.expand_dims(z_prime_3, -1), tfp_prime_3, depr_prime_3, K_prime,
        L_prime, r_prime_3, w_prime_3, Y_prime_3, k_prime, fw_prime_3,
        linc_prime_3, inc_prime_3
    ],
                          axis=1)

    # 2) Get tomorrow's policy
    # Tomorrow's capital: capital holding at beginning of period and how much
    # they save
    a_prime_3 = nn_predict(x_prime_3)
    a_prime_all_3 = tf.concat([a_prime_3, tf.zeros([m, 1])], axis=1)

    # 3) Tomorrow's consumption
    c_orig_prime_3 = inc_prime_3 - a_prime_all_3
    c_prime_3 = tf.maximum(c_orig_prime_3, tf.ones_like(c_orig_prime_3) * eps)

    # Shock 4 -----------------------------------------------------------------
    # 1) Get remaining parts of tomorrow's extended state
    # Exogenous shock
    z_prime_4 = 3 * tf.ones_like(z)

    # TFP and depreciation
    tfp_prime_4, depr_prime_4 = shocks(z_prime_4, eta, delta)

    # Return on capital, wage and aggregate production
    r_prime_4, w_prime_4, Y_prime_4 = firm(K_prime, tfp_prime_4, alpha,
                                           depr_prime_4)
    R_prime_4 = r_prime_4 * tf.ones([1, A])
    W_prime_4 = w_prime_4 * tf.ones([1, A])

    # Distribution of financial wealth, labor income, and total income
    fw_prime_4, linc_prime_4, inc_prime_4 = wealth(k_prime, R_prime_4, l_prime,
                                                   W_prime_4)

    # Tomorrow's state: Concatenate the parts together
    x_prime_4 = tf.concat([
        tf.expand_dims(z_prime_4, -1), tfp_prime_4, depr_prime_4, K_prime,
        L_prime, r_prime_4, w_prime_4, Y_prime_4, k_prime, fw_prime_4,
        linc_prime_4, inc_prime_4
    ],
                          axis=1)

    # 2) Get tomorrow's policy
    # Tomorrow's capital: capital holding at beginning of period and how much
    # they save
    a_prime_4 = nn_predict(x_prime_4)
    a_prime_all_4 = tf.concat([a_prime_4, tf.zeros([m, 1])], axis=1)

    # 3) Tomorrow's consumption
    c_orig_prime_4 = inc_prime_4 - a_prime_all_4
    c_prime_4 = tf.maximum(c_orig_prime_4, tf.ones_like(c_orig_prime_4) * eps)

    # Cost function ###########################################################
    # Prepare the transitions to the next period's states. In this setting,
    # there is a 25% chance of ending up in any of the 4 states in Z. This has
    # been hardcoded and needs to be changed to accommodate a different
    # transition matrix.
    pi_trans_to1 = p_transition * tf.ones((m, A - 1))
    pi_trans_to2 = p_transition * tf.ones((m, A - 1))
    pi_trans_to3 = p_transition * tf.ones((m, A - 1))
    pi_trans_to4 = p_transition * tf.ones((m, A - 1))
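    # Sketch of a possible generalization (not taken from this script): with a
    # full 4x4 transition matrix pi_np, the row matching each sample's current
    # shock could be gathered instead of using the constant p_transition, e.g.
    #     pi_row = tf.gather(tf.constant(pi_np, dtype=tf.float32), tf.cast(z, tf.int32))
    #     pi_trans_to1 = pi_row[:, 0:1] * tf.ones((1, A - 1))
    # and likewise for the remaining three columns.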

    # Euler equation
    opt_euler = -1 + ((
        (beta *
         (pi_trans_to1 * R_prime_1[:, 0:A - 1] * c_prime_1[:, 1:A]**
          (-gamma) + pi_trans_to2 * R_prime_2[:, 0:A - 1] * c_prime_2[:, 1:A]**
          (-gamma) + pi_trans_to3 * R_prime_3[:, 0:A - 1] * c_prime_3[:, 1:A]**
          (-gamma) + pi_trans_to4 * R_prime_4[:, 0:A - 1] * c_prime_4[:, 1:A]**
          (-gamma)))**(-1. / gamma)) / c[:, 0:A - 1])
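    # In equation form, for agent i = 1, ..., A-1:
    #     opt_euler_i = [ beta * sum_{z'} pi(z') * R'_i(z') * c'_{i+1}(z')^(-gamma) ]^(-1/gamma) / c_i - 1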

    # Punishment for negative consumption (c)
    orig_cons = tf.concat([
        c_orig, c_orig_prime_1, c_orig_prime_2, c_orig_prime_3, c_orig_prime_4
    ],
                          axis=1)
    opt_punish_cons = (1. / eps) * tf.maximum(-1 * orig_cons,
                                              tf.zeros_like(orig_cons))

    # Punishment for negative aggregate capital (K)
    opt_punish_ktot_prime = (1. / eps) * tf.maximum(
        -K_prime_orig, tf.zeros_like(K_prime_orig))

    # Concatenate the 3 equilibrium functions
    combined_opt = [opt_euler, opt_punish_cons, opt_punish_ktot_prime]
    opt_predict = tf.concat(combined_opt, axis=1)

    # Define the "correct" outputs. For all equilibrium functions, the correct
    # outputs is zero.
    opt_correct = tf.zeros_like(opt_predict)

    # Define the cost function
    cost = tf.losses.mean_squared_error(opt_correct, opt_predict)

    # Optimizer and gradient descent ##########################################
    # Adam optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)

    # Clip the gradients to limit the extent of exploding gradients
    gvs = optimizer.compute_gradients(cost)
    capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]

    # Define a training step
    train_step = optimizer.apply_gradients(capped_gvs)

    # ####################################################################### #
    #                                Training                                 #
    # ####################################################################### #
    def simulate_episodes(sess, x_start, episode_length, print_flag=True):
        """Simulate an episode for a given starting point using the current
        neural network state.

        Args:
            sess: current tensorflow session,
            x_start: starting state to simulate forward from,
            episode_length: number of steps to simulate forward,
            print_flag: boolean that determines whether to print simulation stats.

        Returns:
            X_episodes: tensor of states [z, k] to train on (training set).
        """
        time_start = datetime.now()
        if print_flag:
            print('Start simulating {} periods.'.format(episode_length))
        dim_state = np.shape(x_start)[1]

        X_episodes = np.zeros([episode_length, dim_state])
        X_episodes[0, :] = x_start
        X_old = x_start

        # Generate a sequence of random shocks
        rand_num = np.random.rand(episode_length, 1)

        for t in range(1, episode_length):
            z = int(X_old[0, 0])  # Current period's shock

            # Determine which state we will be in in the next period based on
            # the shock and generate the corresponding state (x_prime)
            if rand_num[t - 1] <= pi_np[z, 0]:
                X_new = sess.run(x_prime_1, feed_dict={X: X_old})
            elif rand_num[t - 1] <= pi_np[z, 0] + pi_np[z, 1]:
                X_new = sess.run(x_prime_2, feed_dict={X: X_old})
            elif rand_num[t - 1] <= pi_np[z, 0] + pi_np[z, 1] + pi_np[z, 2]:
                X_new = sess.run(x_prime_3, feed_dict={X: X_old})
            else:
                X_new = sess.run(x_prime_4, feed_dict={X: X_old})

            # Append it to the dataset
            X_episodes[t, :] = X_new
            X_old = X_new

        time_end = datetime.now()
        time_diff = time_end - time_start
        if print_flag:
            print('Finished simulation. Time for simulation: {}.'.format(
                time_diff))

        return X_episodes

    # Analytical solution #####################################################
    # Get the analytical solution
    beta_vec = beta_np * (1 - beta_np**(A - 1 - np.arange(A - 1))) / (
        1 - beta_np**(A - np.arange(A - 1)))
    beta_vec = tf.constant(np.expand_dims(beta_vec, 0), dtype=tf.float32)
    a_analytic = inc[:, :-1] * beta_vec

    # Training the deep equilibrium net #######################################
    # Helper variables for plotting
    all_ages = np.arange(1, A + 1)
    ages = np.arange(1, A)

    # Initialize tensorflow session
    sess = tf.Session()

    # Generate a random starting point
    if args.load_episode == 0:
        X_data_train = np.random.rand(1, num_input_nodes)
        X_data_train[:, 0] = (X_data_train[:, 0] > 0.5)
        X_data_train[:, 1:] = X_data_train[:, 1:] + 0.1
        assert np.min(
            np.sum(X_data_train[:, 1:], axis=1, keepdims=True) > 0
        ) == True, 'Starting point has negative aggregate capital (K)!'
        print('Calculated a valid starting point')
    else:
        data_path = './output/startpoints/data_{}.npy'.format(
            args.load_episode)
        X_data_train = np.load(data_path)
        print('Loaded initial data from ' + data_path)

    train_seed = 0

    cost_store, mov_ave_cost_store = [], []

    time_start = datetime.now()
    print('start time: {}'.format(time_start))

    # Initialize the random variables (neural network weights)
    init = tf.global_variables_initializer()

    # Initialize saver to save and load previous sessions
    saver = tf.train.Saver()

    # Run the initializer
    sess.run(init)

    if args.load_episode != 0:
        saver.restore(sess,
                      './output/models/sess_{}.ckpt'.format(args.load_episode))

    for episode in range(args.load_episode, num_episodes):
        # Simulate data: every episode uses a new training dataset generated on
        # the current iteration's neural network parameters.
        X_episodes = simulate_episodes(sess,
                                       X_data_train,
                                       len_episodes,
                                       print_flag=(episode == 0))
        X_data_train = X_episodes[-1, :].reshape([1, -1])
        k_dist_mean = np.mean(X_episodes[:, 8:8 + A], axis=0)
        k_dist_min = np.min(X_episodes[:, 8:8 + A], axis=0)
        k_dist_max = np.max(X_episodes[:, 8:8 + A], axis=0)

        ee_error = np.zeros((1, num_agents - 1))
        max_ee = np.zeros((1, num_agents - 1))

        for epoch in range(epochs_per_episode):
            # Every epoch is one full pass through the dataset. We train
            # multiple passes on one training set before we resimulate a
            # new dataset.
            train_seed += 1
            minibatch_cost = 0

            # Mini-batch the simulated data
            minibatches = random_mini_batches(X_episodes, minibatch_size,
                                              train_seed)

            for minibatch_X in minibatches:
                # Evaluate the cost of each mini-batch (the gradient step happens in the second loop below).
                minibatch_cost += sess.run(
                    cost, feed_dict={X: minibatch_X}) / num_minibatches
                if epoch == 0:
                    # For the first epoch, save the mean and max Euler errors for plotting.
                    # This way, the errors are calculated out-of-sample.
                    opt_euler_ = np.abs(
                        sess.run(opt_euler, feed_dict={X: minibatch_X}))
                    ee_error += np.mean(opt_euler_, axis=0) / num_minibatches
                    mb_max_ee = np.max(opt_euler_, axis=0, keepdims=True)
                    max_ee = np.maximum(max_ee, mb_max_ee)

            if epoch == 0:
                # Record the cost and moving average of the cost at the beginning of each
                # episode to track learning progress.
                cost_store.append(minibatch_cost)
                mov_ave_cost_store.append(np.mean(cost_store[-100:]))

            for minibatch_X in minibatches:
                # Take a mini-batch gradient descent training step. That is, update the
                # weights for one mini-batch.
                sess.run(train_step, feed_dict={X: minibatch_X})

        if episode % args.plot_interval == 0:
            # Plot
            # Plot the loss function
            plt.figure(figsize=std_figsize)
            ax = plt.subplot(1, 1, 1)
            ax.plot(np.log10(cost_store), 'k-', label='cost')
            ax.plot(np.log10(mov_ave_cost_store),
                    'r--',
                    label='moving average')
            ax.set_xlabel('Episodes')
            ax.set_ylabel('Cost [log10]')
            ax.legend(loc='upper right')
            plt.savefig('./output/plots/loss_ep_%d.pdf' % episode,
                        bbox_inches='tight')
            plt.close()

            # Plot the relative errors in the Euler equation
            plt.figure(figsize=std_figsize)
            ax = plt.subplot(1, 1, 1)
            ax.plot(ages, np.log10(ee_error).ravel(), 'k-', label='mean')
            ax.plot(ages, np.log10(max_ee).ravel(), 'k--', label='max')
            ax.set_xlabel('Age')
            ax.set_ylabel('Rel EE [log10]')
            ax.legend()
            plt.savefig('./output/plots/relee_ep_%d.pdf' % episode,
                        bbox_inches='tight')
            plt.close()

            # Plot the capital distribution
            plt.figure(figsize=std_figsize)
            ax = plt.subplot(1, 1, 1)
            ax.plot(all_ages, k_dist_mean, 'k-', label='mean')
            ax.plot(all_ages, k_dist_min, 'k-.', label='min')
            ax.plot(all_ages, k_dist_max, 'k--', label='max')
            ax.set_xlabel('Age')
            ax.set_ylabel('Capital (k)')
            ax.legend()
            ax.set_xticks(all_ages)
            plt.savefig('./output/plots/distk_ep_%d.pdf' % episode,
                        bbox_inches='tight')
            plt.close()

            # =======================================================================================
            # Sample 50 states and compare the neural network's prediction to the analytical solution
            pick = np.random.randint(len_episodes, size=50)
            random_states = X_episodes[pick, :]

            # Sort the states by the exogenous shock
            random_states_1 = random_states[random_states[:, 0] == 0]
            random_states_2 = random_states[random_states[:, 0] == 1]
            random_states_3 = random_states[random_states[:, 0] == 2]
            random_states_4 = random_states[random_states[:, 0] == 3]

            # Get corresponding capital distribution for plots
            random_k_1 = random_states_1[:, 8:8 + A]
            random_k_2 = random_states_2[:, 8:8 + A]
            random_k_3 = random_states_3[:, 8:8 + A]
            random_k_4 = random_states_4[:, 8:8 + A]

            # Generate a prediction using the neural network
            nn_pred_1 = sess.run(a, feed_dict={X: random_states_1})
            nn_pred_2 = sess.run(a, feed_dict={X: random_states_2})
            nn_pred_3 = sess.run(a, feed_dict={X: random_states_3})
            nn_pred_4 = sess.run(a, feed_dict={X: random_states_4})

            # Calculate the analytical solution
            true_pol_1 = sess.run(a_analytic, feed_dict={X: random_states_1})
            true_pol_2 = sess.run(a_analytic, feed_dict={X: random_states_2})
            true_pol_3 = sess.run(a_analytic, feed_dict={X: random_states_3})
            true_pol_4 = sess.run(a_analytic, feed_dict={X: random_states_4})

            # Plot both
            for i in range(A - 1):
                plt.figure(figsize=std_figsize)
                ax = plt.subplot(1, 1, 1)
                # Plot the true solution with a circle
                ax.plot(random_k_1[:, i],
                        true_pol_1[:, i],
                        'ro',
                        mfc='none',
                        alpha=0.5,
                        markersize=6,
                        label='analytic')
                ax.plot(random_k_2[:, i],
                        true_pol_2[:, i],
                        'bo',
                        mfc='none',
                        alpha=0.5,
                        markersize=6)
                ax.plot(random_k_3[:, i],
                        true_pol_3[:, i],
                        'go',
                        mfc='none',
                        alpha=0.5,
                        markersize=6)
                ax.plot(random_k_4[:, i],
                        true_pol_4[:, i],
                        'yo',
                        mfc='none',
                        alpha=0.5,
                        markersize=6)
                # Plot the prediction of the neural net
                ax.plot(random_k_1[:, i],
                        nn_pred_1[:, i],
                        'r*',
                        markersize=2,
                        label='DEQN')
                ax.plot(random_k_2[:, i], nn_pred_2[:, i], 'b*', markersize=2)
                ax.plot(random_k_3[:, i], nn_pred_3[:, i], 'g*', markersize=2)
                ax.plot(random_k_4[:, i], nn_pred_4[:, i], 'y*', markersize=2)
                ax.set_title('Agent {}'.format(i + 1))
                ax.set_xlabel(r'$k_t$')
                ax.set_ylabel(r'$a_t$')
                ax.legend()
                plt.savefig('./output/plots/policy_agent_%d_ep_%d.pdf' %
                            (i + 1, episode),
                            bbox_inches='tight')
                plt.close()

        # Print cost and time log
        print('Episode {}: \t log10(Cost): {:.4f}; \t runtime: {}'\
            .format(episode, np.log10(cost_store[-1]), datetime.now()- time_start))

        if episode % args.save_interval == 0:
            # Save the tensorflow session
            saver.save(sess, './output/models/sess_{}.ckpt'.format(episode))
            # Save the starting point
            np.save('./output/startpoints/data_{}.npy'.format(episode),
                    X_data_train)
Example #6
]

minibatch_size = 20

lr = 0.009
k = 2000
net = Network(layers, lr=lr, loss=cross_entropy)
num_epochs = 10
costs = []

m = X_train.shape[0]
for epoch in range(num_epochs):

    minibatch_cost = 0.
    num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
    minibatches = random_mini_batches(X_train, Y_train, minibatch_size)
    epoch_cost = 0
    for minibatch in minibatches:
        (minibatch_X, minibatch_Y) = minibatch
        net.train_step((minibatch_X, minibatch_Y))
        loss = np.sum(cross_entropy.compute((net.forward(minibatch_X), minibatch_Y)))
        print("cost minibatch %f" % loss)
        epoch_cost += loss / num_minibatches

    if epoch % 5 == 0:
        print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
    if epoch % 1 == 0:
        costs.append(epoch_cost)


#for epoch in xrange(100):
Example #7
    return G, D_real, D_fake

# Define constants
NUM_EPOCHS = 100
BATCH_SIZE = 128
LEARNING_RATE = 0.0002
BETA1 = 0.5
NOISE_DIM = 100
SAMPLE_SIZE = 100

# Load mnist data
X_train = utils.load_mnist_data()
utils.plot_sample(X_train[:SAMPLE_SIZE], "output/mnist_data.png")
X_train = utils.preprocess_images(X_train)
mini_batches = utils.random_mini_batches(X_train, BATCH_SIZE)

# Create DCGAN
X = tf.placeholder(tf.float32, shape=(None, X_train.shape[1], X_train.shape[2], X_train.shape[3]))
Z = tf.placeholder(tf.float32, [None, NOISE_DIM])
G, D_real, D_fake = create_gan(X, Z)

# Create training steps
G_loss_func, D_loss_func = utils.create_loss_funcs(D_real, D_fake)
G_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="Generator")
D_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="Discriminator")
G_train_step = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE, beta1=BETA1).minimize(G_loss_func, var_list=G_vars)
D_train_step = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE, beta1=BETA1).minimize(D_loss_func, var_list=D_vars)

# Start session
with tf.Session() as sess:
Example #8
    def train(self,
              X_train,
              Y_train,
              X_dev,
              Y_dev,
              model_dir,
              restore_from=None,
              print_cost=True):
        m = X_train.shape[0]

        model = self.model
        accuracy = model.accuracy
        cost = model.cost

        optimizer = tf.train.AdamOptimizer(
            self.params.learning_rate).minimize(cost)

        last_saver = tf.train.Saver(max_to_keep=1)
        best_saver = tf.train.Saver(max_to_keep=1)

        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            if (self.weights_file is not None) and (restore_from is None):
                model.load_weights(self.weights_file, sess)

            begin_at_epoch, costs, dev_costs, best_dev_accuracy, dev_accuracies, train_accuracies = self.restoreSession(
                last_saver, sess, restore_from, is_training=True)

            for epoch in range(self.params.num_epochs):
                count_batch = 0
                print("epoch: ", epoch + 1)
                minibatch_cost = 0.
                minibatch_accuracy = 0.
                num_minibatches = (m + self.params.train_batch_size -
                                   1) // self.params.train_batch_size

                minibatches = random_mini_batches(X_train, Y_train,
                                                  self.params.train_batch_size)

                for minibatch in minibatches:
                    # Select a minibatch
                    (minibatch_X, minibatch_Y) = minibatch
                    _, temp_cost, temp_accuracy = sess.run(
                        [optimizer, cost, accuracy],
                        feed_dict={
                            model.X: minibatch_X,
                            model.Y: minibatch_Y
                        })

                    # compute training cost
                    minibatch_cost += temp_cost / num_minibatches
                    minibatch_accuracy += temp_accuracy / num_minibatches

                    # Print result
                    if (count_batch % 10) == 0:
                        print("count_batch", count_batch, "temp_cost:",
                              temp_cost, "temp_accuracy:", temp_accuracy)
                    count_batch += 1

                costs.append(minibatch_cost)

                # compute dev cost
                dev_cost, dev_accuracy = self.evaluate(X_dev, Y_dev, sess)
                dev_costs.append(dev_cost)
                dev_accuracies.append(dev_accuracy)
                train_accuracies.append(minibatch_accuracy)

                if print_cost == True and epoch % 1 == 0:
                    print("Cost after epoch %i: %f" %
                          (begin_at_epoch + epoch + 1, minibatch_cost))
                    print("Accuracy after epoch %i: %f" %
                          (begin_at_epoch + epoch + 1, minibatch_accuracy))
                    print("dev_Cost after epoch %i: %f" %
                          (begin_at_epoch + epoch + 1, dev_cost))
                    print("dev_accuracy after epoch %i: %f" %
                          (begin_at_epoch + epoch + 1, dev_accuracy))

                # Save best sess
                if dev_accuracy > best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy
                    best_save_path = os.path.join(model_dir, 'best_weights',
                                                  'after-epoch')
                    best_saver.save(sess,
                                    best_save_path,
                                    global_step=begin_at_epoch + epoch + 1)
                    if not (os.path.exists(
                            os.path.join(model_dir, 'last_weights'))):
                        os.makedirs(os.path.join(model_dir, 'last_weights'))
                    np.save(
                        os.path.join(model_dir, 'last_weights',
                                     "best_dev_accuracy"), [best_dev_accuracy])

            # Save sess and costs
            last_save_path = os.path.join(model_dir, 'last_weights',
                                          'after-epoch')
            last_saver.save(sess,
                            last_save_path,
                            global_step=begin_at_epoch + epoch + 1)
            np.save(os.path.join(model_dir, 'last_weights', "costs"), costs)
            np.save(os.path.join(model_dir, 'last_weights', "dev_costs"),
                    dev_costs)
            np.save(os.path.join(model_dir, 'last_weights', "dev_accuracies"),
                    dev_accuracies)
            np.save(
                os.path.join(model_dir, 'last_weights', "train_accuracies"),
                train_accuracies)
Example #9
def model(X_train,
          Y_train,
          X_test,
          Y_test,
          learning_rate=0.0001,
          num_epochs=700,
          minibatch_size=64,
          print_cost=True):
    """
    Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.
    
    Arguments:
    X_train -- training set, of shape (input size = 12288, number of training examples = 1080)
    Y_train -- training labels, of shape (output size = 6, number of training examples = 1080)
    X_test -- test set, of shape (input size = 12288, number of test examples = 120)
    Y_test -- test labels, of shape (output size = 6, number of test examples = 120)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    ops.reset_default_graph(
    )  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)  # to keep consistent results
    seed = 3  # to keep consistent results
    (
        n_x, m
    ) = X_train.shape  # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[0]  # n_y : output size
    costs = []  # To keep track of the cost

    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_placeholders(n_x, n_y)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)

    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z3, Y)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.  # Defines a cost related to an epoch
            num_minibatches = int(
                m / minibatch_size
            )  # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size,
                                              seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # Run the session to execute the "optimizer" and the "cost"
                _, minibatch_cost = sess.run([optimizer, cost],
                                             feed_dict={
                                                 X: minibatch_X,
                                                 Y: minibatch_Y
                                             })

                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every epoch
            if print_cost == True and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per fives)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters
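
Example #9 builds its graph with helpers that are not listed. A sketch of what forward_propagation presumably computes for the LINEAR->RELU->LINEAR->RELU->LINEAR architecture named in the docstring (the softmax itself is applied inside the cost); the parameter names "W1"..."b3" are the usual convention and are assumed here:

import tensorflow as tf

def forward_propagation(X, parameters):
    """Forward pass: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR (softmax is left to the cost)."""
    Z1 = tf.add(tf.matmul(parameters["W1"], X), parameters["b1"])
    A1 = tf.nn.relu(Z1)
    Z2 = tf.add(tf.matmul(parameters["W2"], A1), parameters["b2"])
    A2 = tf.nn.relu(Z2)
    Z3 = tf.add(tf.matmul(parameters["W3"], A2), parameters["b3"])
    return Z3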
Example #10
    # Log files
    logfile = os.path.join(output_dir, "loghistory.txt")
    fid = open(logfile, "w")

    nb_samples = 2000
    seed = 0
    count = 0

    for i in range(num_epoch):

        seed = seed + 1
        mini = 0

        minibatches = random_mini_batches(toydata.T,
                                          mini_batch_size=mb_size,
                                          seed=seed)

        for minibatch in minibatches:
            mini = mini + 1
            X_mb = minibatch.T
            count = count + 1

            # Train auto-encoders
            z_mb = sample_z(X_mb.shape[0], z_dim)
            sess.run([R_solver], feed_dict={X: X_mb, z: z_mb})

            # Train discriminator
            for k in range(n_critics):
                z_mb_critics = sample_z(X_mb.shape[0], z_dim)
                X_mb_critics = random_batches(toydata, X_mb.shape[0])
Example #11
writer.add_graph(sess.graph)
sess.run(tf.global_variables_initializer())


# train my model
step = 0
costs = []
train_accu = []

print('Learning Started!')

for epoch in range(cfg.epoch):
    avg_cost = 0
    avg_accu = 0
    total_batch = int(x_train.shape[0] / cfg.b_size)
    minibatches = tu.random_mini_batches(x_train, y_train, cfg.b_size)

    for i in minibatches:
        (batch_xs, batch_ys) = i
        _, temp_cost, temp_accu, summary = model.train(batch_xs, batch_ys)
        avg_cost += temp_cost / total_batch
        avg_accu += temp_accu / total_batch
        writer.add_summary(summary, global_step=step)
        step += 1

    costs.append(avg_cost)
    train_accu.append(avg_accu)

    print('Epoch', '%04d' % (epoch + 1),
          ': cost =', '{:.9f}'.format(avg_cost), '| accuracy =', '{:.9f}'.format(avg_accu))
Example #12
def model(X_train, Y_train, X_test, Y_test, learning_rate, num_epochs, minibatch_size, print_cost = True):

    tf.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep consistent results
    seed = 3                                          # to keep consistent results
    
    (n_x, m) = X_train.shape                          # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[0]                            # n_y : output size
    costs = []                                        # To keep track of the cost
    
    X, Y = create_placeholders(n_x, n_y)
    
    parameters = initialize_parameters()

    Z3 = forward_propagation(X, parameters)
    
    cost = compute_cost(Z3, Y)
    print(cost)
    
    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    ### START CODE HERE ### (1 line)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    ### END CODE HERE ###
    
    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        
        # Run the initialization
        sess.run(init)
        
        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.                       # Defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                
                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost", the feedict should contain a minibatch for (X,Y).
                ### START CODE HERE ### (1 line)
                _ , minibatch_cost = sess.run([optimizer, cost], 
                                             feed_dict={X: minibatch_X, 
                                                        Y: minibatch_Y})
                ### END CODE HERE ###
                
                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every epoch
            if print_cost == True and epoch % 100 == 0:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)
                
        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per fives)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()
        
#        check=sess.run(tf.test.compute_gradient_error(X_train, X_train.shape, Y_train, Y_train.shape))
#        print(check)

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print ("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))
        
        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("X_test -------", X_test.shape)
        print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
        
        return parameters
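
Example #12 (like #9 and #14) delegates the loss to a compute_cost helper. A sketch of the usual softmax cross-entropy version, offered as an assumption about what that helper does rather than its actual source:

import tensorflow as tf

def compute_cost(Z3, Y):
    """Mean softmax cross-entropy; logits and labels are transposed to (examples, classes)."""
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))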
Example #13
def L_layer_model(X, Y, layers_dims, optimizer, learning_rate = 0.0075,
	mini_batch_size = 64, beta = 0.9, beta1 = 0.9, beta2 = 0.999,  epsilon = 1e-8, num_epochs = 10000,
	 print_cost=False, preloaded_weights = None, lambd = 0.7):#lr was 0.009
    """
    Implements a L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.
    
    Arguments:
    X -- data, numpy array of shape (number of examples, num_px * num_px * 3)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    mini_batch_size -- the size of a mini batch
    beta -- Momentum hyperparameter
    beta1 -- Exponential decay hyperparameter for the past gradients estimates 
    beta2 -- Exponential decay hyperparameter for the past squared gradients estimates 
    epsilon -- hyperparameter preventing division by zero in Adam updates
    num_epochs -- number of epochs
    print_cost -- if True, it prints the cost every 100 steps
    preloaded_weights -- pretrained model weights used to initialize the parameters

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    costs -- list of costs recorded during training
    """
    costs = []                         # keep track of cost
    
    # Parameters initialization. (≈ 1 line of code)
    parameters = initialize_parameters_deep(layers_dims)
    if preloaded_weights:
        for l in range(1,len(layers_dims)):
            assert(parameters["W" + str(l)].shape == preloaded_weights["W" + str(l)].shape)
        parameters = preloaded_weights
        print('weights preloaded')

    print('Using :',optimizer)
     # Initialize the optimizer
    if optimizer == "gd":
        pass # no initialization required for gradient descent
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    for i in range(num_epochs):
             
        # Define the random minibatches; the dataset is reshuffled at each epoch
        minibatches = lr_utils.random_mini_batches(X, Y, mini_batch_size)

        for minibatch in minibatches:
    
            # Select a minibatch
            (minibatch_X, minibatch_Y) = minibatch
            # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
            AL, caches = L_model_forward(minibatch_X, parameters)
            # Compute cost.
            cost =  compute_cost_with_regularization(AL, minibatch_Y, parameters, lambd)
            # Backward propagation.
            grads = L_model_backward(AL, minibatch_Y, caches, lambd)
            # Update parameters
            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, learning_rate, beta1, beta2,  epsilon)

        # Print the cost every epoch; record it and save the parameters every 100 epochs
        if print_cost:
            print("Cost after iteration %i: %f" % (i, cost))
        if print_cost and i % 100 == 0:
            save_parameters(parameters)
            costs.append(cost)
           
    return parameters, costs
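
A hypothetical call for illustration only (the layer sizes and the train_x/train_y arrays are placeholders invented here, not values from the example):

layers_dims = [12288, 20, 7, 5, 1]  # input size followed by each layer's width, ending in one sigmoid unit
parameters, costs = L_layer_model(train_x, train_y, layers_dims,
                                  optimizer="adam", learning_rate=0.0075,
                                  mini_batch_size=64, num_epochs=2500, print_cost=True)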
Example #14
    def model(self,
              path_train_dataset,
              path_test_dataset,
              X_train_column,
              Y_train_column,
              X_test_column,
              Y_test_column,
              classes_list,
              optimizer_algo='adam',
              print_cost=True):

        # load the datasets
        X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = load_dataset(
            path_train_dataset, path_test_dataset, X_train_column,
            Y_train_column, X_test_column, Y_test_column, classes_list)

        # pre-processing
        X_train, Y_train, X_test, Y_test = flatten(X_train_orig, Y_train_orig,
                                                   X_test_orig, Y_test_orig,
                                                   classes)

        # to be able to rerun the model without overwriting tf variables
        ops.reset_default_graph()
        # (n_x: input size, m : number of examples in the train set)
        (n_x, m) = X_train.shape
        n_y = Y_train.shape[0]  # n_y : output size
        costs = []  # To keep track of the cost
        seed = 1  # seed used to reshuffle the minibatches each epoch (initial value assumed)

        # Create Placeholders of shape (n_x, n_y)
        X, Y = create_placeholders(n_x, n_y)

        # Initialize parameters
        parameters = initialize_parameters(self.layers_list, seed=1)

        # Forward propagation: Build for-propagation in the tensorflow graph
        Z_final_layer = forward_propagation(X, parameters)

        # Cost function: Add cost function to tensorflow graph
        cost = compute_cost(Z_final_layer, Y)

        # Backpropagation: Define the tensorflow optimizer. Use AdamOptimizer
        if optimizer_algo == 'gradient_descent':
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.learning_rate).minimize(cost)

        elif optimizer_algo == 'momentum':
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate,
                momentum=0.9).minimize(cost)  # MomentumOptimizer requires a momentum value; 0.9 assumed

        elif optimizer_algo == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(cost)

        # Initialize all the variables
        init = tf.global_variables_initializer()

        # Start the session to compute the tensorflow graph
        with tf.Session() as sess:

            # Run the initialization
            sess.run(init)

            # Do the training loop
            for epoch in range(self.n_epochs):

                epoch_cost = 0.  # Defines a cost related to an epoch
                # number of minibatches of size minibatch_size in the train set
                num_minibatches = int(m / self.minibatch_size)
                seed = seed + 1
                minibatches = random_mini_batches(X_train, Y_train,
                                                  self.minibatch_size, seed)

                for minibatch in minibatches:

                    # Select a minibatch
                    (minibatch_X, minibatch_Y) = minibatch
                    _, minibatch_cost = sess.run([optimizer, cost],
                                                 feed_dict={
                                                     X: minibatch_X,
                                                     Y: minibatch_Y
                                                 })

                    epoch_cost += minibatch_cost / num_minibatches

                # Print the cost every epoch
                if print_cost == True and epoch % 100 == 0:
                    print("Cost after epoch %i: %f" % (epoch, epoch_cost))

                if print_cost == True and epoch % 5 == 0:
                    costs.append(epoch_cost)

            # lets save the parameters in a variable
            parameters = sess.run(parameters)
            print("Parameters have been trained!")

            # stores quantities useful for later
            quantities = {
                "X": X,
                "Y": Y,
                "Z_final_layer": Z_final_layer,
                "X_train": X_train,
                "Y_train": Y_train,
                "X_test": X_test,
                "Y_test": Y_test
            }

        return quantities, costs, parameters
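
Unlike Examples #4 and #9, Example #14 returns the graph tensors in quantities instead of evaluating accuracy in place. A sketch of how accuracy could be computed with them, mirroring those examples; note it would have to run inside the with tf.Session() block above, before the session closes (it is not part of the original method):

correct_prediction = tf.equal(tf.argmax(Z_final_layer), tf.argmax(Y))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))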