Example #1
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.

        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """

        grad_tensors = tf.gradients(loss_tensor, param_vars)

        vel_vars = [
            tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
            for param_var in param_vars
        ]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

        vt = [
            self.momentum * vel - self.learning_rate * grad
            for vel, grad in zip(vel_vars, grad_tensors)
        ]
        updates = [(param_var, param_var + v)
                   for param_var, v in zip(param_vars, vt)] + list(zip(vel_vars, vt))
        "*** YOUR CODE HERE ***"
        #util.raiseNotDefined()
        return updates
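Example #1 above, like the later variants of get_updates_with_momentum in this collection, implements the rule v <- momentum * v - learning_rate * grad followed by w <- w + v. The following framework-free NumPy sketch illustrates that rule on a toy quadratic loss; the loss, its gradient, and all constants here are made up for illustration and are not part of the assignment code:

import numpy as np

def momentum_step(params, velocities, grads, learning_rate, momentum):
    """One momentum update: v <- momentum * v - learning_rate * grad, w <- w + v."""
    new_velocities = [momentum * v - learning_rate * g
                      for v, g in zip(velocities, grads)]
    new_params = [p + v for p, v in zip(params, new_velocities)]
    return new_params, new_velocities

# Toy problem: minimize f(w) = 0.5 * ||w||^2, whose gradient is simply w.
w = [np.array([1.0, -2.0])]
v = [np.zeros(2)]
for _ in range(100):
    grads = [wi.copy() for wi in w]  # gradient of 0.5 * ||w||^2 evaluated at w
    w, v = momentum_step(w, v, grads, learning_rate=0.1, momentum=0.9)
print(w)  # spirals in toward the minimum at the origin, [0, 0]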
Example #2
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.

        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """
        grad_tensors = tf.gradients(loss_tensor, param_vars)
        vel_vars = [
            tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
            for param_var in param_vars
        ]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])
        # Updating the velocity.
        update_v = [(vel_vars[i], self.momentum * vel_vars[i] -
                     self.learning_rate * grad_tensors[i])
                    for i in range(len(vel_vars))]
        # Updating the weight vectors based on the velocity.
        update_w = [(param_vars[i], param_vars[i] + update_v[i][1])
                    for i in range(len(param_vars))]
        # util.raiseNotDefined()
        return update_v + update_w
Example #3
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.

        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """
        grad_tensors = tf.gradients(loss_tensor, param_vars)
        vel_vars = [tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32)) for param_var in param_vars]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])
        updates = []
        for i, var in enumerate(vel_vars):
            new_var = self.momentum * var - self.learning_rate * grad_tensors[i]
            updates.append((var, new_var))
            updates.append((param_vars[i], param_vars[i] + new_var))
        return updates
Example #4
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.
        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """
        grad_tensors = tf.gradients(loss_tensor, param_vars)
        vel_vars = [tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32)) for param_var in param_vars]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])
        updates = []

        # easier to handle w/ multiple updates in a for loop
        # iterate simultaneously through velocity, parameters, gradients
        for vel_var, param_var, grad in zip(vel_vars, param_vars, grad_tensors):
            # v_{t+1} = momentum * v_t - alpha * gradient
            vt = (self.momentum * vel_var) - (self.learning_rate * grad)
            # append velocity update
            updates.append((vel_var, vt))
            # append parameter update
            updates.append((param_var, param_var + vt))

        return updates
Example #5
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.

        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """
        grad_tensors = tf.gradients(loss_tensor, param_vars)
        vel_vars = [tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32)) for param_var in param_vars]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])
        updates = []
        "*** YOUR CODE HERE ***"
        for i in range(len(param_vars)):
            new_vel_var = self.momentum * vel_vars[i] - self.learning_rate * grad_tensors[i]
            updates.append((param_vars[i], param_vars[i] + new_vel_var))
            updates.append((vel_vars[i], new_vel_var))
        return updates
Example #6
def main():
    dataset = tinyDataset()
    input_data, target_data = dataset[:2]
    W_init = np.array([[0.4, 0.0], [0.0, -0.2], [0.1, 0.0]], dtype=np.float32)
    b_init = np.array([-0.5, 0.3], dtype=np.float32)
    model = models.LinearRegressionModel(x_shape=(None, 3), W=W_init, b=b_init)
    solver = solvers.GradientDescentSolver(learning_rate=0.1,
                                           iterations=1,
                                           momentum=0.9)

    target_ph = tf.placeholder(tf.float32, shape=(None, 2))
    loss_tensor = solvers.squared_error(model.prediction_tensor, target_ph)
    param_vars = model.get_param_vars(trainable=True)

    updates = solver.get_updates(loss_tensor, param_vars)
    update_ops = [tf.assign(old, new) for (old, new) in updates]

    # gradient and parameter values before updates
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    feed_dict = dict([(model.input_ph, input_data), (target_ph, target_data)])
    grads = [
        grad_tensor.eval(session=tfu.get_session(), feed_dict=feed_dict)
        for grad_tensor in grad_tensors
    ]
    param_values = model.get_param_values()

    print(grads)
    print(param_values)
    tfu.get_session().run([loss_tensor] + update_ops, feed_dict=feed_dict)
    print(model.get_param_values())
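Because the velocity variables are initialized to zero, the single update applied in the script above reduces to a plain gradient step: with v0 = 0, v1 = momentum * 0 - learning_rate * grad = -learning_rate * grad, so each parameter moves by exactly -0.1 times its gradient. A small helper like the hypothetical one below (not part of the assignment code) could cross-check the values printed before and after the update ops run:

import numpy as np

def expected_first_step(param_values, grads, learning_rate=0.1, momentum=0.9):
    """Expected parameter values after one momentum update from zero velocity.

    With v0 = 0, the momentum term vanishes and the first step is identical
    to vanilla gradient descent: w1 = w0 - learning_rate * grad.
    """
    velocities = [np.zeros_like(g) for g in grads]
    new_velocities = [momentum * v - learning_rate * g
                      for v, g in zip(velocities, grads)]
    return [p + v for p, v in zip(param_values, new_velocities)]

# Hypothetical usage, comparing against the second print in main():
# expected = expected_first_step(param_values, grads)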
Example #7
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.

        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """


        grad_tensors = tf.gradients(loss_tensor, param_vars)


        vel_vars = [tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32)) for param_var in param_vars]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])
        
        vt = [self.momentum * vel - self.learning_rate * grad for vel, grad in zip(vel_vars, grad_tensors)]
        updates = [(param_var, param_var + v) for param_var, v in zip(param_vars, vt)] + list(zip(vel_vars, vt))
        "*** YOUR CODE HERE ***"
        #util.raiseNotDefined()
        return updates 
Example #8
def main():
    dataset = tinyDataset()
    input_data, target_data = dataset[:2]
    W_init = np.array([[0.4, 0.0], [0.0, -0.2], [0.1, 0.0]], dtype=np.float32)
    b_init = np.array([-0.5, 0.3], dtype=np.float32)
    model = models.LinearRegressionModel(x_shape=(None, 3), W=W_init, b=b_init)
    solver = solvers.GradientDescentSolver(learning_rate=0.1, iterations=1, momentum=0.9)

    target_ph = tf.placeholder(tf.float32, shape=(None, 2))
    loss_tensor = solvers.squared_error(model.prediction_tensor, target_ph)
    param_vars = model.get_param_vars(trainable=True)

    updates = solver.get_updates(loss_tensor, param_vars)
    update_ops = [tf.assign(old, new) for (old, new) in updates]

    # gradient and parameter values before updates
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    feed_dict = dict([(model.input_ph, input_data), (target_ph, target_data)])
    grads = [grad_tensor.eval(session=tfu.get_session(), feed_dict=feed_dict) for grad_tensor in grad_tensors]
    param_values = model.get_param_values()

    print(grads)
    print(param_values)
    tfu.get_session().run([loss_tensor] + update_ops, feed_dict=feed_dict)
    print(model.get_param_values())
Example #9
    def get_update_values(self, moduleDict):
        # dataset
        dataset = getattr(datasets, self.dataset)()
        input_data, target_data = dataset[:2]
        # model
        if self.model_module == 'models':  # need to check for this since this is not a student file
            module = models
        else:
            module = moduleDict[self.model_module]
        model_class = getattr(module, self.model_class)
        model_kwargs = dict(num_labels=dataset[1].shape[-1])
        if self.model_class == 'ConvNetModel':
            model_kwargs['x_shape'] = (None,) + dataset[0].shape[1:]
        else:
            model_kwargs['num_features'] = dataset[0].shape[-1]
        model = model_class(**model_kwargs)
        # solver
        solver_class = getattr(moduleDict[self.solver_module], self.solver_class)
        solver = solver_class(learning_rate=self.learning_rate, iterations=0, momentum=self.momentum)

        target_ph = tf.placeholder(tf.float32, shape=(None, 2))
        loss_tensor = solvers.squared_error(model.prediction_tensor, target_ph)
        param_vars = model.get_param_vars(trainable=True)

        updates = solver.get_updates(loss_tensor, param_vars)
        update_ops = [tf.assign(old, new) for (old, new) in updates]
        feed_dict = dict(zip([model.input_ph, target_ph], [input_data, target_data]))
        for i in range(self.update_iterations):
            tfu.get_session().run(update_ops, feed_dict=feed_dict)

        grad_tensors = tf.gradients(loss_tensor, param_vars)
        grads = [grad_tensor.eval(session=tfu.get_session(), feed_dict=feed_dict) for grad_tensor in grad_tensors]

        len_messages = len(self.messages)
        if not isinstance(updates, (list, tuple)):
            self.addMessage('updates should be a list, %r given' % updates)
            return updates, None, grads
        # Check updates are in the right format
        for update in updates:
            try:
                old, new = update
            except ValueError:
                self.addMessage('Each update in updates should be of length 2, but it is of length %d' % len(update))
                continue
            if not isinstance(old, tf.Variable):
                self.addMessage('The first element in the tuple update should be a tf.Variable, %r given' % old)
            if not isinstance(new, (tf.Variable, tf.Tensor)):
                self.addMessage('The second element in the tuple update should be a tf.Variable or a tf.Tensor, %r given' % new)
        if len(self.messages) > len_messages:
            return updates, None, grads
        # Check for repeated variables
        if len(set(list(zip(*updates))[0])) != len(updates):
            self.addMessage('There are some repeated variables being updated: %r' % (list(zip(*updates))[0],))
            return updates, None, grads
        update_values = [tfu.get_session().run(update, feed_dict=feed_dict) for update in updates]
        return updates, update_values, grads
Example #10
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.

        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """
        grad_tensors = tf.gradients(loss_tensor, param_vars)
        vel_vars = [
            tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
            for param_var in param_vars
        ]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])
        updates = []
        "*** YOUR CODE HERE ***"
        alpha = self.learning_rate
        mu = self.momentum

        # For each parameter i: v_{t+1} = mu * v_t - alpha * grad_i, then w_{t+1} = w_t + v_{t+1}
        for i in range(len(grad_tensors)):
            scaled_grad = tf.math.scalar_mul(alpha, grad_tensors[i])
            scaled_vel = tf.math.multiply(mu, vel_vars[i])
            v_t_plus_1 = tf.math.subtract(scaled_vel, scaled_grad)
            w_t_plus_1 = tf.math.add(param_vars[i], v_t_plus_1)
            updates.extend([(vel_vars[i], v_t_plus_1), (param_vars[i], w_t_plus_1)])

        return updates
Example #11
 def set_param_values(self, param_values, **tags):
     param_vars = self.get_param_vars(**tags)
     if len(param_values) != len(param_vars):
         raise ValueError(
             'there are %d parameter variables with the given tags '
             'but %d parameter values were given' %
             (len(param_vars), len(param_values)))
     tfu.get_session().run([
         tf.assign(param_var, param_value)
         for (param_var, param_value) in zip(param_vars, param_values)
     ])
Example #12
    def __init__(self, num_features=784, num_labels=10):
        super(SoftmaxRegressionModel, self).__init__()
        # input and target placeholder variables
        self.x = tf.placeholder(tf.float32, shape=(None, num_features))
        self.input_ph = self.x

        # parameter variables
        self.W = self.add_param_var(truncated_normal([num_features, num_labels], stddev=0.1, fixed_random=self._fixed_random), name='W')
        self.b = self.add_param_var(tf.constant(0.1, shape=[num_labels]), name='b', regularizable=False)

        # prediction tensor
        self.y = tf.nn.softmax(tf.matmul(self.x, self.W) + self.b)
        self.prediction_tensor = self.y

        # initialize parameters
        tfu.get_session().run([param_var.initializer for param_var in self.get_param_vars()])
Example #13
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.

        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """
        grad_tensors = tf.gradients(loss_tensor, param_vars)
        vel_vars = [
            tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
            for param_var in param_vars
        ]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])
        updates = []
        "*** YOUR CODE HERE ***"
        each = []
        # each element in the list, will become a tuple
        length = len(grad_tensors)
        a = 0
        while a <= length - 1:
            each.append(param_vars[a])
            v = self.momentum * vel_vars[
                a] - self.learning_rate * grad_tensors[a]
            b = param_vars[a] + v
            each.append(b)
            updates.append(tuple(each))
            each = []
            each.append(vel_vars[a])
            each.append(v)
            updates.append(tuple(each))
            each = []
            a = a + 1

        #util.raiseNotDefined()
        return updates
Example #14
    def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
        """
        Question 6.a: Optimize the model and return the intermediate losses.

        Optimize the model using gradient descent by running the variable
        updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        You may not need to fill in both "*** YOUR CODE HERE ***" blanks,
        but they are both provided so you can define variables outside and
        inside the for loop.

        Useful method:
        session.run
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
        update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]
        train_losses = []
        val_losses = []
        for iter_ in range(self.iterations):
            # train_loss should be the loss of this iteration using all of the training data
            # val_loss should be the loss of this iteration using all of the validation data
    	    
            # run the loss tensor to get this iteration's training loss; zip placeholders with the training data for the feed dict
            train_loss = session.run(loss_tensor, feed_dict=dict(zip(placeholders, train_data)))

            # run the update ops to apply the parameter updates
            session.run(update_ops, feed_dict=dict(zip(placeholders, train_data)))

            # run the loss tensor on the validation data; no updates here, we are only measuring the loss
            val_loss = session.run(loss_tensor, feed_dict=dict(zip(placeholders, val_data)))

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
Example #15
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.

        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """
        grad_tensors = tf.gradients(loss_tensor, param_vars)
        vel_vars = [tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32)) for param_var in param_vars]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])
        updates = []
        "*** YOUR CODE HERE ***"
        #util.raiseNotDefined()
        # Q5: Question 5.
        # param_vars = [W, b], or more precisely w = concatenate(flatten(W), b); a set of weights plus a bias b.
        # grad_tensors = [partial derivatives of the loss w.r.t. W, partial derivatives of the loss w.r.t. b].
        # Gradient descent with momentum: for each parameter i,
        #     v[i] = self.momentum * v[i] - self.learning_rate * grad_tensors[i]
        #     param_vars[i] += v[i]
        # updates should include both the weight update and the velocity update.

        for i in range(len(param_vars)):
            oldweight = param_vars[i]
            oldv = vel_vars[i]
            newv = self.momentum * oldv - self.learning_rate * grad_tensors[i]
            newweight = oldweight + newv
            updates.append((oldweight, newweight))
            updates.append((oldv, newv))
        return updates
Example #16
    def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
        """
        Question 6.a: Optimize the model and return the intermediate losses.

        Optimize the model using gradient descent by running the variable
        updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        You may not need to fill in both "*** YOUR CODE HERE ***" blanks,
        but they are both provided so you can define variables outside and
        inside the for loop.

        Useful method:
        session.run
        """
        session = tfu.get_session()
        # placeholder for the target data; its value is supplied via feed_dict at run time
        target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        # the loss tensor is currently just a symbolic expression; it only takes on a value
        # once input and target data are fed in via feed_dict
        loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
        update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]
        train_losses = []
        val_losses = []
        for iter_ in range(self.iterations):
            # train_loss should be the loss of this iteration using all of the training data
            # val_loss should be the loss of this iteration using all of the validation data
            # run the update ops together with the loss tensor in a single session.run call
            train_loss = session.run([loss_tensor, update_ops], feed_dict={placeholders[0]: train_data[0], placeholders[1]: train_data[1]})
            train_loss = train_loss[0]
            val_loss = session.run(loss_tensor, feed_dict={placeholders[0]: val_data[0], placeholders[1]: val_data[1]})
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
Example #17
    def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
        """
        Question 6.a: Optimize the model and return the intermediate losses.

        Optimize the model using gradient descent by running the variable
        updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        You may not need to fill in both "*** YOUR CODE HERE ***" blanks,
        but they are both provided so you can define variables outside and
        inside the for loop.

        Useful method:
        session.run
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
        update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]
        train_losses = []
        val_losses = []
        for iter_ in range(self.iterations):
            "*** YOUR CODE HERE ***"
            
            train_loss = session.run(loss_tensor, feed_dict={placeholders[0]:train_data[0], placeholders[1]:train_data[1]})

            session.run(update_ops, feed_dict={placeholders[0]:train_data[0], placeholders[1]:train_data[1]})
            val_loss = session.run(loss_tensor, feed_dict={placeholders[0]:val_data[0], placeholders[1]:val_data[1]})
            # train_loss should be the loss of this iteration using all of the training data
            # val_loss should be the loss of this iteration using all of the validation data
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
Example #18
    def __init__(self, num_features=784, num_labels=10):
        super(SoftmaxRegressionModel, self).__init__()
        # input and target placeholder variables
        self.x = tf.placeholder(tf.float32, shape=(None, num_features))
        self.input_ph = self.x

        # parameter variables
        self.W = self.add_param_var(truncated_normal(
            [num_features, num_labels],
            stddev=0.1,
            fixed_random=self._fixed_random),
                                    name='W')
        self.b = self.add_param_var(tf.constant(0.1, shape=[num_labels]),
                                    name='b',
                                    regularizable=False)

        # prediction tensor
        self.y = tf.nn.softmax(tf.matmul(self.x, self.W) + self.b)
        self.prediction_tensor = self.y

        # initialize parameters
        tfu.get_session().run(
            [param_var.initializer for param_var in self.get_param_vars()])
Example #19
    def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
        """
        Question 6.b: Optimize the model and return the intermediate losses.

        Optimize the model using stochastic gradient descent by running the
        variable updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration. The validation loss should be computed using the
            same amount of data as the training loss, but using the validation
            data.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        In here, the gradient descent is stochastic, meaning that you don't
        need to use all the data at once before you update the model
        parameters. Instead, you update the model parameters as you iterate
        over the data. You must use MinibatchIndefinitelyGenerator to iterate
        over the data, otherwise your solution might differ from the one of
        the autograder. You will need to instantiate two generators (one for
        the training data and another one for the validation data) and you
        should do it before the for loop. You should read the docstring of
        MinibatchIndefinitelyGenerator in tensorflow_util.py to figure out
        how to use it. Make sure to pass in self.shuffle when you instantiate
        the generator. You will have to choose a proper batch size too.

        Useful member variables and methods:
        self.shuffle
        session.run(...)
        generator.next()
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        train_data_generator = MinibatchIndefinitelyGenerator(train_data, 1, self.shuffle)
        val_data_generator = MinibatchIndefinitelyGenerator(val_data, 1, self.shuffle)
Example #20
    def get_updates_with_momentum(self, loss_tensor, param_vars):
        """
        Question 5: Returns the gradient descent updates when momentum is used.

        Args:
            loss_tensor: loss tensor used to compute the gradients.
            param_vars: list of parameter variables.

        Returns:
            A list of tuples, where each tuple is an update of the form
            (var, new_tensor) indicating that, at runtime, the variable var
            should be updated with new_tensor.

        Your implementation should use the gradient tensors and the velocity
        variables (both provided below), and the member variables
        self.learning_rate and self.momentum.
        """
        grad_tensors = tf.gradients(loss_tensor, param_vars)
        vel_vars = [
            tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
            for param_var in param_vars
        ]
        tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])
        updates = []
        vel_updates = []
        i = 0
        while i < len(grad_tensors):
            vel_op1 = tf.multiply(self.learning_rate, grad_tensors[i])
            vel_op2 = tf.multiply(self.momentum, vel_vars[i])
            vel_op3 = tf.subtract(vel_op2, vel_op1)
            param_op = tf.add(param_vars[i], vel_op3)
            updates.append((param_vars[i], param_op))
            vel_updates.append((vel_vars[i], vel_op3))
            i += 1
        updates.extend(vel_updates)
        return updates
Example #21
 def predict(self, input_):
     batch_size = self.get_batch_size(input_)
     if batch_size == 0:
         input_ = input_[None, :]
     # do the computation in smaller chunks because some GPUs don't have too much memory
     # the following block of code is equivalent to this line
     # prediction = self.prediction_tensor.eval(session=tfu.get_session(), feed_dict=dict([(self.input_ph, input_)]))
     predictions = []
     for i in range(0, batch_size, self._max_eval_batch_size):
         excerpt = slice(i, min(i+self._max_eval_batch_size, batch_size))
         prediction = self.prediction_tensor.eval(session=tfu.get_session(),
                                                  feed_dict=dict([(self.input_ph, input_[excerpt])]))
         predictions.append(prediction)
     prediction = np.concatenate(predictions, axis=0)
     if batch_size == 0:
         prediction = np.squeeze(prediction, axis=0)
     return prediction
Example #22
 def predict(self, input_):
     batch_size = self.get_batch_size(input_)
     if batch_size == 0:
         input_ = input_[None, :]
     # do the computation in smaller chunks because some GPUs don't have too much memory
     # the following block of code is equivalent to this line
     # prediction = self.prediction_tensor.eval(session=tfu.get_session(), feed_dict=dict([(self.input_ph, input_)]))
     predictions = []
     for i in range(0, batch_size, self._max_eval_batch_size):
         excerpt = slice(i, min(i + self._max_eval_batch_size, batch_size))
         prediction = self.prediction_tensor.eval(session=tfu.get_session(),
                                                  feed_dict=dict([
                                                      (self.input_ph,
                                                       input_[excerpt])
                                                  ]))
         predictions.append(prediction)
     prediction = np.concatenate(predictions, axis=0)
     if batch_size == 0:
         prediction = np.squeeze(prediction, axis=0)
     return prediction
Example #23
    def solve(self,
              input_train_data,
              target_train_data,
              input_val_data,
              target_val_data,
              model,
              callback=None):
        """
        Question 6.b: Optimize the model and return the intermediate losses.

        Optimize the model using stochastic gradient descent by running the
        variable updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration. The validation loss should be computed using the
            same amount of data as the training loss, but using the validation
            data.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        In here, the gradient descent is stochastic, meaning that you don't
        need to use all the data at once before you update the model
        parameters. Instead, you update the model parameters as you iterate
        over the data. You must use MinibatchIndefinitelyGenerator to iterate
        over the data, otherwise your solution might differ from the one of
        the autograder. You will need to instantiate two generators (one for
        the training data and another one for the validation data) and you
        should do it before the for loop. You should read the docstring of
        MinibatchIndefinitelyGenerator in tensorflow_util.py to figure out
        how to use it. Make sure to pass in self.shuffle when you instantiate
        the generator. You will have to choose a proper batch size too.

        Useful member variables and methods:
        self.shuffle
        session.run(...)
        generator.next()
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32,
                                   shape=(None, ) +
                                   target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        loss_tensor = self.get_loss_tensor(
            model.prediction_tensor, target_ph,
            model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor,
                                   model.get_param_vars(trainable=True))
        update_ops = [
            tf.assign(old_var, new_var_or_tensor)
            for (old_var, new_var_or_tensor) in updates
        ]
        train_losses = []
        val_losses = []

        trainGenerator = MinibatchIndefinitelyGenerator(train_data, 1, self.shuffle)
        valGenerator = MinibatchIndefinitelyGenerator(val_data, 1, self.shuffle)

        for iter_ in range(self.iterations):
            "*** YOUR CODE HERE ***"
            # train_loss should be the loss of this iteration using only the training data that was used for the updates
            # val_loss should be the loss of this iteration using the same amount of data used for the updates, but using the validation data instead

            #for data in xrange(num_train_batches): # iterations is the num of gradient updates!
            """
            # should only call .next() once! .next() changes the dataset
            train_loss = session.run(loss_tensor, feed_dict={placeholders[0]: batch_train.next()[0], placeholders[1]: batch_train.next()[1]})
            session.run(update_ops, feed_dict={placeholders[0]: batch_train.next()[0], placeholders[1]: batch_train.next()[1]})
            val_loss = session.run(loss_tensor, feed_dict={placeholders[0]: batch_val.next()[0], placeholders[1]: batch_val.next()[1]})
            """
            tG1, tG2 = trainGenerator.next()
            vG1, vG2 = valGenerator.next()
            train_loss = session.run(loss_tensor,
                                     feed_dict={
                                         placeholders[0]: tG1,
                                         placeholders[1]: tG2
                                     })
            session.run(update_ops,
                        feed_dict={
                            placeholders[0]: tG1,
                            placeholders[1]: tG2
                        })
            val_loss = session.run(loss_tensor,
                                   feed_dict={
                                       placeholders[0]: vG1,
                                       placeholders[1]: vG2
                                   })

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
Example #24
    def get_update_values(self, moduleDict):
        # dataset
        dataset = getattr(datasets, self.dataset)()
        input_data, target_data = dataset[:2]
        # model
        if self.model_module == 'models':  # need to check for this since this is not a student file
            module = models
        else:
            module = moduleDict[self.model_module]
        model_class = getattr(module, self.model_class)
        model_kwargs = dict(num_labels=dataset[1].shape[-1])
        if self.model_class == 'ConvNetModel':
            model_kwargs['x_shape'] = (None, ) + dataset[0].shape[1:]
        else:
            model_kwargs['num_features'] = dataset[0].shape[-1]
        model = model_class(**model_kwargs)
        # solver
        solver_class = getattr(moduleDict[self.solver_module],
                               self.solver_class)
        solver = solver_class(learning_rate=self.learning_rate,
                              iterations=0,
                              momentum=self.momentum)

        target_ph = tf.placeholder(tf.float32, shape=(None, 2))
        loss_tensor = solvers.squared_error(model.prediction_tensor, target_ph)
        param_vars = model.get_param_vars(trainable=True)

        updates = solver.get_updates(loss_tensor, param_vars)
        update_ops = [tf.assign(old, new) for (old, new) in updates]
        feed_dict = dict(
            list(zip([model.input_ph, target_ph], [input_data, target_data])))
        for i in range(self.update_iterations):
            tfu.get_session().run(update_ops, feed_dict=feed_dict)

        grad_tensors = tf.gradients(loss_tensor, param_vars)
        grads = [
            grad_tensor.eval(session=tfu.get_session(), feed_dict=feed_dict)
            for grad_tensor in grad_tensors
        ]

        len_messages = len(self.messages)
        if not isinstance(updates, (list, tuple)):
            self.addMessage('updates should be a list, %r given' % updates)
            return updates, None, grads
        # Check updates are in the right format
        for update in updates:
            try:
                old, new = update
            except ValueError:
                self.addMessage(
                    'Each update in updates should be of length 2, but it is of length %d'
                    % len(update))
                continue
            if not isinstance(old, tf.Variable):
                self.addMessage(
                    'The first element in the tuple update should be a tf.Variable, %r given'
                    % old)
            if not isinstance(new, (tf.Variable, tf.Tensor)):
                self.addMessage(
                    'The second element in the tuple update should be a tf.Variable or a tf.Tensor, %r given'
                    % new)
        if len(self.messages) > len_messages:
            return updates, None, grads
        # Check for repeated variables
        if len(set(list(zip(*updates))[0])) != len(updates):
            self.addMessage(
                'There are some repeated variables being updated: %r' %
                (list(zip(*updates))[0],))
            return updates, None, grads
        update_values = [
            tfu.get_session().run(update, feed_dict=feed_dict)
            for update in updates
        ]
        return updates, update_values, grads
Example #25
 def set_param_values(self, param_values, **tags):
     param_vars = self.get_param_vars(**tags)
     if len(param_values) != len(param_vars):
         raise ValueError('there are %d parameter variables with the given tags '
                          'but %d parameter values were given' % (len(param_vars), len(param_values)))
     tfu.get_session().run([tf.assign(param_var, param_value) for (param_var, param_value) in zip(param_vars, param_values)])
Example #26
 def get_param_values(self, **tags):
     param_vars = self.get_param_vars(**tags)
     return [param_var.eval(session=tfu.get_session()) for param_var in param_vars]
Example #27
    def __init__(self, use_batchnorm=False, use_dropout=False, x_shape=(None, 28, 28, 1), num_labels=10):
        super(ConvNetModel, self).__init__()
        _, image_size, _, num_channels = x_shape
        assert x_shape[2] == image_size
        self.x = tf.placeholder(tf.float32, shape=x_shape)
        self.input_ph = self.x
        is_train = True
        init_symmetry = False
        var_eps = 1e-20
        use_global_bn = True
        if use_global_bn:
            bn_axes = [0,1,2]
        else:
            bn_axes = [0]

        if init_symmetry:
            conv1_weights = tf.Variable(
                tf.zeros([5, 5, num_channels, 32]))  # 5x5 filter, depth 32.
            conv1_biases = tf.Variable(tf.zeros([32]))
            conv2_weights = tf.Variable(
               tf.zeros([5, 5, 32, 64]))
            conv2_biases = tf.Variable(tf.zeros([64]))
            fc1_weights = tf.Variable(  # fully connected, depth 512.
                tf.constant(0.1,
                      shape = [image_size // 4 * image_size // 4 * 64, 512],
                      ))
            fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
            fc2_weights = tf.Variable(
                tf.constant(0.1,shape=[512, num_labels],
                                  ))
            fc2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]))
        else:
            conv1_weights = tf.Variable(
                truncated_normal([5, 5, num_channels, 32],  # 5x5 filter, depth 32.
                                      stddev=0.1, fixed_random=self._fixed_random))
            conv1_biases = tf.Variable(tf.zeros([32]))
            conv2_weights = tf.Variable(
               truncated_normal([5, 5, 32, 64],
                                  stddev=0.1, fixed_random=self._fixed_random))
            conv2_biases = tf.Variable(tf.constant(0., shape=[64]))
            fc1_weights = tf.Variable(  # fully connected, depth 512.
                truncated_normal(
                      [image_size // 4 * image_size // 4 * 64, 512],
                      stddev=0.1, fixed_random=self._fixed_random))
            fc1_biases = tf.Variable(tf.constant(0., shape=[512]))
            fc2_weights = tf.Variable(
                truncated_normal([512, num_labels],
                                  stddev=0.1, fixed_random=self._fixed_random))
            fc2_biases = tf.Variable(tf.constant(0., shape=[num_labels]))


        # Add parameter variables for solvers
        self.conv1_weights = self.add_param_var(conv1_weights)
        self.conv1_biases = self.add_param_var(conv1_biases)
        self.conv2_weights = self.add_param_var(conv2_weights)
        self.conv2_biases = self.add_param_var(conv2_biases)
        self.fc1_weights = self.add_param_var(fc1_weights)
        self.fc1_biases = self.add_param_var(fc1_biases)
        self.fc2_weights = self.add_param_var(fc2_weights)
        self.fc2_biases = self.add_param_var(fc2_biases)

        #Run Inference
        conv = tf.nn.conv2d(self.x,
                        conv1_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')

        conv = tf.nn.bias_add(conv, conv1_biases)

        #Add batch norm
        if use_batchnorm:
            mean, variance = tf.nn.moments(conv, bn_axes)
            conv = tf.nn.batch_normalization(conv, mean, variance, None, None, var_eps)

        relu = tf.nn.relu(conv)
        pool = tf.nn.max_pool(relu,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME')

        conv = tf.nn.conv2d(pool,
                            conv2_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')

        conv = tf.nn.bias_add(conv, conv2_biases)
        #Add batch norm
        if use_batchnorm:
            mean, variance = tf.nn.moments(conv, bn_axes)
            conv = tf.nn.batch_normalization(conv, mean, variance, None, None, var_eps)

        relu = tf.nn.relu(conv)
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')


        #Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        pool_shape = pool.get_shape().as_list()
        reshape = tf.reshape(
                pool,
                [-1, pool_shape[1] * pool_shape[2] * pool_shape[3]])
        # Fully connected layer. Note that the '+' operation automatically
        # broadcasts the biases.
        hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        # Add a 50% dropout during training only. Dropout also scales
        # activations such that no rescaling is needed at evaluation time.
        if is_train and use_dropout:
            hidden = tf.nn.dropout(hidden, 0.5, seed=_SEED)

        logits = tf.matmul(hidden, fc2_weights) + fc2_biases
        self.prediction_tensor = tf.nn.softmax(logits)

        tfu.get_session().run([param_var.initializer for param_var in self.get_param_vars()])
Example #28
    def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
        """
        Question 6.c: Optimize the model and return the intermediate losses.

        Optimize the model using minibatch stochastic gradient descent by
        running the variable updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration. The validation loss should be computed using the
            same amount of data as the training loss, but using the validation
            data.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        For minibatch stochastic gradient descent, you will need to iterate
        over the data in minibatches. As before, you must use
        MinibatchIndefinitelyGenerator to iterate over the data. You will
        need to instantiate two generators (one for the training data and
        another one for the validation data) and you should do it before the
        for loop. You should read the docstring of
        MinibatchIndefinitelyGenerator in tensorflow_util.py to figure out
        how to use it. Make sure to pass in self.batch_size and self.shuffle
        when you instantiate the generator.

        Useful member variables and methods:
        self.batch_size
        self.shuffle
        session.run(...)
        generator.next()
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]

        loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
        update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]
        train_losses = []
        val_losses = []
        train_data_generator = MinibatchIndefinitelyGenerator(train_data, self.batch_size, self.shuffle)
        val_data_generator = MinibatchIndefinitelyGenerator(val_data, self.batch_size, self.shuffle)
        for iter_ in range(self.iterations):
            train_dict = dict(zip(placeholders, train_data_generator.next()))
            val_dict = dict(zip(placeholders, val_data_generator.next()))

            train_loss, _ = session.run([loss_tensor, update_ops], feed_dict=train_dict)
            val_loss = session.run(loss_tensor, feed_dict=val_dict)

            train_losses.append(train_loss)
            val_losses.append(val_loss)

            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
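Several of the solvers above depend on MinibatchIndefinitelyGenerator from tensorflow_util.py, which is not reproduced in this collection. The sketch below is a rough NumPy stand-in for how such a generator might behave, assuming an interface of (list of arrays, batch size, shuffle flag) with .next() returning one minibatch per array; the real course implementation may differ in details such as shuffling behavior:

import numpy as np

class MinibatchGeneratorSketch(object):
    """Hypothetical stand-in for MinibatchIndefinitelyGenerator.

    Cycles over the data forever, optionally reshuffling after each full pass,
    and returns one minibatch per input array on every call to next().
    """
    def __init__(self, data_list, batch_size, shuffle):
        self.data_list = data_list
        self.batch_size = batch_size
        self.shuffle = shuffle
        self._order = np.arange(len(data_list[0]))
        self._pos = 0
        if self.shuffle:
            np.random.shuffle(self._order)

    def next(self):
        indices = []
        while len(indices) < self.batch_size:
            if self._pos == len(self._order):  # finished a pass over the data
                self._pos = 0
                if self.shuffle:
                    np.random.shuffle(self._order)
            indices.append(self._order[self._pos])
            self._pos += 1
        indices = np.array(indices)
        return [data[indices] for data in self.data_list]

# Hypothetical usage, mirroring how the solvers above call it:
# gen = MinibatchGeneratorSketch([input_train_data, target_train_data],
#                                batch_size=32, shuffle=True)
# input_batch, target_batch = gen.next()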
Example #29
    def solve(self,
              input_train_data,
              target_train_data,
              input_val_data,
              target_val_data,
              model,
              callback=None):
        """
        Question 6.a: Optimize the model and return the intermediate losses.

        Optimize the model using gradient descent by running the variable
        updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        You may not need to fill in both "*** YOUR CODE HERE ***" blanks,
        but they are both provided so you can define variables outside and
        inside the for loop.

        Useful method:
        session.run
        """
        session = tfu.get_session()
        #the input placeholder is already defined in the model, so we define the target placeholder
        target_ph = tf.placeholder(tf.float32,
                                   shape=(None, ) +
                                   target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        print(placeholders[0])
        #variable for input training data
        train_data = [input_train_data, target_train_data]
        #variable for validation data
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        training_dict = {}
        i = 0
        for placeholder in placeholders:
            training_dict[placeholder] = train_data[i]
            i = i + 1

        validation_dict = {}
        j = 0
        for placeholder in placeholders:
            validation_dict[placeholder] = val_data[j]
            j = j + 1

        #creating the definition of the loss tensor
        loss_tensor = self.get_loss_tensor(
            model.prediction_tensor, target_ph,
            model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor,
                                   model.get_param_vars(trainable=True))
        # `updates` holds the (variable, new_tensor) pairs from Questions 4 & 5; tf.assign turns each pair into an op that applies it.
        update_ops = [
            tf.assign(old_var, new_var_or_tensor)
            for (old_var, new_var_or_tensor) in updates
        ]
        train_losses = []  # training loss per iteration
        val_losses = []  # validation loss per iteration
        for iter_ in range(self.iterations):
            "*** YOUR CODE HERE ***"

            #util.raiseNotDefined()

            # train_loss should be the loss of this iteration using all of the training data
            # Compute the loss and run the parameter-update ops in a single call.
            train_loss, _ = session.run([loss_tensor, update_ops],
                                        feed_dict=training_dict)
            # val_loss should be the loss of this iteration using all of the validation data
            val_loss = session.run(loss_tensor, feed_dict=validation_dict)

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
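Each (old_var, new_var_or_tensor) pair returned by get_updates encodes one parameter update, and tf.assign turns it into an op that writes the new value back into the variable. For plain gradient descent (Question 4) the new value is presumably just the variable minus the scaled gradient; a minimal NumPy sketch with illustrative stand-in numbers:

import numpy as np

learning_rate = 0.01                              # illustrative value
w = np.array([0.5, -1.0], dtype=np.float32)       # a parameter variable
grad = np.array([0.2, 0.4], dtype=np.float32)     # its gradient dLoss/dw

# What one (var, new_tensor) pair amounts to for plain gradient descent;
# tf.assign(var, new_tensor) then writes w_new back into the variable.
w_new = w - learning_rate * grad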
Example #30
0
    def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
        """
        Question 6.b: Optimize the model and return the intermediate losses.

        Optimize the model using stochastic gradient descent by running the
        variable updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration. The validation loss should be computed using the
            same amount of data as the training loss, but using the validation
            data.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        In here, the gradient descent is stochastic, meaning that you don't
        need to use all the data at once before you update the model
        parameters. Instead, you update the model parameters as you iterate
        over the data. You must use MinibatchIndefinitelyGenerator to iterate
        over the data, otherwise your solution might differ from the one of
        the autograder. You will need to instantiate two generators (one for
        the training data and another one for the validation data) and you
        should do it before the for loop. You should read the docstring of
        MinibatchIndefinitelyGenerator in tensorflow_util.py to figure out
        how to use it. Make sure to pass in self.shuffle when you instantiate
        the generator. You will have to choose a proper batch size too.

        Useful member variables and methods:
        self.shuffle
        session.run(...)
        generator.next()
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
        update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]
        train_losses = []
        val_losses = []
        for iter_ in range(self.iterations):
            "*** YOUR CODE HERE ***"
            util.raiseNotDefined()
            # train_loss should be the loss of this iteration using only the training data that was used for the updates
            # val_loss should be the loss of this iteration using the same amount of data used for the updates, but using the validation data instead
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
Example #31
0
    def solve(self,
              input_train_data,
              target_train_data,
              input_val_data,
              target_val_data,
              model,
              callback=None):
        """
        Question 6.a: Optimize the model and return the intermediate losses.

        Optimize the model using gradient descent by running the variable
        updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        You may not need to fill in both "*** YOUR CODE HERE ***" blanks,
        but they are both provided so you can define variables outside and
        inside the for loop.

        Useful method:
        session.run
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32,
                                   shape=(None, ) +
                                   target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        loss_tensor = self.get_loss_tensor(
            model.prediction_tensor, target_ph,
            model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor,
                                   model.get_param_vars(trainable=True))
        update_ops = [
            tf.assign(old_var, new_var_or_tensor)
            for (old_var, new_var_or_tensor) in updates
        ]
        train_losses = []
        val_losses = []
        #train_loss = session.run(loss_tensor, feed_dict={model.input_ph:input_train_data, target_ph: target_train_data})
        #val = session.run(loss_tensor, feed_dict={model.input_ph:input_val_data, target_ph: target_val_data})
        for iter_ in range(self.iterations):
            "*** YOUR CODE HERE ***"
            #print "self.iterations",self.iterations
            #print "update_ops",update_ops
            train_loss = 0
            val_loss = 0
            for i in range(len(update_ops)):
                train, _ = session.run([loss_tensor, update_ops[i]],
                                             feed_dict={
                                                 model.input_ph:
                                                 input_train_data,
                                                 target_ph: target_train_data
                                             })
                train_loss += train
                val = session.run(loss_tensor,
                                  feed_dict={
                                      model.input_ph: input_val_data,
                                      target_ph: target_val_data
                                  })
                val_loss += val
            train_loss /= len(update_ops)
            val_loss /= len(update_ops)
            # train_loss should be the loss of this iteration using all of the training data
            # val_loss should be the loss of this iteration using all of the validation data
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
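The solve above runs each assign op in its own session.run call, evaluating the training and validation losses alongside every single assign and averaging over len(update_ops), so later assigns see the values written by earlier ones; the other examples in this collection issue all update ops in one call per iteration. A self-contained toy sketch showing that sequential and simultaneous application of coupled updates are genuinely different schedules:

import numpy as np

# Toy loss (a + b - 1)**2, so both gradients equal 2 * (a + b - 1).
lr = 0.1
a, b = 0.0, 0.0

# Simultaneous update: both new values are computed from the same snapshot.
g = 2 * (a + b - 1)
a_sim, b_sim = a - lr * g, b - lr * g            # (0.2, 0.2)

# Sequential update: b's gradient is recomputed after a has already changed.
g_a = 2 * (a + b - 1)
a_seq = a - lr * g_a                             # 0.2
g_b = 2 * (a_seq + b - 1)
b_seq = b - lr * g_b                             # 0.16

print((a_sim, b_sim), (a_seq, b_seq))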
Example #32
0
    def __init__(self,
                 use_batchnorm=False,
                 use_dropout=False,
                 x_shape=(None, 28, 28, 1),
                 num_labels=10):
        super(ConvNetModel, self).__init__()
        _, image_size, _, num_channels = x_shape
        assert x_shape[2] == image_size
        self.x = tf.placeholder(tf.float32, shape=x_shape)
        self.input_ph = self.x
        is_train = True
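        # Note: is_train is hard-coded to True, so when use_dropout is set the
        # dropout layer below is applied on every forward pass, including evaluation.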
        init_symmetry = False
        var_eps = 1e-20
        use_global_bn = True
        if use_global_bn:
            bn_axes = [0, 1, 2]
        else:
            bn_axes = [0]

        if init_symmetry:
            conv1_weights = tf.Variable(tf.zeros([5, 5, num_channels, 32
                                                  ]))  # 5x5 filter, depth 32.
            conv1_biases = tf.Variable(tf.zeros([32]))
            conv2_weights = tf.Variable(tf.zeros([5, 5, 32, 64]))
            conv2_biases = tf.Variable(tf.zeros([64]))
            fc1_weights = tf.Variable(  # fully connected, depth 512.
                tf.constant(
                    0.1,
                    shape=[image_size // 4 * image_size // 4 * 64, 512],
                ))
            fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
            fc2_weights = tf.Variable(
                tf.constant(
                    0.1,
                    shape=[512, num_labels],
                ))
            fc2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]))
        else:
            conv1_weights = tf.Variable(
                truncated_normal(
                    [5, 5, num_channels, 32],  # 5x5 filter, depth 32.
                    stddev=0.1,
                    fixed_random=self._fixed_random))
            conv1_biases = tf.Variable(tf.zeros([32]))
            conv2_weights = tf.Variable(
                truncated_normal([5, 5, 32, 64],
                                 stddev=0.1,
                                 fixed_random=self._fixed_random))
            conv2_biases = tf.Variable(tf.constant(0., shape=[64]))
            fc1_weights = tf.Variable(  # fully connected, depth 512.
                truncated_normal([image_size // 4 * image_size // 4 * 64, 512],
                                 stddev=0.1,
                                 fixed_random=self._fixed_random))
            fc1_biases = tf.Variable(tf.constant(0., shape=[512]))
            fc2_weights = tf.Variable(
                truncated_normal([512, num_labels],
                                 stddev=0.1,
                                 fixed_random=self._fixed_random))
            fc2_biases = tf.Variable(tf.constant(0., shape=[num_labels]))

        # Add parameter variables for solvers
        self.conv1_weights = self.add_param_var(conv1_weights)
        self.conv1_biases = self.add_param_var(conv1_biases)
        self.conv2_weights = self.add_param_var(conv2_weights)
        self.conv2_biases = self.add_param_var(conv2_biases)
        self.fc1_weights = self.add_param_var(fc1_weights)
        self.fc1_biases = self.add_param_var(fc1_biases)
        self.fc2_weights = self.add_param_var(fc2_weights)
        self.fc2_biases = self.add_param_var(fc2_biases)

        #Run Inference
        conv = tf.nn.conv2d(self.x,
                            conv1_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')

        conv = tf.nn.bias_add(conv, conv1_biases)

        #Add batch norm
        if use_batchnorm:
            mean, variance = tf.nn.moments(conv, bn_axes)
            conv = tf.nn.batch_normalization(conv, mean, variance, None, None,
                                             var_eps)

        relu = tf.nn.relu(conv)
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

        conv = tf.nn.conv2d(pool,
                            conv2_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')

        conv = tf.nn.bias_add(conv, conv2_biases)
        #Add batch norm
        if use_batchnorm:
            mean, variance = tf.nn.moments(conv, bn_axes)
            conv = tf.nn.batch_normalization(conv, mean, variance, None, None,
                                             var_eps)

        relu = tf.nn.relu(conv)
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

        #Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        pool_shape = pool.get_shape().as_list()
        reshape = tf.reshape(
            pool, [-1, pool_shape[1] * pool_shape[2] * pool_shape[3]])
        # Fully connected layer. Note that the '+' operation automatically
        # broadcasts the biases.
        hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        # Add a 50% dropout during training only. Dropout also scales
        # activations such that no rescaling is needed at evaluation time.
        if is_train and use_dropout:
            hidden = tf.nn.dropout(hidden, 0.5, seed=_SEED)

        logits = tf.matmul(hidden, fc2_weights) + fc2_biases
        self.prediction_tensor = tf.nn.softmax(logits)

        tfu.get_session().run(
            [param_var.initializer for param_var in self.get_param_vars()])
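When use_batchnorm is set, the model normalizes each convolution output with tf.nn.moments followed by tf.nn.batch_normalization with no learned scale or offset, which reduces to centering by the batch mean and dividing by the batch standard deviation. A small NumPy sketch of that computation (the axes and epsilon mirror bn_axes and var_eps above; the input array is illustrative):

import numpy as np

def batch_normalize(x, axes=(0, 1, 2), eps=1e-20):
    # Per-channel statistics over the batch and spatial axes, as in
    # tf.nn.moments(conv, bn_axes) with use_global_bn=True.
    mean = x.mean(axis=axes, keepdims=True)
    var = x.var(axis=axes, keepdims=True)
    # tf.nn.batch_normalization with offset=None and scale=None:
    return (x - mean) / np.sqrt(var + eps)

activations = np.random.randn(8, 14, 14, 32).astype(np.float32)
normalized = batch_normalize(activations)   # roughly zero mean, unit variance per channel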
Example #33
0
    def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
        """
        Question 6.c: Optimize the model and return the intermediate losses.

        Optimize the model using minibatch stochastic gradient descent by
        running the variable updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration. The validation loss should be computed using the
            same amount of data as the training loss, but using the validation
            data.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        For minibatch stochastic gradient descent, you will need to iterate
        over the data in minibatches. As before, you must use
        MinibatchIndefinitelyGenerator to iterate over the data. You will
        need to instantiate two generators (one for the training data and
        another one for the validation data) and you should do it before the
        for loop. You should read the docstring of
        MinibatchIndefinitelyGenerator in tensorflow_util.py to figure out
        how to use it. Make sure to pass in self.batch_size and self.shuffle
        when you instantiate the generator.

        Useful member variables and methods:
        self.batch_size
        self.shuffle
        session.run(...)
        generator.next()
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        # Q6c: Question 6c
        # First we set up a training data generator so that it can yield training minibatches.
        # input_train_data is of size NxR; target_train_data is of size NxS.
        # Each call to next() returns matching rows from the inputs and from the targets (the labels).
        trainGen = tfu.MinibatchIndefinitelyGenerator(train_data, self.batch_size, self.shuffle)
        validateGen = tfu.MinibatchIndefinitelyGenerator(val_data, self.batch_size, self.shuffle)

        loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
        update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]
        train_losses = []
        val_losses = []
        for iter_ in range(self.iterations):
            "*** YOUR CODE HERE ***"
            #util.raiseNotDefined()
            # Step 1a: Training step: compute the loss with a forward propagation.
            # train_loss should be the loss of this iteration using only the training data that was used for the updates
            # Get one training minibatch first.
            oneTrainData, oneTargetData = trainGen.next()
            trainFeedDict = {placeholders[0]: oneTrainData, placeholders[1]: oneTargetData}
            train_loss = session.run(loss_tensor, trainFeedDict)
            train_losses.append(train_loss)
            # Step 1b: update the weights of the network with a backward propagation.
            session.run(update_ops, trainFeedDict)
            # Step 2: Check validation data.
            # val_loss should be the loss of this iteration using the same amount of data used for the updates, but using the validation data instead
            # Get one validation minibatch.
            oneValidateData, oneValTargetData = validateGen.next()
            validateFeedDict = {placeholders[0]: oneValidateData, placeholders[1]: oneValTargetData}
            val_loss = session.run(loss_tensor, validateFeedDict)
            val_losses.append(val_loss)

            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
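The examples above rely only on MinibatchIndefinitelyGenerator returning, from every next() call, one minibatch per input array with matching rows; the real class lives in tensorflow_util.py. A toy stand-in with the same interface, useful for checking the feed-dict plumbing without TensorFlow (illustrative only, not the assignment's implementation):

import numpy as np

class ToyMinibatchGenerator(object):
    """Cycles through the given arrays forever, yielding aligned minibatches."""
    def __init__(self, data_arrays, batch_size, shuffle):
        self.data = [np.asarray(d) for d in data_arrays]
        self.batch_size = batch_size
        self.order = np.arange(len(self.data[0]))
        if shuffle:
            np.random.shuffle(self.order)
        self.pos = 0

    def next(self):
        idx = [self.order[(self.pos + k) % len(self.order)]
               for k in range(self.batch_size)]
        self.pos = (self.pos + self.batch_size) % len(self.order)
        return [d[idx] for d in self.data]

gen = ToyMinibatchGenerator([np.arange(10).reshape(10, 1),
                             2 * np.arange(10).reshape(10, 1)], 4, shuffle=False)
inputs, targets = gen.next()   # aligned rows from both arrays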
Example #34
0
    def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
        """
        Question 6.a: Optimize the model and return the intermediate losses.

        Optimize the model using gradient descent by running the variable
        updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        You may not need to fill in both "*** YOUR CODE HERE ***" blanks,
        but they are both provided so you can define variables outside and
        inside the for loop.

        Useful method:
        session.run
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        # Q6a: Question 6a
        trainFeedDict = {placeholders[0]: train_data[0], placeholders[1]: train_data[1]}
        validateFeedDict = {placeholders[0]: val_data[0], placeholders[1]: val_data[1]}
        # Prepare the operations to be performed; we do not actually execute them here.
        loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
        update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]
        train_losses = []
        val_losses = []
        for iter_ in range(self.iterations):
            "*** YOUR CODE HERE ***"
            #util.raiseNotDefined()
            # Step 1a: Training step: compute the loss with a forward propagation.
            # train_loss should be the loss of this iteration using all of the training data
            train_loss = session.run(loss_tensor, trainFeedDict)
            train_losses.append(train_loss)
            # Step 1b: update the weights of the network with a backward propagation.
            session.run(update_ops, trainFeedDict)
            # Step 2: Check validation data.
            # val_loss should be the loss of this iteration using all of the validation data
            val_loss = session.run(loss_tensor, validateFeedDict)
            val_losses.append(val_loss)
            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
Example #35
0
    def solve(self,
              input_train_data,
              target_train_data,
              input_val_data,
              target_val_data,
              model,
              callback=None):
        """
        Question 6.c: Optimize the model and return the intermediate losses.

        Optimize the model using minibatch stochastic gradient descent by
        running the variable updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration. The validation loss should be computed using the
            same amount of data as the training loss, but using the validation
            data.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        For minibatch stochastic gradient descent, you will need to iterate
        over the data in minibatches. As before, you must use
        MinibatchIndefinitelyGenerator to iterate over the data. You will
        need to instantiate two generators (one for the training data and
        another one for the validation data) and you should do it before the
        for loop. You should read the docstring of
        MinibatchIndefinitelyGenerator in tensorflow_util.py to figure out
        how to use it. Make sure to pass in self.batch_size and self.shuffle
        when you instantiate the generator.

        Useful member variables and methods:
        self.batch_size
        self.shuffle
        session.run(...)
        generator.next()
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32,
                                   shape=(None, ) +
                                   target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        loss_tensor = self.get_loss_tensor(
            model.prediction_tensor, target_ph,
            model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor,
                                   model.get_param_vars(trainable=True))
        update_ops = [
            tf.assign(old_var, new_var_or_tensor)
            for (old_var, new_var_or_tensor) in updates
        ]
        train_losses = []
        val_losses = []
        trainGenerator = MinibatchIndefinitelyGenerator(
            train_data, self.batch_size, self.shuffle)
        valGenerator = MinibatchIndefinitelyGenerator(
            val_data, self.batch_size, self.shuffle)

        for iter_ in range(self.iterations):

            # train_loss should be the loss of this iteration using only the training data that was used for the updates
            # val_loss should be the loss of this iteration using the same amount of data used for the updates, but using the validation data instead

            trainData = trainGenerator.next()
            valData = valGenerator.next()

            feed_dict_train = dict(zip(placeholders, trainData))

            train_loss = session.run(loss_tensor, feed_dict_train)

            session.run(update_ops, feed_dict_train)

            feed_dict_val = dict(zip(placeholders, valData))

            val_loss = session.run(loss_tensor, feed_dict_val)

            train_losses.append(train_loss)
            val_losses.append(val_loss)

            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)
        return train_losses, val_losses
Example #36
0
    def get_param_values(self, **tags):
        param_vars = self.get_param_vars(**tags)
        return [
            param_var.eval(session=tfu.get_session())
            for param_var in param_vars
        ]
    def solve(self,
              input_train_data,
              target_train_data,
              input_val_data,
              target_val_data,
              model,
              callback=None):
        """
        Question 6.a: Optimize the model and return the intermediate losses.

        Optimize the model using gradient descent by running the variable
        updates for self.iterations iterations.

        Args:
            input_train_data: a numpy.array with shape (N, R)
            target_train_data: a numpy.array with shape (N, S)
            input_val_data: a numpy.array with shape (M, R)
            target_val_data: a numpy.array with shape (M, S)
            model: the model from which the parameters are optimized

        Returns:
            A tuple of lists, where the first list contains the training loss of
            each iteration and the second list contains the validation loss of
            each iteration.

        N and M are the numbers of training and validation points, respectively, and R and S
        are the dimensions for each input and target data point, respectively.

        You may not need to fill in both "*** YOUR CODE HERE ***" blanks,
        but they are both provided so you can define variables outside and
        inside the for loop.

        Useful method:
        session.run
        """
        session = tfu.get_session()
        target_ph = tf.placeholder(tf.float32,
                                   shape=(None, ) +
                                   target_train_data.shape[1:])
        placeholders = [model.input_ph, target_ph]
        train_data = [input_train_data, target_train_data]
        val_data = [input_val_data, target_val_data]
        # You may want to initialize some variables that are shared across iterations
        "*** YOUR CODE HERE ***"
        # How many iterations do we have to go through: self.iterations
        # Inputs: input_train_data, target_train_data, input_val_data, target_val_data, model
        # Want: a tuple of lists,
        #   1st list: training loss for each iteration
        #   2nd list: validation loss for each iteration

        # Method we should use: session.run

        # Nothing else needs to be initialized here.

        loss_tensor = self.get_loss_tensor(
            model.prediction_tensor, target_ph,
            model.get_param_vars(regularizable=True))
        updates = self.get_updates(loss_tensor,
                                   model.get_param_vars(trainable=True))
        update_ops = [
            tf.assign(old_var, new_var_or_tensor)
            for (old_var, new_var_or_tensor) in updates
        ]
        train_losses = []
        val_losses = []

        #print "loss:\n"
        #print "OK"
        #print target_ph
        #print target_train_data.shape[1:]
        #print train_data
        #print placeholders[0]
        #print train_data[0].shape,"   "
        #print train_data[1].shape,"   "
        #print val_data
        #print val_data[0].shape,"   "
        #print val_data[1].shape,"   "
        #print model.prediction_tensor
        #print model.get_param_vars(regularizable=True)
        #a=val_data[0]+val_data[1]
        #print a.shape
        #print model.get_param_vars(regularizable=True)
        #print updates
        #print update_ops

        # Main loop: go through all the iterations and do the computations.
        for iter_ in range(self.iterations):

            "*** YOUR CODE HERE ***"
            # util.raiseNotDefined()
            # train_loss should be the loss of this iteration using all of the training data
            # val_loss should be the loss of this iteration using all of the validation data
            train_loss = session.run(loss_tensor,
                                     feed_dict={
                                         placeholders[0]: train_data[0],
                                         placeholders[1]: train_data[1]
                                     })
            session.run(update_ops,
                        feed_dict={
                            placeholders[0]: train_data[0],
                            placeholders[1]: train_data[1]
                        })
            val_loss = session.run(loss_tensor,
                                   feed_dict={
                                       placeholders[0]: val_data[0],
                                       placeholders[1]: val_data[1]
                                   })

            train_losses.append(train_loss)
            val_losses.append(val_loss)

            if callback is not None: callback(model)
            self.display_progress(iter_, train_losses, val_losses)

        return train_losses, val_losses