def test_two_layer_conv(self):
    bs = 10
    r = 2
    n_channels = 3
    n_filters = 5
    w, h = 32, 32
    # define a simple model
    x = nn.Tensor((bs, n_channels, h, w))
    W1 = nn.Constant(
        np.random.randn(n_filters, n_channels, 2 * r + 1, 2 * r + 1).astype(np.float32))
    W2 = nn.Constant(
        np.random.randn(n_channels, n_filters, 2 * r + 1, 2 * r + 1).astype(np.float32))
    y = nn.conv(x, W1)
    self.assertEqual(y.output.shape, (bs, n_filters, h, w))
    y = nn.conv(y, W2)
    self.assertEqual(y.output.shape, (bs, n_channels, h, w))
    # compile model
    mdl = nn.compile_model(x, y)
    # emulate model with numpy
    npx = np.random.randn(bs, n_channels, h, w).astype(np.float32)
    npy = np_pad(npx, r)
    npy = np_conv(npy, W1.value)
    npy = np_pad(npy, r)
    npy = np_conv(npy, W2.value)
    # results must be very similar
    res = mdl(npx)
    self.assertLess(np.max(np.abs(npy - res)), 1e-3)
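# For reference, a minimal sketch of what the np_pad / np_conv helpers used by
# the test above are assumed to do: zero padding of the two spatial axes by r,
# and a stride-1 "valid" cross-correlation over NCHW inputs. These are
# illustrative stand-ins; the repo's actual helpers may differ in detail.
import numpy as np

def np_pad(x, r):
    # zero-pad the height and width of a (bs, ch, H, W) array by r on each side
    return np.pad(x, ((0, 0), (0, 0), (r, r), (r, r)))

def np_conv(x, w):
    # naive stride-1 "valid" cross-correlation
    # x: (bs, in_ch, H, W), w: (out_ch, in_ch, kh, kw)
    bs, in_ch, H, W = x.shape
    out_ch, _, kh, kw = w.shape
    out = np.zeros((bs, out_ch, H - kh + 1, W - kw + 1), dtype=x.dtype)
    for i in range(out.shape[2]):
        for j in range(out.shape[3]):
            patch = x[:, :, i:i + kh, j:j + kw]  # (bs, in_ch, kh, kw)
            out[:, :, i, j] = np.tensordot(patch, w, axes=([1, 2, 3], [1, 2, 3]))
    return out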
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    while True:
        for x, y in dataset.iterate_once(self.batch_size):
            loss = self.get_loss(x, y)
            grad = nn.gradients(loss, [self.w0, self.w1, self.b0, self.b1])
            self.w0.update(grad[0], -0.005)
            self.w1.update(grad[1], -0.005)
            self.b0.update(grad[2], -0.005)
            self.b1.update(grad[3], -0.005)
        # stop once the loss over the full dataset drops below 0.02
        if nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                      nn.Constant(dataset.y))) < 0.02:
            return
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    # initialize training flag to False
    training = False
    # arbitrarily set batch size to 10 and learning rate to -0.01
    batch_size = 10
    learning_rate = -0.01
    # loop to train the neural network until it converges
    while not training:
        for (x, y) in dataset.iterate_once(batch_size):
            # get the loss value
            loss = self.get_loss(x, y)
            # gradients of the loss with respect to the parameters
            gradient_w1, gradient_w2, gradient_b1, gradient_b2 = nn.gradients(
                loss, [self.weight1, self.weight2, self.bias1, self.bias2])
            # update each weight and bias based on the learning rate:
            # m.update(grad_wrt_m, multiplier)
            self.weight1.update(gradient_w1, learning_rate)
            self.weight2.update(gradient_w2, learning_rate)
            self.bias1.update(gradient_b1, learning_rate)
            self.bias2.update(gradient_b2, learning_rate)
        # total loss averaged across all examples in the dataset
        total_loss = self.get_loss(nn.Constant(dataset.x), nn.Constant(dataset.y))
        # the test passes once the final loss is <= 0.02
        if nn.as_scalar(total_loss) <= 0.02:
            training = True
    return
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    while True:  # keep updating until the loss is 0.02 or less
        for inpu, outp in dataset.iterate_once(self.batch_size):
            # compute gradients of the loss w.r.t. the parameters
            gradient = nn.gradients(self.get_loss(inpu, outp),
                                    [self.W1, self.W2, self.B1, self.B2])
            # update the parameters to minimize the loss
            self.W1.update(gradient[0], self.multiplier)
            self.W2.update(gradient[1], self.multiplier)
            self.B1.update(gradient[2], self.multiplier)
            self.B2.update(gradient[3], self.multiplier)
        # see if the loss objective has been hit
        x = nn.Constant(dataset.x)
        y = nn.Constant(dataset.y)
        if nn.as_scalar(self.get_loss(x, y)) <= 0.02:
            break
def check_fashion_classification(tracker):
    import models
    model = models.FashionClassificationModel()
    dataset = backend.FashionClassificationDataset(model)

    detected_parameters = None
    for batch_size in (1, 2, 4):
        inp_x = nn.Constant(dataset.x[:batch_size])
        inp_y = nn.Constant(dataset.y[:batch_size])
        output_node = model.run(inp_x)
        verify_node(output_node, 'node', (batch_size, 10),
                    "FashionClassificationModel.run()")
        trace = trace_node(output_node)
        assert inp_x in trace, (
            "Node returned from FashionClassificationModel.run() does not "
            "depend on the provided input (x)")
        if detected_parameters is None:
            detected_parameters = [
                node for node in trace if isinstance(node, nn.Parameter)
            ]
        for node in trace:
            assert not isinstance(node, nn.Parameter) or node in detected_parameters, (
                "Calling FashionClassificationModel.run() multiple times should "
                "always re-use the same parameters, but a new nn.Parameter "
                "object was detected")

    for batch_size in (1, 2, 4):
        inp_x = nn.Constant(dataset.x[:batch_size])
        inp_y = nn.Constant(dataset.y[:batch_size])
        loss_node = model.get_loss(inp_x, inp_y)
        verify_node(loss_node, 'loss', None,
                    "FashionClassificationModel.get_loss()")
        trace = trace_node(loss_node)
        assert inp_x in trace, (
            "Node returned from FashionClassificationModel.get_loss() does not "
            "depend on the provided input (x)")
        assert inp_y in trace, (
            "Node returned from FashionClassificationModel.get_loss() does not "
            "depend on the provided labels (y)")
        for node in trace:
            assert not isinstance(node, nn.Parameter) or node in detected_parameters, (
                "FashionClassificationModel.get_loss() should not use additional "
                "parameters not used by FashionClassificationModel.run()")

    tracker.add_points(2)  # Partial credit for passing sanity checks

    model.train(dataset)

    test_logits = model.run(nn.Constant(dataset.test_images)).data
    test_predicted = np.argmax(test_logits, axis=1)
    test_accuracy = np.mean(test_predicted == dataset.test_labels)
    accuracy_threshold = 0.96
    if test_accuracy >= accuracy_threshold:
        print("Your final test set accuracy is: {:%}".format(test_accuracy))
        tracker.add_points(4)
    else:
        print("Your final test set accuracy ({:%}) must be at least {:.0%} to "
              "receive full points for this question".format(
                  test_accuracy, accuracy_threshold))
def train(self, dataset):
    parameter = [self.W1, self.b1, self.W2, self.b2]
    for x, y in dataset.iterate_forever(self.batch_size):
        gradient = nn.gradients(self.get_loss(x, y), parameter)
        for i in range(4):
            parameter[i].update(gradient[i], self.multiplier)
        # stop once the loss over the full dataset drops below 0.02
        if nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                      nn.Constant(dataset.y))) < 0.02:
            break
def _encode(self, inp_x, inp_y, only_x=False):
    xs = []
    for i in range(inp_x.shape[1]):
        # column of -1s marks the end of the batched sequences
        if np.all(inp_x[:, i] == -1):
            break
        # one-hot encode the i-th character of each example in the batch
        x = np.eye(len(self.chars))[inp_x[:, i]]
        xs.append(nn.Constant(x))
    if not only_x:
        # one-hot encode the language labels
        y = np.eye(len(self.language_names))[inp_y]
        y = nn.Constant(y)
        return xs, y
    return xs
def iterate_once(self, batch_size):
    assert isinstance(batch_size, int) and batch_size > 0, (
        "Batch size should be a positive integer, got {!r}".format(batch_size))
    assert self.x.shape[0] % batch_size == 0, (
        "Dataset size {:d} is not divisible by batch size {:d}".format(
            self.x.shape[0], batch_size))
    index = 0
    while index < self.x.shape[0]:
        x = self.x[index:index + batch_size]
        y = self.y[index:index + batch_size]
        yield nn.Constant(x), nn.Constant(y)
        index += batch_size
def _encode(self, inp_x, inp_y):
    xs = []
    for i in range(inp_x.shape[1]):
        if np.all(inp_x[:, i] == -1):
            break
        assert not np.any(inp_x[:, i] == -1), (
            "Please report this error in the project: batching by length was "
            "done incorrectly in the provided code")
        x = np.eye(len(self.chars))[inp_x[:, i]]
        xs.append(nn.Constant(x))
    y = np.eye(len(self.language_names))[inp_y]
    y = nn.Constant(y)
    return xs, y
def update(self, state, action, nextState, reward):
    legalActions = self.getLegalActions(state)
    action_index = legalActions.index(action)
    done = nextState.isLose() or nextState.isWin()
    reward = self.shape_reward(reward)

    if self.counts is None:
        x, y = np.array(state.getFood().data).shape
        self.counts = np.ones((x, y))

    state = self.get_features(state)
    nextState = self.get_features(nextState)
    self.counts[int(state[0])][int(state[1])] += 1

    transition = (state, action_index, reward, nextState, done)
    self.replay_memory.push(*transition)

    if len(self.replay_memory) < self.min_transitions_before_training:
        self.epsilon = self.epsilon_explore
    else:
        self.epsilon = max(self.epsilon0 * (1 - self.update_amount / 20000), 0)

    if (len(self.replay_memory) > self.min_transitions_before_training
            and self.update_amount % self.update_frequency == 0):
        minibatch = self.replay_memory.pop(self.model.batch_size)
        states = np.vstack([x.state for x in minibatch])
        states = nn.Constant(states.astype("float64"))
        Q_target1 = self.compute_q_targets(minibatch, self.model,
                                           self.target_model,
                                           doubleQ=self.doubleQ)
        Q_target1 = nn.Constant(Q_target1.astype("float64"))
        if self.doubleQ:
            Q_target2 = self.compute_q_targets(minibatch, self.target_model,
                                               self.model,
                                               doubleQ=self.doubleQ)
            Q_target2 = nn.Constant(Q_target2.astype("float64"))

        self.model.gradient_update(states, Q_target1)
        if self.doubleQ:
            self.target_model.gradient_update(states, Q_target2)

    if (self.target_update_rate > 0
            and self.update_amount % self.target_update_rate == 0):
        self.target_model.set_weights(copy.deepcopy(self.model.parameters))

    self.update_amount += 1
def iterate_once(self, batch_size):
    for x, y in super().iterate_once(batch_size):
        yield x, y
        self.processed += batch_size

        if use_graphics and time.time() - self.last_update > 0.1:
            predicted = self.model.run(nn.Constant(self.x)).data
            loss = self.model.get_loss(
                nn.Constant(self.x), nn.Constant(self.y)).data
            self.learned.set_data(self.x[self.argsort_x],
                                  predicted[self.argsort_x])
            self.text.set_text("processed: {:,}\nloss: {:.6f}".format(
                self.processed, loss))
            self.fig.canvas.draw_idle()
            self.fig.canvas.start_event_loop(1e-3)
            self.last_update = time.time()
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    for x, y in dataset.iterate_forever(self.batch_size):
        params = [self.weight1, self.weight2, self.bias1, self.bias2]
        grads = nn.gradients(self.get_loss(x, y), params)
        # update each parameter with its gradient (a plain loop, rather than
        # a side-effecting list comprehension)
        for param, grad in zip(params, grads):
            param.update(grad, self.learning_rate)
        if nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                      nn.Constant(dataset.y))) < 0.02:
            return
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    # keep training until the loss over the full dataset falls below 0.02
    while nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                     nn.Constant(dataset.y))) >= 0.02:
        for x, y in dataset.iterate_once(self.size):
            gradients = nn.gradients(self.get_loss(x, y),
                                     [self.w0, self.w1, self.x0, self.x1])
            self.w0.update(gradients[0], -1 / 150)
            self.w1.update(gradients[1], -1 / 150)
            self.x0.update(gradients[2], -1 / 150)
            self.x1.update(gradients[3], -1 / 150)
def f(h, x):
    if h is None:
        # first step: there is no hidden state yet, so use only the input
        # projection
        result = nn.Linear(x, self.w)
        # note: multiplying by an all-ones (batch x batch) matrix replaces
        # each row of `result` with the sum of all rows across the batch
        temp = nn.Constant(numpy.ones([x.data.shape[0], result.data.shape[0]]))
        return nn.Linear(temp, result)
    else:
        # subsequent steps: combine the input with the previous hidden state
        return nn.Add(nn.Linear(x, self.w), nn.Linear(h, self.w_hidden))
def main():
    tot_images = 50000
    bs = 200
    n_filters = 1024

    x = nn.Tensor((bs, 3, 32, 32))
    w_1 = nn.Constant(np.random.randn(n_filters, 3, 5, 5).astype(np.float32))
    y = nn.conv(x, w_1)
    # y = nn.activation(y, lambda x: "exp(-({x}))")
    mdl = nn.compile_model(x, y)

    npx = np.random.randn(bs, 3, 32, 32).astype(np.float32)
    # s = time.time()
    # npy = np_pad(npx, 2)
    # npy = np_conv(npy, w_1.value)
    # e = time.time()
    # print("Numpy time", e - s)

    s = time.time()
    res = mdl(npx)
    e = time.time()
    print("OpenCL time", e - s)
    print(f"Expected time for {tot_images} is {(tot_images / bs) * (e - s)} "
          f"seconds ({(tot_images / (bs * 60)) * (e - s)} minutes)")
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    loss = float('inf')
    while loss > 0.01:
        grad_wrt_W1, grad_wrt_b1, grad_wrt_W2, grad_wrt_b2 = nn.gradients(
            self.get_loss(nn.Constant(dataset.x), nn.Constant(dataset.y)),
            [self.W1, self.b1, self.W2, self.b2])
        self.W1.update(grad_wrt_W1, -0.01)
        self.b1.update(grad_wrt_b1, -0.01)
        self.W2.update(grad_wrt_W2, -0.01)
        self.b2.update(grad_wrt_b2, -0.01)
        loss = nn.as_scalar(
            self.get_loss(nn.Constant(dataset.x), nn.Constant(dataset.y)))
def computePolyFeatures(self, point):
    """
    Compute the polynomial features you need from the input x

    NOTE: you will need to unpack the x since it is wrapped in an object;
    thus, use the following function call to get the contents of x as a list:
        point_list = nn.as_vector(point)
    Once you do that, create a list of the form (for batch size of n):
        [[x11, x12, ...], [x21, x22, ...], ..., [xn1, xn2, ...]]
    Once this is done, then use the following code to convert it back into
    the object:
        nn.Constant(nn.list_to_arr(new_point_list))

    Input: a node with shape (batch_size x 1)
    Output: an nn.Constant object with shape (batch_size x n) where n is the
    number of features generated from point (input)
    """
    "*** YOUR CODE HERE ***"
    point_list = nn.as_vector(point)
    poly_batch_list = []
    for value in point_list:
        # features for one example: [x^1, x^2, ..., x^degree]
        features = [np.power(value, exponent)
                    for exponent in range(1, self.degree + 1)]
        poly_batch_list.append(features)
    return nn.Constant(nn.list_to_arr(poly_batch_list))
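# Illustrative (hypothetical) example of the expansion above: with
# self.degree = 3, an input batch [[2.0], [3.0]] becomes
# [[2.0, 4.0, 8.0], [3.0, 9.0, 27.0]], i.e. columns x, x^2, x^3.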
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    while nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                     nn.Constant(dataset.y))) > 0.01:
        for x, y in dataset.iterate_once(self.batch_size):
            grad = nn.gradients(self.get_loss(x, y), self.list)
            self.w1.update(grad[0], -0.01)
            self.w2.update(grad[1], -0.01)
            self.w3.update(grad[2], -0.01)
            self.b1.update(grad[3], -0.01)
            self.b2.update(grad[4], -0.01)
            self.b3.update(grad[5], -0.01)
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    while True:
        for x, y in dataset.iterate_once(1):
            g = nn.gradients(self.get_loss(x, y),
                             [self.w0, self.w1, self.b0, self.b1])
            self.w0.update(g[0], -0.005)
            self.w1.update(g[1], -0.005)
            self.b0.update(g[2], -0.005)
            self.b1.update(g[3], -0.005)
        if nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                      nn.Constant(dataset.y))) < 0.02:
            return
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    while True:
        loss = self.get_loss(nn.Constant(dataset.x), nn.Constant(dataset.y))
        if nn.as_scalar(loss) <= self.maxLoss:
            break
        grad_w1, grad_b1, grad_w2, grad_b2 = nn.gradients(
            loss, [self.w1, self.b1, self.w2, self.b2])
        self.w1.update(grad_w1, self.learningRate)
        self.b1.update(grad_b1, self.learningRate)
        self.w2.update(grad_w2, self.learningRate)
        self.b2.update(grad_b2, self.learningRate)
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    loss = 1
    alpha = 0.01
    while loss > 0.02:
        loss_node = self.get_loss(nn.Constant(dataset.x),
                                  nn.Constant(dataset.y))
        loss = nn.as_scalar(loss_node)
        grad = nn.gradients(loss_node, [self.W1, self.b1, self.W2, self.b2])
        self.W1.update(grad[0], -alpha)
        self.b1.update(grad[1], -alpha)
        self.W2.update(grad[2], -alpha)
        self.b2.update(grad[3], -alpha)
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    while True:
        for x, y in dataset.iterate_once(self.batch_size):
            loss = self.get_loss(x, y)
            gradient = nn.gradients(loss, [self.w0, self.w1, self.b0, self.b1])
            self.w0.update(gradient[0], -.005)
            self.w1.update(gradient[1], -.005)
            self.b0.update(gradient[2], -.005)
            self.b1.update(gradient[3], -.005)
        if nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                      nn.Constant(dataset.y))) < .02:
            return
def train(self, dataset):
    """
    Trains the model.
    """
    while True:
        for x, y in dataset.iterate_once(self.batch):
            loss = self.get_loss(x, y)
            gradient = nn.gradients(loss, [self.w1, self.b1, self.w2])
            self.w1.update(gradient[0], -self.lr)
            self.b1.update(gradient[1], -self.lr)
            self.w2.update(gradient[2], -self.lr)
        if nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                      nn.Constant(dataset.y))) < .02:
            break
def train(self, dataset):
    """
    Trains the model.
    """
    batch_size = self.batch_size
    total_loss = 100000
    while total_loss > 0.02:
        # ITERATE OVER THE TRAINING SET IN BATCHES OF batch_size, AS YOU DID
        # IN THE OTHER EXERCISES, AND UPDATE THE WEIGHTS BASED ON THE ERROR
        # loss = self.get_loss(x, y), WHICH (REMEMBER) BUILDS A FUNCTION
        # WHOSE DERIVATIVE (GRADIENT) CAN BE COMPUTED
        "*** YOUR CODE HERE ***"
        # HERE THE ERROR IS COMPUTED AGAIN, BUT OVER THE WHOLE TRAINING SET
        # AT ONCE (CAREFUL!! the earlier x IS NOT THE SAME AS dataset.x)
        total_loss = nn.as_scalar(
            self.get_loss(nn.Constant(dataset.x), nn.Constant(dataset.y)))
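# A hypothetical completion of the "YOUR CODE HERE" block above, mirroring
# the other train() implementations in this section; the parameter names
# (self.w1, self.b1, self.w2, self.b2) and the -0.01 step size are
# placeholders, not this model's actual attributes.
def train_sketch(self, dataset):
    total_loss = float('inf')
    while total_loss > 0.02:
        for x, y in dataset.iterate_once(self.batch_size):
            loss = self.get_loss(x, y)
            params = [self.w1, self.b1, self.w2, self.b2]
            grads = nn.gradients(loss, params)
            for param, grad in zip(params, grads):
                param.update(grad, -0.01)  # negative multiplier = descent step
        total_loss = nn.as_scalar(
            self.get_loss(nn.Constant(dataset.x), nn.Constant(dataset.y)))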
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    while True:
        for x, y in dataset.iterate_once(self.batch_size):
            loss = self.get_loss(x, y)
            grad = nn.gradients(loss, self.params)
            self.m0.update(grad[0], -0.01)
            self.m1.update(grad[1], -0.01)
            self.b0.update(grad[2], -0.01)
            self.b1.update(grad[3], -0.01)
        if nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                      nn.Constant(dataset.y))) < 0.02:
            return
def train(self, dataset):
    """
    Trains the model.
    """
    alpha = -.005
    while True:
        for x, y in dataset.iterate_once(self.batch_size):
            loss = self.get_loss(x, y)
            gradients = nn.gradients(loss, self.learning)
            for i in range(len(self.learning)):
                self.learning[i].update(gradients[i], alpha)
        if nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                      nn.Constant(dataset.y))) < 0.02:
            return
def getQValue(self, state, action):
    """
    Should return Q(state,action) as predicted by self.model
    """
    feats = self.get_features(state)
    legalActions = self.getLegalActions(state)
    action_index = legalActions.index(action)
    state = nn.Constant(np.array([feats]).astype("float64"))
    return self.model.run(state).data[0][action_index]
def train(self, dataset):
    """
    Trains the model.
    """
    while True:
        for x, y in dataset.iterate_once(self.batch_size):
            loss = self.get_loss(x, y)
            grad = nn.gradients(loss, [self.w1, self.w2, self.b1, self.b2])
            self.w1.update(grad[0], -self.learning_rate)
            self.w2.update(grad[1], -self.learning_rate)
            self.b1.update(grad[2], -self.learning_rate)
            self.b2.update(grad[3], -self.learning_rate)
        if nn.as_scalar(self.get_loss(nn.Constant(dataset.x),
                                      nn.Constant(dataset.y))) < 0.02:
            return
def compute_q_targets(self, minibatch, network=None, target_network=None,
                      doubleQ=False):
    """Compute Q-value targets for a minibatch.

    Args:
        minibatch (List[Transition]): Minibatch of `Transition`

    Returns:
        np.ndarray: Q-value targets for the minibatch
    """
    if network is None:
        network = self.model
    if target_network is None:
        target_network = self.target_model

    states = np.vstack([x.state for x in minibatch])
    states = nn.Constant(states)
    actions = np.array([x.action for x in minibatch])
    rewards = np.array([x.reward for x in minibatch])
    next_states = np.vstack([x.next_state for x in minibatch])
    next_states = nn.Constant(next_states)
    done = np.array([x.done for x in minibatch])

    Q_predict = network.run(states).data
    Q_target = np.copy(Q_predict)

    # count-based exploration bonus for each visited state
    state_indices = states.data.astype(int)
    state_indices = (state_indices[:, 0], state_indices[:, 1])
    exploration_bonus = 1 / (2 * np.sqrt(self.counts[state_indices] / 100))

    # target = r + bonus + (1 - done) * gamma * Q_target(s', a*), where
    # a* = argmax_a Q_network(s', a) (double-DQN-style action selection)
    replace_indices = np.arange(actions.shape[0])
    action_indices = np.argmax(network.run(next_states).data, axis=1)
    target = rewards + exploration_bonus + (
        1 - done) * self.discount * target_network.run(next_states).data[
            replace_indices, action_indices]
    Q_target[replace_indices, actions] = target

    if self.td_error_clipping is not None:
        Q_target = Q_predict + np.clip(Q_target - Q_predict,
                                       -self.td_error_clipping,
                                       self.td_error_clipping)

    return Q_target
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    while True:
        for x, y in dataset.iterate_once(self.batch_size):
            loss = self.get_loss(x, y)
            grad_wrt_W1, grad_wrt_W2, grad_wrt_b1, grad_wrt_b2 = nn.gradients(
                loss, [self.W1, self.W2, self.b1, self.b2])
            multiplier = -.01
            self.W1.update(grad_wrt_W1, multiplier)
            self.W2.update(grad_wrt_W2, multiplier)
            self.b1.update(grad_wrt_b1, multiplier)
            self.b2.update(grad_wrt_b2, multiplier)
        loss2 = self.get_loss(nn.Constant(dataset.x), nn.Constant(dataset.y))
        if nn.as_scalar(loss2) < 0.02:
            break