Example #1
class DQN(object):
    def __init__(self):
        self.eval_net = Net(N_STATES, N_ACTIONS, Hidden_num)
        self.target_net = Net(N_STATES, N_ACTIONS, Hidden_num)

        self.learn_step_counter = 0  # counts learn steps; drives target_net updates
        self.memory_counter = 0  # for storing memory
        self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))  # replay memory: rows of (s, a, r, s_)
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
        self.loss_func = nn.MSELoss()

    # Choose an action (epsilon-greedy)
    def choose_action(self, x):
        x = torch.unsqueeze(torch.FloatTensor(x), 0)
        if np.random.uniform() < EPSILON:  # greedy: exploit current Q estimates
            actions_value = self.eval_net.forward(x, False).detach()
            action = torch.max(actions_value, 1)[1].data.numpy()
            action = action[0]
        else:  # random
            action = np.random.randint(0, N_ACTIONS)
        return action

    # Store a transition in the replay memory
    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, [a, r], s_))  # pack (s, a, r, s_) into one row
        # replace the old memory with new memory
        index = self.memory_counter % MEMORY_CAPACITY
        self.memory[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        # Soft-update target_net toward eval_net (Polyak averaging with rate TAU)
        for target_param, param in zip(self.target_net.parameters(),
                                       self.eval_net.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - TAU) +
                                    param.data * TAU)
        self.learn_step_counter += 1

        # sample a minibatch (with replacement), restricted to entries already filled
        sample_index = np.random.choice(min(self.memory_counter, MEMORY_CAPACITY),
                                        BATCH_SIZE)
        b_memory = self.memory[sample_index, :]
        b_s = torch.FloatTensor(b_memory[:, :N_STATES])
        b_a = torch.LongTensor(b_memory[:, N_STATES:N_STATES + 1].astype(int))
        b_r = torch.FloatTensor(b_memory[:, N_STATES + 1:N_STATES + 2])
        b_s_ = torch.FloatTensor(b_memory[:, -N_STATES:])

        q_eval = self.eval_net(b_s)
        # gather along dim 1 picks, per row, the Q-value of the action actually taken
        q_eval = torch.gather(q_eval, 1, b_a)
        # detach: no gradients may flow into target_net; it is only updated by the
        # soft update at the top of learn()
        q_target = self.target_net(b_s_).detach()
        y = b_r + GAMMA * q_target.max(1)[0].view(BATCH_SIZE, 1)  # shape (batch, 1)
        loss = self.loss_func(q_eval, y)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.data.item()
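
Net is not defined in these examples; a minimal sketch consistent with Net(N_STATES, N_ACTIONS, Hidden_num) and the eval_net.forward(x, False) call might look like this (treating the second forward argument as an unused training flag is an assumption):

import torch
import torch.nn as nn

class Net(nn.Module):
    # Hypothetical two-layer MLP mapping states to per-action Q-values.
    def __init__(self, n_states, n_actions, hidden_num):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(n_states, hidden_num)
        self.out = nn.Linear(hidden_num, n_actions)

    def forward(self, x, train=True):
        # 'train' mirrors the flag passed as forward(x, False); unused in this sketch
        x = torch.relu(self.fc1(x))
        return self.out(x)  # one Q-value per action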
Example #2
    def train_Net(self,
                  data_files_dic="./data/",
                  normalized=False,
                  percent=0.9):
        f = open("./log/train_Net_" + str(self.stride) + ".txt", "w")
        print("-------------------------------------------")
        f.write("\n" + "-------------------------------------------")
        # print("Train Net Model...")
        f.write("\n" + "Train Net Model...")
        # all trian data and test data files path

        # Get train data and test data
        train_data_x, train_data_y, test_data_x, test_data_y = self.getData(
            data_files_dic=data_files_dic,
            normalized=normalized,
            percent=percent)

        # Print the size of train data and test data
        print("Train data shape:")
        f.write("\n" + "Train data shape:")
        print(train_data_x.shape)
        f.write("\n" + str(train_data_x.shape))
        print(train_data_y.shape)
        f.write("\n" + str(train_data_y.shape))
        train_data_x = torch.Tensor(train_data_x)
        train_data_y = torch.Tensor(train_data_y)

        trainset = DataSet(train_data_x, train_data_y)
        trainloader = DataLoader(trainset, batch_size=16, shuffle=False)

        # Load the Net LSTM_model
        # net = torch.load('LSTM_model.pth')

        # Build the Net model
        net = Net(self.input_size * self.stride, self.output_size)
        # Optimize all net parameters
        optimizer = torch.optim.Adam(net.parameters(), lr=self.learning_rate)
        # The loss function uses mean square error (MSE) loss function
        loss_func = nn.MSELoss()

        for step in range(self.EPOCH):
            total_loss = 0
            for tx, ty in trainloader:
                output = net(torch.unsqueeze(tx, dim=0))
                loss = loss_func(torch.squeeze(output), ty)
                # clear gradients for this training step
                optimizer.zero_grad()
                # back propagation, compute gradients
                loss.backward()
                optimizer.step()
                total_loss += float(loss)
            # print(step, float(total_loss))
            f.write("\n" + str(step) + " " + str(float(total_loss)))
        torch.save(net, "./models/Net_model_" + str(self.stride) + ".pth")
        f.write("\n" + "Save Net Model")
        print("Save Net Model")
        f.close()
Example #3
    def online_train_Net(self, train_path, model_path, stride, EPOCH):
        print("-------------------------------------------")
        print("Online Train Net Model...")
        self.stride = stride
        self.EPOCH = EPOCH
        # Get train data and test data
        train_data_x, train_data_y = self.getOnlineData(
            data_files_dic=train_path, normalized=True)

        # Print the size of train data and test data
        print("Train data shape:")
        print(train_data_x.shape)
        print(train_data_y.shape)

        train_data_x = torch.Tensor(train_data_x)
        train_data_y = torch.Tensor(train_data_y)

        trainset = DataSet(train_data_x, train_data_y)
        trainloader = DataLoader(trainset, batch_size=16, shuffle=False)

        # Build the Net model
        net = Net(self.input_size * self.stride, self.output_size)
        # Optimize all net parameters
        optimizer = torch.optim.Adam(net.parameters(), lr=self.learning_rate)
        # The loss function uses mean square error (MSE) loss function
        loss_func = nn.MSELoss()

        for step in range(self.EPOCH):
            total_loss = 0
            for tx, ty in trainloader:
                output = net(torch.unsqueeze(tx, dim=0))
                # print(output.detach().numpy()[0][0],ty.numpy()[0])
                loss = loss_func(torch.squeeze(output), ty)
                # clear gradients for this training step
                optimizer.zero_grad()
                # back propagation, compute gradients
                loss.backward()
                optimizer.step()
                total_loss += float(loss)
            print(step, float(total_loss))
        torch.save(net, model_path + "/Net_model_" + str(self.stride) + ".pth")
        print("Save Online Net Model")
Example #4
# ----------------------------- net, opt, lr, loss ----------------------------

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.init_lr, momentum=0.9)

# -------------------------------- train & test -------------------------------

for epoch in range(50):
    sum_loss = 0.0
    adjust_lr_by_epoch(optimizer, epoch, args.init_lr)
    for batch_no in range(1, provider.train_batch_num + 1):
        imgs, labels = provider.next()
        imgs = imgs.permute(0, 3, 1, 2).float() / 255
        pred = net(imgs)
        '''
        Equivalent to loss = criterion.forward(pred, labels).
        Careful NOT to write loss = nn.CrossEntropyLoss(pred, labels): that
        constructs a brand-new loss module with pred/labels as constructor
        arguments instead of applying the existing criterion!
        '''
        loss = criterion(pred, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        sum_loss += loss.item()
        if batch_no % WATCH_LOSS_PER_BATCH == 0:
            print("[{}, {}] loss: {:.5f}  lr: {:.5f}".format(
                epoch, batch_no, sum_loss / WATCH_LOSS_PER_BATCH,
                get_lr(optimizer)))
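
adjust_lr_by_epoch and get_lr are not shown; plausible sketches, with the step-decay schedule being purely an assumption:

def adjust_lr_by_epoch(optimizer, epoch, init_lr):
    # Hypothetical schedule: decay the learning rate 10x every 20 epochs.
    lr = init_lr * (0.1 ** (epoch // 20))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def get_lr(optimizer):
    # Read the current learning rate back off the first parameter group.
    return optimizer.param_groups[0]['lr']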
Example #5
class DQN(object):
    def __init__(self):
        # The two nets share one architecture; eval_net is trained on every learn
        # step, while target_net only tracks it through the soft update in learn()
        self.eval_net = Net(N_STATES, N_ACTIONS, Hidden_num)
        self.target_net = Net(N_STATES, N_ACTIONS, Hidden_num)

        self.learn_step_counter = 0  # counts learn steps; drives target_net updates
        self.memory_counter = 0  # for storing memory
        self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))  # legacy flat replay buffer (superseded by the split arrays below)
        self.memory_state = np.zeros((MEMORY_CAPACITY, N_STATES))
        self.memory_next_state = np.zeros((MEMORY_CAPACITY, N_STATES))
        self.memory_action = np.zeros((MEMORY_CAPACITY, 1))
        self.memory_reward = np.zeros((MEMORY_CAPACITY, 1))
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
        self.loss_func = nn.MSELoss()

    # Choose an action (epsilon-greedy)
    def choose_action(self, x):
        x = torch.unsqueeze(torch.FloatTensor(x), 0)
        if np.random.uniform() < EPSILON:  # greedy: exploit current Q estimates
            actions_value = self.eval_net.forward(x, False).detach()
            action = torch.max(actions_value, 1)[1].data.numpy()
            action = action[0]
        else:  # random
            action = np.random.randint(0, N_ACTIONS)
        return action

    # Store a transition in the replay memory
    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, [a, r], s_))  # packed row (only used by the legacy self.memory path, commented out below)
        # replace the old memory with new memory
        index = self.memory_counter % MEMORY_CAPACITY
        self.memory_state[index, :] = s
        self.memory_next_state[index, :] = s_
        self.memory_action[index, :] = a
        self.memory_reward[index, :] = r
        # self.memory[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        # Soft-update target_net toward eval_net (Polyak averaging with rate TAU)
        for target_param, param in zip(self.target_net.parameters(),
                                       self.eval_net.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - TAU) +
                                    param.data * TAU)
        self.learn_step_counter += 1

        # Learning step: sample starting indices for n-step returns.
        # T (episode length) and N (n-step horizon) are assumed to be module globals.
        sample_index = np.random.choice(MEMORY_CAPACITY, BATCH_SIZE)
        # extra reward steps to accumulate after each sampled index
        bound = min(T - sample_index.max(), N) - 1
        s_memory = list()
        next_s_memory = list()
        a_memory = list()
        r_memory = list()
        for index in sample_index:
            # rewards from index through index + bound (inclusive), for the n-step return
            r_memory.append(self.memory_reward[index:index + bound + 1])
        s_memory.append(self.memory_state[sample_index])
        next_s_memory.append(self.memory_next_state[sample_index + bound])
        a_memory.append(self.memory_action[sample_index])

        b_s = torch.tensor(s_memory, dtype=torch.float32).squeeze()
        b_s_ = torch.tensor(next_s_memory, dtype=torch.float32).squeeze()
        b_r = torch.tensor(r_memory, dtype=torch.float32)
        # n-step return: rewards are summed undiscounted here; discounting each
        # step inside the window by GAMMA**k would be the more standard choice
        b_r = torch.sum(b_r, dim=1)
        b_a = torch.tensor(a_memory, dtype=torch.long).squeeze(0)

        # q_eval w.r.t the action in experience
        q_eval = self.eval_net(b_s)
        q_eval = q_eval.gather(1, b_a)  # shape (batch, 1)
        q_next = self.target_net(b_s_).detach()  # detach: no gradients may flow into target_net
        q_target = b_r + GAMMA * q_next.max(1)[0].view(BATCH_SIZE, 1)  # shape (batch, 1)
        loss = self.loss_func(q_eval, q_target)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.data.item()

    def learn2(self):
        # sample only from filled entries; memory_counter can exceed MEMORY_CAPACITY
        # once the ring buffer wraps, so cap the sampling range
        sample_index = np.random.choice(
            min(self.memory_counter, MEMORY_CAPACITY), BATCH_SIZE)
        s_memory = list()
        next_s_memory = list()
        a_memory = list()
        r_memory = list()
        r_memory.append(self.memory_reward[sample_index])
        s_memory.append(self.memory_state[sample_index])
        next_s_memory.append(self.memory_next_state[sample_index])
        a_memory.append(self.memory_action[sample_index])

        b_s = torch.tensor(s_memory, dtype=torch.float32).squeeze()
        b_s_ = torch.tensor(next_s_memory, dtype=torch.float32).squeeze()
        b_r = torch.tensor(r_memory, dtype=torch.float32)
        b_r = torch.sum(b_r, dim=1)
        b_a = torch.tensor(a_memory, dtype=torch.long).squeeze(0)

        q_eval = self.eval_net(b_s)
        q_eval = q_eval.gather(1, b_a)  # shape (batch, 1)
        # Double DQN: eval_net selects the argmax action, target_net evaluates it
        action = torch.argmax(self.eval_net(b_s_), dim=1).view(BATCH_SIZE, 1)
        q_next = self.target_net(b_s_).gather(
            dim=1, index=action).detach()  # detach: no gradients into target_net
        q_target = b_r + GAMMA * q_next.view(BATCH_SIZE, 1)
        loss = self.loss_func(q_eval, q_target)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.data.item()
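
A driver loop for this agent might look as follows; env, MAX_EPISODES, and the gym-style reset()/step() signature are assumptions, not part of the original code:

dqn = DQN()
for episode in range(MAX_EPISODES):  # MAX_EPISODES is illustrative
    s = env.reset()
    done = False
    while not done:
        a = dqn.choose_action(s)
        s_, r, done, info = env.step(a)  # gym-style 4-tuple assumed
        dqn.store_transition(s, a, r, s_)
        # wait until enough transitions exist before sampling a batch
        if dqn.memory_counter > BATCH_SIZE:
            loss = dqn.learn2()
        s = s_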
Example #6
    batch_loss = 0
    batch_acc = 0
    val_acc = 0
    val_loss = 0

    # train
    total = 0
    train_acc = 0
    train_loss = 0
    for i in range(60000 // batch_size):
        imgs, labs = data.next_batch(batch_size)

        # Train: forward pass, then backprop; (sf_out - labs) is the gradient of
        # softmax + cross-entropy with respect to the logits
        sf_out = net.forward(imgs)
        net.backward(sf_out - labs)
        net.Gradient(alpha=learning_rate, weight_decay=0.01)

        # bookkeeping: count correct top-1 predictions
        for j in range(batch_size):
            if np.argmax(sf_out[j]) == np.argmax(labs[j]):
                train_acc += 1
            total += 1

        mod = 100
        if i % mod == 0:
            print("epoch=%d  batchs=%d   train_acc: %.4f  " %
                  (epoch, i, train_acc / total))
            train_acc = 0
            total = 0
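
The update net.backward(sf_out - labs) relies on the fact that, for a softmax output with one-hot cross-entropy loss, the gradient with respect to the logits is softmax(z) - y. A small numpy check of that identity:

import numpy as np

z = np.array([1.0, 2.0, 0.5])    # logits
y = np.array([0.0, 1.0, 0.0])    # one-hot label
p = np.exp(z) / np.exp(z).sum()  # softmax

grad = p - y  # claimed analytic gradient of -sum(y * log(p)) w.r.t. z

# finite-difference check on the first coordinate
eps = 1e-6
z2 = z.copy()
z2[0] += eps
p2 = np.exp(z2) / np.exp(z2).sum()
numeric = (-np.sum(y * np.log(p2)) + np.sum(y * np.log(p))) / eps
assert abs(numeric - grad[0]) < 1e-4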
Example #7
File: NetTest.py  Project: MGSE97/NAVY
teach = gen_labeled(input_size)
test = gen_labeled(test_size)


# view
#viewTeach = prepareView("XOR Teach")
#drawPoints(viewTeach, teach)
viewTest = prepareView("XOR Test")
points = drawPoints(viewTest, test)

# learn
#for epoch in range(0, epochs):  # bounded alternative to the endless loop below
epoch = 0
while True:
    for point in teach:
        y = net.forward(point.data)
        net.backwards([point.label], y, learning_rate)

    if epoch % draw_epoch == 0:
        # test
        losses = 0
        for point in test:
            y = net.forward(point.data)[0]
            loss = 0.5 * (point.label - y) ** 2
            # NOTE: overwriting the label with the prediction lets drawPoints
            # color by predicted output, but it destroys the ground truth for
            # every later loss computation
            point.label = y
            losses += loss
        print("Loss({}): {}".format(epoch, losses))
        #print(epoch)
        points = drawPoints(viewTest, test, points)
        #net.backwards([0], 1, 10.0)
    epoch += 1  # advance the counter so the draw/test block runs only every draw_epoch passes
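
gen_labeled and its Point type are not shown; a plausible sketch matching how point.data and point.label are used above (the sampling scheme is an assumption):

import random

class Point:
    def __init__(self, data, label):
        self.data = data    # [x1, x2] input pair
        self.label = label  # target output for XOR

def gen_labeled(count):
    points = []
    for _ in range(count):
        x1, x2 = random.random(), random.random()
        label = float(round(x1) != round(x2))  # XOR of the rounded coordinates
        points.append(Point([x1, x2], label))
    return points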