    for i, data in enumerate(dataLoad, 0):
        x, y = data
        x = x[0].float().to(device)
        #y = y.float()
        y = y[0].to(device)
        optimizer.zero_grad()
        yhat = model(x)
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:  # print every 200 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0
    #print(running_loss)
    import matplotlib.pyplot as plt
    plt.plot(loss_list)  # loss_list is assumed to be collected earlier in the (truncated) training loop
    plt.title('Model_loss vs epochs')
    plt.ylabel('Loss')
    plt.xlabel('epochs')
    s = '../working/epochwise_loss_' + str(epochs)
    plt.savefig(s)
    plt.show()
    plt.close()
    torch.save(model.state_dict(), '../working/new_mod.pth')

PATH = '/home/prathmesh/Desktop/Mesh_Neural_Network/new_models.pth'
torch.save(model.state_dict(), PATH)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net = Net()
net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.7)

# Training
for epoch in range(5):
    for i, data in enumerate(train_load, 0):
        inputs, labels, name = data
        inputs = inputs.float()
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        print('[%d, %5d/%d] loss: %.3f' %
              (epoch + 1, i + 1, len(train_load), loss.item()))

print('Finished Training')

torch.save(net.state_dict(), './cnn')
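
# A minimal evaluation sketch (an assumption, not part of the original snippet):
# reload the weights saved above and measure accuracy on a hypothetical val_load
# DataLoader that yields (inputs, labels, name) tuples like train_load does.
net.load_state_dict(torch.load('./cnn', map_location=device))
net.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels, name in val_load:
        inputs, labels = inputs.float().to(device), labels.to(device)
        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Validation accuracy: %.2f %%' % (100.0 * correct / total))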
Example #3
class Trainer:
    def __init__(self, hidden_dim, buffer_size, gamma, batch_size, device, writer):
        self.env = make("connectx", debug=True)
        self.device = device
        self.policy = Net(self.env.configuration.columns * self.env.configuration.rows, hidden_dim,
                          self.env.configuration.columns).to(
            device)

        self.target = Net(self.env.configuration.columns * self.env.configuration.rows, hidden_dim,
                          self.env.configuration.columns).to(
            device)
        self.enemyNet = Net(self.env.configuration.columns * self.env.configuration.rows, hidden_dim,
                            self.env.configuration.columns).to(
            device)
        self.target.load_state_dict(self.policy.state_dict())
        self.target.eval()
        self.buffer = ExperienceReplay(buffer_size)
        self.enemy = "random"
        self.trainingPair = self.env.train([None, self.enemy])
        self.loss_function = nn.MSELoss()
        self.optimizer = optim.Adam(params=self.policy.parameters(), lr=0.001)
        self.gamma = gamma
        self.batch_size = batch_size

        self.first = True
        self.player = 1
        self.writer = writer

    def agent(self, observation, configuration):
        with torch.no_grad():
            state = torch.tensor(observation['board'], dtype=torch.float)
            reshaped = self.reshape(state)
            action = self.takeAction(self.enemyNet(reshaped).view(-1), reshaped, 0, False)
            return action

    def switch(self):
        self.trainingPair = self.env.train([None, "negamax"])
        self.enemy = "negamax"

    def switchPosition(self):
        self.env.reset()
        if self.first:
            self.trainingPair = self.env.train([self.enemy, None])
            self.player = 2
        else:
            self.trainingPair = self.env.train([None, self.enemy])
            self.player = 1
        self.first = not self.first

    def load(self, path):
        self.policy.load_state_dict(torch.load(path))

    def synchronize(self):
        self.target.load_state_dict(self.policy.state_dict())

    def save(self, name):
        torch.save(self.policy.state_dict(), name)

    def reset(self):
        self.env.reset()
        return self.trainingPair.reset()

    def step(self, action):
        return self.trainingPair.step(action)

    def addExperience(self, experience):
        self.buffer.append(experience)

    def epsilon(self, maxE, minE, episode, lastEpisode):
        return (maxE - minE) * max((lastEpisode - episode) / lastEpisode, 0) + minE

    def change_reward(self, reward, done):
        if done and reward == 1:
            return 10
        if done and reward == -1:
            return -10
        if reward is None and done:
            return -20
        if done:
            return 1
        if reward == 0:
            return 1 / 42
        else:
            return reward

    def change_reward_streak(self, reward, done, reshapedBoard, action, useStreak):
        if done and reward == 1:
            return 20
        if done and reward == -1:
            return -20
        if reward is None and done:
            return -40
        if done:
            return 1
        if reward == 0 and useStreak:
            return 1 / 42 + self.streakReward(self.player, reshapedBoard, action)
        if reward == 0:
            return 1 / 42
        else:
            return reward

    def streakReward(self, player, reshapedBoard, action):
        verticalReward = 0
        horizontalReward = 0
        if self.longestVerticalStreak(player, reshapedBoard, action) == 3:
            verticalReward = 3
        if self.longestHorizontalStreak(player, reshapedBoard, action) == 3:
            horizontalReward = 3
        return verticalReward + horizontalReward + self.longestDiagonalStreak(player, reshapedBoard, action)

    def longestVerticalStreak(self, player, reshapedBoard, action):
        count = 0
        wasZero = False
        for i in range(5, 0, -1):
            if reshapedBoard[0][player][i][action] == 0:
                wasZero = True
            if reshapedBoard[0][player][i][action] == 1 and wasZero:
                count = 0
                wasZero = False
            count += reshapedBoard[0][player][i][action]
        if reshapedBoard[0][0][0][action] == 0:
            return 0
        return count

    def longestHorizontalStreak(self, player, reshapedBoard, action):
        count = 0
        rowOfAction = self.rowOfAction(player, reshapedBoard, action)
        wasZero = False
        for i in range(7):
            if reshapedBoard[0][player][rowOfAction][i] == 0:
                wasZero = True
            if reshapedBoard[0][player][rowOfAction][i] == 1 and wasZero:
                count = 0
                wasZero = False
            count += reshapedBoard[0][player][rowOfAction][i]
        return count

    def longestDiagonalStreak(self, player, reshapedBoard, action):
        rowOfAction = self.rowOfAction(player, reshapedBoard, action)
        for row in range(4):
            for col in range(5):
                if reshapedBoard[0][player][row][col] == reshapedBoard[0][player][row + 1][col + 1] == \
                        reshapedBoard[0][player][row + 2][col + 2] == 1 and self.actionInDiagonal1(action, row, col,
                                                                                                   rowOfAction):
                    return 3
        for row in range(5, 1, -1):
            for col in range(4):
                if reshapedBoard[0][player][row][col] == reshapedBoard[0][player][row - 1][col + 1] == \
                        reshapedBoard[0][player][row - 2][col + 2] == 1 and self.actionInDiagonal2(action, row, col,
                                                                                                   rowOfAction):
                    return 3
        return 0

    def actionInDiagonal1(self, action, row, col, rowOfAction):
        return (rowOfAction == row and action == col or
                rowOfAction == row + 1 and action == col + 1 or
                rowOfAction == row + 2 and action == col + 2)

    def actionInDiagonal2(self, action, row, col, rowOfAction):
        return (rowOfAction == row and action == col or
                rowOfAction == row - 1 and action == col + 1 or
                rowOfAction == row - 2 and action == col + 2)

    def rowOfAction(self, player, reshapedBoard, action):
        rowOfAction = 10
        for i in range(6):
            if reshapedBoard[0][player][i][action] == 1:
                rowOfAction = min(i, rowOfAction)
        return rowOfAction

    def policyAction(self, board, episode, lastEpisode, minEp=0.1, maxEp=0.9):
        reshaped = self.reshape(torch.tensor(board))
        output = self.policy(reshaped).view(-1)
        return self.takeAction(output, reshaped, self.epsilon(maxEp, minEp, episode, lastEpisode))

    def takeAction(self, actionList: torch.tensor, board, epsilon, train=True):
        if (np.random.random() < epsilon) and train:
            # explore: sample only among valid actions (a plain random choice over
            # all columns could pick a full column, which is an invalid move)
            return np.random.choice([i for i in range(len(actionList)) if board[0][0][0][i] == 1])
        else:
            for i in range(7):
                if board[0][0][0][i] == 0:
                    actionList[i] = float('-inf')
            return torch.argmax(actionList).item()

    def reshape(self, board: torch.tensor, unsqz=True):
        tensor = board.view(-1, 7).long()
        # one-hot channels after the permute: [0] = empty cells where a piece can be
        # dropped (1 where a move is possible), [1] = player 1's pieces, [2] = player 2's pieces
        b = F.one_hot(tensor, 3).permute([2, 0, 1])
        if unsqz:
            return torch.unsqueeze(b, 0).float().to(self.device)
        return b.float().to(self.device)

    def preprocessState(self, state):
        state = self.reshape(torch.tensor(state), True)
        return state

    def trainActionFromPolicy(self, state, action):
        state = self.preprocessState(state)
        value = self.policy(state).view(-1).to(self.device)
        return value[action].to(self.device)

    def trainActionFromTarget(self, next_state, reward, done):
        next_state = self.preprocessState(next_state)
        target = self.target(next_state)
        target = torch.max(target, 1)[0].item()
        target = reward + ((self.gamma * target) * (1 - done))
        return torch.tensor(target).to(self.device)

    def train(self):
        if len(self.buffer) > self.batch_size:
            self.optimizer.zero_grad()
            states, actions, rewards, next_states, dones = self.buffer.sample(self.batch_size, self.device)
            meanLoss = 0
            for i in range(self.batch_size):
                value = self.trainActionFromPolicy(states[i], actions[i])
                target = self.trainActionFromTarget(next_states[i], rewards[i], dones[i])
                loss = self.loss_function(value, target)
                loss.backward()
                meanLoss += loss.item()  # accumulate a detached float, not the graph
            self.optimizer.step()
            return meanLoss / self.batch_size
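
# A minimal driver sketch for the Trainer above (assumptions, not part of the
# original snippet: ExperienceReplay stores (state, action, reward, next_state, done)
# tuples, and the kaggle-environments trainingPair returns an observation dict with
# a 'board' key plus (reward, done) on each step; all hyperparameters are arbitrary).
def run_training(device, writer=None, episodes=1000, sync_every=50):
    trainer = Trainer(hidden_dim=128, buffer_size=10000, gamma=0.99,
                      batch_size=64, device=device, writer=writer)
    for episode in range(episodes):
        observation = trainer.reset()
        done = False
        while not done:
            board = observation['board']
            action = int(trainer.policyAction(board, episode, episodes))
            next_observation, reward, done, _ = trainer.step(action)
            reward = trainer.change_reward(reward, done)
            trainer.addExperience((board, action, reward,
                                   next_observation['board'], done))
            observation = next_observation
            trainer.train()
        if episode % sync_every == 0:
            trainer.synchronize()  # copy policy weights into the target network
    trainer.save('policy.pth')  # hypothetical output path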
Example #4
                    correct += (pred_t == labels_t).sum().item()

                pred_temp.append(torch.cat(pt))
                true_temp.append(torch.cat(tt))

                test_acc = correct / total
                test_acc_list.append(test_acc)

                print('test acc: {}'.format(test_acc))

        best_result_index = np.argmax(np.array(test_acc_list))
        loss_total.append(loss_list[best_result_index])
        acc_total.append(test_acc_list[best_result_index])
        pred_total.append(pred_temp[best_result_index].tolist())
        true_total.append(true_temp[best_result_index].tolist())

    file_name = 'res/2gram MCSC'
    torch.save(net.state_dict(), file_name + '.pth')

    loss_DF = pd.DataFrame(loss_total)
    loss_DF.to_csv(file_name + " loss.csv")

    acc_DF = pd.DataFrame(acc_total)
    acc_DF.to_csv(file_name + " acc.csv")

    pred_DF = pd.DataFrame(pred_total)
    pred_DF.to_csv(file_name + " pred.csv")

    true_DF = pd.DataFrame(true_total)
    true_DF.to_csv(file_name + " true.csv")
    running_loss = 0.0
    st2 = time.time()
    for i in range(len(DataObject_X)):
        #x,y = DataObject[i]
        x, y = torch.from_numpy(DataObject_X[i]), torch.tensor(
            [DataObject_Y[i]])
        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()
        #print(y)
        x = x.float()

        #y = y.float()
        optimizer.zero_grad()
        yhat = model(x)
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:  # print every 200 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0
    st3 = time.time()
    print(st3 - st2)
    print(epochs, running_loss)

PATH = '/home/prathmesh/Desktop/Mesh_Neural_Network/new_models.pth'
torch.save(model.state_dict(), PATH)
Example #6
            roadImageNew1 = stacked_roadImage1.reshape(args.batch_size, 800*800)
            roadImageNewf1 = roadImageNew1.type(torch.float)
            sample1, road_image1 = Variable(sampleNew1).to(device), Variable(roadImageNewf1).to(device)
            output1 = model(sample1)
            loss_func = nn.BCELoss()
            vloss = loss_func(output1, road_image1)
            validation_loss += vloss.item()  # sum up batch loss
            outputNew = output1 > 0.5
            outputNew1 = outputNew.type(torch.float)
            iou = compute_iou(outputNew1,road_image1)
            total_iou += iou
        validation_loss /= len(valLoader)
        total_iou /= len(valLoader)
        return validation_loss,total_iou
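
# compute_iou is referenced above but not defined in this snippet; a minimal
# sketch, assuming both tensors hold 0/1 float values of shape (batch_size, 800*800):
def compute_iou(pred, target, eps=1e-6):
    pred = pred.view(pred.size(0), -1)
    target = target.view(target.size(0), -1)
    intersection = (pred * target).sum(dim=1)
    union = pred.sum(dim=1) + target.sum(dim=1) - intersection
    return ((intersection + eps) / (union + eps)).mean().item()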


for epoch in range(0, args.epoch + 1):
    total_loss = train(epoch)
    print("Epoch: " + str(epoch) + " total training loss: " + str(total_loss) + "\n")
    val_loss, total_iou = validation()
    print("Epoch: " + str(epoch) + " total validation loss: " + str(val_loss) + " total iou: " + str(total_iou) + "\n")

    model_file = 'model5/resnet18_model2_' + str(epoch) + '.pth'
    torch.save({'modelRoadMap_state_dict': model.state_dict()}, model_file)
    print('\nSaved model to ' + model_file)


Example #7
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss_val.append(loss.item())
            validation_loss_store.append([epoch, loss.item()])
            val_loss += loss

        val_loss = val_loss / float(i + 1)

        if val_loss < min_loss:
            min_loss = val_loss
            no_impr_epoch = 0

            #save the best model
            torch.save(
                model.state_dict(), 'weight/' + 'epoch_' + str(epoch + 1) +
                'loss_' + str(float(val_loss.cpu().numpy())) + '.pt')

            print('performance improved with validation loss ' +
                  str(float(val_loss.cpu().numpy())))
            file.write(
                '--------------------------------------------------------------------\n'
            )
            file.write('performance improved with validation loss =  ' +
                       str(float(val_loss.cpu().numpy())) + '\n')

            file.write('epoch = ' + str(epoch + 1) + '\t' + 'step = ' +
                       str(step) + '\t' + 'val_loss = ' + '\t' +
                       str(np.mean(loss_val)) + '\n')
            file.write(
                '--------------------------------------------------------------------\n\n'
            )

        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:  # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
        i += 1
    print(i)

print('Finished Training')

SAVE_PATH = './prototype_net'
torch.save(net.state_dict(), SAVE_PATH)

correct = 0
total = 0
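
# batch_feeder is not defined in this snippet; a minimal sketch, assuming X_val
# and y_val are NumPy arrays and that batches are yielded in order without shuffling:
def batch_feeder(X, y, batch_size):
    for start in range(0, len(X), batch_size):
        yield X[start:start + batch_size], y[start:start + batch_size]
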
with torch.no_grad():
    for data in batch_feeder(X_val, y_val, batch_size):
        images, labels = data
        images = torch.from_numpy(images).float().to(device)
        labels = torch.from_numpy(labels).long().to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on validation images: %d %%' %
      (100 * correct / total))