for i, data in enumerate(dataLoad, 0):
    x, y = data
    x = x[0].float().to(device)
    y = y[0].to(device)

    optimizer.zero_grad()
    yhat = model(x)
    loss = criterion(yhat, y)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    if i % 200 == 199:  # print every 200 mini-batches
        print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
        running_loss = 0.0

import matplotlib.pyplot as plt

plt.plot(loss_list)
plt.title('Model loss vs epochs')
plt.ylabel('Loss')
plt.xlabel('epochs')
s = '../working/epochwise_loss_' + str(epochs)
plt.savefig(s)
plt.show()
plt.close()

torch.save(model.state_dict(), '../working/new_mod.pth')
PATH = '/home/prathmesh/Desktop/Mesh_Neural_Network/new_models.pth'
torch.save(model.state_dict(), PATH)
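The plot above reads loss_list, which is never populated in this excerpt. A minimal sketch of the per-epoch bookkeeping that could feed it follows; the epoch-level averaging is an assumption, not taken from the original script.

# Hypothetical bookkeeping for loss_list; per-epoch averaging is an assumption.
loss_list = []
for epoch in range(epochs):
    epoch_loss = 0.0
    for data in dataLoad:
        x, y = data
        x = x[0].float().to(device)
        y = y[0].to(device)
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    loss_list.append(epoch_loss / len(dataLoad))  # one averaged loss value per epoch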
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

net = Net()
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.7)

# Training
for epoch in range(5):
    for i, data in enumerate(train_load, 0):
        inputs, labels, name = data
        inputs = inputs.float()
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        print('[%d, %5d/%d] loss: %.3f' % (epoch + 1, i + 1, len(train_load), loss.item()))

print('Finished Training')
torch.save(net.state_dict(), './cnn')
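The checkpoint above stores only the state_dict, so reloading it requires instantiating Net again first. A minimal sketch, assuming the same Net class and device setup as above are in scope:

# Reload the trained weights for evaluation (Net and device as defined above).
net = Net()
net.load_state_dict(torch.load('./cnn', map_location=device))
net.to(device)
net.eval()  # disable dropout / batch-norm updates before inference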
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from kaggle_environments import make

# Net and ExperienceReplay are defined elsewhere in the project.


class Trainer:
    def __init__(self, hidden_dim, buffer_size, gamma, batch_size, device, writer):
        self.env = make("connectx", debug=True)
        self.device = device
        self.policy = Net(self.env.configuration.columns * self.env.configuration.rows,
                          hidden_dim, self.env.configuration.columns).to(device)
        self.target = Net(self.env.configuration.columns * self.env.configuration.rows,
                          hidden_dim, self.env.configuration.columns).to(device)
        self.enemyNet = Net(self.env.configuration.columns * self.env.configuration.rows,
                            hidden_dim, self.env.configuration.columns).to(device)
        self.target.load_state_dict(self.policy.state_dict())
        self.target.eval()
        self.buffer = ExperienceReplay(buffer_size)
        self.enemy = "random"
        self.trainingPair = self.env.train([None, self.enemy])
        self.loss_function = nn.MSELoss()
        self.optimizer = optim.Adam(params=self.policy.parameters(), lr=0.001)
        self.gamma = gamma
        self.batch_size = batch_size
        self.first = True
        self.player = 1
        self.writer = writer

    def agent(self, observation, configuration):
        with torch.no_grad():
            state = torch.tensor(observation['board'], dtype=torch.float)
            reshaped = self.reshape(state)
            action = self.takeAction(self.enemyNet(reshaped).view(-1), reshaped, 0, False)
            return action

    def switch(self):
        self.trainingPair = self.env.train([None, "negamax"])
        self.enemy = "negamax"

    def switchPosition(self):
        self.env.reset()
        if self.first:
            self.trainingPair = self.env.train([self.enemy, None])
            self.player = 2
        else:
            self.trainingPair = self.env.train([None, self.enemy])
            self.player = 1
        self.first = not self.first

    def load(self, path):
        self.policy.load_state_dict(torch.load(path))

    def synchronize(self):
        self.target.load_state_dict(self.policy.state_dict())

    def save(self, name):
        torch.save(self.policy.state_dict(), name)

    def reset(self):
        self.env.reset()
        return self.trainingPair.reset()

    def step(self, action):
        return self.trainingPair.step(action)

    def addExperience(self, experience):
        self.buffer.append(experience)

    def epsilon(self, maxE, minE, episode, lastEpisode):
        # linearly anneal epsilon from maxE down to minE over training
        return (maxE - minE) * max((lastEpisode - episode) / lastEpisode, 0) + minE

    def change_reward(self, reward, done):
        if done and reward == 1:
            return 10
        if done and reward == -1:
            return -10
        if reward is None and done:
            return -20
        if done:
            return 1
        if reward == 0:
            return 1 / 42
        return reward

    def change_reward_streak(self, reward, done, reshapedBoard, action, useStreak):
        if done and reward == 1:
            return 20
        if done and reward == -1:
            return -20
        if reward is None and done:
            return -40
        if done:
            return 1
        if reward == 0 and useStreak:
            return 1 / 42 + self.streakReward(self.player, reshapedBoard, action)
        if reward == 0:
            return 1 / 42
        return reward

    def streakReward(self, player, reshapedBoard, action):
        verticalReward = 0
        horizontalReward = 0
        if self.longestVerticalStreak(player, reshapedBoard, action) == 3:
            verticalReward = 3
        if self.longestHorizontalStreak(player, reshapedBoard, action) == 3:
            horizontalReward = 3
        return verticalReward + horizontalReward + self.longestDiagonalStreak(player, reshapedBoard, action)

    def longestVerticalStreak(self, player, reshapedBoard, action):
        count = 0
        wasZero = False
        for i in range(5, 0, -1):
            if reshapedBoard[0][player][i][action] == 0:
                wasZero = True
            if reshapedBoard[0][player][i][action] == 1 and wasZero:
                count = 0
                wasZero = False
            count += reshapedBoard[0][player][i][action]
        if reshapedBoard[0][0][0][action] == 0:
            return 0
        return count

    def longestHorizontalStreak(self, player, reshapedBoard, action):
        count = 0
        rowOfAction = self.rowOfAction(player, reshapedBoard, action)
        wasZero = False
        for i in range(7):
            if reshapedBoard[0][player][rowOfAction][i] == 0:
                wasZero = True
            if reshapedBoard[0][player][rowOfAction][i] == 1 and wasZero:
                count = 0
                wasZero = False
            count += reshapedBoard[0][player][rowOfAction][i]
        return count

    def longestDiagonalStreak(self, player, reshapedBoard, action):
        rowOfAction = self.rowOfAction(player, reshapedBoard, action)
        for row in range(4):
            for col in range(5):
                if reshapedBoard[0][player][row][col] == reshapedBoard[0][player][row + 1][col + 1] == \
                        reshapedBoard[0][player][row + 2][col + 2] == 1 and \
                        self.actionInDiagonal1(action, row, col, rowOfAction):
                    return 3
        for row in range(5, 1, -1):
            for col in range(4):
                if reshapedBoard[0][player][row][col] == reshapedBoard[0][player][row - 1][col + 1] == \
                        reshapedBoard[0][player][row - 2][col + 2] == 1 and \
                        self.actionInDiagonal2(action, row, col, rowOfAction):
                    return 3
        return 0

    def actionInDiagonal1(self, action, row, col, rowOfAction):
        return (rowOfAction == row and action == col
                or rowOfAction == row + 1 and action == col + 1
                or rowOfAction == row + 2 and action == col + 2)

    def actionInDiagonal2(self, action, row, col, rowOfAction):
        return (rowOfAction == row and action == col
                or rowOfAction == row - 1 and action == col + 1
                or rowOfAction == row - 2 and action == col + 2)

    def rowOfAction(self, player, reshapedBoard, action):
        rowOfAction = 10
        for i in range(6):
            if reshapedBoard[0][player][i][action] == 1:
                rowOfAction = min(i, rowOfAction)
        return rowOfAction

    def policyAction(self, board, episode, lastEpisode, minEp=0.1, maxEp=0.9):
        reshaped = self.reshape(torch.tensor(board))
        output = self.policy(reshaped).view(-1)
        return self.takeAction(output, reshaped, self.epsilon(maxEp, minEp, episode, lastEpisode))

    def takeAction(self, actionList: torch.Tensor, board, epsilon, train=True):
        if np.random.random() < epsilon and train:
            # explore: sample uniformly, but only among columns that are not full
            return np.random.choice([i for i in range(len(actionList)) if board[0][0][0][i] == 1])
        else:
            # exploit: mask out full columns before taking the argmax
            for i in range(7):
                if board[0][0][0][i] == 0:
                    actionList[i] = float('-inf')
            return torch.argmax(actionList).item()

    def reshape(self, board: torch.Tensor, unsqz=True):
        tensor = board.view(-1, 7).long()
        # channel [0] = empty cells (1 where a piece can still be dropped),
        # channel [1] = player 1 pieces, channel [2] = player 2 pieces
        a = F.one_hot(tensor, 3).permute([2, 0, 1])
        if unsqz:
            return torch.unsqueeze(a, 0).float().to(self.device)
        return a.float().to(self.device)

    def preprocessState(self, state):
        return self.reshape(torch.tensor(state), True)

    def trainActionFromPolicy(self, state, action):
        state = self.preprocessState(state)
        value = self.policy(state).view(-1).to(self.device)
        return value[action].to(self.device)

    def trainActionFromTarget(self, next_state, reward, done):
        next_state = self.preprocessState(next_state)
        target = self.target(next_state)
        target = torch.max(target, 1)[0].item()
        target = reward + ((self.gamma * target) * (1 - done))
        return torch.tensor(target).to(self.device)

    def train(self):
        if len(self.buffer) > self.batch_size:
            self.optimizer.zero_grad()
            states, actions, rewards, next_states, dones = self.buffer.sample(self.batch_size, self.device)
            meanLoss = 0
            for i in range(self.batch_size):
                value = self.trainActionFromPolicy(states[i], actions[i])
                target = self.trainActionFromTarget(next_states[i], rewards[i], dones[i])
                loss = self.loss_function(value, target)
                loss.backward()
                meanLoss += loss
            self.optimizer.step()
            return meanLoss / self.batch_size
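The Trainer exposes reset, policyAction, step, change_reward, addExperience, train and synchronize, but no driver loop appears in this excerpt. The following is a sketch of how such a loop might look; the hyperparameter values, the synchronization interval, and the (state, action, reward, next_state, done) experience layout are assumptions, not the original code.

# Hypothetical training driver for the Trainer above; hyperparameters and the
# experience tuple layout are assumptions.
trainer = Trainer(hidden_dim=128, buffer_size=10000, gamma=0.99,
                  batch_size=64, device=torch.device("cpu"), writer=None)
episodes = 1000
for episode in range(episodes):
    observation = trainer.reset()
    done = False
    while not done:
        state = observation['board']
        action = trainer.policyAction(state, episode, episodes)
        observation, reward, done, _ = trainer.step(int(action))
        reward = trainer.change_reward(reward, done)
        trainer.addExperience((state, action, reward, observation['board'], int(done)))
        trainer.train()
    if episode % 10 == 0:
        trainer.synchronize()  # copy policy weights into the target network
trainer.save("policy.pth")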
correct += (pred_t == labels_t).sum().item()
pred_temp.append(torch.cat(pt))
true_temp.append(torch.cat(tt))

test_acc = correct / total
test_acc_list.append(test_acc)
print('test Acc: {}'.format(test_acc))

# keep the results from the best-performing epoch
best_result_index = np.argmax(np.array(test_acc_list))
loss_total.append(loss_list[best_result_index])
acc_total.append(test_acc_list[best_result_index])
pred_total.append(pred_temp[best_result_index].tolist())
true_total.append(true_temp[best_result_index].tolist())

file_name = 'res/2gram MCSC'
torch.save(net.state_dict(), file_name + '.pth')

loss_DF = pd.DataFrame(loss_total)
loss_DF.to_csv(file_name + " loss.csv")
acc_DF = pd.DataFrame(acc_total)
acc_DF.to_csv(file_name + " acc.csv")
pred_DF = pd.DataFrame(pred_total)
pred_DF.to_csv(file_name + " pred.csv")
true_DF = pd.DataFrame(true_total)
true_DF.to_csv(file_name + " true.csv")
running_loss = 0.0
st2 = time.time()
for i in range(len(DataObject_X)):
    x = torch.from_numpy(DataObject_X[i])
    y = torch.tensor([DataObject_Y[i]])
    if torch.cuda.is_available():
        x = x.cuda()
        y = y.cuda()
    x = x.float()

    optimizer.zero_grad()
    yhat = model(x)
    loss = criterion(yhat, y)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    if i % 200 == 199:  # print every 200 iterations
        print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
        running_loss = 0.0

st3 = time.time()
print(st3 - st2)
print(epochs, running_loss)

PATH = '/home/prathmesh/Desktop/Mesh_Neural_Network/new_models.pth'
torch.save(model.state_dict(), PATH)
        roadImageNew1 = stacked_roadImage1.reshape(args.batch_size, 800 * 800)
        roadImageNewf1 = roadImageNew1.type(torch.float)
        sample1, road_image1 = Variable(sampleNew1).to(device), Variable(roadImageNewf1).to(device)

        output1 = model(sample1)
        loss_func = nn.BCELoss()
        vloss = loss_func(output1, road_image1)
        validation_loss += vloss.item()  # sum up batch loss

        outputNew = output1 > 0.5
        outputNew1 = outputNew.type(torch.float)
        iou = compute_iou(outputNew1, road_image1)
        total_iou += iou

    validation_loss /= len(valLoader)
    total_iou /= len(valLoader)
    return validation_loss, total_iou


for epoch in range(0, args.epoch + 1):
    total_loss = train(epoch)
    print("Epoch: " + str(epoch) + " total training loss: " + str(total_loss) + "\n")
    val_loss, total_iou = validation()
    print("Epoch: " + str(epoch) + " total validation loss: " + str(val_loss) + " total iou: " + str(total_iou) + "\n")
    model_file = 'model5/resnet18_model2_' + str(epoch) + '.pth'
    torch.save({'modelRoadMap_state_dict': model.state_dict()}, model_file)
    print('\nSaved model to ' + model_file)
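compute_iou is called above but not defined in this excerpt. A minimal sketch of such a helper for binary masks, assuming both tensors hold 0/1 values of the same shape; this is an illustration, not the project's actual implementation.

# Hypothetical IoU helper matching the call above; assumes 0/1-valued tensors.
def compute_iou(pred, target, eps=1e-6):
    pred = pred.view(-1)
    target = target.view(-1)
    intersection = (pred * target).sum()
    union = pred.sum() + target.sum() - intersection
    return (intersection / (union + eps)).item()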
    inputs, labels = data[0].to(device), data[1].to(device)
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss_val.append(loss.item())
    validation_loss_store.append([epoch, loss.item()])
    val_loss += loss

val_loss = val_loss / float(i + 1)
if val_loss < min_loss:
    min_loss = val_loss
    no_impr_epoch = 0
    # save the best model
    torch.save(model.state_dict(),
               'weight/' + 'epoch_' + str(epoch + 1) + 'loss_' + str(float(val_loss.cpu().numpy())) + '.pt')
    print('performance improved with validation loss ' + str(float(val_loss.cpu().numpy())))
    file.write('--------------------------------------------------------------------\n')
    file.write('performance improved with validation loss = ' + str(float(val_loss.cpu().numpy())) + '\n')
    file.write('epoch = ' + str(epoch + 1) + '\t' + 'step = ' + str(step) + '\t'
               + 'val_loss = ' + '\t' + str(np.mean(loss_val)) + '\n')
    file.write('--------------------------------------------------------------------\n\n')
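The snippet resets no_impr_epoch when validation improves, but the non-improving branch is not shown here. A minimal sketch of the early-stopping counter such a variable usually drives; the patience value, num_epochs, and the run_validation helper are assumptions, not the original code.

# Hypothetical early-stopping loop; patience, num_epochs, and run_validation
# are assumptions introduced for illustration.
patience, num_epochs = 10, 100
min_loss, no_impr_epoch = float('inf'), 0
for epoch in range(num_epochs):
    val_loss = run_validation()  # hypothetical helper returning the epoch's mean validation loss
    if val_loss < min_loss:
        min_loss, no_impr_epoch = val_loss, 0
    else:
        no_impr_epoch += 1
        if no_impr_epoch >= patience:
            print('no improvement for ' + str(patience) + ' epochs, stopping early')
            break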
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss += loss.item()
    if i % 100 == 99:  # print every 100 mini-batches
        print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
        running_loss = 0.0
    i += 1

print(i)
print('Finished Training')

SAVE_PATH = './prototype_net'
torch.save(net.state_dict(), SAVE_PATH)

# Evaluate accuracy on the validation set
correct = 0
total = 0
with torch.no_grad():
    for data in batch_feeder(X_val, y_val, batch_size):
        images, labels = data
        images = torch.from_numpy(images).float().to(device)
        labels = torch.from_numpy(labels).long().to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on validation images: %d %%' % (100 * correct / total))