def main():
    from train import Trainer
    from model import Net
    # Adjust the import below to wherever VinConfig is defined in this project;
    # the snippet references it without importing it.
    from config import VinConfig

    net = Net(VinConfig)
    net = net.cuda()
    net = net.double()  # run the network in float64
    trainer = Trainer(VinConfig, net)
    trainer.train()
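The function is defined but never invoked; a standard entry-point guard, if this file is meant to be run directly:

if __name__ == "__main__":
    main()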
Example #2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

from model import Net
import dataLoader as data

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = Net().train().to(device)
net = net.double()  # keep weights in float64 to match the input tensors

learning_rate = 0.01

writer = SummaryWriter()
# Resume from an existing checkpoint. Comment these three lines out on the
# first run, before ./training_weights.ckpt exists.
g_checkpoint = torch.load("./training_weights.ckpt",
                          map_location=torch.device(device))
net.load_state_dict(g_checkpoint['model'])
# SGD with momentum; the optimizer state is restored alongside the weights
# so momentum buffers survive the restart.
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.5)
optimizer.load_state_dict(g_checkpoint['optimizer'])

criterion = nn.CrossEntropyLoss()
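
The snippet wires up the model, optimizer, loss, and TensorBoard writer but stops short of the loop itself. Below is a minimal sketch of one training pass, assuming the local dataLoader module exposes an iterable train_loader of (inputs, labels) batches; that attribute name is a guess, not shown above.

# Hypothetical loop; `data.train_loader` is an assumed attribute of the
# local dataLoader module, not confirmed by the snippet above.
for step, (inputs, labels) in enumerate(tqdm(data.train_loader)):
    inputs = inputs.double().to(device)
    labels = labels.to(device)

    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    writer.add_scalar("loss/train", loss.item(), step)

# Persist both model and optimizer state in the format loaded above.
torch.save({'model': net.state_dict(),
            'optimizer': optimizer.state_dict()},
           "./training_weights.ckpt")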
Example #3
import cv2
import numpy as np
import torch

from model import Net

# The original snippet references these module-level names without defining
# them; the values below are illustrative placeholders.
model_path = "./training_weights.ckpt"  # hypothetical checkpoint path
folder_size = 2  # calc_reward1 reads distance_list[0] and [1]


class LFTReward:

    def __init__(self):
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.encoder_net = Net().to(self.device)
        self.encoder_net.double()  # the encoder runs in float64, matching its inputs
        self.encoder_net.load_state_dict(
            torch.load(model_path, map_location=self.device))
        self.encoder_net.eval()

        # Encode the goal images once; rewards compare against these latents.
        img_g = torch.tensor(np.asarray(self.load_data())).to(self.device)
        self.img_g_z = self.encoder_net(img_g)

    @staticmethod
    def distance(x1, x2):
        # Elementwise squared difference between two latent vectors.
        diff = torch.abs(x1 - x2).contiguous()
        return torch.pow(diff, 2)

    @staticmethod
    def load_data():
        # Read the pre-rendered goal frames and scale pixels to [0, 1].
        img_ds = []
        for i in range(1, folder_size + 1):
            img_d = cv2.imread(f'goal_abstract/{i}/bw_{i}.png')
            img_ds.append(img_d / 255.0)
        return img_ds

    @staticmethod
    def rgb2gray(rgb):
        # Standard ITU-R BT.601 luma weights.
        return np.dot(rgb[..., :3], [0.2989, 0.5870, 0.1140])

    def reward(self, s, j):
        # s arrives as an 84x84 grayscale observation tensor.
        resized_img = s.cpu().detach().numpy()
        resized_img = resized_img / 255.0

        # Stack the grayscale frame into three channels for the encoder.
        rgb_image = cv2.merge((resized_img, resized_img, resized_img))
        rgb_image = rgb_image.reshape((-1, 84, 84, 3))

        # Probe a fixed pixel: black (0) or the ~0.75 grey tone is treated
        # as the goal wall being in view.
        pixel_val = rgb_image[0, 10, 60, 0]
        goal_wall = pixel_val == 0 or abs(pixel_val - 0.75) < 0.01

        img_s = torch.tensor(rgb_image).to(self.device).double()
        img_s_z = self.encoder_net(img_s)

        return self.calc_reward1(img_s_z, goal_wall)

    def calc_reward1(self, img_s_z, goal_wall):
        # Sparse reward: +50 per goal latent that the frame's latent is close to.
        # (goal_wall is accepted for signature compatibility but unused here.)
        reward_list = []
        for frame in range(img_s_z.shape[0]):
            distance_list = []
            for i in range(self.img_g_z.shape[0]):
                distance = torch.clamp(self.distance(img_s_z[frame], self.img_g_z[i]),
                                       max=5).cpu().detach().numpy()
                distance_list.append(np.sum(distance))

            reward = 0
            if distance_list[0] < 2.5:
                reward += 50
            if distance_list[1] < 3:
                reward += 50
            reward_list.append(reward)

        return reward_list

    def calc_reward2(self, img_s_z):
        # Dense alternative: reward decays exponentially with latent distance,
        # summed over every goal image.
        reward_list = []
        for frame in range(img_s_z.shape[0]):
            reward_sum_per_frame = 0
            for i in range(self.img_g_z.shape[0]):
                distance = torch.clamp(self.distance(img_s_z[frame], self.img_g_z[i]),
                                       max=5).cpu().detach().numpy()
                reward_sum_per_frame += np.sum(np.exp(-distance / 10.0))
            reward_list.append(reward_sum_per_frame)

        return reward_list
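
A minimal usage sketch, assuming the project's Net accepts NHWC float64 tensors (as the class itself assumes); the synthetic frame below is a placeholder, not taken from the original.

# Hypothetical driver: push one synthetic 84x84 grayscale frame through
# the reward model. LFTReward.reward() stacks it to (1, 84, 84, 3) itself.
if __name__ == "__main__":
    lft = LFTReward()
    fake_obs = (torch.rand(84, 84) * 255).double()
    print(lft.reward(fake_obs, j=0))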