class Tester():
    def __init__(self, render_flag):
        self.model = DDQN(36, 36)
        self.render_flag = render_flag
        self.width = 6
        self.height = 6
        # 6x6 board with 6 mines
        self.env = MineSweeper(self.width, self.height, 6)
        if (self.render_flag):
            self.renderer = Render(self.env.state)
        self.load_models(20000)

    def get_action(self, state):
        state = state.flatten()
        mask = (1 - self.env.fog).flatten()
        action = self.model.act(state, mask)
        return action

    def load_models(self, number):
        path = "./pre-trained/ddqn_dnn" + str(number) + ".pth"
        weights = torch.load(path)
        self.model.load_state_dict(weights['current_state_dict'])
        self.model.epsilon = 0

    def do_step(self, action):
        i = int(action / self.width)
        j = action % self.width
        if (self.render_flag):
            self.renderer.state = self.env.state
            self.renderer.draw()
            self.renderer.bugfix()
        next_state, terminal, reward = self.env.choose(i, j)
        return next_state, terminal, reward
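A minimal sketch of how this Tester could be driven for a single episode is shown below; the loop and the __main__ guard are assumptions for illustration, not part of the original file, and they rely only on the attributes and methods defined above.

# Sketch only (assumed usage, not from the original source): play one episode
# with the pre-trained model using the Tester defined above.
if __name__ == "__main__":
    tester = Tester(render_flag=False)
    state = tester.env.state
    terminal = False
    total_reward = 0
    while not terminal:
        action = tester.get_action(state)
        state, terminal, reward = tester.do_step(action)
        total_reward += reward
    print("total reward:", total_reward)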
def render(self):
    if not self.map:
        self.build()
    if self.dry_run:
        self.output_error("Dry run completed successfully...")
    renderer = Render(self.map, self.image, self.format, self.world_file)
    if self.image:
        renderer.render_file()
    else:
        renderer.print_stream()
    self.rendered = True
class Play():
    def __init__(self):
        self.width = 20
        self.height = 20
        self.bombs = 20
        self.env = MineSweeper(self.width, self.height, self.bombs)
        self.renderer = Render(self.env.state)
        self.renderer.state = self.env.state

    def do_step(self, i, j):
        i = int(i / 30)
        j = int(j / 30)
        next_state, terminal, reward = self.env.choose(i, j)
        self.renderer.state = self.env.state
        self.renderer.draw()
        return next_state, terminal, reward
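The division by 30 in do_step suggests its arguments are pixel coordinates on a board drawn at roughly 30 pixels per cell. A hedged driver sketch (not from the original source, and the 30-pixel assumption is inferred from the code above) could look like:

# Sketch only: drive Play from the console by converting cell indices back to
# the pixel coordinates that do_step expects (30 px per cell is an assumption).
if __name__ == "__main__":
    game = Play()
    terminal = False
    while not terminal:
        row = int(input("row (0-19): "))
        col = int(input("col (0-19): "))
        _, terminal, reward = game.do_step(row * 30, col * 30)
        print("reward:", reward)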
def downloadDatasets(datasets_selected):
    if not datasets_selected:
        print('No selected datasets!\n')
        return
    DownloadDatasets(datasets_selected)
    print('\n')


def generateData(datasets_selected, export_format):
    if not datasets_selected:
        print('No selected datasets!\n')
        return
    if not export_format:
        print('No export format selected!\n')
        return
    number_of_sets = int(input('Number of sets: '))
    Sampler(datasets_selected, NUMBER_OF_SETS=number_of_sets)
    Generator(export_format)
    print('\n')


render = Render(downloadDatasets, generateData)
render.init()
while (True):
    render.menu()
import pygame

from asset_manager import AssetManager
from renderer import Render
from level import Level
from cat import Cat

width = 256 * 6
height = 24 * 6

pygame.init()
pygame.display.init()
pygame.display.set_caption("pet")
pygame.display.set_icon(AssetManager.getCat(seq=0))

done = False
fps = 2  # defined but unused in this snippet; a clock.tick(fps) call would normally cap the frame rate

renderer = Render(width, height)
cat = Cat()
level = Level(cat)
renderer.render_level(level)

clock = pygame.time.Clock()
while not done:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            done = True
    renderer.render_level(level)
    cat.location += 1
    print(cat.location)
    pygame.display.flip()
class Driver():
    def __init__(self, width, height, bomb_no, render_flag):
        self.width = width
        self.height = height
        self.bomb_no = bomb_no
        self.box_count = width * height
        self.env = MineSweeper(self.width, self.height, self.bomb_no)

        self.current_model = DDQN(self.box_count, self.box_count)
        self.target_model = DDQN(self.box_count, self.box_count)
        self.target_model.eval()
        self.optimizer = torch.optim.Adam(self.current_model.parameters(), lr=0.003, weight_decay=1e-5)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=2000, gamma=0.95)
        self.target_model.load_state_dict(self.current_model.state_dict())

        self.buffer = Buffer(100000)
        self.gamma = 0.99
        self.render_flag = render_flag
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.90
        self.reward_threshold = 0.12
        self.reward_step = 0.01
        self.batch_size = 4096
        self.tau = 5e-5
        self.log = open("./Logs/ddqn_log.txt", 'w')

        if (self.render_flag):
            self.Render = Render(self.env.state)

    def load_models(self, number):
        path = "./pre-trained/ddqn_dnn" + str(number) + ".pth"
        weights = torch.load(path)
        self.current_model.load_state_dict(weights['current_state_dict'])
        self.target_model.load_state_dict(weights['target_state_dict'])
        self.optimizer.load_state_dict(weights['optimizer_state_dict'])
        self.current_model.epsilon = weights['epsilon']

    ### Get an action from the DDQN model by supplying it State and Mask
    def get_action(self, state, mask):
        state = state.flatten()
        mask = mask.flatten()
        action = self.current_model.act(state, mask)
        return action

    ### Does the action and returns Next State, If terminal, Reward, Next Mask
    def do_step(self, action):
        i = int(action / self.width)
        j = action % self.width
        if (self.render_flag):
            self.Render.state = self.env.state
            self.Render.draw()
            self.Render.bugfix()
        next_state, terminal, reward = self.env.choose(i, j)
        next_fog = 1 - self.env.fog
        return next_state, terminal, reward, next_fog

    ### Reward Based Epsilon Decay
    def epsilon_update(self, avg_reward):
        if (avg_reward > self.reward_threshold):
            self.current_model.epsilon = max(self.epsilon_min, self.current_model.epsilon * self.epsilon_decay)
            self.reward_threshold += self.reward_step

    def TD_Loss(self):
        ### Samples batch from buffer memory
        state, action, mask, reward, next_state, next_mask, terminal = self.buffer.sample(self.batch_size)

        ### Converts the variables to tensors for processing by DDQN
        state = Variable(FloatTensor(float32(state)))
        mask = Variable(FloatTensor(float32(mask)))
        next_state = FloatTensor(float32(next_state))
        action = LongTensor(float32(action))
        next_mask = FloatTensor(float32(next_mask))
        reward = FloatTensor(reward)
        done = FloatTensor(terminal)

        ### Predicts Q value for present and next state with current and target model
        q_values = self.current_model(state, mask)
        next_q_values = self.target_model(next_state, next_mask)

        # Calculates Loss:
        #   If not Terminal: Loss = (reward + gamma*Q_val(next_state)) - Q_val(current_state)
        #   If Terminal:     Loss = reward - Q_val(current_state)
        q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)
        next_q_value = next_q_values.max(1)[0]
        expected_q_value = reward + self.gamma * next_q_value * (1 - done)
        loss = (q_value - expected_q_value.detach()).pow(2).mean()
        loss_print = loss.item()

        # Propagates the Loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.scheduler.step()

        # Soft-updates the target network towards the current network
        for target_param, local_param in zip(self.target_model.parameters(), self.current_model.parameters()):
            target_param.data.copy_(self.tau * local_param.data + (1.0 - self.tau) * target_param.data)

        return loss_print

    def save_checkpoints(self, batch_no):
        path = "./pre-trained/ddqn_dnn" + str(batch_no) + ".pth"
        torch.save({
            'epoch': batch_no,
            'current_state_dict': self.current_model.state_dict(),
            'target_state_dict': self.target_model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'epsilon': self.current_model.epsilon
        }, path)

    def save_logs(self, batch_no, avg_reward, loss, wins):
        res = [
            str(batch_no),
            "\tAvg Reward: ", str(avg_reward),
            "\t Loss: ", str(loss),
            "\t Wins: ", str(wins),
            "\t Epsilon: ", str(self.current_model.epsilon)
        ]
        log_line = " ".join(res)
        print(log_line)
        self.log.write(log_line + "\n")
        self.log.flush()
from simulation import System, loops
from renderer import Render
from math import pi

test = System()
test.add_point(350, 600)
test.add_point(350, 600 - 170/2)
test.add_point(350, 600 - 585/2)
test.add_base(0, 215.1/2, 0, -pi)
test.add_line(0, 1, 389.1/2)
test.add_line(1, 2, 856.4/2)
test.add_draw(2)
test.main_loop(end=loops(5000),
               render=Render([700, 800], {"Line": 'black',
                                          "Point": 'CadetBlue1',
                                          "Drive": 'maroon',
                                          "Drawer": "red"}))
def __init__(self, args):
    ## configs
    self.device = 'cuda:0' if args.gpu else 'cpu'
    self.checkpoint_path = args.checkpoint
    self.detect_human_face = args.detect_human_face
    self.render_video = args.render_video
    self.output_size = args.output_size
    self.image_size = 64
    self.min_depth = 0.9
    self.max_depth = 1.1
    self.border_depth = 1.05
    self.xyz_rotation_range = 60
    self.xy_translation_range = 0.1
    self.z_translation_range = 0
    self.fov = 10  # in degrees

    self.renderer = Render({"device": self.device})
    self.depth_rescaler = lambda d: (1 + d) / 2 * self.max_depth + (1 - d) / 2 * self.min_depth  # (-1,1) => (min_depth,max_depth)
    self.depth_inv_rescaler = lambda d: (d - self.min_depth) / (self.max_depth - self.min_depth)  # (min_depth,max_depth) => (0,1)
    self.rot_center_depth = (self.min_depth + self.max_depth) / 2

    # pinhole camera intrinsics derived from the field of view
    fx = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
    fy = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
    cx = (self.image_size - 1) / 2
    cy = (self.image_size - 1) / 2
    K = [[fx, 0., cx],
         [0., fy, cy],
         [0., 0., 1.]]
    K = torch.FloatTensor(K).to(self.device)
    self.inv_K = torch.inverse(K).unsqueeze(0)
    self.K = K.unsqueeze(0)

    ## NN models
    self.netD = EDDeconv(cin=3, cout=1, nf=64, zdim=256, activation=None)
    self.netA = EDDeconv(cin=3, cout=3, nf=64, zdim=256)
    self.netL = Encoder(cin=3, cout=4, nf=32)
    self.netV = Encoder(cin=3, cout=6, nf=32)
    self.netD = self.netD.to(self.device)
    self.netA = self.netA.to(self.device)
    self.netL = self.netL.to(self.device)
    self.netV = self.netV.to(self.device)
    self.load_checkpoint()
    self.netD.eval()
    self.netA.eval()
    self.netL.eval()
    self.netV.eval()

    ## face detector
    if self.detect_human_face:
        from facenet_pytorch import MTCNN
        self.face_detector = MTCNN(select_largest=True, device=self.device)

    ## renderer
    if self.render_video:
        from unsup3d_extended.renderer import Renderer
        assert 'cuda' in self.device, 'A GPU device is required for rendering because the neural_renderer only has GPU implementation.'
        cfgs = {
            'device': self.device,
            'image_size': self.output_size,
            'min_depth': self.min_depth,
            'max_depth': self.max_depth,
            'fov': self.fov,
        }
        self.renderer = Renderer(cfgs)
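The constructor above only reads five fields from args. A hedged sketch of the argparse namespace it appears to expect is given below; the flag names mirror the attribute accesses in the code, while the defaults are assumptions, not taken from the original project.

# Sketch only: building an args object compatible with the __init__ above.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--gpu', action='store_true')
parser.add_argument('--checkpoint', type=str, default='pretrained/checkpoint.pth')  # default path is assumed
parser.add_argument('--detect_human_face', action='store_true')
parser.add_argument('--render_video', action='store_true')
parser.add_argument('--output_size', type=int, default=128)  # default size is assumed
args = parser.parse_args()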