def __init__(self, args=default_args):
    """Set up the actor networks, optimizer, memory buffer and counters.

    Args:
        args: Configuration object. The attributes read here are
            ``num_actions``, ``gamma``, ``batch_size``, ``maxlenOfQueue``,
            ``target_update`` and (optionally) ``checkpoint``. Defaults to
            the module-level ``default_args``.
    """
    self.num_actions = args.num_actions
    self.object_detection = object_detection()

    # One epsilon value per integer key 0..4, all starting at 0.9.
    self.actor_epsilon = {key: 0.9 for key in range(5)}

    self.gamma = args.gamma
    self.batch_size = args.batch_size
    # Bounded buffer: once maxlenOfQueue items are stored, the oldest
    # entries are discarded as new ones are appended.
    self.memory = deque([], maxlen=args.maxlenOfQueue)

    # Two separately constructed networks with the same output size.
    # NOTE(review): the target network is not synchronised with the actor
    # here -- confirm that the initial weight copy happens elsewhere.
    self.actor = neural_network(args.num_actions)
    if use_cuda:
        self.actor.cuda()
    self.target_actor = neural_network(args.num_actions)
    if use_cuda:
        self.target_actor.cuda()

    # Only the actor's parameters are handed to the optimizer.
    self.actor_optimizer = optimizer_spec.constructor(
        self.actor.parameters(), **optimizer_spec.kwargs)

    self.target_update = args.target_update
    self.steps_since_last_update_target = 0
    self.update_number = 0

    # Honour a checkpoint setting on the supplied ``args``; previously this
    # was always read from ``default_args`` regardless of what the caller
    # passed. Fall back to the default when ``args`` does not define it.
    self.checkpoint = getattr(args, "checkpoint", default_args.checkpoint)
def target_actor():
    """Return a newly constructed ``neural_network(default_nodes)``."""
    return neural_network(default_nodes)
def target_meta_controller():
    """Return a newly constructed ``neural_network(default_meta_nodes)``."""
    return neural_network(default_meta_nodes)
import torch
import cv2
from torch.autograd import Variable
import numpy as np
from object_detection import object_detection
import gym
from model.neural_network import neural_network

# Smoke test: grab one rendered frame from the environment, build a
# five-plane input (four copies of the grayscale frame plus the 'man' mask)
# and push it through a freshly constructed network.
#
# ``print`` is called with a single parenthesised argument throughout so the
# script parses and prints identically on Python 2 and Python 3.

env = gym.make('MontezumaRevenge-v0')
observation = env.reset()
frame = env.render(mode='rgb_array')
frame = frame[30:, :, :]   # drop the first 30 rows
frame = frame[::2, ::2]    # keep every second row and column
print(frame.shape)

objDet = object_detection()
man_mask = objDet.blob_detect(frame, 'man')

actor = neural_network(18)
print(actor)

# Collapse the colour axis to a single uint8 plane, kept as a trailing axis.
frame = np.expand_dims(np.mean(frame, axis=2).astype(np.uint8), axis=2)
# debug: print frame.shape

man_mask = np.squeeze(np.array(man_mask))
man_mask = np.expand_dims(np.mean(man_mask, axis=2).astype(np.uint8), axis=2)
# debug: print man_mask.shape

# Stack the five planes along the last axis.
input_vector = np.concatenate([frame, frame, frame, frame, man_mask], axis=2)
# debug: print input_vector.shape

# NOTE(review): reshape reinterprets the buffer in memory order; it does not
# move the stacked axis to the front. If a channels-first layout is what
# neural_network expects, an explicit np.transpose would be needed --
# confirm against the model's expected input before changing.
input_vector = input_vector.reshape((5, -1, 90, 80))
# debug: print input_vector.shape

var = Variable(torch.from_numpy(input_vector).type(torch.FloatTensor))
print(max(actor(var).data))