def load(self, args):
    if args.model_dir != "":
        loadedparams = torch.load(args.model_dir, map_location=self.device)
        #self.agent = agent.Agent(args,chkpoint=loadedparams)
        self.agent = agent.Agent(args)
    else:
        self.agent = agent.Agent(args)

    self.SRmodels = []
    self.SRoptimizers = []
    self.schedulers = []
    for i in range(args.action_space):
        # CREATE THE ARCH
        if args.model == 'basic':
            model = arch.RRDBNet(3, 3, 32, args.d, gc=8)
        elif args.model == 'ESRGAN':
            model = arch.RRDBNet(3, 3, 64, 23, gc=32)
        elif args.model == 'RCAN':
            torch.manual_seed(args.seed)
            checkpoint = utility.checkpoint(args)
            if checkpoint.ok:
                module = import_module('model.rcan')
                model = module.make_model(args).to(self.device)
                kwargs = {}
            else:
                print('error loading RCAN model. QUITTING')
                quit()

        # LOAD THE WEIGHTS
        if args.model_dir != "":
            model.load_state_dict(loadedparams["sisr" + str(i)])
            print('continuing training')
        elif args.random:
            print('random init')
            model.apply(init_weights)
        elif args.model == 'ESRGAN':
            model.load_state_dict(torch.load(args.ESRGAN_PATH), strict=True)
        elif args.model == 'RCAN':
            print('RCAN loaded!')
            model.load_state_dict(torch.load(args.pre_train, **kwargs), strict=True)
        elif args.model == 'basic':
            if args.d == 1:
                model.load_state_dict(torch.load(args.basicpath_d1), strict=True)
            elif args.d == 2:
                model.load_state_dict(torch.load(args.basicpath_d2), strict=True)
            elif args.d == 4:
                model.load_state_dict(torch.load(args.basicpath_d4), strict=True)
            elif args.d == 8:
                model.load_state_dict(torch.load(args.basicpath_d8), strict=True)
            else:
                print('no pretrained model available. Random initialization of basic block')

        self.SRmodels.append(model)
        self.SRmodels[-1].to(self.device)
        self.SRoptimizers.append(torch.optim.Adam(model.parameters(), lr=1e-5))
        scheduler = torch.optim.lr_scheduler.StepLR(self.SRoptimizers[-1], 1000, gamma=0.5)
        self.schedulers.append(scheduler)
def Testing():
    print('Testing')

    ## Get dataset
    print("Get dataset")
    loader = Generator()

    ## Get agent and model
    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
    else:
        lane_agent = agent.Agent()
        lane_agent.load_weights(804, "tensor(0.5786)")

    ## testing
    print('Testing loop')
    lane_agent.evaluate_mode()

    if p.mode == 0:  # check model with test data
        for _, _, _, test_image in loader.Generate():
            _, _, ti = test(lane_agent, np.array([test_image]))
            cv2.imshow("test", ti[0])
            cv2.waitKey(0)

    elif p.mode == 1:  # check model with video
        cap = cv2.VideoCapture(
            "/Users/minootaghavi/Desktop/GA/Capstone-Project-1/test/IMG_1398.mp4")
        writer = cv2.VideoWriter('filename.avi', cv2.VideoWriter_fourcc(*'MJPG'),
                                 10, (1280, 800))
        while cap.isOpened():
            ret, frame = cap.read()
            #torch.cuda.synchronize()
            prevTime = time.time()
            frame = cv2.resize(frame, (512, 256)) / 255.0
            frame = np.rollaxis(frame, axis=2, start=0)
            _, _, ti = test(lane_agent, np.array([frame]))
            curTime = time.time()
            sec = curTime - prevTime
            fps = 1 / sec
            s = "FPS : " + str(fps)
            ti[0] = cv2.resize(ti[0], (1280, 800))
            cv2.putText(ti[0], s, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
            cv2.imshow('frame', ti[0])
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            writer.write(ti[0])
        cap.release()
        cv2.destroyAllWindows()

    elif p.mode == 2:  # check model with a picture
        test_image = cv2.imread(
            "/Users/minootaghavi/Desktop/GA/tusimple-trained model/minoo/Deep Neural Networks/data/test_set/clips/0530/1492626047222176976_0/20.img")
        test_image = cv2.resize(test_image, (512, 256)) / 255.0
        test_image = np.rollaxis(test_image, axis=2, start=0)
        _, _, ti = test(lane_agent, np.array([test_image]))
        cv2.imwrite(
            '/Users/minootaghavi/Desktop/GA/tusimple-trained model/minoo/Deep Neural Networks/save_test/image2_result.png',
            ti[0])
        cv2.imshow("test", ti[0])
        cv2.waitKey(0)

    elif p.mode == 3:  # evaluation
        print("evaluate")
        evaluation(loader, lane_agent)
import numpy as np
import agent as ag
import sumoenv as se

env_train = se.SumoEnv(gui_f=False)
env_test = se.SumoEnv(gui_f=True)
agent = ag.Agent()

EPS = 20
for ieps in range(EPS):
    for i in range(20):
        state = env_train.reset()
        done = False
        while not done:
            action = agent.policy(state)
            next_state, reward, done, rewards = env_train.step_d(action)
            agent.train(state, action, reward, 0.001, [1, 1, done, 1, 1])
            state = next_state
        env_train.close()

    state = env_test.reset()
    done = False
    while not done:
        action = agent.policy(state)
        next_state, reward, done, rewards = env_test.step_d(action)
        print(state)
        state = next_state
# tas 23.10.19
#
import environment
import agent
import logging
import sys
import stateinfo

logging.basicConfig(
    format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
    level=logging.DEBUG)  # or e.g.: .WARNING

env = environment.Environment()
agent = agent.Agent(env.action_count)

# -------------------------------------------------------------
# Helper routines of the lowest layers in environment
# -------------------------------------------------
s = env.coord2state(0, 3)
if s != 3:
    logging.error("env.coord2state(): s = %d", s)
    sys.exit(0)

s = env.coord2state(2, 1)
if s != 35:
    logging.error("env.coord2state(): s = %d", s)
    sys.exit(0)

y, x = env.state2coord(5)
def Testing():
    print('Testing')

    #########################################################################
    ## Get dataset
    #########################################################################
    print("Get dataset")
    loader = Generator()

    ##############################
    ## Get agent and model
    ##############################
    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
    else:
        lane_agent = agent.Agent()
        lane_agent.load_weights(804, "tensor(0.5786)")

    ##############################
    ## Check GPU
    ##############################
    print('Setup GPU mode')
    if torch.cuda.is_available():
        lane_agent.cuda()
        cudnn.benchmark = True
        cudnn.fastest = True

    ##############################
    ## testing
    ##############################
    print('Testing loop')
    lane_agent.evaluate_mode()

    if p.mode == 0:  # check model with test data
        for _, _, _, test_image in loader.Generate():
            _, _, ti = test(lane_agent, np.array([test_image]))
            cv2.imshow("test", ti[0])
            cv2.waitKey(0)

    elif p.mode == 1:  # check model with video
        cap = cv2.VideoCapture(
            "/home/tim/Codes-for-Lane-Detection/ERFNet-CULane-PyTorch/data/day2.MOV")
        while cap.isOpened():
            ret, frame = cap.read()
            torch.cuda.synchronize()
            prevTime = time.time()
            # frame = frame[:-489, :, :]
            frame = cv2.resize(frame, (512, 256)) / 255.0
            frame = np.rollaxis(frame, axis=2, start=0)
            _, _, ti = test(lane_agent, np.array([frame]))
            curTime = time.time()
            sec = curTime - prevTime
            fps = 1 / sec
            s = "FPS : " + str(fps)
            ti[0] = cv2.resize(ti[0], (1280, 800))
            cv2.putText(ti[0], s, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
            cv2.imshow('frame', ti[0])
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()

    elif p.mode == 2:  # check model with a picture
        test_image = cv2.imread(p.test_root_url + "clips/0530/1492720840345996040_0/20.jpg")
        test_image = cv2.resize(test_image, (512, 256)) / 255.0
        test_image = np.rollaxis(test_image, axis=2, start=0)
        _, _, ti = test(lane_agent, np.array([test_image]))
        cv2.imshow("test", ti[0])
        cv2.waitKey(0)

    elif p.mode == 3:  # evaluation
        print("evaluate")
        evaluation(loader, lane_agent)
# Implementation of the solver
import cube as c
import agent as a

# Get a new agent and a new cube
cube = c.Cube()
agent = a.Agent(cube)

# Start
def train_network_analysis(train_batch_container, file_sentence_dict, config, supplemental_batch=None):
    """
    Trains a neural network model and reports analysis using k-fold cross validation.

    :param train_batch_container: A BatchContainer object containing the data to be trained.
    :param file_sentence_dict: Map containing SentenceStructures of all files in memory. Used for generating analysis.
    :param config: A configuration instance from configparser.
    :param supplemental_batch: A BatchContainer object containing optional data to be transfer learned. Defaults to None.
    :return: Nothing.
    """
    buckets = int(config['CONFIGURATION']['BUCKETS'])
    epochs = int(config['CONFIGURATION']['EPOCHS'])

    #Setup Buckets for k fold cross validation
    batch_x, batch_y, seq_len, batch_to_file_map = kfold_bucket_generator(
        train_batch_container.bx, train_batch_container.by, train_batch_container.bs, buckets)

    #TODO(Jeff) Clean up supplemental_batch information.
    if supplemental_batch:
        sup_batch_x, sup_batch_y, sup_seq_len, _ = kfold_bucket_generator(
            supplemental_batch.bx, supplemental_batch.by, supplemental_batch.bs, epochs)

    #Create and train the model for kFoldCrossValidation
    pre_correction_confusion_matrix_list = []
    phrase_matrix_list = []
    post_correction_confusion_matrix = None

    if buckets > 1:
        for k in range(0, buckets):
            trainer = agent.Agent(config['NUM_FEATURES'], len(config['CLASS_LIST']) + 1,
                                  int(config['CONFIGURATION']['MAX_SENTENCE_LENGTH']))

            #Train supplemental for j epochs.
            if supplemental_batch:
                for j in range(0, epochs):
                    for l in range(0, epochs):
                        trainer.train(sup_batch_x[l], sup_batch_y[l], sup_seq_len[l])

            #Train normal for j epochs.
            for j in range(0, epochs):
                loss = 0
                #Train each bucket where l != current K
                for l in range(0, buckets):
                    if l == k:
                        continue
                    loss += trainer.train(batch_x[l], batch_y[l], seq_len[l])
                print("Loss for Epoch " + str(j) + " is " + str(loss) + ".")

            #Evaluate after training and store debugging files.
            cm = trainer.eval_token_level(batch_x[k], batch_y[k], seq_len[k])
            pre_correction_confusion_matrix_list.append(cm)

            file = open("./outCF", 'a')
            outstr = np.array2string(cm)
            file.write(outstr)
            file.write("\n")
            file.close()

            pm = trainer.eval_phrase_level(batch_x[k], seq_len[k], k, train_batch_container.mapping,
                                           batch_to_file_map, file_sentence_dict, config)
            phrase_matrix_list.append(pm)

            file = open("./outCFS", 'a')
            outstr = np.array2string(pm)
            file.write(outstr)
            file.write("\n")
            file.close()

            trainer.clean_up()
    else:
        trainer = agent.Agent(config['NUM_FEATURES'], len(config['CLASS_LIST']) + 1,
                              int(config['CONFIGURATION']['MAX_SENTENCE_LENGTH']))

        #Train supplemental for j epochs.
        if supplemental_batch:
            for j in range(0, epochs):
                trainer.train(sup_batch_x[0], sup_batch_y[0], sup_seq_len[0])

        #Train normal for j epochs.
        for j in range(0, epochs):
            loss = trainer.train(batch_x[0], batch_y[0], seq_len[0])
            print("Loss for Epoch " + str(j) + " is " + str(loss) + ".")

    post_correction_confusion_matrix = agent.eval_token_level_from_dict(file_sentence_dict, config)

    #Run analysis generation.
    generate_analysis_file(pre_correction_confusion_matrix_list, post_correction_confusion_matrix,
                           phrase_matrix_list, config)
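# The function above relies on a helper named kfold_bucket_generator that is not shown.
# A minimal, hypothetical sketch of what such a splitter might do, assuming bx/by/bs are
# parallel lists of examples, labels, and sequence lengths; the real helper also builds a
# batch-to-file map, which is stubbed here as plain fold indices.
import numpy as np

def kfold_bucket_generator_sketch(bx, by, bs, buckets):
    """Split parallel example/label/length lists into `buckets` roughly equal folds."""
    fold_indices = np.array_split(np.arange(len(bx)), buckets)
    batch_x = [[bx[i] for i in fold] for fold in fold_indices]
    batch_y = [[by[i] for i in fold] for fold in fold_indices]
    seq_len = [[bs[i] for i in fold] for fold in fold_indices]
    batch_to_file_map = {k: fold.tolist() for k, fold in enumerate(fold_indices)}
    return batch_x, batch_y, seq_len, batch_to_file_map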
def main(args):
    """Trains an agent to play Atari games."""

    env = environment.AtariWrapper(args.env_name, args.max_episode_length,
                                   args.replay_memory_capacity,
                                   args.observations_per_state, args.action_space)

    test_env = environment.AtariWrapper(args.env_name, args.max_episode_length,
                                        100 * args.observations_per_state,
                                        args.observations_per_state, args.action_space)

    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    checkpoint_dir = os.path.join(args.log_dir, 'checkpoint')
    summary_dir = os.path.join(args.log_dir, 'summary')
    summary_writer = tf.summary.FileWriter(summary_dir)

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory_alloc

    with tf.Session(config=config) as sess:
        player = agent.Agent(env, args.start_epsilon, args.end_epsilon,
                             args.anneal_duration, args.train_interval,
                             args.target_network_reset_interval, args.batch_size,
                             args.learning_rate, args.max_gradient_norm, args.discount)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=args.num_epochs)

        if args.load_path:
            saver.restore(sess, args.load_path)
            LOGGER.info('Restored model from "%s".', args.load_path)

        LOGGER.info('Accumulating %d experiences before training...', args.wait_before_training)

        for _ in range(args.wait_before_training):
            env.step(env.sample_action())
        env.reset()

        LOGGER.info('Accumulated %d experiences.', args.wait_before_training)

        for epoch_i in range(args.num_epochs):
            for _ in range(args.epoch_length):
                player.train()

                if args.render:
                    env.render()

                if env.done:
                    LOGGER.info('Finished episode. Total reward: %d. Length: %d.',
                                env.episode_reward, env.episode_length)

                    summary = tf.Summary()
                    summary.value.add(tag='training/episode_length',
                                      simple_value=env.episode_length)
                    summary.value.add(tag='training/episode_reward',
                                      simple_value=env.episode_reward)
                    summary.value.add(tag='training/fps', simple_value=env.fps)
                    summary.value.add(tag='training/epsilon', simple_value=player.epsilon)

                    total_time_steps = args.train_interval * player.global_step.eval()
                    summary_writer.add_summary(summary, total_time_steps)
                    summary_writer.flush()

            file_name = '{}.{:05d}-of-{:05d}'.format(args.env_name, epoch_i, args.num_epochs)
            model_path = os.path.join(checkpoint_dir, file_name)
            saver.save(sess, model_path)
            LOGGER.info('Saved model to "%s".', model_path)

            # Evaluate the model.
            total_reward = 0
            min_reward = 1e7
            max_reward = -1e7
            total_Q = 0
            summed_min_Qs = 0
            min_Q = 1e7
            summed_max_Qs = 0
            max_Q = -1e7
            time_step = 0
            num_games_finished = 0

            while time_step < args.test_length:
                local_total_reward = 0
                local_total_Q = 0
                local_min_Q = 1e7
                local_max_Q = -1e7
                local_time_step = 0
                test_env.reset()

                while not test_env.done and time_step + local_time_step < args.test_length:
                    local_time_step += 1
                    state = test_env.get_state()

                    # Occasionally try a random action (explore).
                    if random.random() < args.test_epsilon:
                        action = test_env.sample_action()
                    else:
                        action = player.get_action(state)

                    # Cast NumPy scalar to float.
                    Q = float(player.dqn.get_optimal_action_value(state))

                    # Record statistics.
                    local_total_reward += test_env.step(action)
                    local_total_Q += Q
                    local_min_Q = min(local_min_Q, Q)
                    local_max_Q = max(local_max_Q, Q)

                if not test_env.done:
                    # Discard unfinished game.
                    break

                num_games_finished += 1
                time_step += local_time_step
                total_reward += local_total_reward
                min_reward = min(min_reward, local_total_reward)
                max_reward = max(max_reward, local_total_reward)
                total_Q += local_total_Q
                summed_min_Qs += local_min_Q
                summed_max_Qs += local_max_Q
                min_Q = min(min_Q, local_min_Q)
                max_Q = max(max_Q, local_max_Q)

            # Save results.
            if num_games_finished > 0:
                # Extract more statistics.
                avg_reward = total_reward / num_games_finished
                avg_Q = total_Q / time_step
                avg_min_Q = summed_min_Qs / num_games_finished
                avg_max_Q = summed_max_Qs / num_games_finished

                summary = tf.Summary()
                summary.value.add(tag='testing/num_games_finished', simple_value=num_games_finished)
                summary.value.add(tag='testing/average_reward', simple_value=avg_reward)
                summary.value.add(tag='testing/minimum_reward', simple_value=min_reward)
                summary.value.add(tag='testing/maximum_reward', simple_value=max_reward)
                summary.value.add(tag='testing/average_Q', simple_value=avg_Q)
                summary.value.add(tag='testing/average_minimum_Q', simple_value=avg_min_Q)
                summary.value.add(tag='testing/minimum_Q', simple_value=min_Q)
                summary.value.add(tag='testing/average_maximum_Q', simple_value=avg_max_Q)
                summary.value.add(tag='testing/maximum_Q', simple_value=max_Q)
                summary_writer.add_summary(summary, epoch_i)
                summary_writer.flush()
def Training():
    print('Training')

    ####################################################################
    ## Hyper parameter
    ####################################################################
    print('Initializing hyper parameter')
    vis = visdom.Visdom(port='2020')
    loss_window = vis.line(X=torch.zeros((1, )).cpu(),
                           Y=torch.zeros((1)).cpu(),
                           opts=dict(xlabel='50 steps', ylabel='Loss',
                                     title='Training Loss', legend=['Loss']))

    #########################################################################
    ## Get dataset
    #########################################################################
    print("Get dataset")
    loader = Generator()

    ##############################
    ## Get agent and model
    ##############################
    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
        p.model_epoch = 0
    else:
        lane_agent = agent.Agent(p.model_epoch + 1)
        lane_agent.load_weights(p.model_epoch, p.model_loss)

    ##############################
    ## Check GPU
    ##############################
    print('Setup GPU mode')
    if torch.cuda.is_available():
        lane_agent.cuda()
        #torch.backends.cudnn.benchmark=True

    ##############################
    ## Loop for training
    ##############################
    print('Training loop')
    step = int(p.model_epoch * loader.size_train / p.batch_size)
    sampling_list = None
    for epoch in range(p.model_epoch + 1, p.n_epoch):
        lane_agent.training_mode()
        for inputs, target_lanes, target_h, test_image, data_list in loader.Generate(sampling_list):
            #training
            #util.visualize_points(inputs[0], target_lanes[0], target_h[0])
            print("epoch : " + str(epoch))
            print("step : " + str(step))
            loss_p = lane_agent.train(inputs, target_lanes, target_h, epoch, lane_agent, data_list)
            torch.cuda.synchronize()
            loss_p = loss_p.cpu().data

            if step % 50 == 0:
                vis.line(X=torch.ones((1, 1)).cpu() * int(step / 50),
                         Y=torch.Tensor([loss_p]).unsqueeze(0).cpu(),
                         win=loss_window,
                         update='append')
            step += 1

        lane_agent.save_model(epoch, loss_p)
        testing(lane_agent, test_image, step, loss_p)

        sampling_list = copy.deepcopy(lane_agent.get_data_list())
        lane_agent.sample_reset()

        #evaluation
        if p.do_eval and epoch >= 0 and epoch % 1 == 0:
            print("evaluation")
            lane_agent.evaluate_mode()
            th_list = [0.8]
            index = [3]
            lane_agent.save_model(int(step / 100), loss_p)

            for idx in index:
                print("generate result")
                test.evaluation(loader, lane_agent, index=idx,
                                name="test_result_" + str(epoch) + "_" + str(idx) + ".json")

            for idx in index:
                print("compute score")
                with open("eval_results/eval_result2_" + str(idx) + "_.txt", 'a') as make_file:
                    make_file.write("epoch : " + str(epoch) + " loss : " + str(loss_p.cpu().data))
                    make_file.write(
                        evaluation.LaneEval.bench_one_submit(
                            "test_result_" + str(epoch) + "_" + str(idx) + ".json",
                            "test_label.json"))
                    make_file.write("\n")

                with open("eval_results/eval_result_" + str(idx) + "_.txt", 'a') as make_file:
                    make_file.write("epoch : " + str(epoch) + " loss : " + str(loss_p.cpu().data))
                    make_file.write(
                        evaluation.LaneEval.bench_one_submit(
                            "test_result_" + str(epoch) + "_" + str(idx) + ".json",
                            "test_label.json"))
                    make_file.write("\n")

        if int(step) > 700000:
            break
def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad


def tderror(rt, qval2, qval1, l):
    return nd.nansum((rt + l * qval2 - qval1)**2)


# Setup
num_episodes = 20

# Create main loop
env = gym.make('SuperMarioBros-1-1-v0')
env.reset()
a = agent.Agent(env.observation_space.shape, env.action_space.shape)

for episode in range(num_episodes):
    observation, reward, done, info = env.step([0] * 6)
    observation = preprocess(observation)
    # env.render()
    for epoch in range(10):
        #action = env.action_space.sample() # your agent here (this takes random actions)
        with autograd.record():
            action1, max_ind1, qval1 = a.action_nd(observation)
            observation, reward, done, info = env.step(action1)
            if done:
                print('Epoch {}: Resetting environment\n'.format(epoch))
                break
            observation = preprocess(observation)
            action2, max_ind2, qval2 = a.action(observation)
            tdloss = tderror(reward, qval2, qval1, 0.99)
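        # The snippet stops right after computing the TD loss. A hypothetical
        # continuation of the update step: `params` (the Q-network parameters as
        # mxnet NDArrays) is an assumption, since the snippet does not show how the
        # agent exposes them, and in practice the target term qval2 is usually
        # detached before backpropagating.
        tdloss.backward()       # populate param.grad for the parameters behind qval1
        SGD(params, lr=1e-4)    # apply the hand-rolled SGD defined above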
                                      is_db_v2=is_db_v2)
    name = given_weight[:-3]
else:
    if network == 'NN':
        env = environment.Environment(path=path_data,
                                      path_weights=name + '_weights.h5',
                                      is_db_v2=is_db_v2)
    elif network == 'LSTM':
        env = environment_LSTM.Environment(path=path_data,
                                           path_weights=name + '_weights.h5',
                                           is_db_v2=is_db_v2)

if network == 'NN':
    if is_db_v2:
        agent = agent.Agent(env, (25, ))  # 27
    else:
        agent = agent.Agent(env, (24, ))  # 26
elif network == 'LSTM':
    agent = agent_LSTM.Agent(env)

list_users = os.listdir(env.path)
if initial_range != "-1" and final_range != "-1":
    list_users = list_users[int(initial_range):int(final_range)]
elif initial_range != "-1":
    list_users = list_users[int(initial_range):]
elif final_range != "-1":
    list_users = list_users[:int(final_range)]
def main():
    #delete old game files
    i = 0
    while True:
        try:
            os.remove(options.path + "/quackgame-%i.gcg" % i)
            os.remove(options.path + "/cs221game-%i" % i)
            i += 1
        except OSError:
            break

    #start quackle game in background
    print "starting quackle..."
    quackle = subprocess.Popen(
        "./test --repetitions=%i lexicon=cs221 --mode=cs221 --quiet" % options.numgames,
        cwd=options.path, shell=True)
    sleep(1)
    print "done."

    for i in xrange(0, options.numgames):
        sleep(1)
        you = open(options.path + "/quackgame-%i.gcg" % i, 'r')
        me = open(options.path + "/cs221game-%i" % i, 'w+')
        b = board.Board()
        AI = agent.Agent(b, quackle=True, montecarlo=True, heuristic=weights_MC)
        scoreYou = 0
        scoreMe = 0
        if not options.silent:
            print b
        OK = True
        yourMove = ""
        myMove = ""
        print "-------------------------------------------------"
        print "playing game %i of %i" % (i + 1, options.numgames)
        print "-------------------------------------------------"
        while True:
            y = you.readline().strip()
            #m = me.readline().strip()
            if y != yourMove and y != "":
                yourMove = y.split()
                #print "yourMove",yourMove
                player = yourMove[0]
                if player == "quackle":
                    if not options.silent:
                        print "quackle move", yourMove
                    else:
                        print "q",
                    orientation = yourMove[1]
                    loc = (int(yourMove[2]), int(yourMove[3]))
                    if len(yourMove) == 5:
                        word = yourMove[4].upper()
                        if not options.silent:
                            print "word, loc, score:", word, loc, orientation, scoreYou
                        scoreYou += b.insertWord(word, loc, orientation, debug=False)
                    elif len(yourMove) > 5:
                        #abort
                        print yourMove
                        print "breaking"
                        break
                    else:
                        if not options.silent:
                            print "quackle pass?"
                elif player == "cs221":
                    rack = yourMove[-1]
                    #wildcard tiles, add a vowel if we have none
                    #otherwise pick a random letter
                    wildcard = ''
                    if sum([1 for v in vowels if v in rack]) > 0:
                        wildcard = vowels[randint(0, 5)]
                    else:
                        wildcard = alphabet[randint(0, 25)]
                    #str.replace returns a new string; assign it back so the
                    #wildcard actually replaces the '?' in the rack
                    rack = rack.replace('?', wildcard)
                    move = AI.move([t for t in rack])
                    if not options.silent:
                        print "\ncs221 move", move
                    else:
                        print "c",
                    if move != None:
                        #write move to file
                        (word, pos, orientation, usedTiles, score) = move
                        if len(word) > 0:
                            row, col = pos
                            scoreMe += score
                            if wildcard in usedTiles:
                                word = list(word)
                                word[word.index(wildcard)] = wildcard.lower()
                                word = ''.join(word)
                                #print "wildcard used",wildcard,word,rack
                            me.write("%s %s %s %s %s\n" % (word, row, col, orientation, score))
                            me.flush()
                        else:
                            #tile exchange
                            tile = pos
                            me.write("%s %s\n" % ("exchange", tile))
                            me.flush()
                    else:
                        #write pass to file
                        me.write("pass\n")
                        me.flush()
                elif player == "Game":
                    #game over
                    print "Game over!"
                    break
                else:
                    #TODO: this shouldn't happen, fix it!
                    print "file %s in a bad state, ending" % you
                    me.write("end\n")
                    me.flush()
                    OK = False
                    break
        if not options.silent:
            print b
        print "CS221: %s, Quackle: %s" % (scoreMe, scoreYou)
import agent

agent = agent.Agent(load_model=True)
print agent.test(verbose=True)
def on_init(self):
    self._running = True

    # Switches for features
    self.training = True
    self.testing = False
    self.whiskers_on = True
    self.smell_on = False
    self.progress_bar = False

    # Neural net diagnostics
    # General
    self.model_filepath_load = './models/testing/smell_lr_0001/model'
    self.counts_per_epoch = 100
    self.count = 0
    self.epoch = 0

    # Training
    self.model_filepath_save = './models/testing/smell_lr_0001/model'
    self.epoch_train = 200

    # Testing
    self.epoch_test = 200
    self.reward_total = 0
    self.loss_array = np.zeros(self.epoch_test)
    self.actions_array = np.zeros(self.counts_per_epoch * self.epoch_test)

    # Initialise the pygame display and define its surface parameters
    pg.init()
    self._display_surf = pg.display.set_mode(self.size, pg.HWSURFACE | pg.DOUBLEBUF)

    # Add animals to the ecosystem
    if self.training:
        self.animals = np.array([
            agent.Agent(self._display_surf,
                        whiskers_on=self.whiskers_on,
                        smell_on=self.smell_on) for i in range(self.nanimals)
        ])
    else:
        self.animals = np.array([
            agent.Agent(self._display_surf,
                        model_filepath=self.model_filepath_load + '_%03d' % i,
                        whiskers_on=self.whiskers_on,
                        smell_on=self.smell_on) for i in range(self.nanimals)
        ])

    # Add plants to the environment
    self.environment = environment.Environment(self._display_surf,
                                               n_plants=self.nplants,
                                               smell_on=self.smell_on)

    # Initialise agents
    self.on_render()
    for animal in self.animals:
        animal.state_previous = animal.sense(self._display_surf,
                                             smell_map=self.environment.smell_map)
def __init__(self, **kwargs):
    Default.__init__(self, **kwargs)

    self.model = self.agent.model
    self.rlConfig = self.model.rlConfig

    if self.dump:
        try:
            import zmq
        except ImportError as err:
            print("ImportError: {0}".format(err))
            sys.exit("Install pyzmq to dump experiences")

        context = zmq.Context()

        self.socket = context.socket(zmq.PUSH)
        self.sock_addr = "tcp://%s:%d" % (self.dump, util.port(self.model.name))
        print("Connecting to " + self.sock_addr)
        self.socket.connect(self.sock_addr)

        self.dump_size = self.rlConfig.experience_length
        self.dump_state_actions = (self.dump_size * ssbm.SimpleStateAction)()

        self.dump_frame = 0
        self.dump_count = 0

    self.first_frame = True
    self.action_counter = 0
    self.toggle = False

    self.user = os.path.expanduser(self.user)

    self.state = ssbm.GameMemory()
    # track players 1 and 2 (pids 0 and 1)
    self.sm = state_manager.StateManager([0, 1])
    self.write_locations()

    if self.tag is not None:
        random.seed(self.tag)

    self.pids = [1]
    self.agents = {1: self.agent}
    self.characters = {1: self.agent.char or self.p2}

    reload_every = self.rlConfig.experience_length
    self.agent.reload_every = reload_every

    enemy = None
    if self.self_play:
        enemy = agent.Agent(reload_every=self.self_play * reload_every, swap=True, **kwargs)
    elif self.enemy:
        with open(self.enemy + 'agent', 'r') as f:
            import json
            enemy_kwargs = json.load(f)
        enemy_kwargs.update(reload_every=None, swap=True, dump=None, path=self.enemy)
        enemy = agent.Agent(**enemy_kwargs)

    if enemy:
        self.pids.append(0)
        self.agents[0] = enemy
        self.characters[0] = enemy.char or self.p1

    self.menu_managers = {
        i: MenuManager(characters[c], pid=i) for i, c in self.characters.items()
    }

    print('Creating MemoryWatcher.')
    mwType = memory_watcher.MemoryWatcher
    if self.zmq:
        mwType = memory_watcher.MemoryWatcherZMQ
    self.mw = mwType(self.user + '/MemoryWatcher/MemoryWatcher')

    pipe_dir = self.user + '/Pipes/'
    print('Creating Pads at %s. Open dolphin now.' % pipe_dir)
    util.makedirs(self.user + '/Pipes/')
    paths = [pipe_dir + 'phillip%d' % i for i in self.pids]
    self.get_pads = util.async_map(Pad, paths)

    self.init_stats()

    # sets the game mode and random stage
    self.movie = movie.Movie(movie.endless_netplay + movie.stages[self.stage])
# ***************************************
env = gw.make_env(config.DEFAULT_ENV_NAME)
writer = SummaryWriter(comment="-" + config.DEFAULT_ENV_NAME)

# the main DQN neural network that we are going to train
net = dqn.DQN(env.observation_space.shape, env.action_space.n).to(device)
print(net)
target_net = dqn.DQN(env.observation_space.shape, env.action_space.n).to(device)

# create the experience replay buffer of the required size and pass
# it to the agent
buffer = xr.ExperienceReplay(config.replay_size)
agent = ag.Agent(env, buffer)

epsilon = config.eps_start

# create an optimizer, a buffer for full episode rewards, a counter of
# frames and a variable to track the best mean reward reached (because
# every time the mean reward beats the record, we will save the model
# in a file)
optimizer = optim.Adam(net.parameters(), lr=config.learning_rate)
total_rewards = []
frame_idx = 0
best_mean_reward = None

print(">>>Training starts at ", datetime.datetime.now())

while True:  # while not converged
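    # The loop body is cut off above. A hypothetical sketch of what such a DQN loop
    # typically does; agent.play_step(), buffer.sample(), calc_loss() and the extra
    # config fields used here (eps_final, eps_decay, replay_start_size,
    # sync_target_frames, batch_size) are assumptions, not taken from the snippet,
    # and numpy/torch are assumed to be imported as np/torch elsewhere.
    frame_idx += 1
    epsilon = max(config.eps_final, config.eps_start - frame_idx / config.eps_decay)

    reward = agent.play_step(net, epsilon, device=device)   # one interaction step
    if reward is not None:                                   # an episode just finished
        total_rewards.append(reward)
        mean_reward = np.mean(total_rewards[-100:])
        writer.add_scalar("reward_100", mean_reward, frame_idx)
        if best_mean_reward is None or best_mean_reward < mean_reward:
            torch.save(net.state_dict(), config.DEFAULT_ENV_NAME + "-best.dat")
            best_mean_reward = mean_reward

    if len(buffer) >= config.replay_start_size:
        if frame_idx % config.sync_target_frames == 0:
            target_net.load_state_dict(net.state_dict())     # periodic target sync
        optimizer.zero_grad()
        batch = buffer.sample(config.batch_size)
        loss_t = calc_loss(batch, net, target_net, device=device)
        loss_t.backward()
        optimizer.step()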
#!/usr/bin/env python3
#coding: utf-8

import world
import robot
import agent
import numpy as np
import math
import sys

if __name__ == "__main__":
    world = world.World(10, 0.1)

    agent1 = agent.Agent(0.2, 0.0)
    agent2 = agent.Agent(0.2, 10.0 * math.pi / 180.0)

    robot1 = robot.Robot("robot_1", np.array([0.0, 0.0, 0.0]), 0.2, "black", agent1)
    robot2 = robot.Robot("robot_2", np.array([1.0, 2.0, math.pi / 2.0]), 0.2, "red", agent2)

    world.add_robot(robot1)
    world.add_robot(robot2)

    world.draw()
time.sleep(2)

# Loop until mission starts:
print("Waiting for the mission to start ", end=' ')
world_state = agent_host.getWorldState()
while not world_state.has_mission_begun:
    print(".", end="")
    time.sleep(0.1)
    world_state = agent_host.getWorldState()
    for error in world_state.errors:
        print("Error:", error.text)

print("Mission running ", end=' ')
agent_host.sendCommand("chat /time set day")

agent = ag.Agent(agent_host)

# Loop until mission ends:
while not agent.finished:
    state = transform_farm(copy.deepcopy(agent.state))
    action = select_action(state, net)
    reward = np.array(agent.run(action + 1))
    memory.push(state, action, transform_farm(copy.deepcopy(agent.state)), reward)
    sample = memory.sample(1)[0]
    target = sample.reward + discount_rate * \
        np.max(net.predict(np.expand_dims(sample.next_state, axis=0)))
def run_worker(args):
    """Starts a worker thread that learns how to play the specified Atari game."""

    cluster_def = get_cluster_def(args.num_threads)
    config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=2)
    server = tf.train.Server(cluster_def, 'thread', args.worker_index, config=config)

    # Configure the supervisor.
    is_chief = args.worker_index == 0
    checkpoint_dir = os.path.join(args.log_dir, 'checkpoint')
    thread_dir = os.path.join(args.log_dir, 'thread-{}'.format(args.worker_index))
    summary_writer = tf.summary.FileWriter(thread_dir)
    global_variables_initializer = tf.global_variables_initializer()
    init_fn = lambda sess: sess.run(global_variables_initializer)

    # Initialize the model.
    env = environment.AtariWrapper(args.env_name, environment.TRAINING, args.action_space)
    player = agent.Agent(args.worker_index, env, args.render, args.num_local_steps,
                         args.learning_rate, args.entropy_regularization,
                         args.max_gradient_norm, args.discount, summary_writer,
                         args.summary_update_interval)

    # Local copies of the model will not be saved.
    model_variables = [
        var for var in tf.global_variables() if not var.name.startswith('local')
    ]

    supervisor = tf.train.Supervisor(
        ready_op=tf.report_uninitialized_variables(model_variables),
        is_chief=is_chief,
        init_op=tf.variables_initializer(model_variables),
        logdir=checkpoint_dir,
        summary_op=None,
        saver=tf.train.Saver(model_variables),
        global_step=player.global_step,
        save_summaries_secs=30,
        save_model_secs=30,
        summary_writer=summary_writer,
        init_fn=init_fn)

    config = tf.ConfigProto(device_filters=[
        '/job:master', '/job:thread/task:{}/cpu:0'.format(args.worker_index)
    ])

    LOGGER.info('Starting worker. This may take a while.')
    with supervisor.managed_session(server.target, config=config) as sess, sess.as_default():
        global_step = 0
        while not supervisor.should_stop() and global_step < args.num_global_steps:
            global_step = player.train(sess)

    supervisor.stop()
    LOGGER.info('Stopped after %d global steps.', player.global_step)
def post(self):
    board = tornado.escape.json_decode(self.request.body)
    move = agent.Agent(board).next_move()
    self.write(tornado.escape.json_encode(move))
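# A hypothetical client call for the handler above. The route (/move), the port,
# and the shape of the board payload are assumptions; only the JSON-in/JSON-out
# behaviour follows from the handler itself.
import json
import requests

board = [[0, 0, 0], [0, 1, 0], [0, 0, -1]]   # illustrative payload only
resp = requests.post("http://localhost:8888/move", data=json.dumps(board))
print(resp.json())   # the move returned by Agent(board).next_move()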
# Cross-validation
for episodes in params['episodes']:
    for epsilon_start in params['epsilon_start']:
        for epsilon_end in params['epsilon_end']:
            for alpha_fixed in params['alpha_fixed']:
                if (alpha_fixed):
                    for alpha in params['alpha']:
                        # alpha and epsilon profile
                        alpha = np.ones(episodes) * alpha
                        epsilon = np.linspace(epsilon_start, epsilon_end, episodes)

                        # initialize the agent
                        learner = agent.Agent((x * y), 5, discount, max_reward=1,
                                              softmax=softmax, sarsa=sarsa)

                        # perform the training
                        rewards = []
                        for index in range(0, episodes):
                            # start from a random state (but avoid barrier and mountain)
                            barrier_x = [0, 1, 2, 3, 4, 6, 7, 8, 9]
                            barrier_y = [4, 5]
                            while (True):
                                initial = [
                                    np.random.randint(0, x),
                                    np.random.randint(0, y)
                                ]
                                if (not (initial[0] in barrier_y and initial[1] in barrier_x)):
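# The grid search above nests five loops. A sketch of an equivalent, flatter
# formulation using itertools.product; the params keys are the same as in the
# snippet, and the per-configuration training body (plus the non-fixed-alpha
# branch, which the snippet does not show) is left as a placeholder.
import itertools

for episodes, epsilon_start, epsilon_end, alpha_fixed in itertools.product(
        params['episodes'], params['epsilon_start'],
        params['epsilon_end'], params['alpha_fixed']):
    alphas = params['alpha'] if alpha_fixed else [None]
    for alpha in alphas:
        # ... same per-configuration training as in the snippet above ...
        pass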
print "----------------" if game.done: break time.sleep(0.25) ############# if __name__ == "__main__": game = game.Game(AREA_WIDTH, AREA_HEIGHT) user_play(game) agent = agent.Agent(ACTION_SIZE, DQN_MEMSIZE) stats = stats.Stats() score_sum = 0.0 time_sum = 0.0 score_cnt = 0.0 steps_wo_r = 0 quality_max = 0.0 for e in range(EPISODES): game.reset() state = game.get_state() for t in range(MAX_STEPS): action = agent.act(state) key = action2key[game.key][action]
def cbComputeActionGA3C(self, event):
    feasible_actions = copy.deepcopy(self.feasible_actions)
    if self.operation_mode.mode != self.operation_mode.NN:
        print 'Not in NN mode'
        print self.operation_mode.mode
        return
    if len(feasible_actions.angles) == 0 \
            or len(feasible_actions.path_lengths) == 0:
        print 'Invalid Feasible Actions'
        # print feasible_actions
        return

    # construct agent_state
    x = self.pose.pose.position.x
    y = self.pose.pose.position.y
    v_x = self.vel.x
    v_y = self.vel.y
    radius = self.veh_data['radius']
    turning_dir = 0.0
    heading_angle = self.psi
    pref_speed = self.veh_data['pref_speed']
    goal_x = self.goal.pose.position.x
    goal_y = self.goal.pose.position.y

    # in case current speed is larger than desired speed
    v = np.linalg.norm(np.array([v_x, v_y]))
    if v > pref_speed:
        v_x = v_x * pref_speed / v
        v_y = v_y * pref_speed / v

    host_agent = agent.Agent(x, y, goal_x, goal_y, radius, pref_speed, heading_angle, 0)
    host_agent.vel_global_frame = np.array([v_x, v_y])
    # host_agent.print_agent_info()

    other_agents_state = copy.deepcopy(self.other_agents_state)
    obs = host_agent.observe(other_agents_state)[1:]
    obs = np.expand_dims(obs, axis=0)
    # print "obs:", obs

    predictions = self.nn.predict_p(obs, None)[0]
    # print "predictions:", predictions
    # print "best action index:", np.argmax(predictions)
    raw_action = copy.deepcopy(self.actions[np.argmax(predictions)])
    action = np.array([pref_speed * raw_action[0], util.wrap(raw_action[1] + self.psi)])
    # print "raw_action:", raw_action
    # print "action:", action

    # feasible_actions
    angles = (np.array(feasible_actions.angles) + np.pi) % (2 * np.pi) - np.pi
    max_ranges = np.array(feasible_actions.max_speeds) - 0.3
    path_lengths = np.array(feasible_actions.path_lengths)

    # Sort the feasible actions by increasing angle
    order_inds = np.argsort(angles)
    max_ranges = max_ranges[order_inds]
    angles = angles[order_inds]
    path_lengths = path_lengths[order_inds]

    # Find which index corresponds to straight in front, and 90 deg each side
    zero_ind = np.digitize([self.psi + 0.01], angles) - 1
    self.d_min = max_ranges[zero_ind]
    # self.d_min = 100.0

    # if close to goal
    kp_v = 0.5
    kp_r = 1
    if host_agent.dist_to_goal < 2.0:  # and self.percentComplete>=0.9:
        # print "somewhat close to goal"
        pref_speed = max(min(kp_v * (host_agent.dist_to_goal - 0.1), pref_speed), 0.0)
        action[0] = min(raw_action[0], pref_speed)
        turn_amount = max(min(kp_r * (host_agent.dist_to_goal - 0.1), 1.0), 0.0) * raw_action[1]
        action[1] = util.wrap(turn_amount + self.psi)

    if host_agent.dist_to_goal < 0.3:
        self.stop_moving_flag = True
    else:
        self.stop_moving_flag = False

    # print 'chosen action (rel angle)', action[0], action[1]
    self.update_action(action)
import importlib

parser = argparse.ArgumentParser()
parser.add_argument("dir1", type=str, help="Directory to agent 1 to be tested.")
parser.add_argument("dir2", type=str, default=None, nargs="?",
                    help="Directory to agent 2 to be tested. If empty, SimpleAI is used instead.")
parser.add_argument("--render", "-r", action="store_true", help="Render the competition.")
parser.add_argument("--games", "-g", type=int, default=100, help="number of games.")
args = parser.parse_args()

sys.path.insert(0, args.dir1)
import agent
orig_wd = os.getcwd()
os.chdir(args.dir1)
agent1 = agent.Agent()
agent1.load_model()
os.chdir(orig_wd)
del sys.path[0]

if args.dir2:
    sys.path.insert(0, args.dir2)
    importlib.reload(agent)
    os.chdir(args.dir2)
    agent2 = agent.Agent()
    agent2.load_model()
    os.chdir(orig_wd)
    del sys.path[0]
else:
    agent2 = None
def __init__(self, instrument):
    self.instrument = instrument
    self.agent = agent.Agent(None, None, [instrument])
import agent

agent = agent.Agent()
agent.train()
def __init__(self,
             num_nodes=100,
             avg_node_degree=3,
             # taipei : 1.92
             # telaviv : 2.16
             # tallinn : 2.20
             engagement=0.49,
             trustability=0.21,
             influenceability=0.53,
             recovery=0.63,
             experience=1,
             initial_opinion=0,
             opinion=0,
             public_sector_opinion=1,
             corpo_opinion=1,
             startup_opinion=1,
             academic_opinion=-1,
             civil_opinion=-1,
             media_opinion=-1):

    # set network layout
    self.num_nodes = num_nodes
    prob = avg_node_degree / self.num_nodes
    self.G = nx.erdos_renyi_graph(n=self.num_nodes, p=prob)

    # set space and time of the model
    self.grid = NetworkGrid(self.G)
    self.schedule = RandomActivation(self)

    # set model parameters
    self.engagement = engagement
    self.trustability = trustability
    self.influenceability = influenceability
    self.recovery = recovery
    self.experience = experience
    self.initial_opinion = initial_opinion
    self.opinion = initial_opinion
    self.public_sector_opinion = public_sector_opinion
    self.corpo_opinion = corpo_opinion
    self.startup_opinion = startup_opinion
    self.academic_opinion = academic_opinion
    self.civil_opinion = civil_opinion
    self.media_opinion = media_opinion

    # set data collection
    self.datacollector = DataCollector({
        "Negative": num_negative,
        "Neutral": num_neutral,
        "Positive": num_positive,
        "Total Engagement": total_engagement,
        "Total Trustability": total_trustability,
        "Total Recovery": total_recovery,
        "Total Experience": total_experience,
    })

    # create agents with average parameters taken on #city tweets
    for i, node in enumerate(self.G.nodes()):
        a = agent.Agent(
            i, self,
            self.engagement,
            self.trustability,
            self.influenceability,
            self.recovery,
            self.experience,
            self.initial_opinion,  # fixed by interface
            self.opinion)
        self.schedule.add(a)
        # add the undetermined agents to the network
        self.grid.place_agent(a, node)

    # create 1 representative of each stakeholder category
    public_sector = self.random.sample(self.G.nodes(), 1)
    for a in self.grid.get_cell_list_contents(public_sector):
        a.engagement = 0.57
        a.trustability = 0.53
        a.influenceability = 0.59
        a.recovery = 0.70
        a.experience = 1
        a.initial_opinion = public_sector_opinion  # fixed by interface
        a.opinion = a.initial_opinion

    corporate = self.random.sample(self.G.nodes(), 1)
    for a in self.grid.get_cell_list_contents(corporate):
        a.engagement = 0.75
        a.trustability = 0.49
        a.influenceability = 0.68
        a.recovery = 0.73
        a.experience = 1
        a.initial_opinion = corpo_opinion  # fixed by interface
        a.opinion = a.initial_opinion

    startup = self.random.sample(self.G.nodes(), 1)
    for a in self.grid.get_cell_list_contents(startup):
        a.engagement = 0.69
        a.trustability = 0.29
        a.influenceability = 0.68
        a.recovery = 0.97
        a.experience = 1
        a.initial_opinion = startup_opinion  # fixed by interface
        a.opinion = a.initial_opinion

    academic = self.random.sample(self.G.nodes(), 1)
    for a in self.grid.get_cell_list_contents(academic):
        a.engagement = 0.49
        a.trustability = 0.20
        a.influenceability = 0.65
        a.recovery = 0.75
        a.experience = 1
        a.initial_opinion = academic_opinion  # fixed by interface
        a.opinion = a.initial_opinion

    civil = self.random.sample(self.G.nodes(), 1)
    for a in self.grid.get_cell_list_contents(civil):
        a.engagement = 0.43
        a.trustability = 0.21
        a.influenceability = 0.69
        a.recovery = 0.72
        a.experience = 1
        a.initial_opinion = civil_opinion  # fixed by interface
        a.opinion = a.initial_opinion

    media = self.random.sample(self.G.nodes(), 1)
    for a in self.grid.get_cell_list_contents(media):
        a.engagement = 0.50
        a.trustability = 0.23
        a.influenceability = 0.65
        a.recovery = 0.71
        a.experience = 1
        a.initial_opinion = media_opinion  # fixed by interface
        a.opinion = a.initial_opinion

    self.running = True
    self.datacollector.collect(self)
    print('Finished initialising model, network has %s nodes' % self.G.nodes)
    nx.draw_networkx(self.G)
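# A hypothetical way to run the model above and read back the collected series with
# Mesa's DataCollector API. The class name (OpinionModel here) and the existence of a
# step() method that calls self.schedule.step() and self.datacollector.collect(self)
# are assumptions, since neither is shown in the snippet.
model = OpinionModel(num_nodes=100, avg_node_degree=3)
for _ in range(50):
    model.step()                                        # one scheduler tick per call
df = model.datacollector.get_model_vars_dataframe()    # pandas DataFrame of reporters
print(df[["Negative", "Neutral", "Positive"]].tail())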
# print("acc_reward:", acc_reward) return acc_reward, i, loss n_actions = env.action_space.n state_dim = env.observation_space.high.shape[0] print("n_actions:", n_actions, "state_dim", state_dim) batch_size = 64 checkpoint_path = "/tmp/my_dqn.ckpt" qvalue_model = Qvalue.Qvalue(state_dim=state_dim, n_actions=n_actions, batch_size=64, h1_n=512, h2_n=256, checkpoint_path=checkpoint_path) agent = agent.Agent(actions=n_actions, q_value_model=qvalue_model) memory = memory.RandomMemory(max_size=1024) discount = .95 rewards = [] episodes_end = [] losses = [] eps = .9 reward, episode_end, loss = 0., 0., 0. render = False print(reward) while reward < 20.: for episode_i in range(1000): # print("episode_i:", episode_i) if episode_i % 100 == 0: eps = epslons[int(episode_i / 100)]
        self.ACTION_NUM = agent.dim_actions
        self.STATE_NUM = agent.dim_states
        self.RLMemory_num = 20
        self.SLMemory_num = 20
        self.RLMemory = deque(maxlen=self.RLMemory_num)
        self.SLMemory = deque(maxlen=self.SLMemory_num)
        # self.Q = DQN.DQN_DouDiZhu(self.ACTION_NUM, self.STATE_NUM, self.RLMemory, self.RLMemory_num, self.player)
        # self.Pi = SLN.Pi(self.ACTION_NUM, self.STATE_NUM, self.SLMemory, self.SLMemory_num, self.player)
        self.EPSILON = 0.06
        self.ETA = 0.1
        self.EPISODE_NUM = 5000000
        self.Q_enable = False


if __name__ == '__main__':
    agent = ag.Agent(models=["rl", "rl", "rl"])
    runAgent1 = RunAgent(agent, 'player1')
    runAgent2 = RunAgent(agent, 'player2')
    runAgent3 = RunAgent(agent, 'player3')
    Q = DQN.DQN_DouDiZhu(runAgent1.ACTION_NUM, runAgent1.STATE_NUM,
                         runAgent1.RLMemory, runAgent1.RLMemory_num)
    Pi = SLN.Pi(runAgent1.ACTION_NUM, runAgent1.STATE_NUM,
                runAgent1.SLMemory, runAgent1.SLMemory_num)

    for i in range(runAgent1.EPISODE_NUM):
        print('=========== episode:', i, '============')
        if random.random() < runAgent1.ETA:
            runAgent1.Q_enable = True
            print('player1 ' + 'Q network is working')
        else:
            runAgent1.Q_enable = False
            print('player1 ' + 'Pi network is working')
def initialise_particle_data_set(self, unknown_agent, sim):
    # 1. Generating initial data (particles)
    none_count, none_threshold = 0, 500

    x, y, direction = unknown_agent.position[0], unknown_agent.position[1], unknown_agent.direction
    tmp_agent = agent.Agent(x, y, direction, self.type, -1)
    tmp_agent.set_parameters(sim, sim.agents[0].level, sim.agents[0].radius, sim.agents[0].angle)

    # 4. Defining route
    tmp_sim = sim.copy()
    tmp_agent = tmp_sim.move_a_agent(tmp_agent)
    target = tmp_agent.get_memory()
    route_actions = tmp_agent.route_actions

    particle = {}
    # 5. Adding to the data set
    if route_actions is not None:
        particle['target'] = target
        particle['choose_target_state'] = tmp_sim
        particle['parameter'] = [sim.agents[0].level, sim.agents[0].radius, sim.agents[0].angle]
        particle['succeeded_steps'] = 1
        particle['failed_steps'] = 0
        particle['index'] = len(self.data_set)
        particle['cts_type'] = 'e'
        self.data_set.append(particle)

    while len(self.data_set) < self.generated_data_number:
        if none_count == none_threshold:
            break
        else:
            particle = {}

            # 2. Random uniform parameter sampling
            tmp_radius = random.uniform(radius_min, radius_max)  # 'radius'
            tmp_angle = random.uniform(angle_min, angle_max)     # 'angle'
            tmp_level = random.uniform(level_min, level_max)     # 'level'

            # 3. Creating the temporary agent
            x, y, direction = unknown_agent.position[0], unknown_agent.position[1], unknown_agent.direction
            tmp_agent = agent.Agent(x, y, direction, self.type, -1)
            tmp_agent.set_parameters(sim, tmp_level, tmp_radius, tmp_angle)

            # 4. Calculating route
            tmp_sim = sim.copy()
            tmp_agent = tmp_sim.move_a_agent(tmp_agent)
            target = tmp_agent.get_memory()
            route_actions = tmp_agent.route_actions

            # 5. Adding to the data set
            if route_actions is not None:
                particle['target'] = target
                particle['choose_target_state'] = tmp_sim
                particle['parameter'] = [tmp_level, tmp_radius, tmp_angle]
                particle['succeeded_steps'] = 1
                particle['failed_steps'] = 0
                particle['index'] = len(self.data_set)
                particle['cts_type'] = 'e'
                self.data_set.append(particle)
            else:
                none_count += 1