def runner(filename, tracking=True):
    play = env(filename, specs, sizes, exploror_sample_size, keep_history=True)
    sampler = play.run_for_data()
    if tracking:
        print(play.file_id, end=' ', flush=True)
    del play
    return sampler
def init_infer_env(self, params):
    self.infer_environment = env(params, 'infer')
    self.rev_relation_vocab = self.infer_environment.grapher.rev_relation_vocab
    self.rev_entity_vocab = self.infer_environment.grapher.rev_entity_vocab
    self.max_hits_at_10 = 0
    self.ePAD = self.entity_vocab['PAD']
    self.rPAD = self.relation_vocab['PAD']
def init_dev_env(self, params):
    self.dev_test_environment = env(params, 'dev')
    self.test_environment = self.dev_test_environment
    self.rev_relation_vocab = self.test_environment.grapher.rev_relation_vocab
    self.rev_entity_vocab = self.test_environment.grapher.rev_entity_vocab
    self.max_hits_at_10 = 0
    self.ePAD = self.entity_vocab['PAD']
    self.rPAD = self.relation_vocab['PAD']
def __init__(self, num_of_asset=10):
    self.num_of_asset = num_of_asset
    self.env = environment.env(train=0, number_of_asset=num_of_asset)
    self.w = np.ones(self.num_of_asset, np.float32) / self.num_of_asset
    self.UCRP_deque = deque()
    self.UBAH_deque = deque()
def __init__(self, params):
    # transfer parameters to self
    for key, val in params.items():
        setattr(self, key, val)
    self.agent = Agent(params)
    self.save_path = None
    self.train_environment = env(params, 'train')
    self.dev_test_environment = env(params, 'dev')
    self.test_test_environment = env(params, 'test')
    self.test_environment = self.dev_test_environment
    self.rev_relation_vocab = self.train_environment.grapher.rev_relation_vocab
    self.rev_entity_vocab = self.train_environment.grapher.rev_entity_vocab
    self.max_hits_at_10 = 0
    self.ePAD = self.entity_vocab['PAD']
    self.rPAD = self.relation_vocab['PAD']
    # optimize
    self.baseline = ReactiveBaseline(l=self.Lambda)
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
def main():
    f = open(map_path)
    zone = env(f)
    bond = Agent(zone)
    current_time = 0
    calculus = Evaluator()
    while current_time < life_time:
        run(zone, bond, calculus)
        current_time += 1
def init_train_env(self, params):
    self.train_environment = env(params, "train")
    self.rev_relation_vocab = self.train_environment.grapher.rev_relation_vocab
    self.rev_entity_vocab = self.train_environment.grapher.rev_entity_vocab
    self.max_hits_at_10 = 0
    self.ePAD = self.entity_vocab['PAD']
    self.rPAD = self.relation_vocab['PAD']
    # optimize
    self.baseline = ReactiveBaseline(l=self.Lambda)
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
def __init__(self):
    self.num_episodes = 5000
    self.epilson = 0.99
    self.decay_epilson = 0.9995
    self.Q = np.zeros([5, 460, 4])
    self.goal_num = 4
    self.lr = 0.001
    self.y = 0.9
    self.env = environment.env()
    self.path = []
    self.visit_state = []
    self.final_state = 0
    self.showmap = []
def main():
    mapnikConfiguration = environment.require('MAPNIK_CONFIGURATION')
    logger.info('Using Mapnik configuration file %s', mapnikConfiguration)
    bbox = determineBoundingBox(environment.require('BBOX'))
    logger.info('Using bounding box %s', bbox)
    numThreads = int(environment.env('NUM_THREADS', 6))
    logger.info('Using %s threads', numThreads)
    minZoom = int(environment.env('MIN_ZOOM', 12))
    maxZoom = int(environment.env('MAX_ZOOM', 12))
    logger.info('Processing zoom levels [%s, %s]', minZoom, maxZoom)
    tmsScheme = environment.env('TMS_SCHEME', 'false') == 'true'
    if tmsScheme:
        logger.info('Using TMS scheme')
    skipIfExists = environment.env('SKIP_IF_EXISTS', 'true') != 'false'
    if skipIfExists:
        logger.info('Skipping tile generation if tile exists')
    renderTiles(bbox, mapnikConfiguration, OUTPUT_PATH, minZoom, maxZoom,
                numThreads, tmsScheme, skipIfExists)
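# A hypothetical sketch (assumption, not from the source) of the environment.env(name, default)
# and environment.require(name) helpers used above, treating them as thin wrappers around
# os.environ; the implementation details here are illustrative only.
import os

def env(name, default=None):
    # Return the value of the named environment variable, or the default when unset.
    value = os.environ.get(name)
    return value if value is not None else default

def require(name):
    # Return the value of a required environment variable, failing loudly when missing.
    value = os.environ.get(name)
    if value is None:
        raise RuntimeError('Missing required environment variable: %s' % name)
    return value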
def __init__(self):
    self.gameStart = False
    self.status = False
    self.reward = 0
    super(view, self).__init__()
    self.n_actions = 361  # number of possible actions
    self.n_features = 361
    self.doneList = []
    self.allphoto = []
    self.initView()
    self.env = env()
    self.wobservation = None
    self.wobservation_ = None
    self.action1 = None
    self.RL = DeepQNetwork(self.n_actions, self.n_features)
def initialize_inference_env(self, params):
    # transfer parameters to self
    for key, val in params.items():
        setattr(self, key, val)
    self.agent = Agent(params)
    self.save_path = None
    self.inference_environment = env(params, 'inference')
    self.rev_relation_vocab = self.inference_environment.grapher.rev_relation_vocab
    self.rev_entity_vocab = self.inference_environment.grapher.rev_entity_vocab
    self.max_hits_at_10 = 0
    self.ePAD = self.entity_vocab['PAD']
    self.rPAD = self.relation_vocab['PAD']
    # optimize
    self.baseline = ReactiveBaseline(l=self.Lambda)
def interactive_play(agent):
    input_code = input(
        'Please enter an input code (any pattern between 0000 and 5555): ')
    agent.reset_possible_states()
    guess = agent.get_best_action()
    envi = env(input_code)
    print(f"initial guess = {guess}")
    u = input('Press enter to let the q-learning agent make the next guess')
    while guess != input_code:
        feedback = env.score(input_code, guess)
        agent.restrict_possible_states(guess, feedback)
        guess = agent.get_best_action()
        print(f"Next guess = {guess}")
        u = input()
    if guess == input_code:
        print("mastermind level maxx, guess is right!")
def train(agent, n_episodes):
    for _ in range(n_episodes):
        input_code = env._number_from_index(random.randint(0, 6**4 - 1))
        envi = env(input_code)
        agent.reset_possible_states()
        action = agent.random_action()  # initial action
        if action == input_code:  # if the initial guess is already correct, skip this episode
            continue
        run = True
        while run:
            feedback = env.get_feedback(action)
            reward = env.reward(action)
            agent.learn_from_move(action, feedback, reward)
            if action == input_code:
                break  # correct guess, stop the episode
            else:
                action = agent.random_action()  # otherwise make the next guess
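# A minimal usage sketch (assumption, not from the source) wiring the train() and
# interactive_play() helpers above together; the Agent class name and its constructor
# signature are hypothetical.
if __name__ == '__main__':
    agent = Agent()  # hypothetical constructor
    train(agent, n_episodes=10000)
    interactive_play(agent)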
def samplex_objective_function(type):
    assert type in ["facet", "random"]
    env().model_gen_options["objf choice"] = type
def samplex_add_noise(n=True):
    env().model_gen_options["add noise"] = n
def samplex_random_seed(s):
    env().model_gen_options["rngseed"] = s
def samplex_add_noise(n=True):
    env().model_gen_options['add noise'] = n
def samplex_solution_type(type):
    assert type in ["vertex", "interior", "CLT", "CLTvertex"]
    env().model_gen_options["solution type"] = type
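# A minimal usage sketch (assumption, not from the source): the setters above all write
# into env().model_gen_options, so a typical configuration step before model generation
# might look like this. Only the keys and values shown above are assumed.
samplex_objective_function('facet')   # or 'random'
samplex_solution_type('vertex')       # or 'interior', 'CLT', 'CLTvertex'
samplex_add_noise(True)
samplex_random_seed(42)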
def train(self):
    """
    Implement your training algorithm here
    """
    ###########################
    # YOUR IMPLEMENTATION HERE #
    current_loss = 0.0
    mean_reward = 0.0
    for i_episode in range(NUM_EPISODES):
        # Initialize the environment and state
        state = self.env.reset()
        episode_Reward = 0.0
        for t in range(EPISODE_STEP_LIMIT):
            self.steps_done += 1
            action = self.make_action(state, False)
            next_state, reward, done, _ = self.env.step(action)
            episode_Reward += reward
            # Store the transition ('state', 'action', 'next_state', 'reward', 'done')
            state = np.transpose(state, (2, 0, 1))
            next_state = np.transpose(next_state, (2, 0, 1))
            self.transition = (state, action, next_state, reward, done)
            self.push()
            # Move to the next state
            state = next_state
            # Update the target network, copying all weights and biases from the policy network
            if self.steps_done % TARGET_UPDATE == 0:
                print("**********Updating Target********")
                self.target_net.load_state_dict(self.policy_net.state_dict())
            # Perform one optimization step once the replay memory is warm
            if len(self.memory) > 10000 and len(self.memory) % 4 == 0:
                if self.flag == 0:
                    print("Crossed 10000")
                    self.flag = 1
                batch = self.replay_buffer(BATCH_SIZE)
                state_batch = torch.from_numpy(np.asarray(batch[0]))
                action_batch = torch.from_numpy(np.asarray(batch[1]))
                next_state_batch = torch.from_numpy(np.asarray(batch[2]))
                reward_batch = torch.from_numpy(np.asarray(batch[3])).to(device)
                done_batch = torch.from_numpy(np.asarray(batch[4])).to(device)
                state_action_values = self.policy_net(state_batch.to(device)).gather(
                    1, action_batch[:, None].to(device)).squeeze(1)
                q_max = self.target_net(next_state_batch.to(device)).max(1)[0].detach()
                q_max[done_batch] = 0
                expected_state_action_values = q_max * GAMMA + reward_batch
                loss = F.smooth_l1_loss(state_action_values.double(),
                                        expected_state_action_values.double())
                current_loss = loss
                # Optimize the model
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            if done:
                if len(self.reward_buffer) >= REWARD_BUFFER_SIZE:
                    self.reward_buffer.pop(0)
                self.reward_buffer.append(episode_Reward)
                mean_reward = np.mean(self.reward_buffer)
                break
        # Periodically evaluate the current policy on a fresh test environment
        if i_episode % 500 == 0:
            env2 = env('BreakoutNoFrameskip-v4', self.args,
                       atari_wrapper=True, test=True)
            test(self, env2, total_episodes=100)
            writer.add_scalar('Test Mean Reward', self.test_mean_reward, i_episode)
            if self.test_mean_reward > self.max_reward_so_far:
                torch.save(self.policy_net.state_dict(), "best_weights_model.pt")
                self.max_reward_so_far = self.test_mean_reward
        writer.add_scalar('Train Mean Reward', mean_reward, i_episode)
        writer.add_scalar('Training LOSS', current_loss, i_episode)
        if i_episode % 100 == 0:
            print("TRAIN Mean Reward after ", i_episode, " episodes is ",
                  mean_reward, " Epsilon ", self.eps_threshold)
        if i_episode % 500 == 0:
            torch.save(self.policy_net.state_dict(), "saved_model.pt")
            print("Saved Model after ", i_episode, " episodes")
    self.env.env.close()
    self.writer.close()
parser = argparse.ArgumentParser()
parser.add_argument("--train", help="path of your actual train model")
parser.add_argument("--save", default='new_policy_net.pth', required=True,
                    help="path of your new train model")
parser.add_argument("--resolution", default='1920x1080', required=True,
                    help="insert your monitor 0 resolution")
args = parser.parse_args()

input_resolution = args.resolution.split('x')
path_save = args.save
resolution = [int(input_resolution[0]), int(input_resolution[1])]

env = environment.env(resolution)
time.sleep(3)

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0
# hyperparameters
input_day_size = 50
filter_size = 3
num_of_feature = 4
num_of_asset = 8
num_episodes = 10000 if is_train == 1 else 1
money = 1e+8

# saving
save_frequency = 100
save_path = './weights'
save_model = 1
load_model = 1
selecting_random = True

if is_train == 0:
    env = environment.env(train=0, number_of_asset=num_of_asset)
    load_model = 1
    selecting_random = False
else:
    env = environment.env(number_of_asset=num_of_asset)

config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
config.gpu_options.allow_growth = True
a_loss_sum = 0
s_loss_sum = 0
sess = tf.Session(config=config)
with tf.variable_scope('ESM'):
    selector = network.select_network(sess)
import numpy as np
import epi_model as em
from environment import env
import agent

world = env()
player = agent.Agent(8, 5, 0.0005, 0.99)
# player.load()

n_runs = 500
losses = []
for i in range(n_runs):
    done = False
    loss = 0.0
    obs = world.reset()
    while not done:
        action = player.act(obs)
        next_obs, reward, done, info = world.step(action)
        loss += reward
        player.record(obs, next_obs, action, reward, done)
        obs = next_obs
        player.learn()
    losses.append(loss)
    avg_loss = np.mean(losses[max(0, i - 100):(i + 1)])
    print("run: ", i, " loss: %i" % int(loss), " avg: %i" % int(avg_loss))
    if i % 10 == 9:
import pygame
from time import sleep
import model
import environment
import patrol_learning_grid

patrol_env = environment.env()

# load the grid coordinate data (좌표 = coordinates)
f = open("./data/좌표1.txt", 'r')
lines = f.readlines()
f.close()

game = [[0 for j in range(50)] for i in range(50)]
temp_game = []
for line in lines:
    temp_game.append(list(map(int, line.split())))

k = 0
for i in range(49, 1, -1):
    for j in range(50):
        if j in temp_game[i]:
            game[k][j] = -1
    k = k + 1

n = 50  # represents no. of side squares (n*n total squares)
scrx = n * 15
scry = n * 15
background = (51, 51, 51)  # used to clear screen while rendering
screen = pygame.display.set_mode((scrx, scry))  # creating a screen using pygame
colors = [(51, 51, 51) for i in range(n**2)]
reward = patrol_env.goal_state
terminals = []
path = list(patrol_learning_grid.main())
        plt.imshow(copy_maze)
        plt.pause(0.1)
        plt.clf()
        if agent_.env.maze.ravel()[s_] != 0:
            break


if __name__ == '__main__':
    train_settings()
    seed()
    brain_ = brain(size=args.arena_size, gamma=0.9, l_r=0.9)
    env_ = env(size=args.arena_size, cat_r=[-10, -20], cheese_r=[10, 20])
    agent_ = agent(env=env_, brain=brain_)
    plt.imshow(env_.maze)
    plt.pause(1)
    for i in range(args.random_steps):
        agent_.step()
        if i % 10 == 0:
            plt.imshow(agent_.brain.q_mat)
            plt.pause(0.01)
            plt.clf()
import random as rn
import numpy as np
from environment import env
import brain
from DQN import DQN

### Setting the hyperparameters
max_memory = 3000
epochs = 1000
batch_size = 128
eps = 0.3
numb_actions = 5
direction_boundary = (numb_actions - 1) / 2  # the action corresponding to "do nothing"
temp_incr = 1.5  # the temperature difference between consecutive actions

### Creation of the environment
env = env(nb_users_ini=20, data_transfer_ini=30, starting_month=0)

### Creation of the brain
brain = brain.NN(nb_actions=numb_actions)
model = brain.model

### Creation of the memory of the DQN Agent
DQN = DQN()

if env.train:
    previous_loss = 0
    patience = 0
    for epoch in range(0, epochs):
        loss = 0
        time_step = 0
        game_over = False
        total_reward = 0
        new_month = np.random.randint(0, 12)
def samplex_random_seed(s):
    env().model_gen_options['rngseed'] = s
def samplex_objective_function(type):
    assert type in ['facet', 'random']
    env().model_gen_options['objf choice'] = type
def samplex_solution_type(type):
    assert type in ['vertex', 'interior']
    env().model_gen_options['solution type'] = type
@author: pranavmanjunath
"""
import time
import random

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import networkx as nx
import pylab

import environment

df, data, myvalue, rewards_test, environment_rows1, environment_columns1 = environment.env()


def get_next_action(current_row_index, epsilon, path):
    a = rewards_test[current_row_index]
    indices = []
    # m looks at the q values
    m = []
    max_values = []
    for i in range(26):
        if a[0, i] != 0:
            if i != path[len(path) - 2]:
                indices.append(i)
parser = argparse.ArgumentParser()
parser.add_argument("--train", help="path of your actual train model")
parser.add_argument("--test", help="path of your actual test model")
parser.add_argument("--save", default='new_policy_net.pth',
                    help="path of your new train model")
parser.add_argument("--resolution", default='1920x1080',
                    help="insert your monitor 0 resolution")
args = parser.parse_args()

input_resolution = args.resolution.split('x')
path_save = args.save
resolution = [int(input_resolution[0]), int(input_resolution[1])]

env = environment.env(resolution, noise=True, noiseType='gauss')
print("Go to the game screen!")
for i in tqdm(range(100)):
    time.sleep(0.03)

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
# hyperparameters
input_day_size = 50
filter_size = 3
num_of_feature = 4
num_of_asset = 8
num_episodes = 5000
money = 1e+8

# saving
save_frequency = 100
save_path = './weights/AAM/m_'
save_model = 1
load_model = 0
selecting_random = True

env = environment.env(number_of_asset=num_of_asset)
env_val = environment.env(number_of_asset=num_of_asset, train=0, validation=1)

config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
config.gpu_options.allow_growth = True
a_loss_sum = 0
sess = tf.Session(config=config)
with tf.variable_scope('AAM'):
    allocator = network.policy(sess, num_of_asset=num_of_asset)
# with tf.variable_scope('ESM'):
#     selector = network.select_network(sess)
sess.run(tf.global_variables_initializer())
    return boundingBoxes


def mapDimensions(dimensions):
    width, height = dimensions
    print('Rendering map with paper size (%s m × %s m)' % (width, height))
    # Dots per inch (1 point = 1/72 inch, see
    # https://pycairo.readthedocs.io/en/latest/reference/surfaces.html#class-pdfsurface-surface)
    dpi = 72
    # Dots per metre
    dpm = dpi * 100 / 2.54
    mapWidth = int(width * dpm)
    mapHeight = int(height * dpm)
    return mapWidth, mapHeight


if __name__ == '__main__':
    boundingBox = determineBoundingBox(environment.require('BBOX'))
    pageOverlap = determinePageOverlap(environment.env('PAGE_OVERLAP', '5%'))
    # Default: 1 cm on the map is 1.5 km in the world
    scale = determineScale(environment.env('SCALE', '1:150000'))
    printPaperWidth, printPaperHeight = rotatePaper(
        determinePaperDimensions(environment.env('PAPER_SIZE', 'A4')),
        determineOrientation(environment.env('PAPER_ORIENTATION', ORIENTATION_PORTRAIT))
    )
    for bbox in boundingBoxes(boundingBox, pageOverlap, scale,
                              (printPaperWidth, printPaperHeight)):
        print('%s:%s:%s:%s' % (bbox.minx, bbox.miny, bbox.maxx, bbox.maxy))
import environment

if environment.env() == "production":
    from .production import *
elif environment.env() == "development":
    from .development import *
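# A hypothetical sketch (assumption, not from the source) of an environment.env() helper
# that the settings selection above could rely on: it reads the deployment name from an
# OS environment variable; the variable name APP_ENV is illustrative only.
import os

def env(default="development"):
    # Return the current deployment environment name, e.g. "production" or "development".
    return os.getenv("APP_ENV", default)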
def samplex_solution_type(type):
    assert type in ['vertex', 'interior', 'CLT', 'CLTvertex']
    env().model_gen_options['solution type'] = type
            td_target = reward + discount_factor * Q[next_state][best_next_action]
            td_delta = td_target - Q[state][action]
            Q[state][action] += alpha * td_delta
            if done:
                break
            state = next_state
    return Q


if __name__ == '__main__':
    if sys.argv[1] == "--h":
        print("USAGE: python q_learning.py --<demo or train>")
        print("--demo => run the demo app using CliffWalkingEnv")
        print("--train => run the demo app using RL")
        sys.exit()
    elif sys.argv[1] == "--train":
        n_episode = 5000
        env = env(n_episode)
        print("0:", datetime.datetime.now())
        Q = q_learning(env, n_episode)
        print("Q value: ", Q)
    elif sys.argv[1] == "--demo":
        from cliff_walking import CliffWalkingEnv
        n_episode = 5000
        print("Demo Training RUN")
        env = CliffWalkingEnv()
        Q = demo_q_learning(env, n_episode)
        print("Q value: ", Q)
def samplex_stride(s=1):
    env().model_gen_options['stride'] = s