def setUpClass(cls):
    environment = None
    if _options.environment_type == 'k8s':
        import k8s_environment  # pylint: disable=g-import-not-at-top
        environment = k8s_environment.K8sEnvironment
    elif _options.environment_type == 'local':
        import local_environment  # pylint: disable=g-import-not-at-top
        environment = local_environment.LocalEnvironment
    elif _options.environment_type == 'aws':
        import aws_environment  # pylint: disable=g-import-not-at-top
        environment = aws_environment.AwsEnvironment
    if not environment:
        logging.fatal('No environment type selected')

    if _options.name:
        cls.env = environment()
        cls.env.use_named(_options.name)
    elif _options.environment_params:
        cls.env = environment()
        environment_params = dict(
            param.split('=') for param in _options.environment_params.split(','))
        cls.env.create(**environment_params)
    else:
        cls.create_default_local_environment()

    if not cls.env:
        logging.fatal('No test environment exists to test against!')

    cls.test_params = dict(
        param.split('=') for param in _options.test_params.split(','))
def run():
    """Run function, to be called below."""
    # Set up the Excel link. Work around the working-directory problems
    # created by xlwings by pinning the cwd to this script's directory.
    exec_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(exec_dir)

    # Set up the links.
    xl_filename = os.path.join(os.getcwd(), 'dapper_control.xlsm')
    xl_control = xl_link(xl_filename)

    # Grab parameters from the Excel file.
    input_topkml = xl_control.topkml
    input_bottomkml = xl_control.bottomkml
    outputkml = xl_control.oputkml
    yearlist = xl_control.grab_time()
    oputschedule = xl_control.grab_oputschedule()
    debug_dapper = xl_control.debug

    welcome()

    # (1) Initialize the environment for the simulation.
    t_env = environment(xl_control)

    # (2) Initialize the dune field from the input.
    t_dunefield = dunefield(input_topkml, input_bottomkml)

    # (3) Run the dune field forward in time.
    for t in range(t_env.timesteps):
        t_dunefield.advance(t_env, t)

        # Call debug code to push debug outputs.
        if debug_dapper:
            t_dunefield.debug(t_env, t)

        # Check the output schedule to see if it is time to write an interim file.
        if oputschedule[t]:
            interim_oputfile = xl_control.interim_prefix + str(yearlist[t]) + '.kml'
            t_dunefield.output(interim_oputfile)

        # Message the user.
        user_message = 'Timestep completed: ' + str(yearlist[t])
        print(user_message)
        xl_control.message(user_message)

    # (4) Output the final dune field crest position.
    t_dunefield.output(outputkml)
    print('Simulation finished!')
    xl_control.message('Simulation finished . . ')
    return
        uid = sessions[session]
        did_update = update_account_name(client, uid, name)
        return json.dumps({"msg": str(did_update)}), 200 if did_update else 500
    else:
        return json.dumps({"err": "Bad method 405"}), 405


@app.route('/api/account/picture/<u_id>', methods=['GET'])
@cross_origin()
def getUserPicture(u_id):
    """Find a user picture given a supplied u_id"""
    image_string = read_pictureFromUID(client, u_id)
    return image_string


@app.route('/api/account/thumbnail/<u_id>', methods=['GET'])
@cross_origin()
def getUserThumbnail(u_id):
    """Find a user thumbnail given a supplied u_id"""
    image_string = read_thumbnailFromUID(client, u_id)
    return image_string


if __name__ == "__main__":
    # Configuration
    env = environment("app.env")
    url = env.get_env("url").format(env.get_env("password"))
    client = mongoClient(url).getClient()
    app.run(debug=True, host='0.0.0.0', port=int(os.environ.get('PORT', 8080)))
from environment import *
import time, random, math

if __name__ == "__main__":
    a = time.time()
    env = environment()
    env.reset()
    # env.elasticity = 1

    p0 = env.robots[0]
    p1 = env.robots[1]

    p0.pos(50, 50)
    p0.diameter = 50
    p0.raio = 25
    p0.m = 1
    p0.angle = math.pi * 6 / 4
    p0.saveState()

    p1.pos(800, 500)
    p1.diameter = 50
    p1.raio = 25
    p1.m = 1
    p1.angle = 0
    p1.saveState()

    # Simulated time elapsed per cycle:
    # Example: 50 steps of (1/50) s  -> 1 s elapsed
    # Example: 100 steps of (1/50) s -> 2 s elapsed
    # env.step([[0, 0], [5000, 0]])
def main():
    start_time = time.time()
    print("Started...")
    Result2 = []
    Result1 = []
    Hankel1 = []
    Hankel2 = []
    environmen = environment([0, 1, 2, 3], [0, 1, 2, 3, 4, 5, 6, 7], rank=3)
    t = TestGenerator(3, environmen)
    testsdata = t.generate()

    File = "logNewExperiment.txt"  # for storing detailed information
    deleteContent(File)
    File2 = "ResultNewExperiment.txt"  # for storing results
    deleteContent(File2)
    with open(File, "a") as myfile:
        myfile.write("started .... " + '\n')
    with open(File2, "a") as myfile2:
        myfile2.write("started .... " + '\n')

    ObservationProbability = observation(Bias=5, Numberobservation=4)
    print(ObservationProbability)
    # c = ConstructEnvironment(10, TestData=l, SizeOfTraining=100, stepSize=0.01,
    #                          lambda1=10, numIteration=2, FileName=File)

    for step in [0.001, 0.01, 0.1, 0.5, 1]:
        for lamda1 in [10, 3, 1, 0.1, 0.01, 0.005]:
            for i in [50, 100, 500, 1000, 3000]:  # size of training data
                start_time = time.time()
                L1 = []
                L2 = []
                H1 = []
                H2 = []
                for k in range(4):
                    A = ConstructEnvironment(
                        observationProbabilty=ObservationProbability,
                        sizeOfCone=4,
                        SizeOfTraining=i,
                        TestData=testsdata,
                        stepSize=step,
                        lambda1=lamda1,
                        numIteration=150,
                        FileName=File)
                    L1.append(A.R1)
                    L2.append(A.R2)
                    H1.append(A.E1)
                    H2.append(A.E2)
                Result1.append(L1)
                Result2.append(L2)
                Hankel1.append(H1)
                Hankel2.append(H2)
                with open(File2, "a") as myfile2:
                    myfile2.write('Results For step size equal to: ' + str(step) +
                                  ' and lambda1: ' + str(lamda1) + '\n')
                    myfile2.write("Error Algorithm without Denoising On Test Data: " +
                                  str(Result1) + '\n')
                    myfile2.write("Error Algorithm with Denoising On Test Data: " +
                                  str(Result2) + '\n')
                    myfile2.write("Error Hankel Matrix of Algorithm without Denoising: " +
                                  str(Hankel1) + '\n')
                    myfile2.write("Error Hankel Matrix of Algorithm with Denoising: " +
                                  str(Hankel2) + '\n')
                    myfile2.write("Running time: " + str(time.time() - start_time))
                print('Results For step size equal to: ', str(step),
                      ' and lambda1: ', str(lamda1), '\n')
                print("Error Algorithm without Denoising On Test Data: ", Result1)
                print("Error Algorithm with Denoising On Test Data: ", Result2)
                print("Error Hankel Matrix of Algorithm without Denoising: ", Hankel1)
                print("Error Hankel Matrix of Algorithm with Denoising: ", Hankel2)
                print("Running Time : ", time.time() - start_time)
parser.add_argument(
    '--model',
    help="select specific model to test, sac-stg2, sac-stg1 or sac-wos")
args = parser.parse_args()
model = args.model

if __name__ == "__main__":
    if model not in ('sac-stg2', 'sac-stg1', 'sac-wos'):
        print('Wrong model name :(')
    else:
        pygame.init()
        pygame.font.init()

        if model in ('sac-stg2', 'sac-stg1'):
            env = environment(traj_num=6, model='sac')
        else:
            env = environment(traj_num=6, model='sac-wos')

        action_dim = 2  # steer, throttle
        state = env.getState()
        state_dim = len(state)
        print('action_dimension:', action_dim, ' --- state_dimension:', state_dim)

        # Initializing the Agent for SAC and load the trained weights
        actor = SAC_Actor(state_dim=state_dim, action_dim=action_dim).to(device)
        if model == 'sac-stg2':
            model_path = '../weights/sac-stg2/policy_net_1280.pth'
        if model == 'sac-stg1':
def listas_iguales(lista, lista2):
    """Return True if both lists have the same length and elements."""
    if len(lista) == len(lista2):
        for i in range(len(lista)):
            if lista[i] != lista2[i]:
                return False
        return True
    else:
        return False


if __name__ == "__main__":
    env_params = {"base_pose": [200, 300], "nBots": 50, "r_rad": 20}
    env = environment(env_params)
    gap = env.robots[0].radius * 0.8
    pathplanner = formation_planner(env, gap=gap)
    pathplanner.set_robots_active(n_bots_formation)
    pathplanner.update_actives()
    goal = np.array([(gap * n_bots_formation) / 2, 0])
    print('New goal:', goal)
    pathplanner.create_formation()
    pathplanner.move_formation_goal(np.array([20, 20]))
    count = 0
    set_goal = 0
    same_robots = True
import pygame
import numpy as np
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense
import csv
import os

from tools import getHeading, bool2num

np.random.seed(1234)

if __name__ == "__main__":
    pygame.init()
    pygame.font.init()

    # Block number of throttle and the steering angle
    env = environment(4, 9, traj_num=6, model='dqn')
    action_num = env.tStateNum * env.sStateNum
    state = env.getState()
    states_num = len(state)
    print('action_num: ', action_num, ' --- ', 'states_num: ', states_num)

    # Initializing the Agent for DQN and load the trained weights
    model = Sequential()
    model.add(Dense(48, input_dim=42, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(48, activation='relu'))
    model.add(Dense(50, activation='linear'))
    model.load_weights('../weights/dqn/weights_eposide_1330.h5')

    destinationFlag = False
def run_MC(initialQV=None, train=True, random=False):
    epochs = 100000 if train else 1
    epsilon = 1. if train else 0  # epsilon-greedy
    maze_size = 15
    walls = [(3, 4), (3, 5), (3, 6)]
    dim = compress_dim2()
    # This creates our Q-value look-up table.
    qv = initialQV if initialQV is not None else np.zeros(dim)
    # Record how many times we've seen a given state-action pair.
    sa_count = np.zeros(dim)
    returnSum = 0
    stepSum = 0
    gameplay = []
    stats = np.zeros((int(epochs / 100), 2))

    for i in range(epochs):
        env = environment.random(maze_size, walls=walls) if random else environment(
            maze_size=maze_size, walls=walls)
        state = env.state
        ended = False
        episodeReward = 0
        max_epoch_length = 100
        ds = []  # we keep track of all the "decision states"

        # Run until the episode ends (the snake hits itself or a wall), or the
        # step budget runs out for randomly generated mazes.
        while not ended and (not random or max_epoch_length > 0):
            d = compress2(env, state)  # we "compress" the state to make it smaller
            if not train:
                gameplay.append(env.maze_string())

            # Epsilon-greedy policy.
            if (np.random.random() < epsilon
                    or np.count_nonzero(qv[d[0], d[1], d[2], d[3], :]) == 0):
                act = np.random.randint(0, len(actions))
            else:
                act = np.argmax(qv[d[0], d[1], d[2], d[3], :])  # select the best action

            d = tuple(list(d) + [act])  # append the chosen action to the decision
            sa_count[d] += 1
            ds.append(d)

            state, reward, ended = env.step(act)
            episodeReward += reward
            max_epoch_length -= 1

        epsilon = epsilon * 0.9999

        # Update the Q-values of the visited state-action pairs.
        for d in ds:
            qv[d] = qv[d] + (1. / sa_count[d]) * (episodeReward - qv[d])

        returnSum += episodeReward
        stepSum += len(ds)
        if i % 100 == 0 and i > 0:
            print("Episode: ", i, "Average Return: ", returnSum / 100.0,
                  "Average Steps: ", stepSum / 100.0)
            stats[int(i / 100) - 1, 0] = returnSum / 100.0
            stats[int(i / 100) - 1, 1] = stepSum / 100.0
            returnSum = 0
            stepSum = 0

    if train:
        return qv, stats
    else:
        return qv, gameplay
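# Hedged usage sketch (not part of the original file): it only assumes run_MC
# and its environment setup are importable as defined above. Train a Q-value
# table with Monte Carlo control, then replay a single greedy episode
# (train=False sets epsilon to 0 and runs one epoch) to inspect the policy.
if __name__ == "__main__":
    qv, stats = run_MC(train=True)
    _, frames = run_MC(initialQV=qv, train=False)
    for frame in frames:
        print(frame)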
import environment
from environment import *
from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt

qfunc = {}
envir = environment()
N = 100


def monteControl(s):
    qfunc.setdefault((s.dealer_first, s.sum_player, 0), {'value': 0, 'count': 0})
    qfunc.setdefault((s.dealer_first, s.sum_player, 1), {'value': 0, 'count': 0})

    if qfunc[(s.dealer_first, s.sum_player, 0)]['value'] > \
            qfunc[(s.dealer_first, s.sum_player, 1)]['value']:
        maxi = 0
    elif qfunc[(s.dealer_first, s.sum_player, 0)]['value'] < \
            qfunc[(s.dealer_first, s.sum_player, 1)]['value']:
        maxi = 1
    else:
        maxi = np.random.choice([0, 1], p=[0.5, 0.5])
    other = np.random.choice([0, 1], p=[0.5, 0.5])
def sarsa(num_episodes, time_steps, max_negative_reward, actions,
          discount_factor, num_agents, threshold):
    """
    1. actions: 0 -> Do not warn the driver
                1 -> Warn the driver
    2. no_features: Around 40
    """
    # Only the nearest `num_agents` agents are considered for feature extraction.
    no_features = num_agents * 4 * len(actions)
    returns_episodes = []
    weights = np.zeros(no_features)
    epsilon = 0.8

    for i in range(num_episodes):
        if i % 100 == 0:
            print("episode: ", i)

        car = Automobile(-100, 0, 1, 0)
        env = environment(num_agents)
        agents = env.get_agents()
        if i % 100 == 0:
            epsilon = epsilon / 2
        agent_warning = warning(weights, no_features, num_agents, car, env, epsilon)
        agent_reward = 0
        terminate = False
        count = 0

        while (not terminate and agent_reward > max_negative_reward
               and count < time_steps):
            # print("Count: ", count)
            count += 1
            # Advance the environment by one step; the feature vector for the
            # warning agent can be updated here.
            env.take_one_step()
            agents_list = env.get_agents()
            car_action = car.get_action(threshold, agents_list)
            if car_action == 1:
                terminate = True
            if not car.goal:
                car.action(car_action)
                agent_Rew = agent_warning.update()
                agent_reward += agent_Rew
            else:
                break

        points = []
        points.append([car.posX, car.posY])
        for j in range(len(agents)):
            points.append([agents[j].posX, agents[j].posY])
        points = np.array(points)
        # plt.scatter(points[0, 0], points[0, 1], color='green', linewidths=3)
        # plt.scatter(points[1:, 0], points[1:, 1], color='blue', linewidths=3)
        # plt.title("Environment. Green - car with the warning agent, blue - other agents (cars)")
        # plt.xlim(-100, 100)
        # plt.ylim(-100, 100)
        # plt.show()

        returns_episodes.append(agent_reward)
        weights = agent_warning.weights
        all_actions = agent_warning.all_actions
        # print("Number of 0's: ", all_actions.count(0))
        # print("Number of 1's: ", all_actions.count(1))
        # plt.hist(all_actions)
        # plt.ylim(0, 200)
        # plt.show()

    return returns_episodes
parser.add_argument('--num_hidden_layers', default=2, type=int)
parser.add_argument('--num_hidden_units_per_layer', default=256, type=int)
parser.add_argument('--sample_frequency', default=256, type=int)
parser.add_argument('--activation', default='Relu', type=str)
parser.add_argument('--render', default=False, type=bool)  # show UI or not
parser.add_argument('--log_interval', default=50, type=int)
parser.add_argument('--load', default=False, type=bool)  # load a saved model
args = parser.parse_args()

print(1)
pygame.init()
print(2)
pygame.font.init()
print(3)

env = environment(traj_num=1)
action_dim = 2
state = env.getState()
state_dim = len(state)
print('action_dimension:', action_dim, ' & state_dimension:', state_dim)

destinationFlag = False
collisionFlag = False
awayFlag = False
carla_startFlag = False

agent = SACAgent(state_dim=state_dim, action_dim=action_dim)
if args.load:
    agent.load(epoch=60, capacity=1)
assert (numIndividuals > 0 and numTimesteps > 0 and numNearestNeighbours > 0
        and numIndividuals > numNearestNeighbours), \
    "invalid arguments: numTimesteps={}!>0, numIndividuals={}!>numNearestNeighbours={}!>0".format(
        numTimesteps, numIndividuals, numNearestNeighbours)

### Define Korali Problem
import korali
k = korali.Engine()
e = korali.Experiment()

### Define results folder and load previous results, if any
resultFolder = '_result_vracer/'
found = e.loadState(resultFolder + '/latest')
if found:
    print("[Korali] Continuing execution from previous run...\n")

### Define Problem Configuration
e["Problem"]["Type"] = "Reinforcement Learning / Continuous"
e["Problem"]["Environment Function"] = lambda x: environment(args, x)
e["Problem"]["Agents Per Environment"] = numIndividuals

### Define Agent Configuration
e["Solver"]["Type"] = "Agent / Continuous / VRACER"
e["Solver"]["Mode"] = "Training"
e["Solver"]["Episodes Per Generation"] = 10
e["Solver"]["Experiences Between Policy Updates"] = 1
e["Solver"]["Learning Rate"] = 0.0001
e["Solver"]["Discount Factor"] = 0.995
e["Solver"]["Mini Batch"]["Size"] = 256

### Define Variables
# States (distance and angle to nearest neighbours)
for i in range(numNearestNeighbours):
    e["Variables"][i]["Name"] = "Distance " + str(i)
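# Hedged sketch of a possible continuation (assumed, not from the original
# script): the remaining neighbour-angle state variables and the action
# variables would be configured in the same loop style, file output pointed
# at resultFolder, and training launched with the Korali engine, e.g.
#     e["File Output"]["Path"] = resultFolder
#     k.run(e)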
def experiment(variant):
    args = getArgs()
    expl_env = NormalizedBoxEnv(environment(args, 'sac'))
    eval_env = NormalizedBoxEnv(environment(args, 'sac'))
    obs_dim = expl_env.observation_space.low.size
    action_dim = expl_env.action_space.low.size
    M = variant['layer_size']

    qf1 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    qf2 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    target_qf1 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    target_qf2 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    policy = TanhGaussianPolicy(
        obs_dim=obs_dim,
        action_dim=action_dim,
        hidden_sizes=[M, M],
    )

    # Load a previously trained policy and evaluate it deterministically.
    trainedfile = '/home/yujr/rlkit/data/SAC/3.0/params.pkl'
    data = torch.load(trainedfile)
    print("data loaded", data['evaluation/policy'])
    policy = data['evaluation/policy'].stochastic_policy
    eval_policy = MakeDeterministic(policy)

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        policy,
    )
    replay_buffer = EnvReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )
    trainer = SACTrainer(env=eval_env,
                         policy=policy,
                         qf1=qf1,
                         qf2=qf2,
                         target_qf1=target_qf1,
                         target_qf2=target_qf2,
                         **variant['trainer_kwargs'])
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
#? 4.
player = agent(2, "player")
scorer = {}

#* The player gets "batch_size" tries with the same parameters.
#* After "batch_size" tries, the environment is reset and the agent is updated.
for _ in range(episodes):
    #* Initial parameters
    player_distance_from_hoop = np.random.randint(
        100, hoop_pole_position[0] - hoop_width - player_width)
    player_position = (hoop_pole_position[0] - player_distance_from_hoop,
                       screen_height - ground_thickness - player_height)

    for count in range(batch_size):
        env = environment(player_position, player_distance_from_hoop)
        # Single forward pass; the prediction holds (power, angle).
        prediction = player.predict([player_distance_from_hoop])[0]
        power, angle = prediction[0], prediction[1]
        original_power = power
        original_angle = angle
        power *= 2000
        angle *= 90
        score = env.throw(power, angle)
        print("Power: ", power, "Angle: ", angle, "Score: ", score)
        scorer[(player_distance_from_hoop, original_power, original_angle)] = score

    for index, value in enumerate(scorer.values()):
        if value > 0:
            for _ in range(data_loop):
                player.train(list(scorer.keys())[index])
if __name__ == "__main__":
    # Initialization of the connector to the V-REP simulator.
    clientID = vrep.simxStart("127.0.0.1", 19997, 1, 1, 2000, 5)
    res, objs = vrep.simxGetObjects(clientID, vrep.sim_handle_all,
                                    vrep.simx_opmode_blocking)
    if clientID > -1:
        print("Connected to Remote API server!")
    else:
        print('Failed connecting to remote API server')
        sys.exit()

    # Initializing the robot information.
    # Initializing the learning agent for DQN.
    # Block numbers of linear velocity and the grad of angular velocity.
    env = environment(10, 10)
    action_num = env.vStateNum * env.aStateNum
    states_num = len(env.getState())
    print(action_num, ' --- ', states_num)
    agent = DQNAgent(states_num, action_num)

    # Start training.
    done = False
    batch_size = 32
    for e in range(EPISODES):
        print("-------------------------> ", e)
        print(agent.epsilon)
        env.reset(clientID)
        env.setCtrl(INIT_CORR_NUM)
        time.sleep(1)
        # Collecting the status information of the mobile robot.
from environment import *
import math
import matplotlib.pyplot as plt
import numpy as np
from robot import *
from bicycleRobot import *

world = environment()
world.print()

feature = [[1, ii] for ii in range(world.length - 1)]
world.addFeature(feature)
world.addGoal([9, 7])

rb = robot(0, 0, 0, world)
world.addRobot(rb)
world.print()
world.moveRobot([1, 1], 0)
world.moveRobot([0, 1], 0)
world.prettyPrint()

# Build a Euclidean-distance heuristic for A* planning.
heuristic = []
world.prettyPrint(heuristic)
for y in range(world.length):
    heuristic.append([])
    for x in range(world.width):
        heuristic[y].append(
            math.sqrt((world.goalX - x)**2 + (world.goalY - y)**2))
pathPlan = world.planAStar(heuristic)
world.prettyPrint(pathPlan)

# Test DP algorithm on left-turn scenario world.
costFxn = [1, 1, 1, 10]
driveWorld = environment(6, 6, 1)
driveWorld.addGoal([0, 3], 270)
def run_QL(initialQV=None, train=True, random=False):
    epochs = 100000 if train else 1
    epsilon = 1. if train else 0  # epsilon-greedy
    gamma = 0.1
    alpha = 0.1
    maze_size = 15
    walls = [(3, 4), (3, 5), (3, 6)]
    dim = compress_dim2()
    # This creates our Q-value look-up table.
    qv = initialQV if initialQV is not None else np.zeros(dim)
    returnSum = 0
    stepSum = 0
    gameplay = []
    stats = np.zeros((int(epochs / 100), 2))

    for i in range(epochs):
        env = environment.random(maze_size, walls=walls) if random else environment(
            maze_size=maze_size, walls=walls)
        state = env.state
        ended = False
        max_epoch_length = 100

        while not ended and (not random or max_epoch_length > 0):
            if not train:
                gameplay.append(env.maze_string())
            d = compress2(env, state)

            # Epsilon-greedy policy.
            if (np.random.random() < epsilon
                    or np.count_nonzero(qv[d[0], d[1], d[2], d[3], :]) == 0):
                act = np.random.randint(0, len(actions))
            else:
                act = np.argmax(qv[d[0], d[1], d[2], d[3], :])  # select the best action

            d = tuple(list(d) + [act])  # append the chosen action to the decision
            state_new, reward, ended = env.step(act)

            # Q-learning update: bootstrap from the best action in the *next* state.
            d_new = compress2(env, state_new)
            q_next = 0 if ended else np.max(qv[d_new[0], d_new[1], d_new[2], d_new[3], :])
            qv[d] += alpha * (reward + gamma * q_next - qv[d])

            state = state_new
            returnSum += reward
            stepSum += 1
            max_epoch_length -= 1

        epsilon = epsilon * 0.9999
        if i % 100 == 0 and i > 0:
            print("Episode: ", i, "Average Return: ", returnSum / 100.0,
                  "Average Steps: ", stepSum / 100.0)
            stats[int(i / 100) - 1, 0] = returnSum / 100.0
            stats[int(i / 100) - 1, 1] = stepSum / 100.0
            returnSum = 0
            stepSum = 0

    if train:
        return qv, stats
    else:
        return qv, gameplay