exit(1)

if agent_host.receivedArgument("help"):
    print(agent_host.getUsage())
    exit(0)

# load the mission definition
with open('world.xml', 'r') as file:
    missionXML = file.read()

my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

# training hyperparameters read from the config file
EPISODES = int(config.get('DEFAULT', 'EPISODES'))
state_size = int(config.get('DEFAULT', 'STATE_SIZE'))
action_size = int(config.get('DEFAULT', 'ACTION_SIZE'))
time_multiplier = int(config.get('DEFAULT', 'TIME_MULTIPLIER'))

nn = DQNAgent(state_size, action_size)
done = False
batch_size = int(config.get('DEFAULT', 'BATCH_SIZE'))

# bookkeeping for training progress
CLEARS = 0
MAX_SUCCESS_RATE = 0
GRAPH = live_graph.Graph()
REWARDS_DICT = {}
ALL_REWARDS = []
timestep = 0

# command line arguments
try:
    arg_check = sys.argv[1].lower()
    # using arguments from command line
    if arg_check not in ["zombie", "skeleton", "spider", "giant"]:
        print("\nInvalid mob type, defaulting to 1 zombie")
        mob_type = 'zombie'
import time as timer

if __name__ == "__main__":
    ##################################################################################
    ##################### Uncomment for your own ####################################
    #pybulletPath = "/home/auggienanz/bullet3/data/"  #Auggie
    #pybulletPath = "D:/ECE 285 - Advances in Robot Manipulation/bullet3-master/data/"  #Bharat
    pybulletPath = 'C:/Users/Juan Camilo Castillo/Documents/bullet3/bullet3-master/data/'  #Juan
    outputpath = 'C:/Users/Juan Camilo Castillo/Documents/ECE 285 Robotics/save/'  #Juan
    #################################################################################

    env = environment(pybulletPath, useGUI=True, movement_delta=0.003)
    state_size = 6
    action_size = 6
    agent = DQNAgent(state_size, action_size)
    agent.load("./run_results/JengaLearn_11.h5")

    # roll out the learned policy for 10 evaluation episodes
    for e in range(10):
        state = env.reset_random()
        #print(state)
        state = np.reshape(state, [1, state_size])
        #timer.sleep(1)
        TotalReward = 0
        print('Starting Policy Rollout from learned weights')
        for time in range(300):
            # act greedily with respect to the trained Q-network
            action = np.argmax(agent.model.predict(state)[0])
            next_state, reward, done = env.step(action)
            timer.sleep(0.005)
            next_state = np.reshape(next_state, [1, state_size])
            TotalReward = reward + TotalReward
            state = next_state
# enumerate every (bitrate, target buffer, latency limit) combination as one discrete action
ACTION_SPACE = []
for i in BITRATE:
    for j in TARGET_BUFFER:
        for k in LATENCY_LIMIT:
            ACTION_SPACE.append([i, j, k])

STATE_SIZE = 250
ACTION_SIZE = len(BITRATE) * len(TARGET_BUFFER) * len(LATENCY_LIMIT)
BATCH_SIZE = 32
history_len = 50
done = False
agent = DQNAgent(STATE_SIZE, ACTION_SIZE)


def train(epoch, train_trace):
    # path setting
    TRAIN_TRACES = train_trace
    video_size_file = './dataset/video_trace/sports/frame_trace_'  # video trace path setting
    LogFile_Path = "./log/"  # log file trace path setting
    # load the network traces
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(TRAIN_TRACES)
    # random_seed
    random_seed = 2
    video_count = 0
    frame_time_len = 0.04
    reward_all_sum = 0