コード例 #1
0
    exit(1)
# If the "help" argument was received, print the agent host's usage text and
# exit with status 0 without doing any further setup.
if agent_host.receivedArgument("help"):
    print(agent_host.getUsage())
    exit(0)

# Read the entire contents of world.xml into a single string.
# NOTE(review): presumably this is the mission definition - confirm where
# missionXML is consumed.
with open('world.xml', 'r') as file:
    missionXML = file.read()

# Client pool holding a single entry: 127.0.0.1 on port 10000.
my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

# Run parameters: each is read from the 'DEFAULT' section of `config` and
# converted to int.
EPISODES = int(config.get('DEFAULT', 'EPISODES'))
state_size = int(config.get('DEFAULT', 'STATE_SIZE'))
action_size = int(config.get('DEFAULT', 'ACTION_SIZE'))
time_multiplier = int(config.get('DEFAULT', 'TIME_MULTIPLIER'))
# The agent is constructed from the two sizes read above.
nn = DQNAgent(state_size, action_size)
done = False
batch_size = int(config.get('DEFAULT', 'BATCH_SIZE'))
# Module-level counters and containers, initialised to zero / empty here.
# How they are updated is not visible in this excerpt.
CLEARS = 0
MAX_SUCCESS_RATE = 0
GRAPH = live_graph.Graph()
REWARDS_DICT = {}
ALL_REWARDS = []
timestep = 0

# command line arguments
try:
    arg_check = sys.argv[1].lower()  # using arguments from command line
    if (arg_check not in ["zombie", "skeleton", "spider", "giant"]):
        print("\nInvalid mob type, defaulting to 1 zombie")
        mob_type = 'zombie'
コード例 #2
0
import time as timer

if __name__ == "__main__":
    # Script entry point: build the environment, load saved agent state from
    # an .h5 file, then run 10 episodes in which the action with the highest
    # predicted value is taken at every step.
    ##################################################################################
    ##################### Uncomment for your own ####################################
    # Machine-specific paths: keep exactly one pybulletPath assignment active.
    #pybulletPath = "/home/auggienanz/bullet3/data/" #Auggie
    #pybulletPath = "D:/ECE 285 - Advances in Robot Manipulation/bullet3-master/data/" #Bharat
    pybulletPath = 'C:/Users/Juan Camilo Castillo/Documents/bullet3/bullet3-master/data/'  #Juan
    # NOTE(review): outputpath is assigned but not used in the visible part of
    # this block - confirm it is needed.
    outputpath = 'C:/Users/Juan Camilo Castillo/Documents/ECE 285 Robotics/save/'  #Juan

    #################################################################################

    env = environment(pybulletPath, useGUI=True, movement_delta=0.003)
    # Both sizes are hard-coded to 6; state_size is also the width used when
    # reshaping observations below.
    state_size = 6
    action_size = 6
    agent = DQNAgent(state_size, action_size)
    agent.load("./run_results/JengaLearn_11.h5")
    for e in range(10):
        # Start each episode from env.reset_random() and reshape the
        # observation to a single row of state_size values.
        state = env.reset_random()
        #print(state)
        state = np.reshape(state, [1, state_size])
        #timer.sleep(1)
        TotalReward = 0
        print('Starting Policy Rolloout from learned weights')
        # `time` here is the step index (0..299); the time module itself is
        # imported under the name `timer`.
        for time in range(300):
            # Pick the index of the largest value in the first row of the
            # model's prediction for the current state.
            action = np.argmax(agent.model.predict(state)[0])
            # NOTE(review): `done` is returned but not checked in the visible
            # lines - confirm the episode is terminated somewhere.
            next_state, reward, done = env.step(action)
            # Short pause (5 ms) between steps.
            timer.sleep(0.005)
            next_state = np.reshape(next_state, [1, state_size])
            # Accumulate the reward collected so far in this episode.
            TotalReward = reward + TotalReward
            state = next_state
コード例 #3
0
ファイル: train.py プロジェクト: tianzhaotju/Deeplive
# Enumerate every combination of one BITRATE, one TARGET_BUFFER and one
# LATENCY_LIMIT value. Each combination is stored as a three-element list
# [bitrate, target_buffer, latency_limit], with LATENCY_LIMIT varying fastest
# and BITRATE slowest.
ACTION_SAPCE = []
for i in BITRATE:
    for j in TARGET_BUFFER:
        for k in LATENCY_LIMIT:
            action_apace = [i, j, k]
            ACTION_SAPCE.append(action_apace)

# Fixed settings that do not depend on the action table.
history_len = 50
BATCH_SIZE = 32
STATE_SIZE = 250
done = False

# One action per combination of the three option lists.
ACTION_SIZE = (
    len(BITRATE)
    * len(TARGET_BUFFER)
    * len(LATENCY_LIMIT)
)

# The agent is constructed from the state width and the number of actions.
agent = DQNAgent(STATE_SIZE, ACTION_SIZE)


def train(epoch, train_trace):
    # path setting
    TRAIN_TRACES = train_trace
    video_size_file = './dataset/video_trace/sports/frame_trace_'  # video trace path setting,
    LogFile_Path = "./log/"  # log file trace path setting,
    # load the trace
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TRAIN_TRACES)
    # random_seed
    random_seed = 2
    video_count = 0
    frame_time_len = 0.04
    reward_all_sum = 0