Code example #1
# load the trained agent from the saved checkpoint
agent = Agent('eval/prim.pth')

# output directory for results
save_path = 'prim_result/'
utils.check_dirs([save_path])

# shape descriptor: name, 16^3 and 64^3 binvox voxelizations, input modality, reference image
shape_infopack = ['demo', 'eval/demo-16.binvox', 'eval/demo-64.binvox', 'rgb', 'demo.png']
s, box, step = env.reset(shape_infopack)
acm_r = 0  # accumulated reward

step_interval = 20  # dump intermediate results every step_interval steps

while True:

    # mask invalid actions for the current box state before selecting one;
    # the final argument 1.0 presumably disables exploration at evaluation time
    valid_mask = env.get_valid_action_mask(box)
    a = agent.choose_action(s, box, step, valid_mask, 1.0)
    s_, box_, step_, r, done = env.next(a)
    s, box, step = s_, box_, step_  # advance to the new state for the next action

    acm_r += r
    if env.step_count % step_interval == 0:
        log_info = 'demo_step_' + str(env.step_count)
        print(log_info)
        env.output_result(log_info, save_path)

    if done:
        log_info = 'demo_finish_reward_' + str(acm_r)
        print(log_info)
        env.output_result(log_info, save_path)
        env.save_edgeloop(save_path)
        break
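
Agent.choose_action itself is not shown in this listing. As a rough sketch of what masked action selection with an exploitation probability (the 1.0 argument above) could look like, assuming a discrete Q-value output; all names here are illustrative, not the project's actual code:

import numpy as np

def choose_action_sketch(q_values, valid_mask, exploit_prob):
    # Illustrative only: pick among valid actions, greedily with
    # probability exploit_prob, uniformly at random otherwise.
    valid_idx = np.flatnonzero(valid_mask)
    if np.random.rand() < exploit_prob:
        # mask out invalid actions before taking the argmax
        masked_q = np.where(np.asarray(valid_mask, dtype=bool), q_values, -np.inf)
        return int(np.argmax(masked_q))
    return int(np.random.choice(valid_idx))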
Code example #2
File: main.py  Project: Ahmetf1/Machine-Learning
    n_total = 0

    for n in range(n_games):
        if n != 0:
            # log the finished episode and train on the transitions it stored
            scores.append(score)
            avg_rewards.append(total_reward / n_moves)
            agent.learn(n_total, n_total + n_moves)
            n_total += n_moves  # must advance before n_moves is reset below
        score = 0
        game.score = 0
        total_reward = 0
        n_moves = 0
        done = False
        observation = game.get_states()
        while not done:
            action = agent.choose_action(observation)
            if n < -1:
                # disabled manual-control branch: flip the condition to play
                # by keyboard (requires the Windows-only msvcrt module)
                game.draw()
                action = int(msvcrt.getch().decode("utf-8"))

            observation_, reward, done, score = game.spin_once(action, n_moves)
            agent.store_data(observation, action, reward, observation_, done)
            observation = observation_
            total_reward += reward
            n_moves += 1
            # game.draw()
            if score > score_max:
                score_max = score
                print(f"max score: {score_max}")
        if n % 100 == 0:
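
The disabled branch above reads keyboard input through msvcrt, which exists only on Windows. A standalone sketch of that manual fallback (the helper name is ours, not the project's):

import msvcrt  # Windows-only console input

def read_manual_action():
    # block until a key is pressed; interpret the digit as the action id
    key = msvcrt.getch().decode("utf-8")
    return int(key)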
Code example #3
# hyperparameters: alpha, beta, tau, batch_size

score_record = []
score_record_step = []
count_record = []
count_record_step = []
time_record = []
time_record_step = []
for i in range(800):
    done = False
    score = 0
    obs = env.get_init_state()
    # loop until all tasks have been allocated
    while not done:
        act = MECSnet.choose_action(obs)
        new_state, reward, done = env.step(act)
        MECSnet.remember(obs, act, reward, new_state, int(done))
        MECSnet.learn()  # one learning step per environment step
        score += reward
        obs = new_state
        # print('reward is: {}'.format(reward))

    # append this episode's cumulative reward to the record
    score_record.append(score)
    # print('episode ', i, 'score %.2f' % score, '100 game average %.2f' % np.mean(score_record[-100:]))
    print('episode ', i, 'score %.2f' % score, "    wrong: ", env.count_wrong)
    count_record.append(1 - env.count_wrong / num_task)  # fraction of correctly allocated tasks
    time_record.append(env.time)
    if i % 25 == 0:
        # UEnet.save_models()
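
MECSnet.remember and MECSnet.learn follow the usual experience-replay pattern: store every transition, then train on sampled mini-batches. Below is a minimal buffer consistent with the remember(obs, act, reward, new_state, int(done)) call above; the class name and capacity are illustrative assumptions, not the project's code:

import random
from collections import deque

class ReplayBuffer:
    # illustrative sketch; the project's actual buffer is not shown here
    def __init__(self, capacity=100000):
        self.buffer = deque(maxlen=capacity)

    def remember(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # uniform mini-batch sampling over stored transitions
        return random.sample(self.buffer, batch_size)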