Example #1
    params = {
        # (snippet begins mid-file; earlier entries of this dict are not shown)
        'batch_size': 32,
        'state_space_dim': env.state_dim,
        'action_space_dim': env.action_dim,
        }
    agent = Agent(**params)

    score = []
    mean = []

    for episode in range(1000):
        s0 = env.reset()
        total_reward = 0  # accumulate the episode reward from zero
        while True:
            env.render()
            a0 = agent.act(s0)
            s1, r1, done = env.step(a0)
            
            if done:
                r1 = -1  # penalize the terminal transition

            agent.put(s0, a0, r1, s1)  # store the transition in replay memory
            
            if done:
                break

            total_reward += r1
            s0 = s1
            agent.learn()
            
        score.append(total_reward)
        mean.append(sum(score[-100:]) / len(score[-100:]))  # running mean over the last 100 episodes
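
Example #1 calls into an Agent class and an env object that are not shown. The sketch below is a minimal, hypothetical stub of the interface the loop assumes (an Agent built from batch_size, state_space_dim, and action_space_dim, with act, put, and learn methods); only the names come from the snippet, the bodies are placeholder assumptions:

import random

class Agent:
    """Hypothetical stub matching the calls in Example #1, not the original implementation."""
    def __init__(self, batch_size, state_space_dim, action_space_dim, **kwargs):
        self.batch_size = batch_size
        self.state_space_dim = state_space_dim
        self.action_space_dim = action_space_dim
        self.memory = []  # replay buffer of (s0, a0, r1, s1) tuples

    def act(self, s0):
        # placeholder policy: a random discrete action
        return random.randrange(self.action_space_dim)

    def put(self, s0, a0, r1, s1):
        # store one transition in the replay buffer
        self.memory.append((s0, a0, r1, s1))

    def learn(self):
        # a real agent would sample a batch and update its networks here
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)  # gradient update would go here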
Example #2
File: main.py  Project: unasm/utils
############################################### 

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from env import ArmEnv
from rl import DDPG

MAX_EPISODES = 500
MAX_EP_STEPS = 200

env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound


rl = DDPG(a_dim, s_dim, a_bound)
for i in range(MAX_EPISODES):
    s = env.reset()
    for j in range(MAX_EP_STEPS):
        env.render()
        a = rl.choose_action(s)
        s_, r, done = env.step(a)
        rl.store_transition(s, a, r, s_)
        if rl.memory_full():
            rl.learn()
        s = s_
        if done:
            break  # end the episode early once the goal is reached
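
Example #2 imports pandas and matplotlib but never uses them, and Example #1 already accumulates score and mean lists, so a learning-curve plot is the natural missing piece. A minimal sketch, assuming per-episode totals collected as in Example #1 (the episode_rewards name is hypothetical):

import matplotlib.pyplot as plt

def plot_learning_curve(episode_rewards):
    # plot the raw per-episode reward and its 100-episode running mean
    running_mean = [sum(episode_rewards[max(0, i - 99):i + 1]) / len(episode_rewards[max(0, i - 99):i + 1])
                    for i in range(len(episode_rewards))]
    plt.plot(episode_rewards, label='episode reward')
    plt.plot(running_mean, label='100-episode mean')
    plt.xlabel('episode')
    plt.ylabel('total reward')
    plt.legend()
    plt.show()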
Example #3
# Imports (Example #2 shows where ArmEnv and DDPG come from)
from env import ArmEnv
from rl import DDPG

# Global variables
MAX_EPISODES = 500
MAX_EP_STEPS = 500

# Set up the environment
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# Set up the RL method
rl = DDPG(a_dim, s_dim, a_bound)

# Start training

for i in range(MAX_EPISODES):
    s = env.reset()
    for j in range(MAX_EP_STEPS):
        env.render()

        a = rl.choose_action(s)

        s_, r, done = env.step(a)

        rl.store_transition(s, a, r, s_)

        if rl.memory_full:
            # start learning once the replay memory has been filled
            rl.learn()
        s = s_
        if done:
            break  # end the episode early once the goal is reached
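
Examples #2 and #3 both drive an ArmEnv environment and a DDPG agent whose definitions live in the env and rl modules, which are not shown here. The stub below is a hypothetical sketch of the interface the two loops assume; the attribute and method names are taken from the snippets, every body and constant is a placeholder. Note that Example #2 calls memory_full() as a method while Example #3 reads it as an attribute; this sketch follows Example #3:

import numpy as np

class ArmEnv:
    """Hypothetical stub of the environment interface used above."""
    state_dim = 9               # placeholder sizes; the real values live in env.py
    action_dim = 2
    action_bound = [-1.0, 1.0]

    def reset(self):
        return np.zeros(self.state_dim)

    def render(self):
        pass  # the real environment draws the arm here

    def step(self, a):
        s_ = np.zeros(self.state_dim)  # next state
        r, done = 0.0, False           # reward and terminal flag
        return s_, r, done


class DDPG:
    """Hypothetical stub of the agent interface used above."""
    def __init__(self, a_dim, s_dim, a_bound, memory_capacity=30000):
        self.a_dim, self.s_dim, self.a_bound = a_dim, s_dim, a_bound
        self.memory_capacity = memory_capacity
        self.memory = []

    @property
    def memory_full(self):
        return len(self.memory) >= self.memory_capacity

    def choose_action(self, s):
        # placeholder policy: uniform random action within the bounds
        return np.random.uniform(self.a_bound[0], self.a_bound[1], size=self.a_dim)

    def store_transition(self, s, a, r, s_):
        self.memory.append((s, a, r, s_))
        if len(self.memory) > self.memory_capacity:
            self.memory.pop(0)  # keep a bounded replay buffer

    def learn(self):
        pass  # actor and critic updates would go here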