def test_gym_environment(self):
    for name in ['AirRaid-v0', 'Amidar-v0', 'Asteroids-v0']:
        env = GymEnvironment(name)
        assert env.name == name

        env.reset()
        env.step(0)
        env.close()
def return_score(network_list, k):
    def run_pipeline(pipeline, episode_count):
        for i in range(episode_count):
            total_reward = 0
            pipeline.reset_state_variables()
            is_done = False
            while not is_done:
                result = pipeline.env_step()
                # Move the observation to the GPU only if one is available.
                if torch.cuda.is_available():
                    result = (result[0].cuda(), *result[1:])
                pipeline.step(result)

                reward = result[1]
                total_reward += reward
                is_done = result[2]
            print(f"Episode {i} total reward: {total_reward}")
        return total_reward

    score_list = []
    for i, network in enumerate(network_list):
        score_sum = 0
        if torch.cuda.is_available():
            network = network.to('cuda:0')

        environment = GymEnvironment('BreakoutDeterministic-v4')
        environment.reset()

        # Build pipeline from specified components.
        environment_pipeline = EnvironmentPipeline(
            network,
            environment,
            encoding=bernoulli,
            action_function=select_softmax,
            output="Output Layer",
            time=100,
            history_length=1,
            delta=1,
            plot_interval=1,
        )
        environment_pipeline.network.learning = False

        print("Testing: ")
        score_sum += run_pipeline(environment_pipeline, episode_count=2)
        score_list.append(score_sum / 2)
        torch.cuda.empty_cache()

    with open('Score/' + str(k) + '.txt', 'w') as f:
        f.write(str(score_list))

    return score_list
# Add all layers and connections to the network.
network.add_layer(inpt, name="X")
network.add_layer(middle, name="Y")
network.add_layer(out, name="Z")
network.add_connection(inpt_middle, source="X", target="Y")
network.add_connection(middle_out, source="Y", target="Z")

# Load SpaceInvaders environment.
environment = GymEnvironment(
    "SpaceInvaders-v0",
    BernoulliEncoder(time=int(network.dt), dt=network.dt),
    history_length=2,
    delta=4,
)
environment.reset()

# Plotting configuration.
plot_config = {
    "data_step": 1,
    "data_length": 10,
    "reward_eps": 1,
    "reward_window": 10,
    "volts_type": "line",
}

# Build pipeline from specified components.
pipeline = EnvironmentPipeline(
    network,
    environment,
    time=network.dt,
import argparse

import numpy as np

from bindsnet.environment import GymEnvironment
from bindsnet.datasets.spike_encoders import NullEncoder

parser = argparse.ArgumentParser()
parser.add_argument("-n", type=int, default=1000000)
parser.add_argument("--render", dest="render", action="store_true")
parser.set_defaults(render=False)

args = parser.parse_args()

n = args.n
render = args.render

# Load SpaceInvaders environment.
env = GymEnvironment("SpaceInvaders-v0", NullEncoder())
env.reset()

total = 0
rewards = []
avg_rewards = []
lengths = []
avg_lengths = []

i, j, k = 0, 0, 0
while i < n:
    if render:
        env.render()

    # Select random action.
    a = np.random.choice(6)
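    # --- Hedged continuation sketch; not the original script's exact logic. ---
    # It assumes env.step() returns (obs, reward, done, info), as in the other
    # snippets in this section, and treats i as the step counter, j as the
    # current episode's length, and k as the number of finished episodes.
    obs, reward, done, info = env.step(a)

    total += reward
    i += 1
    j += 1

    if done:
        rewards.append(total)
        avg_rewards.append(np.mean(rewards[-100:]))
        lengths.append(j)
        avg_lengths.append(np.mean(lengths[-100:]))
        print(f"Episode {k + 1}: reward {total}, length {j}")

        total, j = 0, 0
        k += 1
        env.reset()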
def main(seed=0, time=50, n_episodes=25, percentile=99.9, plot=False):
    np.random.seed(seed)

    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    epsilon = 0

    print()
    print('Loading the trained ANN...')
    print()

    # Load the trained ANN (converted DQN parameters).
    ANN = Network()
    ANN.load_state_dict(
        torch.load('../../params/converted_dqn_time_difference_grayscale.pt')
    )

    environment = GymEnvironment('BreakoutDeterministic-v4')

    f = f'{seed}_{n_episodes}_states.pt'
    if os.path.isfile(os.path.join(params_path, f)):
        print('Loading pre-gathered observation data...')
        states = torch.load(os.path.join(params_path, f))
    else:
        print('Gathering observation data...')
        print()

        episode_rewards = np.zeros(n_episodes)
        noop_counter = 0
        total_t = 0
        states = []

        for i in range(n_episodes):
            obs = environment.reset().to(device)
            state = torch.stack([obs] * 4, dim=2)

            for t in itertools.count():
                encoded = torch.tensor([0.25, 0.5, 0.75, 1]) * state
                encoded = torch.sum(encoded, dim=2)
                states.append(encoded)

                q_values = ANN(encoded.view([1, -1]))[0]
                probs, best_action = policy(q_values, epsilon)
                action = np.random.choice(np.arange(len(probs)), p=probs)

                if action == 0:
                    noop_counter += 1
                else:
                    noop_counter = 0

                if noop_counter >= 20:
                    action = np.random.choice([0, 1, 2, 3])
                    noop_counter = 0

                next_obs, reward, done, _ = environment.step(action)
                next_obs = next_obs.to(device)

                next_state = torch.clamp(next_obs - obs, min=0)
                next_state = torch.cat(
                    (state[:, :, 1:], next_state.view(
                        [next_state.shape[0], next_state.shape[1], 1]
                    )), dim=2
                )

                episode_rewards[i] += reward
                total_t += 1

                if done:
                    print(f'Step {t} ({total_t}) @ Episode {i + 1} / {n_episodes}')
                    print(f'Episode Reward: {episode_rewards[i]}')
                    break

                state = next_state
                obs = next_obs

        states = torch.stack(states).view(-1, 6400)
        torch.save(states, os.path.join(params_path, f))

    print()
    print(f'Collected {states.size(0)} Atari game frames.')
    print()
    print('Converting ANN to SNN...')

    # Do ANN to SNN conversion.
    SNN = ann_to_snn(ANN, input_shape=(6400,), data=states, percentile=percentile)

    for l in SNN.layers:
        if l != 'Input':
            SNN.add_monitor(
                Monitor(SNN.layers[l], state_vars=['s', 'v'], time=time), name=l
            )

    spike_ims = None
    spike_axes = None
    inpt_ims = None
    inpt_axes = None

    new_life = True
    total_t = 0
    noop_counter = 0

    print()
    print('Testing SNN on Atari Breakout game...')
    print()

    # Test SNN on Atari Breakout.
    obs = environment.reset().to(device)
    state = torch.stack([obs] * 4, dim=2)
    prev_life = 5
    total_reward = 0

    for t in itertools.count():
        sys.stdout.flush()

        encoded_state = torch.tensor([0.25, 0.5, 0.75, 1]) * state
        encoded_state = torch.sum(encoded_state, dim=2)
        encoded_state = encoded_state.view([1, -1]).repeat(time, 1)

        inpts = {'Input': encoded_state}
        SNN.run(inpts=inpts, time=time)

        spikes = {
            layer: SNN.monitors[layer].get('s') for layer in SNN.monitors
        }
        voltages = {
            layer: SNN.monitors[layer].get('v') for layer in SNN.monitors
        }

        action = torch.softmax(voltages['3'].sum(1), 0).argmax()

        if action == 0:
            noop_counter += 1
        else:
            noop_counter = 0

        if noop_counter >= 20:
            action = np.random.choice([0, 1, 2, 3])
            noop_counter = 0

        if new_life:
            action = 1

        next_obs, reward, done, info = environment.step(action)
        next_obs = next_obs.to(device)

        if prev_life - info["ale.lives"] != 0:
            new_life = True
        else:
            new_life = False

        prev_life = info["ale.lives"]

        next_state = torch.clamp(next_obs - obs, min=0)
        next_state = torch.cat(
            (state[:, :, 1:], next_state.view(
                [next_state.shape[0], next_state.shape[1], 1]
            )), dim=2
        )

        total_reward += reward
        total_t += 1

        SNN.reset_()

        if plot:
            # Get voltage recording.
            inpt = encoded_state.view(time, 6400).sum(0).view(80, 80)
            spike_ims, spike_axes = plot_spikes(
                {layer: spikes[layer] for layer in spikes},
                ims=spike_ims, axes=spike_axes
            )
            inpt_axes, inpt_ims = plot_input(
                state, inpt, ims=inpt_ims, axes=inpt_axes
            )
            plt.pause(1e-8)

        if done:
            print(f'Episode Reward: {total_reward}')
            print()
            break

        state = next_state
        obs = next_obs

    model_name = '_'.join(
        [str(x) for x in [seed, time, n_episodes, percentile]]
    )
    columns = ['seed', 'time', 'n_episodes', 'percentile', 'reward']
    data = [[seed, time, n_episodes, percentile, total_reward]]

    path = os.path.join(results_path, 'results.csv')
    if not os.path.isfile(path):
        df = pd.DataFrame(data=data, index=[model_name], columns=columns)
    else:
        df = pd.read_csv(path, index_col=0)

        if model_name not in df.index:
            df = df.append(
                pd.DataFrame(data=data, index=[model_name], columns=columns)
            )
        else:
            df.loc[model_name] = data[0]

    df.to_csv(path, index=True)
def main(seed=0, n_episodes=25, epsilon=0.05):
    np.random.seed(seed)

    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    print()
    print('Loading the trained ANN...')
    print()

    # Load the trained ANN (converted DQN parameters).
    ANN = Network()
    ANN.load_state_dict(
        torch.load(
            os.path.join(ROOT_DIR, 'params', 'converted_dqn_time_difference_grayscale.pt')
        )
    )

    environment = GymEnvironment('BreakoutDeterministic-v4')

    print('Gathering observation data...')
    print()

    episode_rewards = np.zeros(n_episodes)
    noop_counter = 0
    total_t = 0
    states = []
    new_life = True
    prev_life = 5

    for i in range(n_episodes):
        obs = environment.reset().to(device)
        state = torch.stack([obs] * 4, dim=2)

        for t in itertools.count():
            encoded = torch.tensor([0.25, 0.5, 0.75, 1]) * state
            encoded = torch.sum(encoded, dim=2)
            states.append(encoded)

            q_values = ANN(encoded.view([1, -1]))[0]
            probs, best_action = policy(q_values, epsilon)
            action = np.random.choice(np.arange(len(probs)), p=probs)

            if action == 0:
                noop_counter += 1
            else:
                noop_counter = 0

            if noop_counter >= 20:
                action = np.random.choice([0, 1, 2, 3])
                noop_counter = 0

            if new_life:
                action = 1

            next_obs, reward, done, info = environment.step(action)
            next_obs = next_obs.to(device)

            if prev_life - info["ale.lives"] != 0:
                new_life = True
            else:
                new_life = False

            prev_life = info["ale.lives"]

            next_state = torch.clamp(next_obs - obs, min=0)
            next_state = torch.cat(
                (state[:, :, 1:], next_state.view(
                    [next_state.shape[0], next_state.shape[1], 1]
                )), dim=2
            )

            episode_rewards[i] += reward
            total_t += 1

            if done:
                print(f'Step {t} ({total_t}) @ Episode {i + 1} / {n_episodes}')
                print(f'Episode Reward: {episode_rewards[i]}')
                break

            state = next_state
            obs = next_obs

    model_name = '_'.join([str(x) for x in [seed, n_episodes, epsilon]])
    columns = ['seed', 'n_episodes', 'epsilon', 'avg. reward', 'std. reward']
    data = [[
        seed, n_episodes, epsilon,
        np.mean(episode_rewards), np.std(episode_rewards)
    ]]

    path = os.path.join(results_path, 'results.csv')
    if not os.path.isfile(path):
        df = pd.DataFrame(data=data, index=[model_name], columns=columns)
    else:
        df = pd.read_csv(path, index_col=0)

        if model_name not in df.index:
            df = df.append(
                pd.DataFrame(data=data, index=[model_name], columns=columns)
            )
        else:
            df.loc[model_name] = data[0]

    df.to_csv(path, index=True)

    torch.save(
        episode_rewards,
        os.path.join(results_path, f'{model_name}_episode_rewards.pt')
    )
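# Both scripts above call a policy(q_values, epsilon) helper that is not shown
# in this section. The sketch below is a minimal epsilon-greedy version of what
# such a helper might look like; it is an assumption for illustration, not
# necessarily the original implementation.
import numpy as np
import torch


def policy(q_values, epsilon):
    # Spread epsilon uniformly over all actions and put the remaining
    # probability mass on the greedy (highest Q-value) action.
    n_actions = q_values.numel()
    best_action = int(torch.argmax(q_values).item())
    probs = np.ones(n_actions, dtype=np.float64) * epsilon / n_actions
    probs[best_action] += 1.0 - epsilon
    return probs, best_action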
def test_gym_pipeline(self):
    # Build network.
    network = Network(dt=1.0)

    # Layers of neurons.
    inpt = Input(n=6552, traces=True)
    middle = LIFNodes(n=225, traces=True, thresh=-52.0 + torch.randn(225))
    out = LIFNodes(n=60, refrac=0, traces=True, thresh=-40.0)

    # Connections between layers.
    inpt_middle = Connection(source=inpt, target=middle, wmax=1e-2)
    middle_out = Connection(
        source=middle,
        target=out,
        wmax=0.5,
        update_rule=m_stdp_et,
        nu=2e-2,
        norm=0.15 * middle.n,
    )

    # Add all layers and connections to the network.
    network.add_layer(inpt, name='X')
    network.add_layer(middle, name='Y')
    network.add_layer(out, name='Z')
    network.add_connection(inpt_middle, source='X', target='Y')
    network.add_connection(middle_out, source='Y', target='Z')

    # Load SpaceInvaders environment.
    environment = GymEnvironment('SpaceInvaders-v0')
    environment.reset()

    # Build pipeline from specified components.
    for history_length in [3, 4, 5, 6]:
        for delta in [2, 3, 4]:
            p = Pipeline(
                network,
                environment,
                encoding=bernoulli,
                action_function=select_multinomial,
                output='Z',
                time=1,
                history_length=history_length,
                delta=delta,
            )
            assert p.action_function == select_multinomial
            assert p.history_length == history_length
            assert p.delta == delta

    # Check that invalid arguments raise ValueError.
    for time in [0, -1]:
        try:
            p = Pipeline(
                network,
                environment,
                encoding=bernoulli,
                action_function=select_multinomial,
                output='Z',
                time=time,
                history_length=2,
                delta=4,
            )
        except ValueError:
            pass

    # Use a valid time here so that any error comes from the invalid delta.
    for delta in [0, -1]:
        try:
            p = Pipeline(
                network,
                environment,
                encoding=bernoulli,
                action_function=select_multinomial,
                output='Z',
                time=1,
                history_length=2,
                delta=delta,
            )
        except ValueError:
            pass

    # Likewise, a nonexistent output layer name should be rejected.
    for output in ['K']:
        try:
            p = Pipeline(
                network,
                environment,
                encoding=bernoulli,
                action_function=select_multinomial,
                output=output,
                time=1,
                history_length=2,
                delta=4,
            )
        except ValueError:
            pass

    p = Pipeline(
        network,
        environment,
        encoding=bernoulli,
        action_function=select_random,
        output='Z',
        time=1,
        history_length=2,
        delta=4,
        save_interval=50,
        render_interval=5,
    )
    assert p.action_function == select_random
    assert p.encoding == bernoulli
    assert p.save_interval == 50
    assert p.render_interval == 5
    assert p.time == 1
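# Hedged sketch: how one of the pipelines constructed in the test above might
# be run for a single episode, using the reset_state_variables()/env_step()/
# step() calls that appear in return_score earlier in this section. The older
# Pipeline class exercised by this test may expose a different interface, so
# treat this as an illustration rather than the library's exact API.
def run_one_episode(pipeline):
    pipeline.reset_state_variables()
    total_reward, done = 0.0, False
    while not done:
        # env_step() is assumed to return (obs, reward, done, info).
        obs, reward, done, info = pipeline.env_step()
        pipeline.step((obs, reward, done, info))
        total_reward += reward
    return total_reward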