Example no. 1
    def work(self, encoder_network):
        while not self.coord.should_stop() and self.epoch < self.num_epochs:

            step = 0
            env = Environment(self.img_path)

            # start an episode
            while step < self.train_batches_per_epoch:

                images_np = env.load_image(self.img_offset)
                self.img_offset += 1
                raw_img, target_img, act_list, mse = env.take_action(images_np)

                encoder_network.update_state_predicter(raw_img, act_list,
                                                       target_img, self.lr)
                encoder_network.update_action_predicter(
                    raw_img, act_list, target_img, self.lr)

                # the autoencoder loss is missing here

                encoder_network.pull_all_params()

                step += 1

            self.epoch += 1
Example no. 2
    def test_terminal_level_detection_override():
        terms = (
            ('dumb', (TermLevel.DUMB, '@')),
            ('linux', (TermLevel.ANSI_BASIC, '@')),
            ('xterm-color', (TermLevel.ANSI_BASIC, '@')),
            ('xterm-256color', (TermLevel.ANSI_EXTENDED, '@')),
            ('xterm-direct', (TermLevel.ANSI_DIRECT, '@')),
            ('fbterm', (TermLevel.ANSI_EXTENDED, '@')),
        )
        for name, expected in terms:
            detection.env = Environment(environ=dict(TERM=name, PY_CONSOLE_COLOR_SEP='@'))
            if name == 'fbterm':  # :-/
                detection.is_fbterm = True
            assert detection.detect_terminal_level() == expected

        from . import windows  # try win implementation
        terms = (
            ('dumb', (TermLevel.DUMB, '@')),
            ('xterm-color', (TermLevel.ANSI_BASIC, '@')),
            ('xterm-256color', (TermLevel.ANSI_EXTENDED, '@')),
            ('cygwin', (TermLevel.ANSI_DIRECT, '@')),  # ?
        )
        for name, expected in terms:
            windows.env = Environment(environ=dict(TERM=name, PY_CONSOLE_COLOR_SEP='@'))
            assert windows.detect_terminal_level() == expected

        windows.env = Environment(environ=dict(ANSICON='1'))
        assert windows.detect_terminal_level() == (TermLevel.ANSI_EXTENDED, ';')
Example no. 3
def main():

	# Define argument-count requirements
	exact_count = -1    # Set to -1 to disable exact arg checking
	min_count   = 1     # Set to -1 to disable min arg checking
	max_count   = -1    # Set to -1 to disable max arg checking

	summary = "IBM Homework test script"
	usage   = "usage: %prog [options] <test_name> <args>"

	# Set the add custom options callback if necessary
	add_custom_options_cb = 0 # custom_options_cb

	# Allocate and initialize the scripts environment
	env = Environment(os.path.basename(__file__),
					  usage,
					  summary,
					  exact_count,
					  min_count,
					  max_count,
					  add_custom_options_cb)

	# Execute script specific functionality
	if not env.error and not env.complete:
		env.error = execute(env)

	# Exit the program
	env.exit()
Example no. 4
    def test_palette_support():
        terms = (
            ('dumb', TermLevel.DUMB),
            ('linux', TermLevel.ANSI_BASIC),
            ('xterm-color', TermLevel.ANSI_BASIC),
            ('xterm-256color', TermLevel.ANSI_EXTENDED),
        )
        for name, result in terms:
            detection.env = Environment(environ=dict(TERM=name))
            assert detection.detect_terminal_level() == result

        detection.env = Environment(environ=dict(COLORTERM='24bit'))
        assert detection.detect_terminal_level() == TermLevel.ANSI_DIRECT

        from . import windows  # try win implementation
        terms = (
            ('dumb', TermLevel.DUMB),
            ('xterm-color', TermLevel.ANSI_BASIC),
            ('xterm-256color', TermLevel.ANSI_EXTENDED),
            ('cygwin', TermLevel.ANSI_DIRECT),  # ?
        )
        for name, result in terms:
            windows.env = Environment(environ=dict(TERM=name))
            assert windows.detect_terminal_level() == result

        windows.env = Environment(environ=dict(ANSICON='1'))
        assert windows.detect_terminal_level() == TermLevel.ANSI_EXTENDED
Example no. 5
    def __init__(self, sim_number, init=True, _seed=None, ATNE=True):
        super(EnvironmentListener, self).__init__()
        seed(_seed)

        self.ATNE = ATNE

        self.sim_number = sim_number

        self.post_process = PostGraph(self.sim_number,
                                      columns=[
                                          "sim_number", "sim_step", "veh_id",
                                          "edge_id", "speed", "capacity",
                                          "budget", "prev_poi"
                                      ])
        self.t = 0

        self.break_condition = False  # condition to check if all vehicles have arrived

        file_dir = os.path.dirname(GraphSetting.sumo_config)
        map_list = glob.glob(os.path.join(file_dir, r"*.map"))
        try:
            self.sim_env = self.read(map_list[0])
            print(f"loaded map from {map_list[0]}")
        except IndexError:
            print(f".map file generating for {GraphSetting.sumo_config}")
            self.sim_env = Environment()
            self.save(GraphSetting.sumo_config, self.sim_env)

        if init:

            self.initial_reward_random(GraphSetting.reward_numbers)
            self.initial_route_random(GraphSetting.car_numbers)
            self.junction_sub()
Example no. 6
class Worker_thread:
    # each thread owns a learning environment
    def __init__(self, thread_name, thread_type, parameter_server, config):
        self.environment = Environment(thread_name, thread_type, parameter_server, config)
        self.thread_type = thread_type
        self.parameter_server = parameter_server
        self.config = config

    def run(self):
        while True:
            if not self.config.isLearned and self.thread_type == 'learning':     # run the learning thread
                self.environment.run()

            if not self.config.isLearned and self.thread_type == 'test':    # keep the test thread idle
                time.sleep(1.0)

            if self.config.isLearned and self.thread_type == 'learning':     # stop the learning thread
                time.sleep(3.0)
                self.parameter_server.save()
                break

            if self.config.isLearned and self.thread_type == 'test':     # run the test thread
                time.sleep(3.0)
                self.environment.run()
                break
Example no. 7
File: main.py Project: ljd2439/DQN
	def __init__(self):
		self.state_dim = STATE_DIM      # m x n grid
		self.action_dim = ACTION_DIM    # actions: up, down, right, left
		self.env = Environment(self.state_dim, self.action_dim)
		self.agent = Agent(self.state_dim, self.action_dim)

		self.episode = 0
		self.batch_size = BATCH_SIZE

		self.isTraining = TRAINING
		self.isPlaying = PLAYING

		#======opengl setting======
		argv = sys.argv
		glutInit(argv)
		glutInitWindowPosition(0,0)
		glutInitWindowSize(WIN_WIDTH, WIN_HEIGHT)
		glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH)
		glutCreateWindow("DQN example")
		glutDisplayFunc(self.display)
		glutReshapeFunc(self.reshape)
		glutKeyboardFunc(self.keyCB)
		#======================

		if self.isTraining:
			self.training()
		else:
			if self.isPlaying:
				self.agent.load(saved_weight)
				self.playing()

		glutMainLoop()
Example no. 8
def train_elog(env: Environment, epochs: int, train_epochs_interval: int):
    for epoch in tqdm(range(epochs)):
        state = env.reset()
        done = False
        data = {
            'features': [],
            'rewards': [],
            'cards': [],
            'idxs': state.feature.idxs
        }
        while not done:
            action = DecisionResponse([])
            d: DecisionState = state.decision
            player: Player = env.players[d.controlling_player]

            player.makeDecision(state, action)

            x = state.feature.to_numpy()
            data['features'].append(x)
            data['cards'].append(action.single_card)

            obs, reward, done, _ = env.step(action)

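        # broadcast the terminal reward to every step recorded this episode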
        data['rewards'].extend([reward] *
                               (len(data['features']) - len(data['rewards'])))

        for player in env.players:
            if isinstance(player, RolloutPlayer):
                player.rollout.update(**data)
                if (epoch + 1) % train_epochs_interval == 0:
                    player.rollout.learn()
Example no. 9
def main():

    start_date = datetime.datetime(2002, 1, 1)
    end_date = datetime.datetime(2021, 1, 30)
    stocks = web.DataReader('SPY', 'yahoo', start_date, end_date)

    env = Environment(stocks, LOOK_BACK, stocks.shape[1])
    in_dim = env.observation_shape[0]
    out_dim = env.action_space.n
    agent = Agent(in_dim, out_dim)
    agent.reset()
    optimizer = optim.RMSprop(agent.parameters(), lr=0.05)

    for epi in range(EPISODES):
        state = env.set_state()
        for t in range(env.total_days - env.look_back):
            action = agent.act(state)
            state, reward, done = env.step(action)
            agent.rewards.append(reward)
            if done:
                break

        loss = agent.fit(optimizer, GAMMA)
        total_reward = sum(agent.rewards)
        reward_records = agent.rewards  # keep the last episode's rewards for the plot below
        agent.reset()  # clear memory after training
        print(f'Episode {epi}, Loss: {loss}, Profit: {total_reward}')

    print("----Training Over----")
    cum_rewards = np.cumsum(reward_records)
    plt.title("Cumulative Profit on Last Episode")
    plt.xlabel("Days of Trading")
    plt.ylabel("Price ($)")
    plt.plot(cum_rewards)
    plt.show()
Example no. 10
def main(cfg: DictConfig) -> None:
    "The entry point for parsing user-provided texts"

    assert cfg.model_path is not None, "Need to specify model_path for testing."
    assert cfg.input is not None
    assert cfg.language in ("english", "chinese")
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # load the model checkpoint
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)
    vocabs = checkpoint["vocabs"]

    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % sum([p.numel() for p in model.parameters()]))

    input_file = hydra.utils.to_absolute_path(cfg.input)
    ds = UserProvidedTexts(input_file, cfg.language, vocabs, cfg.encoder)
    loader = DataLoader(
        ds,
        batch_size=cfg.eval_batch_size,
        collate_fn=form_batch,
        num_workers=cfg.num_workers,
        pin_memory=torch.cuda.is_available(),
    )

    env = Environment(loader, model.encoder, subbatch_max_tokens=9999999)
    state = env.reset()
    oup = (sys.stdout if cfg.output is None else open(
        hydra.utils.to_absolute_path(cfg.output), "wt"))
    time_start = time()

    with torch.no_grad():  # type: ignore
        while True:
            with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                actions, _ = model(state)
            state, done = env.step(actions)
            if done:
                for tree in env.pred_trees:
                    assert tree is not None
                    print(tree.linearize(), file=oup)
                # pred_trees.extend(env.pred_trees)
                # load the next batch
                try:
                    with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                        state = env.reset()
                except EpochEnd:
                    # no next batch available (complete)
                    log.info("Time elapsed: %f" % (time() - time_start))
                    break

    if cfg.output is not None:
        log.info("Parse trees saved to %s" % cfg.output)
Example no. 11
    def __init__(self):
        super(EnvironmentListener, self).__init__()
        self.sim_env = Environment()

        self.initial_reward_random(GraphSetting.reward_numbers)
        self.initial_route_random(GraphSetting.car_numbers)

        self.junction_sub()
Example no. 12
    def test_color_disabled_none_false():
        detection.env = Environment(environ={})
        assert detection.color_is_disabled() is None

        detection.env = Environment(environ=dict(CLICOLOR=''))
        assert detection.color_is_disabled() is None

        detection.env = Environment(environ=dict(CLICOLOR='1'))
        assert detection.color_is_disabled() is False
Example no. 13
    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.6, verbose=True):
        self.env = Environment()
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.verbose = verbose
        self.num_actions = 8
        self.qtable = {}
        self.current_state = self.env.detect_nearby()
Example no. 14
    def test_color_allowed():
        detection.env = Environment(environ={})
        assert detection.color_is_allowed() is True

        detection.env = Environment(environ=dict(CLICOLOR='0'))
        assert detection.color_is_allowed() is False

        detection.env = Environment(environ=dict(NO_COLOR=''))
        assert detection.color_is_allowed() is False
Example no. 15
def test_lava_is_terminal():
    env = Environment(CONFIG, add_agent_value=False)
    _, r, done = env.step('R')
    assert r == 0
    assert not done

    _, r, done = env.step('R')
    assert r == -10
    assert done
Example no. 16
def test_successful_trajectory():
    env = Environment(CONFIG, add_agent_value=False)
    steps = ['R', 'U', 'R', 'R', 'R', 'R', 'R', 'U']
    for step in steps:
        s, r, done = env.step(step)

    assert s.coordinate == (1, 6)
    assert np.all(s.observation == np.array([[2, 2, 2], [0, 0, 2], [0, 0, 2]]))
    assert r == 100
    assert done
Example no. 17
    def train(self, my_lambda, true_star_value):
        environment = Environment()
        terminated = False
        eligibility = np.zeros([36])

        # initial state: both the player and the dealer draw a black card
        card = random.randint(1, 10)        # uniform draw from 1..10
        dealer_card = random.randint(1, 10)

        states = []
        actions = []

        current_state = [dealer_card, card]
        current_action = self.epsilon_action(current_state)

        while not terminated:
            current_action_index = 1 if current_action == 'hit' else 0

            states.append(current_state)
            actions.append(current_action_index)

            old_card = card
            state_prime, reward = environment.step(current_state,
                                                   current_action)
            card = state_prime[1]

            # the environment returns reward 2 while the hand is still live,
            # since 0 already denotes a terminal draw; treat terminal states
            # as having zero future action value
            current_reward = 0
            action_prime = None
            if reward != 2:
                current_reward = reward
                delta = current_reward + (0 - self.do_approximation(
                    [dealer_card, old_card], current_action_index))
            else:
                action_prime = self.epsilon_action([dealer_card, card])
                action_prime_index = 1 if action_prime == 'hit' else 0
                delta = current_reward + (self.do_approximation(
                    [dealer_card, card],
                    action_prime_index) - self.do_approximation(
                        [dealer_card, old_card], current_action_index))

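            # SARSA(lambda) with a linear approximator: accumulate traces
            #   e <- lambda * e + phi(s, a),  then  w <- w + alpha * delta * e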
            eligibility = (my_lambda * eligibility +
                           self.feature_vector.flatten())

            self.weights += self.step_size * delta * eligibility

            current_state = [dealer_card, card]
            current_action = action_prime

            if reward in (-1, 0, 1):
                terminated = True

        mse = self.calculate_mse(true_star_value)
        return mse
Example no. 18
    def __init__(self, dataset: Dataset, policy_net: PolicyNet):
        self.dataset = dataset
        self.epochs = ExpSet.epochs
        self.episodes = ExpSet.episodes
        self.training = True
        self.steps = ExpSet.max_T
        self.env = Environment(self.dataset.KG)

        self.policy_net = policy_net
        self.embedder = self.dataset.embedder
        self.optimizer = torch.optim.Adam(policy_net.parameters(), lr=ExpSet.learning_rate)
Example no. 19
    def train(self):
        environment = Environment()
        terminated = False

        # initial state: both the player and the dealer draw a black card
        card = random.randint(1, 10)        # uniform draw from 1..10
        dealer_card = random.randint(1, 10)

        states = []
        actions = []
        rewards = []

        # first state
        states.append([dealer_card, card])

        while not terminated:
            self.value_steps[dealer_card][card] += 1
            action = self.epsilon_action([dealer_card, card])
            action_index = 1 if action == 'hit' else 0

            state_prime, reward = environment.step([dealer_card, card], action)
            card = state_prime[1]

            if reward in (-1, 0, 1):
                terminated = True
            else:
                # the environment returns reward 2 while the hand is still live,
                # since 0 already denotes a terminal draw
                states.append(state_prime)
                reward = 0

            actions.append(action_index)
            rewards.append(reward)

        episode_return = np.sum(rewards)  # undiscounted return G_t for the episode

        visited_states = []

        for i in range(len(states)):
            card, dealer_card = states[i][1], states[i][0]

            if self.check_visited(visited_states, card, dealer_card):
                self.action_value_steps[dealer_card][card][actions[i]] += 1

                current_action_value = self.action_value_function[dealer_card][card][actions[i]]
                current_action_value_step = self.action_value_steps[dealer_card][card][actions[i]]

                # Q(St, At) = Q(St, At) + 1 / N(St, At) * (Gt - Q(St, At))
                new_action_value = (current_action_value + (1 / current_action_value_step) *
                                    (episode_return - current_action_value))

                self.action_value_function[dealer_card][card][actions[i]] = new_action_value

                visited_states.append([card, dealer_card])
Example no. 20
def simulate(env: Environment, n: int, tree: GameTree, turn_log=False, action_log=False, card_log=False) -> SimulationData:
    # TODO: clean up the logging logic below
    sim_data = SimulationData(Supply(env.config).get_supply_card_types())

    for i in tqdm(range(n)):
        state: State = env.reset()
        if tree:
            tree.reset(state)
        done = False
        t_start = time.time()
        starting_player_buy = None
        while not done:
            action: DecisionResponse = DecisionResponse([])
            d: DecisionState = state.decision
            pid: int = d.controlling_player
            player = env.players[pid]
            player.makeDecision(state, action)

            if state.phase == Phase.ActionPhase:
                # +1 to turns to get current turn
                sim_data.update_action(i, pid, state.player_states[pid].turns + 1, action.cards[0])

            if state.phase == Phase.BuyPhase and tree:
                tree.advance(action.single_card)

            log_buy = (state.phase == Phase.BuyPhase)

            obs, reward, done, _ = env.step(action)

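            # turn rows are flushed once per full round: player 0's buy is held
            # until player 1 buys, then both players' rows are written together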
            if turn_log and log_buy:
                if pid == 0:
                    starting_player_buy = action.single_card
                else:
                    sim_data.update_turn(i, 0, state.player_states[0].turns, state.get_player_score(0), starting_player_buy, state.get_coin_density(0))
                    sim_data.update_turn(i, 1, state.player_states[1].turns, state.get_player_score(1), action.single_card, state.get_coin_density(1))
            if card_log and log_buy:
                if pid == 1:
                    sim_data.update_card(i, 0, state.player_states[0].turns, state.get_card_counts(0))
                    sim_data.update_card(i, 1, state.player_states[1].turns, state.get_card_counts(1))

        if state.player_states[0].turns > state.player_states[1].turns:
            sim_data.update_card(i, 0, state.player_states[0].turns, state.get_card_counts(0))
            sim_data.update_turn(i, 0, state.player_states[0].turns, state.get_player_score(0), starting_player_buy, state.get_coin_density(0))

        t_end = time.time()
        sim_data.update(env.game, t_end - t_start)

    sim_data.finalize(env.game)

    print('===SUMMARY===')
    print(sim_data.summary)

    return sim_data
Example no. 21
    def test_color_forced():
        detection.env = Environment(environ={})
        assert detection.color_is_forced().value is None

        detection.env = Environment(environ=dict(CLICOLOR_FORCE='0'))
        assert detection.color_is_forced() is False

        detection.env = Environment(environ=dict(CLICOLOR_FORCE='foo'))
        assert detection.color_is_forced() is True

        detection.env = Environment(environ=dict(CLICOLOR_FORCE='1'))
        assert detection.color_is_forced() is True
Example no. 22
def test_reward_for_bridge_completion_is_given_only_once():
    env = Environment(CONFIG, add_agent_value=False)
    steps = ['R', 'U', 'R', 'R', 'R']
    for step in steps:
        _, r, _ = env.step(step)

    assert r == 1

    _, r, _ = env.step('L')
    _, r, _ = env.step('R')

    assert r == 0
Example no. 23
def train_iteratively(args):
    # iteration 1
    team_blue = [PGAgent(idx, "blue") for idx in range(args.n_friends)]
    team_red = [
        Agent(args.n_friends + idx, "red") for idx in range(args.n_enemies)
    ]

    training_agents = team_blue

    agents = team_blue + team_red
    env = Environment(agents)

    args.n_actions = 6 + args.n_enemies
    args.n_inputs = 4 + 3 * (args.n_friends - 1) + 3 * args.n_enemies

    model = ForwardModel(input_shape=args.n_inputs, n_actions=args.n_actions)

    for agent in training_agents:
        agent.set_model(model)

    training_agents = train_agents(env, training_agents, args)
    trained_model = copy.deepcopy(training_agents[0].model)

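    # each iteration: load the blue model trained so far into the red team
    # (eval mode; red is not trained further) and train a fresh blue model against it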
    for iteration in range(args.n_iterations):
        args.n_steps = 10000 * (iteration + 2)
        team_blue = [PGAgent(idx, "blue") for idx in range(args.n_friends)]
        team_red = [
            PGAgent(args.n_friends + idx, "red")
            for idx in range(args.n_enemies)
        ]

        training_agents = team_blue

        agents = team_blue + team_red
        env = Environment(agents, args)

        model = ForwardModel(input_shape=args.n_inputs,
                             n_actions=args.n_actions)
        model.load_state_dict(trained_model.state_dict())
        model.eval()
        for agent in team_red:
            agent.set_model(model)

        model = ForwardModel(input_shape=args.n_inputs,
                             n_actions=args.n_actions)
        for agent in team_blue:
            agent.set_model(model)

        training_agents = train_agents(env, training_agents, args)
        trained_model = copy.deepcopy(training_agents[0].model)
    torch.save(trained_model.state_dict(),
               args.path + f'RUN_{get_run_id()}.torch')
Example no. 24
    def __init__(self):
        iostreams = (sys.stdin, sys.stdout, sys.stderr)
        (self.stdin, self.stdout, self.stderr) = iostreams

        self.debug = False
        self.verbose = True
        self.core = True
        self.closures = True

        self.rdr = Reader()
        self.environment = Environment()

        self.init()
Example no. 25
    def __init__(self, game_dir):
        if torch.cuda.device_count() == 0:
            print_c('No CUDA-capable device detected!')
            raise NotImplementedError

        self.agent = ManualCustomAgent(
            config_file_path='config/config_eval.yaml')
        self.env = Environment(game_dir)
        self.out_handler = OutputHandler()
        self.input_handler = InputHandler(callback=self.do_step)

        self.scores, self.dones = [0], [0]
        self.obs, self.infos = None, None
Example no. 26
    def __init__(self):
        self.stdin = sys.stdin
        self.stdout = sys.stdout
        self.stderr = sys.stderr

        self.debug = False
        self.verbose = True
        self.core = True
        self.closure = True

        self.reader = Reader()
        self.env = Environment()

        self.init()
Example no. 27
    def __init__(self):
        self.env = Environment()

        low_state_value = self.env.state_space(PVmin, Ebmin, Dmin)
        high_state_value = self.env.state_space(PVmax, Ebmax, Dmax)

        num_states = high_state_value - low_state_value + 1
        num_actions = 3
        if os.path.exists(q_table_path):
            print("Q table Loading !!!")
            self.load_q_table()
        else:
            print("Q table Initializing !!!")
            self.q_table = np.random.uniform(low=-1,
                                             high=1,
                                             size=(num_states, num_actions))
Example no. 28
def envConfig():
    if request.method == 'POST':
        global marseEnv
        mydict = request.get_json()
        marseEnv = Environment(temperature=mydict.get('temperature'),
                               humidity=mydict.get('humidity'),
                               solar_flare=mydict.get('solar-flare'),
                               storm=mydict.get('storm'),
                               area_map=mydict.get('area-map'))
        if marseEnv.solar_flare:
            myrover.battery = 11

        if marseEnv.storm:
            shield_index = None
            for index, item in enumerate(myrover.inventory):
                if item.type == 'storm-shield':
                    shield_index = index
            if shield_index is not None:
                if myrover.inventory[shield_index].qty == 1:
                    myrover.inventory.pop(shield_index)
                else:
                    myrover.inventory[shield_index].qty = myrover.inventory[
                        shield_index].qty - 1
            else:
                func = request.environ.get('werkzeug.server.shutdown')
                if func is None:
                    raise RuntimeError('Not running with the Werkzeug Server')
                func()

        return Response(status=200)
Example no. 29
def main(_):
    #cpu only
    tf.config.experimental.set_visible_devices([], 'GPU')
    tf.get_logger().setLevel('INFO')
    #tf.debugging.set_log_device_placement(True)

    config = get_config(FLAGS) or FLAGS
    env = Environment(config, is_training=True)
    game = CFRRL_Game(config, env)
    model_weights_queues = []
    experience_queues = []
    if FLAGS.num_agents == 0 or FLAGS.num_agents >= mp.cpu_count():
        FLAGS.num_agents = mp.cpu_count() - 1
    print('Agent num: %d, iter num: %d\n'%(FLAGS.num_agents+1, FLAGS.num_iter))
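    # one pair of single-slot queues per agent: model weights flow from the
    # coordinator to each agent, and experience flows back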
    for _ in range(FLAGS.num_agents):
        model_weights_queues.append(mp.Queue(1))
        experience_queues.append(mp.Queue(1))

    tm_subsets = np.array_split(game.tm_indexes, FLAGS.num_agents)

    coordinator = mp.Process(target=central_agent, args=(config, game, model_weights_queues, experience_queues))

    coordinator.start()

    agents = []
    for i in range(FLAGS.num_agents):
        agents.append(mp.Process(target=agent, args=(i, config, game, tm_subsets[i], model_weights_queues[i], experience_queues[i])))

    for i in range(FLAGS.num_agents):
        agents[i].start()

    coordinator.join()
Example no. 30
def test_env_reset_position_after_few_steps():
    env = Environment(CONFIG, add_agent_value=False)
    env.reset()
    env.step('R')
    env.step('U')
    assert env.agent_position != env.start_position

    env.reset()
    assert env.agent_position == env.start_position
Example no. 31
    def __init__(self):
        iostreams = (sys.stdin, sys.stdout, sys.stderr)
        (self.stdin, self.stdout, self.stderr) = iostreams

        self.verbose = False
        self.core = True
        self.closures = True

        self.rdr = Reader()
        self.environment = Environment()

        self.init()
Example no. 33
def extract_functions(prgm):
    """
    Removes all function bodies from prgm and inserts
    the functions into a new environment frame.
    """
    lines = prgm.split('\n')
    lines = [line.strip() for line in lines]
    env = Environment()
    i = 0
    max_len = len(lines)
    while i < max_len:
        line = lines[i]
        # Transform classes into functions that return objects
        if line.startswith('class '):
            match_obj = re.match(r'class ([\$A-Za-z_][A-Za-z0-9_]*)\((.*)\)(.*)', line)
            child_class_name = match_obj.group(1)
            arguments = match_obj.group(2)
            parent_classes = match_obj.group(3)
            inherit_obj = re.match(r' inherits (.*)', parent_classes)
            if inherit_obj:
                parent_class_names = inherit_obj.group(1)
                classes = [name.strip() for name in parent_class_names.split(',')]
                env.new_type(classes, child_class_name)
            else:
                # There were no parent classes provided.
                # By default, this class should inherit from Object.
                env.new_type(['Object'], child_class_name)
            lines[i] = 'sub ' + child_class_name + '(' + arguments + ')'
            lines.insert(i + 1, '$type="' + child_class_name + '"')
            max_len += 1
            _, end = find_next_end_else(lines, i + 1, True)
            lines.insert(end, 'return this')
            max_len += 1
        i += 1
    i = 0
    while i < len(lines):
        line = lines[i]
        if line.startswith('sub '):
            name = name_of_function(line)
            arg_list = args_of_function(line)
            ret_type = return_type_of_function(line)
            _, end = find_next_end_else(lines, i + 1, True)
            func_body = lines[i+1 : end]
            env.assign_hook(name, Function(name, arg_list, func_body, return_type=ret_type))
            # Remove the function definition from the program,
            # and replace it with a hook directive.
            lines[i : end+1] = [':hook {0}'.format(name)]
        else:
            i += 1
    prgm = '\n'.join(lines)
    return prgm, env
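For intuition, the class-rewriting pass above replaces a class header with a
function definition, tags the instance with its type, and returns it. A sketch
of the transformation (hypothetical source; `end`-delimited blocks are assumed,
as implied by find_next_end_else):

    class Point(x, y) inherits Object
        ...
    end

becomes:

    sub Point(x, y)
    $type="Point"
        ...
    return this
    end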
Example no. 34
class Elisp(Lisp):
    def __init__(self):
        self.stdin = sys.stdin
        self.stdout = sys.stdout
        self.stderr = sys.stderr

        self.debug = False
        self.verbose = True
        self.core = True
        self.closure = True

        self.reader = Reader()
        self.env = Environment()

        self.init()

    def init(self):
        # core functions
        self.env.set('eq',      Function(self.eq))
        self.env.set('quote',   Function(self.quote))
        self.env.set('car',     Function(self.car))
        self.env.set('cdr',     Function(self.cdr))
        self.env.set('cons',    Function(self.cons))
        self.env.set('atom',    Function(self.atom))
        self.env.set('cond',    Function(self.cond))

        # utility functions
        self.env.set('print',   Function(self.println))

        # special forms
        self.env.set('lambda',  Function(self.lambda_fun))
        self.env.set('label',   Function(self.label))

        # meta-elements
        self.env.set('__elisp__',   self)
        self.env.set('__global__',  self.env)

    def lambda_fun(self, env, args):
        if self.env != env.get('__global__') and self.closure:
            return Closure(env, args[0], args[1:])
        else:
            return Lambda(args[0], args[1:])

    def usage(self):
        self.print_banner()
        print('%s <options> [elisp files]\n' % NAME.lower())

    def print_banner(self):
        print('The %s programming shell %s' % (NAME, VERSION))
        print('    Type `help` for more information\n')

    def print_help(self):
        print('Help for eLisp %s' % VERSION)
        print('    Type `help` for more information')
        print('    Type `env` to see the bindings in the current environment')
        print('    Type `load` followed by one or more filenames to load source files')
        print('    Type `quit` to exit the interpreter')

    def push(self, env=None):
        if env:
            self.env = self.env.push(env)
        else:
            self.env = self.env.push()

    def pop(self):
        self.env = self.env.pop()

    def repl(self):
        while True:
            source = self.get_complete_command()

            try:
                if source in ['quit']:
                    break
                elif source in ['help']:
                    self.print_help()
                elif source.startswith('load'):
                    files = source.split(' ')[1:]
                    self.process_files(files)
                elif source in ['env']:
                    print(self.env)
                else:
                    self.process(source)
            except AttributeError:
                print('Could not process command: ', source)
                return

    def process(self, source):
        sexpr = self.reader.get_sexpr(source)

        while sexpr:
            result = None

            try:
                result = self.eval(sexpr)
            except Error as err:
                print(err)

            if self.verbose:
                self.stdout.write('     %s\n' % result)
            sexpr = self.reader.get_sexpr()

    def eval(self, sexpr):
        try:
            return sexpr.eval(self.env)
        except ValueError as err:
            print(err)
            return FALSE

    def get_complete_command(self, line="", depth=0):
        if line != '':
            line = line + ' '

        if self.env.level != 0:
            prompt = PROMPT + '%i%s ' % (self.env.level, DEPTH_MARK * (depth + 1))
        else:
            if depth == 0:
                prompt = PROMPT
            else:
                prompt = PROMPT + "%s " % (DEPTH_MARK * (depth + 1))
            line = line + self.read_line(prompt)

            balance = 0
            for c in line:
                if c == '(':
                    balance += 1
                elif c == ')':
                    balance -= 1
            if balance > 0:
                return self.get_complete_command(line, depth + 1)
            elif balance < 0:
                raise ValueError('Invalid paren pattern')
            else:
                return line

    def read_line(self, prompt):
        if prompt and self.verbose:
            self.stdout.write('%s' % prompt)
            self.stdout.flush()

        line = self.stdin.readline()
        if len(line) == 0:
            return 'EOF'

        if line[-1] == '\n':
            line = line[:-1]

        return line

    def process_files(self, files):
        self.verbose = False

        for filename in files:
            infile = open(filename, 'r')
            self.stdin = infile

            source = self.get_complete_command()
            while source not in ['EOF']:
                self.process(source)
                source = self.get_complete_command()
            infile.close()
        self.stdin = sys.stdin
        self.verbose = True
Example no. 35
char.SetSpike(50,188)
#char.SetSpike(90,50)
g=Geometry()
char.x_co = 1
char.y_co = 3

#location of last room
last_x = 6
last_y = 1

stopper = 0
cooldown = 0
endgame = 0

environ = eval('env_%d_%d()' % (char.x_co, char.y_co))
envi = Environment(environ[0], environ[1], environ[2], environ[3], environ[4])
#print environ[1].rects
if random.randint(0, 1) == 0:
    pygame.mixer.music.load('data//snd//bgm//07 - Positive Force.mp3')
else:
    pygame.mixer.music.load('data//snd//bgm//10 - Potential for Anything.mp3')
pygame.mixer.music.play(-1, 0.0)

#title of game
pygame.display.set_caption('Py Mega Man 1')

while True:
    #Invincibility frames
    if char.cooldown > 0:
        char.cooldown -= 1
Example no. 36
portalimg=pygame.image.load('data//img//warptoken.bmp')
portal=MovingEntity(portalimg, 0, 0, ENT_PORTAL)
portal.SetPos(180, 84)

portalimg2=pygame.image.load('data//img//warptoken2.bmp')
portal2=MovingEntity(portalimg2, 0, 0, ENT_PORTAL)
portal2.SetPos(180, 84)

char.teleportpoint=[(50, 50), False]

counter = 0  # helper counter used to fake gif animation

#initializes all parts of the screen
#if char.tokens == 0: #if unique_id not in tokens:
env=Environment(GAMERECT, g, img, bg, (char, plat, plat2, sprite, checkpoint,checkpoint2, spikes, trinket, portal))
#env2=Environment(GAMERECT, g, img, bg, (char, plat, plat2, sprite, checkpoint, spikes, trinket))
    ##env=Environment(GAMERECT, g, img, bg, (char, plat, plat2, sprite, checkpoint,checkpoint2))
#else:
#    env=Environment(GAMERECT, g, img, bg, (char, plat, plat2, sprite, checkpoint,checkpoint2, spikes))

#plays bgm
if random.randint(0, 1) == 0:
    pygame.mixer.music.load('data//snd//bgm//07 - Positive Force.mp3')
else:
    pygame.mixer.music.load('data//snd//bgm//10 - Potential for Anything.mp3')
pygame.mixer.music.play(-1, 0.0)

#title of game
pygame.display.set_caption('VVVVVV')
Example no. 37
# A_2 = np.random.rand(n_corr_states,n_ind_states)
# sum_A_2 = np.sum(A_2, axis=1)
#
#
# for i in range(len(sum_A_2)):
#
#     A_2[i] = A_2[i]/sum_A_2[i]


p_matrix = combine_transition(chan_list_1, chan_list_2, A_1, A_2)

print(p_matrix)

config = {'n_nodes': 3, 'p_matrix': p_matrix, 'n_channels': 3}

env = Environment(config)

history = []

for i in range(2):
    history.append(env.get_state())

data = np.array(history)

print(data)

corr = np.corrcoef(data, rowvar=False)

print(corr)

# print cov_
Example no. 38
	def new_env(self, dry=False):
		user_env = EnvFile.Load(self.id)
		new_env = Environment(tool.create(dry))
		new_env.update(user_env.env, WORK_DIR=self.__dir)
		return new_env
Example no. 39
from env import Environment

env = Environment()
file_name = env.capture_camera_image_into_temp_file()
env.save_file_for_later(file_name)

Example no. 40
    def test_finding_defining_env(self):
        outer = Environment({"my-var": 1})
        inner = Environment(outer=outer)
        assert_equals(outer, inner.defining_env("my-var"))
Example no. 41
class Lithp(Lisp):
    """ The Lithper class is the interpreter driver.  It does the following:
            1. Initialize the global environment
            2. Parse the cl arguments and act on them as appropriate
            3. Initialize the base Lisp functions
            4. Read input
            5. Evaluate
            6. Print
            7. Loop back to #4
    """
    def __init__(self):
        iostreams = (sys.stdin, sys.stdout, sys.stderr)
        (self.stdin, self.stdout, self.stderr) = iostreams

        self.verbose = False
        self.core = True
        self.closures = True

        self.rdr = Reader()
        self.environment = Environment()

        self.init()

    def init(self):
        # Define core functions
        self.environment.set("eq",     Function(self.eq))
        self.environment.set("quote",  Function(self.quote))
        self.environment.set("car",    Function(self.car))
        self.environment.set("cdr",    Function(self.cdr))
        self.environment.set("cons",   Function(self.cons))
        self.environment.set("atom",   Function(self.atom))
        self.environment.set("cond",   Function(self.cond))

        # Define utility function
        self.environment.set("print",  Function(self.println))

        # Special forms
        self.environment.set("lambda", Function(self.lambda_))
        self.environment.set("label",  Function(self.label))

        # Define meta-elements
        self.environment.set("__lithp__",  self)
        self.environment.set("__global__", self.environment)

    def usage(self):
        self.print_banner()
        print()
        print(NAME.lower(), " <options> [lithp files]\n")

    def print_banner(self):
        print("The", NAME, "programming shell", VERSION)
        print("   by Fogus,", WWW)
        print("   Type :help for more information")
        print()

    def print_help(self):
        print("Help for Lithp v", VERSION)
        print("  Type :help for more information")
        print("  Type :env to see the bindings in the current environment")
        print("  Type :load followed by one or more filenames to load source files")
        print("  Type :quit to exit the interpreter")

    def push(self, env=None):
        if env:
            self.environment = self.environment.push(env)
        else:
            self.environment = self.environment.push()

    def pop(self):
        self.environment = self.environment.pop()

    def repl(self):
        while True:
            # Stealing the s-expression parsing approach from [CLIPS](http://clipsrules.sourceforge.net/)
            source = self.get_complete_command()

            # Check for any REPL directives
            try:
                if source in [":quit"]:
                    break
                elif source in [":help"]:
                    self.print_help()
                elif source.startswith(":load"):
                    files = source.split(" ")[1:]
                    self.process_files(files)
                elif source in [":env"]:
                    print(self.environment)
                else:
                    self.process(source)
            except AttributeError:
                print "Could not process command: ", source
                return


    # Source is processed one s-expression at a time.
    def process(self, source):
        sexpr = self.rdr.get_sexpr(source)

        while sexpr:
            result = None

            try:
                result = self.eval(sexpr)
            except Error as err:
                print(err)

            if self.verbose:
                self.stdout.write("    %s\n" % result)

            sexpr = self.rdr.get_sexpr()

    # In the process of living my life I had always heard that closures and dynamic scope
    # cannot co-exist.  As a thought-experiment I can visualize why this is the case.  That is,
    # while a closure captures the contextual binding of a variable, lookups in dynamic scoping
    # occur on the dynamic stack.  This means that you may be able to close over a variable as
    # long as it's unique, but the moment someone else defines a variable of the same name
    # and attempt to look up the closed variable, it will resolve to the top-most binding on the
    # dynamic stack.  This assumes that the lookup occurs before the variable of the same name
    # is popped.  While this is conceptually easy to grasp, I still wanted to see what would
    # happen in practice -- and it wasn't pretty.
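    # A minimal sketch of the hazard (hypothetical session; syntax assumed):
    #
    #   (label make (lambda (x) (lambda () x)))
    #   ((lambda (x) ((make 1))) 2)   ; dynamic lookup finds x=2, a closure keeps x=1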
    def lambda_(self, env, args):
        if self.environment != env.get("__global__") and self.closures:
            return Closure(env, args[0], args[1:])
        else:
            return Lambda(args[0], args[1:])

    # Delegate evaluation to the form.
    def eval(self, sexpr):
        try:
            return sexpr.eval(self.environment)
        except ValueError as err:
            print(err)
            return FALSE

    # A complete command is defined as a complete s-expression.  Simply put, this would be any
    # atom or any list with a balanced set of parentheses.
    def get_complete_command(self, line="", depth=0):
        if line != "":
            line = line + " "

        if self.environment.level != 0:
            prompt = PROMPT + " %i%s " % (self.environment.level, DEPTH_MARK * (depth+1))
        else:
            if depth == 0:
                prompt = PROMPT + "> "
            else:
                prompt = PROMPT + "%s " % (DEPTH_MARK * (depth+1))

            line = line + self.read_line(prompt)

            # Used to balance the parens
            balance = 0
            for ch in line:
                if ch == "(":
                    # This is not perfect, but will do for now
                    balance = balance + 1
                elif ch == ")":
                    # Too many right parens is a problem
                    balance = balance - 1
            if balance > 0:
                # Balanced parens gives zero
                return self.get_complete_command(line, depth+1)
            elif balance < 0:
                raise ValueError("Invalid paren pattern")
            else:
                return line

    def read_line(self, prompt):
        if prompt and self.verbose:
            self.stdout.write("%s" % prompt)
            self.stdout.flush()

        line = self.stdin.readline()

        if len(line) == 0:
            return "EOF"

        if line[-1] == "\n":
            line = line[:-1]

        return line

    # Lithp also processes files using the reader plumbing.
    def process_files(self, files):
        self.verbose = False

        for filename in files:
            infile = open(filename, 'r')
            self.stdin = infile

            source = self.get_complete_command()
            while(source not in ["EOF"]):
                self.process(source)

                source = self.get_complete_command()

            infile.close()
        self.stdin = sys.stdin

        self.verbose = True
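
A minimal driver for the class above might look like this (a sketch; it uses
only methods defined on Lithp and the sys import assumed at the top of the
module):

if __name__ == '__main__':
    lithp = Lithp()
    if len(sys.argv) > 1:
        lithp.process_files(sys.argv[1:])  # batch-load any files given on the command line
    lithp.print_banner()
    lithp.repl()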
Example no. 42
    def test_changing_var_in_outer_env(self):
        env = Environment(outer=Environment({"my-var": 1}))
        env.defining_env("my-var")["my-var"] = 2
        assert_equals(2, env["my-var"])
Example no. 43
import numpy as np
from config import config
import model
from env import Environment

#############################################
config.use_gpu = True
#############################################

config.rl_model = "double_dqn"
config.rl_model = "bootstrapped_double_dqn"
config.rl_final_exploration_step = 20000
config.apply_batchnorm = False

model = model.load()
env = Environment()

max_episode = 2000
total_steps = 0
exploration_rate = config.rl_initial_exploration

dump_freq = 10
episode_rewards = 0
num_optimal_episodes = 0
sum_reward = 0
sum_loss = 0.0
save_freq = 100

bootstrapped = False
if config.rl_model in ["bootstrapped_double_dqn"]:
	bootstrapped = True
Example no. 44
def main():
    """Main function - includes tests and runs the REPL."""
    if len(sys.argv) > 1:
        first_arg = sys.argv[1]
        if first_arg == '--test':
            env = Environment()
            execute_statement('x = 3', env)
            execute_statement('x+=7', env)
            execute_statement('y=9.23', env)
            env.new_frame()
            execute_statement('x = 5', env)
            print(env.frames)
            execute_statement('z="hello world"', env)
            execute_statement('z +="!!!"', env)
            execute_statement('a= `gelatin`', env)
            print(env.frames)
            ast = AST("3*4+5 ^ 7")
            print(ast.parse())
            print(ast.collapse_indices(ast.build_indices()))
            ast = AST("18+15*9:3+10")
            print(ast.parse())
            print(ast.collapse_indices(ast.build_indices()))

            print(evaluate_expression('1+2+3+4', Environment()))
            print(evaluate_expression('45+7*8', Environment()))
            print(evaluate_expression('3.2+18^2-7', Environment()))
            print(evaluate_expression('1:2 + 1:3 + 1:5', Environment()))
            print(evaluate_expression('2:3 + 3^3 - 1:5', Environment()))
            print(evaluate_expression('1234', Environment()))
            
            ast = AST("3 + 1 == 4")
            print(ast.parse())
            ast = AST("3 + 1 > 4")
            print(ast.parse())
            ast = AST("18:1 != 18.2")
            print(ast.parse())
            ast = AST("x = 4")
            print(ast.parse())
            ast = AST("y = 3 > 4")
            print(ast.parse())
            
            env2 = Environment()
            execute_statement('x = 3+5*4', env2)
            execute_statement('y = x + 19 - 3*6', env2)
            print(env2.frames)
        elif first_arg == '--test2':
            ast = AST('x = "ice cream, eggs, and milk" + "...alpha or beta"')
            print(ast.parse())
            ast = AST('y = f(1 + 1, 2 + 2, 3 + 3) - g((9+7)*2, 128/(2+2))')
            print(ast.parse())
            ast = AST('z = f("ice cream", "eggs and milk") * g("alpha or beta", 3:8, "gamma or delta")')
            print(ast.parse())
            ast = AST('makeList(1,2,3) + makeList(4,5,6)')
            print(ast.parse())
            ast = AST('[max(16, 25), max(36, max(49, 64))]')
            print(ast.parse())
            ast = AST('[concat_lists([10], [20]), concat_lists([30], [40])]')
            print(ast.parse())
        elif first_arg == '--test3':
            ast = AST('[1, 2, 3]')
            print(ast.split_list_elems())
            ast = AST('[f(2), f(3), f(4)]')
            print(ast.split_list_elems())
            ast = AST('[f(2, 3), f(3, 4, 5), f(4, 1)]')
            print(ast.split_list_elems())
            ast = AST('1 + 2 * 3')
            print(ast.split_list_elems())
            print(ast.parse())
        elif first_arg == '--test4':
            ast = AST('x.length()')
            print(ast.parse())
            ast = AST('[1,2,3].length()')
            print(ast.parse())
            ast = AST('3.01')
            print(ast.parse())
            ast = AST('3.1')
            print(ast.parse())
        elif first_arg == '--test5':
            env = Environment()
            env.new_type(['Number'], 'ComplexNumber')
            c = {'$type': 'ComplexNumber', 'real': 1, 'imag': 2}
            print(env.value_is_a(c, 'ComplexNumber'))
            print(env.value_is_a(c, 'Number'))
            print(env.value_is_a(c, 'Int'))
            print("")
            env.new_type(['Object'], 'Food')
            env.new_type(['Food'], 'Pizza')
            env.new_type(['Food'], 'Dessert')
            env.new_type(['Dessert'], 'ChocolateItem')
            env.new_type(['Pizza'], 'PepperoniPizza')
            env.new_type(['Pizza', 'ChocolateItem'], 'ChocolatePizza')
            pepperoni_pizza = {'$type': 'PepperoniPizza'}
            chocolate_pizza = {'$type': 'ChocolatePizza'}
            print(env.value_is_a(pepperoni_pizza, 'PepperoniPizza'))
            print(env.value_is_a(pepperoni_pizza, 'Pizza'))
            print(env.value_is_a(pepperoni_pizza, 'Food'))
            print(env.value_is_a(pepperoni_pizza, 'Dessert'))
            print(env.value_is_a(pepperoni_pizza, 'ChocolateItem'))
            print("")
            print(env.value_is_a(chocolate_pizza, 'PepperoniPizza'))
            print(env.value_is_a(chocolate_pizza, 'Pizza'))
            print(env.value_is_a(chocolate_pizza, 'Food'))
            print(env.value_is_a(chocolate_pizza, 'Dessert'))
            print(env.value_is_a(chocolate_pizza, 'ChocolateItem'))
            print("")
            env.new_type(['ChocolatePizza'], 'HugeChocolatePizza')
            huge_chocolate_pizza = {'$type': 'HugeChocolatePizza'}
            print(env.value_is_a(huge_chocolate_pizza, 'PepperoniPizza'))
            print(env.value_is_a(huge_chocolate_pizza, 'Pizza'))
            print(env.value_is_a(huge_chocolate_pizza, 'Food'))
            print(env.value_is_a(huge_chocolate_pizza, 'Dessert'))
            print(env.value_is_a(huge_chocolate_pizza, 'ChocolateItem'))
            print(env.value_is_a(huge_chocolate_pizza, 'ChocolatePizza'))
            print("")
        elif first_arg == '--test6':
            ast = AST('{1, 2 | 3, 4}')
            print(ast.parse())
        elif first_arg == '--test7':
            ast = AST('throw "something"')
            print(ast.parse())
        elif first_arg == '--test8':
            ast = AST('true and not false')
            print(ast.parse())
            print(ast.collapse_indices(ast.build_indices()))
        elif first_arg == '--test9':
            sample = """
                x = 5 // comment
                // comment
                /* multi
                line
                comment
                */y = 6
                z = "https://example.com"
            """
            print(preprocess(sample))
        elif first_arg == '--test-all':
            tests.test_all('capacita_programs')
        elif first_arg == '--test-all-fast':
            tests.test_all('capacita_programs', False)
        else:
            # Run a program from a text file:
            file_name = first_arg
            try:
                execute_program(file_to_str(file_name))
            except IOError:
                print("Could not read file: " + file_name)
        exit()
    repl()