Exemple #1
def run():
    """Sweep several learning rates, then run one displayed default simulation.

    For every alpha a new Environment and LearningAgent are created and
    simulated for 100 trials with the display off and the deadline enforced.
    Afterwards a default-constructed agent is simulated for 100 trials with
    the display on and a slower update delay.
    """
    for run_index, alpha in enumerate([0.5, 0.6, 0.7, 0.8, 0.9]):
        print("********************Run " + str(run_index) + " Alpha is " + str(alpha) + "********************")

        # A new environment and agent are built for every alpha value.
        env = Environment()
        agent = env.create_agent(LearningAgent, alpha)
        env.set_primary_agent(agent, enforce_deadline=True)

        # Tiny update delay and no display for the sweep.
        simulator = Simulator(env, update_delay=0.0001, display=False)
        simulator.run(n_trials=100)

    # Final run: default agent arguments, display enabled.
    env = Environment()
    agent = env.create_agent(LearningAgent)
    env.set_primary_agent(agent, enforce_deadline=True)

    simulator = Simulator(env, update_delay=0.5, display=True)
    simulator.run(n_trials=100)
Exemple #2
    def test_simple_model_checking2(self):
        # Loads the SigmaLoop example machine, runs constant set-up and
        # initialisation, then checks the invariant and the successor states.
        # NOTE(review): the `if os.name=='nt':` line below has no body in this
        # excerpt, so the method does not parse as shown; the guarded
        # statement(s) appear to be missing.
        path = "examples/rpython_performance/SigmaLoop.mch"
        if os.name=='nt':
        ast_string = file_to_AST_str(path)
        root = str_ast_to_python_ast(ast_string)

        # Test
        env = Environment()
        env._max_int = 2**31
        mch = parse_ast(root, env)
        type_check_bmch(root, env, mch) # also checks all included, seen, used and extend 
        solution_file_read = False
        bstates = set_up_constants(root, env, mch, solution_file_read)
        assert len(bstates)==0 # no setup possible
        bstates = exec_initialisation(root, env, mch, solution_file_read)
        assert len(bstates)==1 # only one possibility (sum:=45)  
        # NOTE(review): seen_states is asserted to be empty here and to hold one
        # entry two lines later, yet nothing visible in between modifies it; a
        # statement may be missing from this excerpt.
        assert len(env.state_space.seen_states)==0        
        assert isinstance(bstates[0], BState)
        assert len(env.state_space.seen_states)==1
        # The invariant clause is expected to be the third child of the root.
        invatiant = root.children[2]
        assert isinstance(invatiant, AInvariantMachineClause)
        assert interpret(invatiant, env)
        assert len(env.state_space.stack)==2 
        next_states = calc_next_states(env, mch)
        assert len(next_states)==1
        assert len(env.state_space.stack)==2 # init and empty setup
        # NOTE(review): the next two assertions are identical.
        assert env.get_value('sum')==55
        assert env.get_value('sum')==55  
Exemple #3
def create_output_directory(base_path):
    # NOTE(review): the prose below reads like a docstring whose quote
    # delimiters and section headings are missing from this excerpt, and the
    # `create_linked_output_directory(base_path,` call further down is left
    # unclosed with no visible `else:` before the second return, so this
    # block does not parse as shown.
        Creates either a symbolic link to the online cache or a local

    Note: With the local method, a symbolic link is created so that we can
          just tar.gz the product and place the checksum directly on the
          product cache.
          With the remote method, we just create a directory to hold the
          tar.gz and checksum before using ftp/scp to transfer the product
          over the network.

        string: The fullpath to the "output" link or directory.

        base_path - The location where to create the "output" link or
                    directory under.

    e = Environment()

    # The configured distribution method decides which helper is called.
    distribution_method = e.get_distribution_method()

    if distribution_method == 'local':
        distribution_directory = e.get_distribution_directory()
        return create_linked_output_directory(base_path,
        return create_local_output_directory(base_path)
    def test_types_lambda2(self):
        """Type-check a two-argument lambda bound to ``f`` and inspect the result."""
        # Write the predicate to disk and turn it into a Python AST.
        predicate = "#PREDICATE f=" + "%" + "(x,y).(x=0 & y=10|TRUE)"
        string_to_file(predicate, file_name)
        ast_text = file_to_AST_str(file_name)
        tree = str_ast_to_python_ast(ast_text)

        # Run the type checker with f, x and y as the names to be typed.
        env = Environment()
        type_with_known_types(tree, env, [], ["f", "x", "y"])

        # f is a power set of a cartesian product of two power sets.
        assert isinstance(get_type_by_name(env, "f"), PowerSetType)
        assert isinstance(get_type_by_name(env, "f").data, CartType)
        assert isinstance(get_type_by_name(env, "f").data.left, PowerSetType)
        assert isinstance(get_type_by_name(env, "f").data.right, PowerSetType)

        domain_type = get_type_by_name(env, "f").data.left
        # The image side is only present if the lambda is assigned to a variable.
        range_type = get_type_by_name(env, "f").data.right
        assert isinstance(range_type.data, BoolType)

        # The domain is itself a product of two integer power sets.
        assert isinstance(domain_type.data, CartType)
        assert isinstance(domain_type.data.left, PowerSetType)
        assert isinstance(domain_type.data.right, PowerSetType)
        assert isinstance(domain_type.data.left.data, IntegerType)
        assert isinstance(domain_type.data.right.data, IntegerType)

        # Looking the type up by node always returns a type.
        lambda_expr = tree.children[0].children[1]
        assert isinstance(lambda_expr, ALambdaExpression)
        node_image_type = env.get_lambda_type_by_node(lambda_expr)
        assert isinstance(node_image_type, BoolType)
Exemple #5
    def __init__(self, background, player, enemy):
        """Set up a battle between a player side and an enemy side.

        background - passed unchanged to Environment.__init__.
        player     - a BattleGroup or a single sprite for the player side.
        enemy      - a BattleGroup or a single sprite for the enemy side.
        """
        Environment.__init__(self, background)
        self.player = player
        self.enemy = enemy

        # A BattleGroup contributes its members; anything else is wrapped as a
        # single combatant.  Previously the BattleGroup assignment was always
        # overwritten by the unconditional single-sprite assignment after it.
        if isinstance(player, BattleGroup):
            self.players = sprite.Group(player.groupmembers)
        else:
            self.players = sprite.Group(player)
        if isinstance(enemy, BattleGroup):
            self.enemies = sprite.Group(enemy.groupmembers)
        else:
            self.enemies = sprite.Group(enemy)

        self.sprites = sprite.RenderUpdates(self.players, self.enemies)
        self.combatants = sprite.Group(self.players, self.enemies)

        # Players are aligned from x = 592 facing left, enemies from x = 48
        # facing right.
        self.alignCombatants(self.players, 608 - 16, Direction.LEFT)
        self.alignCombatants(self.enemies, 32 + 16, Direction.RIGHT)

        # TODO: Battlefield status needs to be updated
        self.statusBar = BattlefieldStatus(self.players.sprites()[0], self.enemies.sprites()[0])

        self.frameCount = settings.fps / 2

        # One (speed, combatant) pair per combatant.
        self.battleQueue = [(c.speed, c) for c in self.combatants]
    def add_abstract_environment(self, environment_name, environment_description):
        """Serialise a new Environment to XML and submit it to the catalogue URL.

        environment_name        - name given to the new Environment.
        environment_description - description given to the new Environment.
        """
        catalog_path = "catalog/org/FIWARE/environment"
        url = "%s/%s" % (self.paasmanager_url, catalog_path)

        environment = Environment(environment_name, environment_description)
        env_xml = environment.to_env_xml()

        self.__add_environment(url, tostring(env_xml))
Exemple #7
def run():
    """Run the agent for a finite number of trials and print a mean.

    The printed value is the numpy mean of elements 75..99 of
    ``a.trial_array[0]``, labelled "Average Steps".
    """
    import numpy as np

    # Values reported as optimal by the original author.  Other settings kept
    # for reference: first intuition alpha=0.5, gamma=0.7, epsilon=0.05;
    # deliberately "bad" comparison alpha=0.8, gamma=0.6, epsilon=0.2.
    alpha, gamma, epsilon = 0.5, 0.2, 0.07

    # Environment and the agent it tracks.
    env = Environment()
    agent = env.create_agent(LearningAgent, alpha, gamma, epsilon)
    env.set_primary_agent(agent, enforce_deadline=True)

    # A small update_delay speeds the simulation up.
    simulator = Simulator(env, update_delay=0.0001)
    simulator.run(n_trials=100)

    last_quarter = agent.trial_array[0][75:100]
    mean = np.mean(last_quarter)
    print("Average Steps: " + str(mean))
     def execute(self, agent, x, y, env):
        # Builds and returns a new Environment recording a "putDown" step of
        # `x` onto `y` by `agent`; returns None when any prerequisite relation
        # is absent from env.relations.
        # NOTE(review): both `for deletion ...` and `for addition ...` loops
        # below have no body in this excerpt, so the block does not parse as
        # shown and newRelations is never visibly modified.
        newEnv = Environment()
        prereqs = [("holding", agent, x)]
        # Same list object as prereqs: the ("clear", y) tuple appended below is
        # therefore also part of deletions.
        deletions = prereqs
        additions = [("empty", agent), ("on", x, y), ("clear", x)]

        if not y == "ground":
            prereqs.append(("clear", y))

        for prereq in prereqs:
            if not prereq in env.relations:
                return None

        # Shallow copy so the caller's env.relations list is not mutated here.
        newRelations = env.relations[:]

        for deletion in deletions:

        for addition in additions:

        newEnv.relations = newRelations;
        newEnv.steps = env.steps[:]

        newEnv.steps.append(("putDown", agent, x, y))
        return newEnv
Exemple #9
class Game(object):
    """Pygame front end that owns the window and an Environment of creatures."""

    def __init__(self, max_population_size):
        """Create an 800x800 window and an Environment of the same size.

        max_population_size - forwarded to Environment.
        """
        width, height = (800, 800)
        # Kept so that creature placement follows the window size.
        self.width = width
        self.height = height
        self.screen = pygame.display.set_mode((width, height))
        self.env = Environment(width, height, max_population_size)

    def add_creatures(self):
        """Add env.max_population_size creatures at random positions."""
        for _ in range(self.env.max_population_size):
            self.env.add_creature(x=randint(0, self.width), y=randint(0, self.height))

    def handle_event(self, event):
        """Return False for a pygame.QUIT event, True for anything else."""
        return event.type != pygame.QUIT

    def run_game(self):
        """Poll events and draw every creature until a QUIT event arrives."""
        running = True
        while running:
            for event in pygame.event.get():
                # A quit request must stick: a later event in the same batch
                # may not switch the loop back on.
                if not self.handle_event(event):
                    running = False
            for creature in self.env.creatures:
                center = (int(creature.x), int(creature.y))
                # White circle one pixel larger, drawn before the coloured one.
                pygame.draw.circle(self.screen, (255,255,255), center, creature.size + 1, creature.thickness)
                pygame.draw.circle(self.screen, creature.color, center, creature.size, creature.thickness)
def run():
    """Simulate 100 trials without display, then print the agent's statistics."""

    # Environment plus the agent it tracks, deadline enforced.
    env = Environment()
    agent = env.create_agent(LearningAgent)
    env.set_primary_agent(agent, enforce_deadline = True)

    # Small update delay and no display.
    simulator = Simulator(env, update_delay=0.0005, display = False)
    simulator.run(n_trials=100)

    # Summary of the values stored on the agent.
    print ("................................DATA SUMMARY................................")
    print ("alpha: ", agent.alpha)
    print ("gamma: ", agent.gamma)
    print ("epsilon: ", agent.epsilon)
    print ("total actions: ", agent.total_actions)
    print ("total rewards: ", agent.total_rewards)
    print ("number of negative reward in each trial ", agent.last_negative_reward_count_list.values())
    print ("number of actions in each trial ", agent.last_actions_list.values())
    print ("total rewards in each trial ", agent.last_rewards_list.values())
def distribute_statistics(immutability, source_path, packaging_path, parms):
    # NOTE(review): the prose below reads like a docstring whose quote
    # delimiters and section headings are missing from this excerpt, and
    # several calls further down (os.path.join, distribute_statistics_local,
    # utilities.get_cache_hostname) are left unclosed, so this block does not
    # parse as shown.
        Determines if the distribution method is set to local or remote and
        calls the correct distribution method.

      product_file - The full path to the product either on the local system
                     or the remote destination.
      cksum_value - The check sum value of the product.

        source_path - The full path to of directory containing the data to
                      package and distribute.
        package_dir - The full path on the local system for where the packaged
                      product should be placed under.
        parms - All the user and system defined parameters.

    env = Environment()

    distribution_method = env.get_distribution_method()

    product_id = parms['product_id']
    order_id = parms['orderid']

    # The file paths to the distributed product and checksum files
    # NOTE(review): neither name is reassigned anywhere in the visible code,
    # so the function returns ('ERROR', 'ERROR') as shown.
    product_file = 'ERROR'
    cksum_file = 'ERROR'

    if distribution_method == DISTRIBUTION_METHOD_LOCAL:
        # Use the local cache path
        cache_path = os.path.join(settings.ESPA_LOCAL_CACHE_DIRECTORY,

        # Adjust the packaging_path to use the cache
        package_path = os.path.join(packaging_path, cache_path)

        distribute_statistics_local(immutability, product_id, source_path,

    else:  # remote
        # A second Environment is created here although `env` already exists.
        env = Environment()

        # Determine the remote hostname to use
        destination_host = utilities.get_cache_hostname(env
        # Use the remote cache path
        cache_path = os.path.join(settings.ESPA_REMOTE_CACHE_DIRECTORY,

        # Remote credentials come from the 'options' entry of parms.
        options = parms['options']
        dest_user = options['destination_username']
        dest_pw = options['destination_pw']

        distribute_statistics_remote(immutability, product_id, source_path,
                                     destination_host, cache_path,
                                     dest_user, dest_pw)

    return (product_file, cksum_file)
Exemple #12
def run(msg = ''):
    """Simulate 100 trials and return three averages over ``a.results``.

    msg is only referenced by commented-out debug output in the original and
    has no effect here.

    Returns a tuple (average_cycles, average_reward, average_violations),
    the means of elements 0, 1 and 2 of every entry of the agent's results.
    """
    # Environment plus the agent it tracks, deadline enforced.
    env = Environment()
    agent = env.create_agent(LearningAgent)
    env.set_primary_agent(agent, enforce_deadline=True)

    # No update delay and no display.
    simulator = Simulator(env, update_delay=0, display=False)
    simulator.run(n_trials=100)

    results = agent.results
    # Column 0: cycles, column 1: reward, column 2: violations.
    average_cycles, average_reward, average_violations = [
        mean([result[column] for result in results]) for column in range(3)
    ]

    return average_cycles, average_reward, average_violations
def main():
    """Load a pickled Q table, simulate 1000 trials with it, and save it back.

    The table is read from and written to 'Q_v2.pickle' in the current
    working directory.
    """
    # Both handles are now closed deterministically; previously the read handle
    # was still open when the same file was reopened for writing.
    # The original text modes are kept so an existing file stays readable.
    # NOTE(review): Python 3 would need 'rb'/'wb' here.
    # NOTE(review): pickle.load must only be used on trusted files.
    with open('Q_v2.pickle', 'r') as q_file:
        Q = pickle.load(q_file)

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    a.Q = Q
    print(len(a.Q))
    # The deadline is not enforced for this run.
    e.set_primary_agent(a, enforce_deadline=False)  # specify agent to track

    # Now simulate it
    sim = Simulator(e, update_delay=1, display=True)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=1000)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    with open('Q_v2.pickle', 'w') as q_file:
        pickle.dump(a.Q, q_file)
    def test_genAST_para_def(self):
        # Writes a machine whose text defines Expr(X) and MyType, parses and
        # type-checks it, initialises it and asserts that the invariant holds
        # and that z evaluates to 3.
        # NOTE(review): the triple-quoted literal opened below is never closed
        # in this excerpt, so the method does not parse as shown.
        # Build AST
        string ='''
        MACHINE Test
        VARIABLES z
        INVARIANT z:MyType 
        INITIALISATION z:= Expr(2)
        Expr(X) == 1+X;
        MyType == NAT;
        string_to_file(string, file_name)
        ast_string = file_to_AST_str(file_name)
        root = str_ast_to_python_ast(ast_string)

        # Test
        env = Environment()
        dh = DefinitionHandler(env, str_ast_to_python_ast)
        mch = parse_ast(root, env)
        type_check_bmch(root, env, mch) # also checks all included, seen, used and extend
        arbitrary_init_machine(root, env, mch) # init VARIABLES and eval INVARIANT
        invariant = root.children[2]
        assert isinstance(invariant, AInvariantMachineClause)
        assert interpret(invariant, env)
        assert env.get_value("z")==3
Exemple #15
def run():
    """Run the agent for a finite number of trials."""
    # Grid over three initial Q values and five learning rates
    # (alpha * 0.2 for alpha in 1..5); returns a DataFrame built from `record`.

    record = []
    for q_initial in [0, 2, 10]:
        for alpha in range(1, 6):
            # Set up environment and agent
            e = Environment()  # create environment (also adds some dummy traffic)
            a = e.create_agent(LearningAgent, alpha * 0.2, q_initial)  # create agent
            e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
            # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

            # Now simulate it
            sim = Simulator(e, update_delay=0, display=False)  # create simulator (uses pygame when display=True, if available)
            # NOTE: To speed up simulation, reduce update_delay and/or set display=False

            sim.run(n_trials=100)  # run for a specified number of trials
            # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

            trip_log = pd.DataFrame(a.trip_log)
            # trip_log['Used'] = trip_log['Deadline'] - trip_log['Remaining']
            # Remaining time as a percentage of the deadline, per row.
            trip_log['Efficiency'] = trip_log['Remaining'] / trip_log['Deadline'] * 100
            # NOTE(review): the following key/value lines are not enclosed in
            # any call or dict literal in this excerpt, and nothing visible
            # appends to `record`; the wrapping statement appears to be
            # missing, so the block does not parse as shown.
                'Success Rate': trip_log[trip_log.Success == True].shape[0],
                'Alpha': alpha * 0.2,
                'Q Initial': q_initial,
                'Efficiency': trip_log['Efficiency'].mean(),
                'Ave Reward': trip_log['Reward'].mean(),
                'Ave Penalty': trip_log['Penalty'].mean(),

    return pd.DataFrame(record)
Exemple #16
def run():
    """Run the agent for 100 trials and print a penalty/success summary.

    The summary splits the penalties and the success rate into the first and
    second half of the trials.  The last entry of the agent's failure list is
    used as the total number of failures.
    NOTE(review): that list looks like a running total per trial — confirm
    against LearningAgent.
    """

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.00001, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    # Print summary #
    allPenalities = a.numberOfPenaltiesList
    allFailures = a.numberOfFailuresList

    # Floor division keeps the midpoints integers on both Python 2 and 3;
    # plain `/` yields a float on Python 3, which is not a valid index.
    failuresMidpoint = len(allFailures) // 2
    penalitiesMidpoint = len(allPenalities) // 2

    numberOfTrials = float(len(allFailures))
    numberOfFailures = float(allFailures[-1])
    numberOfSuccess = numberOfTrials - numberOfFailures
    numberOfSuccessFirstHalf = ((numberOfTrials) / 2) - float(allFailures[failuresMidpoint])
    numberOfSuccessSecondHalf = numberOfSuccess - numberOfSuccessFirstHalf
    print ("=================================================================================")
    print ("SUMMARY")
    print ("=================================================================================")
    print ("Total Penalities received = %3.2f" % (sum(allPenalities)))
    print ("\tPenalities received in the first half of trials  = %3.2f" % (sum(allPenalities[:penalitiesMidpoint])))
    print ("\tPenalities received in the second half of trials = %3.2f" % (sum(allPenalities[penalitiesMidpoint:])))
    print ("Success Rate: %3.2f%%" % (numberOfSuccess/numberOfTrials*100))
    print ("\tSuccess Rate of the first half : %3.2f%%" % (numberOfSuccessFirstHalf/(numberOfTrials/2)*100))
    print ("\tSuccess Rate of the second half: %3.2f%%" % (numberOfSuccessSecondHalf/(numberOfTrials/2)*100))
def run():
    """Run one simulation whose settings are collected at the top of the body."""
    # Debug settings gathered in one place.
    enforce_deadline = True
    update_delay = 0.01
    show_display = False
    trial_count = 100
    # Run-type switch, as described by the original author:
    #   random                 - take random actions only
    #   way_light_only         - Traffic Light, Way Point
    #   way_light_Vehicle      - Traffic Light, Way Point, Left, Right, Oncoming
    #   way_light_modified (or any other value)
    #                          - Way Point, Combination Light and Vehicle State
    run_type = 'way_light_only'

    # Environment and agent; the run type is set on the agent before it
    # becomes the primary agent.
    env = Environment()
    agent = env.create_agent(LearningAgent)
    agent.run_type = run_type
    env.set_primary_agent(agent, enforce_deadline=enforce_deadline)

    # Simulate with the settings chosen above.
    simulator = Simulator(env, update_delay=update_delay, display=show_display)
    simulator.run(n_trials=trial_count)

    # at the end of the simulation show results
    # call qlearner reset to get last trial result
def run():
    """Run the agent for a finite number of trials."""
    # Grid search over epsilon, alpha and gamma; one line per combination is
    # appended to 'qlearning_tuning_report.txt' next to this file.
    # NOTE(review): the `if not os.path.exists(target_dir):` line has no body
    # in this excerpt and the `with` statement further down is indented deeper
    # than its surroundings (mixed tabs/spaces), so this block does not parse
    # as shown.
    # create output file
    target_dir = os.path.dirname(os.path.realpath(__file__))
    target_path = os.path.join(target_dir, 'qlearning_tuning_report.txt')
    if not os.path.exists(target_dir):
	# loop the parameters
    for epsilon in [0.1, 0.5, 0.9]:
        for alpha in np.arange(0.1, 1, 0.2):
            for gamma in np.arange(0.1, 1, 0.2):
                print epsilon, alpha, gamma
                # Set up environment and agent
                e = Environment()  # create environment (also adds some dummy traffic)
                a = e.create_agent(QAgent, epsilon, alpha, gamma)  # create agent
                e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
				# NOTE: You can set enforce_deadline=False while debugging to allow longer trials

				# Now simulate it
                sim = Simulator(e, update_delay=0.001, display=False)  # create simulator (uses pygame when display=True, if available)
				# NOTE: To speed up simulation, reduce update_delay and/or set display=False
                sim.run(n_trials=100)  # run for a specified number of trials
                # get the count for the number of successful trials and average running time
                summary = sim.report()
                # write out the results
					with open(target_path, 'a') as f:
						f.write('epsilon {}, alpha {}, gamma {} : success {}, avg_time {}, total_reward {}\n'.format(epsilon, alpha, gamma, summary[0], summary[1], round(a.total_reward, 3)))
def main():
    # Builds the key bindings, creates the Environment and Display, loads a
    # hard-coded level and enters main_loop.
    # NOTE(review): the `inputs = [` list is never closed and its entries are
    # bare `key: value` pairs with no surrounding braces; 'thrust', 'brake'
    # and 'turn_direction' each appear twice, which suggests two per-player
    # dicts whose delimiters are missing.  The block does not parse as shown.

    #TEMP: hardcoded keymaps.
    inputs = [
        #    'x_axis': (pg.K_j, pg.K_l),
        #    'y_axis': (pg.K_k, pg.K_i),
        #    'brake': pg.K_SPACE,
            'thrust': pg.K_UP,
            'brake': pg.K_DOWN,
            'turn_direction': (pg.K_LEFT, pg.K_RIGHT),
            'thrust': pg.K_PERIOD,
            'brake': pg.K_e,
            'turn_direction': (pg.K_o, pg.K_u),

    env = Environment(inputs)
    disp = Display(env, SCREENSIZE)
    env.load_level(levels.inelastic_collision_bug) # TEMP, hardcoded level selection.

    main_loop(env, disp)

Exemple #20
	def test_insert(self):
		"""After inserting x with value 5, looking x up yields 5."""
		environment = Environment()
		scopes = environment.env_list
		name = Lexeme(token_type="VARIABLE", value="x")
		number = Lexeme(token_type="NUMBER", value=5)

		environment.insert(name, number, scopes)

		found = environment.lookup(name, scopes)
		self.assertEqual(found, 5)
Exemple #21
class Game(object):
    """Pygame front end that owns the window and an Environment of ants."""

    def __init__(self):
        """Create a 600x600 window and an Environment of the same size."""
        width, height = (600, 600)
        self.screen = pygame.display.set_mode((width, height))
        self.env = Environment(width, height)

    def add_ants(self, number_of_ants):
        """Add number_of_ants ants at (50, 50), each with a random angle."""
        for _ in range(number_of_ants):
            self.env.add_ant(x=50, y=50, speed=3, angle=randint(0, 360), awareness_radius=300)

    def handle_event(self, event):
        """Return False for a pygame.QUIT event, True for anything else.

        Mouse-button-down events are printed; key-down events are accepted
        without any action.
        """
        if event.type == pygame.QUIT:
            return False
        if event.type == pygame.MOUSEBUTTONDOWN:
            print(event)
        return True

    def run_game(self):
        """Poll events and draw every ant until a QUIT event arrives."""
        running = True
        while running:
            for event in pygame.event.get():
                # A quit request must stick: a later event in the same batch
                # may not switch the loop back on.
                if not self.handle_event(event):
                    running = False

            for p in self.env.ants:
                pygame.draw.circle(self.screen, p.color, (int(p.x), int(p.y)), p.size, p.thickness)
Exemple #22
 def create_new_book(self):
     # Asks for a capture style, a project name and a location, creates the
     # project stub and returns the result of Environment.get_books; returns
     # False when a name or location prompt yields an empty value.
     # NOTE(review): the second `ca.dialog(` call and the final
     # `Environment.get_books(` call are left unclosed in this excerpt, and
     # 'Single' is immediately overwritten by 'Dual' (an `else:` appears to be
     # missing), so the block does not parse or behave as intended as shown.
     capture_style = ca.dialog(message='Please Select a capture style:', 
                               Buttons=[('Single Camera', 1),
                                        ('Dual Cameras', 2)])
     if capture_style == 1:
         capture_style = 'Single'
         capture_style = 'Dual'
     identifier = ca.dialog(message='Please name this project:', 
                            Buttons=[(Gtk.STOCK_OK, Gtk.ResponseType.OK),
                                     (Gtk.STOCK_CANCEL, Gtk.ResponseType.NO)],
     if not identifier:
         return False
     location = ca.get_user_selection(title='Please choose a location for this project:')
     if not location:
         return False
     # Keep asking for a new name and location while the target path exists.
     while os.path.exists(location + '/' + identifier):
         identifier = ca.dialog(message='Sorry, a project by that name already exists. '+
                                'Please try another name:', get_input=True)
         if not identifier:
             return False
         location = ca.get_user_selection(title='Please choose a location for this project:')
         if not location:
             return False
     Environment.create_new_book_stub(location, identifier)
     return Environment.get_books(location + '/' + identifier, 
    def test_genAST_subst_def2(self):
        # Parses a machine that uses a substitution definition `Assign`,
        # initialises it and asserts the invariant plus the values of z, b, x.
        # NOTE(review): the machine text below is not enclosed in a string
        # literal and `string` is never assigned in this excerpt; the
        # `string = '''` / closing `'''` lines appear to be missing, so the
        # method does not parse as shown.
        MACHINE Test
        VARIABLES z, b, x
        INVARIANT x:NAT & z:NAT & b:BOOL
        INITIALISATION x:=2 ; Assign(x+1, z) ; Assign(TRUE, b)
        DEFINITIONS Assign(Expr, VarName) == VarName := Expr;
        string_to_file(string, file_name)
        ast_string = file_to_AST_str(file_name)
        root = str_ast_to_python_ast(ast_string)

        env = Environment()
        dh = DefinitionHandler(env, str_ast_to_python_ast)
        mch = parse_ast(root, env)
        type_check_bmch(root, env, mch) # also checks all included, seen, used and extend
        arbitrary_init_machine(root, env, mch)# init VARIABLES and eval INVARIANT
        # The invariant clause is expected to be the third child of the root.
        invariant = root.children[2]
        assert isinstance(invariant, AInvariantMachineClause)
        assert interpret(invariant, env)
        assert env.get_value("z")==3
        assert env.get_value("b")==True
        assert env.get_value("x")==2
    def test_genAST_sub_var(self):
        # Parses a machine containing a VAR ... IN substitution, initialises
        # it with integers limited to -1..5 and asserts that xx ends up as 4.
        # NOTE(review): the triple-quoted literal opened below is never closed
        # in this excerpt and parts of the machine text appear to be missing,
        # so the method does not parse as shown.
        # Build AST
        string = '''
        MACHINE Test
        VARIABLES xx
        INVARIANT xx:NAT 
                        VAR varLoc1, varLoc2 IN
                        varLoc1 := xx + 1 ;
                        varLoc2 := 2 * varLoc1 ;
                        xx := varLoc2
        string_to_file(string, file_name)
        ast_string = file_to_AST_str(file_name)
        root = str_ast_to_python_ast(ast_string)

        # Test
        env = Environment()
        env._min_int = -1
        env._max_int = 5
        mch = parse_ast(root, env)
        type_check_bmch(root, env, mch) # also checks all included, seen, used and extend
        arbitrary_init_machine(root, env, mch) # init VARIABLES and eval INVARIANT
        assert isinstance(root.children[2], AInvariantMachineClause)
        assert interpret(root.children[2], env)
        assert env.get_value("xx")==4
Exemple #25
def run():
    """Run the agent for a finite number of trials."""
    # After the simulation the agent's Q table is printed, one key per line
    # with its values formatted to two decimals.

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.0001, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    ## print Q table
    print '+++++++++++++++++++++++++++++++++++++++++++++++'
    print 'final Q table'
    print '+++++++++++++++++++++++++++++++++++++++++++++++'
    for key in a.Q:
        # Trailing comma: the key and its values share one output line.
        print key,
        print ["%0.2f" % i for i in a.Q[key]]

    print '===================================================================='
    print 'An Array of Arrays where each subarray shows neg rewards for a trial'
    print '===================================================================='
    #print neg rewards and split term
    # NOTE(review): `x` is not defined anywhere in the visible function; the
    # statement that built it appears to be missing, so this line raises
    # NameError as shown unless `x` exists at module level.
    y=[i.split(' ') for i in x]
    print y #shows an array of arrays, could calculate total neg reward for each 
Exemple #26
def run():
    """Run the agent for a finite number of trials."""
    # Sweeps gamma over 0.0 .. 0.9 in steps of 0.1, runs ten 50-trial
    # simulations per value, and prints the per-gamma totals divided by 10.
    # NOTE(review): the parenthesis opened on the
    # `gamma_to_average_reward[a.GAMMA] = (` line is never closed in this
    # excerpt, so the block does not parse as shown.

    # Set up environment and agent
    gammas = [x / 10.0 for x in xrange(0, 10)]
    gamma_to_success_rate = OrderedDict()
    gamma_to_average_reward = OrderedDict()
    # Run a simulation for each sample gamma value to test which
    # choice of gamma results in the most successful agent
    for gamma in gammas:
        # Run 10 trials over each choice of gamma to get average performance metrics
        for trial in xrange(10):
            e = Environment()  # create environment (also adds some dummy traffic)
            a = e.create_agent(LearningAgent, (gamma))  # create agent
            e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

            # Now simulate it
            sim = Simulator(e, update_delay=0.0)  # reduce update_delay to speed up simulation
            sim.run(n_trials=50)  # press Esc or close pygame window to quit

            # Totals are accumulated under the agent's own GAMMA attribute.
            gamma_to_success_rate[a.GAMMA] = gamma_to_success_rate.get(a.GAMMA, 0) + sim.env.successful_trials
            gamma_to_average_reward[a.GAMMA] = (
                gamma_to_average_reward.get(a.GAMMA, 0) + a.get_average_reward_per_action()

        # Get the average of the 10 trials
    # NOTE(review): under Python 2 the success-rate division truncates if
    # successful_trials is an int — confirm whether that is intended.
    for gamma in gamma_to_average_reward.keys():
        gamma_to_average_reward[gamma] = gamma_to_average_reward[gamma] / 10
        gamma_to_success_rate[gamma] = gamma_to_success_rate[gamma] / 10
    print gamma_to_average_reward
    print gamma_to_success_rate
def run():
    """Simulate the configured number of iterations and print wins per symbol.

    Wins are keyed "X" for agents whose symbol is 1 and 'O' for all others.
    A 'main.complete' signal is sent through the dispatcher at the end.
    """
    opts = parseOptions()

    environment = Environment()
    # No update delay; the display follows the parsed options.
    simulator = Simulator(environment, update_delay=0, display=opts.display)

    wins_by_mark = {}

    from settings import params
    players = [(opts.player1, 1), (opts.player2, -1)]
    for agent, symbol in players:
        # Only the per-agent parameters are looked up in the visible code.
        kwargs = params[agent]

    simulator.run(n_trials=opts.iterations)

    for agent in environment.agents:
        if agent.symbol == 1:
            mark = "X"
        else:
            mark = 'O'
        wins_by_mark[mark] = agent.wins

    print(wins_by_mark)

    dispatcher.send(signal='main.complete', sender={})
    def test_genAST_sub_let2(self):
        # Parses a machine containing a LET ... BE substitution, initialises
        # it with integers limited to -1..5 and asserts X == 10 and Y == 3.
        # NOTE(review): the triple-quoted literal opened below is never closed
        # in this excerpt and parts of the machine text appear to be missing,
        # so the method does not parse as shown.
        string = '''
        MACHINE Test
        VARIABLES X, Y
                        LET r1, X BE
                    X  = 6 &
                        r1 = X / 2 
                        Y := r1

        string_to_file(string, file_name)
        ast_string = file_to_AST_str(file_name)
        root = str_ast_to_python_ast(ast_string)
        # Test
        env = Environment()
        env._min_int = -1
        env._max_int = 5
        mch = parse_ast(root, env)
        type_check_bmch(root, env, mch) # also checks all included, seen, used and extend
        arbitrary_init_machine(root, env, mch) # init VARIABLES and eval INVARIANT
        assert isinstance(root.children[2], AInvariantMachineClause)
        assert interpret(root.children[2], env)
        assert env.get_value("X")==10
        assert env.get_value("Y")==3
    def test_genAST_sub_any(self):
        # Parses a machine containing an ANY ... WHERE substitution,
        # initialises it with integers limited to -1..5 and asserts that xx is
        # 5 or 7.
        # NOTE(review): the triple-quoted literal opened below is never closed
        # in this excerpt and parts of the machine text appear to be missing,
        # so the method does not parse as shown.
        string = '''
        MACHINE Test
        VARIABLES xx
        INVARIANT xx:NAT 
                        ANY r1, r2 WHERE
                        r1 : NAT &
                        r2 : NAT &
                        r1*r1 + r2*r2 = 25
                        xx := r1 + r2
        string_to_file(string, file_name)
        ast_string = file_to_AST_str(file_name)
        root = str_ast_to_python_ast(ast_string)

        # Test
        env = Environment()
        env._min_int = -1
        env._max_int = 5
        mch = parse_ast(root, env)
        type_check_bmch(root, env, mch) # also checks all included, seen, used and extend
        arbitrary_init_machine(root, env, mch) # init VARIABLES and eval INVARIANT
        assert isinstance(root.children[2], AInvariantMachineClause)
        assert interpret(root.children[2], env)
        assert env.get_value("xx")==5 or env.get_value("xx")==7 # 3+4 or 5+0
Exemple #30
def run():
    """Run the agent for a finite number of trials per parameter combination.

    For every combination of discount factor, starting learning rate and
    epsilon-greedy value, a fresh environment, agent and simulator are
    created, the simulation is run for ``number_of_trials`` trials, and the
    parameters plus the completed-trial ratio are appended to
    ``running_report.txt``.
    """
    # setup various parameter combinations
    discount_factors = [0.5]
    starting_learning_rates = [0.5]
    epsilon_greedy_policy = [0.09]

    # Single source of truth: used both to drive the simulator and to
    # normalise the completed-trial count in the report.
    number_of_trials = 100

    # The ``with`` block guarantees the report is flushed and closed even if a
    # simulation raises or is interrupted (the handle used to be leaked).
    with open('running_report.txt', 'w') as f:
        for d_factor in discount_factors:
            for alpha in starting_learning_rates:
                for greedy_policy in epsilon_greedy_policy:
                    # Set up environment and agent
                    e = Environment()  # create environment (also adds some dummy traffic)
                    a = e.create_agent(LearningAgent, learning_rate=alpha, discount_factor=d_factor, greedy_policy=greedy_policy)  # create agent
                    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
                    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

                    # Now simulate it
                    sim = Simulator(e, update_delay=0, display=True)  # create simulator (uses pygame when display=True, if available)

                    # NOTE: To speed up simulation, reduce update_delay and/or set display=False
                    sim.run(n_trials=number_of_trials)  # run for a specified number of trials

                    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
                    print >> f, "Learning rate:", alpha
                    print >> f, "Discount factor:", d_factor
                    print >> f, "Greedy Policy:", greedy_policy
                    # Divide by the actual trial count rather than a literal 100
                    # so the ratio stays correct if number_of_trials changes.
                    print >> f, "Percentage completed: ", a.completed_trials / float(number_of_trials), "\n"

Exemple #31
 def visit_block_stmt(self, stmt: st.Block) -> None:
     """Execute the block's statements in a new scope enclosing the current one."""
     block_scope = Environment(self._environment)
     self.execute_block(stmt.statements, block_scope)
Exemple #32
                    help="Random seed for repeatable experiments.")
                    choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
                    help="Log level.")
# Parse the command-line options (the parser definition precedes this excerpt).
args = parser.parse_args()

logger = logging.getLogger()

# NOTE(review): this `if` has no body in the visible source -- the seeding
# statement appears to be missing; confirm against upstream.
if args.random_seed:

# instantiate classes
env = Environment(args)
mem = ReplayMemory(args)
net = DeepQNetwork(env.numActions(), args)
agent = Agent(env, mem, net, args)
stats = Statistics(agent, net, mem, env, args)

# NOTE(review): only the log message is visible here; no call that actually
# loads the weights is shown.
if args.load_weights:
    logger.info("Loading weights from %s" % args.load_weights)

# NOTE(review): only the log line and a stats write are visible; the call that
# plays the games is not shown.
if args.play_games:
    logger.info("Playing for %d game(s)" % args.play_games)
    stats.write(0, "play")
Exemple #33
# Visitor-based interpreter: each visit_*_expr method evaluates one expression
# node and each visit_*_stmt method executes one statement node. Variable
# scopes are Environment objects; `self._environment` is the current scope and
# `self._GLOBALS` the outermost one.
# NOTE(review): several lines of this class appear to be missing from the
# visible source (`else:` branches, `try:`/`finally:` lines, loop bodies,
# decorators, closing parentheses). Each affected spot is flagged below; the
# code itself is left untouched.
class Interpreter(ExprVisitor, StmtVisitor):
    def __init__(self):
        self._GLOBALS = Environment()
        self._environment = self._GLOBALS
        # Maps an expression node to the scope distance recorded by resolve().
        self._locals = {}

        # Pre-defined native callables available in the global scope.
        self._GLOBALS.define('clock', natives.Clock())
        self._GLOBALS.define('print', natives.Print())
        self._GLOBALS.define('println', natives.PrintLn())

    # Evaluates the right-hand side, stores it, and returns the stored value.
    # NOTE(review): as written, assign_at() also runs when `distance` is None;
    # an `else:` appears to be missing before it -- confirm.
    def visit_assign_expr(self, expr: ex.Assign) -> LoxValue:
        value = self._evaluate(expr.value)
        distance = self._locals.get(expr)
        if distance is None:
            self._GLOBALS.assign(expr.name, value)
            self._environment.assign_at(distance, expr.name, value)
        return value

    # Evaluates both operands (left first), then dispatches on the operator
    # token type. Arithmetic and ordering operators check their operands first.
    def visit_binary_expr(self, expr: ex.Binary) -> LoxValue:
        left = self._evaluate(expr.left)
        right = self._evaluate(expr.right)

        if expr.operator.type is TokenType.BANG_EQUAL:
            return not self._is_equal(left, right)
        if expr.operator.type is TokenType.EQUAL_EQUAL:
            return self._is_equal(left, right)
        if expr.operator.type is TokenType.GREATER:
            self._check_number_operands(expr.operator, left, right)
            return left > right
        if expr.operator.type is TokenType.GREATER_EQUAL:
            self._check_number_operands(expr.operator, left, right)
            return left >= right
        if expr.operator.type is TokenType.LESS:
            self._check_number_operands(expr.operator, left, right)
            return left < right
        if expr.operator.type is TokenType.LESS_EQUAL:
            self._check_number_operands(expr.operator, left, right)
            return left <= right
        if expr.operator.type is TokenType.MINUS:
            self._check_number_operands(expr.operator, left, right)
            return left - right
        if expr.operator.type is TokenType.SLASH:
            self._check_number_operands(expr.operator, left, right)
            self._check_denominator(expr.operator, right)
            return left / right
        if expr.operator.type is TokenType.STAR:
            self._check_number_operands(expr.operator, left, right)
            return left * right
        if expr.operator.type is TokenType.PLUS:
            # Two floats add numerically; if either side is a string, both
            # sides are stringified and concatenated.
            if isinstance(left, float) and isinstance(right, float):
                return left + right
            if isinstance(left, str) or isinstance(right, str):
                return self.stringify(left) + self.stringify(right)
            raise RuntimeException(expr.operator, 'Incompatible operands.')
        if expr.operator.type is TokenType.COMMA:
            # The left operand was already evaluated above; the right operand
            # is evaluated again here and its value returned.
            return self._evaluate(expr.right)

        # unreachable
        raise RuntimeError('Unreachable code.')

    # Evaluates the callee, checks it is callable and that the argument count
    # matches its arity, then calls it.
    # NOTE(review): the `for` loop below has no body and the second
    # RuntimeException(...) call is never closed in the visible source --
    # lines appear to be missing; confirm against upstream.
    def visit_call_expr(self, expr: ex.Call) -> LoxValue:
        callee = self._evaluate(expr.callee)
        arguments = []
        for argument in expr.arguments:
        if not isinstance(callee, LoxCallable):
            raise RuntimeException(expr.paren,
                                   'Can only call functions and classes.')
        function = callee
        if len(arguments) != function.arity:
            raise RuntimeException(
                f'Expected {function.arity} arguments, but got {len(arguments)}.'
        return callee.call(self, arguments)

    # Property read: only LoxInstance objects support it.
    def visit_get_expr(self, expr: Get) -> LoxValue:
        obj = self._evaluate(expr.object)
        if isinstance(obj, LoxInstance):
            return obj.get(expr.name)
        raise RuntimeException(expr.name, 'Only instances have properties')

    def visit_grouping_expr(self, expr: ex.Grouping) -> LoxValue:
        return self._evaluate(expr.expression)

    def visit_literal_expr(self, expr: ex.Literal) -> LoxValue:
        return expr.value

    # Short-circuit evaluation: returns the left operand itself (not a bool)
    # when it already decides the result; otherwise evaluates the right one.
    def visit_logical_expr(self, expr: ex.Logical) -> LoxValue:
        left = self._evaluate(expr.left)
        if expr.operator.type is TokenType.OR and self._is_truthy(left):
            return left
        if expr.operator.type is TokenType.AND and not self._is_truthy(left):
            return left
        return self._evaluate(expr.right)

    # Property write: the target is evaluated and type-checked before the
    # value expression is evaluated.
    def visit_set_expr(self, expr: Set) -> LoxValue:
        obj = self._evaluate(expr.object)
        if not isinstance(obj, LoxInstance):
            raise RuntimeException(expr.name, 'Only instances have fields.')
        value = self._evaluate(expr.value)
        obj.set(expr.name, value)
        return value

    # Looks up 'super' at the resolved distance and 'this' one scope nearer
    # (distance - 1), finds the named method on the superclass and binds it to
    # the 'this' object.
    def visit_super_expr(self, expr: Super) -> Any:
        distance = self._locals[expr]
        superclass = self._environment.get_at(distance, 'super')
        obj = self._environment.get_at(distance - 1, 'this')
        if not isinstance(superclass, LoxClass):
            # unreachable
            raise RuntimeError('Unreachable code.')
        method = superclass.find_method(expr.method.lexeme)
        if method is None:
            raise RuntimeException(
                expr.method, f"Undefined property '{expr.method.lexeme}'.")
        return method.bind(obj)

    # Conditional (?:) expression.
    # NOTE(review): the second `return` sits at the same indentation as the
    # first, directly after it; an `else:` line appears to be missing --
    # confirm.
    def visit_ternary_expr(self, expr: ex.Ternary) -> LoxValue:
        if expr.operator1.type is TokenType.QUESTION and expr.operator2.type is TokenType.COLON:
            pred = self._evaluate(expr.left)
            if self._is_truthy(pred):
                return self._evaluate(expr.center)
                return self._evaluate(expr.right)

        # unreachable
        raise RuntimeError('Unreachable code.')

    def visit_this_expr(self, expr: This) -> Any:
        return self._lookup_variable(expr.keyword, expr)

    # Unary minus (operand must be a number) and logical not.
    def visit_unary_expr(self, expr: ex.Unary) -> LoxValue:
        right = self._evaluate(expr.right)
        if expr.op.type is TokenType.MINUS:
            self._check_number_operands(expr.op, right)
            return -right
        if expr.op.type is TokenType.BANG:
            return not self._is_truthy(right)

        # unreachable
        raise RuntimeError('Unreachable code.')

    def visit_variable_expr(self, expr: ex.Variable) -> LoxValue:
        return self._lookup_variable(expr.name, expr)

    # A block runs in a new Environment whose enclosing scope is the current one.
    def visit_block_stmt(self, stmt: st.Block) -> None:
        self.execute_block(stmt.statements, Environment(self._environment))

    def visit_break_stmt(self, stmt: st.Break) -> None:
        raise Break()

    # Class declaration: the name is defined as None first and assigned the
    # finished LoxClass at the end. When a superclass is given, the methods
    # are created inside an extra scope that defines 'super'; that scope is
    # left again before the final assignment.
    def visit_class_stmt(self, stmt: Class) -> None:
        superclass = None
        if stmt.superclass is not None:
            superclass = self._evaluate(stmt.superclass)
            if not isinstance(superclass, LoxClass):
                raise RuntimeException(stmt.superclass.name,
                                       'Superclass must be a class.')
        self._environment.define(stmt.name.lexeme, None)
        if stmt.superclass is not None:
            self._environment = Environment(self._environment)
            self._environment.define('super', superclass)
        methods = {}
        for method in stmt.methods:
            # The third argument is True only for the method named 'init'.
            function = LoxFunction(method, self._environment,
                                   method.name.lexeme == 'init')
            methods[method.name.lexeme] = function
        klass = LoxClass(stmt.name.lexeme, superclass, methods)
        if superclass is not None:
            self._environment = self._environment.enclosing
        self._environment.assign(stmt.name, klass)

    def visit_continue_stmt(self, stmt: st.Continue) -> None:
        raise Continue()

    # NOTE(review): no body is visible for this method; confirm against upstream.
    def visit_expression_stmt(self, stmt: st.Expression) -> None:

    # Wraps the declaration in a LoxFunction that captures the current scope
    # and defines it under the function's name.
    def visit_function_stmt(self, stmt: st.Function) -> None:
        function = LoxFunction(stmt, self._environment, False)
        self._environment.define(stmt.name.lexeme, function)

    # NOTE(review): both branches are empty in the visible source; the
    # statements executing each branch appear to be missing.
    def visit_if_stmt(self, stmt: st.If) -> None:
        if self._is_truthy(self._evaluate(stmt.condition)):
        elif stmt.else_branch:

    # Returning is implemented by raising Return carrying the value (None when
    # the statement has no value expression).
    def visit_return_stmt(self, stmt: st.Return) -> None:
        raise Return(
            None if stmt.value is None else self._evaluate(stmt.value))

    def visit_var_stmt(self, stmt: st.Var) -> None:
        value = None
        if stmt.initializer is not None:
            value = self._evaluate(stmt.initializer)
        self._environment.initialize(stmt.name, value)

    # NOTE(review): the `try:` line, the loop body and both handler bodies are
    # missing in the visible source; only the two `except` headers remain.
    def visit_while_stmt(self, stmt: st.While) -> None:
        while self._is_truthy(self._evaluate(stmt.condition)):
            except Break:
            except Continue:

    # NOTE(review): the `try:` line, the loop body and the handler body are
    # missing in the visible source.
    def interpret(self, stmts: List[st.Stmt]) -> None:
            for stmt in stmts:
        except RuntimeException as e:

    # Saves the current scope, switches to `env`, and restores the saved scope
    # afterwards.
    # NOTE(review): the indentation suggests `try:`/`finally:` lines and the
    # loop body are missing; confirm.
    def execute_block(self, stmts: List[st.Stmt], env: Environment) -> None:
        prev = self._environment
            self._environment = env
            for stmt in stmts:
            self._environment = prev

    # Records the scope distance for an expression; read back via
    # self._locals in the lookup/assign methods.
    def resolve(self, expr: ex.Expr, depth: int) -> None:
        self._locals[expr] = depth

    # Converts a runtime value to its display string: None -> 'nil', floats
    # drop a trailing '.0', bools are lower-cased.
    # NOTE(review): no `self` parameter although it is called as
    # self.stringify(...); presumably a @staticmethod decorator is missing --
    # confirm. The same applies to the other helpers below without `self`.
    def stringify(value: LoxValue) -> str:
        if value is None:
            return 'nil'
        if isinstance(value, float):
            text = str(value)
            if text.endswith('.0'):
                text = text[:-2]
            return text
        if isinstance(value, bool):
            return str(value).lower()
        return str(value)

    # NOTE(review): no body is visible for this method; confirm against upstream.
    def _execute(self, stmt: st.Stmt) -> None:

    def _evaluate(self, expr: ex.Expr) -> LoxValue:
        return expr.accept(self)

    # Expressions without a recorded distance are looked up in the globals.
    # NOTE(review): the second `return` directly follows the first at the same
    # indentation; an `else:` appears to be missing -- confirm.
    def _lookup_variable(self, name: Token, expr: ex.Expr):
        distance = self._locals.get(expr)
        if distance is None:
            return self._GLOBALS.get(name)
            return self._environment.get_at(distance, name.lexeme)

    # None is falsy, bools are themselves, every other value is truthy.
    # NOTE(review): no `self` parameter; see the note on stringify().
    def _is_truthy(value: LoxValue) -> bool:
        if value is None:
            return False
        if isinstance(value, bool):
            return value
        return True

    # NOTE(review): no `self` parameter; see the note on stringify().
    def _is_equal(a: LoxValue, b: LoxValue) -> bool:
        return a == b

    # Raises RuntimeException unless every operand is a float.
    # NOTE(review): no `self` parameter; see the note on stringify().
    def _check_number_operands(operator: Token, *args: LoxValue) -> None:
        for operand in args:
            if not isinstance(operand, float):
                raise RuntimeException(operator, 'Operands must be numbers.')

    # Raises RuntimeException when the divisor compares equal to zero.
    # NOTE(review): no `self` parameter; see the note on stringify().
    def _check_denominator(operator: Token, value: LoxValue) -> None:
        if value == 0:
            raise RuntimeException(operator, 'Division by zero.')
Exemple #34
        self.layers = nn.Sequential(nn.Linear(in_dim, 128), nn.ReLU(),
                                    nn.Linear(128, 128), nn.ReLU(),
                                    nn.Linear(128, out_dim))

    def forward(self, x):
        """Pass ``x`` through ``self.layers`` and return the result."""
        output = self.layers(x)
        return output

min_epsilon = 0.05
max_epsilon = 1
epsilon_decay = 80


def epsilon_episode(episode):
    """Return the exploration rate to use for the given episode index.

    The rate starts at ``max_epsilon`` for episode 0 and decays exponentially
    towards ``min_epsilon`` with a time constant of ``epsilon_decay`` episodes.
    The span is derived from the two bounds instead of a hard-coded 0.95, so
    changing either bound keeps the schedule consistent.
    """
    span = max_epsilon - min_epsilon
    return min_epsilon + np.exp(-episode / epsilon_decay) * span

# Module-level setup: environment, replay buffer, the two networks and the
# optimizer.
env = Environment("test_you")
state_space = 3
action_space = 3

batch_size = 32
max_size = 1000
memory = ReplayBuffer(state_space, max_size, batch_size)

# Uses the CUDA device with index 1 when CUDA is available, otherwise the CPU.
# NOTE(review): "cuda:1" presumably fails on a single-GPU host -- confirm.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

# Two networks with identical constructor arguments.
network = Network(state_space, action_space).to(device)
target_network = Network(state_space, action_space).to(device)

# Only `network`'s parameters are handed to the optimizer.
optimizer = optim.Adam(network.parameters())
Exemple #35
# Trains one network: builds the environment, replay memory, network and agent
# inside a TensorFlow session, then alternates training and validation per
# epoch, tracking the best validation result and writing a log to
# `args.result_dir`.
# NOTE(review): lines appear to be missing from the visible source: the
# `best_result` dict literal is never closed, there is no `try:` matching the
# `except KeyboardInterrupt:`, and the `if args.load_weights:` branch shows
# no load call. The code is left untouched; confirm against upstream.
def train_single_net(args):
    start = time.time()
    print('Current time is: %s' % get_time())
    # NOTE(review): the message names train_multi_nets although this function
    # is train_single_net -- confirm which is intended.
    print('Starting at train_multi_nets...')

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_fraction)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Initial environment, replay memory, deep_q_net and agent
        env = Environment(args)
        mem = ReplayMemory(args)
        net = FRLDQN(sess, args)
        agent = Agent(env, mem, net, args)

        # Best results so far, kept separately for the 'valid' and 'test' splits.
        best_result = {'valid': {'success_rate': {1: 0., 3: 0., 5: 0., 10: 0., -1 : 0.}, 'avg_reward': 0., 'log_epoch': -1, 'step_diff': -1},
                        'test': {'success_rate': {1: 0., 3: 0., 5: 0., 10: 0., -1 : 0.}, 'avg_reward': 0., 'log_epoch': -1, 'step_diff': -1}

        # loop over epochs
        # NOTE(review): `args.result_dir` is opened as a file despite its name.
        with open(args.result_dir, 'w') as outfile:
            # Echo every argument to stdout and to the log file, sorted by name.
            print('\n Arguments:')
            outfile.write('\n Arguments:\n')
            for k, v in sorted(args.__dict__.items(), key=lambda x:x[0]):
                print('{}: {}'.format(k, v))
                outfile.write('{}: {}\n'.format(k, v))

            if args.load_weights:
                filename = 'weights/%s_%s.h5' % (args.train_mode, args.predict_net)

                for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
                    agent.train(epoch, args.train_episodes, outfile, args.predict_net)
                    rate, reward, diff = agent.test(epoch, args.test_episodes, outfile, args.predict_net, 'valid')

                    # A new best validation success rate triggers a run on the
                    # 'test' split and, optionally, saving the weights.
                    if rate[args.success_base] > best_result['valid']['success_rate'][args.success_base]:
                        update_best(best_result, 'valid', epoch, rate, reward, diff)
                        print('best_epoch: {}\t best_success: {}\t avg_reward: {}\t step_diff: {}\n'.format(epoch, rate, reward, diff))
                        outfile.write('best_epoch: {}\t best_success: {}\t avg_reward: {}\t step_diff: {}\n\n'.format(epoch, rate, reward, diff))

                        rate, reward, diff = agent.test(epoch, args.test_episodes, outfile, args.predict_net, 'test')
                        update_best(best_result, 'test', epoch, rate, reward, diff)
                        print('\n Test results:\n success_rate: {}\t avg_reward: {}\t step_diff: {}\n'.format(rate, reward, diff))
                        outfile.write('\n Test results:\n success_rate: {}\t avg_reward: {}\t step_diff: {}\n'.format(rate, reward, diff))

                        if args.save_weights:
                            filename = 'weights/%s_%s.h5' % (args.train_mode, args.predict_net)
                            net.save_weights(filename, args.predict_net)
                            print('Saved weights %s ...\n' % filename)

                    # NOTE(review): only the message is visible; no `break`
                    # follows it here, so the loop would continue -- confirm.
                    if epoch - best_result['valid']['log_epoch'] >= args.stop_epoch_gap:
                        print('-----Early stopping, no improvement after %d epochs-----\n' % args.stop_epoch_gap)

            except KeyboardInterrupt:
                print('\n Manually kill the program ... \n')

            # Final summary of the best results, written to stdout and the log.
            print('\n\n Best results:')
            outfile.write('\n\n Best results:\n')
            for data_flag, results in best_result.items():
                for k, v in results.items():
                    print('\t\t{}: {}'.format(k, v))
                    outfile.write('\t\t{}: {}\n'.format(k, v))
            end = time.time()
            outfile.write('\nTotal time cost: %ds\n' % (end - start))

    print('Current time is: %s' % get_time())
    print('Total time cost: %ds\n' % (end - start))
Exemple #36
# Simulation driver: plays games in an Environment, stores transitions in a
# ReplayMemory, and periodically trains the learning agent and refreshes the
# target network.
# NOTE(review): this class is heavily truncated in the visible source (most
# of the __init__ signature, docstring quotes, call continuations, loop and
# branch bodies are missing). The code is left untouched; the comments below
# describe only what the remaining lines show.
class Sim:
    def __init__(self,
                 exploration_range=(0.1, 1.0),

        self.allies = allies
        self.opponents = opponents
        self.world_size = world_size
        self.moves_limit = sim_moves_limit
        self.training_rate = training_rate
        self.policy_dist_rate = update_rate
        self.exploration_steps = exploration_steps
        self.exploration_range = exploration_range
        self.exploration_step_value = \
        self.experience_replay = ReplayMemory(batch_size=train_batch_size,
        self.training_batch_size = train_batch_size
        self.n_games = n_games
        self.replay_mem_limit = replay_mem_limit
        self.environment = Environment(n_rows=world_size[0],

        # Per-run metric history; both lists are appended to during run().
        self.metrics = {"reward": list(), "loss": list()}

        self.viz = viz
        self.viz_execution = viz_execution
        self.train_saving = train_saving

    # NOTE(review): the two prose lines below are presumably the remains of a
    # docstring whose quote lines were lost.
    def run(self):
        Runs general simulation and takes care of experience table population
        and agents training

        sim = 0

        # One iteration per game, up to n_games.
        while sim < self.n_games:

            sim_moves = 0

            # Prune replay memory by 1/5 if over limit size
            if self.experience_replay.is_full():

            # Get agent that is training
            training_agent = next(
                filter(lambda ag: ag.training, self.environment.agents))

            episode_reward = []

            while sim_moves < self.moves_limit and not self.environment.is_over(

                # Apply step in Environment
                # terminal_state is True only on the last allowed move.
                curr_state, next_state, reward = self.environment.step(
                    terminal_state=sim_moves == self.moves_limit - 1)

                # Get agent chosen action
                action = training_agent.get_chosen_action()


                # Store transition in replay table
                    "state": curr_state,
                    "action": action,
                    "next_state": next_state,
                    "reward": reward["reward_value"],

                sim_moves += 1

            episode_reward = pd.DataFrame(episode_reward)

            # Append new collected avg episode reward
                    "Simulation No.":
                    "Avg Cumulative Reward":
                    "Global Capture Reward":
                    episode_reward["Global Capture Reward"].max(),
                    "Local Capture Reward":
                    episode_reward["Local Capture Reward"].mean(),
                    "Reachability Reward":
                    episode_reward["Reachability Reward"].mean()

            sim += 1

            # Diminishing exploration rate
            if sim < self.exploration_steps:

            # Train every N simulations
            if sim % self.training_rate == 0:

            # Update training net every 10 simulations
            # NOTE(review): the interval actually used is self.policy_dist_rate,
            # not a literal 10.
            if sim % self.policy_dist_rate == 0:
                print("Sim No. : {}".format(sim))
                print("Average Loss : {}".format(
                    sum(self.metrics["loss"]) / len(self.metrics["loss"])))
                mdf = pd.DataFrame(self.metrics["reward"])
                print("Average reward : {}".format(
                    mdf["Avg Cumulative Reward"].mean()))
                print("Average GCR : {}".format(
                    mdf["Global Capture Reward"].mean()))
                print("Average LCR : {}".format(
                    mdf["Local Capture Reward"].mean()))
                print("Average RR : {}".format(
                    mdf["Reachability Reward"].mean()))

            # Create GIF

            # Save model checkpoint
            self.save_checkpoint(training_agent, sim)

            # Reset game setting

    # Copies the training network into the target network by saving it under
    # "Models/temporary_update" and loading that file back.
    def update_target_net(self):
        self.environment.training_net.save_model("Models/", "temporary_update")
        self.environment.target_net.load_model("Models/", "temporary_update")

    # NOTE(review): the two prose lines below are presumably the remains of a
    # docstring; the body of the can_replay() guard and the arguments of
    # create_training_batch(...) are missing.
    def train_ally(self):
        Takes a random batch from experience replay memory and uses it to train
        the agent's brain NN
        if not self.experience_replay.can_replay():

        # Sample batch fro replay memory
        mini_batch = self.experience_replay.sample()

        training_net = self.environment.training_net
        target_net = self.environment.target_net

        X, y = self.create_training_batch(

        history = training_net.train(X, y, self.training_batch_size)

        # Extend the loss history with the losses reported by this training call.
        self.metrics["loss"] += history.history["loss"]

    # Builds the (input, target) arrays for one training step from a batch of
    # transitions.
    def create_training_batch(self, target_net, training_net, mini_batch):

        # Build input and target network batches
        input_batch = np.ndarray(shape=(self.training_batch_size,
                                        self.environment.n_cols, 4))

        target_batch = np.ndarray(shape=(self.training_batch_size, 1, 8))

        gamma = training_net.discount_rate

        for i, transition in enumerate(mini_batch):

            # Check for possible ending state
            # NOTE(review): an `else:` appears to be missing after the terminal
            # case -- as written, predict() would run on a None next_state.
            if transition["next_state"] is None:
                # Assign reward as target Q values
                target = transition["reward"]
                # Compute Q values on next state
                q_next = target_net.model.predict(transition["next_state"])[0]

                # Filter not allowed moves
                agent_position = np.argwhere(transition["state"][0, :, :, 0])
                allowed_moves = self.environment.allowed_moves(
                    (agent_position[0][0], agent_position[0][1]))
                # Disallowed moves become NaN in the mask.
                moves_mask = np.array(
                    [1 if pos else np.nan for pos in allowed_moves])
                masked_q_next = q_next * moves_mask

                # Compute target Q value
                target = transition["reward"] + gamma * (

            # Update Q values vector with target value
            target_q = training_net.model.predict(transition["state"])[0]
            target_q[0][transition["action"]] = target

            input_state = np.reshape(transition["state"],

            input_batch[i] = input_state
            target_batch[i] = target_q

        return input_batch, target_batch

    # Replays one game and writes it out as a GIF when visualisation is
    # configured and viz_execution(sim_number) is truthy.
    def visualize_gif(self, sim_number):

        # Create GIF
        if self.viz and self.viz_execution and self.viz_execution(sim_number):

            # Reset game

            # play game run
            sim_moves = 0
            env_seq = [copy.deepcopy(self.environment.grid)]
            while sim_moves < self.moves_limit and not self.environment.is_over(
                sim_moves += 1

            # Connect frame as save gif
            frames = [self.viz.single_frame(env) for env in env_seq]
            # NOTE(review): uses the bare name `viz`, not `self.viz` -- confirm
            # a module-level `viz` exists.
            viz.create_gif(frames, name='simulation_%d' % sim_number)

    # Pickles the metrics and saves the agent's model under 'Models/' when
    # train_saving is set and train_saving(sim_number) is truthy.
    def save_checkpoint(self, training_agent, sim_number):

        if self.train_saving is not None and self.train_saving(sim_number):
            save_path = 'Models/'
            # NOTE(review): the body of this `if` (presumably creating the
            # directory) is missing in the visible source.
            if not os.path.exists(save_path):

            with open(save_path + 'metrics' + '.pkl', "wb") as f:
                pickle.dump(self.metrics, f)
            # Save model
            training_agent.brain.save_model(save_path, str(sim_number))
# Loads a tab-separated vocabulary from `dataset_name` when that file exists.
# NOTE(review): an `else:` appears to be missing before the create_vocabulary()
# call -- as written, a loaded vocabulary is immediately overwritten. The write
# statements inside the final `if` are also missing. Confirm against upstream.
if os.path.exists(dataset_name):
    with open(dataset_name, 'r') as f:
        t = f.read()
        vocabulary = t.split('\t')
    vocabulary_size = len(vocabulary)
    vocabulary = create_vocabulary(text)
    vocabulary_size = len(vocabulary)
    with open('datasets/all_scipop_free_voc.txt', 'w') as f:
        for w_idx, w in enumerate(vocabulary):
            if w_idx < len(vocabulary) - 1:
cpiv = get_positions_in_vocabulary(vocabulary)
# env = Environment(Lstm, BatchGenerator, vocabulary=vocabulary)
env = Environment(Lstm, BatchGenerator, vocabulary=vocabulary)

# Extra feed entries: dropout 0.8 in `add_feed`, 1.0 in `valid_add_feed`.
add_feed = [{'placeholder': 'dropout', 'value': 0.8}]
valid_add_feed = [{'placeholder': 'dropout', 'value': 1.}]

    num_nodes=[1300, 1300],
    # vocabulary_size=vocabulary_sizes[0],
Exemple #38
# Stand-in for a serial port object: data written to it is handed to an
# Environment, whose replies are buffered in `self._data` until read.
# NOTE(review): the __init__ signature is truncated in the visible source (its
# parameter list is missing), so the constructor interface cannot be confirmed.
class Serial:

    ## init(): the constructor.  Many of the arguments have default values
    # and can be skipped when calling the constructor.
    def __init__(self,
        self.name = port
        self.port = port
        self.timeout = timeout
        self.parity = parity
        self.baudrate = baudrate
        self.bytesize = bytesize
        self.stopbits = stopbits
        self.xonxoff = xonxoff
        self.rtscts = rtscts
        self.is_open = False
        # Number of buffered characters; kept equal to len(self._data).
        self.in_waiting = 0
        self._data = ""

        # Both are created in open() and reset to None in close().
        self.stop_event = None
        self.env = None

    ## isOpen()
    # returns True if the port to the Arduino is open.  False otherwise
    def isOpen(self):
        return self.is_open

    ## open()
    # opens the port
    # Creates the stop event and the Environment; does nothing if already open.
    def open(self):
        if not self.is_open:
            self.stop_event = threading.Event()
            self.env = Environment(self.notify, self.stop_event)
            self.is_open = True

    ## close()
    # closes the port
    # NOTE(review): the stop event is discarded without being set here --
    # confirm whether the Environment needs to be signalled first.
    def close(self):
        if self.is_open:
            self.stop_event = None
            self.env = None
            self.is_open = False

    # Callback passed to the Environment in open(): appends decoded bytes to
    # the buffer.
    def notify(self, data):
        self._data += data.decode('utf-8')
        self.in_waiting = len(self._data)

    ## write()
    # writes a string of characters to the internal buffer
    # Only input ending in a newline is executed; anything else is dropped.
    # NOTE(review): rcv[-1] raises IndexError for empty input -- confirm
    # callers never pass b"".
    def write(self, data):
        rcv = data.decode('utf-8')
        if rcv[-1] == '\n':
            # execute the command and get back the output
            d = self.env.match_and_execute(rcv) + "\n"
            self._data += d
            self.in_waiting = len(self._data)

    ## read()
    # blocking read; when n > 0, then will be waiting for data
    # NOTE(review): the body of the wait loop is missing in the visible source.
    def read(self, n=1):
        out = ''
        if n > 0:
            while len(self._data) < n:

            out = self._data[0:n]
            self._data = self._data[n:]
            self.in_waiting = len(self._data)

        return str.encode(out)

    ## readline()
    # Returns the buffered text up to and including the first newline, encoded.
    # NOTE(review): str.index() raises ValueError when no newline is buffered
    # and never returns -1, so the check below cannot fail; str.find() was
    # presumably intended. An `else:` also appears to be missing before the
    # second return. Confirm against upstream.
    def readline(self):
        returnIndex = self._data.index("\n")
        if returnIndex != -1:
            s = self._data[0:returnIndex + 1]
            self._data = self._data[returnIndex + 1:]
            self.in_waiting = len(self._data)
            return str.encode(s)
            return str.encode("")

    ## __str__()
    # The id shown in the text is a fixed literal, not this object's id.
    def __str__(self):
        return  "Serial<id=0xa81c10, open=%s>( port='%s', baudrate=%d," \
               % ( str(self.is_open), self.port, self.baudrate ) \
               + " bytesize=%d, parity='%s', stopbits=%d, xonxoff=%d, rtscts=%d)"\
               % ( self.bytesize, self.parity, self.stopbits, self.xonxoff,
                   self.rtscts )
Exemple #39
# Name used for the experiment; also tested below as a filesystem path.
experiment_name = 'dummy_demo'
# NOTE(review): this `if` has no body in the visible source (presumably the
# directory creation is missing) -- confirm.
if not os.path.exists(experiment_name):

# do you want to continue your old run?
continue_run = False
continue_file = ''

# initialises the framework. enemy number can be changed accordingly

enemy = 2

# NOTE(review): the Environment(...) call is truncated after its first keyword
# argument in the visible source.
env = Environment(experiment_name=experiment_name,

# calculates the number of weights per agent
# (sensors + 1) * n_hidden plus (n_hidden + 1) * 5, as computed below.
n_hidden = env.player_controller.n_hidden[0]
n_vars = (env.get_num_sensors() + 1) * n_hidden + (n_hidden + 1) * 5

best_fitness = 0
min_weight = -1
max_weight = 1

# parameter settings of the algorithm
pop_size = 10
gen_number = 10
Exemple #40
# Toggle for rendering.
# NOTE(review): the indented Renderer() line below has no visible enclosing
# statement (presumably an `if RENDER:` line is missing) -- confirm.
RENDER = False

    render = Renderer()

total_score = 0.0
trials = 100000
for _ in range(trials):
    # Progress output roughly every 1/1000th of the trials.
    if _ % max(int(trials / 1000), 1) == 0:
        sys.stdout.write("\r{:.2f}% complete".format(_ * 100.0 / trials))

    # A fresh environment per trial; the inner loop runs while both humans and
    # zombies remain.
    env = Environment(MAX_HUMANS, MAX_ZOMBIES, better_rewards=True)
    while len(env.humans) > 0 and len(env.zombies) > 0:
        # x = randrange(config.WIDTH)
        # y = randrange(config.HEIGHT)

        # x = env.zombies[0].x
        # y = env.zombies[0].y

        # x = env.humans[0].x
        # y = env.humans[0].y

        # Four candidate target points.
        # NOTE(review): presumably the corners of a 16000 x 9000 field -- confirm.
        points = [(0, 0), (16000, 0), (0, 9000), (16000, 9000)]
        # x, y = points[randrange(len(points))]

        # Initial values for a minimum search; the search itself is not
        # visible in this excerpt.
        min_dist = 16000 + 9000
        index = -1
Exemple #41
 def open(self):
     """Mark the port open, creating its stop event and environment first.

     Does nothing when the port is already open.
     """
     if self.is_open:
         return
     self.stop_event = threading.Event()
     self.env = Environment(self.notify, self.stop_event)
     self.is_open = True
Exemple #42
    help='IP address for V-Rep server (default:'
    help='Port for SLAM-Sim server (default: 5000)'

# Parsed command-line options (the parser definition precedes this excerpt).
namespace = parser.parse_args()

env = Environment()

app = Flask(__name__)

# POST endpoint: connects `env` to the V-REP server given by the `ip` and
# `port` query parameters.
# NOTE(review): the handler is truncated in the visible source -- no failure
# branch and no return statement are shown. int(port) raises if the `port`
# parameter is absent or not numeric; confirm how callers are expected to
# behave.
@app.route("/start-v-rep-server", methods=['POST'])
def start_vrep_server():
    vrep_ip = request.args.get('ip')
    port = request.args.get('port')
    env.connect(vrep_ip, int(port))
    # A client_id other than -1 is treated as a successful connection.
    if env.client_id != -1:
        time_ = datetime.now().time().isoformat()
        env.print_message(time_ + ': connected')
        answer = 'Successfully connected to V-REP server'
        # print('\033[1;32;40m ' + answer)
    # NOTE(review): this indented fragment has no visible enclosing statement,
    # and the closing braces of the dict literals and of the Environment(...)
    # call are missing in the visible source. Code left untouched.
    from agent import DDPG, DeepQAgent
    from environment import Environment

    # Reference data for the end-of-episode check: coordinate lists, image
    # paths and button positions.
    done_comparison_data = {
        'coords_done_fail': [45, 60, 118, 180],
        'coords_done_success': [5, 16, 122, 174],
        'img_done_fail': 'data/s8_cut_try_again.png',
        'img_done_success': 'data/game_score_s8.png',
        'restart_btn_coords': [640, 1110],
        'restart_ongame': [(2764, 93), (2624, 552)],

    # Scoring configuration.
    # NOTE(review): the two *_importance values sum to 1.0 and are presumably
    # weights -- confirm.
    scores = {
        'coords_diamonds_gathered': [11, 27, 25, 35],
        'digits_mask_addr': 'data/digits',
        'match_threshold': 10,
        'state_area': [28, 112, 0, 296],
        'time_importance': 0.7,
        'diamonds_importance': 0.3,
        'episode_time_limit': 60,
        'diamonds_total': 7

    env = Environment(device_ref_elements_data={
        'done_comparison_data': done_comparison_data,
        'scores': scores
    #agent = DDPG(env)
    agent = DeepQAgent(env)
    train(agent, env, episode_seconds_constrain=45)
Exemple #44
 def tearDown(self):
     reset = Environment(CANONICAL_SCHEME=unicode,
Exemple #45
import matplotlib.backends.backend_agg as agg

import pygame, sys
from pygame.locals import *
from environment import Environment
from agent import Agent, Action
from main import *
import numpy as np

#      teaching the agent to clean trashes properly with reinforcement learning      #

agent = Agent(pos=(0, 0))  # create a new agent
env = Environment(agent)  # add the agent to the environment

# Scale factor applied to the agent's position.
# NOTE(review): presumably grid cells to pixels -- confirm.
facteur = 50
agent_pos = env.agent.position  # get the agent's position
agent_pos = (agent_pos[0] * facteur, agent_pos[1] * facteur
             )  # multiply it by a factor

n_a = env.action_space_n  # get the action space size
n_s = env.state_space_n  # get the state space size

# Both tables have one row per state and one column per action, zero-filled.
q_table = np.zeros([n_s, n_a])  # init Q table
e_table = np.zeros([n_s, n_a])  # init eligibility traces

# cleaning rate for each episode
clean_rate = []
crashes = []
Exemple #46
#!/usr/bin/env python36
# coding: utf-8

from Devices import *
from environment import Environment
from system import System
from request import Request

# NOTE(review): every device created below is bound to a loop variable that
# the next iteration overwrites; nothing visible here stores or registers the
# objects, so the constructors presumably register themselves -- confirm.
env = Environment()
sys_ = System(env)

# # # # # DEVICES # # # # #
# One Door per door name known to the environment.
for door_name in env.door_names:
    door = Door(door_name)

# Per room: an indoor light and an HVAC unit; rooms with a window also get a
# Window and a Blind.
for room_name in env.room_names:
    lightsIndoor = IndoorLight(room_name)

    hvac = HVAC(room_name)

    if env.rooms[room_name].has_window():
        windows = Window(room_name)

        blinds = Blind(room_name)

lightsOutdoor = OutdoorLight("Outdoor Lights")
    def sample_minbatch(self):
        # Draws 100 indices from 0..99 and gathers the matching buffer entries
        # into an array.
        # NOTE(review): np.random.choice samples with replacement by default,
        # so entries can repeat; the range and size are hard-coded to 100
        # regardless of the buffer length; and the np.array(...) call is not
        # closed in the visible source. Confirm against upstream.
        minibatch_indices = np.random.choice(
            range(100), 100)  #choose 100 transitions randomly from the buffer
        mini_batch = np.array([self.buffer[i] for i in minibatch_indices],
        return mini_batch

# Main entry point
if __name__ == "__main__":

    # Create an environment.
    # If display is True, then the environment will be displayed after every agent step. This can be set to False to speed up training time. The evaluation in part 2 of the coursework will be done based on the time with display=False.
    # Magnification determines how big the window will be when displaying the environment on your monitor. For desktop monitors, a value of 1000 should be about right. For laptops, a value of 500 should be about right. Note that this value does not affect the underlying state space or the learning, just the visualisation of the environment.
    environment = Environment(display=False, magnification=500)
    # Create an agent
    agent = Agent(environment)
    # Create a DQN (Deep Q-Network)
    dqn = DQN()
    #Create an experience replay buffer
    buffer = ReplayBuffer()


    #Initialise buffer with 100 transitions
    count = 0
    while count < 101:
        # Reset the environment for the start of the episode.