Example #1
    def get_action(self, ob):  # not used
        viewport, health = ob
        inputs = {'viewport': np.array([viewport], dtype=np.float32),
                  'health': np.array([health], dtype=np.int32)}
        outputs = self.model(inputs)
        distr = tf.cast(tf.nn.softmax(outputs['policy_logits'][0]), tf.float64)
        distr /= sum(distr)  # renormalize so the probabilities sum to exactly 1 for np.random.choice
        action_index = np.random.choice(len(env.Action), p=distr)
        return env.Action(action_index)
Example #2
def q_test() -> bool:
    environment_ = environment.Environment(grid_=data.GRID_1, rng=rng)
    q = StateActionFunction(environment_)

    state_ = environment.State(common.XY(x=4, y=2))
    action_ = environment.Action(common.XY(x=1, y=0))
    print(q[state_, action_])
    q[state_, action_] = 2.0
    q[state_, action_] += 0.5
    print(q[state_, action_])

    return True
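
The q_test above only exercises dictionary-style indexing on StateActionFunction. A minimal sketch of such a class, assuming it is a tabular mapping from hashable (state, action) pairs to floats with a default initial value (the attribute names here are illustrative, not taken from the original project), could look like:

from collections import defaultdict

class StateActionFunction:
    """Tabular Q(s, a) keyed by (state, action) pairs; a sketch, not the original class."""

    def __init__(self, environment_, initial_value: float = 0.0):
        self._environment = environment_
        self._values = defaultdict(lambda: initial_value)

    def __getitem__(self, state_action):
        state_, action_ = state_action
        return self._values[(state_, action_)]

    def __setitem__(self, state_action, value: float):
        state_, action_ = state_action
        self._values[(state_, action_)] = value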
Example #3
    def get_action(self, env, state):
        orders = env.available_orders

        forklift = None
        order = None

        if len(orders) > 0:
            for f in env.forklifts:
                if f.order is None:  # if this forklift is available
                    forklift = f
                    # pick an order at random
                    order = np.random.choice(orders)
                    break

        return we.Action(forklift, order)
Example #4
    def get_action(self, env, state):
        orders = env.available_orders

        forklift = None
        order = None

        if len(orders) > 0:
            for f in env.forklifts:
                if f.order is None:  # if this forklift is available
                    forklift = f
                    # pick orders in the order they arrived
                    order = orders[0]
                    break

        return we.Action(forklift, order)
Example #5
    def get_action(self, env, state):

        # distribute orders sequentially from a predefined sequence
        forklift = None
        order = None

        if len(self.order_sequence) > 0:
            for f in env.forklifts:
                if f.order is None:  # if this forklift is available
                    forklift = f
                    # take the next order from the predefined sequence
                    order = self.order_sequence[0]
                    self.order_sequence.remove(order)
                    break

        return we.Action(forklift, order)
Example #6
    def get_action(self, env, state):
        orders = env.available_orders
        forklift = None
        order = None

        if len(orders) > 0:
            min_distance = MAX_DISTANCE

            for f in env.forklifts:
                if f.order is None:  # if this forklift is available
                    # choose the order on the shelf nearest to this forklift
                    temp_order, temp_distance = self.get_nearest_order(
                        f.pos, orders)

                    if temp_distance < min_distance:
                        min_distance = temp_distance  # track the closest forklift/order pair found so far
                        forklift = f
                        order = temp_order

        return we.Action(forklift, order)
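
Example #6 calls a get_nearest_order helper that is not shown in the listing. A plausible sketch, assuming positions expose x and y attributes and that Manhattan distance is the metric (both assumptions, not confirmed by the source), is:

    def get_nearest_order(self, pos, orders):
        # Hypothetical helper: return the order closest to pos together with its distance.
        # Manhattan distance and .pos.x/.pos.y attributes are assumptions, not from the source.
        nearest_order = None
        nearest_distance = float('inf')
        for o in orders:
            distance = abs(o.pos.x - pos.x) + abs(o.pos.y - pos.y)
            if distance < nearest_distance:
                nearest_order = o
                nearest_distance = distance
        return nearest_order, nearest_distance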
Example #7
def environment_test() -> bool:
    environment_ = environment.Environment(grid_=data.GRID_1, rng=rng)

    for state_ in environment_.states():
        print(state_)

    print()

    for action_ in environment_.actions():
        print(action_)

    print()

    state_ = environment.State(common.XY(x=4, y=2))
    action_ = environment.Action(common.XY(x=1, y=0))
    response_ = environment_.from_state_perform_action(state_, action_)
    print(state_, action_)
    print(response_)

    return True
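
Both test helpers print their output and return True, so a plausible way to run them together (assuming the module already defines the rng passed to Environment above) is a plain main guard:

if __name__ == '__main__':
    assert environment_test()
    assert q_test()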
Example #8
    def get_action(self, env, state):
        if self.initial_env == None:
            self.initial_env = env.copy()

        forklift = None
        mcts_order = None
        copied_order = None

        if len(env.available_orders) > 0:
            for f in env.forklifts:
                if f.order is None:  # if this forklift is available
                    forklift = f
                    copied_order = self.exeucte_mcts(self.initial_env, state,
                                                     forklift)
                    break

        if copied_order is not None:
            mcts_order = env.get_order_by_no(copied_order.no)

        return we.Action(forklift, mcts_order)
Example #9
    def get_actions(self, epsilon):
        if not self._observations:
            return []
        vps = []
        healths = []
        for (vp, h), _ in self._observations:
            vps.append(vp)
            healths.append(h)
        healths = np.array(healths, dtype=np.int32)
        vps = np.array(vps, dtype=np.float32)
        inputs = {'viewport': vps, 'health': healths}
        outputs = self.model(inputs)
        distr = tf.nn.softmax(tf.cast(outputs['policy_logits'], tf.float64)).numpy()
        if epsilon > 0:
            # epsilon exploration: blend the policy with a uniform distribution over actions
            uniform_distr = np.full(distr.shape, 1 / len(env.Action))
            distr = distr * (1 - epsilon) + uniform_distr * epsilon
        #print(distr)
        #distr = distr/np.sum(distr,1) # distribution of actions (columns) for each player (rows)
        actions = []
        for d, (_, pi) in zip(distr, self._observations):
            #print(pi, d)
            action = env.Action(np.random.choice(len(env.Action), p=d))
            actions.append((action, pi))
        self._observations = []
        return actions
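
The epsilon handling in get_actions blends the policy's softmax output with a uniform distribution, distr * (1 - epsilon) + uniform * epsilon, which remains a valid probability vector. A self-contained check of that mixing step with a made-up three-action distribution:

import numpy as np

policy = np.array([0.7, 0.2, 0.1])   # hypothetical softmax output for one player
epsilon = 0.1
uniform = np.full(policy.shape, 1.0 / policy.size)
mixed = policy * (1.0 - epsilon) + uniform * epsilon
assert np.isclose(mixed.sum(), 1.0)  # still sums to 1, so np.random.choice accepts it
print(mixed)                         # approximately [0.663, 0.213, 0.123]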
def realistic_simulation(MAX_CYCLES=MAX_CYCLES,
                         SHOW_GRAPHICAL_SIMULATION=SHOW_GRAPHICAL_SIMULATION,
                         UPDATE_PERIOD=UPDATE_PERIOD,
                         AUTO_GENERATE_RATE=AUTO_GENERATE_RATE):
    global n_border_nodes, distance_matrix
    import environment as env
    from environment import car
    number_cars_generated = 0
    n_nodes = len(env.net.network.vertexes)
    border_nodes_start = n_nodes - n_border_nodes
    border_nodes_end = n_nodes - 1
    higher_probability_factor = 4
    prob_node = 1 / (border_nodes_start +
                     higher_probability_factor * n_border_nodes)
    print("prob_node: ", prob_node)
    print(
        f"number of normal nodes: {border_nodes_start}, number of border nodes: {n_border_nodes}"
    )
    print(
        f"summed probability: {border_nodes_start * prob_node + higher_probability_factor * n_border_nodes * prob_node}"
    )
    for cycle in range(MAX_CYCLES):
        while np.random.rand() < AUTO_GENERATE_RATE:
            start_node_id, end_node_id = None, None
            start_node_id = np.random.choice(
                [x for x in range(n_nodes)],
                p=[prob_node for x in range(border_nodes_start)] + [
                    higher_probability_factor * prob_node
                    for x in range(n_border_nodes)
                ])
            is_border_node = np.random.choice(
                [0, 1],
                p=[
                    prob_node * border_nodes_start,
                    higher_probability_factor * prob_node * n_border_nodes
                ])
            if is_border_node == 1:
                distances_to_border_nodes = distance_matrix[
                    start_node_id, border_nodes_start:]
                nodes_sorted = np.argsort(distances_to_border_nodes)
                nodes_sorted = nodes_sorted[1:]
                distance_index = int(
                    np.round(
                        (1 - np.random.rand()**2) * (len(nodes_sorted) - 1)))
                end_node_id = nodes_sorted[distance_index]
            else:
                distances_to_normal_nodes = distance_matrix[
                    start_node_id, :border_nodes_start]
                nodes_sorted = np.argsort(distances_to_normal_nodes)
                nodes_sorted = nodes_sorted[1:]
                distance_index = int(
                    np.round(
                        (1 - np.random.rand()**2) * (len(nodes_sorted) - 1)))
                end_node_id = nodes_sorted[distance_index]

            # end_node_id = [np.random.choice([x for x in range(len(net.network.vertexes))])]  # later allow several end nodes
            new_car = car.Car(number_cars_generated, start_node_id,
                              [end_node_id])
            env.net.network.add_car(new_car.actual_edge)
            env.cars[number_cars_generated] = new_car
            # generate the action and schedule it in the action plan
            new_action = env.Action(cycle, number_cars_generated)
            index = env.linear_search_action_plan(new_action.cycle_nr)
            env.action_plan.insert(index, new_action)
            number_cars_generated += 1
        try:
            while env.action_plan[0].cycle_nr == cycle:  # execute any actions scheduled for this cycle
                env.action_plan[0].perform_action(cycle)
                del env.action_plan[0]
        except IndexError:
            pass
        if cycle % UPDATE_PERIOD == 0:
            print(
                f"Now reached cycle {cycle}. Number of cars simulated: {number_cars_generated}"
            )
            flow_rate = np.sum(
                [x.n_cars / x.weight for x in env.net.network.edges])
            avg_flow_rate = flow_rate / len(env.cars)
            #avg_actual_time_per_edge = 1 / flow_rate  # should be proportional to: np.sum([net.network.edges[x.actual_edge] for x in env.cars])
            avg_total_time_per_edge = np.sum([
                np.sum([
                    env.net.network.edges[x.future_edge_IDs[y]].weight
                    for y in range(len(x.future_edge_IDs))
                ]) + env.net.network.edges[x.actual_edge].weight
                for x in env.cars.values()
            ]) / np.sum(
                [len(x.future_edge_IDs) + 1 for x in env.cars.values()])
            avg_total_time_per_car = np.sum([
                np.sum([
                    env.net.network.edges[x.future_edge_IDs[y]].weight
                    for y in range(len(x.future_edge_IDs))
                ]) + env.net.network.edges[x.actual_edge].weight
                for x in env.cars.values()
            ]) / len(env.cars)
            print(
                f"Absolute flow rate: {flow_rate};\nAverage flow rate: {avg_flow_rate};\nAverage time per traversed edge: {avg_total_time_per_edge};"
            )
            print(
                f"Average total travel time per car: {avg_total_time_per_car};\nTotal number of cars: {len(env.cars)}"
            )
            if SHOW_GRAPHICAL_SIMULATION:
                env.plot_with_networkx()