Example #1
 def serialize(cls, value, ctx):
     # Serialize a (key, value) pair: write the header, then the typed key,
     # then the typed value, each prefixed with its resolved type id.
     type_id = register.get_id(cls)
     is_ref, hash_code = ctx.is_ref(value)
     ctx.write(struct.pack('<i?i', type_id, is_ref, hash_code))
     if is_ref:
         return
     k, v = value
     key_type, k = decide(k)
     key_type_id = register.get_id(key_type)
     ctx.write(struct.pack('<i', key_type_id))
     key_type.serialize(k, ctx)
     value_type, v = decide(v)
     value_type_id = register.get_id(value_type)
     ctx.write(struct.pack('<i', value_type_id))
     value_type.serialize(v, ctx)
Example #2
 def serialize(cls, value, ctx):
     type_id = register.get_id(cls)
     is_ref, hash_code = ctx.is_ref(value)
     ctx.write(struct.pack('<i?i', type_id, is_ref, hash_code))
     if is_ref:
         return
     # Serializable subclasses record the serialization type of each field,
     # so instance attributes do not need to be restricted to ProtocolType
     # the way container element types are. An attribute declared as
     # ProtocolType is still allowed, though (a sketch of such a subclass
     # follows this example).
     for desc in cls.__serialize_fields__:
         _value = getattr(value, desc.name)
         _type = desc.type_info.type
         if _type == ProtocolType:
             _type, _value = decide(_value)
         else:
             _, _value = decide(_value)
         type_id = register.get_id(_type)
         ctx.write(struct.pack('<i', type_id))
         _type.serialize(_value, ctx)
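The field descriptors walked above are not defined in this snippet. Below is a minimal, purely hypothetical sketch of what cls.__serialize_fields__ might contain, using made-up names (FieldDescriptor, TypeInfo, StringType, Int32Type) that only mirror the attributes accessed above (desc.name, desc.type_info.type); it is not the library's actual API.

# Hypothetical sketch only -- these names are illustrative stand-ins.
class StringType: pass      # stand-in for a concrete BaseType subclass
class Int32Type: pass       # stand-in for a concrete BaseType subclass
class ProtocolType: pass    # stand-in for the dynamic protocol type

class TypeInfo:
    def __init__(self, type_):
        self.type = type_

class FieldDescriptor:
    def __init__(self, name, type_):
        self.name = name
        self.type_info = TypeInfo(type_)

class Player:  # in the real code this would be a Serializable subclass
    __serialize_fields__ = (
        FieldDescriptor('name', StringType),     # fixed field type
        FieldDescriptor('score', Int32Type),     # fixed field type
        FieldDescriptor('extra', ProtocolType),  # resolved via decide() at runtime
    )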
Example #3
 def serialize(cls, value, ctx):
     # Serialize a container: write the header and length, then each item
     # prefixed with the type id resolved by decide().
     type_id = register.get_id(cls)
     is_ref, hash_code = ctx.is_ref(value)
     ctx.write(struct.pack('<i?i', type_id, is_ref, hash_code))
     if is_ref:
         return
     length = len(value)
     ctx.write(struct.pack('<i', length))
     for item in value:
         _type, _item = decide(item)
         _type_id = register.get_id(_type)
         ctx.write(struct.pack('<i', _type_id))
         _type.serialize(_item, ctx)
Example #4
File: MC.py Project: wuyou33/MTA
def MC(env,
       episodes,
       target,
       behavior,
       Lambda,
       gamma=lambda x: 0.95,
       alpha=0.05,
       beta=0.0001,
       diagnose=False):
    """
    episodes:   number of episodes
    target:     target policy matrix (|S|*|A|)
    behavior:   behavior policy matrix (|S|*|A|)
    Lambda:     LAMBDA object determining each lambda for each feature (or state or observation)
    gamma:      anonymous function determining the discount factor for each feature (or state or observation)
    alpha:      learning rate for the weight vector of the values
    beta:       learning rate for the auxiliary vector for off-policy
    """
    learner = MC_LEARNER(env)
    expected_return_trace = []
    variance_of_return_trace = []

    for _ in range(episodes):
        state, done = env.reset(), False

        # Get the (s, a, r) pairs for an entire episode.
        episode = []
        while not done:
            action = decide(state, behavior)
            next_state, reward, done, _ = env.step(action)
            if done:
                learner.return_counts[next_state] += 1
            episode.append((state, action, reward))
            state = next_state

        expected_return_trace.append(np.copy(learner.expected_return))
        variance_of_return_trace.append(np.copy(learner.variance_of_return))

        # Update expected G for every visit.
        G = 0.0
        for t in range(len(episode) - 1, -1, -1):
            gamma_val = gamma(state)
            state, action, reward = episode[t]
            rho = importance_sampling_ratio(target, behavior, state, action)
            G = rho * (reward + gamma_val * G)

            learner.backward_step(state, G)

    return expected_return_trace, variance_of_return_trace, learner.return_counts
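The off-policy correction in the episode loop relies on importance_sampling_ratio, which this snippet does not define. Given the docstring's convention that target and behavior are |S|*|A| probability matrices, it is presumably the ratio of the two action probabilities; a minimal sketch under that assumption:

def importance_sampling_ratio(target, behavior, state, action):
    # rho = pi(a|s) / b(a|s), assuming target and behavior are |S|*|A|
    # numpy arrays of action probabilities as described in the docstring.
    return target[state, action] / behavior[state, action]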
Example #5
def clipped_PPO_loss(memories, nn_policy, nn_value, old_log_policy, adv, epsilon, device):
    '''
    Compute the three PPO losses: the clipped policy-gradient surrogate,
    the value-function MSE loss and an imitation (expert action) MSE loss.
    '''
    rewards = torch.tensor(np.array([m.reward for m in memories], dtype=np.float32)).to(device)
    value = nn_value(torch.tensor(np.array([m.obs for m in memories], dtype=np.float32)).to(device))
    vl_loss = F.mse_loss(value.squeeze(-1), rewards)

    actions_actor = torch.DoubleTensor(np.array([m.action for m in memories])).to(device)
    actions_expert = decide(np.array([m.obs for m in memories], dtype=np.float32)).to(device)
    expert_loss = nn.MSELoss()(actions_expert, actions_actor)
    new_log_policy = compute_log_policy_prob(memories, nn_policy, device)
    rt_theta = torch.exp(new_log_policy - old_log_policy.detach()).cuda()

    adv = adv.unsqueeze(-1)
    pg_loss = -torch.mean(torch.min(rt_theta.to(device) * adv, torch.clamp(rt_theta.to(device), 1 - epsilon,
                                                                           1 + epsilon) * adv))
    return pg_loss, vl_loss, expert_loss
Example #6
def true_online_gtd(env,
                    episodes,
                    target,
                    behavior,
                    Lambda,
                    gamma=lambda x: 0.95,
                    alpha=0.05,
                    beta=0.0001,
                    diagnose=False,
                    evaluation=None):
    """
    episodes:   number of episodes
    target:     target policy matrix (|S|*|A|)
    behavior:   behavior policy matrix (|S|*|A|)
    Lambda:     LAMBDA object determining each lambda for each feature (or state or observation)
    gamma:      anonymous function determining the discount factor for each feature (or state or observation)
    alpha:      learning rate for the weight vector of the values
    beta:       learning rate for the auxiliary vector for off-policy
    """
    learner = TRUE_ONLINE_GTD_LEARNER(env)
    if evaluation is not None:
        value_trace = np.zeros((episodes, 1))
        value_trace[:] = np.nan
    else:
        value_trace = []
    for epi in range(episodes):
        s_curr, done = env.reset(), False
        x_curr = onehot(s_curr, env.observation_space.n)
        learner.refresh()
        if evaluation is not None:
            value_trace[epi, 0] = evaluation(learner.w_curr, 'expectation')
        else:
            value_trace.append(np.copy(learner.w_curr))
        while not done:
            action = decide(s_curr, behavior)
            rho_curr = importance_sampling_ratio(target, behavior, s_curr,
                                                 action)
            s_next, r_next, done, _ = env.step(action)
            x_next = onehot(s_next, env.observation_space.n)
            if diagnose:
                print('rho_curr: %.2e, lambda_curr: %.2e, lambda_next: %.2e' %
                      (rho_curr, Lambda.value(x_curr), Lambda.value(x_next)))
            learner.learn(r_next, gamma(x_next), gamma(x_curr), x_next, x_curr,
                          Lambda.value(x_next), Lambda.value(x_curr), rho_curr,
                          alpha, beta)
            learner.next()
            x_curr = x_next
    return value_trace
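onehot is also not shown here; for a tabular environment it is assumed to turn a state index into an indicator feature vector of length env.observation_space.n. A minimal sketch under that assumption:

import numpy as np

def onehot(index, size):
    # Indicator feature for a tabular state: zeros everywhere except a 1
    # at the state's index (assumed behaviour, not the project's code).
    x = np.zeros(size)
    x[index] = 1.0
    return x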
Example #7
def compute_log_policy_prob(memories, nn_policy, device):
    '''
    Run the policy on the observations stored in the memories and compute the log probability of the stored actions under that policy
    '''
    n_mean, log_std = nn_policy(
        torch.tensor(np.array([m.obs for m in memories],
                              dtype=np.float32)).to(device))
    n_mean = n_mean.type(torch.DoubleTensor)
    logstd = log_std.type(torch.DoubleTensor)

    actions_critic = torch.DoubleTensor(np.array([m.action for m in memories
                                                  ])).to(device)
    actions_expert = decide(
        np.array([m.obs for m in memories], dtype=np.float32))  # .to(device))

    return log_policy_prob(n_mean, logstd, actions_critic.to(
        n_mean.device))  # , actions_expert=actions_expert)
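log_policy_prob is not part of this snippet. Since the policy network returns a mean and a log standard deviation per action dimension, it is presumably the log density of a diagonal Gaussian summed over the action dimensions; a sketch under that assumption:

import math
import torch

def log_policy_prob(mean, log_std, actions):
    # Diagonal Gaussian log-density, summed over action dimensions:
    # -(a - mu)^2 / (2 sigma^2) - log(sigma) - 0.5 log(2 pi)
    var = torch.exp(2 * log_std)
    log_prob = -((actions - mean) ** 2) / (2 * var) - log_std \
               - 0.5 * math.log(2 * math.pi)
    return log_prob.sum(dim=-1)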
Example #8
 def serialize(cls, value, ctx):
     # Serialize a typed array: items of a fixed BaseType are written
     # directly, otherwise each item is prefixed with its resolved type id.
     _type = cls.__generics__
     type_id = register.get_id(_type)
     is_ref, hash_code = ctx.is_ref(value)
     ctx.write(struct.pack('<i?i', type_id, is_ref, hash_code))
     if is_ref:
         return
     arr = value.value
     length = len(arr)
     ctx.write(struct.pack('<i', length))
     if issubclass(_type, BaseType):
         for item in arr:
             _type.serialize(item, ctx)
     else:
         for item in arr:
             _item_type, _item = decide(item)
             _item_type_id = register.get_id(_item_type)
             ctx.write(struct.pack('<i', _item_type_id))
             _item_type.serialize(_item, ctx)
Example #9
import utils
# import the random module
import random

print('Starting the Rock Paper Scissors game!')
player_name = input('Please enter your name: ')
player_count = 0
computer_count = 0
while utils.decide(player_count, computer_count):
    print('Pick a hand: (0: Rock, 1: Paper, 2: Scissors)')
    player_hand = int(input('Please enter a number (0-2): '))

    if utils.validate(player_hand):
        # Assign a random number between 0 and 2 to computer_hand using randint
        computer_hand = random.randint(0, 2)

        utils.print_hand(player_hand, player_name)
        utils.print_hand(computer_hand, 'Computer')

        result = utils.judge(player_hand, computer_hand)
        if result == 'Win':
            player_count += 1

        elif result == 'Lose':
            computer_count += 1
        print("")
        print('Result: ' + result)
        print("")
        print("")
        print(str(player_count) + " : " + str(computer_count))
    # winner = utils.decide(player_count,computer_count)
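The utils module used by this script is not shown. Below is a minimal sketch of helpers that would make the game run, assuming decide keeps the loop going until either side reaches three wins, validate checks the 0-2 range, and judge reports the result from the player's point of view (all names and rules here are assumptions, not the original module):

# utils.py -- hypothetical sketch, not the original module
HANDS = ['Rock', 'Paper', 'Scissors']

def decide(player_count, computer_count, target=3):
    # Keep playing while neither side has reached the target number of wins.
    return player_count < target and computer_count < target

def validate(hand):
    # Only 0 (Rock), 1 (Paper) and 2 (Scissors) are legal inputs.
    return hand in (0, 1, 2)

def print_hand(hand, name):
    print(name + ' picked: ' + HANDS[hand])

def judge(player_hand, computer_hand):
    # Result from the player's perspective: Paper beats Rock,
    # Scissors beats Paper, Rock beats Scissors.
    if player_hand == computer_hand:
        return 'Draw'
    if (player_hand - computer_hand) % 3 == 1:
        return 'Win'
    return 'Lose'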
Example #10
def cv(up_left_x, up_left_y, bottom_right_x, bottom_right_y, logo, centered,
       phase, pos_dict, satisfactory, lost, image_list, f, is_it_big, seen):
    # Vision worker: in phase 1, map each logo detected by the Tiny-YOLO
    # detector to a board position; in phase 2, track the selected logo and
    # update the shared centering / size / lost flags.

    signal.signal(signal.SIGINT, signal_handler)
    weights = "Tiny-YOLO/logo_final.weights"
    cfg = "Tiny-YOLO/logo.cfg"
    is_permitted = True
    curr_time = 0
    detector = YOLO_Detector(weights, cfg)
    counter_for_mapping = 1
    local_logo = 0
    logo_list = ["stm", "odtu", "ort", "helikopter_inis"]
    pos_dict["turk_bayragi"] = 5
    lost_list = [False for i in range(4)]
    centered_list = [False for i in range(5)]
    order_of_positions = [2, 4, 3, 1]
    time.sleep(2)
    while True:
        time.sleep(1)
        print("phase", phase.value)
        if phase.value == 1:
            print("satisfactory", satisfactory.value)
            while satisfactory.value == 0:
                time.sleep(1)
                print("c", satisfactory.value)
                time.sleep(0.13)
                frame1 = f[-1]
                frame = quarter(frame1, counter_for_mapping)
                rects, confidences, classIDs = detector.detect(frame)
                satisfactory.value, label = decide(confidences, classIDs)
                cv2.imshow("he", frame1)
                cv2.waitKey(1)
                cv2.imshow("hey", frame)
                cv2.waitKey(1)
                if satisfactory.value:
                    del logo_list[logo_list.index(label)]
                    pos_dict[label] = order_of_positions[counter_for_mapping -
                                                         1]
                    counter_for_mapping = counter_for_mapping + 1
                    print(logo_list)
                    print(len(logo_list))
                    if len(logo_list) == 1:
                        print(logo_list[0])
                        pos_dict[logo_list[0]] = 1
                else:
                    break

        print(pos_dict)

        if phase.value == 2:
            cv2.destroyAllWindows()
            time.sleep(1)
            while True:
                #grabbed, frame = camera.read()
                #print("frame", frame.shape[:2])
                frame = f[-1]
                rects, confidences, classIDs = detector.detect(frame)
                frame, cond, (x1, x2, y1, y2), c = proper_detection(
                    frame, rects, logo.value, confidences, classIDs)
                if cond:
                    seen.value = 1
                    centered_list[:-1] = centered_list[1:]
                    centered_list[-1] = c
                    lost_list[:-1] = lost_list[1:]
                    lost_list[-1] = True
                    up_left_x.value = x1
                    up_left_y.value = y1
                    bottom_right_x.value = x2
                    bottom_right_y.value = y2
                    is_it_big.value = big_enough(x2 - x1, y2 - y1)

                else:
                    seen.value = 0
                    centered_list[:-1] = centered_list[1:]
                    centered_list[-1] = False
                    lost_list[:-1] = lost_list[1:]
                    lost_list[-1] = False

                lost.value = 0 if any(lost_list) else 1

                if (is_centered(centered_list)):
                    if is_permitted == True:
                        curr_time = time.time()
                        is_permitted = False
                    if (time.time() - curr_time > 0.2):
                        centered.value = is_centered(centered_list)

                if not (is_centered(centered_list)):
                    is_permitted = True
                    centered.value = 0

                cv2.imshow("test", frame)
                cv2.waitKey(1)
Example #11
def cv(up_left_x, up_left_y, bottom_right_x, bottom_right_y,
		logo, centered, phase, pos_dict, satisfactory, lost, is_it_big, seen):

	signal.signal(signal.SIGINT, signal_handler)
	weights = "Tiny-YOLO/logo_final.weights"
	cfg = "Tiny-YOLO/logo.cfg"
	detector = YOLO_Detector(weights, cfg)
	counter_for_mapping = 1
	local_logo = 0
	logo_list = ["stm", "odtu", "ort", "helikopter_inis"]
	pos_dict["turk_bayragi"] = 5
	lost_list = [False for i in range(4)]
	centered_list = [False for i in range(10)]
	order_of_positions = [2, 4, 3, 1]
	camera = cv2.VideoCapture(gstreamer_pipeline(), cv2.CAP_GSTREAMER)
	time.sleep(0.2)
	#time.sleep(2)
	while True:
		time.sleep(1)
		if phase.value == 1:
			while True:
				grabbed, frame1 = camera.read()
				rects, confidences, classIDs = detector.detect(frame1)
				print("phase", phase.value)
				print("satisfactory", satisfactory.value)
				print("logo_list", logo_list)
				if satisfactory.value == 0:
					#time.sleep(1)
					#time.sleep(0.13)
					frame = quarter(frame1, counter_for_mapping)
					satisfactory.value, label= decide(confidences, classIDs)
					#cv2.imshow("he", frame1)
					#cv2.waitKey(1)
					#cv2.imshow("hey", frame)
					#cv2.waitKey(1)
					if satisfactory.value == 1:
						if not (label in logo_list):
							satisfactory.value = 0
							continue
						del logo_list[logo_list.index(label)]
						pos_dict[label] = order_of_positions[counter_for_mapping - 1]
						counter_for_mapping = counter_for_mapping + 1
						satisfactory.value = satisfactory.value + 1
						print(logo_list)
						print(len(logo_list))
						if len(logo_list) == 1:
							print(logo_list[0])
							pos_dict[logo_list[0]] = 1
					print(pos_dict)

				if phase.value == 2:
						break

				print(pos_dict)


		if phase.value == 2:
			cv2.destroyAllWindows()
			time.sleep(1)
			while True:
				grabbed, frame = camera.read()
				rects, confidences, classIDs = detector.detect(frame)
				frame, cond, (x1, x2, y1, y2), c = proper_detection(frame, rects, logo.value,
									confidences, classIDs)
				print(rects)
				if cond:
					seen.value = 1
					centered_list[:-1] = centered_list[1:]
					centered_list[-1] = c
					lost_list[:-1] = lost_list[1:]
					lost_list[-1] = True
					up_left_x.value = x1
					up_left_y.value = y1
					bottom_right_x.value = x2
					bottom_right_y.value = y2
					is_it_big.value = big_enough(x2-x1, y2-y1)

				else:
					seen.value = 0
					centered_list[:-1] = centered_list[1:]
					centered_list[-1] = False
					lost_list[:-1] = lost_list[1:]
					lost_list[-1] = False

				lost.value = 0 if any(lost_list) else 1
				centered.value = is_centered(centered_list)