import struct


def serialize(cls, value, ctx):
    # Write the shared record header: type id, reference flag, hash code.
    type_id = register.get_id(cls)
    is_ref, hash_code = ctx.is_ref(value)
    ctx.write(struct.pack('<i?i', type_id, is_ref, hash_code))
    if is_ref:
        return
    # Key/value pair: each side is tagged with its concrete type id.
    k, v = value
    key_type, k = decide(k)
    key_type_id = register.get_id(key_type)
    ctx.write(struct.pack('<i', key_type_id))
    key_type.serialize(k, ctx)
    value_type, v = decide(v)
    value_type_id = register.get_id(value_type)
    ctx.write(struct.pack('<i', value_type_id))
    value_type.serialize(v, ctx)

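# A quick illustration of the 9-byte record header these serializers write:
# a little-endian int32 type id, a one-byte bool reference flag, and an
# int32 hash code. The values below are made up for demonstration.
header = struct.pack('<i?i', 7, False, 0x1234)
assert struct.calcsize('<i?i') == len(header) == 9
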
def serialize(cls, value, ctx):
    type_id = register.get_id(cls)
    is_ref, hash_code = ctx.is_ref(value)
    ctx.write(struct.pack('<i?i', type_id, is_ref, hash_code))
    if is_ref:
        return
    # Subclasses of Serializable record the serialization type of each
    # attribute, so instance attributes do not have to be restricted to
    # ProtocolType the way container elements are; an attribute declared
    # as ProtocolType is still allowed, though.
    for desc in cls.__serialize_fields__:
        _value = getattr(value, desc.name)
        _type = desc.type_info.type
        if _type == ProtocolType:
            # Dynamic field: resolve the concrete type from the value.
            _type, _value = decide(_value)
        else:
            _, _value = decide(_value)
        type_id = register.get_id(_type)
        ctx.write(struct.pack('<i', type_id))
        _type.serialize(_value, ctx)

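# A hypothetical shape for the field descriptors iterated above, inferred
# only from the attribute accesses (desc.name, desc.type_info.type); the
# real Serializable machinery may define these differently.
class TypeInfo:
    def __init__(self, type_):
        self.type = type_


class FieldDescriptor:
    def __init__(self, name, type_):
        self.name = name
        self.type_info = TypeInfo(type_)
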
def serialize(cls, value, ctx):
    type_id = register.get_id(cls)
    is_ref, hash_code = ctx.is_ref(value)
    ctx.write(struct.pack('<i?i', type_id, is_ref, hash_code))
    if is_ref:
        return
    # Length-prefixed sequence; every element carries its own type id.
    length = len(value)
    ctx.write(struct.pack('<i', length))
    for item in value:
        _type, _item = decide(item)
        _type_id = register.get_id(_type)
        ctx.write(struct.pack('<i', _type_id))
        _type.serialize(_item, ctx)

import numpy as np


def MC(env, episodes, target, behavior, Lambda, gamma=lambda x: 0.95,
       alpha=0.05, beta=0.0001, diagnose=False):
    """
    episodes: number of episodes
    target:   target policy matrix (|S|*|A|)
    behavior: behavior policy matrix (|S|*|A|)
    Lambda:   LAMBDA object determining each lambda for each feature (or state or observation)
    gamma:    anonymous function determining the discount factor for each feature (or state or observation)
    alpha:    learning rate for the weight vector of the values
    beta:     learning rate for the auxiliary vector for off-policy
    """
    learner = MC_LEARNER(env)
    expected_return_trace = []
    variance_of_return_trace = []
    for _ in range(episodes):
        state, done = env.reset(), False
        # Collect the (s, a, r) triples for an entire episode.
        episode = []
        while not done:
            action = decide(state, behavior)
            next_state, reward, done, _ = env.step(action)
            if done:
                learner.return_counts[next_state] += 1
            episode.append((state, action, reward))
            state = next_state
        expected_return_trace.append(np.copy(learner.expected_return))
        variance_of_return_trace.append(np.copy(learner.variance_of_return))
        # Update the expected return G for every visit, working backwards.
        # gamma is evaluated before `state` is reassigned, so it sees the
        # successor state of episode[t], as the recursion
        # G_t = rho_t * (r_{t+1} + gamma(s_{t+1}) * G_{t+1}) requires.
        G = 0.0
        for t in range(len(episode) - 1, -1, -1):
            gamma_val = gamma(state)
            state, action, reward = episode[t]
            rho = importance_sampling_ratio(target, behavior, state, action)
            G = rho * (reward + gamma_val * G)
            learner.backward_step(state, G)
    return expected_return_trace, variance_of_return_trace, learner.return_counts

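# A minimal sketch of the two helpers MC (and true_online_gtd below) assume,
# taking the policies to be row-stochastic |S| x |A| matrices: `decide`
# samples an action from the behavior policy's row for the current state,
# and the ratio is the standard off-policy correction
# pi_target(a|s) / pi_behavior(a|s). The real helpers may differ.
def decide(state, policy):
    # Sample an action index with the probabilities in policy[state].
    return np.random.choice(policy.shape[1], p=policy[state])


def importance_sampling_ratio(target, behavior, state, action):
    return target[state, action] / behavior[state, action]
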
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


def clipped_PPO_loss(memories, nn_policy, nn_value, old_log_policy, adv, epsilon, device):
    # Value loss: regress the critic towards the observed rewards.
    rewards = torch.tensor(np.array([m.reward for m in memories], dtype=np.float32)).to(device)
    value = nn_value(torch.tensor(np.array([m.obs for m in memories], dtype=np.float32)).to(device))
    vl_loss = F.mse_loss(value.squeeze(-1), rewards)

    # Imitation loss between the stored actions and the expert's decisions.
    actions_actor = torch.DoubleTensor(np.array([m.action for m in memories])).to(device)
    actions_expert = decide(np.array([m.obs for m in memories], dtype=np.float32)).to(device)
    expert_loss = nn.MSELoss()(actions_expert, actions_actor)

    # Clipped surrogate objective on the probability ratio r_t(theta).
    new_log_policy = compute_log_policy_prob(memories, nn_policy, device)
    rt_theta = torch.exp(new_log_policy - old_log_policy.detach()).to(device)
    adv = adv.unsqueeze(-1)
    pg_loss = -torch.mean(torch.min(rt_theta * adv,
                                    torch.clamp(rt_theta, 1 - epsilon, 1 + epsilon) * adv))
    return pg_loss, vl_loss, expert_loss

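# Illustrative use of the three losses; the weighting coefficients and the
# optimizer setup here are assumptions, not taken from the source.
# pg_loss, vl_loss, expert_loss = clipped_PPO_loss(
#     memories, nn_policy, nn_value, old_log_policy, adv, epsilon=0.2, device='cpu')
# loss = pg_loss + 0.5 * vl_loss + 0.1 * expert_loss
# optimizer.zero_grad()
# loss.backward()
# optimizer.step()
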
def true_online_gtd(env, episodes, target, behavior, Lambda,
                    gamma=lambda x: 0.95, alpha=0.05, beta=0.0001,
                    diagnose=False, evaluation=None):
    """
    episodes: number of episodes
    target:   target policy matrix (|S|*|A|)
    behavior: behavior policy matrix (|S|*|A|)
    Lambda:   LAMBDA object determining each lambda for each feature (or state or observation)
    gamma:    anonymous function determining the discount factor for each feature (or state or observation)
    alpha:    learning rate for the weight vector of the values
    beta:     learning rate for the auxiliary vector for off-policy
    """
    learner = TRUE_ONLINE_GTD_LEARNER(env)
    if evaluation is not None:
        value_trace = np.zeros((episodes, 1))
        value_trace[:] = np.nan
    else:
        value_trace = []
    for epi in range(episodes):
        s_curr, done = env.reset(), False
        x_curr = onehot(s_curr, env.observation_space.n)
        learner.refresh()
        if evaluation is not None:
            value_trace[epi, 0] = evaluation(learner.w_curr, 'expectation')
        else:
            value_trace.append(np.copy(learner.w_curr))
        while not done:
            action = decide(s_curr, behavior)
            rho_curr = importance_sampling_ratio(target, behavior, s_curr, action)
            s_next, r_next, done, _ = env.step(action)
            x_next = onehot(s_next, env.observation_space.n)
            if diagnose:
                print('rho_curr: %.2e, lambda_curr: %.2e, lambda_next: %.2e' %
                      (rho_curr, Lambda.value(x_curr), Lambda.value(x_next)))
            learner.learn(r_next, gamma(x_next), gamma(x_curr), x_next, x_curr,
                          Lambda.value(x_next), Lambda.value(x_curr), rho_curr,
                          alpha, beta)
            learner.next()
            # Advance both the state index and its feature vector.
            s_curr, x_curr = s_next, x_next
    return value_trace

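# `onehot` above is assumed to be the usual indicator-feature encoding of a
# tabular state; a minimal version:
def onehot(index, n):
    x = np.zeros(n)
    x[index] = 1.0
    return x
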
def compute_log_policy_prob(memories, nn_policy, device):
    '''
    Run the policy on the observations in the memories and compute the log
    probability of the stored actions under that policy.
    '''
    n_mean, log_std = nn_policy(
        torch.tensor(np.array([m.obs for m in memories], dtype=np.float32)).to(device))
    n_mean = n_mean.type(torch.DoubleTensor)
    log_std = log_std.type(torch.DoubleTensor)
    actions_critic = torch.DoubleTensor(np.array([m.action for m in memories])).to(device)
    # Kept for the commented-out expert variant below; currently unused.
    actions_expert = decide(
        np.array([m.obs for m in memories], dtype=np.float32))  # .to(device)
    return log_policy_prob(n_mean, log_std,
                           actions_critic.to(n_mean.device))  # , actions_expert=actions_expert

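# A sketch of `log_policy_prob` assuming a diagonal-Gaussian policy, which
# matches the (mean, log_std) pair nn_policy returns; the real helper may
# differ in its reduction or constants.
import math


def log_policy_prob(mean, log_std, actions):
    # Per-dimension Gaussian log-density, summed over action dimensions:
    # -(a - mu)^2 / (2 sigma^2) - log(sigma) - 0.5 log(2 pi).
    var = torch.exp(2 * log_std)
    return (-(actions - mean) ** 2 / (2 * var)
            - log_std - 0.5 * math.log(2 * math.pi)).sum(dim=-1)
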
def serialize(cls, value, ctx):
    _type = cls.__generics__
    type_id = register.get_id(_type)
    is_ref, hash_code = ctx.is_ref(value)
    ctx.write(struct.pack('<i?i', type_id, is_ref, hash_code))
    if is_ref:
        return
    arr = value.value
    length = len(arr)
    ctx.write(struct.pack('<i', length))
    if issubclass(_type, BaseType):
        # The generic parameter fixes the element type, so no per-item
        # type id needs to be written.
        for item in arr:
            _type.serialize(item, ctx)
    else:
        # Dynamic element type: tag every item with its concrete type id.
        for item in arr:
            _item_type, _item = decide(item)
            _item_type_id = register.get_id(_item_type)
            ctx.write(struct.pack('<i', _item_type_id))
            _item_type.serialize(_item, ctx)

import utils
# Import the random module.
import random

print('Starting the Rock Paper Scissors game!')
player_name = input('Please enter your name: ')
player_count = 0
computer_count = 0
while utils.decide(player_count, computer_count):
    print('Pick a hand: (0: Rock, 1: Paper, 2: Scissors)')
    player_hand = int(input('Please enter a number (0-2): '))
    if utils.validate(player_hand):
        # Assign a random number between 0 and 2 to computer_hand using randint.
        computer_hand = random.randint(0, 2)
        utils.print_hand(player_hand, player_name)
        utils.print_hand(computer_hand, 'Computer')
        result = utils.judge(player_hand, computer_hand)
        if result == 'Win':
            player_count += 1
        elif result == 'Lose':
            computer_count += 1
        print('')
        print('Result: ' + result)
        print('')
    print('')
    print(str(player_count) + ' : ' + str(computer_count))
# winner = utils.decide(player_count, computer_count)

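# Plausible sketches of two of the utils helpers the script relies on; the
# actual module may implement them differently. With 0=Rock, 1=Paper,
# 2=Scissors, the player wins exactly when (player - computer) % 3 == 1
# (paper beats rock, scissors beat paper, rock beats scissors).
def validate(hand):
    return hand in (0, 1, 2)


def judge(player, computer):
    if player == computer:
        return 'Draw'
    return 'Win' if (player - computer) % 3 == 1 else 'Lose'
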
import signal
import time

import cv2


def cv(up_left_x, up_left_y, bottom_right_x, bottom_right_y, logo, centered,
       phase, pos_dict, satisfactory, lost, image_list, f, is_it_big, seen):
    signal.signal(signal.SIGINT, signal_handler)
    weights = "Tiny-YOLO/logo_final.weights"
    cfg = "Tiny-YOLO/logo.cfg"
    is_permitted = True
    curr_time = 0
    detector = YOLO_Detector(weights, cfg)
    counter_for_mapping = 1
    local_logo = 0
    logo_list = ["stm", "odtu", "ort", "helikopter_inis"]
    pos_dict["turk_bayragi"] = 5
    lost_list = [False for i in range(4)]
    centered_list = [False for i in range(5)]
    order_of_positions = [2, 4, 3, 1]
    time.sleep(2)
    while True:
        time.sleep(1)
        print("phase", phase.value)
        if phase.value == 1:
            # Phase 1: map each detected logo to its position on the course.
            print("satisfactory", satisfactory.value)
            while satisfactory.value == 0:
                time.sleep(1)
                print("c", satisfactory.value)
                time.sleep(0.13)
                frame1 = f[-1]
                frame = quarter(frame1, counter_for_mapping)
                rects, confidences, classIDs = detector.detect(frame)
                satisfactory.value, label = decide(confidences, classIDs)
                cv2.imshow("he", frame1)
                cv2.waitKey(1)
                cv2.imshow("hey", frame)
                cv2.waitKey(1)
                if satisfactory.value:
                    del logo_list[logo_list.index(label)]
                    pos_dict[label] = order_of_positions[counter_for_mapping - 1]
                    counter_for_mapping = counter_for_mapping + 1
                    print(logo_list)
                    print(len(logo_list))
                    if len(logo_list) == 1:
                        # Only one logo left: it takes the remaining slot.
                        print(logo_list[0])
                        pos_dict[logo_list[0]] = 1
                    else:
                        break
            print(pos_dict)
        if phase.value == 2:
            # Phase 2: track the requested logo and publish its bounding box.
            cv2.destroyAllWindows()
            time.sleep(1)
            while True:
                #grabbed, frame = camera.read()
                #print("frame", frame.shape[:2])
                frame = f[-1]
                rects, confidences, classIDs = detector.detect(frame)
                frame, cond, (x1, x2, y1, y2), c = proper_detection(
                    frame, rects, logo.value, confidences, classIDs)
                if cond:
                    seen.value = 1
                    centered_list[:-1] = centered_list[1:]
                    centered_list[-1] = c
                    lost_list[:-1] = lost_list[1:]
                    lost_list[-1] = True
                    up_left_x.value = x1
                    up_left_y.value = y1
                    bottom_right_x.value = x2
                    bottom_right_y.value = y2
                    is_it_big.value = big_enough(x2 - x1, y2 - y1)
                else:
                    seen.value = 0
                    centered_list[:-1] = centered_list[1:]
                    centered_list[-1] = False
                    lost_list[:-1] = lost_list[1:]
                    lost_list[-1] = False
                lost.value = 0 if any(lost_list) else 1
                if is_centered(centered_list):
                    # Require the target to stay centered for 0.2 s before
                    # reporting it as centered.
                    if is_permitted:
                        curr_time = time.time()
                        is_permitted = False
                    if time.time() - curr_time > 0.2:
                        centered.value = is_centered(centered_list)
                if not is_centered(centered_list):
                    is_permitted = True
                    centered.value = 0
                cv2.imshow("test", frame)
                cv2.waitKey(1)

# Alternative version of cv() that reads frames directly from a GStreamer
# camera instead of the shared frame buffer `f`.
def cv(up_left_x, up_left_y, bottom_right_x, bottom_right_y, logo, centered,
       phase, pos_dict, satisfactory, lost, is_it_big, seen):
    signal.signal(signal.SIGINT, signal_handler)
    weights = "Tiny-YOLO/logo_final.weights"
    cfg = "Tiny-YOLO/logo.cfg"
    detector = YOLO_Detector(weights, cfg)
    counter_for_mapping = 1
    local_logo = 0
    logo_list = ["stm", "odtu", "ort", "helikopter_inis"]
    pos_dict["turk_bayragi"] = 5
    lost_list = [False for i in range(4)]
    centered_list = [False for i in range(10)]
    order_of_positions = [2, 4, 3, 1]
    camera = cv2.VideoCapture(gstreamer_pipeline(), cv2.CAP_GSTREAMER)
    time.sleep(0.2)
    #time.sleep(2)
    while True:
        time.sleep(1)
        if phase.value == 1:
            while True:
                grabbed, frame1 = camera.read()
                rects, confidences, classIDs = detector.detect(frame1)
                print("phase", phase.value)
                print("satisfactory", satisfactory.value)
                print("logo_list", logo_list)
                if satisfactory.value == 0:
                    #time.sleep(1)
                    #time.sleep(0.13)
                    frame = quarter(frame1, counter_for_mapping)
                    satisfactory.value, label = decide(confidences, classIDs)
                    #cv2.imshow("he", frame1)
                    #cv2.waitKey(1)
                    #cv2.imshow("hey", frame)
                    #cv2.waitKey(1)
                if satisfactory.value == 1:
                    # Ignore detections of logos that were already mapped.
                    if label not in logo_list:
                        satisfactory.value = 0
                        continue
                    del logo_list[logo_list.index(label)]
                    pos_dict[label] = order_of_positions[counter_for_mapping - 1]
                    counter_for_mapping = counter_for_mapping + 1
                    satisfactory.value = satisfactory.value + 1
                    print(logo_list)
                    print(len(logo_list))
                    if len(logo_list) == 1:
                        print(logo_list[0])
                        pos_dict[logo_list[0]] = 1
                        print(pos_dict)
                if phase.value == 2:
                    break
            print(pos_dict)
        if phase.value == 2:
            cv2.destroyAllWindows()
            time.sleep(1)
            while True:
                grabbed, frame = camera.read()
                rects, confidences, classIDs = detector.detect(frame)
                frame, cond, (x1, x2, y1, y2), c = proper_detection(
                    frame, rects, logo.value, confidences, classIDs)
                print(rects)
                if cond:
                    seen.value = 1
                    centered_list[:-1] = centered_list[1:]
                    centered_list[-1] = c
                    lost_list[:-1] = lost_list[1:]
                    lost_list[-1] = True
                    up_left_x.value = x1
                    up_left_y.value = y1
                    bottom_right_x.value = x2
                    bottom_right_y.value = y2
                    is_it_big.value = big_enough(x2 - x1, y2 - y1)
                else:
                    seen.value = 0
                    centered_list[:-1] = centered_list[1:]
                    centered_list[-1] = False
                    lost_list[:-1] = lost_list[1:]
                    lost_list[-1] = False
                lost.value = 0 if any(lost_list) else 1
                centered.value = is_centered(centered_list)

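# Hypothetical versions of the sliding-window helpers both cv() variants
# rely on; the threshold values here are guesses for illustration only.
def is_centered(window):
    # Treat the target as centered once every recent frame agrees.
    return 1 if all(window) else 0


def big_enough(width, height, min_area=100 * 100):
    return 1 if width * height >= min_area else 0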