def func():
    player = input("Please, choose and enter your username: ")
    print("Hello, " + player + "!")
    i = 0
    while True:
        i += 1
        try:
            race = input("Choose the country you want to play in: Carfagen, Serb ")
            if race.lower() == "carfagen":
                player = Units.CorfagenArmyFactory()
                print("Congratulations! Your starting capital is 100 coins\n")
                print(phr)
                break
            elif race.lower() == "serb":
                player = Units.SerbianArmyFactory()
                print("Congratulations! Your starting capital is 100 coins\n")
                print(phr)
                break
            else:
                print("Unknown race!")
                raise ValueError
        except ValueError:
            if i < 2:
                print("Choose again.")
            elif i == 2:
                print("You entered the wrong line too many times, bye-bye baby)")
                print("I'm just joking, choose one more time!")
            else:
                return
            continue
    print()
    playing.play(player)
def browse_mode():
    cartoon_selector = 0
    cartoon = []
    cartoon_cursor = 1
    cartoons_number = selection.count_files("/media/PAP/cartoons/")
    c_path = "/media/PAP/cartoons/pap_"
    # If no USB key named PAP containing cartoons is found, display an error and return.
    # This will crash if the folder contains images that are not named pap_x or are not 1280x1024.
    if not os.path.exists("/media/PAP/cartoons/"):
        cv2.imshow("window", cv2.imread("./load_failed.png"))
        cv2.waitKey(0)
        return
    while len(cartoon) == 0:
        cv2.imshow("window", cv2.imread("background2.png"))
        cv2.waitKey(10)
        # builds and displays the image containing thumbnails of the cartoons
        selection.build_selection(cartoon_selector, c_path, cartoons_number)
        cv2.waitKey(10)
        button_value = button.waitpressedbutton("browse")
        if button_value == "mode":
            # the mode is no longer browse, so we return to go to the right mode
            return
        if button_value == "next":
            # sets the cursor to the next cartoon
            cartoon_selector += 1
        elif button_value == "prev":
            # sets the cursor to the previous cartoon
            cartoon_selector -= 1
        elif button_value == "play":
            # plays the cartoon
            cartoon = playing.load_selected(c_path + str(cartoon_selector % cartoons_number + 1) + "/")
            playing.play(cartoon)
def UpdatePolicyFEList(self, weights, opt_count, scene_file_name, enlarge_lr):
    # store the feature expectations of a newly learned policy and its difference to the expert policy
    print("Updating Policy FE list starts......")
    #start_time = timeit.default_timer()
    model_name, stop_status = QLearning(self.num_features, self.num_actions, self.params, weights,
                                        self.results_folder, self.behavior_type, self.train_frames,
                                        opt_count, scene_file_name, enlarge_lr=enlarge_lr)
    #print("Total consumed time: ", timeit.default_timer() - start_time, " s")

    # get the trained model
    print("The latest Q-learning model is: ", model_name)
    model = net1(self.num_features, self.num_actions, self.params['nn'], model_name)

    # get feature expectations by executing the learned model
    temp_fe, aver_score, aver_dist = play(model, weights, self.play_frames, play_rounds=10,
                                          scene_file_name=scene_file_name)

    # hyperdistance t = |weights^T (expertFE - newPolicyFE)|
    temp_hyper_dis = np.abs(np.dot(weights, np.asarray(self.expert_fe) - np.asarray(temp_fe)))
    self.policy_fe_list[temp_hyper_dis] = temp_fe
    if opt_count == 1:
        del self.policy_fe_list[self.random_dis]
    self.model_list.append(model)

    print("Updating Policy FE list finished!")
    return temp_hyper_dis, aver_score, aver_dist, stop_status
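# The hyperdistance computed above is the projection term t = |w^T (mu_expert - mu_policy)| from
# apprenticeship learning. A minimal, self-contained sketch of that single step follows; the vector
# length and all numeric values are made up for illustration and are not taken from the project.
import numpy as np

weights = np.array([0.5, -0.2, 0.1, 0.7])    # current reward weights w (hypothetical)
expert_fe = np.array([1.0, 0.3, 0.8, 0.2])   # expert feature expectations mu_expert (hypothetical)
policy_fe = np.array([0.6, 0.4, 0.5, 0.1])   # new policy feature expectations mu_policy (hypothetical)

# hyperdistance t = |w . (mu_expert - mu_policy)|, used above as the key into policy_fe_list
temp_hyper_dis = np.abs(np.dot(weights, expert_fe - policy_fe))
print(temp_hyper_dis)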
def free_mode():
    takes_list = []
    background = cv2.imread("background2.png", 1)
    cv2.imshow("window", background)
    cv2.waitKey(50)
    while True:
        mode = GPIO.input(13)  # reads the circle switch
        if mode == False or len(takes_list) < 3:
            cv2.imshow("window", background)
        else:
            # displays a weighted average of the last 3 pictures taken
            cv2.imshow("window", image.averager(takes_list))
        cv2.waitKey(100)
        button_value = button.waitpressedbutton("free")
        if button_value == "mode":
            # the mode is no longer free mode, we have to return to go to the selected mode
            return
        if button_value == "record":
            # takes and displays a picture
            cv2.imshow("window", background)
            cv2.waitKey(10)
            takes_list.append(camera.get_image())
            cv2.imshow("window", takes_list[len(takes_list) - 1])
            cv2.waitKey(200)
        if button_value == "delete" and len(takes_list) > 0:
            # deletes the last picture taken
            img_supp = cv2.imread("img_supp.png", 1)
            cv2.imshow("window", img_supp)
            cv2.waitKey(10)
            del takes_list[-1]
            cv2.imshow("window", background)
            cv2.waitKey(300)
        if button_value == "save":
            # saves the cartoon and returns
            if len(os.listdir("/media")) >= 2:
                os.system("mount /dev/sdb1 /home/pi/Pas_a_pas/bla")
                playing.save_other(takes_list)
                os.system("umount /home/pi/Pas_a_pas/bla")
            playing.save(takes_list)
            return
        if button_value == "play":
            # plays the cartoon
            playing.play(takes_list)
def getRLAgentFE(self, W, i):
    # IRL_helper(W, path, self.num_frames, i)
    # saved_model = path
    # model = neural_net(self.num_states, [164, 150], saved_model)
    model = ddpg.train(W)
    print('======================================= play ========================================')
    return play(model, W)
def free_mode():
    takes_list = []
    background = cv2.imread("background2.png", 1)
    cv2.imshow("window", background)
    cv2.waitKey(50)
    while True:
        mode = GPIO.input(13)  # reads the circle switch
        if mode == False or len(takes_list) < 3:
            cv2.imshow("window", background)
        else:
            # displays a weighted average of the last 3 pictures taken
            cv2.imshow("window", image.averager(takes_list))
        cv2.waitKey(100)
        button_value = button.waitpressedbutton("free")
        if button_value == "mode":
            # the mode is no longer free mode, we have to return to go to the selected mode
            return
        if button_value == "record":
            # takes and displays a picture
            cv2.imshow("window", background)
            cv2.waitKey(10)
            takes_list.append(camera.get_image())
            cv2.imshow("window", takes_list[len(takes_list) - 1])
            cv2.waitKey(200)
        if button_value == "delete" and len(takes_list) > 0:
            # deletes the last picture taken
            del takes_list[-1]
            cv2.imshow("window", background)
            cv2.waitKey(300)
        if button_value == "save":
            # saves the cartoon and returns
            playing.save(takes_list)
            return
        if button_value == "play":
            # plays the cartoon
            playing.play(takes_list)
def getRLAgentFE(self, W, i):
    # get the feature expectations of a new policy using the RL agent
    # saved_model = 'saved-models_brown/evaluatedPolicies/1-164-150-100-50000-100000.h5'
    # self.model.restore_model(saved_model)
    # train the agent and save the model in a file used below
    IRL_helper(W, self.behavior, self.num_frames, i, self.model)
    # saved_model = 'saved-models_'+self.behavior+'/evaluatedPolicies/'+str(i)+'-164-150-100-50000-'+str(self.num_frames)+'.h5'
    # use the saved model to get the FE
    # model = Policy_Network(self.num_states, [164, 150], self.sess, saved_model)
    # self.model.restore(saved_model)
    # print('loaded_model--------')
    # return feature expectations by executing the learned policy
    return play(self.model, W)
def getRLAgentFE(self, W, i):
    # get the feature expectations of a new policy using the RL agent
    # train the agent and save the model in a file used below
    IRL_helper(W, self.behavior, self.num_frames, i)
    saved_model = 'saved-models_' + self.behavior + '/evaluatedPolicies/' + str(i) + '-164-150-100-50000-' + str(self.num_frames) + '.h5'
    # use the saved model to get the FE
    model = neural_net(self.num_states, [164, 150], saved_model)
    # return feature expectations by executing the learned policy
    return play(model, W)
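# The getRLAgentFE variants above share one contract: given reward weights W, train an RL agent and
# return the feature expectations of the resulting policy. Below is a minimal sketch of the
# projection-style apprenticeship-learning outer loop such a helper typically plugs into; the
# stopping threshold, weight initialization, and the simple difference-based weight update are
# assumptions for illustration, not the project's actual optimizer (which may solve a max-margin QP).
import numpy as np

def irl_outer_loop(agent, expert_fe, num_features, epsilon=0.1, max_iters=20):
    # agent is assumed to expose getRLAgentFE(W, i) as defined above
    W = np.random.uniform(-1, 1, num_features)            # initial reward weights (assumed init)
    expert_fe = np.asarray(expert_fe)
    for i in range(1, max_iters + 1):
        policy_fe = np.asarray(agent.getRLAgentFE(W, i))  # FE of the newly trained policy
        diff = expert_fe - policy_fe
        t = np.abs(np.dot(W, diff))                       # hyperdistance to the expert
        if t <= epsilon:                                  # close enough to the expert: stop
            break
        W = diff / (np.linalg.norm(diff) + 1e-8)          # illustrative weight update
    return W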
def assisted_mode():
    # The model is loaded first
    model_list = selection.select_cartoon("/media/PAP/models/", "assisted")
    if model_list == None:
        return
    takes_list = []
    background = cv2.imread("background2.png", 1)
    model_cursor = 0
    takes_cursor = 0
    # When this condition is false, the program automatically saves the cartoon and returns
    while len(model_list) > len(takes_list):
        if (GPIO.input(6) == False) or (len(takes_list) == 0):
            cv2.imshow("window", model_list[model_cursor % len(model_list)])
            cv2.waitKey(100)
        else:
            cv2.imshow("window", takes_list[takes_cursor % len(takes_list)])
            cv2.waitKey(100)
            # move the window again because of a window issue that still occurs the first time
            model_cursor = 1
            cv2.moveWindow("window", 0, -30)
        button_value = button.waitpressedbutton("assisted")
        if button_value == "mode":
            return
        if button_value == "record":
            # take the picture
            model_cursor += 1
            cv2.imshow("window", background)
            cv2.waitKey(20)
            takes_list.append(camera.get_image())
        if button_value == "delete" and len(takes_list) > 0:
            # delete the last picture from takes_list
            del takes_list[-1]
            cv2.waitKey(300)
        if button_value == "save":
            # save the cartoon and return
            if not os.path.exists("/media/PAP/cartoons"):
                cv2.imshow("window", cv2.imread("./save_failed"))
                cv2.waitKey(100)
            else:
                playing.save(takes_list)
            return
        if button_value == "play":
            # plays the cartoon
            playing.play(takes_list)
        if button_value == "next":
            # move to the next image
            if GPIO.input(6) == False:
                model_cursor += 1
            else:
                takes_cursor += 1
            cv2.waitKey(100)
        if button_value == "prev":
            # move to the previous image
            if GPIO.input(6) == False:
                model_cursor -= 1
            else:
                takes_cursor -= 1
            cv2.waitKey(100)
    # end of the while loop, save and return
    if not os.path.exists("/media/PAP/cartoons"):
        cv2.imshow("window", cv2.imread("./save_failed"))
        cv2.waitKey(100)
    else:
        playing.save(takes_list)
    return
def assisted_mode():
    # The model is loaded first
    model_list = selection.select_cartoon("/media/PAP/models/", "assisted")
    if model_list == None:
        return
    takes_list = []
    background = cv2.imread("background2.png", 1)
    model_cursor = 0
    takes_cursor = 0
    # When this condition is false, the program automatically saves the cartoon and returns
    while len(model_list) > len(takes_list):
        if (GPIO.input(6) == False) or (len(takes_list) == 0):
            cv2.imshow("window", model_list[model_cursor % len(model_list)])
            cv2.waitKey(100)
        else:
            cv2.imshow("window", takes_list[takes_cursor % len(takes_list)])
            cv2.waitKey(100)
            # move the window again because of a window issue that still occurs the first time
            model_cursor = 1
            cv2.moveWindow("window", 0, -30)
        button_value = button.waitpressedbutton("assisted")
        if button_value == "mode":
            return
        if button_value == "record":
            # take the picture
            model_cursor += 1
            cv2.imshow("window", background)
            cv2.waitKey(20)
            takes_list.append(camera.get_image())
        if button_value == "delete" and len(takes_list) > 0:
            # delete the last picture from takes_list
            del takes_list[-1]
            cv2.waitKey(300)
        if button_value == "save":
            # save the cartoon and return
            if not os.path.exists("/media/PAP/cartoons"):
                cv2.imshow("window", cv2.imread("./save_failed"))
                cv2.waitKey(100)
            else:
                playing.save(takes_list)
            return
        if button_value == "play":
            # plays the cartoon
            playing.play(takes_list)
        if button_value == "next":
            # move to the next image
            if GPIO.input(6) == False:
                model_cursor += 1
            else:
                takes_cursor += 1
            cv2.waitKey(100)
        if button_value == "prev":
            # move to the previous image
            if GPIO.input(6) == False:
                model_cursor -= 1
            else:
                takes_cursor -= 1
            cv2.waitKey(100)
    # end of the while loop, save and return
    if not os.path.exists("/media/PAP/cartoons"):
        cv2.imshow("window", cv2.imread("./save_failed"))
        cv2.waitKey(100)
    else:
        playing.save(takes_list)
    return
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 13 19:20:56 2017

@author: lakshya
"""
import board_generator
import playing

for size in range(23, 26):
    for fruits in range(2, 11):
        board_generator.makeBoard(size, fruits)
        print(size, fruits)
        time_1, score_1, time_2, score_2 = playing.play()
        with open("test.txt", "a") as myfile:
            myfile.write(str(size) + " " + str(fruits) + "\n")
            myfile.write(str(time_1) + " " + str(score_1) + " " +
                         str(time_2) + " " + str(score_2) + "\n")
def QLearning(num_features, num_actions, params, weights, results_folder, behavior_type,
              train_frames, opt_count, scene_file_name, continue_train=True,
              hitting_reaction_mode=0, enlarge_lr=0):
    '''
    The goal of this function is to train a function approximator of Q which can take a state
    (eight inputs) and predict the Q values of three actions (three outputs)
    '''
    print("Q learning starts...")

    # init variables
    epsilon = 1  # the threshold for choosing a random action over the best action according to a Q value
    if continue_train:
        epsilon = 0.5
    d_epsilon = epsilon / train_frames
    observe_frames = 100  # we train our first model after observing a certain number of frames
    replay = []  # store tuples of (state, action, reward, next_state) for training
    survive_data = []  # store how long the car survived before dying
    loss_log = []  # store the train loss of each model
    score_log = []  # store the average score of each evaluation
    dist_log = []  # store the average distance of each evaluation
    my_batch_size = params['batch_size']
    buffer = params['buffer']
    assert (observe_frames >= my_batch_size), \
        "Error: The number of observed frames is less than the batch size!"

    # create a folder and process the file name for saving trained models
    model_dir = results_folder + 'models-' + behavior_type + '/'
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    filename = params_to_filename(params) + '-' + str(train_frames) + '-' + str(opt_count)
    model_name = model_dir + filename + '.h5'
    weights_name = model_dir + filename + '_weights.npy'
    pretrained_model = ''
    if continue_train and (opt_count > 1):
        pretrained_model = model_dir + params_to_filename(params) + '-' + str(train_frames) + \
            '-' + str(opt_count - 1) + '.h5'

    # init a neural network as an approximator for the Q function
    epochCount = 1
    if continue_train:
        epochCount = opt_count
    model = net1(num_features, num_actions, params['nn'], weightsFile=pretrained_model,
                 epochCount=epochCount, enlarge_lr=enlarge_lr)

    # create a new game instance and get the initial state by moving forward
    game_state = carmunk.GameState(weights, scene_file_name)
    _, state, _, _, _ = game_state.frame_step(11)
    #_, state, _ = game_state.frame_step((0, 1))

    # let's time it
    start_time = timeit.default_timer()
    expert_count = 0
    stop_status = 0

    # run the frames
    frame_idx = 0
    car_move_count = 0  # track the number of moves the car is making
    car_survive_move_count = 0  # store the maximum number of moves the car made before running into something
    print("In QLearning - the total number of training frames is: ", train_frames)
    while frame_idx < train_frames:
        if frame_idx % 1000 == 0:
            print("In QLearning - current training frame is: ", frame_idx)
        frame_idx += 1
        car_move_count += 1

        # choose an action.
        # before we reach the number of observing frames (for training) we just sample random actions
        if expert_count > 0:
            action = game_state.get_expert_action()
            expert_count -= 1
        elif random.random() < epsilon or frame_idx < observe_frames:
            action = np.random.randint(0, 25)  # sample a random action
            #action = np.random.random([2])*2-1
        else:
            # get Q values for each action. Q values are scores associated with each action
            qval = model.predict(state, batch_size=1)
            action = np.argmax(qval)  # get the best action
            #action = model.predict(state, batch_size=1)

        # execute action, receive a reward and get the next state
        reward, next_state, _, _, _ = game_state.frame_step(
            action, hitting_reaction_mode=hitting_reaction_mode)

        if hitting_reaction_mode == 2:  # use the expert when hitting
            if next_state[0][-1] == 1:  # hitting
                if expert_count == 0:
                    expert_count = game_state.max_history_num
            else:
                expert_count = 0

        # store experiences
        replay.append((state, action, reward, next_state))

        # if we're done observing, start training
        if frame_idx > observe_frames:
            # if we've stored enough in our buffer, pop the oldest
            if len(replay) > buffer:  # currently buffer = 50000
                replay.pop(0)

            # sample our experience
            mini_batch = random.sample(replay, my_batch_size)  # currently batchSize = 100

            # get training data
            X_train, y_train = process_minibatch(mini_batch, model, num_features, num_actions)

            # train a model on this batch
            history = LossHistory()
            model.fit(X_train, y_train, batch_size=my_batch_size, epochs=1, verbose=0,
                      callbacks=[history])
            #outPutW(model.get_weights())
            loss_log.append(history.losses)
            if frame_idx % 100 == 0:
                print("history.losses ", history.losses)

            if frame_idx % 100 == 0:
                temp_fe, aver_score, aver_dist = play(model, weights, play_rounds=10,
                                                      scene_file_name=scene_file_name)
                if len(score_log) == 0 or (len(score_log) > 0 and aver_score > np.max(score_log)
                                           and aver_dist > np.max(dist_log)):
                    model.save_weights(model_name, overwrite=True)
                    np.save(weights_name, weights)
                    print("Saving model inner: ", model_name)
                score_log.append([aver_score])
                dist_log.append([aver_dist])
            '''
            if frame_idx % 4000 == 0:
                lr = 0.001 / 2**(frame_idx/4000)
                print('===============lr===============', lr)
                #optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.0, nesterov=False)
                #optimizer = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)
                optimizer = keras.optimizers.Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999, amsgrad=False)
                #optimizer = keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
                #optimizer = keras.optimizers.Nadam(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
                model.compile(optimizer=optimizer, loss='mse')
            '''
            # diverges, early stop
            '''
            if history.losses[0] > 1000:
                model = net1(num_features, num_actions, params['nn'], weightsFile=pretrained_model)
                model.save_weights(model_name, overwrite=True)
                np.save(weights_name, weights)
                print("Diverges, early stop, loss=", history.losses[0])
                print("Saving model: ", model_name)
                stop_status = -1
                break

            # converges, early stop
            if history.losses[0] < 1e-6:
                model.save_weights(model_name, overwrite=True)
                np.save(weights_name, weights)
                print("Converges, early stop, loss=", history.losses[0])
                print("Saving model: ", model_name)
                stop_status = 1
                break
            '''

        # update the state
        state = next_state

        # decrease epsilon over time to reduce the chance of taking a random action over the best action based on Q values
        if epsilon > 0.1 and frame_idx > observe_frames:
            epsilon -= d_epsilon

        # car died, update
        if state[0][-1] == 1:
            # log the car's distance at this frame index
            survive_data.append([frame_idx, car_move_count])

            # update
            if car_move_count > car_survive_move_count:
                car_survive_move_count = car_move_count

            # time it
            survive_time = timeit.default_timer() - start_time
            fps = car_move_count / survive_time

            # reset
            car_move_count = 0
            start_time = timeit.default_timer()

        # save the current model
        if frame_idx == train_frames:
            #model.save_weights(model_name, overwrite=True)
            #np.save(weights_name, weights)
            print("Saving model: ", model_name)

    # log results after we're done with all training frames
    log_results(results_folder, filename, survive_data, loss_log, score_log, dist_log)
    print("Q learning finished!")
    return model_name, stop_status
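# QLearning calls a process_minibatch helper that is not shown in this section. Below is a minimal
# sketch of what such a helper conventionally computes for one-step Q-learning targets; the discount
# factor GAMMA and the crash-flag convention on next_state[0][-1] are assumptions mirroring the
# checks used above, not the project's actual implementation.
import numpy as np

GAMMA = 0.9  # discount factor (assumed; the project's value is not shown)

def process_minibatch_sketch(minibatch, model, num_features, num_actions):
    # minibatch holds (state, action, reward, next_state) tuples, as stored in `replay` above
    X_train, y_train = [], []
    for state, action, reward, next_state in minibatch:
        old_q = model.predict(state, batch_size=1)                 # current Q estimates, shape (1, num_actions)
        max_next_q = np.max(model.predict(next_state, batch_size=1))
        target = old_q.reshape(num_actions).copy()
        if next_state[0][-1] == 1:                                 # crashed: no bootstrapping
            target[action] = reward
        else:
            target[action] = reward + GAMMA * max_next_q
        X_train.append(state.reshape(num_features))
        y_train.append(target)
    return np.array(X_train), np.array(y_train)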