def track(env, terminal, done, policy):
    # episode done
    if terminal or done:
        # save all data
        utils.save_as_pickle(env.world.episode_info, None,
                             'rp_ep{}_{}'.format(env.world.position_index, policy),
                             force_save=True)
        # reset
        env.world.episode_info = utils.reset_tracking_info()
        env.world.episode_info['a1'].append(
            env.world.positions[env.world.position_index + 1][0].tolist())
        env.world.episode_info['a2'].append(
            env.world.positions[env.world.position_index + 1][1].tolist())
        env.world.episode_info['goal'] = env.world.positions[
            env.world.position_index + 1][-1].tolist()
        env.world.episode_info['random_positions'] = env.world.positions[
            env.world.position_index + 1]
        return

    a1_pos = utils.get_position(env, 'a1').tolist()
    a2_pos = utils.get_position(env, 'a2').tolist()
    goal_pos = utils.get_position(env, 'goal').tolist()
    env.world.episode_info['a1'].append(a1_pos)
    env.world.episode_info['a2'].append(a2_pos)
    env.world.episode_info['goal'] = goal_pos
def pre_process(leaf_size=40, processed_data=r'./proc_data.tree',
                lookup=r'./id_star.lkp'):
    """
    Pre-process the stars data into a k-d tree and save the result.

    Args:
        leaf_size: The number of points at which the algorithm switches over
            to brute force. Has to be positive.
        processed_data: The location where the processed data is stored as a
            pickle file.
        lookup: The location where the data id to star object lookup is stored.

    Returns:
        No return
    """
    utils.skip_line(1)
    stars = []
    idx = 0
    id_star_dct = {}
    for line in sys.stdin:
        star_name, x, y, z = utils.parse_line(line)
        stars.append([x, y, z])
        id_star_dct[idx] = star(x, y, z, star_name)
        idx += 1

    # construct a k-d tree
    tree = spatial.KDTree(stars, leafsize=leaf_size)
    utils.save_as_pickle(data=tree, pickle_file=processed_data)
    utils.save_as_pickle(data=id_star_dct, pickle_file=lookup)
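# A minimal usage sketch (not part of the source): load the pickled tree and
# lookup produced by pre_process() and find the k nearest stars to a query
# point. It assumes a utils.load_pickle counterpart to utils.save_as_pickle
# exists; the function name and defaults below are illustrative.
def nearest_stars(query_xyz, k=5,
                  processed_data=r'./proc_data.tree', lookup=r'./id_star.lkp'):
    tree = utils.load_pickle(processed_data)
    id_star_dct = utils.load_pickle(lookup)
    # KDTree.query returns distances and indices into the original point list
    distances, indices = tree.query(query_xyz, k=k)
    return [(id_star_dct[i], d) for i, d in zip(indices, distances)]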
def calculate_user_preferences(user_id_list: str = None,
                               start: str = Config.train_start,
                               end: str = Config.train_end,
                               save_path: str = './dev_recommendation_sources/'):
    if user_id_list == 'dev':
        user_id_list = load_pickle(Config.dev_user_list)
    elif user_id_list == 'test':
        user_id_list = load_pickle(Config.test_user_list)
    elif isinstance(user_id_list, str):  # a file path was passed
        user_id_list = load_pickle(user_id_list)

    print('Load calculating tools...', end='\t')
    try:
        user_time_read = load_user_time_read(Config.user_time_read)
        post_id_encoder = PostIdEncoder(Config.encodings_root)
        tfidf_generator = TFIDFGenerator(Config.tfidf_root)
    except Exception:  # when run from a Jupyter notebook
        user_time_read = load_user_time_read("../preprocessed/user_time_read.json")
        post_id_encoder = PostIdEncoder("../encodings/")
        tfidf_generator = TFIDFGenerator("../tfidf")
    print('loaded!')

    post_meta_id = []          # every post read by the users in user_id_list
    posts_raw = []             # every post appearing in the users' logs
    user_preferences_raw = []  # the users' feature vectors

    for user_id in tqdm(user_id_list, desc=f'User Preference Extraction ({start}-{end})'):
        # the user's reading log over the configured interval
        history = filter_read_by_time(user_time_read[user_id], start, end)
        history = squeeze(list(map(lambda x: x[-1], history)))

        # build the TF-IDF matrix from the user's log
        user_tfidf = tfidf_generator.generate(post_id_encoder.transform(history), drop_id=False)

        # build the user feature vector from the TF-IDF matrix:
        # sum over rows after dropping the post_meta_id column
        preference = sparse.csr_matrix(user_tfidf.iloc[:, 1:].values.sum(axis=0)[:, np.newaxis])

        user_tfidf = user_tfidf.groupby('post_meta_id').first().reset_index()  # faster than drop_duplicates()
        user_tfidf = user_tfidf.loc[~user_tfidf['post_meta_id'].isin(post_meta_id), :]
        if len(user_tfidf) > 0:
            post_meta_id.extend(user_tfidf['post_meta_id'].tolist())
            # append without the post_meta_id column; the post_meta_id list is kept separately
            posts_raw.append(sparse.csr_matrix(user_tfidf.iloc[:, 1:]))
        user_preferences_raw.append(preference)

    print('Postprocessing...', end='\t')
    posts = sparse.vstack(posts_raw)
    user_preferences = sparse.hstack(user_preferences_raw)
    idf = np.array(np.log(tfidf_generator.DF.values.squeeze())
                   - np.log((posts != 0).sum(axis=0) + 1e-4))  # 1e-4: avoid log(0) for terms that never appear
    recommend_output = (posts.multiply(idf)).dot(user_preferences)
    print('finished!')

    if save_path:
        interval = f'{start}-{end}'
        if interval not in os.listdir(save_path):
            os.mkdir(os.path.join(save_path, interval))
        save_path = os.path.join(save_path, interval)
        sparse.save_npz(os.path.join(save_path, 'recommend_output.npz'), recommend_output)
        sparse.save_npz(os.path.join(save_path, 'user_preferences.npz'), user_preferences)
        np.save(os.path.join(save_path, 'idf.npy'), idf)
        sparse.save_npz(os.path.join(save_path, 'posts.npz'), posts)
        save_as_pickle(os.path.join(save_path, 'post_meta_id.pkl'), post_meta_id)
        print(f'Saved successfully in {save_path}😎')
    else:
        return recommend_output, user_preferences, idf, posts, post_meta_id
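# A post-hoc sketch (illustrative only, not from the source): turn the saved
# recommend_output matrix into top-k post recommendations per user. It assumes
# the row order of recommend_output matches post_meta_id and the column order
# matches user_id_list, which is how calculate_user_preferences() builds them.
def top_k_recommendations(save_path, user_id_list, k=10):
    recommend_output = sparse.load_npz(os.path.join(save_path, 'recommend_output.npz'))
    post_meta_id = load_pickle(os.path.join(save_path, 'post_meta_id.pkl'))
    scores = np.asarray(recommend_output.todense())  # shape: (num_posts, num_users)
    recommendations = {}
    for j, user_id in enumerate(user_id_list):
        top_idx = np.argsort(scores[:, j])[::-1][:k]  # highest scores first
        recommendations[user_id] = [post_meta_id[i] for i in top_idx]
    return recommendations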
def run_switch_ssm_fitness_simulations(input_fnames, output_fname, sim_params,
                                       sim_label):
    """
    Run switch SSM fitness simulations and compare the results of
    different growth policies.
    """
    print("running switch ssm fitness simulations...")
    fitness_params_fname = input_fnames[0]
    ssm_params_fname = input_fnames[1]
    params = simulation.load_params(fitness_params_fname)
    model_params = simulation.load_params(ssm_params_fname)
    params.update(model_params)
    all_policies = OrderedDict()
    all_policies["Posterior predictive"] = policies.posterior_pred_policy
    all_policies["Plastic"] = policies.plastic_growth_policy
    all_policies["Random"] = policies.rand_growth_policy
    all_policies["Glucose-only"] = policies.glu_only_growth_policy
    all_policies["Posterior pred. (BH)"] = policies.bh_particle_filter_policy
    all_policies["Random (BH)"] = policies.bh_rand_growth_policy
    # fixed parameters for all simulations
    p_init_output = params["p_init_output"]
    time_obj = time_unit.Time(params["t_start"], params["t_end"],
                              step_size=params["step_size"])
    # include model parameters in the simulation parameter set so that
    # policies that need to run the model (like the posterior predictive
    # policy) can access them
    params.update(sim_params)
    # include list of policies we ran
    params["policies"] = list(all_policies.keys())
    # probabilities for the data-generating SSM model
    p_switch_to_switch = params["p_switch_to_switch"]
    p_noswitch_to_switch = params["p_noswitch_to_switch"]
    nutr_labels = params["nutr_labels"]
    nutr_growth_rates = params["nutr_growth_rates"]
    out_trans_mat1 = params["out_trans_mat1"]
    out_trans_mat2 = params["out_trans_mat2"]

    def nutrient_simulator(time_obj):
        return nutrient_simulators.ssm_nutrient_simulator(
            time_obj,
            out_trans_mat1=out_trans_mat1,
            out_trans_mat2=out_trans_mat2,
            p_switch_to_switch=p_switch_to_switch,
            p_noswitch_to_switch=p_noswitch_to_switch,
            p_init_output=p_init_output)

    # simulate mixed sugar environment
    env_obj = \
        env.MixedDiscEnvironment(nutr_labels, nutrient_simulator,
                                 nutr_growth_rates=nutr_growth_rates)
    fitness_obj = fitness.FitnessSim(all_policies, env_obj, params)
    sim_results = fitness_obj.simulate(time_obj)
    final_results = {"sim_params": sim_params,
                     "sim_results": sim_results}
    utils.save_as_pickle(output_fname, sim_results, extra={"params": params})
def merge_switch_ssm_fitness_sims(input_fnames, output_fname):
    """
    Combine all the switch SSM fitness simulations into a single pickle file.
    """
    all_sim_data = OrderedDict()
    for fname in input_fnames:
        sim_name = os.path.basename(fname).split(".data")[0]
        curr_sim_data = utils.load_pickle(fname)
        all_sim_data[sim_name] = curr_sim_data
    extra = {}
    utils.save_as_pickle(output_fname, all_sim_data, extra)
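# A hypothetical driver sketch: the file names and parameter grid below are
# illustrative, not taken from the source. It only shows how the per-simulation
# runner and the merge step above could be chained together.
def run_and_merge_switch_ssm_sims(fitness_params_fname, ssm_params_fname,
                                  param_grid, merged_fname):
    sim_fnames = []
    for n, sim_params in enumerate(param_grid):
        out_fname = "switch_ssm_fitness_sim%d.data.p" % n
        run_switch_ssm_fitness_simulations([fitness_params_fname, ssm_params_fname],
                                           out_fname, sim_params,
                                           sim_label="sim%d" % n)
        sim_fnames.append(out_fname)
    # the merge step keys each simulation by its basename up to ".data"
    merge_switch_ssm_fitness_sims(sim_fnames, merged_fname)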
def run_bet_hedge_sims_2_nutr(param_fname, output_fname, label):
    """
    Run bet-hedging simulations for two nutrients.
    """
    print("running bet hedge simulations (2 nutrients)...")
    # simulate two-nutrient model
    params = simulation.load_params(param_fname)
    print("params: ", params)
    data = {}
    extra = {"params": params}
    # run bet hedging simulation
    ###
    ### need skeleton code here
    ###
    # save results as a pickle file
    utils.save_as_pickle(output_fname, data, extra)
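# Generic illustration only (plain NumPy, no project APIs), since the
# simulation body above is still a placeholder: a population split between two
# phenotypes grows in an environment that switches randomly between two
# nutrients, and each phenotype grows fast only on its matching nutrient.
# All names and parameters here are hypothetical.
import numpy as np

def toy_bet_hedge_sim_2_nutr(n_steps=100, p_switch=0.1, bet_fraction=0.5,
                             growth_matched=1.0, growth_mismatched=0.1, seed=0):
    rng = np.random.default_rng(seed)
    pop = np.array([bet_fraction, 1.0 - bet_fraction])  # phenotype fractions
    log_pop_size = 0.0
    nutrient = 0
    for _ in range(n_steps):
        if rng.random() < p_switch:
            nutrient = 1 - nutrient          # environment switches nutrient
        rates = np.full(2, growth_mismatched)
        rates[nutrient] = growth_matched     # matching phenotype grows faster
        new_pop = pop * np.exp(rates)        # per-step exponential growth
        log_pop_size += np.log(new_pop.sum())
        pop = new_pop / new_pop.sum()        # renormalize phenotype fractions
    return log_pop_size                      # log total growth over the run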
def _save_info(self):
    self.data = [
        self.pos_queries, self.test_pos, self.obs, self.actions, self.q_vals,
        self.argmaxs, self.swapped_obs, self.swapped_actions,
        self.swapped_q_vals, self.swapped_argmaxs
    ]
    self.names = [
        "pos_queries", "test_positions", "obs", "actions", "q_vals",
        "argmaxs", "swapped_obs", "swapped_actions", "swapped_q_vals",
        "swapped_argmaxs"
    ]
    assert len(self.data) == len(self.names), \
        "The number of data entries to save must match the number of names to save them under!"
    print("Saving under filename: {} this information: {}".format(
        self.filename, self.names))
    # create the directory if it doesn't exist
    os.makedirs(os.path.dirname(self.filename), exist_ok=True)
    # plt.show()
    self.info = utils.save_as_pickle(self.data, self.names, self.filename)
def load_train_labels():
    """
    Load the train labels as a numpy array.

    :return: numpy array of encoded labels
    """
    config = get_config()
    with open(config['image_paths']['train_labels'], newline='') as csv_file:
        reader = csv.DictReader(csv_file, delimiter=',')
        labels = np.asarray([row['label'] for row in reader])
    label_dict = get_label_dict()
    return np.asarray([label_dict[label_string] for label_string in labels])


if __name__ == '__main__':
    config = get_config()
    train_images_np = transform_images(config['image_paths']['train_images_raw'])
    save_as_pickle(train_images_np, config['image_paths']['train_images_pickle'])
    test_images_np = transform_images(config['image_paths']['test_images_raw'])
    save_as_pickle(test_images_np, config['image_paths']['test_images_pickle'])
    # load_train_labels()
    # loaded = load_pickle(config['image_paths']['train_images_pickle'])
    # print(loaded.shape)
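# A minimal sketch of the save_as_pickle / load_pickle helpers used above,
# under the assumption that they are thin wrappers around the standard pickle
# module; the project's actual implementations may differ.
import pickle

def save_as_pickle(data, pickle_path):
    # serialize any Python object to the given path
    with open(pickle_path, 'wb') as f:
        pickle.dump(data, f)

def load_pickle(pickle_path):
    # read back an object previously written by save_as_pickle
    with open(pickle_path, 'rb') as f:
        return pickle.load(f)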
def get_save_df(root_dir: str = Config.raw_dir, vocab_path: str = VOCAB_PATH):
    metadata = load_raw('metadata', root_dir)
    vocab = load_pickle(vocab_path)
    df = get_df(metadata, vocab)
    save_as_pickle(df.values.squeeze().tolist(),
                   '../tfidf/df_vocab7000_aggregation.pkl')