Example no. 1
def track(env, terminal, done, policy):
    # episode done
    if terminal or done:
        # save all data
        utils.save_as_pickle(env.world.episode_info,
                             None,
                             'rp_ep{}_{}'.format(env.world.position_index,
                                                 policy),
                             force_save=True)
        # reset tracking info and seed it with the next episode's starting positions and goal
        env.world.episode_info = utils.reset_tracking_info()
        env.world.episode_info['a1'].append(
            env.world.positions[env.world.position_index + 1][0].tolist())
        env.world.episode_info['a2'].append(
            env.world.positions[env.world.position_index + 1][1].tolist())
        env.world.episode_info['goal'] = env.world.positions[
            env.world.position_index + 1][-1].tolist()
        env.world.episode_info['random_positions'] = env.world.positions[
            env.world.position_index + 1]
        return

    a1_pos = utils.get_position(env, 'a1').tolist()
    a2_pos = utils.get_position(env, 'a2').tolist()
    goal_pos = utils.get_position(env, 'goal').tolist()

    env.world.episode_info['a1'].append(a1_pos)
    env.world.episode_info['a2'].append(a2_pos)
    env.world.episode_info['goal'] = goal_pos
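
A hedged sketch of how track might be driven from an episode loop, assuming a Gym-style environment whose step() returns (obs, reward, done, info); the agent object, max_steps, and the 'terminal' info key are hypothetical stand-ins, not part of the original code.

# Hypothetical episode loop calling track() once per step (illustrative only)
obs = env.reset()
for step in range(max_steps):
    action = agent.act(obs)                      # stand-in for the actual policy
    obs, reward, done, info = env.step(action)
    terminal = info.get('terminal', False)       # assumed flag name
    track(env, terminal, done, policy='greedy')  # 'greedy' label is illustrative
    if terminal or done:
        break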
Example no. 2
def pre_process(leaf_size=40,
                processed_data=r'./proc_data.tree',
                lookup=r'./id_star.lkp'):
    """
	Function to pre process the stars data into a kd tree and save the data

	Args:
		leaf_size: The number of points at which the algorithm switches over to brute-force. Has to be positive
		processed_data: The location where the processed data is to be stored as a pickle file
		lookup: The location where the data id to object lookup is stored

	Returns:
		No return
	"""
    utils.skip_line(1)
    stars = []
    idx = 0
    id_star_dct = {}
    for line in sys.stdin:
        star_name, x, y, z = utils.parse_line(line)
        stars.append([x, y, z])
        id_star_dct[idx] = star(x, y, z, star_name)
        idx += 1
    # construct a kd-tree
    tree = spatial.KDTree(stars, leafsize=leaf_size)
    utils.save_as_pickle(data=tree, pickle_file=processed_data)
    utils.save_as_pickle(data=id_star_dct, pickle_file=lookup)
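
A companion sketch (not part of the original module) showing how the saved tree and lookup could be queried for the star nearest a given point. It assumes the pickles can be read back with the standard pickle module and that id_star_dct maps tree indices to star objects, as built above; the nearest_star helper name is hypothetical.

def nearest_star(x, y, z,
                 processed_data=r'./proc_data.tree',
                 lookup=r'./id_star.lkp'):
    """Hypothetical helper: return the star closest to (x, y, z) and its distance."""
    import pickle
    with open(processed_data, 'rb') as f:
        tree = pickle.load(f)          # scipy.spatial.KDTree built in pre_process
    with open(lookup, 'rb') as f:
        id_star_dct = pickle.load(f)   # index -> star object
    # KDTree.query returns (distance, index) of the single nearest neighbour
    dist, idx = tree.query([x, y, z], k=1)
    return id_star_dct[int(idx)], float(dist)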
Example no. 3
def calculate_user_preferences(user_id_list: str = None,
                               start: str = Config.train_start,
                               end: str = Config.train_end,
                               save_path: str = './dev_recommendation_sources/'):
    if user_id_list == 'dev':
        user_id_list = load_pickle(Config.dev_user_list)
    elif user_id_list == 'test':
        user_id_list = load_pickle(Config.test_user_list)
    elif isinstance(user_id_list, str): # when a file path is given
        user_id_list = load_pickle(user_id_list)

    print('Load calculating tools...', end='\t')
    try:
        user_time_read = load_user_time_read(Config.user_time_read)
        post_id_encoder = PostIdEncoder(Config.encodings_root)
        tfidf_generator = TFIDFGenerator(Config.tfidf_root)
    except: # fallback paths when run from a Jupyter notebook
        user_time_read = load_user_time_read("../preprocessed/user_time_read.json")
        post_id_encoder = PostIdEncoder("../encodings/")
        tfidf_generator = TFIDFGenerator("../tfidf")
    print('loaded!')

    post_meta_id = [] # every post viewed by the users in user_id_list
    posts_raw = [] # list collecting every post appearing in the users' logs
    user_preferences_raw = [] # list collecting the users' feature vectors

    for user_id in tqdm(user_id_list, desc=f'User Preference Extraction ({start}-{end})'):
        # this user's log restricted to the configured interval
        history = filter_read_by_time(user_time_read[user_id], start, end) 
        history = squeeze(list(map(lambda x: x[-1], history)))

        # build a TF-IDF matrix from the user's log
        user_tfidf = tfidf_generator.generate(post_id_encoder.transform(history), drop_id=False)

        # build the user's feature vector from the TF-IDF matrix
        preference = sparse.csr_matrix(user_tfidf.iloc[:, 1:].values.sum(axis=0)[:, np.newaxis]) # sum after dropping the post_meta_id column
        user_tfidf = user_tfidf.groupby('post_meta_id').first().reset_index() # faster than drop_duplicates()
        user_tfidf = user_tfidf.loc[~user_tfidf['post_meta_id'].isin(post_meta_id), :]
        if len(user_tfidf) > 0:
            post_meta_id.extend(user_tfidf['post_meta_id'].tolist())
        posts_raw.append(sparse.csr_matrix(user_tfidf.iloc[:, 1:])) # append without the post_meta_id column -> not needed here since the post_meta_id list is built separately
        user_preferences_raw.append(preference)

    print('Postprocessing...', end='\t')
    posts = sparse.vstack(posts_raw)
    user_preferences = sparse.hstack(user_preferences_raw)
    idf = np.array(np.log(tfidf_generator.DF.values.squeeze()) - np.log((posts != 0).sum(axis=0) + 1e-4)) # 1e-4: avoid taking log(0)
    recommend_output = (posts.multiply(idf)).dot(user_preferences)
    print('finished!')

    if save_path:
        interval = f'{start}-{end}'
        save_path = os.path.join(save_path, interval)
        os.makedirs(save_path, exist_ok=True)
        sparse.save_npz(os.path.join(save_path, 'recommend_output.npz'), recommend_output)
        sparse.save_npz(os.path.join(save_path, 'user_preferences.npz'), user_preferences)
        np.save(os.path.join(save_path, 'idf.npy'), idf)
        sparse.save_npz(os.path.join(save_path, 'posts.npz'), posts)
        save_as_pickle(os.path.join(save_path, 'post_meta_id.pkl'), post_meta_id)
        print(f'Saved successfully in {save_path}😎')
    else:
        return recommend_output, user_preferences, idf, posts, post_meta_id
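
A hedged usage sketch, assuming the Config paths referenced above are available: the 'dev' branch loads the preconfigured development user list, and passing save_path=None makes the function return the matrices instead of writing them to disk.

# Write the recommendation artifacts for the dev users over the default training interval
calculate_user_preferences('dev')

# Or keep the results in memory instead of saving them
recommend_output, user_preferences, idf, posts, post_meta_id = \
    calculate_user_preferences('dev', save_path=None)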
Example no. 4
def run_switch_ssm_fitness_simulations(input_fnames, output_fname,
                                       sim_params, sim_label):
    """
    Run switch SSM fitness simulations.
    Compare the results of different growth policies.
    """
    print "running switch ssm fitness simulations..."
    fitness_params_fname = input_fnames[0]
    ssm_params_fname = input_fnames[1]
    params = simulation.load_params(fitness_params_fname)
    model_params = simulation.load_params(ssm_params_fname)
    params.update(model_params)
    all_policies = OrderedDict()
    all_policies["Posterior predictive"] = policies.posterior_pred_policy
    all_policies["Plastic"] = policies.plastic_growth_policy
    all_policies["Random"] = policies.rand_growth_policy
    all_policies["Glucose-only"] = policies.glu_only_growth_policy
    all_policies["Posterior pred. (BH)"] = policies.bh_particle_filter_policy
    all_policies["Random (BH)"] = policies.bh_rand_growth_policy
    # fixed parameters for all simulations
    p_init_output = params["p_init_output"]
    time_obj = time_unit.Time(params["t_start"],
                              params["t_end"],
                              step_size=params["step_size"])
    # include model parameters in the simulation parameter set so that
    # policies that need to run the model (like the posterior predictive
    # policy) can access them
    params.update(sim_params)
    # include list of policies we ran
    params["policies"] = all_policies.keys()
    # setting of probabilities for data-generating SSM model
    p_switch_to_switch = params["p_switch_to_switch"]
    p_noswitch_to_switch = params["p_noswitch_to_switch"]
    nutr_labels = params["nutr_labels"]
    nutr_growth_rates = params["nutr_growth_rates"]
    out_trans_mat1 = params["out_trans_mat1"]
    out_trans_mat2 = params["out_trans_mat2"]
    def nutrient_simulator(time_obj):
        return nutrient_simulators.ssm_nutrient_simulator(time_obj,
                                                          out_trans_mat1=out_trans_mat1,
                                                          out_trans_mat2=out_trans_mat2,
                                                          p_switch_to_switch=p_switch_to_switch,
                                                          p_noswitch_to_switch=p_noswitch_to_switch,
                                                          p_init_output=p_init_output)

    # simulate mixed sugar environment
    env_obj = \
      env.MixedDiscEnvironment(nutr_labels,
                               nutrient_simulator,
                               nutr_growth_rates=nutr_growth_rates)
    fitness_obj = fitness.FitnessSim(all_policies, env_obj, params)
    sim_results = fitness_obj.simulate(time_obj)
    final_results = {"sim_params": sim_params,
                     "sim_results": sim_results}
    utils.save_as_pickle(output_fname, final_results,
                         extra={"params": params})
Example no. 5
def merge_switch_ssm_fitness_sims(input_fnames, output_fname):
    """
    Combine all the switch SSM fitness simulations into a single pickle file.
    """
    ### combine all the simulations into one pickle file.
    all_sim_data = OrderedDict()
    for fname in input_fnames:
        sim_name = os.path.basename(fname).split(".data")[0]
        curr_sim_data = utils.load_pickle(fname)
        all_sim_data[sim_name] = curr_sim_data
    extra = {}
    utils.save_as_pickle(output_fname, all_sim_data, extra)
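
A small read-back sketch, assuming utils.load_pickle returns the saved dictionary directly, as it does for the per-simulation files in the loop above; the merged filename below is only illustrative.

# Hypothetical inspection of the merged pickle (filename is a placeholder)
all_sim_data = utils.load_pickle("switch_ssm_fitness_sims.data")
for sim_name, sim_data in all_sim_data.items():
    print(sim_name, type(sim_data))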
Example no. 6
def run_bet_hedge_sims_2_nutr(param_fname, output_fname, label):
    """
    Running bet hedging simulations for two nutrients.
    """
    print "running bet hedge simulations (2 nutrients)..."
    # simulate two nutrient model
    params = simulation.load_params(param_fname)
    print "params: ", params
    data = {}
    extra = {"params": params}
    # run bet hedging simulation
    ###
    ### need skeleton code here
    ###
    # save results as a pickle file
    utils.save_as_pickle(output_fname, data, extra)
Example no. 7
    def _save_info(self):
        self.data = [
            self.pos_queries, self.test_pos, self.obs, self.actions,
            self.q_vals, self.argmaxs, self.swapped_obs, self.swapped_actions,
            self.swapped_q_vals, self.swapped_argmaxs
        ]
        self.names = [
            "pos_queries", "test_positions", "obs", "actions", "q_vals",
            "argmaxs", "swapped_obs", "swapped_actions", "swapped_q_vals",
            "swapped_argmaxs"
        ]

        assert len(self.data) == len(self.names), \
            "Number of data items to save must match the number of names to save them under!"

        print("Saving under filename: {} this information: {}".format(
            self.filename, self.names))
        # Creating directory if it doesn't exist
        os.makedirs(os.path.dirname(self.filename), exist_ok=True)
        # plt.show()

        self.info = utils.save_as_pickle(self.data, self.names, self.filename)
Example no. 8
def load_train_labels():
    """
    loads the train labels as a numpy array
    :return:
    """
    config = get_config()
    with open(config['image_paths']['train_labels'], newline='') as csv_file:
        reader = csv.DictReader(
            csv_file,
            delimiter=',',
        )
        labels = np.asarray([row['label'] for row in reader])
        label_dict = get_label_dict()
        return np.asarray(
            [label_dict[label_string] for label_string in labels])


if __name__ == '__main__':
    config = get_config()
    train_images_np = transform_images(
        config['image_paths']['train_images_raw'])
    save_as_pickle(train_images_np,
                   config['image_paths']['train_images_pickle'])
    test_images_np = transform_images(config['image_paths']['test_images_raw'])
    save_as_pickle(test_images_np, config['image_paths']['test_images_pickle'])
    # load_train_labels()

    # loaded = load_pickle( config['image_paths']['train_images_pickle'] )
    # print(loaded.shape)
Example no. 9
def get_save_df(root_dir: str = Config.raw_dir, vocab_path: str = VOCAB_PATH):
    metadata = load_raw('metadata', root_dir)
    vocab = load_pickle(vocab_path)
    df = get_df(metadata, vocab)
    save_as_pickle(df.values.squeeze().tolist(),
                   '../tfidf/df_vocab7000_aggregation.pkl')