def main():
    """Track how well the first principal component follows the overall update direction.

    The overall direction ``V`` is ``final_params - start_params``, both read
    from the run's parameter-trajectory directory. For every chunk yielded by
    ``all_param_iterator`` two angles against ``V`` are recorded:

    * "unduped": first component of a model that is ``partial_fit`` on the raw
      chunks as they arrive;
    * "duped": first component of a fresh ``WPCA`` model that is ``partial_fit``
      (in slices of ``cma_args.chunk_size`` rows) on whatever
      ``dup_so_far_buffer`` returns for the rows accumulated so far.

    Angles above 90 are folded to ``180 - angle`` (they are treated as degrees,
    matching the plot axis label). Three plots are then written to the plot
    directory: the duped series, the unduped series, and their difference.

    Reads these attributes from the parsed command-line args:
    ``n_comp_to_use``, ``chunk_size`` and ``pc1_chunk_size`` (the last one is
    only used in plot titles).
    """
    import gc
    import sys

    def fold_angle(angle):
        # TODO ignore 90 or 180 for now
        # A principal axis has no sign, so an angle above 90 is mapped to its
        # supplement.
        return 180 - angle if angle > 90 else angle

    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, _ = common_arg_parser.parse_known_args()

    this_run_dir = get_dir_path_for_this_run(cma_args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(intermediate_data_dir, exist_ok=True)

    # ======================================================================
    # get the pc vectors
    # ======================================================================
    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    count_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "total_num_dumped")
    total_num = pd.read_csv(count_file, header=None).values[0]

    V = final_params - start_params

    # NOTE(review): this result is never read below. The load is kept only in
    # case get_allinone_concat_df has side effects -- confirm and remove.
    all_thetas_downsampled = get_allinone_concat_df(
        dir_name=traj_params_dir_name).values[::2]

    unduped_angles_along_the_way = []
    duped_angles_along_the_way = []
    diff_along = []

    num = 2  # TODO hardcode!

    # NOTE(review): partial_fit is called on this object, so PCA presumably
    # refers to an incremental implementation here -- verify the import.
    undup_ipca = PCA(n_components=1)

    # NOTE(review): `all_param_iterator` and `last_percentage` are not assigned
    # anywhere in this function; they must exist at module scope or this
    # raises NameError -- confirm where they are meant to come from.
    all_matrix_buffer = []
    for chunk in all_param_iterator:
        chunk = chunk.values

        undup_ipca.partial_fit(chunk)
        unduped_angle = fold_angle(cal_angle(V, undup_ipca.components_[0]))
        unduped_angles_along_the_way.append(unduped_angle)

        all_matrix_buffer.extend(chunk)

        # NOTE(review): `weights` is computed but never passed to the model
        # below -- confirm whether it was meant to be used.
        weights = gen_weights(all_param_iterator._currow, total_num)
        duped_in_so_far = dup_so_far_buffer(all_matrix_buffer,
                                            last_percentage, num)
        logger.log(
            f"currently at {all_param_iterator._currow}, last_pecentage: {last_percentage}"
        )

        # A fresh model is built for every outer chunk and fed the whole
        # buffer again in fixed-size slices.
        ipca = WPCA(n_components=cma_args.n_comp_to_use)
        chunk_size = cma_args.chunk_size
        for i in range(0, len(duped_in_so_far), chunk_size):
            logger.log(
                f"partial fitting: i : {i} len(duped_in_so_far): {len(duped_in_so_far)}"
            )
            # Slicing clamps at the end of the sequence, so the final short
            # slice needs no special case.
            ipca.partial_fit(duped_in_so_far[i:i + chunk_size])

        duped_angle = fold_angle(cal_angle(V, ipca.components_[0]))
        duped_angles_along_the_way.append(duped_angle)
        diff_along.append(unduped_angle - duped_angle)

    plot_dir = get_plot_dir(cma_args)
    os.makedirs(plot_dir, exist_ok=True)

    # Same three plots, in the same order and with the same titles as before.
    title_suffix = f", num dup: {num}" \
                   f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
    series_to_plot = (
        ("duped exponential 2", duped_angles_along_the_way),
        ("unduped exponential 2", unduped_angles_along_the_way),
        ("undup - dup diff_along exponential 2", diff_along),
    )
    for title_prefix, series in series_to_plot:
        plot_2d(plot_dir, title_prefix + title_suffix,
                np.arange(len(series)), series,
                "num of chunks", "angle with diff in degrees", False)

    # Drop the accumulated rows and force a collection before returning.
    del all_matrix_buffer
    gc.collect()