def load_local_view(train_ratio=0.9):
    """
    NOTE: this is a generator and it does NOT shuffle by itself; the caller
    sends the shuffle indices in via .send().

    It yields twice:
    1. the total number of samples
    2. (train_x, train_y), (test_x, test_y) of the local view
    """
    # make sure we use the same shuffle indices as the global view
    with load_ctx(local_all_pc_flux_filename):
        local_all_pcs = np.loadtxt(local_all_pc_flux_filename)
    with load_ctx(local_all_non_pc_flux_filename):
        local_all_non_pcs = np.loadtxt(local_all_non_pc_flux_filename)

    all_x = np.concatenate([local_all_pcs, local_all_non_pcs])
    all_x = all_x.reshape(*all_x.shape, 1)
    all_y = np.concatenate([
        np.ones(len(local_all_pcs), dtype=int),
        np.zeros(len(local_all_non_pcs), dtype=int)
    ])

    len_all_x, len_all_y = len(all_x), len(all_y)
    assert len_all_x == len_all_y, \
        f"data and label sizes differ ({len_all_x} != {len_all_y})"

    # the caller receives the length and sends back the shuffle indices
    shuffle_idx = yield len_all_x
    if shuffle_idx is not None:
        all_x = all_x[shuffle_idx]
        all_y = all_y[shuffle_idx]

    num_train = int(len_all_x * train_ratio)
    train_x, test_x = all_x[:num_train], all_x[num_train:]
    train_y, test_y = all_y[:num_train], all_y[num_train:]
    yield (train_x, train_y), (test_x, test_y)
def load_global_view(train_ratio=0.9):
    """
    NOTE: this is a generator, NOT a normal function.

    "load_global_view" and "load_local_view" have to share the same shuffle
    indices. The indices are sent in by the caller, which does not know how
    many samples there are in advance, so this generator yields the length
    first.

    It yields twice:
    1. the total number of samples
    2. (train_x, train_y), (test_x, test_y) of the global view (2001 bins)
    """
    with load_ctx(all_pc_flux_filename):
        all_pcs = np.loadtxt(all_pc_flux_filename)
    with load_ctx(all_non_pc_flux_filename):
        all_non_pcs = np.loadtxt(all_non_pc_flux_filename)

    all_x = np.concatenate([all_pcs, all_non_pcs])
    all_x = all_x.reshape(*all_x.shape, 1)
    all_y = np.concatenate([
        np.ones(len(all_pcs), dtype=int),
        np.zeros(len(all_non_pcs), dtype=int)
    ])

    len_all_x, len_all_y = len(all_x), len(all_y)
    assert len_all_x == len_all_y, \
        f"data and label sizes differ ({len_all_x} != {len_all_y})"

    # the caller receives the length and sends back the shuffle indices
    shuffle_idx = yield len_all_x
    if shuffle_idx is not None:
        all_x = all_x[shuffle_idx]
        all_y = all_y[shuffle_idx]

    num_train = int(len_all_x * train_ratio)
    train_x, test_x = all_x[:num_train], all_x[num_train:]
    train_y, test_y = all_y[:num_train], all_y[num_train:]
    yield (train_x, train_y), (test_x, test_y)
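# For illustration only: a minimal driver sketch showing how a caller might
# advance load_global_view and load_local_view together so that both use one
# set of shuffle indices. This helper is an assumption, not part of the
# original module; it relies only on the generator protocol documented above.
def load_views_with_shared_shuffle(train_ratio=0.9, seed=None):
    global_gen = load_global_view(train_ratio=train_ratio)
    local_gen = load_local_view(train_ratio=train_ratio)

    # first yield: total number of samples from each view
    n_global = next(global_gen)
    n_local = next(local_gen)
    assert n_global == n_local, "global and local views must have the same length"

    # one permutation, sent into both generators so the rows stay paired
    rng = np.random.default_rng(seed)
    shuffle_idx = rng.permutation(n_global)

    # second yield: the (train, test) splits of each view
    global_split = global_gen.send(shuffle_idx)
    local_split = local_gen.send(shuffle_idx)
    return global_split, local_split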
def get_binned_normalized_PC_flux(num=1, merge=True, shuffle=False,
                                  num_bins=num_bins, overwrite=False,
                                  bin_width=None, return_kepids=False,
                                  scramble_id=None, nth=1):
    """
    Return the binned, normalized global view of PC flux.
    If num == np.inf, return all light curves (default is 1).
    """
    if num == np.inf and os.path.exists(all_pc_flux_filename) \
            and not overwrite:
        print('argument "return_kepids" is ignored')
        with load_ctx(all_pc_flux_filename):
            res = np.loadtxt(all_pc_flux_filename)
        return res

    if shuffle:
        if nth == 1:
            print("shuffle used")
        else:
            shuffle = False  # ignore shuffle
            print("shuffle ignored because nth != 1")

    all_pcs = get_PC_IDs(num=num, shuffle=shuffle, nth=nth)
    pcs, others = get_binned_normalized_flux_by_IDs(
        all_pcs, merge=merge, overwrite=overwrite, num_bins=num_bins,
        bin_width=bin_width, scramble_id=scramble_id)
    if others is not None:
        # `others` should be None for PC IDs; a ValueError used to be raised here
        pass

    # The final file is written only when:
    # 1. all_pc_flux_filename does not exist and num == np.inf, or
    # 2. overwrite is True
    if num == np.inf and (not os.path.exists(all_pc_flux_filename) or overwrite):
        # write all data to a file; the filename is imported from config.py
        with save_ctx(all_pc_flux_filename):
            np.savetxt(all_pc_flux_filename, pcs, fmt=_float_fmt)

    res = pcs if not return_kepids else (pcs, all_pcs)
    return res
def get_local_binned_normalized_Non_PC_flux(num=1, merge=True, shuffle=False,
                                            overwrite=False, scramble_id=None,
                                            return_kepids=False, nth=1):
    """
    Return the binned, normalized local view of non-PC flux.
    If num == np.inf, return all light curves (default is 1).
    """
    if num == np.inf and os.path.exists(local_all_non_pc_flux_filename) \
            and not overwrite:
        with load_ctx(local_all_non_pc_flux_filename):
            res = np.loadtxt(local_all_non_pc_flux_filename)
        return res

    if shuffle:
        if nth == 1:
            print("shuffle used")
        else:
            shuffle = False  # ignore shuffle
            print("shuffle ignored because nth != 1")

    all_non_pcs = get_NonPC_IDs(num=num, shuffle=shuffle, nth=nth)
    pcs, others = get_binned_local_view_by_IDs(
        all_non_pcs, merge=merge, overwrite=overwrite, scramble_id=scramble_id)
    if pcs is not None:
        # `pcs` should be None for non-PC IDs; a ValueError used to be raised here
        pass

    # The final file is written only when:
    # 1. local_all_non_pc_flux_filename does not exist and num == np.inf, or
    # 2. overwrite is True
    if num == np.inf and (not os.path.exists(local_all_non_pc_flux_filename)
                          or overwrite):
        # write all data to a file; the filename is imported from config.py
        # (np.savetxt, not np.save, so the fmt argument is valid)
        with save_ctx(local_all_non_pc_flux_filename):
            np.savetxt(local_all_non_pc_flux_filename, others, fmt=_float_fmt)

    if return_kepids:
        return others, all_non_pcs
    return others
def get_binned_normalized_Non_PC_flux(num=1, merge=True, shuffle=False,
                                      overwrite=False, num_bins=num_bins,
                                      bin_width=None, return_kepids=False,
                                      scramble_id=None, nth=1):
    """
    Return the binned, normalized global view of non-PC flux.
    If num == np.inf, return all light curves (default is 1).
    """
    if num == np.inf and os.path.exists(all_non_pc_flux_filename) \
            and not overwrite:
        with load_ctx(all_non_pc_flux_filename):
            res = np.loadtxt(all_non_pc_flux_filename)
        return res

    if shuffle:
        if nth == 1:
            print("shuffle used")
        else:
            shuffle = False  # ignore shuffle
            print("shuffle ignored because nth != 1")

    all_others = get_NonPC_IDs(num=num, shuffle=shuffle, nth=nth)
    pcs, others = get_binned_normalized_flux_by_IDs(
        all_others, merge=merge, overwrite=overwrite, num_bins=num_bins,
        bin_width=bin_width, scramble_id=scramble_id)
    if pcs is not None:
        # `pcs` should be None for non-PC IDs; a ValueError used to be raised here
        pass

    # The final file is written only when:
    # 1. all_non_pc_flux_filename does not exist and num == np.inf, or
    # 2. overwrite is True
    if num == np.inf and (not os.path.exists(all_non_pc_flux_filename) or overwrite):
        # write all data to a file; the filename is imported from config.py
        with save_ctx(all_non_pc_flux_filename):
            np.savetxt(all_non_pc_flux_filename, others, fmt=_float_fmt)

    res = others if not return_kepids else (others, all_others)
    return res
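# Illustrative usage of the caching behaviour shared by the flux loaders above
# (assumed calls, not part of the original module). The first num=np.inf call
# computes everything and writes the cache file named in config.py; later
# num=np.inf calls load that file unless overwrite=True, while small requests
# bypass the cache entirely.
if __name__ == "__main__":
    all_non_pc_flux = get_binned_normalized_Non_PC_flux(num=np.inf)
    sample_flux, sample_kepids = get_binned_normalized_Non_PC_flux(
        num=10, return_kepids=True)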