def main():
    """Command-line entry point: project image features away from BMI.

    Loads a dict keyed by scan name from ``--in-data-dict-bin``; each entry
    is read for its ``'ImageData'`` feature vector and its ``'bmi'`` value.
    The features and BMI values are handed to
    ``EigenThoraxLinearRegression1D``; the matrix returned by its
    ``project_to_complement_space()`` replaces each scan's ``'ImageData'``,
    and the updated dict is written to ``--out-data-dict-bin``.

    ``--in-feature-dim`` (default 20) sets the width of the feature matrix
    that every ``'ImageData'`` vector is assigned into.
    """
    parser = argparse.ArgumentParser(
        description='Eliminate the 1D subspace that correspond to BMI')
    parser.add_argument('--in-data-dict-bin', type=str)
    parser.add_argument('--in-feature-dim', type=int, default=20)
    parser.add_argument('--out-data-dict-bin', type=str)
    args = parser.parse_args()

    scan_dict = load_object(args.in_data_dict_bin)
    scan_names = list(scan_dict.keys())
    num_scans = len(scan_names)

    # One row per scan, filled in the same order as scan_names.
    features = np.zeros((num_scans, args.in_feature_dim), dtype=float)
    bmi_values = np.zeros((num_scans, ), dtype=float)
    for row, name in enumerate(scan_names):
        record = scan_dict[name]
        features[row, :] = record['ImageData'][:]
        bmi_values[row] = record['bmi']

    regression = EigenThoraxLinearRegression1D(features, bmi_values)
    regression.run_regression()
    projected = regression.project_to_complement_space()

    # Write the projected rows back using the same scan ordering.
    for row, name in enumerate(scan_names):
        scan_dict[name]['ImageData'] = projected[row, :]

    save_object(scan_dict, args.out_data_dict_bin)
def main():
    """Command-line entry point: embed low-dimensional samples with tSNE.

    Loads a container of samples from ``--low-dim-bin-path``, stacks each
    sample's ``'low_dim'`` vector into a matrix that is
    ``--num-pca-component`` wide (default 10), runs ``TSNE`` with
    ``--dim-embedded`` output components (default 2), stores each embedded
    row under the sample's ``'tsne_data'`` key, and saves the container to
    ``--save-bin-path``.
    """
    parser = argparse.ArgumentParser(description='Load a saved pca object')
    parser.add_argument('--low-dim-bin-path', type=str)
    parser.add_argument('--save-bin-path', type=str)
    parser.add_argument('--num-pca-component', type=int, default=10)
    parser.add_argument('--dim-embedded', type=int, default=2)
    args = parser.parse_args()

    logger.info(f'Load low dim data from {args.low_dim_bin_path}')
    low_dim_array = load_object(args.low_dim_bin_path)
    num_samples = len(low_dim_array)

    # Samples are addressed by integer position/key, so index explicitly
    # rather than iterating the container itself.
    data_matrix = np.zeros((num_samples, args.num_pca_component))
    for row in range(num_samples):
        data_matrix[row, :] = low_dim_array[row]['low_dim'][:]

    logger.info(f'Num of sample: {data_matrix.shape[0]}')
    logger.info(f'Num of included PCs: {data_matrix.shape[1]}')

    logger.info('Start tSNE')
    # NOTE(review): newer scikit-learn releases rename `n_iter` to
    # `max_iter` -- confirm against the installed version before upgrading.
    tsne = TSNE(perplexity=50, n_iter=100000, n_components=args.dim_embedded)
    embedded_matrix = tsne.fit_transform(data_matrix)
    logger.info('Complete')
    logger.info(f'Output shape: {embedded_matrix.shape}')

    for row in range(num_samples):
        low_dim_array[row]['tsne_data'] = embedded_matrix[row, :]

    logger.info(f'Save data to {args.save_bin_path}')
    save_object(low_dim_array, args.save_bin_path)
def main():
    """Command-line entry point: combine PCA data with labels and save it.

    Loads the object at ``--in-pca-data-bin``, builds a label reader from
    ``--label-file`` through
    ``ClinicalDataReaderSPORE.create_spore_data_reader_xlsx``, passes both
    to ``generate_data_dict``, and saves the result to
    ``--out-data-dict-bin``.
    """
    parser = argparse.ArgumentParser(description='Load a saved pca object')
    # All three options are plain string paths, registered in this order.
    for flag in ('--in-pca-data-bin', '--label-file', '--out-data-dict-bin'):
        parser.add_argument(flag, type=str)
    args = parser.parse_args()

    pca_data = load_object(args.in_pca_data_bin)
    clinical_reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
        args.label_file)
    data_dict = generate_data_dict(pca_data, clinical_reader)

    logger.info(f'Save dict data object to {args.out_data_dict_bin}')
    save_object(data_dict, args.out_data_dict_bin)
def run_dimension_reduction(self, save_bin_path):
    """Transform the stored feature matrix with the saved PCA and save it.

    Loads a PCA from ``self._pca_bin_path`` into a ``PCA_NII_3D`` wrapper,
    loads the feature data object from ``self._data_bin_path``, applies the
    PCA's ``transform`` to its ``'data_matrix'`` entry, and saves a dict
    holding the original ``'file_list'`` and the new ``'projected_matrix'``
    to ``save_bin_path``.
    """
    # The wrapper is built with placeholder arguments; the PCA itself is
    # read from disk by load_pca.
    pca_wrapper = PCA_NII_3D(None, None, 1)
    pca_wrapper.load_pca(self._pca_bin_path)

    feature_data = load_object(self._data_bin_path)
    fitted_pca = pca_wrapper._get_pca()
    projected_matrix = fitted_pca.transform(feature_data['data_matrix'])

    save_object(
        {
            'file_list': feature_data['file_list'],
            'projected_matrix': projected_matrix,
        },
        save_bin_path,
    )
def save_gaussian_model(self, out_bin):
    """Pass ``self._gaussian_model`` to ``save_object`` with target ``out_bin``."""
    gaussian_model = self._gaussian_model
    save_object(gaussian_model, out_bin)
def save_data_dict_bin(self, bin_file_path):
    """Pass ``self._data_dict`` to ``save_object`` with target ``bin_file_path``."""
    data_dict = self._data_dict
    save_object(data_dict, bin_file_path)
def run_dimension_reduction(self, save_bin_path):
    """Run ``self.run_parallel()``, keep its result, and save it.

    The result is stored on the instance as ``self._low_dim_dict`` and then
    passed to ``save_object`` with target ``save_bin_path``.
    """
    parallel_result = self.run_parallel()
    # Store on the instance first so the saved object is the cached one.
    self._low_dim_dict = parallel_result
    save_object(self._low_dim_dict, save_bin_path)
def save_pca_obj(self, file_path):
    """Pass ``self._pca`` to ``save_object`` with target ``file_path``."""
    pca_obj = self._pca
    save_object(pca_obj, file_path)
def save_bin(self, out_path):
    """Pass ``self._in_dict_obj`` to ``save_object`` with target ``out_path``."""
    dict_obj = self._in_dict_obj
    save_object(dict_obj, out_path)