def load_data(self):
    """Read the selected data file, then transform, plot and cluster it.

    Positions the 3D scatter and 2D ternary windows relative to the screen
    size, resets the auto-cluster state on ``self.data``, reads the file with
    the sample size typed into ``clusterSampleSize`` and, when the read
    succeeds, fills the parameter table, transforms the data, updates the
    file label, draws both plots and runs ``cluster_data``.

    An error dialog is shown (and nothing is loaded) when the sample size is
    not a whole number or when ``fcs_read`` reports failure.
    """

    def advance_progress():
        # Step the bar and let Qt repaint between the long-running stages.
        view.update_progress_bar(self.progressBar)
        QtWidgets.QApplication.processEvents()

    screen_width = self.screen_size.width()
    screen_height = self.screen_size.height()
    # Both dimensions of the plot windows are derived from the screen width,
    # which keeps the windows square.
    window_side = int(0.20 * screen_width)
    window_left = int(0.60 * screen_width)

    self.scatter3DWindow.move(window_left, int(0.05 * screen_height))
    self.scatter3DWindow.resize(window_side, window_side)

    self.tern2DWindow.move(window_left, int(0.45 * screen_height))
    self.tern2DWindow.resize(window_side, window_side)

    view.start_progress_bar(self.progressBar, start=0, stop=4)

    self.data.screen_size = self.screen_size
    self.data.OS = self.OS

    # reinitialize auto_cluster data
    # NOTE(review): this stores the pd.Series class itself, not an instance;
    # kept as-is because other code may rely on it -- confirm intent.
    self.data.tab_cluster_data = pd.Series
    self.data.auto_cluster_idx = []

    # load fcs file
    try:
        sample_size = int(self.clusterSampleSize.text())
    except ValueError:
        # Empty or non-numeric text: report it instead of letting the
        # exception escape from the GUI callback.
        helper.error_message(self.error_dialog,
                             'Sample size must be a whole number')
        return

    successful_load = self.data.fcs_read(sample_size)
    if not successful_load:
        helper.error_message(self.error_dialog,
                             'Could not load .fcs or .csv file')
        return

    # fill parameter table
    self.data.param_combo_box_list = view.init_param_table(self.parameterTable,
                                                           self.data.params)
    advance_progress()

    # transform data
    self.data.transform_data()

    # print successful load and display number of cells
    self.fileLabel.setText(self.data.sample_name + '\n'
                           + str(self.data.data_size) + ' total cells ('
                           + str(self.data.raw.shape[0]) + ' sampled cells)')
    advance_progress()

    # initialize 2D and 3D zbow graph
    self.data.zbow_3d_plot(self.scatter3DWindow,
                           scale=self.scatterScaleOption.currentIndex(),
                           color=self.scatterColorOption.currentIndex())
    self.data.zbow_2d_plot(self.tern2DWindow,
                           scale=self.ternScaleOption.currentIndex(),
                           color=self.ternColorOption.currentIndex())
    advance_progress()

    self.cluster_data()
    advance_progress()

    self.data.outliers_removed = False
def make_cluster_plots(self):
    """Plot cluster percentages from several summary CSVs on one figure.

    Asks the user for one or more ``*Summary.csv`` files. For every file the
    rows whose ``id`` is ``'noise'`` are dropped, the ``percentage`` column is
    drawn as a box plot at that file's x position, and each cluster is
    overlaid as a horizontally jittered dot coloured by its
    ``mean R``/``mean G``/``mean B`` values at 0.7 alpha. The figure is saved
    next to the first selected file as ``combined_cluster_graph.png`` and
    ``combined_cluster_graph.eps``.

    Shows an error dialog when no file is selected.
    """
    if isinstance(self.data.path_name, str):
        default_path = self.data.path_name
    else:
        # Expand '~' here rather than relying on the dialog to interpret it.
        default_path = os.path.expanduser('~/')

    file_list = QtWidgets.QFileDialog.getOpenFileNames(
        caption='Select cluster summary files',
        directory=default_path,
        filter='*Summary.csv')
    QtWidgets.QApplication.processEvents()

    summary_files = file_list[0]
    if not summary_files:
        helper.error_message(self.error_dialog, 'No files selected')
        return

    view.start_progress_bar(self.progressBar, start=0,
                            stop=len(summary_files) - 1)

    path_name = os.path.dirname(os.path.abspath(summary_files[0]))

    cluster_figure, cluster_ax = plt.subplots()
    try:
        cluster_figure.set_dpi(300)

        sample_names = []
        for i, summary_file in enumerate(summary_files):
            position = i + 1
            sample_names.append(
                os.path.splitext(os.path.basename(summary_file))[0])

            tab_data = pd.read_csv(summary_file)
            tab_data = tab_data[tab_data['id'] != 'noise']
            percentages = tab_data['percentage']

            cluster_ax.boxplot(percentages, positions=[position], sym='',
                               vert=True, medianprops=dict(color='k'))

            # Jitter half-width is position / percentage**2, clamped to
            # [0.02, 0.2]: small clusters spread wide, large ones stay near
            # the box centre.
            jitter_limit = np.clip(
                np.divide(position,
                          np.square(percentages.values.astype(float))),
                0.02, 0.2)
            jitter = np.array([uniform(-limit, limit)
                               for limit in jitter_limit])
            x_coord = position + jitter

            # Copy first so the alpha column is not written into a slice of
            # tab_data (avoids pandas' chained-assignment warning).
            rgba = tab_data[['mean R', 'mean G', 'mean B']].copy()
            rgba['alpha'] = 0.7
            point_colors = [tuple(row) for row in rgba.values]

            cluster_ax.scatter(x_coord, percentages, s=100, c=point_colors)

            view.update_progress_bar(self.progressBar)
            QtWidgets.QApplication.processEvents()

        cluster_ax.set_ylim(0, 100)
        cluster_ax.set_xlim(0.5, len(summary_files) + 0.5)

        # Tick labels: file stem without the '_Summary' suffix, max 20 chars.
        tick_labels = [name.replace('_Summary', '')[0:20]
                       for name in sample_names]
        cluster_ax.set_xticks(list(range(1, len(summary_files) + 1)))
        cluster_ax.set_xticklabels(tick_labels)
        cluster_ax.tick_params(axis='x', labelsize='x-small')
        cluster_ax.yaxis.set_major_locator(ticker.MultipleLocator(10))

        for extension in ('png', 'eps'):
            # bbox_inches (not 'Bbox') is the savefig keyword that enables
            # tight cropping; pad_inches only applies together with it.
            cluster_figure.savefig(
                os.path.join(path_name,
                             'combined_cluster_graph.' + extension),
                dpi=300, transparent=True, pad_inches=0,
                bbox_inches='tight')
    finally:
        # Release the figure even when reading, plotting or saving fails.
        plt.close(cluster_figure)
def save_data(self):
    """Write the cluster summary, cluster solution, metadata and plots.

    Asks the user for an output directory, creates the five output
    sub-directories when missing, then writes:

    * ``cluster_summaries/<sample>_Summary.csv`` from ``tab_cluster_data``
    * ``cluster_solutions/<sample>_cluster_solution.csv``: the sampled data
      (``raw_filtered`` when outliers were removed, otherwise ``raw``) with a
      leading ``clusterID`` column
    * ``cluster_solutions/<sample>_metadata.csv``: run parameters and
      summary statistics

    and finally delegates plot output to ``self.data.make_output_plots``.

    Shows an error dialog when no directory is chosen.
    """
    from datetime import datetime

    num_of_progress_bar_steps = 4 + len(self.data.tab_cluster_data['id'])
    view.start_progress_bar(self.progressBar, start=0,
                            stop=num_of_progress_bar_steps)

    # get directory to save to
    self.data.save_folder = QtWidgets.QFileDialog.getExistingDirectory(
        caption='Select directory to save output',
        directory=os.path.dirname(self.data.file_name))

    if not self.data.save_folder:
        helper.error_message(self.error_dialog,
                             'Could not retrieve directory to save to')
        return

    # make subdirectories if they don't exist
    for subdirectory in ('ternary_plots',
                         'cluster_backgates',
                         'bar_graphs_and_cluster_plots',
                         'cluster_solutions',
                         'cluster_summaries'):
        os.makedirs(os.path.join(self.data.save_folder, subdirectory),
                    exist_ok=True)

    self.data.tab_cluster_data.to_csv(
        os.path.join(self.data.save_folder, 'cluster_summaries',
                     self.data.sample_name + '_Summary.csv'),
        index=False, header=True)

    # Work on a copy: inserting 'clusterID' directly into self.data.raw /
    # raw_filtered would leave the column behind, and a second save would
    # then fail because DataFrame.insert rejects an existing column name.
    if self.data.outliers_removed:
        cluster_solution = self.data.raw_filtered.copy()
        cluster_solution.insert(0, 'clusterID', self.data.cluster_data_idx)
    else:
        cluster_solution = self.data.raw.copy()
        cluster_solution.insert(loc=0, column='clusterID',
                                value=pd.Series(self.data.cluster_data_idx))

    cluster_solution.to_csv(
        path_or_buf=os.path.join(self.data.save_folder, 'cluster_solutions',
                                 self.data.sample_name
                                 + '_cluster_solution.csv'),
        index=False, header=True)

    metadata_output = {
        'fcs_file': self.data.file_name,
        'date_and_time': datetime.now(),
        'original_sample_size': self.data.data_size,
        'sample_size': cluster_solution.shape[0],
        'HDBSCAN_min_cluster_size': self.clusterMinClusterSize.text(),
        'HDBSCAN_min_samples': self.clusterMinSamples.text(),
        'noise_cluster_idx': self.data.noise_cluster_idx,
        'red_only_idx': self.data.red_only_cluster_idx,
        'gini_coefficient': self.data.gini,
        'shannon_entropy': self.data.shannon,
    }

    # Drop the temporary copy before the plotting step.
    del cluster_solution

    metadata_output = pd.DataFrame.from_dict(metadata_output, orient='index')
    metadata_output.to_csv(
        path_or_buf=os.path.join(self.data.save_folder, 'cluster_solutions',
                                 self.data.sample_name + '_metadata.csv'),
        index=True, header=False)

    view.update_progress_bar(self.progressBar)
    QtWidgets.QApplication.processEvents()

    self.data.make_output_plots(scale=self.ternScaleOption.currentIndex(),
                                color=self.ternColorOption.currentIndex(),
                                progress_bar=self.progressBar)