Ejemplo n.º 1
0
    def load_data(self):
        """Load the selected sample, then transform, plot and cluster it.

        Positions the 3D scatter and 2D ternary windows relative to the screen
        size, resets the stored auto-cluster results, and reads the file via
        ``self.data.fcs_read`` using the sample size typed into
        ``clusterSampleSize``.  On success the parameter table is filled, the
        data is transformed, both zbow plots are drawn and ``cluster_data`` is
        run; the progress bar is advanced after each of those four stages.

        An invalid sample size or a failed read is reported through
        ``helper.error_message`` and the method returns without plotting.
        """
        screen_width = self.screen_size.width()
        screen_height = self.screen_size.height()

        # Both windows share a left edge at 60% of the screen width and are
        # squares whose side is 20% of the screen width.
        window_x = int(0.60 * screen_width)
        window_side = int(0.20 * screen_width)

        self.scatter3DWindow.move(window_x, int(0.05 * screen_height))
        self.scatter3DWindow.resize(window_side, window_side)

        self.tern2DWindow.move(window_x, int(0.45 * screen_height))
        self.tern2DWindow.resize(window_side, window_side)

        view.start_progress_bar(self.progressBar, start=0, stop=4)

        self.data.screen_size = self.screen_size
        self.data.OS = self.OS

        # reinitialize auto_cluster data; an empty Series *instance* is stored
        # so attribute checks such as ``.empty`` behave as expected
        self.data.tab_cluster_data = pd.Series(dtype=object)
        self.data.auto_cluster_idx = []

        # the sample size comes from a free-text field, so it may not parse
        try:
            sample_size = int(self.clusterSampleSize.text())
        except ValueError:
            helper.error_message(self.error_dialog, 'Sample size must be a whole number')
            return

        # load fcs file
        if not self.data.fcs_read(sample_size):
            helper.error_message(self.error_dialog, 'Could not load .fcs or .csv file')
            return

        # fill parameter table
        self.data.param_combo_box_list = view.init_param_table(self.parameterTable, self.data.params)

        view.update_progress_bar(self.progressBar)
        QtWidgets.QApplication.processEvents()

        # transform data
        self.data.transform_data()

        # print successful load and display number of cells
        self.fileLabel.setText('{}\n{} total cells ({} sampled cells)'.format(
            self.data.sample_name, self.data.data_size, self.data.raw.shape[0]))

        view.update_progress_bar(self.progressBar)
        QtWidgets.QApplication.processEvents()

        # initialize 2D and 3D zbow graph
        self.data.zbow_3d_plot(self.scatter3DWindow,
                               scale=self.scatterScaleOption.currentIndex(),
                               color=self.scatterColorOption.currentIndex())

        self.data.zbow_2d_plot(self.tern2DWindow,
                               scale=self.ternScaleOption.currentIndex(),
                               color=self.ternColorOption.currentIndex())

        view.update_progress_bar(self.progressBar)
        QtWidgets.QApplication.processEvents()

        self.cluster_data()

        view.update_progress_bar(self.progressBar)
        QtWidgets.QApplication.processEvents()

        self.data.outliers_removed = False
Ejemplo n.º 2
0
    def make_cluster_plots(self):
        """Combine several cluster summary files into one box/scatter figure.

        Prompts for one or more ``*Summary.csv`` files.  For each file (one x
        position per file, in selection order) the rows whose ``id`` is not
        ``'noise'`` are drawn as a box plot of the ``percentage`` column,
        overlaid with one horizontally jittered point per row coloured by the
        ``mean R`` / ``mean G`` / ``mean B`` columns.  The figure is saved next
        to the first selected file as ``combined_cluster_graph.png`` and
        ``combined_cluster_graph.eps``.

        If no file is selected an error dialog is shown instead.
        """
        if isinstance(self.data.path_name, str):
            default_path = self.data.path_name
        else:
            # resolve the home directory here rather than handing a literal
            # '~' to the file dialog
            default_path = os.path.expanduser('~')

        file_list = QtWidgets.QFileDialog.getOpenFileNames(caption='Select cluster summary files',
                                                           directory=default_path,
                                                           filter='*Summary.csv')

        QtWidgets.QApplication.processEvents()

        summary_files = file_list[0]
        if not summary_files:
            helper.error_message(self.error_dialog, 'No files selected')
            return

        view.start_progress_bar(self.progressBar, start=0, stop=len(summary_files) - 1)
        path_name = os.path.dirname(os.path.abspath(summary_files[0]))

        cluster_figure, cluster_ax = plt.subplots()
        try:
            cluster_figure.set_dpi(300)
            sample_names = []

            for position, summary_file in enumerate(summary_files, start=1):
                sample_names.append(os.path.splitext(os.path.basename(summary_file))[0])

                tab_data = pd.read_csv(summary_file)
                tab_data = tab_data[tab_data['id'] != 'noise']
                bar_data = tab_data['percentage']

                # copy first so the alpha column is not written into a slice
                # of tab_data
                bar_color = tab_data[['mean R', 'mean G', 'mean B']].copy()
                bar_color['alpha'] = 0.7
                point_colors = [tuple(row) for row in bar_color.values]

                cluster_ax.boxplot(bar_data, positions=[position], sym='', vert=True,
                                   medianprops=dict(color='k'))

                # Horizontal jitter: the half-width is 1 / percentage**2,
                # clamped to [0.02, 0.2], so small clusters spread out more
                # than large ones.  It deliberately does not depend on the
                # file's x position.
                percentages = bar_data.values.astype(float)
                with np.errstate(divide='ignore'):
                    jitter_width = np.clip(1.0 / percentages ** 2, 0.02, 0.2)
                jitter = np.array([uniform(-width, width) for width in jitter_width])
                x_coord = position + jitter

                cluster_ax.scatter(x_coord, bar_data, s=100, c=point_colors)

                view.update_progress_bar(self.progressBar)
                QtWidgets.QApplication.processEvents()

            cluster_ax.set_ylim(0, 100)
            cluster_ax.set_xlim(0.5, len(summary_files) + 0.5)

            # tick labels: file stem without '_Summary', cut to 20 characters
            tick_labels = [name.replace('_Summary', '')[0:20] for name in sample_names]
            cluster_ax.set_xticks(list(range(1, len(summary_files) + 1)))
            cluster_ax.set_xticklabels(tick_labels)

            cluster_ax.tick_params(axis='x', labelsize='x-small')
            cluster_ax.yaxis.set_major_locator(ticker.MultipleLocator(10))

            for extension in ('png', 'eps'):
                cluster_figure.savefig(os.path.join(path_name, 'combined_cluster_graph.' + extension),
                                       dpi=300, transparent=True, pad_inches=0, bbox_inches='tight')
        finally:
            # release the figure even if reading or plotting a file failed
            plt.close(cluster_figure)
Ejemplo n.º 3
0
    def save_data(self):
        """Write the cluster summary, cluster solution, metadata and plots to disk.

        Asks the user for an output directory, creates the result
        sub-directories inside it if needed, and writes:

        * ``cluster_summaries/<sample>_Summary.csv`` from
          ``self.data.tab_cluster_data``,
        * ``cluster_solutions/<sample>_cluster_solution.csv``, the raw (or
          outlier-filtered) data with a leading ``clusterID`` column,
        * ``cluster_solutions/<sample>_metadata.csv`` with the run settings.

        It then calls ``self.data.make_output_plots`` for the figures.  If no
        directory is chosen an error dialog is shown and nothing is written.
        """
        from datetime import datetime

        # 4 fixed steps plus one per row of the cluster table
        num_of_progress_bar_steps = 4 + len(self.data.tab_cluster_data['id'])

        view.start_progress_bar(self.progressBar, start=0, stop=num_of_progress_bar_steps)

        # get directory to save to
        self.data.save_folder = QtWidgets.QFileDialog.getExistingDirectory(
            caption='Select directory to save output',
            directory=os.path.dirname(self.data.file_name))

        if not self.data.save_folder:
            helper.error_message(self.error_dialog, 'Could not retrieve directory to save to')
            return

        # make subdirectories if they don't exist
        for subdirectory in ('ternary_plots',
                             'cluster_backgates',
                             'bar_graphs_and_cluster_plots',
                             'cluster_solutions',
                             'cluster_summaries'):
            os.makedirs(os.path.join(self.data.save_folder, subdirectory), exist_ok=True)

        self.data.tab_cluster_data.to_csv(os.path.join(self.data.save_folder, 'cluster_summaries',
                                                       self.data.sample_name + '_Summary.csv'),
                                          index=False, header=True)

        # Work on a copy: inserting 'clusterID' into the stored frame would
        # leave the column behind and make the next save fail because the
        # column already exists.
        if self.data.outliers_removed:
            cluster_solution = self.data.raw_filtered.copy()
            cluster_solution.insert(0, 'clusterID', self.data.cluster_data_idx)
        else:
            cluster_solution = self.data.raw.copy()
            cluster_solution.insert(loc=0, column='clusterID', value=pd.Series(self.data.cluster_data_idx))

        cluster_solution.to_csv(path_or_buf=os.path.join(self.data.save_folder, 'cluster_solutions',
                                                         self.data.sample_name + '_cluster_solution.csv'),
                                index=False, header=True)

        metadata_output = {'fcs_file': self.data.file_name,
                           'date_and_time': datetime.now(),
                           'original_sample_size': self.data.data_size,
                           'sample_size': cluster_solution.shape[0],
                           'HDBSCAN_min_cluster_size': self.clusterMinClusterSize.text(),
                           'HDBSCAN_min_samples': self.clusterMinSamples.text(),
                           'noise_cluster_idx': self.data.noise_cluster_idx,
                           'red_only_idx': self.data.red_only_cluster_idx,
                           'gini_coefficient': self.data.gini,
                           'shannon_entropy': self.data.shannon
                           }

        # the copy is no longer needed once its row count has been recorded
        del cluster_solution

        # one "key,value" row per metadata entry
        metadata_output = pd.DataFrame.from_dict(metadata_output, orient='index')
        metadata_output.to_csv(path_or_buf=os.path.join(self.data.save_folder, 'cluster_solutions',
                                                        self.data.sample_name + '_metadata.csv'),
                               index=True, header=False)

        view.update_progress_bar(self.progressBar)
        QtWidgets.QApplication.processEvents()

        self.data.make_output_plots(scale=self.ternScaleOption.currentIndex(),
                                    color=self.ternColorOption.currentIndex(),
                                    progress_bar=self.progressBar)
Ejemplo n.º 4
0
    def make_output_plots(self, scale, color, progress_bar):
        """Save the ternary, bar, cluster and backgate plots for the sample.

        Files are written below ``self.save_folder`` using ``self.sample_name``
        as the file stem.  ``progress_bar`` is advanced after the ternary plot,
        the bar graph, the cluster graph and after every backgate plot.

        Args:
            scale: index of the scale option; 0 = custom, 1 = default,
                2 = linear.
            color: index of the colour option; 0 = custom, 1 = default,
                2 = cluster colour, 3 = linear.
            progress_bar: widget handed to ``view.update_progress_bar``.

        Raises:
            ValueError: if ``scale`` or ``color`` is not one of the indices
                listed above.
        """
        # get scale data: scale_list = ['custom', 'default', 'linear']
        if scale == 0:
            scale_frame, contour_frame = self.custom_transformed, self.custom_ternary
        elif scale == 1:
            scale_frame, contour_frame = self.default_transformed, self.default_ternary
        elif scale == 2:
            scale_frame, contour_frame = self.linear_transformed, self.linear_ternary
        else:
            raise ValueError('scale must be 0, 1 or 2, got {!r}'.format(scale))

        # ``.values`` is used because ``DataFrame.as_matrix`` no longer exists
        # in current pandas releases
        scale_data = scale_frame.values
        contour_data = contour_frame.values

        # get color data: color_list = ['custom', 'default', 'cluster color', 'linear']
        if color == 0:
            color_data = self.custom_transformed.values
        elif color == 1:
            color_data = self.default_transformed[['RFP', 'YFP', 'CFP']].values
        elif color == 2:
            if self.tab_cluster_data.empty:
                color_data = helper.distinguishable_colors(1)
            else:
                pseudo_color = helper.distinguishable_colors(self.tab_cluster_data['id'].count())
                pseudo_color[self.noise_cluster_idx] = "#646464"

                # one colour per event, looked up through its cluster index
                color_data = [pseudo_color[self.cluster_data_idx[i]]
                              for i in range(scale_data.shape[0])]
        elif color == 3:
            color_data = self.linear_transformed[['RFP', 'YFP', 'CFP']].values
        else:
            raise ValueError('color must be 0, 1, 2 or 3, got {!r}'.format(color))

        # this assures that R + G + B = scale, which is required for the ternary library
        row_totals = scale_data.sum(axis=1)
        scale_data = scale_data / row_totals[:, None]

        ###### TERNARY PLOT WITH CONTOUR ######
        self._save_ternary_plot(scale_data, contour_data, color_data)

        view.update_progress_bar(progress_bar)
        QtWidgets.QApplication.processEvents()

        ######### BAR GRAPH AND CLUSTER PLOT #########
        bar_data = self.tab_cluster_data['percentage']
        bar_color = self.tab_cluster_data[['mean R', 'mean G', 'mean B']]
        bar_filename = os.path.join(self.save_folder, 'bar_graphs_and_cluster_plots', self.sample_name)

        self._save_bar_graph(bar_data, bar_color, bar_filename)

        view.update_progress_bar(progress_bar)
        QtWidgets.QApplication.processEvents()

        self._save_cluster_graph(bar_data, bar_color, bar_filename)

        view.update_progress_bar(progress_bar)
        QtWidgets.QApplication.processEvents()

        ######### BACKGATE PLOTS #####
        for cluster_id in set(self.cluster_data_idx):
            in_cluster = self.cluster_data_idx == cluster_id
            mean_g = np.mean(self.default_transformed.loc[in_cluster, 'YFP'])
            mean_b = np.mean(self.default_transformed.loc[in_cluster, 'CFP'])

            self.make_backgate_plot(cluster_id, self._backgate_quadrant(mean_g, mean_b))

            view.update_progress_bar(progress_bar)
            QtWidgets.QApplication.processEvents()

    def _save_ternary_plot(self, scale_data, contour_data, color_data):
        """Save the ternary scatter plot with a density contour as .png and .eps."""
        # Gaussian kernel density of the first two contour columns, evaluated
        # on a 200 x 200 grid over the unit square
        X, Y = np.mgrid[0:1:200j, 0:1:200j]
        positions = np.vstack([X.ravel(), Y.ravel()])
        values = np.vstack([contour_data[:, 0], contour_data[:, 1]])
        kernel = sd.gaussian_kde(values)
        Z = np.reshape(kernel(positions).T, X.shape)

        tern_figure, tern_plot = ternary.figure(scale=1)
        try:
            tern_figure.set_size_inches(5.37, 5)  # this is proper scaling to approximate an equilateral triangle
            tern_figure.set_dpi(300)
            tern_plot.boundary(linewidth=1.0)
            tern_plot.gridlines(multiple=0.1, color='grey')

            tern_plot.scatter(scale_data, marker='o', color=color_data, s=2)

            tern_plot.clear_matplotlib_ticks()

            # drawn on the current axes, i.e. the ternary figure created above
            plt.contour(X, Y, Z, colors='k', alpha=0.6, linewidths=1)

            ternary_filename = os.path.join(self.save_folder, 'ternary_plots', self.sample_name)
            for extension in ('.png', '.eps'):
                tern_figure.savefig(ternary_filename + extension,
                                    dpi=300, transparent=True, pad_inches=0, bbox_inches='tight')
        finally:
            plt.close(tern_figure)

    def _save_bar_graph(self, bar_data, bar_color, bar_filename):
        """Save a single stacked bar of the cluster percentages as .png and .eps."""
        bar_figure, bar_ax = plt.subplots()
        try:
            bar_figure.set_size_inches(3, 6)
            bar_figure.set_dpi(300)

            stacked_height = 0
            for j in range(len(bar_data)):
                rgba = bar_color.iloc[j].values.tolist()

                # The noise cluster is drawn fully transparent.  The first
                # segment is always opaque, as it was before this refactor.
                # NOTE(review): confirm whether a noise cluster at index 0
                # should be transparent too.
                if j != 0 and j == self.noise_cluster_idx:
                    rgba.append(0)
                else:
                    rgba.append(1)

                segment = bar_data.iloc[j]
                bar_ax.bar(0, segment, bottom=stacked_height, color=rgba)
                stacked_height = stacked_height + segment

            bar_ax.set_ylim(0, 100)
            bar_ax.set_xticks([])

            for extension in ('.png', '.eps'):
                bar_figure.savefig(bar_filename + 'bar_graph' + extension,
                                   dpi=300, transparent=True, pad_inches=0, bbox_inches='tight')
        finally:
            plt.close(bar_figure)

    def _save_cluster_graph(self, bar_data, bar_color, bar_filename):
        """Save a box plot of the cluster percentages with jittered points as .png and .eps."""
        cluster_figure, cluster_ax = plt.subplots()
        try:
            cluster_figure.set_size_inches(3, 6)
            cluster_figure.set_dpi(300)

            cluster_ax.boxplot(bar_data, sym='', vert=True, medianprops=dict(color='k'))

            # Horizontal jitter around x = 1: the half-width is
            # 1 / percentage**2, clamped to [0.02, 0.2].
            percentages = bar_data.values.astype(float)
            with np.errstate(divide='ignore'):
                jitter_width = np.clip(1.0 / percentages ** 2, 0.02, 0.2)
            jitter = np.array([uniform(-width, width) for width in jitter_width])
            x_coord = 1 + jitter

            # ``assign`` returns a new frame, so the cluster table itself is
            # not given an extra column
            point_colors = [tuple(row) for row in bar_color.assign(alpha=0.7).values]

            cluster_ax.scatter(x_coord, bar_data, s=150, c=point_colors)

            cluster_ax.set_ylim(0, 100)
            cluster_ax.yaxis.set_major_locator(ticker.MultipleLocator(10))

            for extension in ('.png', '.eps'):
                cluster_figure.savefig(bar_filename + 'cluster_graph' + extension,
                                       dpi=300, transparent=True, pad_inches=0, bbox_inches='tight')
        finally:
            plt.close(cluster_figure)

    @staticmethod
    def _backgate_quadrant(mean_g, mean_b):
        """Return the quadrant (1-4) for a cluster's mean YFP (g) and CFP (b) values.

        A mean counts as "high" only when it is strictly greater than 0.5, so a
        value of exactly 0.5 (or NaN) is treated as low and a quadrant is
        always returned.
        """
        high_g = mean_g > 0.5
        high_b = mean_b > 0.5

        if high_b:
            return 2 if high_g else 1
        return 4 if high_g else 3