Beispiel #1
0
    def save_as_xlsx(self, filename: str):
        """Write the last hierarchical clustering result to an Excel workbook.

        The workbook contains a README sheet, the clustered dataset, the
        cluster flag of every sample, one typical sample per cluster (the
        first sample met for each flag) and, when there are at most 100
        clusters, one sheet per cluster listing its samples.

        Args:
            filename: Path of the ``.xlsx`` file to write.

        Raises:
            AssertionError: If no clustering result has been stored yet.
        """
        assert self.__last_result is not None
        # Only the dataset and the linkage matrix are needed for the export.
        dataset, _, linkage_matrix, _ = self.__last_result
        flags = fcluster(linkage_matrix, self.n_clusters, criterion="maxclust")
        # Use the flags that actually occur rather than the requested count,
        # and sort them so the per-cluster sheets are created in a
        # deterministic, ascending order (set iteration order is not).
        cluster_flags = sorted(set(flags))
        n_clusters = len(cluster_flags)

        wb = openpyxl.Workbook()
        prepare_styles(wb)
        readme = wb.active
        readme.title = self.tr("README")
        description = \
            """
            This Excel file was generated by QGrain ({0}).

            Please cite:
            Liu, Y., Liu, X., Sun, Y., 2021. QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions. Sedimentary Geology 423, 105980. https://doi.org/10.1016/j.sedgeo.2021.105980

            It contanins three (or n_clusters + 3) sheets:
            1. The first sheet is the dataset which was used to perform the hierarchy clustering algorithm.
            2. The second sheet is used to put the clustering flags of all samples.
            3. The third sheet is the typical sampels (i.e, the first sample of each cluster was selected).
            4. If the number of clusters less equal to 100, the samples of each cluster will be save to individual sheets.

            The base hierarchy clusrting algorithm is implemented by Scipy. You can get the details of algorithm from the following website.
            https://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html

            """.format(QGRAIN_VERSION)

        def write(sheet, row, col, value, style="normal_light"):
            # Callers use 0-based coordinates; openpyxl cells are 1-based.
            sheet.cell(row + 1, col + 1, value=value).style = style

        def zebra(row):
            # Alternate the two body styles so rows are easier to follow.
            return "normal_dark" if row % 2 == 0 else "normal_light"

        def add_samples_sheet(title, samples):
            # One header row of grain-size classes, then one row per sample.
            sheet = wb.create_sheet(title)
            write(sheet, 0, 0, self.tr("Sample Name"), style="header")
            sheet.column_dimensions[column_to_char(0)].width = 16
            for col, value in enumerate(dataset.classes_μm, 1):
                write(sheet, 0, col, value, style="header")
                sheet.column_dimensions[column_to_char(col)].width = 10
            for row, sample in enumerate(samples, 1):
                style = zebra(row)
                write(sheet, row, 0, sample.name, style=style)
                for col, value in enumerate(sample.distribution, 1):
                    write(sheet, row, col, value, style=style)
                # Keep the GUI responsive while large datasets are written.
                QCoreApplication.processEvents()

        for row, line in enumerate(description.split("\n")):
            write(readme, row, 0, line, style="description")
        readme.column_dimensions[column_to_char(0)].width = 200

        add_samples_sheet(self.tr("Dataset"), dataset.samples)

        flags_sheet = wb.create_sheet(self.tr("Flags"))
        write(flags_sheet, 0, 0, self.tr("Sample Name"), style="header")
        write(flags_sheet, 0, 1, self.tr("Flag of the Cluster"),
              style="header")
        flags_sheet.column_dimensions[column_to_char(0)].width = 16
        flags_sheet.column_dimensions[column_to_char(1)].width = 16
        for row, (sample, flag) in enumerate(zip(dataset.samples, flags), 1):
            style = zebra(row)
            write(flags_sheet, row, 0, sample.name, style=style)
            write(flags_sheet, row, 1, flag, style=style)
            QCoreApplication.processEvents()

        # The typical sample of a cluster is the first sample carrying its
        # flag; stop scanning once every cluster has one.
        typical_samples = []
        seen_flags = set()
        for sample, flag in zip(dataset.samples, flags):
            if len(seen_flags) == n_clusters:
                break
            if flag not in seen_flags:
                typical_samples.append(sample)
                seen_flags.add(flag)
        add_samples_sheet(self.tr("Typical Samples"), typical_samples)

        # Per-cluster sheets are skipped for very large cluster counts.
        if n_clusters <= 100:
            for flag in cluster_flags:
                members = [
                    sample for sample, in_this_cluster in zip(
                        dataset.samples, np.equal(flags, flag))
                    if in_this_cluster
                ]
                add_samples_sheet(
                    self.tr("Cluster{0}").format(flag), members)

        wb.save(filename)
        wb.close()
Beispiel #2
0
    def save_result_excel(self, filename: str, result: EMMAResult):
        """Write one EMMA result to an Excel workbook.

        The workbook contains a README sheet with the algorithm settings, the
        dataset, the end-member distributions and the end-member fractions of
        every sample. End-members are numbered EM1, EM2, ... in ascending
        order of the grain-size class at which each one peaks.

        Args:
            filename: Path of the ``.xlsx`` file to write.
            result: The EMMA result to export.
        """
        # Pair every end-member index with the grain size of its peak.
        ranked = []
        for member_index in range(result.n_members):
            member = result.end_members[member_index]
            peak = np.unravel_index(np.argmax(member), member.shape)
            ranked.append((member_index, result.dataset.classes_μm[peak]))
        # Ascending mode size defines the EM numbering used in every sheet.
        ranked.sort(key=lambda pair: pair[1])

        wb = openpyxl.Workbook()
        prepare_styles(wb)

        readme = wb.active
        readme.title = self.tr("README")
        setting = result.resolver_setting
        description = \
            """
            This Excel file was generated by QGrain ({0}).

            Please cite:
            Liu, Y., Liu, X., Sun, Y., 2021. QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions. Sedimentary Geology 423, 105980. https://doi.org/10.1016/j.sedgeo.2021.105980

            It contanins three sheets:
            1. The first sheet is the dataset which was used to perform the EMMA algorithm.
            2. The second sheet is used to put the distributions of all end-members.
            3. The third sheet is the end-member fractions of all samples.

            This EMMA algorithm was implemented by QGrian, using the famous machine learning framework, PyTorch.

            EMMA algorithm details
                N_samples: {1},
                Distribution Type: {2},
                N_members: {3},
                N_iterations: {4},
                Spent Time: {5} s,

                Computing Device: {6},
                Distance: {7},
                Minimum N_iterations: {8},
                Maximum N_iterations: {9},
                Learning Rate: {10},
                eps: {11},
                tol: {12},
                ftol: {13}

            """.format(QGRAIN_VERSION,
                    result.dataset.n_samples,
                    result.distribution_type.name,
                    result.n_members,
                    result.n_iterations,
                    result.time_spent,
                    setting.device,
                    setting.distance,
                    setting.min_niter,
                    setting.max_niter,
                    setting.lr,
                    setting.eps,
                    setting.tol,
                    setting.ftol)

        def put(sheet, row, col, value, style="normal_light"):
            # 0-based coordinates in, 1-based openpyxl cell out.
            target = sheet.cell(row + 1, col + 1, value=value)
            target.style = style

        def banding(row):
            # Even rows are dark, odd rows are light.
            return "normal_dark" if row % 2 == 0 else "normal_light"

        def put_class_header(sheet, corner_label):
            # First column label followed by one column per grain-size class.
            put(sheet, 0, 0, corner_label, style="header")
            sheet.column_dimensions[column_to_char(0)].width = 16
            for col, size in enumerate(result.dataset.classes_μm, 1):
                put(sheet, 0, col, size, style="header")
                sheet.column_dimensions[column_to_char(col)].width = 10

        for line_no, text in enumerate(description.split("\n")):
            put(readme, line_no, 0, text, style="description")
        readme.column_dimensions[column_to_char(0)].width = 200

        dataset_sheet = wb.create_sheet(self.tr("Dataset"))
        put_class_header(dataset_sheet, self.tr("Sample Name"))
        for row, sample in enumerate(result.dataset.samples, 1):
            look = banding(row)
            put(dataset_sheet, row, 0, sample.name, style=look)
            for col, frequency in enumerate(sample.distribution, 1):
                put(dataset_sheet, row, col, frequency, style=look)
            # Let the Qt event loop run between rows.
            QCoreApplication.processEvents()

        member_sheet = wb.create_sheet(self.tr("End-members"))
        put_class_header(member_sheet, self.tr("End-member"))
        for row, (member_index, _) in enumerate(ranked, 1):
            look = banding(row)
            put(member_sheet, row, 0, f"EM{row}", style=look)
            for col, frequency in enumerate(result.end_members[member_index],
                                            1):
                put(member_sheet, row, col, frequency, style=look)
            QCoreApplication.processEvents()

        fraction_sheet = wb.create_sheet(self.tr("Fractions"))
        put(fraction_sheet, 0, 0, self.tr("Sample Name"), style="header")
        fraction_sheet.column_dimensions[column_to_char(0)].width = 16
        for number in range(1, result.n_members + 1):
            put(fraction_sheet, 0, number, f"EM{number}", style="header")
            fraction_sheet.column_dimensions[column_to_char(number)].width = 10
        for row, sample_fractions in enumerate(result.fractions, 1):
            look = banding(row)
            put(fraction_sheet, row, 0,
                result.dataset.samples[row - 1].name, style=look)
            # Columns follow the same mode-sorted order as the EM sheet.
            for col, (member_index, _) in enumerate(ranked, 1):
                put(fraction_sheet, row, col, sample_fractions[member_index],
                    style=look)
            QCoreApplication.processEvents()

        wb.save(filename)
        wb.close()
Beispiel #3
0
    def save_file(self, filename: str):
        """Write statistic parameters and classification groups to Excel.

        The workbook contains a README sheet and one "Parameters and Groups"
        sheet with one row per sample of the current dataset. The statistic
        values are taken from the sub-dictionary selected by
        ``self.is_geometric`` and ``self.is_FW57``; the proportion and group
        columns are read from the top level of the statistics.

        Args:
            filename: Path of the ``.xlsx`` file to write.
        """
        wb = openpyxl.Workbook()
        prepare_styles(wb)

        readme = wb.active
        readme.title = self.tr("README")
        description = \
            """
            This Excel file was generated by QGrain ({0}).

            Please cite:
            Liu, Y., Liu, X., Sun, Y., 2021. QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions. Sedimentary Geology 423, 105980. https://doi.org/10.1016/j.sedgeo.2021.105980

            It contanins one sheet:
            1. The sheet puts the statistic parameters and the classification groups of the samples.

            The statistic formulas are referred to Blott & Pye (2001)'s work.
            The classification of GSDs is referred to Folk (1957)'s and Blott & Pye (2012)'s scheme.

            References:
                1.Blott, S. J. & Pye, K. Particle size scales and classification of sediment types based on particle size distributions: Review and recommended procedures. Sedimentology 59, 2071–2096 (2012).
                2.Blott, S. J. & Pye, K. GRADISTAT: a grain-size distribution and statistics package for the analysis of unconsolidated sediments. Earth Surf. Process. Landforms 26, 1237–1248 (2001).
                3.Folk, R. L. The Distinction between Grain Size and Mineral Composition in Sedimentary-Rock Nomenclature. The Journal of Geology 62, 344–359 (1954).

            """.format(QGRAIN_VERSION)

        def write(sheet, row, col, value, style="normal_light"):
            # Callers use 0-based coordinates; openpyxl cells are 1-based.
            sheet.cell(row + 1, col + 1, value=value).style = style

        for row, line in enumerate(description.split("\n")):
            write(readme, row, 0, line, style="description")
        readme.column_dimensions[column_to_char(0)].width = 200

        ws = wb.create_sheet(self.tr("Parameters and Groups"))
        proportion_key, proportion_description = self.proportion
        col_names = [
            f"{self.tr('Mean')}[{self.unit}]",
            self.tr("Mean Desc."), f"{self.tr('Median')} [{self.unit}]",
            f"{self.tr('Modes')} [{self.unit}]",
            self.tr("STD (Sorting)"),
            self.tr("Sorting Desc."),
            self.tr("Skewness"),
            self.tr("Skew. Desc."),
            self.tr("Kurtosis"),
            self.tr("Kurt. Desc."),
            f"({proportion_description})\n{self.tr('Proportion')} [%]",
            self.tr("Group\n(Folk, 1954)"),
            self.tr("Group\nSymbol (Blott & Pye, 2012)"),
            self.tr("Group\n(Blott & Pye, 2012)")
        ]
        # Each entry is (in_sub, key): True reads statistic[sub_key][key],
        # False reads statistic[key] directly.
        col_keys = [(True, "mean"), (True, "mean_description"),
                    (True, "median"), (True, "modes"), (True, "std"),
                    (True, "std_description"), (True, "skewness"),
                    (True, "skewness_description"), (True, "kurtosis"),
                    (True, "kurtosis_description"), (False, proportion_key),
                    (False, "group_Folk54"), (False, "group_BP12_symbol"),
                    (False, "group_BP12")]
        # Columns holding textual descriptions or lists get extra width.
        wide_columns = (2, 4, 6, 8, 10, 11, 12, 14)
        write(ws, 0, 0, self.tr("Sample Name"), style="header")
        ws.column_dimensions[column_to_char(0)].width = 16
        for col, col_name in enumerate(col_names, 1):
            write(ws, 0, col, col_name, style="header")
            ws.column_dimensions[column_to_char(col)].width = \
                30 if col in wide_columns else 16
        # The last column is widened further, overriding the loop above.
        ws.column_dimensions[column_to_char(len(col_names))].width = 40

        # The statistic variant does not depend on the sample, so select it
        # once instead of once per row.
        sub_key = "geometric" if self.is_geometric else "logarithmic"
        if self.is_FW57:
            sub_key += "_FW57"

        for row, sample in enumerate(self.__dataset.samples, 1):
            style = "normal_dark" if row % 2 == 0 else "normal_light"
            write(ws, row, 0, sample.name, style=style)
            statistic = get_all_statistic(sample.classes_μm, sample.classes_φ,
                                          sample.distribution)
            for col, (in_sub, key) in enumerate(col_keys, 1):
                value = statistic[sub_key][key] if in_sub else statistic[key]
                if key == "modes":
                    # Several modes are joined into one text cell.
                    value = ", ".join([f"{m:0.4f}" for m in value])
                elif key.endswith("_proportion"):
                    # Proportions are written as percentages.
                    value = ", ".join([f"{p*100:0.4f}" for p in value])
                write(ws, row, col, value, style=style)

        wb.save(filename)
        wb.close()
Beispiel #4
0
    def save_typical(self, filename):
        """Cluster the stacked components with OPTICS and export them to Excel.

        The workbook contains a README sheet, a "Typical Components" sheet
        with the mean distribution of every cluster, a "Not Clustered" sheet
        for components flagged ``-1`` (only when such components exist) and
        one sheet per cluster with its member components.

        Clusters are processed in ascending flag order so that the
        "Component{n}" rows and the "Cluster{n}" sheets are produced in the
        same order.

        Args:
            filename: Path of the ``.xlsx`` file to write.

        Raises:
            AssertionError: If ``self.last_results`` is ``None``.
        """
        assert self.last_results is not None
        if len(self.last_results) == 0:
            return
        # Read the widget values once so that the clustering and the README
        # are guaranteed to report the same settings.
        min_samples = self.min_samples_input.value()
        min_cluster_size = self.min_cluster_size_input.value()
        xi = self.xi_input.value()
        cluster = OPTICS(min_samples=min_samples,
                         min_cluster_size=min_cluster_size,
                         xi=xi)
        classes_μm = self.last_results[0].classes_μm
        flags = cluster.fit_predict(self.data_to_clustering)
        # Set iteration order is not guaranteed to be sorted (the -1 flag in
        # particular can perturb it), so fix the order explicitly.
        ordered_flags = sorted(set(flags))
        # Mean distribution of each real cluster; -1 is handled separately.
        typicals = [
            np.mean(self.stacked_components[np.equal(flags, flag)], axis=0)
            for flag in ordered_flags if flag != -1
        ]

        wb = openpyxl.Workbook()
        prepare_styles(wb)
        readme = wb.active
        readme.title = self.tr("README")
        description = \
            """
            This Excel file was generated by QGrain ({0}).

            Please cite:
            Liu, Y., Liu, X., Sun, Y., 2021. QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions. Sedimentary Geology 423, 105980. https://doi.org/10.1016/j.sedgeo.2021.105980

            It contanins 2 + N_clusters sheets:
            1. The first sheet is the sum distributions of all component clusters.
            2. The second sheet is used to put the component distributions that not in any cluster.
            3. The left sheet is the component distributions of each cluster, separately.

            The clustering algorithm is OPTICS, implemented by scikit-learn.
            https://scikit-learn.org/stable/modules/generated/sklearn.cluster.OPTICS.html

            Clustering algorithm details
                min_samples={1}
                min_cluster_size={2}
                xi={3}
                others=default

            """.format(QGRAIN_VERSION,
                       min_samples,
                       min_cluster_size,
                       xi)

        def write(sheet, row, col, value, style="normal_light"):
            # Callers use 0-based coordinates; openpyxl cells are 1-based.
            sheet.cell(row + 1, col + 1, value=value).style = style

        def zebra(row):
            # Alternate the two body styles between consecutive rows.
            return "normal_dark" if row % 2 == 0 else "normal_light"

        def write_class_header(sheet, corner_label):
            # First column label followed by one column per grain-size class.
            write(sheet, 0, 0, corner_label, style="header")
            sheet.column_dimensions[column_to_char(0)].width = 16
            for col, value in enumerate(classes_μm, 1):
                write(sheet, 0, col, value, style="header")
                sheet.column_dimensions[column_to_char(col)].width = 10

        for row, line in enumerate(description.split("\n")):
            write(readme, row, 0, line, style="description")
        readme.column_dimensions[column_to_char(0)].width = 200

        typical_sheet = wb.create_sheet(self.tr("Typical Components"))
        write_class_header(typical_sheet, self.tr("Typical Component"))
        for row, distribution in enumerate(typicals, 1):
            style = zebra(row)
            write(typical_sheet, row, 0,
                  self.tr("Component{0}").format(row), style=style)
            for col, value in enumerate(distribution, 1):
                write(typical_sheet, row, col, value, style=style)
            # Keep the GUI responsive while writing.
            QCoreApplication.processEvents()

        for flag in ordered_flags:
            if flag == -1:
                # Insert right after the "Typical Components" sheet.
                sheet = wb.create_sheet(self.tr("Not Clustered"), 2)
            else:
                sheet = wb.create_sheet(
                    self.tr("Cluster{0}").format(flag + 1))

            write_class_header(sheet, self.tr("Index"))
            members = self.stacked_components[np.equal(flags, flag)]
            for row, component in enumerate(members, 1):
                style = zebra(row)
                write(sheet, row, 0, str(row), style=style)
                for col, value in enumerate(component, 1):
                    write(sheet, row, col, value, style=style)
                QCoreApplication.processEvents()

        wb.save(filename)
        wb.close()
Beispiel #5
0
    def save_excel(self, filename, align_components=False):
        """Export all stored SSU fitting results to an Excel workbook.

        The workbook contains a README sheet, the sample distributions, the
        fitting information, the statistic moments of every component, the
        unmixed components of every sample and one "Unmixed EM{n}" sheet per
        component slot.

        Every component is assigned a slot ("flag") that selects its column
        group and its "Unmixed EM" sheet. Without alignment the slot is the
        position of the component inside its result. With alignment the
        component distributions of all results are clustered with KMeans,
        duplicated labels inside one result are moved to a free label, and
        the labels are renumbered in ascending order of the logarithmic mean
        of the first component carrying each label.

        Args:
            filename: Path of the ``.xlsx`` file to write.
            align_components: Whether to align components across results
                with KMeans instead of using their position.

        Returns:
            None. Nothing is written when there are no results.
        """
        if self.n_results == 0:
            return

        # Work on a snapshot so the flag list and every sheet refer to the
        # same sequence of results.
        results = self.__fitting_results.copy()
        classes_μm = results[0].classes_μm
        count_dict = Counter(result.n_components for result in results)
        max_n_components = max(count_dict.keys())
        self.logger.debug(
            f"N_components: {count_dict}, Max N_components: {max_n_components}"
        )

        if not align_components:
            flags = [
                position for result in results
                for position in range(result.n_components)
            ]
        else:
            n_components_desc = "\n".join([
                self.tr("{0} Component(s): {1}").format(n_components, count)
                for n_components, count in count_dict.items()
            ])
            self.show_info(
                self.tr("N_components distribution of Results:\n{0}").format(
                    n_components_desc))
            stacked_components = np.array([
                component.distribution for result in results
                for component in result.components
            ])
            cluser = KMeans(n_clusters=max_n_components)
            flags = cluser.fit_predict(stacked_components)
            # Make the labels unique inside every result: two components of
            # one sample sharing a label would overwrite each other in the
            # sheets below. A clashing label is moved cyclically to the next
            # free one; the search is bounded by the number of labels.
            flag_index = 0
            for result in results:
                used_flags = set()
                for _component in result.components:
                    flag = int(flags[flag_index])
                    for _attempt in range(max_n_components):
                        if flag not in used_flags:
                            break
                        flag = (flag + 1) % max_n_components
                    flags[flag_index] = flag
                    used_flags.add(flag)
                    flag_index += 1

            # Renumber the labels by the logarithmic mean of the first
            # component carrying each label.
            first_seen = {}
            for i, flag in enumerate(flags):
                first_seen.setdefault(int(flag), i)
            ordered_flags = sorted(
                first_seen,
                key=lambda flag: logarithmic(
                    classes_μm, stacked_components[first_seen[flag]])["mean"])
            flag_map = {flag: index for index, flag in enumerate(ordered_flags)}
            flags = np.array([flag_map[int(flag)] for flag in flags])

        wb = openpyxl.Workbook()
        prepare_styles(wb)
        readme = wb.active
        readme.title = self.tr("README")
        description = \
            """
            This Excel file was generated by QGrain ({0}).

            Please cite:
            Liu, Y., Liu, X., Sun, Y., 2021. QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions. Sedimentary Geology 423, 105980. https://doi.org/10.1016/j.sedgeo.2021.105980

            It contanins 4 + max(N_components) sheets:
            1. The first sheet is the sample distributions of SSU results.
            2. The second sheet is used to put the infomation of fitting.
            3. The third sheet is the statistic parameters calculated by statistic moment method.
            4. The fouth sheet is the distributions of unmixed components and their sum of each sample.
            5. Other sheets are the unmixed end-member distributions which were discretely stored.

            The SSU algorithm is implemented by QGrain.

            """.format(QGRAIN_VERSION)

        def write(sheet, row, col, value, style="normal_light"):
            # Callers use 0-based coordinates; openpyxl cells are 1-based.
            sheet.cell(row + 1, col + 1, value=value).style = style

        def zebra(index):
            # Even indexes are dark, odd indexes are light.
            return "normal_dark" if index % 2 == 0 else "normal_light"

        def text_or(value, fallback):
            # Optional task attributes are stored as text, or a placeholder.
            return fallback if value is None else str(value)

        for row, line in enumerate(description.split("\n")):
            write(readme, row, 0, line, style="description")
        readme.column_dimensions[column_to_char(0)].width = 200

        ws = wb.create_sheet(self.tr("Sample Distributions"))
        write(ws, 0, 0, self.tr("Sample Name"), style="header")
        ws.column_dimensions[column_to_char(0)].width = 16
        for col, value in enumerate(classes_μm, 1):
            write(ws, 0, col, value, style="header")
            ws.column_dimensions[column_to_char(col)].width = 10
        for row, result in enumerate(results, 1):
            style = zebra(row)
            write(ws, row, 0, result.sample.name, style=style)
            for col, value in enumerate(result.sample.distribution, 1):
                write(ws, row, col, value, style=style)
            # Keep the GUI responsive while writing.
            QCoreApplication.processEvents()

        ws = wb.create_sheet(self.tr("Information of Fitting"))
        write(ws, 0, 0, self.tr("Sample Name"), style="header")
        ws.column_dimensions[column_to_char(0)].width = 16
        headers = [
            self.tr("Distribution Type"),
            self.tr("N_components"),
            self.tr("Resolver"),
            self.tr("Resolver Settings"),
            self.tr("Initial Guess"),
            self.tr("Reference"),
            self.tr("Spent Time [s]"),
            self.tr("N_iterations"),
            self.tr("Final Distance [log10MSE]")
        ]
        for col, value in enumerate(headers, 1):
            write(ws, 0, col, value, style="header")
            # NOTE(review): the previous code special-cased columns 4-6 but
            # assigned the same width in both branches; the width is kept at
            # 10 everywhere to preserve the output. Confirm whether those
            # columns were meant to be wider.
            ws.column_dimensions[column_to_char(col)].width = 10
        for row, result in enumerate(results, 1):
            style = zebra(row)
            task = result.task
            row_values = (
                result.sample.name,
                result.distribution_type.name,
                result.n_components,
                task.resolver,
                text_or(task.resolver_setting, self.tr("Default")),
                text_or(task.initial_guess, self.tr("None")),
                text_or(task.reference, self.tr("None")),
                result.time_spent,
                result.n_iterations,
                result.get_distance("log10MSE"),
            )
            for col, value in enumerate(row_values):
                write(ws, row, col, value, style=style)

        ws = wb.create_sheet(self.tr("Statistic Moments"))
        write(ws, 0, 0, self.tr("Sample Name"), style="header")
        # The sample-name header spans both header rows.
        ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1)
        ws.column_dimensions[column_to_char(0)].width = 16
        sub_headers = [
            self.tr("Proportion"),
            self.tr("Mean [φ]"),
            self.tr("Mean [μm]"),
            self.tr("STD [φ]"),
            self.tr("STD [μm]"),
            self.tr("Skewness"),
            self.tr("Kurtosis")
        ]
        n_sub = len(sub_headers)
        for i in range(max_n_components):
            # One merged "C{n}" title above each group of sub-headers.
            write(ws, 0, i * n_sub + 1, self.tr("C{0}").format(i + 1),
                  style="header")
            ws.merge_cells(start_row=1,
                           start_column=i * n_sub + 2,
                           end_row=1,
                           end_column=(i + 1) * n_sub + 1)
        for col, value in enumerate(sub_headers * max_n_components, 1):
            write(ws, 1, col, value, style="header")
            ws.column_dimensions[column_to_char(col)].width = 10
        flag_index = 0
        for row, result in enumerate(results, 2):
            # Data starts below two header rows; the first data row is light.
            style = zebra(row - 1)
            write(ws, row, 0, result.sample.name, style=style)
            for component in result.components:
                first_col = int(flags[flag_index]) * n_sub + 1
                moments = (
                    component.fraction,
                    component.logarithmic_moments["mean"],
                    component.geometric_moments["mean"],
                    component.logarithmic_moments["std"],
                    component.geometric_moments["std"],
                    component.logarithmic_moments["skewness"],
                    component.logarithmic_moments["kurtosis"],
                )
                for offset, value in enumerate(moments):
                    write(ws, row, first_col + offset, value, style=style)
                flag_index += 1

        ws = wb.create_sheet(self.tr("Unmixed Components"))
        ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=2)
        write(ws, 0, 0, self.tr("Sample Name"), style="header")
        ws.column_dimensions[column_to_char(0)].width = 16
        for col, value in enumerate(classes_μm, 2):
            write(ws, 0, col, value, style="header")
            ws.column_dimensions[column_to_char(col)].width = 10
        row = 1
        for result_index, result in enumerate(results, 1):
            style = zebra(result_index)
            write(ws, row, 0, result.sample.name, style=style)
            # The name cell spans the component rows plus the "Sum" row.
            ws.merge_cells(start_row=row + 1,
                           start_column=1,
                           end_row=row + result.n_components + 1,
                           end_column=1)
            for component_i, component in enumerate(result.components, 1):
                write(ws, row, 1, self.tr("C{0}").format(component_i),
                      style=style)
                for col, value in enumerate(
                        component.distribution * component.fraction, 2):
                    write(ws, row, col, value, style=style)
                row += 1
            write(ws, row, 1, self.tr("Sum"), style=style)
            for col, value in enumerate(result.distribution, 2):
                write(ws, row, col, value, style=style)
            row += 1

        # One sheet per slot, created in ascending order so the sheet
        # sequence is deterministic.
        ws_dict = {}
        for flag in sorted(set(flags)):
            ws = wb.create_sheet(self.tr("Unmixed EM{0}").format(flag + 1))
            write(ws, 0, 0, self.tr("Sample Name"), style="header")
            ws.column_dimensions[column_to_char(0)].width = 16
            for col, value in enumerate(classes_μm, 1):
                write(ws, 0, col, value, style="header")
                ws.column_dimensions[column_to_char(col)].width = 10
            ws_dict[flag] = ws

        flag_index = 0
        for row, result in enumerate(results, 1):
            style = zebra(row)
            for component in result.components:
                # Every sample keeps its own row number in every EM sheet.
                ws = ws_dict[flags[flag_index]]
                write(ws, row, 0, result.sample.name, style=style)
                for col, value in enumerate(component.distribution, 1):
                    write(ws, row, col, value, style=style)
                flag_index += 1

        wb.save(filename)
        wb.close()
Beispiel #6
0
    def on_generate_clicked(self):
        """
        Generate a random dataset and export it to an Excel workbook.

        The user is asked for a target filename; if the dialog returns no
        filename the method returns without doing anything. Otherwise the
        samples are generated (progress 0-50 %) and written to a workbook
        made of a README sheet, a "Random Settings" sheet, a "Dataset" sheet,
        a "Parameters" sheet and one sheet per component (progress 50-100 %).

        `self.cancel_flag` is polled between work items; when it is set, the
        UI is reset through the local `cancel()` helper and the method returns
        without saving the workbook.
        """
        if self.update_timer.isActive():
            self.preview_button.setText(self.tr("Preview"))
            self.update_timer.stop()
            self.update_chart()

        filename, _ = self.file_dialog.getSaveFileName(
            self, self.tr("Choose a filename to save the generated dataset"),
            None, "Microsoft Excel (*.xlsx)")
        if not filename:
            return
        n_samples = self.n_samples_input.value()
        dataset = self.get_random_dataset(n_samples)
        # generate samples
        self.cancel_button.setEnabled(True)
        self.generate_button.setEnabled(False)
        format_str = self.tr("Generating {0} samples: %p%").format(n_samples)
        self.progress_bar.setFormat(format_str)
        self.progress_bar.setValue(0)

        def set_progress(value):
            # QProgressBar.setValue expects an int; passing a float may be
            # rejected with a TypeError by the Qt bindings on recent Python
            # versions, so convert explicitly.
            self.progress_bar.setValue(int(value))

        def cancel():
            self.progress_bar.setFormat(self.tr("Task canceled"))
            self.progress_bar.setValue(0)
            self.cancel_button.setEnabled(False)
            self.generate_button.setEnabled(True)
            self.cancel_flag = False

        samples = []
        for i in range(n_samples):
            if self.cancel_flag:
                cancel()
                return
            sample = dataset.get_sample(i)
            samples.append(sample)
            set_progress((i + 1) / n_samples * 50)
            QCoreApplication.processEvents()

        # save file to excel file
        format_str = self.tr("Writing {0} samples to excel file: %p%").format(
            n_samples)
        self.progress_bar.setFormat(format_str)
        self.progress_bar.setValue(50)

        wb = openpyxl.Workbook()
        prepare_styles(wb)

        ws = wb.active
        ws.title = self.tr("README")
        description = \
            """
            This Excel file was generated by QGrain ({0}).

            Please cite:
            Liu, Y., Liu, X., Sun, Y., 2021. QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions. Sedimentary Geology 423, 105980. https://doi.org/10.1016/j.sedgeo.2021.105980

            It contanins n_components + 3 sheets:
            1. The first sheet is the random settings which were used to generate random parameters.
            2. The second sheet is the generated dataset.
            3. The third sheet is random parameters which were used to calulate the component distributions and their mixture.
            4. The left sheets are the component distributions of all samples.

            Artificial dataset
                Using skew normal distribution as the base distribution of each component (i.e. end-member).
                Skew normal distribution has three parameters, shape, location and scale.
                Where shape controls the skewness, location and scale are simliar to that of the Normal distribution.
                When shape = 0, it becomes Normal distribution.
                The weight parameter controls the fraction of the component, where fraction_i = weight_i / sum(weight_i).
                By assigning the mean and std of each parameter, random parameters was generate by the `scipy.stats.truncnorm.rvs` function of Scipy.

            Sampling settings
                Minimum size [μm]: {1},
                Maximum size [μm]: {2},
                N_classes: {3},
                Precision: {4},
                Noise: {5},
                N_samples: {6}

            """.format(QGRAIN_VERSION,
                       self.minimum_size_input.value(),
                       self.maximum_size_input.value(),
                       self.n_classes_input.value(),
                       self.precision_input.value(),
                       self.precision_input.value()+1,
                       n_samples)

        def write(row, col, value, style="normal_light"):
            # Writes into whatever sheet `ws` currently refers to; callers use
            # 0-based coordinates while openpyxl cells are 1-based.
            cell = ws.cell(row + 1, col + 1, value=value)
            cell.style = style

        lines_of_desc = description.split("\n")
        for row, line in enumerate(lines_of_desc):
            write(row, 0, line, style="description")
        ws.column_dimensions[column_to_char(0)].width = 200

        ws = wb.create_sheet(self.tr("Random Settings"))
        write(0, 0, self.tr("Parameter"), style="header")
        ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1)
        write(0, 1, self.tr("Shape"), style="header")
        ws.merge_cells(start_row=1, start_column=2, end_row=1, end_column=3)
        write(0, 3, self.tr("Location"), style="header")
        ws.merge_cells(start_row=1, start_column=4, end_row=1, end_column=5)
        write(0, 5, self.tr("Scale"), style="header")
        ws.merge_cells(start_row=1, start_column=6, end_row=1, end_column=7)
        write(0, 7, self.tr("Weight"), style="header")
        ws.merge_cells(start_row=1, start_column=8, end_row=1, end_column=9)
        ws.column_dimensions[column_to_char(0)].width = 16
        for col in range(1, 9):
            ws.column_dimensions[column_to_char(col)].width = 16
            # The data rows below put the mean in the odd columns and the std
            # in the even columns, so the sub-headers must follow that layout.
            if col % 2 == 1:
                write(1, col, self.tr("Mean"), style="header")
            else:
                write(1, col, self.tr("STD"), style="header")
        for row, comp_params in enumerate(self.target, 2):
            if row % 2 == 1:
                style = "normal_dark"
            else:
                style = "normal_light"
            write(row, 0, self.tr("Component{0}").format(row - 1), style=style)
            for i, key in enumerate(["shape", "loc", "scale", "weight"]):
                mean, std = comp_params[key]
                write(row, i * 2 + 1, mean, style=style)
                write(row, i * 2 + 2, std, style=style)

        ws = wb.create_sheet(self.tr("Dataset"))
        write(0, 0, self.tr("Sample Name"), style="header")
        ws.column_dimensions[column_to_char(0)].width = 24
        for col, value in enumerate(dataset.classes_μm, 1):
            write(0, col, value, style="header")
            ws.column_dimensions[column_to_char(col)].width = 10
        for row, sample in enumerate(samples, 1):
            if row % 2 == 0:
                style = "normal_dark"
            else:
                style = "normal_light"
            write(row, 0, sample.name, style=style)
            for col, value in enumerate(sample.distribution, 1):
                write(row, col, value, style=style)

            if self.cancel_flag:
                cancel()
                return
            # Dataset sheet covers the 50-60 % band.
            set_progress(50 + (row / n_samples) * 10)
            QCoreApplication.processEvents()

        ws = wb.create_sheet(self.tr("Parameters"))
        write(0, 0, self.tr("Sample Name"), style="header")
        ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1)
        ws.column_dimensions[column_to_char(0)].width = 24
        for i in range(dataset.n_components):
            write(0,
                  4 * i + 1,
                  self.tr("Component{0}").format(i + 1),
                  style="header")
            ws.merge_cells(start_row=1,
                           start_column=4 * i + 2,
                           end_row=1,
                           end_column=4 * i + 5)
            for j, header_name in enumerate([
                    self.tr("Shape"),
                    self.tr("Location"),
                    self.tr("Scale"),
                    self.tr("Weight")
            ]):
                write(1, 4 * i + 1 + j, header_name, style="header")
                ws.column_dimensions[column_to_char(4 * i + 1 + j)].width = 16
        for row, sample in enumerate(samples, 2):
            if row % 2 == 1:
                style = "normal_dark"
            else:
                style = "normal_light"
            write(row, 0, sample.name, style=style)
            for i, comp_param in enumerate(sample.parameter.components):
                write(row, 4 * i + 1, comp_param.shape, style=style)
                write(row, 4 * i + 2, comp_param.loc, style=style)
                write(row, 4 * i + 3, comp_param.scale, style=style)
                write(row, 4 * i + 4, comp_param.weight, style=style)
            if self.cancel_flag:
                cancel()
                return
            # Parameters sheet covers the 60-70 % band. `row` starts at 2
            # because of the two header rows, hence the `- 1`.
            set_progress(60 + ((row - 1) / n_samples) * 10)
            QCoreApplication.processEvents()

        for i in range(dataset.n_components):
            ws = wb.create_sheet(self.tr("Component{0}").format(i + 1))
            write(0, 0, self.tr("Sample Name"), style="header")
            ws.column_dimensions[column_to_char(0)].width = 24
            for col, value in enumerate(dataset.classes_μm, 1):
                write(0, col, value, style="header")
                ws.column_dimensions[column_to_char(col)].width = 10
            for row, sample in enumerate(samples, 1):
                if row % 2 == 0:
                    style = "normal_dark"
                else:
                    style = "normal_light"
                write(row, 0, sample.name, style=style)
                for col, value in enumerate(sample.components[i].distribution,
                                            1):
                    write(row, col, value, style=style)
            if self.cancel_flag:
                cancel()
                return
            # Component sheets share the 70-100 % band; progress is updated
            # once per finished sheet.
            set_progress(70 + (i + 1) / dataset.n_components * 30)
            QCoreApplication.processEvents()

        try:
            wb.save(filename)
        finally:
            # Restore the buttons even when saving fails (e.g. the target
            # file is locked), so the user can try again; the exception
            # itself still propagates.
            wb.close()
            self.cancel_button.setEnabled(False)
            self.generate_button.setEnabled(True)

        self.progress_bar.setValue(100)
        self.progress_bar.setFormat(self.tr("Task finished"))
Beispiel #7
0
    def save_as_xlsx(self, filename: str):
        """
        Write the most recent PCA result to an Excel workbook at `filename`.

        The workbook consists of a README sheet followed by three data
        sheets: the input dataset, the principal components and the
        per-sample PC values. `self.last_result` must already be set.
        """
        assert self.last_result is not None
        dataset, transformed, components, ratios = self.last_result
        _, n_components = transformed.shape

        workbook = openpyxl.Workbook()
        prepare_styles(workbook)
        readme_sheet = workbook.active
        readme_sheet.title = self.tr("README")
        description = \
            """
            This Excel file was generated by QGrain ({0}).

            Please cite:
            Liu, Y., Liu, X., Sun, Y., 2021. QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions. Sedimentary Geology 423, 105980. https://doi.org/10.1016/j.sedgeo.2021.105980

            It contanins three sheets:
            1. The first sheet is the dataset which was used to perform the PCA algorithm.
            2. The second sheet is used to put the distributions of all PCs.
            3. The third sheet is the PC variation of all samples.

            The base PCA algorithm is implemented by scikit-learn. You can get the details of algorithm from the following website.
            https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

            """.format(QGRAIN_VERSION)

        def put(sheet, row, col, value, style="normal_light"):
            # Coordinates are 0-based here; openpyxl cells are 1-based.
            sheet.cell(row + 1, col + 1, value=value).style = style

        def set_width(sheet, col, width):
            sheet.column_dimensions[column_to_char(col)].width = width

        def stripe(row):
            # Alternate the row style so even rows are shaded.
            return "normal_dark" if row % 2 == 0 else "normal_light"

        # README: one description line per row in the first column.
        for line_no, text in enumerate(description.split("\n")):
            put(readme_sheet, line_no, 0, text, style="description")
        set_width(readme_sheet, 0, 200)

        # Sheet 1: the dataset the PCA was run on.
        data_sheet = workbook.create_sheet(self.tr("Dataset"))
        put(data_sheet, 0, 0, self.tr("Sample Name"), style="header")
        set_width(data_sheet, 0, 16)
        for col, size in enumerate(dataset.classes_μm, 1):
            put(data_sheet, 0, col, size, style="header")
            set_width(data_sheet, col, 10)
        for row, sample in enumerate(dataset.samples, 1):
            row_style = stripe(row)
            put(data_sheet, row, 0, sample.name, style=row_style)
            for col, frequency in enumerate(sample.distribution, 1):
                put(data_sheet, row, col, frequency, style=row_style)
            QCoreApplication.processEvents()

        # Sheet 2: one row per principal component.
        pc_sheet = workbook.create_sheet(self.tr("PCs"))
        put(pc_sheet, 0, 0, self.tr("PC"), style="header")
        set_width(pc_sheet, 0, 16)
        for col, size in enumerate(dataset.classes_μm, 1):
            put(pc_sheet, 0, col, size, style="header")
            set_width(pc_sheet, col, 10)
        for row, pc_values in enumerate(components, 1):
            row_style = stripe(row)
            put(pc_sheet, row, 0, self.tr("PC{0}").format(row),
                style=row_style)
            for col, loading in enumerate(pc_values, 1):
                put(pc_sheet, row, col, loading, style=row_style)
            QCoreApplication.processEvents()

        # Sheet 3: transformed values of every sample, one column per PC.
        variation_sheet = workbook.create_sheet(self.tr("Variations of PCs"))
        put(variation_sheet, 0, 0, self.tr("Sample Name"), style="header")
        set_width(variation_sheet, 0, 16)
        for pc_no in range(1, n_components + 1):
            title = self.tr("PC{0} ({1:0.4f})").format(pc_no,
                                                       ratios[pc_no - 1])
            put(variation_sheet, 0, pc_no, title, style="header")
            set_width(variation_sheet, pc_no, 10)
        for row, sample_scores in enumerate(transformed, 1):
            row_style = stripe(row)
            put(variation_sheet, row, 0, dataset.samples[row - 1].name,
                style=row_style)
            for col, score in enumerate(sample_scores, 1):
                put(variation_sheet, row, col, score, style=row_style)
            QCoreApplication.processEvents()

        workbook.save(filename)
        workbook.close()