def save_as_xlsx(self, filename: str):
    """Save the last hierarchy clustering result to an Excel file.

    Sheets: README, the input dataset, the cluster flag of every sample,
    one typical sample per cluster, and (when the cluster count is at
    most 100) the member samples of each cluster on individual sheets.

    :param filename: target path of the ``.xlsx`` file.
    """
    # Must have run the clustering before exporting.
    assert self.__last_result is not None
    dataset, X, linkage_matrix, dendrogram_res = self.__last_result
    n_clusters = self.n_clusters
    flags = fcluster(linkage_matrix, n_clusters, criterion="maxclust")
    flag_set = set(flags)
    # fcluster may yield fewer clusters than requested, so recount.
    n_clusters = len(flag_set)
    wb = openpyxl.Workbook()
    prepare_styles(wb)
    ws = wb.active
    ws.title = self.tr("README")
    description = \
        """
This Excel file was generated by QGrain ({0}).

Please cite:
Liu, Y., Liu, X., Sun, Y., 2021.
QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions.
Sedimentary Geology 423, 105980.
https://doi.org/10.1016/j.sedgeo.2021.105980

It contanins three (or n_clusters + 3) sheets:
1. The first sheet is the dataset which was used to perform the hierarchy clustering algorithm.
2. The second sheet is used to put the clustering flags of all samples.
3. The third sheet is the typical sampels (i.e, the first sample of each cluster was selected).
4. If the number of clusters less equal to 100, the samples of each cluster will be save to individual sheets.

The base hierarchy clusrting algorithm is implemented by Scipy. You can get the details of algorithm from the following website.
https://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html

""".format(QGRAIN_VERSION)

    # NOTE: `write` closes over `ws`, so it always targets whichever
    # sheet `ws` is bound to at call time (rebound below per sheet).
    def write(row, col, value, style="normal_light"):
        cell = ws.cell(row + 1, col + 1, value=value)
        cell.style = style

    lines_of_desc = description.split("\n")
    for row, line in enumerate(lines_of_desc):
        write(row, 0, line, style="description")
    ws.column_dimensions[column_to_char(0)].width = 200

    # Sheet: the raw dataset (one row per sample, one column per class).
    ws = wb.create_sheet(self.tr("Dataset"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, value in enumerate(dataset.classes_μm, 1):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    for row, sample in enumerate(dataset.samples, 1):
        # Alternate row styles for readability.
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, sample.name, style=style)
        for col, value in enumerate(sample.distribution, 1):
            write(row, col, value, style=style)
        # Keep the GUI responsive during the long export.
        QCoreApplication.processEvents()

    # Sheet: the cluster flag of each sample.
    ws = wb.create_sheet(self.tr("Flags"))
    write(0, 0, self.tr("Sample Name"), style="header")
    write(0, 1, self.tr("Flag of the Cluster"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    ws.column_dimensions[column_to_char(1)].width = 16
    for row, (sample, flag) in enumerate(zip(dataset.samples, flags), 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, sample.name, style=style)
        write(row, 1, flag, style=style)
        QCoreApplication.processEvents()

    # Pick the first sample of each cluster as its typical sample.
    typical_samples = []
    temp_flag_set = set()
    for i, flag in enumerate(flags):
        if len(temp_flag_set) == n_clusters:
            break
        if flag not in temp_flag_set:
            typical_samples.append(dataset.samples[i])
            temp_flag_set.add(flag)

    # Sheet: the typical samples.
    ws = wb.create_sheet(self.tr("Typical Samples"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, value in enumerate(dataset.classes_μm, 1):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    for row, sample in enumerate(typical_samples, 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, sample.name, style=style)
        for col, value in enumerate(sample.distribution, 1):
            write(row, col, value, style=style)
        QCoreApplication.processEvents()

    # Extra sheets: the member samples of each cluster
    # (skipped when there are too many clusters).
    if n_clusters <= 100:
        for flag in flag_set:
            samples = []
            for sample, in_this_cluster in zip(dataset.samples, np.equal(flags, flag)):
                if in_this_cluster:
                    samples.append(sample)
            ws = wb.create_sheet(self.tr("Cluster{0}").format(flag))
            write(0, 0, self.tr("Sample Name"), style="header")
            ws.column_dimensions[column_to_char(0)].width = 16
            for col, value in enumerate(dataset.classes_μm, 1):
                write(0, col, value, style="header")
                ws.column_dimensions[column_to_char(col)].width = 10
            for row, sample in enumerate(samples, 1):
                if row % 2 == 0:
                    style = "normal_dark"
                else:
                    style = "normal_light"
                write(row, 0, sample.name, style=style)
                for col, value in enumerate(sample.distribution, 1):
                    write(row, col, value, style=style)
                QCoreApplication.processEvents()

    wb.save(filename)
    wb.close()
def save_result_excel(self, filename: str, result: EMMAResult):
    """Save an EMMA result to an Excel file.

    Sheets: README, the input dataset, the end-member distributions
    (re-ordered by mode size from fine to coarse), and the end-member
    fractions of all samples.

    :param filename: target path of the ``.xlsx`` file.
    :param result: the EMMA result to export.
    """
    # get the mode size of each end-members
    # -> list of (original index, grain size [μm] at the distribution maximum)
    modes = [(i, result.dataset.classes_μm[np.unravel_index(
        np.argmax(result.end_members[i]),
        result.end_members[i].shape)])
        for i in range(result.n_members)]
    # sort them by mode size, so EM1..EMn are ordered from fine to coarse
    modes.sort(key=lambda x: x[1])
    wb = openpyxl.Workbook()
    prepare_styles(wb)
    ws = wb.active
    ws.title = self.tr("README")
    description = \
        """
This Excel file was generated by QGrain ({0}).

Please cite:
Liu, Y., Liu, X., Sun, Y., 2021.
QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions.
Sedimentary Geology 423, 105980.
https://doi.org/10.1016/j.sedgeo.2021.105980

It contanins three sheets:
1. The first sheet is the dataset which was used to perform the EMMA algorithm.
2. The second sheet is used to put the distributions of all end-members.
3. The third sheet is the end-member fractions of all samples.

This EMMA algorithm was implemented by QGrian, using the famous machine learning framework, PyTorch.

EMMA algorithm details
    N_samples: {1},
    Distribution Type: {2},
    N_members: {3},
    N_iterations: {4},
    Spent Time: {5} s,
    Computing Device: {6},
    Distance: {7},
    Minimum N_iterations: {8},
    Maximum N_iterations: {9},
    Learning Rate: {10},
    eps: {11},
    tol: {12},
    ftol: {13}

""".format(QGRAIN_VERSION,
           result.dataset.n_samples,
           result.distribution_type.name,
           result.n_members,
           result.n_iterations,
           result.time_spent,
           result.resolver_setting.device,
           result.resolver_setting.distance,
           result.resolver_setting.min_niter,
           result.resolver_setting.max_niter,
           result.resolver_setting.lr,
           result.resolver_setting.eps,
           result.resolver_setting.tol,
           result.resolver_setting.ftol)

    # NOTE: `write` closes over `ws`, so it always targets whichever
    # sheet `ws` is bound to at call time (rebound below per sheet).
    def write(row, col, value, style="normal_light"):
        cell = ws.cell(row + 1, col + 1, value=value)
        cell.style = style

    lines_of_desc = description.split("\n")
    for row, line in enumerate(lines_of_desc):
        write(row, 0, line, style="description")
    ws.column_dimensions[column_to_char(0)].width = 200

    # Sheet: the raw dataset.
    ws = wb.create_sheet(self.tr("Dataset"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, value in enumerate(result.dataset.classes_μm, 1):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    for row, sample in enumerate(result.dataset.samples, 1):
        # Alternate row styles for readability.
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, sample.name, style=style)
        for col, value in enumerate(sample.distribution, 1):
            write(row, col, value, style=style)
        # Keep the GUI responsive during the long export.
        QCoreApplication.processEvents()

    # Sheet: the end-member distributions, in mode-size order.
    ws = wb.create_sheet(self.tr("End-members"))
    write(0, 0, self.tr("End-member"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, value in enumerate(result.dataset.classes_μm, 1):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    for row, (index, _) in enumerate(modes, 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, f"EM{row}", style=style)
        for col, value in enumerate(result.end_members[index], 1):
            write(row, col, value, style=style)
        QCoreApplication.processEvents()

    # Sheet: the end-member fractions of each sample
    # (columns follow the same mode-size ordering as the EM sheet).
    ws = wb.create_sheet(self.tr("Fractions"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for i in range(result.n_members):
        write(0, i + 1, f"EM{i+1}", style="header")
        ws.column_dimensions[column_to_char(i + 1)].width = 10
    for row, fractions in enumerate(result.fractions, 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, result.dataset.samples[row - 1].name, style=style)
        for col, (index, _) in enumerate(modes, 1):
            write(row, col, fractions[index], style=style)
        QCoreApplication.processEvents()

    wb.save(filename)
    wb.close()
def save_file(self, filename: str):
    """Save the statistic parameters and classification groups to an Excel file.

    One data sheet: per-sample statistic parameters (Blott & Pye, 2001
    formulas) and the Folk (1954) / Blott & Pye (2012) group labels.

    :param filename: target path of the ``.xlsx`` file.
    """
    wb = openpyxl.Workbook()
    prepare_styles(wb)
    ws = wb.active
    ws.title = self.tr("README")
    description = \
        """
This Excel file was generated by QGrain ({0}).

Please cite:
Liu, Y., Liu, X., Sun, Y., 2021.
QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions.
Sedimentary Geology 423, 105980.
https://doi.org/10.1016/j.sedgeo.2021.105980

It contanins one sheet:
1. The sheet puts the statistic parameters and the classification groups of the samples.

The statistic formulas are referred to Blott & Pye (2001)'s work.
The classification of GSDs is referred to Folk (1957)'s and Blott & Pye (2012)'s scheme.

References:
1.Blott, S. J. & Pye, K. Particle size scales and classification of sediment types based on particle size distributions: Review and recommended procedures. Sedimentology 59, 2071–2096 (2012).
2.Blott, S. J. & Pye, K. GRADISTAT: a grain-size distribution and statistics package for the analysis of unconsolidated sediments. Earth Surf. Process. Landforms 26, 1237–1248 (2001).
3.Folk, R. L. The Distinction between Grain Size and Mineral Composition in Sedimentary-Rock Nomenclature. The Journal of Geology 62, 344–359 (1954).

""".format(QGRAIN_VERSION)

    # NOTE: `write` closes over `ws`, so it always targets whichever
    # sheet `ws` is bound to at call time (rebound below).
    def write(row, col, value, style="normal_light"):
        cell = ws.cell(row + 1, col + 1, value=value)
        cell.style = style

    lines_of_desc = description.split("\n")
    for row, line in enumerate(lines_of_desc):
        write(row, 0, line, style="description")
    ws.column_dimensions[column_to_char(0)].width = 200

    ws = wb.create_sheet(self.tr("Parameters and Groups"))
    # `self.proportion` yields the statistic key and a human-readable label.
    proportion_key, proportion_desciption = self.proportion
    col_names = [f"{self.tr('Mean')}[{self.unit}]",
                 self.tr("Mean Desc."),
                 f"{self.tr('Median')} [{self.unit}]",
                 f"{self.tr('Modes')} [{self.unit}]",
                 self.tr("STD (Sorting)"),
                 self.tr("Sorting Desc."),
                 self.tr("Skewness"),
                 self.tr("Skew. Desc."),
                 self.tr("Kurtosis"),
                 self.tr("Kurt. Desc."),
                 f"({proportion_desciption})\n{self.tr('Proportion')} [%]",
                 self.tr("Group\n(Folk, 1954)"),
                 self.tr("Group\nSymbol (Blott & Pye, 2012)"),
                 self.tr("Group\n(Blott & Pye, 2012)")]
    # (in_sub, key): in_sub=True means the key lives under the method
    # sub-dict (geometric/logarithmic), False means a top-level key.
    col_keys = [(True, "mean"), (True, "mean_description"),
                (True, "median"), (True, "modes"),
                (True, "std"), (True, "std_description"),
                (True, "skewness"), (True, "skewness_description"),
                (True, "kurtosis"), (True, "kurtosis_description"),
                (False, proportion_key),
                (False, "group_Folk54"),
                (False, "group_BP12_symbol"),
                (False, "group_BP12")]
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, moment_name in enumerate(col_names, 1):
        write(0, col, moment_name, style="header")
        # Wider columns for the textual description/group columns.
        if col in (2, 4, 6, 8, 10, 11, 12, 14):
            ws.column_dimensions[column_to_char(col)].width = 30
        else:
            ws.column_dimensions[column_to_char(col)].width = 16
    # Last column (the long B&P12 group name) gets extra width.
    ws.column_dimensions[column_to_char(len(col_names))].width = 40
    for row, sample in enumerate(self.__dataset.samples, 1):
        # Alternate row styles for readability.
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, sample.name, style=style)
        statistic = get_all_statistic(sample.classes_μm, sample.classes_φ, sample.distribution)
        # Pick the statistic variant selected in the UI.
        if self.is_geometric:
            if self.is_FW57:
                sub_key = "geometric_FW57"
            else:
                sub_key = "geometric"
        else:
            if self.is_FW57:
                sub_key = "logarithmic_FW57"
            else:
                sub_key = "logarithmic"
        for col, (in_sub, key) in enumerate(col_keys, 1):
            value = statistic[sub_key][key] if in_sub else statistic[key]
            if key == "modes":
                # Multiple modes -> one comma-separated cell.
                write(row, col, ", ".join([f"{m:0.4f}" for m in value]), style=style)
            elif key[-11:] == "_proportion":
                # Proportions are stored as 0-1 fractions; report percent.
                write(row, col, ", ".join([f"{p*100:0.4f}" for p in value]), style=style)
            else:
                write(row, col, value, style=style)
    wb.save(filename)
    wb.close()
def save_typical(self, filename):
    """Cluster the stacked components with OPTICS and save the result to Excel.

    Sheets: README, the typical (per-cluster mean) component
    distributions, the components not assigned to any cluster, and the
    member components of each cluster on individual sheets.

    :param filename: target path of the ``.xlsx`` file.
    """
    assert self.last_results is not None
    if len(self.last_results) == 0:
        return
    cluster = OPTICS(min_samples=self.min_samples_input.value(),
                     min_cluster_size=self.min_cluster_size_input.value(),
                     xi=self.xi_input.value())
    classes_μm = self.last_results[0].classes_μm
    flags = cluster.fit_predict(self.data_to_clustering)
    flag_set = set(flags)
    # Typical component of a cluster = mean of its member distributions;
    # flag -1 marks the components OPTICS left unclustered (skipped here).
    typicals = []
    for flag in flag_set:
        if flag != -1:
            key = np.equal(flags, flag)
            typical = np.mean(self.stacked_components[key], axis=0)
            typicals.append(typical)
    wb = openpyxl.Workbook()
    prepare_styles(wb)
    ws = wb.active
    ws.title = self.tr("README")
    description = \
        """
This Excel file was generated by QGrain ({0}).

Please cite:
Liu, Y., Liu, X., Sun, Y., 2021.
QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions.
Sedimentary Geology 423, 105980.
https://doi.org/10.1016/j.sedgeo.2021.105980

It contanins 2 + N_clusters sheets:
1. The first sheet is the sum distributions of all component clusters.
2. The second sheet is used to put the component distributions that not in any cluster.
3. The left sheet is the component distributions of each cluster, separately.

The clustering algorithm is OPTICS, implemented by scikit-learn.
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.OPTICS.html

Clustering algorithm details
    min_samples={1}
    min_cluster_size={2}
    xi={3}
    others=default

""".format(QGRAIN_VERSION,
           self.min_samples_input.value(),
           self.min_cluster_size_input.value(),
           self.xi_input.value())

    # NOTE: `write` closes over `ws`, so it always targets whichever
    # sheet `ws` is bound to at call time (rebound below per sheet).
    def write(row, col, value, style="normal_light"):
        cell = ws.cell(row + 1, col + 1, value=value)
        cell.style = style

    lines_of_desc = description.split("\n")
    for row, line in enumerate(lines_of_desc):
        write(row, 0, line, style="description")
    ws.column_dimensions[column_to_char(0)].width = 200

    # Sheet: one row per cluster's typical (mean) component.
    ws = wb.create_sheet(self.tr("Typical Components"))
    write(0, 0, self.tr("Typical Component"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, value in enumerate(classes_μm, 1):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    for row, distribution in enumerate(typicals, 1):
        # Alternate row styles for readability.
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, self.tr("Component{0}").format(row), style=style)
        for col, value in enumerate(distribution, 1):
            write(row, col, value, style=style)
        # Keep the GUI responsive during the long export.
        QCoreApplication.processEvents()

    # One sheet per cluster; the noise components (-1) get their own
    # "Not Clustered" sheet inserted at position 2.
    for flag in flag_set:
        if flag == -1:
            ws = wb.create_sheet(self.tr("Not Clustered"), 2)
        else:
            ws = wb.create_sheet(self.tr("Cluster{0}").format(flag + 1))
        write(0, 0, self.tr("Index"), style="header")
        ws.column_dimensions[column_to_char(0)].width = 16
        for col, value in enumerate(classes_μm, 1):
            write(0, col, value, style="header")
            ws.column_dimensions[column_to_char(col)].width = 10
        key = np.equal(flags, flag)
        for row, component in enumerate(self.stacked_components[key], 1):
            if row % 2 == 0:
                style = "normal_dark"
            else:
                style = "normal_light"
            write(row, 0, str(row), style=style)
            for col, value in enumerate(component, 1):
                write(row, col, value, style=style)
            QCoreApplication.processEvents()

    wb.save(filename)
    wb.close()
def save_excel(self, filename, align_components=False):
    """Save all SSU fitting results to an Excel file.

    Sheets: README, the sample distributions, the fitting information,
    the statistic moments of each component, the unmixed component
    distributions per sample, and one sheet per aligned end-member.

    :param filename: target path of the ``.xlsx`` file.
    :param align_components: when True, components of all results are
        clustered with KMeans so that corresponding components of
        different samples share the same end-member column; when False,
        components keep their per-result order.
    """
    if self.n_results == 0:
        return
    results = self.__fitting_results.copy()
    classes_μm = results[0].classes_μm
    n_components_list = [
        result.n_components for result in self.__fitting_results
    ]
    count_dict = Counter(n_components_list)
    max_n_components = max(count_dict.keys())
    self.logger.debug(
        f"N_components: {count_dict}, Max N_components: {max_n_components}"
    )
    flags = []
    if not align_components:
        # Components keep their natural per-result order: C1..Ck.
        for result in results:
            flags.extend(range(result.n_components))
    else:
        n_components_desc = "\n".join([
            self.tr("{0} Component(s): {1}").format(n_components, count)
            for n_components, count in count_dict.items()
        ])
        self.show_info(
            self.tr("N_components distribution of Results:\n{0}").format(
                n_components_desc))
        # Stack every component distribution of every result and
        # cluster them, so similar components share one label.
        stacked_components = []
        for result in self.__fitting_results:
            for component in result.components:
                stacked_components.append(component.distribution)
        stacked_components = np.array(stacked_components)
        cluster = KMeans(n_clusters=max_n_components)
        flags = cluster.fit_predict(stacked_components)
        # check flags to make it unique within each result
        flag_index = 0
        for i, result in enumerate(self.__fitting_results):
            result_flags = set()
            for component in result.components:
                if flags[flag_index] in result_flags:
                    # BUGFIX: the original code did `flag_index[flag_index] += 1`
                    # (a TypeError, `flag_index` is an int) and compared the label
                    # against `max_n_components`, which KMeans never emits
                    # (labels are 0..max_n_components-1). Intent: nudge a
                    # duplicated label to a neighboring label within range.
                    if flags[flag_index] == max_n_components - 1:
                        flags[flag_index] -= 1
                    else:
                        flags[flag_index] += 1
                result_flags.add(flags[flag_index])
                flag_index += 1
        # Re-map the labels so EM1..EMn are ordered by the logarithmic
        # mean grain size of one representative component per label.
        flag_set = set(flags)
        picked = []
        for target_flag in flag_set:
            for i, flag in enumerate(flags):
                if flag == target_flag:
                    picked.append(
                        (target_flag,
                         logarithmic(classes_μm, stacked_components[i])["mean"]))
                    break
        picked.sort(key=lambda x: x[1])
        flag_map = {flag: index for index, (flag, _) in enumerate(picked)}
        flags = np.array([flag_map[flag] for flag in flags])

    wb = openpyxl.Workbook()
    prepare_styles(wb)
    ws = wb.active
    ws.title = self.tr("README")
    description = \
        """
This Excel file was generated by QGrain ({0}).

Please cite:
Liu, Y., Liu, X., Sun, Y., 2021.
QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions.
Sedimentary Geology 423, 105980.
https://doi.org/10.1016/j.sedgeo.2021.105980

It contanins 4 + max(N_components) sheets:
1. The first sheet is the sample distributions of SSU results.
2. The second sheet is used to put the infomation of fitting.
3. The third sheet is the statistic parameters calculated by statistic moment method.
4. The fouth sheet is the distributions of unmixed components and their sum of each sample.
5. Other sheets are the unmixed end-member distributions which were discretely stored.

The SSU algorithm is implemented by QGrain.

""".format(QGRAIN_VERSION)

    # NOTE: `write` closes over `ws`, so it always targets whichever
    # sheet `ws` is bound to at call time (rebound below per sheet).
    def write(row, col, value, style="normal_light"):
        cell = ws.cell(row + 1, col + 1, value=value)
        cell.style = style

    lines_of_desc = description.split("\n")
    for row, line in enumerate(lines_of_desc):
        write(row, 0, line, style="description")
    ws.column_dimensions[column_to_char(0)].width = 200

    # Sheet 1: the measured sample distributions.
    ws = wb.create_sheet(self.tr("Sample Distributions"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, value in enumerate(classes_μm, 1):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    for row, result in enumerate(results, 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, result.sample.name, style=style)
        for col, value in enumerate(result.sample.distribution, 1):
            write(row, col, value, style=style)
        # Keep the GUI responsive during the long export.
        QCoreApplication.processEvents()

    # Sheet 2: the fitting information of each result.
    ws = wb.create_sheet(self.tr("Information of Fitting"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    headers = [
        self.tr("Distribution Type"),
        self.tr("N_components"),
        self.tr("Resolver"),
        self.tr("Resolver Settings"),
        self.tr("Initial Guess"),
        self.tr("Reference"),
        self.tr("Spent Time [s]"),
        self.tr("N_iterations"),
        self.tr("Final Distance [log10MSE]")
    ]
    for col, value in enumerate(headers, 1):
        write(0, col, value, style="header")
        if col in (4, 5, 6):
            ws.column_dimensions[column_to_char(col)].width = 10
        else:
            ws.column_dimensions[column_to_char(col)].width = 10
    for row, result in enumerate(results, 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, result.sample.name, style=style)
        write(row, 1, result.distribution_type.name, style=style)
        write(row, 2, result.n_components, style=style)
        write(row, 3, result.task.resolver, style=style)
        write(row, 4, self.tr("Default") if result.task.resolver_setting is None else result.task.resolver_setting.__str__(), style=style)
        write(row, 5, self.tr("None") if result.task.initial_guess is None else result.task.initial_guess.__str__(), style=style)
        write(row, 6, self.tr("None") if result.task.reference is None else result.task.reference.__str__(), style=style)
        write(row, 7, result.time_spent, style=style)
        write(row, 8, result.n_iterations, style=style)
        write(row, 9, result.get_distance("log10MSE"), style=style)

    # Sheet 3: statistic moments, one column group per end-member.
    ws = wb.create_sheet(self.tr("Statistic Moments"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1)
    ws.column_dimensions[column_to_char(0)].width = 16
    headers = []
    sub_headers = [
        self.tr("Proportion"),
        self.tr("Mean [φ]"),
        self.tr("Mean [μm]"),
        self.tr("STD [φ]"),
        self.tr("STD [μm]"),
        self.tr("Skewness"),
        self.tr("Kurtosis")
    ]
    for i in range(max_n_components):
        write(0, i * len(sub_headers) + 1, self.tr("C{0}").format(i + 1), style="header")
        ws.merge_cells(start_row=1,
                       start_column=i * len(sub_headers) + 2,
                       end_row=1,
                       end_column=(i + 1) * len(sub_headers) + 1)
        headers.extend(sub_headers)
    for col, value in enumerate(headers, 1):
        write(1, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    flag_index = 0
    for row, result in enumerate(results, 2):
        # Offset of 2 inverts the parity relative to the other sheets.
        if row % 2 == 0:
            style = "normal_light"
        else:
            style = "normal_dark"
        write(row, 0, result.sample.name, style=style)
        for component in result.components:
            # `flags` maps each component to its end-member column group.
            index = flags[flag_index]
            write(row, index * len(sub_headers) + 1, component.fraction, style=style)
            write(row, index * len(sub_headers) + 2, component.logarithmic_moments["mean"], style=style)
            write(row, index * len(sub_headers) + 3, component.geometric_moments["mean"], style=style)
            write(row, index * len(sub_headers) + 4, component.logarithmic_moments["std"], style=style)
            write(row, index * len(sub_headers) + 5, component.geometric_moments["std"], style=style)
            write(row, index * len(sub_headers) + 6, component.logarithmic_moments["skewness"], style=style)
            write(row, index * len(sub_headers) + 7, component.logarithmic_moments["kurtosis"], style=style)
            flag_index += 1

    # Sheet 4: per-sample unmixed components and their sum.
    ws = wb.create_sheet(self.tr("Unmixed Components"))
    ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=2)
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, value in enumerate(classes_μm, 2):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    row = 1
    for result_index, result in enumerate(results, 1):
        if result_index % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, result.sample.name, style=style)
        # Merge the sample-name cell across its component rows + sum row.
        ws.merge_cells(start_row=row + 1,
                       start_column=1,
                       end_row=row + result.n_components + 1,
                       end_column=1)
        for component_i, component in enumerate(result.components, 1):
            write(row, 1, self.tr("C{0}").format(component_i), style=style)
            for col, value in enumerate(
                    component.distribution * component.fraction, 2):
                write(row, col, value, style=style)
            row += 1
        write(row, 1, self.tr("Sum"), style=style)
        for col, value in enumerate(result.distribution, 2):
            write(row, col, value, style=style)
        row += 1

    # Remaining sheets: one per end-member, holding that end-member's
    # distribution from every result.
    ws_dict = {}
    flag_set = set(flags)
    for flag in flag_set:
        ws = wb.create_sheet(self.tr("Unmixed EM{0}").format(flag + 1))
        write(0, 0, self.tr("Sample Name"), style="header")
        ws.column_dimensions[column_to_char(0)].width = 16
        for col, value in enumerate(classes_μm, 1):
            write(0, col, value, style="header")
            ws.column_dimensions[column_to_char(col)].width = 10
        ws_dict[flag] = ws
    flag_index = 0
    for row, result in enumerate(results, 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        for component in result.components:
            flag = flags[flag_index]
            ws = ws_dict[flag]
            write(row, 0, result.sample.name, style=style)
            for col, value in enumerate(component.distribution, 1):
                write(row, col, value, style=style)
            flag_index += 1

    wb.save(filename)
    wb.close()
def on_generate_clicked(self):
    """Generate a random artificial dataset and save it to an Excel file.

    Asks the user for a filename, generates ``n_samples`` random samples,
    then writes: README, the random settings, the dataset, the random
    parameters, and one sheet per component. The progress bar is updated
    throughout (0-50 generation, 50-100 writing) and the task can be
    canceled via ``self.cancel_flag``.
    """
    if self.update_timer.isActive():
        self.preview_button.setText(self.tr("Preview"))
        self.update_timer.stop()
    self.update_chart()
    filename, _ = self.file_dialog.getSaveFileName(
        self, self.tr("Choose a filename to save the generated dataset"),
        None, "Microsoft Excel (*.xlsx)")
    if filename is None or filename == "":
        return
    n_samples = self.n_samples_input.value()
    dataset = self.get_random_dataset(n_samples)
    # generate samples
    self.cancel_button.setEnabled(True)
    self.generate_button.setEnabled(False)
    format_str = self.tr("Generating {0} samples: %p%").format(n_samples)
    self.progress_bar.setFormat(format_str)
    self.progress_bar.setValue(0)

    def cancel():
        # Restore the UI state after the user canceled the task.
        self.progress_bar.setFormat(self.tr("Task canceled"))
        self.progress_bar.setValue(0)
        self.cancel_button.setEnabled(False)
        self.generate_button.setEnabled(True)
        self.cancel_flag = False

    samples = []
    for i in range(n_samples):
        if self.cancel_flag:
            cancel()
            return
        sample = dataset.get_sample(i)
        samples.append(sample)
        # Generation occupies the first half of the progress bar.
        progress = (i + 1) / n_samples * 50
        self.progress_bar.setValue(progress)
        QCoreApplication.processEvents()
    # save file to excel file
    format_str = self.tr("Writing {0} samples to excel file: %p%").format(
        n_samples)
    self.progress_bar.setFormat(format_str)
    self.progress_bar.setValue(50)
    wb = openpyxl.Workbook()
    prepare_styles(wb)
    ws = wb.active
    ws.title = self.tr("README")
    # NOTE(review): the "Noise" field below is recorded as precision + 1;
    # confirm this matches the noise actually used by `get_random_dataset`.
    description = \
        """
This Excel file was generated by QGrain ({0}).

Please cite:
Liu, Y., Liu, X., Sun, Y., 2021.
QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions.
Sedimentary Geology 423, 105980.
https://doi.org/10.1016/j.sedgeo.2021.105980

It contanins n_components + 3 sheets:
1. The first sheet is the random settings which were used to generate random parameters.
2. The second sheet is the generated dataset.
3. The third sheet is random parameters which were used to calulate the component distributions and their mixture.
4. The left sheets are the component distributions of all samples.

Artificial dataset
Using skew normal distribution as the base distribution of each component (i.e. end-member).
Skew normal distribution has three parameters, shape, location and scale.
Where shape controls the skewness, location and scale are simliar to that of the Normal distribution.
When shape = 0, it becomes Normal distribution.
The weight parameter controls the fraction of the component, where fraction_i = weight_i / sum(weight_i).
By assigning the mean and std of each parameter, random parameters was generate by the `scipy.stats.truncnorm.rvs` function of Scipy.

Sampling settings
    Minimum size [μm]: {1},
    Maximum size [μm]: {2},
    N_classes: {3},
    Precision: {4},
    Noise: {5},
    N_samples: {6}

""".format(QGRAIN_VERSION,
           self.minimum_size_input.value(),
           self.maximum_size_input.value(),
           self.n_classes_input.value(),
           self.precision_input.value(),
           self.precision_input.value()+1,
           n_samples)

    # NOTE: `write` closes over `ws`, so it always targets whichever
    # sheet `ws` is bound to at call time (rebound below per sheet).
    def write(row, col, value, style="normal_light"):
        cell = ws.cell(row + 1, col + 1, value=value)
        cell.style = style

    lines_of_desc = description.split("\n")
    for row, line in enumerate(lines_of_desc):
        write(row, 0, line, style="description")
    ws.column_dimensions[column_to_char(0)].width = 200

    # Sheet 1: the random settings (mean/std of each parameter).
    ws = wb.create_sheet(self.tr("Random Settings"))
    write(0, 0, self.tr("Parameter"), style="header")
    ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1)
    write(0, 1, self.tr("Shape"), style="header")
    ws.merge_cells(start_row=1, start_column=2, end_row=1, end_column=3)
    write(0, 3, self.tr("Location"), style="header")
    ws.merge_cells(start_row=1, start_column=4, end_row=1, end_column=5)
    write(0, 5, self.tr("Scale"), style="header")
    ws.merge_cells(start_row=1, start_column=6, end_row=1, end_column=7)
    write(0, 7, self.tr("Weight"), style="header")
    ws.merge_cells(start_row=1, start_column=8, end_row=1, end_column=9)
    ws.column_dimensions[column_to_char(0)].width = 16
    for col in range(1, 9):
        ws.column_dimensions[column_to_char(col)].width = 16
        # BUGFIX: the sub-headers were swapped — the data rows below put
        # the mean at the odd columns (i * 2 + 1) and the std at the even
        # columns (i * 2 + 2), but the original labeled odd columns "STD"
        # and even columns "Mean".
        if col % 2 == 1:
            write(1, col, self.tr("Mean"), style="header")
        else:
            write(1, col, self.tr("STD"), style="header")
    for row, comp_params in enumerate(self.target, 2):
        if row % 2 == 1:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, self.tr("Component{0}").format(row - 1), style=style)
        for i, key in enumerate(["shape", "loc", "scale", "weight"]):
            mean, std = comp_params[key]
            write(row, i * 2 + 1, mean, style=style)
            write(row, i * 2 + 2, std, style=style)

    # Sheet 2: the generated dataset (progress 50-60).
    ws = wb.create_sheet(self.tr("Dataset"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 24
    for col, value in enumerate(dataset.classes_μm, 1):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    for row, sample in enumerate(samples, 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, sample.name, style=style)
        for col, value in enumerate(sample.distribution, 1):
            write(row, col, value, style=style)
        if self.cancel_flag:
            cancel()
            return
        progress = 50 + (row / n_samples) * 10
        self.progress_bar.setValue(progress)
        QCoreApplication.processEvents()

    # Sheet 3: the random parameters of each sample (progress 60-70).
    ws = wb.create_sheet(self.tr("Parameters"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1)
    ws.column_dimensions[column_to_char(0)].width = 24
    for i in range(dataset.n_components):
        write(0, 4 * i + 1, self.tr("Component{0}").format(i + 1), style="header")
        ws.merge_cells(start_row=1,
                       start_column=4 * i + 2,
                       end_row=1,
                       end_column=4 * i + 5)
        for j, header_name in enumerate([
                self.tr("Shape"),
                self.tr("Location"),
                self.tr("Scale"),
                self.tr("Weight")
        ]):
            write(1, 4 * i + 1 + j, header_name, style="header")
            ws.column_dimensions[column_to_char(4 * i + 1 + j)].width = 16
    for row, sample in enumerate(samples, 2):
        if row % 2 == 1:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, sample.name, style=style)
        for i, comp_param in enumerate(sample.parameter.components):
            write(row, 4 * i + 1, comp_param.shape, style=style)
            write(row, 4 * i + 2, comp_param.loc, style=style)
            write(row, 4 * i + 3, comp_param.scale, style=style)
            write(row, 4 * i + 4, comp_param.weight, style=style)
        if self.cancel_flag:
            cancel()
            return
        progress = 60 + (row / n_samples) * 10
        self.progress_bar.setValue(progress)
        QCoreApplication.processEvents()

    # Remaining sheets: one per component (progress 70-100).
    for i in range(dataset.n_components):
        ws = wb.create_sheet(self.tr("Component{0}").format(i + 1))
        write(0, 0, self.tr("Sample Name"), style="header")
        ws.column_dimensions[column_to_char(0)].width = 24
        for col, value in enumerate(dataset.classes_μm, 1):
            write(0, col, value, style="header")
            ws.column_dimensions[column_to_char(col)].width = 10
        for row, sample in enumerate(samples, 1):
            if row % 2 == 0:
                style = "normal_dark"
            else:
                style = "normal_light"
            write(row, 0, sample.name, style=style)
            for col, value in enumerate(sample.components[i].distribution, 1):
                write(row, col, value, style=style)
            if self.cancel_flag:
                cancel()
                return
            # BUGFIX: the original divided only by n_samples and then
            # multiplied by n_components, so the progress overshot 100;
            # the fraction must be over the total rows of all sheets.
            progress = 70 + (
                (i * n_samples + row) /
                (n_samples * dataset.n_components)) * 30
            self.progress_bar.setValue(progress)
            QCoreApplication.processEvents()

    wb.save(filename)
    wb.close()
    self.progress_bar.setValue(100)
    self.progress_bar.setFormat(self.tr("Task finished"))
    self.cancel_button.setEnabled(False)
    self.generate_button.setEnabled(True)
def save_as_xlsx(self, filename: str):
    """Save the last PCA result to an Excel file.

    Sheets: README, the input dataset, the PC distributions, and the
    PC variations (transformed coordinates) of all samples.

    :param filename: target path of the ``.xlsx`` file.
    """
    # Must have run the PCA before exporting.
    assert self.last_result is not None
    dataset, transformed, components, ratios = self.last_result
    n_samples, n_components = transformed.shape
    wb = openpyxl.Workbook()
    prepare_styles(wb)
    ws = wb.active
    ws.title = self.tr("README")
    description = \
        """
This Excel file was generated by QGrain ({0}).

Please cite:
Liu, Y., Liu, X., Sun, Y., 2021.
QGrain: An open-source and easy-to-use software for the comprehensive analysis of grain size distributions.
Sedimentary Geology 423, 105980.
https://doi.org/10.1016/j.sedgeo.2021.105980

It contanins three sheets:
1. The first sheet is the dataset which was used to perform the PCA algorithm.
2. The second sheet is used to put the distributions of all PCs.
3. The third sheet is the PC variation of all samples.

The base PCA algorithm is implemented by scikit-learn. You can get the details of algorithm from the following website.
https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

""".format(QGRAIN_VERSION)

    # NOTE: `write` closes over `ws`, so it always targets whichever
    # sheet `ws` is bound to at call time (rebound below per sheet).
    def write(row, col, value, style="normal_light"):
        cell = ws.cell(row + 1, col + 1, value=value)
        cell.style = style

    lines_of_desc = description.split("\n")
    for row, line in enumerate(lines_of_desc):
        write(row, 0, line, style="description")
    ws.column_dimensions[column_to_char(0)].width = 200

    # Sheet: the raw dataset.
    ws = wb.create_sheet(self.tr("Dataset"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, value in enumerate(dataset.classes_μm, 1):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    for row, sample in enumerate(dataset.samples, 1):
        # Alternate row styles for readability.
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, sample.name, style=style)
        for col, value in enumerate(sample.distribution, 1):
            write(row, col, value, style=style)
        # Keep the GUI responsive during the long export.
        QCoreApplication.processEvents()

    # Sheet: the principal component distributions.
    ws = wb.create_sheet(self.tr("PCs"))
    write(0, 0, self.tr("PC"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for col, value in enumerate(dataset.classes_μm, 1):
        write(0, col, value, style="header")
        ws.column_dimensions[column_to_char(col)].width = 10
    for row, component in enumerate(components, 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, self.tr("PC{0}").format(row), style=style)
        for col, value in enumerate(component, 1):
            write(row, col, value, style=style)
        QCoreApplication.processEvents()

    # Sheet: per-sample PC variations; headers carry the explained
    # variance ratio of each PC.
    ws = wb.create_sheet(self.tr("Variations of PCs"))
    write(0, 0, self.tr("Sample Name"), style="header")
    ws.column_dimensions[column_to_char(0)].width = 16
    for i in range(n_components):
        write(0, i + 1, self.tr("PC{0} ({1:0.4f})").format(i + 1, ratios[i]), style="header")
        ws.column_dimensions[column_to_char(i + 1)].width = 10
    for row, varations in enumerate(transformed, 1):
        if row % 2 == 0:
            style = "normal_dark"
        else:
            style = "normal_light"
        write(row, 0, dataset.samples[row - 1].name, style=style)
        for col, value in enumerate(varations, 1):
            write(row, col, value, style=style)
        QCoreApplication.processEvents()

    wb.save(filename)
    wb.close()