def slot_convert(self, kind, column_idx):
    """Convert a single column of the current dataset to ``kind`` and log it.

    The column name is looked up with ``self.dataset.get_column_name``.
    ``kind`` must be ``"binary"``, ``"categorical"`` or ``"continuous"``;
    any other value shows a warning and returns without changing the data.
    A ``ValueError`` raised during the conversion is shown in a warning
    dialog and nothing is logged.

    :param kind: name of the variable type to convert to
    :param column_idx: index of the column, as understood by
        ``self.dataset.get_column_name``
    """
    column = self.dataset.get_column_name(column_idx)

    # Determine which function to run in order to convert
    if kind == "binary":
        convert_func = clarite.modify.make_binary
    elif kind == "categorical":
        convert_func = clarite.modify.make_categorical
    elif kind == "continuous":
        convert_func = clarite.modify.make_continuous
    else:
        show_warning(
            "Error converting to unknown",
            "Converting to unknown type is not yet implemented.",
        )
        return

    # Try to convert, reporting any errors
    try:
        self.appctx.update_data(convert_func(self.dataset.df, only=column))
    except ValueError as e:
        show_warning(f"Error converting to {kind}", str(e))
        return

    # Log successful conversion
    info_str = (
        f"\nConverted '{column}' to {kind} "
        f"in '{self.dataset.get_selector_name()}'\n"
    )
    self.appctx.log_info("\n" + "=" * 80 + info_str + "=" * 80)

    # Use repr() for the column list so that the logged command stays valid
    # Python even when the column name contains quotes or backslashes, or is
    # not a string at all (same approach as the rename log entry).
    python_name = self.dataset.get_python_name()
    self.appctx.log_python(
        f"{python_name} = "
        f"clarite.modify.make_{kind}("
        f"data={python_name}, "
        f"skip=[], "
        f"only={[column]!r})"
    )
def submit(self):
    """Check the requested column name and accept or reject the dialog.

    * If the new name equals the old name, the dialog is rejected.
    * If the new name is already present in ``self.data``, a warning is
      shown and the dialog is neither accepted nor rejected.
    * Otherwise the dialog is accepted.
    """
    # TODO: Add any warnings here
    if self.new_name == self.old_name:
        # Nothing would change
        self.reject()
        return

    existing_names = list(self.data)
    if self.new_name not in existing_names:
        self.accept()
        return

    show_warning(
        "Error renaming column",
        f"The specified column ('{self.new_name}') already exists",
    )
def submit(self):
    """Run the percent-NA calculation unless the dataset name is taken.

    When ``self.data_name`` is set and matches the ``name`` of a dataset in
    ``self.appctx.datasets``, a warning is shown and nothing else happens.
    Otherwise the function returned by ``self.get_func()`` is run through
    ``RunProgress.run_with_progress`` with ``self.appctx.add_dataset`` as
    the slot, the command is logged and the dialog is accepted.
    """
    name_taken = self.data_name is not None and self.data_name in [
        existing.name for existing in self.appctx.datasets
    ]
    if name_taken:
        show_warning(
            "Dataset already exists",
            f"A dataset named '{self.data_name}' already exists.\n"
            "Use a different name or clear the dataset name field.",
        )
        return

    print("Calculating Percent NA")
    # Run with a progress dialog
    RunProgress.run_with_progress(
        progress_str="Calculating Percent NA...",
        function=self.get_func(),
        slot=self.appctx.add_dataset,
        parent=self,
    )
    self.log_command()
    self.accept()
def submit(self):
    """Validate the inputs, then add corrected p-values with a progress dialog.

    Validation stops at the first failure and shows a warning:

    1. ``self.data_name`` is set and matches the ``name`` of a dataset in
       ``self.appctx.datasets``.
    2. ``self.dataset.df`` has no ``"converged"`` column.
    3. ``self.dataset.df`` has no ``"pvalue"`` column.

    If validation passes, the function from ``self.get_func()`` is run via
    ``RunProgress.run_with_progress``.  Its result goes to
    ``self.appctx.update_data`` when no dataset name was given, or to
    ``self.appctx.add_dataset`` otherwise.  Finally the command is logged
    and the dialog is accepted.
    """
    name_taken = self.data_name is not None and self.data_name in [
        existing.name for existing in self.appctx.datasets
    ]
    if name_taken:
        show_warning(
            "Dataset already exists",
            f"A dataset named '{self.data_name}' already exists.\n"
            "Use a different name or clear the dataset name field.",
        )
        return

    if "converged" not in list(self.dataset.df):
        show_warning("Incorrect Data Input", "A 'converged' column must be present")
        return

    if "pvalue" not in list(self.dataset.df):
        show_warning("Incorrect Data Input", "A 'pvalue' column must be present")
        return

    print("Adding corrected P-values...")
    # Without a name the result is sent to update_data, with a name to add_dataset
    result_slot = (
        self.appctx.update_data
        if self.data_name is None
        else self.appctx.add_dataset
    )
    # Run with a progress dialog
    RunProgress.run_with_progress(
        progress_str="Adding corrected P-values...",
        function=self.get_func(),
        slot=result_slot,
        parent=self,
    )
    self.log_command()
    self.accept()
def launch_get_category_file(self):
    """Ask the user for a tab-separated file mapping variables to categories.

    The chosen file is read with ``pd.read_csv(..., sep="\\t")`` and must
    contain exactly two columns.  These are treated as variable and category
    respectively, and the result is stored in ``self.categories`` as a
    ``{variable: category}`` dictionary.  The button text is updated with the
    number of entries loaded.

    Nothing is changed if the dialog returns an empty filename, the file
    cannot be read, or the column count is wrong; the latter two cases show
    a warning.
    """
    dialog_options = QFileDialog.Options()
    dialog_options |= QFileDialog.DontUseNativeDialog
    filename, _ = QFileDialog.getOpenFileName(
        self,
        "Load Variable Category File",
        "",
        "TSV Files (*.tsv *.txt)",
        options=dialog_options,
    )

    # No file was selected
    if len(filename) == 0:
        return

    # Read file
    try:
        table = pd.read_csv(filename, sep="\t")
    except Exception as e:
        show_warning("Variable Categories File Error", f"Error reading file: {str(e)}")
        return

    # Must have two columns
    n_columns = len(list(table))
    if n_columns != 2:
        show_warning(
            "Variable Categories File Error",
            f"Expected 2 columns, found {n_columns:,} columns",
        )
        return

    # Name the columns, then build the variable -> category dictionary
    table.columns = ["variable", "category"]
    mapping = table.set_index("variable").to_dict()["category"]

    self.categories = mapping
    self.category_file_btn.setText(f"{len(mapping):,} variable categories loaded")
def submit(self):
    """Validate the inputs, then run the EWAS with a progress dialog.

    A warning is shown, and nothing else happens, when ``self.data_name``
    is set and matches the ``name`` of a dataset in ``self.appctx.datasets``,
    or when ``self.outcome`` is ``None``.  Otherwise the function returned by
    ``self.get_func()`` is run through ``RunProgress.run_with_progress``
    with ``self.appctx.add_dataset`` as the slot, the command is logged and
    the dialog is accepted.
    """
    name_taken = self.data_name is not None and self.data_name in [
        existing.name for existing in self.appctx.datasets
    ]
    if name_taken:
        show_warning(
            "Dataset already exists",
            f"A dataset named '{self.data_name}' already exists.\n"
            "Use a different name or clear the dataset name field.",
        )
        return

    if self.outcome is None:
        show_warning("Missing Parameter", "A phenotype must be selected")
        return

    print("Running EWAS...")
    # Run with a progress dialog
    RunProgress.run_with_progress(
        progress_str="Running EWAS...",
        function=self.get_func(),
        slot=self.appctx.add_dataset,
        parent=self,
    )
    self.log_command()
    self.accept()
def slot_rename(self, column_idx):
    """Ask for a new name for a column, rename it and log the change.

    The column name is looked up with ``self.dataset.get_column_name``.
    ``RenameDialog.get_new_name`` supplies the new name; a ``None`` result
    leaves everything untouched.  A ``ValueError`` raised while applying the
    rename is shown in a warning dialog and nothing is logged.

    :param column_idx: index of the column, as understood by
        ``self.dataset.get_column_name``
    """
    column = self.dataset.get_column_name(column_idx)

    # Show a dialog
    current_df = self.appctx.datasets[self.appctx.current_dataset_idx].df
    new_name = RenameDialog.get_new_name(
        data=current_df,
        column=column,
        parent=self.parent(),
    )
    if new_name is None:
        return

    column_map = {column: new_name}
    try:
        self.appctx.update_data(self.dataset.df.rename(columns=column_map))
    except ValueError as e:
        show_warning("Error renaming column", str(e))
        return

    # Log successful rename
    banner = "=" * 80
    info_str = (
        f"\nRenamed '{column}' to '{new_name}' "
        f"in '{self.dataset.get_selector_name()}'\n"
    )
    self.appctx.log_info("\n" + banner + info_str + banner)

    # The mapping is written with repr() so the logged command is valid Python
    rename_code = "{} = {}.rename(columns={!r})".format(
        self.dataset.get_python_name(),
        self.dataset.get_python_name(),
        column_map,
    )
    self.appctx.log_python(rename_code)
def launch_get_weight_specific(self):
    """Ask the user for a tab-separated file matching variables to weights.

    The chosen file is read with ``pd.read_csv(..., sep="\\t")`` and must
    contain exactly two columns, which are treated as variable and weight
    respectively.  The resulting ``{variable: weight}`` dictionary is only
    stored in ``self.weights`` (and the button text updated) when:

    * at least one variable is a column of ``self.dataset.df``,
    * at least one weight is a column of ``self.survey_df.df``, and
    * no column of ``self.dataset.df`` is left without a weight, ignoring
      columns that are themselves listed as a variable or a weight, the
      covariates, and the outcome/cluster/strata/fpc settings.

    Every failure shows a warning and leaves ``self.weights`` unchanged; an
    empty filename from the dialog returns silently.
    """
    error_title = "Specific Weights File Error"

    dialog_options = QFileDialog.Options()
    dialog_options |= QFileDialog.DontUseNativeDialog
    filename, _ = QFileDialog.getOpenFileName(
        self,
        "Load Specific Weights File",
        "",
        "TSV Files (*.tsv *.txt)",
        options=dialog_options,
    )

    # No file was selected
    if len(filename) == 0:
        return

    # Read file
    try:
        table = pd.read_csv(filename, sep="\t")
    except Exception as e:
        show_warning(error_title, f"Error reading file: {str(e)}")
        return

    # Must have two columns
    n_columns = len(list(table))
    if n_columns != 2:
        show_warning(
            error_title,
            f"Expected 2 columns, found {n_columns:,} columns",
        )
        return

    # Name the columns, then build the variable -> weight dictionary
    table.columns = ["variable", "weight"]
    weights = table.set_index("variable").to_dict()["weight"]

    # Check that some variables/weights matched
    listed_vars = set(weights)
    listed_weights = set(weights.values())
    data_columns = set(self.dataset.df)
    unique_vars = len(listed_vars & data_columns)
    unique_weights = len(listed_weights & set(self.survey_df.df))
    not_needing_weights = {self.outcome, self.cluster, self.strata, self.fpc}
    missing_weights = (
        data_columns
        - listed_vars
        - listed_weights
        - set(self.covariates)
        - not_needing_weights
    )

    if unique_vars < 1:
        show_warning(
            error_title,
            f"Loaded {filename}\n"
            "No variables matched columns in the input data.\n\n"
            "The first column of the specific weights file must list variable names and "
            "a header line must be present.",
        )
        return

    if unique_weights < 1:
        show_warning(
            error_title,
            f"Loaded {filename}\n"
            "No weights matched columns in the survey data.\n\n"
            "The second column of the specific weights file must list weight names and "
            "a header line must be present.",
        )
        return

    n_missing = len(missing_weights)
    if 0 < n_missing <= 5:
        show_warning(
            error_title,
            f"Loaded {filename}\n"
            "Some variables are missing weights:\n\n"
            f"{', '.join(sorted(missing_weights))}",
        )
        return

    if n_missing > 5:
        # Only the first five (in sorted order) are listed
        show_warning(
            error_title,
            f"Loaded {filename}\n"
            "More than 5 variables are missing weights, including:\n\n"
            f"{', '.join(sorted(missing_weights)[:5])}",
        )
        return

    self.weights = weights
    self.weight_specific_btn.setText(
        f"{unique_weights:,} different weights "
        f"assigned to {unique_vars:,} variables"
    )