Example #1
0
    def slot_convert(self, kind, column_idx):
        """Convert a single column of the current dataset to another variable type.

        Parameters
        ----------
        kind:
            One of "binary", "categorical" or "continuous". Any other value
            shows a warning and returns without changing the data.
        column_idx:
            Index passed to ``self.dataset.get_column_name`` to look up the
            column to convert.

        A ``ValueError`` raised during the conversion or the data update is
        reported with ``show_warning`` and nothing is logged. On success both
        an info entry and an equivalent Python command are logged.
        """
        column = self.dataset.get_column_name(column_idx)

        # Determine which function to run in order to convert
        if kind == "binary":
            convert_func = clarite.modify.make_binary
        elif kind == "categorical":
            convert_func = clarite.modify.make_categorical
        elif kind == "continuous":
            convert_func = clarite.modify.make_continuous
        else:
            show_warning(
                "Error converting to unknown",
                "Converting to unknown type is not yet implemented.",
            )
            return

        # Try to convert, reporting any errors
        try:
            self.appctx.update_data(convert_func(self.dataset.df, only=column))
        except ValueError as e:
            show_warning(f"Error converting to {kind}", str(e))
            return

        # Log successful conversion
        info_str = f"\nConverted '{column}' to {kind} in '{self.dataset.get_selector_name()}'\n"
        self.appctx.log_info("\n" + "=" * 80 + info_str + "=" * 80)

        # repr() of the list escapes quotes/backslashes in the column name (and
        # leaves non-string labels unquoted), so the logged command stays valid
        # Python. For ordinary names the output is the same as hand-quoting.
        python_name = self.dataset.get_python_name()
        self.appctx.log_python(f"{python_name} = "
                               f"clarite.modify.make_{kind}("
                               f"data={python_name}, "
                               f"skip=[], "
                               f"only={[column]!r})")
Example #2
0
 def submit(self):
     """Accept, reject or refuse the rename depending on the new name.

     * New name equal to the old name: the dialog is rejected.
     * New name already present in ``self.data``: a warning is shown and
       the dialog is neither accepted nor rejected.
     * Otherwise: the dialog is accepted.
     """
     # TODO: Add any warnings here
     requested = self.new_name

     # Nothing to rename
     if requested == self.old_name:
         self.reject()
         return

     # Refuse names that would collide with an existing column
     if requested in list(self.data):
         show_warning(
             "Error renaming column",
             f"The specified column ('{self.new_name}') already exists",
         )
         return

     self.accept()
Example #3
0
 def submit(self):
     """Check the requested dataset name, then run the Percent NA calculation.

     If ``self.data_name`` is set and equals the name of a dataset in
     ``self.appctx.datasets``, a warning is shown and the method returns
     without accepting. Otherwise ``self.get_func()`` is run through
     ``RunProgress.run_with_progress`` with ``self.appctx.add_dataset`` as
     the slot, the command is logged and the dialog is accepted.
     """
     name_taken = self.data_name is not None and any(
         existing.name == self.data_name
         for existing in self.appctx.datasets
     )
     if name_taken:
         show_warning(
             "Dataset already exists",
             f"A dataset named '{self.data_name}' already exists.\n"
             "Use a different name or clear the dataset name field.",
         )
         return

     print("Calculating Percent NA")
     # Run with a progress dialog
     RunProgress.run_with_progress(
         progress_str="Calculating Percent NA...",
         function=self.get_func(),
         slot=self.appctx.add_dataset,
         parent=self,
     )
     self.log_command()
     self.accept()
 def submit(self):
     """Validate the inputs, then add corrected P-values via a progress dialog.

     A warning is shown (and the dialog is not accepted) when:

     * ``self.data_name`` is set and matches an existing dataset name, or
     * the current dataset has no 'converged' column, or
     * the current dataset has no 'pvalue' column.

     Otherwise ``self.get_func()`` is run with a progress dialog. The result
     goes to ``self.appctx.update_data`` when no dataset name was given and
     to ``self.appctx.add_dataset`` when one was. The command is then logged
     and the dialog accepted.
     """
     name_taken = self.data_name is not None and any(
         existing.name == self.data_name
         for existing in self.appctx.datasets
     )
     if name_taken:
         show_warning(
             "Dataset already exists",
             f"A dataset named '{self.data_name}' already exists.\n"
             "Use a different name or clear the dataset name field.",
         )
         return

     # Both columns are required; report only the first one that is missing
     available = list(self.dataset.df)
     for required in ("converged", "pvalue"):
         if required not in available:
             show_warning("Incorrect Data Input",
                          f"A '{required}' column must be present")
             return

     print("Adding corrected P-values...")
     # Without a new name the current dataset is updated in place
     slot = (self.appctx.update_data
             if self.data_name is None else self.appctx.add_dataset)
     # Run with a progress dialog
     RunProgress.run_with_progress(
         progress_str="Adding corrected P-values...",
         function=self.get_func(),
         slot=slot,
         parent=self,
     )
     self.log_command()
     self.accept()
    def launch_get_category_file(self):
        """Ask for a tab-separated file assigning a category to each variable.

        The chosen file is read with ``pd.read_csv(..., sep="\\t")`` and must
        contain exactly two columns, which are treated as (variable, category).
        On success the mapping is stored in ``self.categories`` and the button
        text reports how many entries were loaded. Cancelling the dialog does
        nothing; a read error or a wrong column count shows a warning and
        leaves ``self.categories`` untouched.
        """
        error_title = "Variable Categories File Error"

        dialog_options = QFileDialog.Options()
        dialog_options |= QFileDialog.DontUseNativeDialog
        filename, _ = QFileDialog.getOpenFileName(
            self,
            "Load Variable Category File",
            "",
            "TSV Files (*.tsv *.txt)",
            options=dialog_options,
        )
        # An empty filename means the dialog was dismissed without a choice
        if not filename:
            return

        # Read file
        try:
            table = pd.read_csv(filename, sep="\t")
        except Exception as e:
            show_warning(error_title, f"Error reading file: {e!s}")
            return

        # Must have two columns
        n_columns = len(table.columns)
        if n_columns != 2:
            show_warning(
                error_title,
                f"Expected 2 columns, found {n_columns:,} columns",
            )
            return

        # Name the columns, then build a {variable: category} dictionary
        table.columns = ["variable", "category"]
        mapping = table.set_index("variable")["category"].to_dict()

        self.categories = mapping
        self.category_file_btn.setText(
            f"{len(mapping):,} variable categories loaded")
Example #6
0
 def submit(self):
     """Validate the inputs, then run the EWAS through a progress dialog.

     A warning is shown (and the dialog is not accepted) when
     ``self.data_name`` is set and matches an existing dataset name, or when
     ``self.outcome`` is None. Otherwise ``self.get_func()`` is run with
     ``self.appctx.add_dataset`` as the slot, the command is logged and the
     dialog is accepted.
     """
     name_taken = self.data_name is not None and any(
         existing.name == self.data_name
         for existing in self.appctx.datasets
     )
     if name_taken:
         show_warning(
             "Dataset already exists",
             f"A dataset named '{self.data_name}' already exists.\n"
             "Use a different name or clear the dataset name field.",
         )
         return

     if self.outcome is None:
         show_warning("Missing Parameter", "A phenotype must be selected")
         return

     print("Running EWAS...")
     # Run with a progress dialog
     RunProgress.run_with_progress(
         progress_str="Running EWAS...",
         function=self.get_func(),
         slot=self.appctx.add_dataset,
         parent=self,
     )
     self.log_command()
     self.accept()
Example #7
0
    def slot_rename(self, column_idx):
        """Rename one column of the current dataset after asking for a new name.

        ``column_idx`` is passed to ``self.dataset.get_column_name`` to find
        the column. ``RenameDialog.get_new_name`` supplies the new name; a
        result of None returns without changes. A ``ValueError`` raised while
        renaming or updating the data is reported with ``show_warning``. On
        success an info entry and the equivalent Python command are logged.
        """
        old_name = self.dataset.get_column_name(column_idx)

        # Show a dialog
        current_df = self.appctx.datasets[self.appctx.current_dataset_idx].df
        new_name = RenameDialog.get_new_name(
            data=current_df,
            column=old_name,
            parent=self.parent(),
        )
        if new_name is None:
            return

        mapping = {old_name: new_name}
        try:
            self.appctx.update_data(self.dataset.df.rename(columns=mapping))
        except ValueError as e:
            show_warning("Error renaming column", str(e))
            return

        # Log successful rename
        rule = "=" * 80
        info_str = f"\nRenamed '{old_name}' to '{new_name}' in '{self.dataset.get_selector_name()}'\n"
        self.appctx.log_info(f"\n{rule}{info_str}{rule}")

        python_name = self.dataset.get_python_name()
        self.appctx.log_python(
            f"{python_name} = {python_name}.rename(columns={mapping!r})")
Example #8
0
    def launch_get_weight_specific(self):
        """Ask for a tab-separated file that assigns a weight to each variable.

        The chosen file is read with ``pd.read_csv(..., sep="\\t")`` and must
        contain exactly two columns, which are treated as (variable, weight).
        The mapping is stored in ``self.weights`` only when all checks pass:

        * at least one variable is a column of ``self.dataset.df``;
        * at least one weight is a column of ``self.survey_df.df``;
        * every column of ``self.dataset.df`` is either listed as a variable,
          used as a weight, in ``self.covariates``, or one of ``self.outcome``,
          ``self.cluster``, ``self.strata`` and ``self.fpc``.

        The first failing check is reported with ``show_warning``. Cancelling
        the dialog does nothing.
        """
        error_title = "Specific Weights File Error"

        dialog_options = QFileDialog.Options()
        dialog_options |= QFileDialog.DontUseNativeDialog
        filename, _ = QFileDialog.getOpenFileName(
            self,
            "Load Specific Weights File",
            "",
            "TSV Files (*.tsv *.txt)",
            options=dialog_options,
        )
        # An empty filename means the dialog was dismissed without a choice
        if not filename:
            return

        # Read file
        try:
            table = pd.read_csv(filename, sep="\t")
        except Exception as e:
            show_warning(error_title, f"Error reading file: {e!s}")
            return

        # Must have two columns
        n_columns = len(table.columns)
        if n_columns != 2:
            show_warning(
                error_title,
                f"Expected 2 columns, found {n_columns:,} columns",
            )
            return

        # Name the columns, then build a {variable: weight} dictionary
        table.columns = ["variable", "weight"]
        weights = table.set_index("variable")["weight"].to_dict()

        # Check that some variables/weights matched
        data_columns = set(self.dataset.df)
        listed_vars = set(weights.keys())
        listed_weights = set(weights.values())
        unique_vars = len(listed_vars & data_columns)
        unique_weights = len(listed_weights & set(self.survey_df.df))
        # Data columns with no weight that are not excused by another role
        missing_weights = data_columns.difference(
            listed_vars,
            listed_weights,
            set(self.covariates),
            {self.outcome, self.cluster, self.strata, self.fpc},
        )

        problem = None
        if unique_vars < 1:
            problem = (
                f"Loaded {filename}\n"
                "No variables matched columns in the input data.\n\n"
                "The first column of the specific weights file must list variable names and "
                "a header line must be present."
            )
        elif unique_weights < 1:
            problem = (
                f"Loaded {filename}\n"
                "No weights matched columns in the survey data.\n\n"
                "The second column of the specific weights file must list weight names and "
                "a header line must be present."
            )
        elif missing_weights:
            # List at most five names; the heading says whether more exist
            if len(missing_weights) <= 5:
                heading = "Some variables are missing weights:\n\n"
            else:
                heading = "More than 5 variables are missing weights, including:\n\n"
            shown = ", ".join(sorted(missing_weights)[:5])
            problem = f"Loaded {filename}\n" + heading + shown

        if problem is not None:
            show_warning(error_title, problem)
            return

        self.weights = weights
        self.weight_specific_btn.setText(
            f"{unique_weights:,} different weights "
            f"assigned to {unique_vars:,} variables")