def select_problem(self, selection, parameter=None):
    """Choose a prediction problem and compute its cutoff times.

    Instantiates the problem definition named by ``selection``, stores it in
    ``self.chosen_problem`` and runs its ``generate_cutoff_times`` on
    ``self.es``. The entityset and target entity it returns are stored back
    on ``self.es`` and ``self.target_entity``.

    Args:
        selection: Name of the chosen prediction problem. One of
            ``'LengthOfStay'``, ``'MortalityPrediction'``,
            ``'MissedAppointmentProblemDefinition'``,
            ``'ProlongedLengthOfStay'``, ``'Readmission'`` or
            ``'DiagnosisPrediction'``.
        parameter: Optional constructor argument. Forwarded only to
            ``ProlongedLengthOfStay``, ``Readmission`` and
            ``DiagnosisPrediction``; ignored by the other problems.

    Returns:
        The cutoff-time object, i.e. the third item returned by the
        problem's ``generate_cutoff_times``.

    Raises:
        ValueError: If ``selection`` is not a known problem name, or if
            ``'DiagnosisPrediction'`` is selected without a parameter.
    """
    # NOTE(review): ``parameter`` is tested for truthiness, so falsy values
    # such as 0 or '' are treated as "not provided" -- confirm this is intended.
    if selection == 'LengthOfStay':
        problem = LengthOfStay()
    elif selection == 'MortalityPrediction':
        problem = MortalityPrediction()
    elif selection == 'MissedAppointmentProblemDefinition':
        problem = MissedAppointmentProblemDefinition()
    elif selection == 'ProlongedLengthOfStay':
        problem = ProlongedLengthOfStay(parameter) if parameter else ProlongedLengthOfStay()
    elif selection == 'Readmission':
        problem = Readmission(parameter) if parameter else Readmission()
    elif selection == 'DiagnosisPrediction':
        # This problem has no default: a diagnosis code is mandatory.
        if not parameter:
            raise ValueError('unspecified diagnosis code')
        problem = DiagnosisPrediction(parameter)
    else:
        raise ValueError('{} is not a defined problem'.format(selection))

    self.chosen_problem = problem

    # Target label calculation; the problem may hand back an updated entityset.
    entityset, target_entity, cutoff_times = problem.generate_cutoff_times(self.es)
    self.es = entityset
    self.target_entity = target_entity
    return cutoff_times
def test_generate_labels_success_threshold(entityset_success, cutoff_times):
    """Check the labels ``Readmission(6)`` generates for the success entityset."""
    _, _, label_times = Readmission(6).generate_cutoff_times(entityset_success)

    # Align with the fixture's index; both should have the same index.
    label_times.index = cutoff_times.index

    expected_labels = [False, False, False, True, False, False]
    assert list(label_times['label']) == expected_labels
def readmission():
    """Build a ``Readmission`` problem with its default constructor arguments."""
    problem = Readmission()
    return problem
def select_problem(self, selection, parameter=None):
    """Choose a prediction problem, compute its cutoff times and set a modeler.

    Instantiates the problem definition named by ``selection``, stores it in
    ``self.chosen_problem`` and runs its ``generate_cutoff_times`` on
    ``self.es``. The entityset and target entity it returns are stored back
    on ``self.es`` and ``self.target_entity``. A default ``Modeler`` matching
    the problem's ``prediction_type`` is then stored in ``self.modeler``.

    Args:
        selection (str): Name of the chosen prediction problem. One of
            ``'LengthOfStay'``, ``'MortalityPrediction'``,
            ``'MissedAppointment'``, ``'ProlongedLengthOfStay'``,
            ``'Readmission'`` or ``'DiagnosisPrediction'``.
        parameter: Optional constructor argument. Forwarded only to
            ``ProlongedLengthOfStay``, ``Readmission`` and
            ``DiagnosisPrediction``; ignored by the other problems.

    Returns:
        The cutoff-time object, i.e. the third item returned by the
        problem's ``generate_cutoff_times`` (presumably a dataframe of
        cutoff times and their target labels -- TODO confirm). The updated
        entityset and target entity are NOT returned; they are available as
        ``self.es`` and ``self.target_entity``.

    Raises:
        ValueError: If ``selection`` is not a known problem name, or if
            ``'DiagnosisPrediction'`` is selected without a parameter.
    """
    LOGGER.info("Selecting %s prediction problem", selection)

    # NOTE(review): ``parameter`` is tested for truthiness, so falsy values
    # such as 0 or '' are treated as "not provided" -- confirm this is intended.
    if selection == 'LengthOfStay':
        problem = LengthOfStay()
    elif selection == 'MortalityPrediction':
        problem = MortalityPrediction()
    elif selection == 'MissedAppointment':
        problem = MissedAppointment()
    elif selection == 'ProlongedLengthOfStay':
        problem = ProlongedLengthOfStay(parameter) if parameter else ProlongedLengthOfStay()
    elif selection == 'Readmission':
        problem = Readmission(parameter) if parameter else Readmission()
    elif selection == 'DiagnosisPrediction':
        # This problem has no default: a diagnosis code is mandatory.
        if not parameter:
            raise ValueError('unspecified diagnosis code')
        problem = DiagnosisPrediction(parameter)
    else:
        raise ValueError('{} is not a defined problem'.format(selection))

    self.chosen_problem = problem

    # Target label calculation; the problem may hand back an updated entityset.
    entityset, target_entity, cutoff_times = self.chosen_problem.generate_cutoff_times(self.es)
    self.es = entityset
    self.target_entity = target_entity

    # Default pipeline: a classifier for classification problems, a regressor otherwise.
    is_classification = self.chosen_problem.prediction_type == "classification"
    default_pipeline = "Random Forest" if is_classification else "Random Forest Regressor"
    self.modeler = Modeler(default_pipeline, self.chosen_problem.prediction_type)

    return cutoff_times