예제 #1
0
    def __train_test_val_split(self):
        """Build the predictor matrix and split it into train/val/test sets.

        Predictors are the binary fields followed by the continuous fields
        reported by ``Data_Fields``, keeping only those that are listed in
        ``Config.DATA_FIELDS_IN_ANALYSIS``. Their vectors are stacked along
        axis 1 to form the feature matrix; the target vector is looked up
        under ``Data_Fields.get_target()``.

        10% of the samples are held out as the test set, then 20% of the
        remainder becomes the validation set. Both splits use
        ``random_state=42``.

        The results are stored on the instance as ``X_train``, ``X_val``,
        ``X_test``, ``y_train``, ``y_val``, ``y_test`` and
        ``predictors_names``; nothing is returned.
        """
        vectors = self.__same_length_vectors
        target = vectors[Data_Fields.get_target()]

        # Binary predictors first, then continuous ones, each filtered down
        # to the fields selected for the analysis.
        predictors_names = []
        for get_fields in (Data_Fields.get_binary_vars,
                           Data_Fields.get_continuous_vars):
            predictors_names.extend(
                name for name in get_fields()
                if name in Config.DATA_FIELDS_IN_ANALYSIS)

        features = np.stack([vectors[name] for name in predictors_names],
                            axis=1)

        # First split: carve off the test set.
        X_rest, X_test, y_rest, y_test = train_test_split(
            features, target, test_size=0.1, random_state=42)
        # Second split: carve the validation set out of what is left.
        X_train, X_val, y_train, y_val = train_test_split(
            X_rest, y_rest, test_size=0.2, random_state=42)

        self.X_train, self.X_val, self.X_test = X_train, X_val, X_test
        self.y_train, self.y_val, self.y_test = y_train, y_val, y_test
        self.predictors_names = predictors_names
예제 #2
0
    def __init__(self):
        """Populate the configuration as attributes on the instance's class.

        Every value is written to ``self.__class__`` rather than to the
        instance, so it is readable through the class itself once one
        instance has been created. The method sets directory paths, the list
        of data fields used in the analysis, and thresholds loaded from YAML
        files through ``self.load_yaml``.
        """
        cls = self.__class__

        # NOTE(review): these absolute paths are tied to one machine's
        # layout; confirm before running elsewhere.
        cls.WORK_DIR = 'C:/Users/normy/corona_classifier_files/classification'
        cls.COMMON_FILES_DIR = 'C:/Users/normy/PycharmProjects/corona_classifier/common_files'
        cls.RAW_DATA_PATH = 'C:/Users/normy/PycharmProjects/covidclinicaldata/data'

        # Derived locations.
        cls.PICKLE_PATH = os.path.join(self.WORK_DIR, 'pickle_files')
        module_dir = os.path.dirname(__file__)
        cls.YAML_FILE_DIR = os.path.join(module_dir, 'yaml_files')
        cls.COMMON_YAML_FILE_DIR = os.path.join(self.COMMON_FILES_DIR,
                                                'yaml_files')
        cls.OUTPUT_PATH = os.path.join(self.WORK_DIR, 'outputs')

        # Target first, then binary variables, then continuous variables.
        fields_in_analysis = [Data_Fields.get_target()]
        fields_in_analysis.extend(Data_Fields.get_binary_vars())
        fields_in_analysis.extend(Data_Fields.get_continuous_vars())
        cls.DATA_FIELDS_IN_ANALYSIS = fields_in_analysis

        # Settings read from the 'static_values' YAML document:
        # (class attribute name, key in the YAML document).
        static_values = self.load_yaml(self.YAML_FILE_DIR, 'static_values')
        yaml_backed_settings = (
            ('DATA_FIELD_MISSING_VALUES_THRESHOLD',
             'data_field_missing_values_threshold'),
            ('BOOTSTRAP_PATIENT_ENLARGEMENT_SIZE',
             'bootstrap_patient_enlargement_size'),
            ('MODEL_THRESHOLDS', 'model_thresholds'),
        )
        for attribute, yaml_key in yaml_backed_settings:
            setattr(cls, attribute, static_values[yaml_key])

        cls.CONTINUOUS_FIELDS_THRESHOLDS = self.load_yaml(
            self.COMMON_YAML_FILE_DIR, 'continuous_fields_thresholds')
예제 #3
0
    def __binary_one_hot_encoding(self):
        """Recode the binary fields and the target of every patient to 0/1.

        The fields processed are those returned by
        ``Data_Fields.get_binary_vars()`` plus ``Data_Fields.get_target()``.
        For each patient in ``self.__patients`` and each such field:

        * ``None`` is left untouched;
        * the strings ``'TRUE'`` and ``'Positive'`` become ``1``;
        * any other value becomes ``0``.

        Patients are modified in place; nothing is returned.

        NOTE: the mapping is not idempotent. A value already recoded to ``1``
        is not one of the accepted strings, so a second run would turn it
        into ``0``.
        """
        # Build a fresh list instead of appending to the one returned by
        # get_binary_vars(): if that accessor hands back a shared list, an
        # in-place append would add the target to the binary variables for
        # every later caller (and once more on each call of this method).
        data_fields = [
            *Data_Fields.get_binary_vars(),
            Data_Fields.get_target(),
        ]

        for patient in self.__patients:
            for field in data_fields:
                value = getattr(patient, field)
                if value is None:
                    # Missing values stay missing.
                    continue
                encoded = 1 if value in ('TRUE', 'Positive') else 0
                setattr(patient, field, encoded)