def predict(self, F, datainfo, timeinfo):
    """Retrain a single LGBM classifier (time budget permitting) and
    return positive-class probabilities for the incoming test batch.

    Args:
        F: raw test feature payload, decoded by ``get_data``.
        datainfo: dataset metadata consumed by ``extract``.
        timeinfo: timing metadata consumed by ``extract``.

    Returns:
        1-D array of P(class == 1) for each transformed test row.
    """
    print('\nFile: {} Class: {} Function: {} State: {}'.format(
        'architectures.py', 'OriginalEnsemble', 'predict', 'Start'))
    info = extract(datainfo, timeinfo)
    self._info.update(info)
    print_time_info(self._info)

    test_data = get_data(F, self._info)
    print('test_data.shape: {}'.format(test_data.shape))
    transformed_test_data = self._transform(test_data, DataType.TEST)
    train_data = self._transform(self._train_data, DataType.TRAIN)
    train_labels = self._train_labels
    print('transformed_test_data.shape: {}'.format(
        transformed_test_data.shape))
    print('train_data.shape: {}'.format(train_data.shape))

    # Sample down to the smaller of the two set sizes so covariate-shift
    # correction compares equally sized train/test samples.
    size = min(len(train_data), len(transformed_test_data))
    train_weights = correct_covariate_shift(
        train_data,
        self._test_sampler.sample(transformed_test_data, size),
        self._random_state,
        self._correction_threshold,
        self._correction_n_splits) if self._should_correct else None

    fixed_hyperparameters, search_space = Profile.parse_profile(self._profile)
    # Tune once and cache: subsequent calls reuse the best hyperparameters.
    if self._best_hyperparameters is None:
        tuner = HyperparametersTuner(fixed_hyperparameters, search_space,
                                     self._max_evaluations)
        self._best_hyperparameters = tuner.get_best_hyperparameters(
            train_data, train_labels, self._validation_ratio,
            self._random_state)
    print('self._best_hyperparameters: {}'.format(
        self._best_hyperparameters))

    if has_sufficient_time(self._dataset_budget_threshold, self._info):
        # Fix: the original also called train_test_split here, but all four
        # of its outputs were unused — the dead call has been removed.
        self._classifier = LGBMClassifier()
        self._classifier.set_params(**self._best_hyperparameters)
        self._classifier.fit(train_data, train_labels,
                             sample_weight=train_weights)
    else:
        # NOTE(review): if the budget is exhausted before the first fit,
        # self._classifier may be unset and predict_proba below will fail —
        # confirm it is initialised elsewhere.
        print('Time budget exceeded.')

    predictions = self._classifier.predict_proba(transformed_test_data)[:, 1]
    self._iteration += 1
    print('predictions.shape: {}'.format(predictions.shape))
    print('File: {} Class: {} Function: {} State: {} \n'.format(
        'architectures.py', 'OriginalEnsemble', 'predict', 'End'))
    return predictions
def predict(self, F, datainfo, timeinfo):
    """Ensemble variant: train one new classifier per call (budget
    permitting), re-weight every member on a fresh validation split, cap
    the ensemble at ``self._ensemble_size``, and return the weighted
    average of the members' positive-class probabilities.

    Args:
        F: raw test feature payload, decoded by ``get_data``.
        datainfo: dataset metadata consumed by ``extract``.
        timeinfo: timing metadata consumed by ``extract``.

    Returns:
        1-D array of P(class == 1) for each transformed test row.
    """
    print('\nFile: {} Class: {} Function: {} State: {}'.format(
        'architectures.py', 'OriginalEnsemble', 'predict', 'Start'))
    info = extract(datainfo, timeinfo)
    self._info.update(info)
    print_time_info(self._info)

    test_data = get_data(F, self._info)
    print('test_data.shape: {}'.format(test_data.shape))
    transformed_test_data = self._transform(test_data, DataType.TEST)
    train_data = self._transform(self._train_data, DataType.TRAIN)
    train_labels = self._train_labels
    print('transformed_test_data.shape: {}'.format(
        transformed_test_data.shape))
    print('train_data.shape: {}'.format(train_data.shape))

    # Sample down to the smaller of the two set sizes so covariate-shift
    # correction compares equally sized train/test samples.
    size = min(len(train_data), len(transformed_test_data))
    train_weights = correct_covariate_shift(
        train_data,
        self._test_sampler.sample(transformed_test_data, size),
        self._random_state,
        self._correction_threshold,
        self._correction_n_splits) if self._should_correct else None

    fixed_hyperparameters, search_space = Profile.parse_profile(self._profile)
    # Tune once and cache: subsequent calls reuse the best hyperparameters.
    if self._best_hyperparameters is None:
        tuner = HyperparametersTuner(fixed_hyperparameters, search_space,
                                     self._max_evaluations)
        self._best_hyperparameters = tuner.get_best_hyperparameters(
            train_data, train_labels, self._validation_ratio,
            self._random_state)
    print('self._best_hyperparameters: {}'.format(
        self._best_hyperparameters))

    # Always train at least one member; after that, only when time allows.
    if has_sufficient_time(self._dataset_budget_threshold,
                           self._info) or len(self._classifiers) == 0:
        t_d, validation_data, t_l, validation_labels = train_test_split(
            train_data, train_labels,
            test_size=self._validation_ratio,
            random_state=self._random_state,
            shuffle=True,
            stratify=train_labels)
        new_classifier = LGBMClassifier()
        new_classifier.set_params(**self._best_hyperparameters)
        # NOTE(review): fit uses the full train_data rather than t_d; the
        # split only supplies the validation set for weighting — confirm
        # this (train/validation overlap) is intentional.
        new_classifier.fit(train_data, train_labels,
                           sample_weight=train_weights)
        new_predictions = new_classifier.predict_proba(validation_data)[:, 1]
        new_weight = compute_weight(new_predictions, validation_labels,
                                    self._epsilon)

        # Re-score every existing member on the fresh validation split.
        # (Fix: single-pass construction replaces the original repeated
        # np.append loop with misspelled 'currrent_*' locals.)
        self._ensemble_weights = np.array([
            compute_weight(member.predict_proba(validation_data)[:, 1],
                           validation_labels, self._epsilon)
            for member in self._classifiers])
        self._classifiers = np.append(self._classifiers, new_classifier)
        self._ensemble_weights = np.append(self._ensemble_weights, new_weight)
        print('self._ensemble_weights: {}'.format(self._ensemble_weights))

        # Evict the worst member once the ensemble exceeds its cap.
        if len(self._classifiers) > self._ensemble_size:
            i = remove_worst_classifier(self._classifiers, validation_data,
                                        validation_labels)
            print('Removed classifier: {}'.format(i))
            self._classifiers = np.delete(self._classifiers, i)
            self._ensemble_weights = np.delete(self._ensemble_weights, i)
    else:
        print('Time budget exceeded.')

    if len(self._classifiers) == 1:
        predictions = self._classifiers[0].predict_proba(
            transformed_test_data)[:, 1]
    else:
        # Weighted average of member probabilities, normalised by the
        # total ensemble weight.
        predictions = np.zeros(len(transformed_test_data))
        for weight, member in zip(self._ensemble_weights, self._classifiers):
            predictions += weight * member.predict_proba(
                transformed_test_data)[:, 1]
        predictions = predictions / np.sum(self._ensemble_weights)

    self._iteration += 1
    print('predictions.shape: {}'.format(predictions.shape))
    print('File: {} Class: {} Function: {} State: {} \n'.format(
        'architectures.py', 'OriginalEnsemble', 'predict', 'End'))
    return predictions