import time


def main():
    for set_name in ConfigHelper.get_datasets():
        MetricsHelper.reset_metrics()
        data, set_target = IOHelper.read_dataset(set_name)
        feats, labels = DataHelper.extract_feature_labels(data, set_target)
        DataHelper.create_label_mapping(labels)
        max_nb_feats = DataHelper.calculate_max_nb_features(feats)

        for e in range(ConfigHelper.nb_executions):
            start = time.time()
            print("Execution " + str(e))
            train_idxs, test_idxs = DataHelper.split_in_sets(feats, labels)
            train_X = DataHelper.select_rows(feats, train_idxs, copy=False)
            train_y = DataHelper.select_rows(labels, train_idxs, copy=False)
            test_X = DataHelper.select_rows(feats, test_idxs, copy=False)
            test_y = DataHelper.select_rows(labels, test_idxs, copy=False)

            for noise_level in ConfigHelper.noise_levels:
                # Inject label noise into the training targets only.
                noisy_idxs, noisy_train_y = DataHelper.insert_noise(train_y,
                                                                    noise_level)

                for name, clf, clean_type in ConfigHelper.get_classifiers():
                    (chosen_rate, chosen_threshold, chosen_X, chosen_y,
                     chosen_clf, true_filtered,
                     false_filtered) = ConfigHelper.choose_algorithm(
                        clf, clean_type, train_X, noisy_train_y, noisy_idxs,
                        max_nb_feats)

                    chosen_clf.fit(chosen_X, chosen_y)
                    predictions = chosen_clf.predict(test_X)
                    error = MetricsHelper.calculate_error_score(test_y,
                                                                predictions)
                    MetricsHelper.metrics.append([
                        set_name, e, noise_level, name, chosen_rate,
                        chosen_threshold, error, true_filtered,
                        false_filtered])

            print(str(time.time() - start))

        IOHelper.store_results(MetricsHelper.convert_metrics_to_frame(),
                               "final_" + set_name)
class Evaluator:
    def __init__(self, **query_params):
        self._DEFAULT_LOCATIONS = ["rathmines-dublin"]
        self._DEFAULT_PRICE_RANGE = (0, 2000)
        self._locations = query_params.get("locations",
                                           self._DEFAULT_LOCATIONS)
        self._price_range = query_params.get("price_range",
                                             self._DEFAULT_PRICE_RANGE)
        print(self._locations)
        print(self._price_range)
        self._daft_scraper = DaftScraper(self._locations, self._price_range)
        self._property_dao = PropertyDao()
        self._email_handler = EmailHandler()
        self._metrics_helper = MetricsHelper()

    def run(self):
        _current_daft_snapshot = self._daft_scraper.query_all_properties()
        _db_daft_snapshot = self._property_dao.return_all_properties()
        _db_daft_snapshot_ids = self._return_property_ids(_db_daft_snapshot)
        print(len(_current_daft_snapshot))
        self._metrics_helper.put_metric_data("NumberOfPropertiesReturned",
                                             len(_current_daft_snapshot),
                                             "Count")
        _new_properties = [p for p in _current_daft_snapshot
                           if p.id not in _db_daft_snapshot_ids]
        print(len(_new_properties))
        print(_new_properties)
        if len(_new_properties) > 0:
            self._update_db_with_new_properties(_new_properties)
            self._call_email_handler(_new_properties)

    @staticmethod
    def _return_property_ids(properties):
        return {prop.id for prop in properties}

    def _update_db_with_new_properties(self, properties):
        for property_ in properties:
            self._property_dao.add_property(property_)

    def _call_email_handler(self, properties):
        self._email_handler.send_new_prop_emails(properties)
def _calculate_cv_error(base_clf, best_rate, X, y, is_y_noise, clean_type,
                        max_nb_feats, major_oob_label):
    errors = []
    skf = StratifiedKFold(n_splits=NoiseDetectionEnsemble.k_folds,
                          shuffle=True)

    for train_idxs, val_idxs in skf.split(X=range(len(y)), y=y):
        train_X = DataHelper.select_rows(X, train_idxs, copy=False)
        train_y = DataHelper.select_rows(y, train_idxs, copy=False)
        train_is_y_noise = DataHelper.select_rows(is_y_noise, train_idxs,
                                                  copy=False)
        # Clean (or relabel) the instances flagged as noisy before training,
        # then rebalance the cleaned fold with the chosen sampling rate.
        clean_train = NoiseDetectionEnsemble._clean_noisy_data(
            train_X, train_y, train_is_y_noise, clean_type, major_oob_label)
        train_X, train_y, adapted_rate = DataHelper.adapt_rate(
            clean_train[0], clean_train[1], best_rate)

        ensemble = RF(n_estimators=501, n_jobs=-1, max_features="sqrt")
        ensemble.fit(train_X, train_y)

        val_X = DataHelper.select_rows(X, val_idxs, copy=False)
        val_y = DataHelper.select_rows(y, val_idxs, copy=False)
        predictions = ensemble.predict(val_X)
        error = MetricsHelper.calculate_error_score(val_y, predictions)
        errors.append(error)

    return mean(errors)
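# For reference, a minimal self-contained version of this stratified CV error
# estimate using plain scikit-learn. It assumes the error score is the
# misclassification rate and omits the project-specific cleaning
# (_clean_noisy_data) and rate-adaptation (adapt_rate) steps, so it is a
# sketch of the surrounding loop rather than the project's actual helper.
from statistics import mean

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold


def cv_error_sketch(X, y, n_splits=5):
    """Mean misclassification rate over stratified folds."""
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
    errors = []
    for train_idxs, val_idxs in skf.split(X, y):
        clf = RandomForestClassifier(n_estimators=501, max_features="sqrt",
                                     n_jobs=-1)
        clf.fit(X[train_idxs], y[train_idxs])
        predictions = clf.predict(X[val_idxs])
        errors.append(np.mean(predictions != y[val_idxs]))
    return mean(errors)


if __name__ == "__main__":
    X_toy, y_toy = make_classification(n_samples=300, n_features=10,
                                       random_state=0)
    print(cv_error_sketch(X_toy, y_toy))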
def main():
    view_type = ConfigHelper.view_type
    filename = ConfigHelper.dataset_file
    X, y = IOHelper.read_view_with_classes(view_type, filename)
    for e in range(1, ConfigHelper.nb_executions + 1):
        MetricsHelper.update_best_metrics(KCM_FGH.run(view_type, e, X), y)
def choose_algorithm(clf, clean_type, train_X, noisy_train_y, noisy_idxs,
                     max_nb_feats):
    chosen_rate = nan
    chosen_threshold = nan
    chosen_X = None
    chosen_y = None
    chosen_clf = None
    true_filtered = 0

    if clean_type is None:
        # No cleaning: train the base classifier directly on the noisy data.
        chosen_X = train_X
        chosen_y = noisy_train_y
        chosen_clf = clf
    elif clean_type == "maj":
        # Majority filtering: keep only the instances that survive the filter.
        filt_X, filt_y = MajorityFiltering.run(train_X, noisy_train_y)
        chosen_X = filt_X
        chosen_y = filt_y
        chosen_clf = MajorityFiltering.get_ensemble()
        true_filtered = MetricsHelper.calculate_true_filter(chosen_y.index,
                                                            noisy_idxs)
    else:
        # Noise-detection ensemble: tune sampling rate and threshold, then
        # train a random forest on the cleaned, rate-adapted data.
        algorithm_data = NoiseDetectionEnsemble.run(clf, clean_type, train_X,
                                                    noisy_train_y,
                                                    max_nb_feats)
        chosen_rate = algorithm_data[0]
        chosen_threshold = algorithm_data[1]
        chosen_X = algorithm_data[2]
        chosen_y = algorithm_data[3]
        chosen_X, chosen_y, adapted_rate = DataHelper.adapt_rate(chosen_X,
                                                                 chosen_y,
                                                                 chosen_rate)
        chosen_clf = RF(n_estimators=501, max_features="sqrt", n_jobs=-1)
        true_filtered = MetricsHelper.calculate_true_filter(chosen_y.index,
                                                            noisy_idxs)

    tot_filtered = len(train_X) - len(chosen_X.index.unique())
    false_filtered = tot_filtered - true_filtered

    return [chosen_rate, chosen_threshold, chosen_X, chosen_y, chosen_clf,
            true_filtered / len(train_X), false_filtered / len(train_X)]
def _mark_as_noisy(oob_matrix, y, threshold):
    nb_instances = len(y)
    errors = [MetricsHelper.calculate_error_score(
                  [y.iloc[i]] * len(oob_matrix[i]), oob_matrix[i])
              for i in range(nb_instances)]
    is_noise_list = [error > threshold for error in errors]
    is_noise = Series(is_noise_list, index=y.index, dtype=bool,
                      name="is_noise")
    return is_noise
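# A minimal self-contained illustration of the same idea: each row of
# oob_matrix holds the out-of-bag predictions for one instance, and an
# instance is flagged as noisy when its OOB error exceeds the threshold.
# This sketch assumes the error score is simply the misclassification rate;
# the project's MetricsHelper may compute it differently.
import numpy as np
from pandas import Series


def mark_as_noisy_sketch(oob_matrix, y, threshold):
    errors = [np.mean(np.asarray(preds) != label)
              for preds, label in zip(oob_matrix, y)]
    return Series([e > threshold for e in errors], index=y.index,
                  dtype=bool, name="is_noise")


if __name__ == "__main__":
    y_toy = Series([0, 1, 1], index=[10, 11, 12])
    oob_toy = [[0, 0, 1], [0, 0, 0], [1, 1, 1]]
    # Instance 11 is flagged: all of its OOB predictions disagree with its label.
    print(mark_as_noisy_sketch(oob_toy, y_toy, threshold=0.5))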
# (tail of a call truncated in the source)
# ..., train_y, ConfigHelper.max_nb_features, is_train=True)

test_X = IOHelper.read_dataset("test")
DataHelper.add_nan_indication_cols(test_X)
DataHelper.remove_high_nan_rate_cols(test_X, True)
DataHelper.remove_small_variance_cols(test_X, True)
DataHelper.fill_missing_data(test_X, is_train=False)
test_X = DataHelper.split_categorical_cols(test_X, is_train=False)
DataHelper.scale_continuous_cols(test_X, is_train=False)
DataHelper.select_best_features(test_X, train_X, None,
                                ConfigHelper.max_nb_features, is_train=False)
DataHelper.reset_scaler()

for name, model in ConfigHelper.get_submission_models():
    print("Training")
    model.fit(train_X, train_y)

    print("Predicting")
    probs = model.predict_proba(test_X)
    submission = MetricsHelper.get_submission(test_X.index, probs)

    print("Saving submission")
    IOHelper.store_submission(submission, name)
def main():
    readme_text = st.markdown(get_file_content_as_string("about.md"))
    # dataframe = pd.DataFrame(
    #     np.random.randn(2, 2),
    #     columns=('col %d' % i for i in range(2)))
    # st.write(dataframe)
    data = st.sidebar.file_uploader('Upload a file')
    app_model = st.sidebar.selectbox(
        "Choose model", ["Pix2Pix L1 norm", "Pix2Pix Perceptual Loss"])
    app_seg = st.sidebar.selectbox("Use pre-processing?", ["No", "Yes"])
    app_epoch = st.sidebar.selectbox(
        "Select epoch", ['500', '10', '100', '200', '300', '400'])

    if data:
        readme_text.empty()

        # Download the image locally and save it in the appropriate format.
        image = Image.open(data)
        A, B = save_image(image)
        has_gt = False
        if B is None:
            st.markdown('### Input image')
            st.image([A], caption=['Rendered image'])
        else:
            st.markdown('### Input and ground truth images')
            st.image([A, B], caption=['Rendered image', 'Ground truth image'])
            has_gt = True

        # Map the selected model name to its checkpoint directory.
        if app_model == 'Pix2Pix L1 norm':
            model = 'pix2pix'
        elif app_model == 'Pix2Pix Perceptual Loss':
            model = 'pix2pixpl'
        else:
            st.sidebar.success('You must select one model.')

        # Use segmentation-based pre-processing?
        use_seg = app_seg == 'Yes'

        # Selected epoch (the selectbox values are numeric strings).
        epoch = int(app_epoch)

        # Run prediction.
        # new_image = ri.predict('./checkpoints', './db', model, epoch)
        pipeline = Pipeline('./checkpoints', './db', model, epoch)
        print('*** {}'.format(use_seg))
        new_image, over_image = pipeline.run_pipeline(use_seg)

        # Display results.
        if not use_seg:
            st.markdown('### Reconstruction result')
            st.image([new_image], caption=['Reconstructed image'])
        else:
            st.markdown('### Reconstruction and segmentation results')
            st.image([new_image, over_image],
                     caption=['Reconstructed image', 'Segmentation mask'])

        # Compute quality metrics if a ground truth image is available.
        if has_gt:
            st.markdown('### Quality metrics')
            metrics = np.zeros((3, 2))
            metobj = MetricsHelper(A, new_image, B)
            metrics[0, :] = metobj.compute_mse()
            metrics[1, :] = metobj.compute_ssim()
            metrics[2, :] = metobj.compute_mae()
            dataframe = pd.DataFrame(
                metrics,
                columns=('Original rendered', 'Reconstructed'),
                index=['MSE', 'SSIM', 'MAE'])
            st.table(dataframe)
if __name__ == "__main__":
    data = IOHelper.read_dataset("train")
    feats, labels = DataHelper.extract_feature_labels(data)
    predef = ConfigHelper.use_predefined_cols
    DataHelper.add_nan_indication_cols(feats)
    DataHelper.remove_high_nan_rate_cols(feats, predef)
    DataHelper.remove_small_variance_cols(feats, predef)

    for e in range(ConfigHelper.nb_executions):
        print("Execution: " + str(e + 1))
        MetricsHelper.reset_metrics()

        for f, (train_idxs, val_idxs) in enumerate(
                ConfigHelper.k_fold_cv(labels)):
            start_time = time.time()
            print("Fold: " + str(f + 1))
            DataHelper.reset_scaler()
            train_X = DataHelper.select_rows(feats, train_idxs, copy=True)
            train_y = DataHelper.select_rows(labels, train_idxs, copy=False)
            val_X = DataHelper.select_rows(feats, val_idxs, copy=True)
            val_y = DataHelper.select_rows(labels, val_idxs, copy=False)
            train_y = DataHelper.remove_high_nan_rate_rows(train_X, train_y)
from metrics_helper import MetricsHelper
from evaluator import Evaluator
import datetime
import time

evaluator = Evaluator(
    locations=[
        "rathmines-dublin", "rathgar-dublin", "ranelagh-dublin",
        "ballsbridge-dublin", "donnybrook-dublin", "dublin-4-dublin",
        "dublin-6-dublin"
    ],
    price_range=(1350, 1600))
metrics_helper = MetricsHelper()

print("starting at: {}".format(datetime.datetime.now()))
start_time = time.time()

try:
    evaluator.run()
except Exception:
    metrics_helper.put_metric_data("successful run", 0, "Count")
else:
    metrics_helper.put_metric_data("successful run", 1, "Count")

end_time = time.time()
total_time = end_time - start_time
metrics_helper.put_metric_data("Total Run Time", total_time, "Seconds")
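# For context, a rough sketch of what a put_metric_data(name, value, unit)
# wrapper could look like if it publishes to CloudWatch via boto3. The
# project's actual MetricsHelper is not shown in these snippets, and the
# class name and namespace below are made up for illustration only.
import boto3


class MetricsHelperSketch:
    def __init__(self, namespace="DaftEvaluator"):
        self._cloudwatch = boto3.client("cloudwatch")
        self._namespace = namespace

    def put_metric_data(self, name, value, unit):
        # Publish a single data point; valid units include "Count" and "Seconds".
        self._cloudwatch.put_metric_data(
            Namespace=self._namespace,
            MetricData=[{"MetricName": name, "Value": value, "Unit": unit}])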