Example 1
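# Experiment driver: for each dataset, repeatedly split into train and test
# sets, inject label noise at several levels, fit each configured
# classifier/cleaning strategy, and record error and filtering metrics.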
def main():

    for set_name in ConfigHelper.get_datasets():

        MetricsHelper.reset_metrics()

        data, set_target = IOHelper.read_dataset(set_name)

        feats, labels = DataHelper.extract_feature_labels(data, set_target)
        DataHelper.create_label_mapping(labels)
        max_nb_feats = DataHelper.calculate_max_nb_features(feats)

        for e in range(ConfigHelper.nb_executions):
            start = time.time()
            print("Execution " + str(e))

            train_idxs, test_idxs = DataHelper.split_in_sets(feats, labels)

            train_X = DataHelper.select_rows(feats, train_idxs, copy=False)
            train_y = DataHelper.select_rows(labels, train_idxs, copy=False)
            test_X = DataHelper.select_rows(feats, test_idxs, copy=False)
            test_y = DataHelper.select_rows(labels, test_idxs, copy=False)

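            # Evaluate every classifier at each noise level on the same
            # train/test split so results stay directly comparable.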
            for noise_level in ConfigHelper.noise_levels:

                noisy_idxs, noisy_train_y = DataHelper.insert_noise(
                    train_y, noise_level)

                for name, clf, clean_type in ConfigHelper.get_classifiers():

                    algorithm_data = ConfigHelper.choose_algorithm(
                        clf, clean_type, train_X, noisy_train_y, noisy_idxs,
                        max_nb_feats)

                    (chosen_rate, chosen_threshold, chosen_X, chosen_y,
                     chosen_clf, true_filtered, false_filtered) = algorithm_data

                    chosen_clf.fit(chosen_X, chosen_y)
                    predictions = chosen_clf.predict(test_X)
                    error = MetricsHelper.calculate_error_score(
                        test_y, predictions)

                    MetricsHelper.metrics.append([
                        set_name, e, noise_level, name, chosen_rate,
                        chosen_threshold, error, true_filtered, false_filtered
                    ])
            print("Elapsed: {:.2f}s".format(time.time() - start))

        IOHelper.store_results(MetricsHelper.convert_metrics_to_frame(),
                               "final_" + set_name)
class Evaluator:
    def __init__(self, **query_params):
        self._DEFAULT_LOCATIONS = ["rathmines-dublin"]
        self._DEFAULT_PRICE_RANGE = (0, 2000)

        self._locations = query_params.get("locations", self._DEFAULT_LOCATIONS)
        self._price_range = query_params.get("price_range", self._DEFAULT_PRICE_RANGE)

        print("Locations: {}".format(self._locations))
        print("Price range: {}".format(self._price_range))

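        # Collaborators: scrape current listings, persist seen properties,
        # send e-mail alerts for new ones, and report run metrics.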
        self._daft_scraper = DaftScraper(self._locations, self._price_range)
        self._property_dao = PropertyDao()
        self._email_handler = EmailHandler()
        self._metrics_helper = MetricsHelper()

    def run(self):
        _current_daft_snapshot = self._daft_scraper.query_all_properties()
        _db_daft_snapshot = self._property_dao.return_all_properties()

        _db_daft_snapshot_ids = self._return_property_ids(_db_daft_snapshot)

        print("Properties returned from Daft: {}".format(len(_current_daft_snapshot)))
        self._metrics_helper.put_metric_data("NumberOfPropertiesReturned",
                                             len(_current_daft_snapshot),
                                             "Count")
        _new_properties = [
            prop for prop in _current_daft_snapshot
            if prop.id not in _db_daft_snapshot_ids
        ]
        print("New properties found: {}".format(len(_new_properties)))
        print(_new_properties)

        if _new_properties:
            self._update_db_with_new_properties(_new_properties)
            self._call_email_handler(_new_properties)

    @staticmethod
    def _return_property_ids(properties):
        return {prop.id for prop in properties}

    def _update_db_with_new_properties(self, properties):
        for property_ in properties:
            self._property_dao.add_property(property_)

    def _call_email_handler(self, properties):
        self._email_handler.send_new_prop_emails(properties)
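A minimal sketch of what this MetricsHelper.put_metric_data could look like, assuming the metrics are shipped to AWS CloudWatch via boto3; the namespace name is invented for illustration:

import boto3


class MetricsHelper:
    def __init__(self, namespace="DaftEvaluator"):  # hypothetical namespace
        self._cloudwatch = boto3.client("cloudwatch")
        self._namespace = namespace

    def put_metric_data(self, name, value, unit):
        # Push a single data point; "Count" and "Seconds" are valid
        # CloudWatch units, matching the calls made in Evaluator.run().
        self._cloudwatch.put_metric_data(
            Namespace=self._namespace,
            MetricData=[{"MetricName": name, "Value": value, "Unit": unit}])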
	def _calculate_cv_error(base_clf, best_rate, X, y, is_y_noise, clean_type, 
							max_nb_feats, major_oob_label):

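		# Stratified k-fold CV: clean each training fold, adapt it with the
		# candidate rate, fit a random forest, and average the fold errors.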
		errors = []

		skf = StratifiedKFold(n_splits=NoiseDetectionEnsemble.k_folds, 
								shuffle=True)

		for train_idxs, val_idxs in skf.split(X, y):

			train_X = DataHelper.select_rows(X, train_idxs, copy=False)
			train_y = DataHelper.select_rows(y, train_idxs, copy=False)
			train_is_y_noise = DataHelper.select_rows(is_y_noise, train_idxs,
												copy=False)
	
			clean_train = NoiseDetectionEnsemble._clean_noisy_data(train_X,
													train_y, train_is_y_noise,
													clean_type, major_oob_label)

			train_X, train_y, adapted_rate = DataHelper.adapt_rate(clean_train[0], 
																clean_train[1], 
																best_rate)

			ensemble = RF(n_estimators=501, n_jobs=-1, max_features="sqrt")
			ensemble.fit(train_X, train_y)

			val_X = DataHelper.select_rows(X, val_idxs, copy=False)
			val_y = DataHelper.select_rows(y, val_idxs, copy=False)

			predictions = ensemble.predict(val_X)
			error = MetricsHelper.calculate_error_score(val_y, predictions)
			errors.append(error)

		return mean(errors)
def main():

    view_type = ConfigHelper.view_type
    filename = ConfigHelper.dataset_file

    X, y = IOHelper.read_view_with_classes(view_type, filename)

    # Run KCM_FGH nb_executions times, keeping the best metrics across runs.
    for e in range(1, ConfigHelper.nb_executions + 1):
        MetricsHelper.update_best_metrics(KCM_FGH.run(view_type, e, X), y)
Example 6
	def choose_algorithm(clf, clean_type, train_X, noisy_train_y,
						noisy_idxs, max_nb_feats):
		chosen_rate = nan
		chosen_threshold = nan
		chosen_X = None
		chosen_y = None
		chosen_clf = None
		true_filtered = 0

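		# Three strategies: no cleaning (baseline), majority-vote filtering,
		# or the noise-detection ensemble with rate/threshold selection.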
		if clean_type is None:
			chosen_X = train_X
			chosen_y = noisy_train_y
			chosen_clf = clf

		elif clean_type == "maj":
			filt_X, filt_y = MajorityFiltering.run(train_X, 
												   noisy_train_y)
			chosen_X = filt_X
			chosen_y = filt_y
			chosen_clf = MajorityFiltering.get_ensemble()
			true_filtered = MetricsHelper.calculate_true_filter(chosen_y.index,
															noisy_idxs)
		else:
			algorithm_data = NoiseDetectionEnsemble.run(
				clf, clean_type, train_X, noisy_train_y, max_nb_feats)
			chosen_rate, chosen_threshold, chosen_X, chosen_y = algorithm_data[:4]
			chosen_X, chosen_y, adapted_rate = DataHelper.adapt_rate(
				chosen_X, chosen_y, chosen_rate)

			chosen_clf = RF(n_estimators=501, max_features="sqrt", n_jobs=-1)

			true_filtered = MetricsHelper.calculate_true_filter(chosen_y.index,
															noisy_idxs)

		tot_filtered = len(train_X) - len(chosen_X.index.unique())
		false_filtered = tot_filtered - true_filtered

		return [chosen_rate, chosen_threshold, chosen_X, chosen_y, chosen_clf,
				true_filtered / len(train_X), false_filtered / len(train_X)]
	def _mark_as_noisy(oob_matrix, y, threshold):

		nb_instances = len(y)

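		# An instance is flagged as noisy when its out-of-bag predictions
		# disagree with its current label more often than the threshold allows.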
		errors = [MetricsHelper.calculate_error_score([y.iloc[i]]*len(oob_matrix[i]), 
						oob_matrix[i]) for i in range(nb_instances)]

		is_noise_list = [error > threshold for error in errors]
		is_noise = Series(is_noise_list, index=y.index, dtype=bool, name="is_noise")
		return is_noise
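For intuition, a hand-worked toy run of the flagging rule above, assuming MetricsHelper.calculate_error_score returns the misclassification rate:

from pandas import Series

oob_matrix = [[0, 0, 1], [1, 1, 1], [0, 1, 1]]  # OOB predictions per instance
y = Series([0, 0, 1])                           # current (possibly noisy) labels
# Per-instance errors are 1/3, 1.0 and 1/3; with threshold 0.5 only the
# second instance would be marked as noise.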
                                              train_y,
                                              ConfigHelper.max_nb_features,
                                              is_train=True)

    test_X = IOHelper.read_dataset("test")

    DataHelper.add_nan_indication_cols(test_X)
    DataHelper.remove_high_nan_rate_cols(test_X, True)
    DataHelper.remove_small_variance_cols(test_X, True)

    DataHelper.fill_missing_data(test_X, is_train=False)
    test_X = DataHelper.split_categorical_cols(test_X, is_train=False)
    DataHelper.scale_continuous_cols(test_X, is_train=False)
    DataHelper.select_best_features(test_X,
                                    train_X,
                                    None,
                                    ConfigHelper.max_nb_features,
                                    is_train=False)
    DataHelper.reset_scaler()

    for name, model in ConfigHelper.get_submission_models():

        print "Training"
        model.fit(train_X, train_y)

        print "Predicting"
        probs = model.predict_proba(test_X)
        submission = MetricsHelper.get_submission(test_X.index, probs)

        print "Saving submission"
        IOHelper.store_submission(submission, name)
Example 9
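# Streamlit front end: upload a rendered image, choose a Pix2Pix variant and
# epoch, run the reconstruction pipeline, and show quality metrics when a
# ground-truth image is available.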
def main():

    readme_text = st.markdown(get_file_content_as_string("about.md"))


    data = st.sidebar.file_uploader('Upload a file')
    app_model = st.sidebar.selectbox(
        "Choose model", ["Pix2Pix L1 norm", "Pix2Pix Perceptual Loss"])
    app_seg = st.sidebar.selectbox("Use pre-processing?", ["No", "Yes"])
    app_epoch = st.sidebar.selectbox("Select epoch",
                                     ['500', '10', '100', '200', '300', '400'])

    if data:
        readme_text.empty()

        #download image locally and save it in the appropriate format
        image = Image.open(data)
        A, B = save_image(image)
        has_gt = False
        if B is None:
            st.markdown('### Input image')
            st.image([A], caption=['Rendered image'])
        else:
            st.markdown('### Input and ground truth images')
            st.image([A, B], caption=['Rendered image', 'Ground truth image'])
            has_gt = True

        #get model
        if app_model == 'Pix2Pix L1 norm':
            model = 'pix2pix'
        else:
            model = 'pix2pixpl'

        #use segmentation?
        use_seg = (app_seg == 'Yes')

        #get epoch (every selectbox option is a numeric string)
        epoch = int(app_epoch)

        #run prediction
        pipeline = Pipeline('./checkpoints', './db', model, epoch)
        print('Using segmentation: {}'.format(use_seg))

        new_image, over_image = pipeline.run_pipeline(use_seg)
        #display results
        if not use_seg:
            st.markdown('### Reconstruction result')
            st.image([new_image], caption=['Reconstructed image'])
        else:
            st.markdown('### Reconstruction and segmentation results')
            st.image([new_image, over_image],
                     caption=['Reconstructed image', 'Segmentation mask'])

        #compute metrics if ground truth is available
        if has_gt:
            st.markdown('### Quality metrics')
            metrics = np.zeros((3, 2))
            metobj = MetricsHelper(A, new_image, B)
            metrics[0, :] = metobj.compute_mse()
            metrics[1, :] = metobj.compute_ssim()
            metrics[2, :] = metobj.compute_mae()

            dataframe = pd.DataFrame(metrics,
                                     columns=('Original rendered',
                                              'Reconstructed'),
                                     index=['MSE', 'SSIM', 'MAE'])
            st.table(dataframe)
if __name__ == "__main__":

    data = IOHelper.read_dataset("train")
    feats, labels = DataHelper.extract_feature_labels(data)

    predef = ConfigHelper.use_predefined_cols

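    # Basic cleaning: flag missing values, then drop high-NaN and
    # near-constant columns.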
    DataHelper.add_nan_indication_cols(feats)
    DataHelper.remove_high_nan_rate_cols(feats, predef)
    DataHelper.remove_small_variance_cols(feats, predef)

    for e in range(ConfigHelper.nb_executions):
        print("Execution: " + str(e + 1))

        MetricsHelper.reset_metrics()

        for f, (train_idxs,
                val_idxs) in enumerate(ConfigHelper.k_fold_cv(labels)):
            start_time = time.time()
            print("Fold: " + str(f + 1))

            DataHelper.reset_scaler()

            train_X = DataHelper.select_rows(feats, train_idxs, copy=True)
            train_y = DataHelper.select_rows(labels, train_idxs, copy=False)
            val_X = DataHelper.select_rows(feats, val_idxs, copy=True)
            val_y = DataHelper.select_rows(labels, val_idxs, copy=False)

            train_y = DataHelper.remove_high_nan_rate_rows(train_X, train_y)
Example 11
from metrics_helper import MetricsHelper
from evaluator import Evaluator
import datetime
import time

evaluator = Evaluator(
    locations=[
        "rathmines-dublin", "rathgar-dublin", "ranelagh-dublin",
        "ballsbridge-dublin", "donnybrook-dublin", "dublin-4-dublin",
        "dublin-6-dublin"
    ],
    price_range=(1350, 1600))
metrics_helper = MetricsHelper()

print("starting at: {}".format(datetime.datetime.now()))
start_time = time.time()

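# Record a success/failure metric for monitoring; any exception counts as a
# failed run.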
try:
    evaluator.run()
except Exception:
    metrics_helper.put_metric_data("successful run", 0, "Count")
else:
    metrics_helper.put_metric_data("successful run", 1, "Count")

end_time = time.time()
total_time = end_time - start_time
metrics_helper.put_metric_data("Total Run Time", total_time, "Seconds")