Example #1
import pickle
import sys

# assumption: the GUI below uses PyQt4; swap in PyQt5.QtWidgets for PyQt5
from PyQt4.QtGui import QApplication

from ml.configuration.Config import Config
from ml.datasets.flights.FlightHoloClean import FlightHoloClean
from ml.datasets.hospital.HospitalHoloClean import HospitalHoloClean
from ml.datasets.blackOak.BlackOakDataSetUppercase import BlackOakDataSetUppercase
from ml.datasets.luna.book.Book import Book
from ml.datasets.salary_data.Salary import Salary


def getConfig(dataset):
    # resolve the progress-log path and forecast horizon for the given dataset
    path = None
    future_steps = -1
    if isinstance(dataset, FlightHoloClean):
        path = '/home/felix/phd/round_robin_part/flights'
        future_steps = 30

    return path, future_steps


def load_model(dataSet, classifier):
    # map each dataset to the file prefix its models were trained under
    dataset_log_files = {}
    dataset_log_files[HospitalHoloClean().name] = "hospital"
    dataset_log_files[BlackOakDataSetUppercase().name] = "hospital"
    dataset_log_files[BlackOakDataSetUppercase().name] = "blackoak"
    dataset_log_files[FlightHoloClean().name] = "flight"
    # not yet trained on their own data
    dataset_log_files[Salary().name] = "hospital"  # be careful
    dataset_log_files[Book().name] = "hospital"  # be careful

    potential_model_dir = Config.get("column.potential.models")

    def load(prefix):
        # open in binary mode and close the handle once the model is unpickled
        path = (potential_model_dir + "/" + prefix +
                dataset_log_files[dataSet.name] + "_" + classifier.name + ".p")
        with open(path, "rb") as f:
            return pickle.load(f)

    tp_model = load("tp_model")
    fpfn_model = load("fpfn_model")
    delta_tp_model = load("delta_tp_model")
    delta_fpfn_model = load("delta_fpfn_model")

    return tp_model, fpfn_model, delta_tp_model, delta_fpfn_model
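
A minimal usage sketch, assuming the four pickles were produced by an earlier training run (XGBoostClassifier is the classifier class used in the later examples):

# hypothetical usage of the loader above
tp_model, fpfn_model, delta_tp_model, delta_fpfn_model = load_model(
    FlightHoloClean(), XGBoostClassifier)
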
def main():
    app = QApplication(sys.argv)

    #data = BlackOakDataSetUppercase()
    data = FlightHoloClean()

    ex = Example(data)  # Example is the GUI widget defined elsewhere in this project
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()


def load_model(dataSet, classifier):
    dataset_log_files = {}
    dataset_log_files[HospitalHoloClean().name] = "hospital"
    dataset_log_files[BlackOakDataSetUppercase().name] = "blackoak"
    dataset_log_files[FlightHoloClean().name] = "flight"

    potential_model_dir = Config.get("column.potential.models")

    path = (potential_model_dir + "/model" + dataset_log_files[dataSet.name] +
            "_" + classifier.name + ".p")
    with open(path, "rb") as f:  # binary mode; the handle is closed after loading
        return pickle.load(f)
Example #5
def load_model(dataSet):
    dataset_log_files = {}
    dataset_log_files[HospitalHoloClean().name] = "hospital"
    dataset_log_files[BlackOakDataSetUppercase().name] = "blackoak"
    dataset_log_files[FlightHoloClean().name] = "flight"
    # not yet
    #dataset_log_files[Salary().name] = "hospital"  # be careful
    #dataset_log_files[Book().name] = "hospital"  # be careful

    potential_model_dir = '/home/felix/ExampleDrivenErrorDetection/potential models/unique_false_current_hist'

    path = (potential_model_dir + "/model" + dataset_log_files[dataSet.name] +
            "_XGBoost.p")
    with open(path, "rb") as f:  # binary mode for pickle
        return pickle.load(f)
def load_model(dataSet, classifier):
    dataset_log_files = {}
    dataset_log_files[HospitalHoloClean().name] = "hospital"
    dataset_log_files[BlackOakDataSetUppercase().name] = "blackoak"
    dataset_log_files[FlightHoloClean().name] = "flight"
    # not yet trained on their own data
    dataset_log_files[Salary().name] = "hospital"  # be careful
    dataset_log_files[Book().name] = "hospital"  # be careful

    #potential_model_dir = Config.get("column.potential.models")
    potential_model_dir = "/home/felix/ExampleDrivenErrorDetection/potential models/classification"

    path = (potential_model_dir + "/model" + dataset_log_files[dataSet.name] +
            "_" + classifier.name + ".p")
    with open(path, "rb") as f:  # binary mode for pickle
        return pickle.load(f)
def load_model(dataSet):
    dataset_log_files = {}
    dataset_log_files[HospitalHoloClean().name] = "hospital"
    dataset_log_files[BlackOakDataSetUppercase().name] = "blackoak"
    dataset_log_files[FlightHoloClean().name] = "flight"
    # not yet
    # dataset_log_files[Salary().name] = "hospital"  # be careful
    # dataset_log_files[Book().name] = "hospital"  # be careful

    #potential_model_dir = '/home/felix/ExampleDrivenErrorDetection/potential models/current_total_f'
    potential_model_dir = '/home/felix/ExampleDrivenErrorDetection/potential models/simulation100data'

    def load(prefix):
        # binary mode, and the file handle is closed after unpickling
        with open(potential_model_dir + "/" + prefix + "_XGBoost.p", "rb") as f:
            return pickle.load(f)

    tp_model = load("tp_model")
    fp_model = load("fp_model")
    fn_model = load("fn_model")

    return tp_model, fp_model, fn_model
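
A minimal usage sketch for this three-model variant, assuming the XGBoost pickles exist in the simulation directory:

# hypothetical usage; any dataset mapped above works here
tp_model, fp_model, fn_model = load_model(FlightHoloClean())
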
Example #8
classifier_log_paths = {}
classifier_log_paths[XGBoostClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/log_newer/xgboost"
classifier_log_paths[LinearSVMClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/log_newer/linearsvm"
classifier_log_paths[NaiveBayesClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/log_newer/naivebayes"

dataset_log_files = {}
dataset_log_files[HospitalHoloClean().name] = "hospital"
dataset_log_files[BlackOakDataSetUppercase().name] = "blackoak"
dataset_log_files[FlightHoloClean().name] = "flight"
#dataset_log_files[Salary().name] = "salary1"

classifier_to_use = XGBoostClassifier
model_for_dataset = HospitalHoloClean()

datasets = [HospitalHoloClean(), BlackOakDataSetUppercase(), FlightHoloClean()]

for i in range(len(datasets)):
    if datasets[i].name == model_for_dataset.name:
        datasets.pop(i)
        break
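
The same hold-one-out filtering can be done without index bookkeeping; a sketch of the equivalent list comprehension:

# equivalent to the pop-and-break loop above
datasets = [d for d in datasets if d.name != model_for_dataset.name]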

print "datasets used for training:"
for i in range(len(datasets)):
    print datasets[i]
Example #9
from ml.datasets.flights.FlightHoloClean import FlightHoloClean
from ml.tools.katara_new.Katara import Katara

data = FlightHoloClean()

#data.dirty_pd.to_csv('/tmp/data.csv', index=False)

tool = Katara(
    "/home/felix/ExampleDrivenErrorDetection/data/katara/flights.txt", data)

print "Fscore: " + str(tool.calculate_total_fscore())
print "Precision: " + str(tool.calculate_total_precision())
print "Recall: " + str(tool.calculate_total_recall())

for c in range(data.shape[1]):
    print tool.calculate_fscore_by_column(c)
Example #10
#log_folder = "unique_batch"
#log_folder = "bart/fd1/20percent"
#log_folder = "word_unigrams"
#log_folder = "unigrams"
#log_folder = "bigrams"
#log_folder = "metadata"
#log_folder = "unique_batch"
#log_folder = "unigram_metadata_naivebayes"
#log_folder = "unigram_metadata_linearsvm"
#log_folder = "food"
log_folder = "deep_all"

#dataset = FoodHoloClean()
#dataset = BlackOakDataSetUppercase()
#dataset = HospitalHoloClean()
dataset = FlightHoloClean()
#future_steps = 60  # BlackOak = 7, Flights = 9
'''
from ml.datasets.BartDataset.BartDataSet import BartDataset
dataset = BartDataset(BlackOakDataSetUppercase(), "CityFD_20percent")
'''
future_steps = 20  # 60

n = dataset.get_number_dirty_columns()

best_sum_total_f = {}
best_col_seq = {}

for d in range(10):
    file_path = ("/home/felix/ExampleDrivenErrorDetection/progress_log_data/" +
                 log_folder + "/log_progress_" + dataset.name + "_" + str(d) + ".csv")
Example #11
cutting = True

use_potential = False

classifier_log_paths = {}
#classifier_log_paths[XGBoostClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/log_newer/xgboost"
#classifier_log_paths[LinearSVMClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/log_newer/linearsvm"
#classifier_log_paths[NaiveBayesClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/log_newer/naivebayes"

#classifier_log_paths[XGBoostClassifier.name] = "/home/felix/ExampleDrivenErrorDetection/progress_log_data/unique"

dataset_log_files = {}
dataset_log_files[HospitalHoloClean().name] = "hospital"
dataset_log_files[BlackOakDataSetUppercase().name] = "blackoak"
dataset_log_files[FlightHoloClean().name] = "flight"
dataset_log_files[Book().name] = "book"
dataset_log_files[Salary().name] = "salaries"
dataset_log_files[Restaurant().name] = "restaurant"

classifier_to_use = XGBoostClassifier
model_for_dataset = HospitalHoloClean()

datasets = [
    HospitalHoloClean(),
    BlackOakDataSetUppercase(),
    FlightHoloClean(),
    Book(),
    Salary(),
    Restaurant()
]
Example #12

            certainty_array[index_i] = np.square(1.0 - value)
            indexmap[index_i] = key
            index_i += 1

        prob = normalize(certainty_array)

        print certainty_array
        print prob

        return indexmap[np.random.choice(len(prob), 1, p=prob)[0]]
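
The normalize helper used above is not shown in this excerpt; a minimal sketch, assuming it only rescales the non-negative weights into a probability vector for np.random.choice:

import numpy as np

def normalize(weights):
    # assumed helper: rescale non-negative weights so they sum to 1
    weights = np.asarray(weights, dtype=float)
    total = weights.sum()
    if total == 0.0:
        # uniform fallback when every weight is zero
        return np.ones_like(weights) / len(weights)
    return weights / total
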


#input

from ml.datasets.flights.FlightHoloClean import FlightHoloClean
dataSet = FlightHoloClean()
from ml.datasets.hospital.HospitalHoloClean import HospitalHoloClean
#dataSet = HospitalHoloClean()
from ml.datasets.blackOak.BlackOakDataSetUppercase import BlackOakDataSetUppercase
#dataSet = BlackOakDataSetUppercase()

#print("read: %s seconds ---" % (time.time() - start_time))

start_time = time.time()

train_fraction = 1.0
ngrams = 1
runSVD = False
use_metadata = True
user_error_probability = 0.0
step_size = 10
Example #13
feature_names = [
    i for j, i in enumerate(feature_names) if j in which_features_to_use
]

use_absolute_difference = True  # False == Squared / True == Absolute

enable_plotting = True

cutting = True

use_potential = False

classifier_log_paths = {}

dataset = FlightHoloClean()


def getConfig(dataset):
    # resolve the progress-log path and forecast horizon for the given dataset
    path = None
    future_steps = -1
    if isinstance(dataset, FlightHoloClean):
        path = '/home/felix/phd/round_robin_part/flights'
        future_steps = 4 * 2 + 20

    return path, future_steps


mypath, future_steps = getConfig(dataset)

n = dataset.get_number_dirty_columns()
Example #14
import numpy as np
from ml.datasets.blackOak.BlackOakDataSetUppercase import BlackOakDataSetUppercase
from ml.datasets.flights.FlightHoloClean import FlightHoloClean
from ml.datasets.products.Products import Products
from ml.datasets.luna.book.Book import Book
from ml.datasets.electronics.Electronics import Electronics
from ml.datasets.salary_data.Salary import Salary
import pandas as pd
import csv
from ml.data_generator.generate_bart_config import generate_bart_config
from shutil import copyfile

datasets = [
    BlackOakDataSetUppercase().clean_pd.values,
    FlightHoloClean().clean_pd.values,
    Salary().clean_pd.values,
    Electronics().clean_pd.values,
    Book().clean_pd.values,
    Products().clean_pd.values
]

for n in range(1000):
    # select dataset
    dataset_id = np.random.randint(len(datasets))
    dataset = datasets[dataset_id]

    # select number of rows (at most 2000, capped by the dataset size)
    max_rows = min(2000, dataset.shape[0])
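
A minimal sketch of how the sampling could continue from here, assuming uniform row sampling without replacement (row_indices and sample are hypothetical names):

    # hypothetical continuation: draw max_rows distinct rows from the chosen dataset
    row_indices = np.random.choice(dataset.shape[0], max_rows, replace=False)
    sample = dataset[row_indices, :]
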
Example #15
from ml.tools.dboost.TestDBoost import run_params_mixture
from ml.tools.dboost.TestDBoost import run_params_hist
from ml.tools.dboost.TestDBoost import run_params_gaussian

import time
import numpy as np
import glob
from ml.configuration.Config import Config
import os

mypath = Config.get("logging.folder") + "/out/server_dboost"
mylist = glob.glob(mypath + "/*.txt")

datasets = [
    FlightHoloClean(),
    Beers(),
    BlackOakDataSetUppercase(),
    HospitalHoloClean(),
    Movies(),
    Restaurant(),
    Citation(),
    Salary()
]

N = 1

path_folder = Config.get("logging.folder") + "/out/dboost_runtime"

if not os.path.exists(path_folder):
    os.makedirs(path_folder)
Example #16

enable_plotting = True

classifier_log_paths = {}
#classifier_log_paths[XGBoostClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/log_newer/xgboost"
#classifier_log_paths[LinearSVMClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/log_newer/linearsvm"
#classifier_log_paths[NaiveBayesClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/log_newer/naivebayes"

#classifier_log_paths[XGBoostClassifier.name] = "/home/felix/ExampleDrivenErrorDetection/progress_log_data/new_mean_certainty_change_all"  # hist_change
classifier_log_paths[XGBoostClassifier.name] = "/home/felix/SequentialPatternErrorDetection/progress_log_data/neweat_backup"

dataset_log_files = {}
dataset_log_files[HospitalHoloClean().name] = "hospital"
dataset_log_files[BlackOakDataSetUppercase().name] = "blackoak"
dataset_log_files[FlightHoloClean().name] = "flight"
dataset_log_files[Book().name] = "book"
dataset_log_files[Salary().name] = "salaries"
dataset_log_files[Restaurant().name] = "restaurant"


classifier_to_use = XGBoostClassifier
model_for_dataset = HospitalHoloClean()

datasets = [
    HospitalHoloClean(),
    BlackOakDataSetUppercase(),
    FlightHoloClean(),
    Book(),
    Salary(),
    Restaurant()
]

for i in range(len(datasets)):
    if datasets[i].name == model_for_dataset.name:
        datasets.pop(i)
        break