예제 #1
0
def experiment_invoke(dataset='bank'):
    """Run the clustering and ANN experiments twice for one dataset.

    The data is read and split once. The experiment suite is then executed
    with ``run_final=False`` and afterwards with ``run_final=True``.

    Args:
        dataset: ``'wine'`` selects ``WineDataReader``; any other value
            selects ``BankDataReader``.
    """
    reader = WineDataReader() if dataset == 'wine' else BankDataReader()

    splitter = DataSplitter(reader)
    splitter.read_split_data()

    for run_final in (False, True):
        # The two clustering experiments receive the run_final flag.
        KMeansExp(reader, splitter, run_final).experiment()
        EMExp(reader, splitter, run_final).experiment()

        # ANNExp is built without the flag and runs both of its experiments.
        ann_experiment = ANNExp(reader, splitter)
        ann_experiment.experiment_bank()
        ann_experiment.experiment_clusters()
 def __init__(self, convertor, splitter_method_index, splitter_method_parameter):
     """Initialise the base splitter and register the given-n strategies.

     Args:
         convertor: Forwarded unchanged to ``DataSplitter.__init__``.
         splitter_method_index: Forwarded unchanged to
             ``DataSplitter.__init__``.
         splitter_method_parameter: Forwarded to ``DataSplitter.__init__``
             and also stored as ``self.splitter_given_n``.
     """
     DataSplitter.__init__(
         self,
         convertor,
         splitter_method_index,
         splitter_method_parameter,
     )
     self.start_time_id = 0
     self.splitter_given_n = splitter_method_parameter

     # The position in this tuple is the key under which each strategy is
     # registered in ``self.methods`` (0 through 4).
     strategies = (
         self.get_given_n_by_user,
         self.get_given_n_by_item,
         self.get_given_n_by_user_date,
         self.get_given_n_by_item_date,
         self.get_given_n_by_date,
     )
     self.methods = dict(enumerate(strategies))
예제 #3
0
 def __init__(self, convertor, splitter_method_index, splitter_method_parameter):
     """Set up the base splitter state and the table of given-n strategies.

     The three arguments are passed positionally to
     ``DataSplitter.__init__``. ``splitter_method_parameter`` is additionally
     kept as ``self.splitter_given_n``, and ``self.start_time_id`` starts
     at 0.
     """
     DataSplitter.__init__(self, convertor, splitter_method_index, splitter_method_parameter)
     self.start_time_id = 0
     self.splitter_given_n = splitter_method_parameter

     # Strategies listed in key order: index 0 is the first entry, index 4
     # the last.
     ordered_strategies = [
         self.get_given_n_by_user,
         self.get_given_n_by_item,
         self.get_given_n_by_user_date,
         self.get_given_n_by_item_date,
         self.get_given_n_by_date,
     ]
     self.methods = {
         method_index: strategy
         for method_index, strategy in enumerate(ordered_strategies)
     }
예제 #4
0
파일: kFolderTester.py 프로젝트: rafg92/ALW
    def splitClassificationTest(self):
        """Split the data once and evaluate every classifier on that split.

        Each classifier in ``self.classifiers`` is handed to
        ``testClassifier`` together with the train/test partitions and its
        position in the list.
        """
        train_part, test_part = DataSplitter().splitDataEqually(
            self.data, self.labelCol)
        for position, classifier in enumerate(self.classifiers):
            self.testClassifier(classifier, train_part, test_part, position)
예제 #5
0
파일: kFolderTester.py 프로젝트: rafg92/ALW
    def splitMultiClassificationTest(self):
        """Split the data once and evaluate every classifier as multi-class.

        Each classifier in ``self.classifiers`` is handed to
        ``testMultiClassifier`` together with the train/test partitions and
        its position in the list. The full data set is printed before the
        evaluation starts.
        """
        train_part, test_part = DataSplitter().splitDataEqually(
            self.data, self.labelCol)
        print("DATA IN TESTER", self.data)
        for position, classifier in enumerate(self.classifiers):
            self.testMultiClassifier(classifier, train_part, test_part, position)
예제 #6
0
파일: kFolderTester.py 프로젝트: rafg92/ALW
    def createFolders(self):
        """Prepare the ``self.k`` folds used for k-fold cross validation.

        Works on a copy of ``self.data`` and stores the resulting list of
        folds in ``self.folders``.
        """
        remaining = self.data.copy()
        folds = []
        for fold_index in range(self.k):
            # Split the remaining data equally according to each label.
            # The third argument is presumably the fraction of the remaining
            # data that goes into this fold (1/k, then 1/(k-1), ..., then 1)
            # -- confirm against DataSplitter.splitDataEqually.
            fraction = 1.0 / (self.k - fold_index)
            fold, remaining = DataSplitter().splitDataEqually(
                remaining, self.labelCol, fraction)
            folds.append(fold)

        self.folders = folds
예제 #7
0
    # Drop the first column of the raw frame.
    data = data.iloc[:, 1:]

    # One-hot encode the data using pandas get_dummies
    data = pd.get_dummies(data)

    # Data transformation: real values into labels to classify
    data = data.apply(transRow, axis=1)

    # Keep only the columns selected by eliminateWeekSections.
    notWeek = eliminateWeekSections(data.columns)
    print(notWeek)
    data = data[data.columns[notWeek]]
    print(data.columns)

    labelName = "shares"

    train, test = DataSplitter().splitDataEqually(data, labelName)

    # Encode the test labels with the class -> code mapping learned from the
    # training labels. pd.factorize numbers classes by order of first
    # appearance, so factorizing each split independently could give the
    # same class different codes in train and test. Labels that never occur
    # in the training split are encoded as -1.
    Y_train, labelClasses = pd.factorize(train[labelName])
    Y_test = labelClasses.get_indexer(test[labelName])

    # NOTE(review): dropping the last column assumes labelName is the final
    # column of the frame -- confirm.
    X_train_origin = train.iloc[:, 0:train.columns.size - 1].copy()
    X_test_origin = test.iloc[:, 0:test.columns.size - 1].copy()

    scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))

    # Fit the scaler on the training features only.
    scaler.fit(X_train_origin)
    # Scaling of training data
    X_train_origin = pd.DataFrame(scaler.transform(X_train_origin.copy()),
                                  columns=X_train_origin.columns)
    # Apply same transformation to test data
    X_test_origin = pd.DataFrame(scaler.transform(X_test_origin.copy()),
                                 columns=X_test_origin.columns)

    # Scaled training features with the encoded label column appended.
    trainTmp = X_train_origin.copy()
    trainTmp[labelName] = Y_train
 def __init__(self, convertor, splitter_method_index, splitter_method_parameter):
     """Delegate construction entirely to ``DataSplitter.__init__``.

     All three arguments are forwarded positionally and unchanged.
     """
     DataSplitter.__init__(
         self,
         convertor,
         splitter_method_index,
         splitter_method_parameter,
     )
예제 #9
0
파일: Regression.py 프로젝트: rafg92/ALW
# Script entry point: load regression.csv, one-hot encode it, split it into
# train/test sets and min-max scale both sets to the range [-1, 1].
if(__name__ == "__main__"):
    # Seed NumPy's global random generator with a fixed value.
    np.random.seed(12345)

    # Read in the data and report its shape
    data = pd.read_csv('regression.csv', sep=",")

    print('The shape of our data is:', data.shape)

    # One-hot encoding: transforming nominal values. The label column is
    # saved first and written back afterwards.
    labelName = "G3"
    labels = data[labelName]
    data = pd.get_dummies(data)
    data[labelName] = labels

    train, test = DataSplitter().splitData(data.copy())

    print(train.copy())

    # Preparing test and training sets for the final evaluation; copies are
    # used so the original frames are not modified.

    scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))

    # Fit the scaler on the training data only, never on the test data.
    # NOTE(review): train appears to still contain the label column, so the
    # label is scaled as well -- confirm this is intended.
    scaler.fit(train)

    trainTmp = pd.DataFrame(scaler.transform(train.copy()), columns=train.columns)
    # Apply the same transformation to the test data
    testTmp = pd.DataFrame(scaler.transform(test.copy()), columns=test.columns)

    # Number of columns in the training frame.
    fsSize = train.columns.size
예제 #10
0
    # Drop the first column of the raw frame.
    data = data.iloc[:, 1:]

    # One-hot encode the data using pandas get_dummies
    data = pd.get_dummies(data)

    # Data transformation: real values into labels to classify
    data = data.apply(transRow, axis=1)

    # Keep only the columns selected by eliminateWeekSections.
    notWeek = eliminateWeekSections(data.columns)
    print(notWeek)
    data = data[data.columns[notWeek]]
    print(data.columns)

    labelName = "shares"

    train, test = DataSplitter().splitDataEqually(data, labelName)

    # Encode the test labels with the class -> code mapping learned from the
    # training labels. pd.factorize numbers classes by order of first
    # appearance, so factorizing each split independently could give the
    # same class different codes in train and test. Labels that never occur
    # in the training split are encoded as -1.
    Y_train, labelClasses = pd.factorize(train[labelName])
    Y_test = labelClasses.get_indexer(test[labelName])

    # NOTE(review): dropping the last column assumes labelName is the final
    # column of the frame -- confirm.
    X_train_origin = train.iloc[:, 0:train.columns.size - 1].copy()
    X_test_origin = test.iloc[:, 0:test.columns.size - 1].copy()

    scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))

    # Fit the scaler on the training features only.
    scaler.fit(X_train_origin)
    # Scaling of training data
    X_train_origin = pd.DataFrame(scaler.transform(X_train_origin.copy()),
                                  columns=X_train_origin.columns)
    # Apply same transformation to test data
    X_test_origin = pd.DataFrame(scaler.transform(X_test_origin.copy()),
                                 columns=X_test_origin.columns)
 def __init__(self, convertor, splitter_method_index, splitter_method_parameter):
     """Construct the splitter by deferring to ``DataSplitter.__init__``.

     The three arguments are passed on positionally without modification.
     """
     DataSplitter.__init__(
         self,
         convertor,
         splitter_method_index,
         splitter_method_parameter,
     )
예제 #12
0
    # Drop the first column of the raw frame.
    data = data.iloc[:, 1:]

    # print(data)

    # One-hot encode the data using pandas get_dummies
    data = pd.get_dummies(data)

    # Keep only the columns selected by eliminateWeekSections.
    notWeek = eliminateWeekSections(data.columns)
    print(notWeek)
    data = data[data.columns[notWeek]]
    print(data.columns)

    labelName = "shares"

    # Split a copy of the data into train and test sets.
    train, test = DataSplitter().splitData(data.copy())

    print("Splitted")

    print("Fitting")

    scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))

    # The scaler is fitted on the training set only.
    # NOTE(review): train appears to still contain the label column, so the
    # label is scaled as well -- confirm this is intended.
    scaler.fit(train)

    print("Fitted")

    trainTmp = pd.DataFrame(scaler.transform(train.copy()),
                            columns=train.columns)
    # Apply the same transformation to the test data
    testTmp = pd.DataFrame(scaler.transform(test.copy()), columns=test.columns)
예제 #13
0
 def split_data(self, save_path, experiment_id):
     """Load a stored train/test split, or create one when none is loaded.

     When ``load_train_test_data`` returns a falsy value, the strategy
     registered under ``self.splitter_method_index`` is called with
     ``self.splitter_given_n`` and then ``DataSplitter.split_data`` is run
     with the same ``save_path`` and ``experiment_id``.
     """
     if self.load_train_test_data(save_path, experiment_id):
         # A stored split was loaded; nothing else to do.
         return

     split_strategy = self.methods[self.splitter_method_index]
     split_strategy(self.splitter_given_n)
     DataSplitter.split_data(self, save_path, experiment_id)
 def split_data(self, save_path, experiment_id):
     """Reuse a stored train/test split if one loads, otherwise build it.

     If ``load_train_test_data`` reports failure (a falsy result), the
     strategy stored in ``self.methods`` under ``self.splitter_method_index``
     is invoked with ``self.splitter_given_n``, followed by
     ``DataSplitter.split_data`` with the same arguments.
     """
     already_loaded = self.load_train_test_data(save_path, experiment_id)
     if already_loaded:
         return

     chosen_method = self.methods[self.splitter_method_index]
     chosen_method(self.splitter_given_n)
     DataSplitter.split_data(self, save_path, experiment_id)