コード例 #1
0
    def experimentDefaultSetting(self, trainset, testset):
        """Fit a default linear SVM on the training split and report
        accuracy and macro F1 on the dev-test split.

        `testset` is accepted for interface parity but not read here.
        """
        print("Reading data")
        texts, labels = DataService().read_corpus(trainset)
        classifier = SVM().construct_classifier("linear", 1.0)

        # Turn the raw text into an (n_samples, n_features) matrix.
        feature_matrix = DataService().vectorize_input(texts)
        label_map, labels = DataService().labels_string_to_float(labels)

        splits = DataService().test_dev_train_split(feature_matrix, labels)
        x_train, y_train, x_dev, y_dev, x_test, y_test = splits
        dev_splits = DataService().test_train_split(x_dev, y_dev)
        x_dev_train, y_dev_train, x_dev_test, y_dev_test = dev_splits

        fit_started = datetime.utcnow()
        print('Fitting training data on', len(x_train), 'Samples')
        classifier.fit(x_train, y_train)

        elapsed = (datetime.utcnow() - fit_started).seconds
        print("Training took", elapsed, 'seconds..')

        # Evaluate on the dev-test portion, not the final test split.
        predictions = classifier.predict(x_dev_test)
        print("Accuracy score:",
              accuracy_score(y_pred=predictions, y_true=y_dev_test))
        print("F1 score (macro):",
              f1_score(y_pred=predictions, y_true=y_dev_test, average='macro'))
コード例 #2
0
 def experiment_probabilities_binary(self):
     """Fit the default classifier on the binary (sentiment) corpus and
     collect posterior probabilities, class predictions and parameters.

     NOTE(review): the three computed values are bound to locals but not
     used or returned here — presumably inspected in a debugger.
     """
     texts, labels = DataService().read_corpus('trainset.txt', use_sentiment=True)
     train_x, train_y, test_x, test_y = DataService().test_train_split(texts, labels)
     model = ClassifierService().construct_classifier()
     model.fit(train_x, train_y)
     probabilities = model.predict_proba(test_x)
     predicted = model.predict(test_x)
     parameters = model.get_params()
コード例 #3
0
    def experiment_probabilities_multi(self):
        """Fit the default classifier on the multi-class corpus and print
        the posterior probabilities for the test split.

        Cleanup: removed the leftover debug statement ``print("bug stop")``
        and the unused ``params = classifier.get_params(deep=True)`` local.
        """
        x, y = DataService().read_corpus('trainset.txt', use_sentiment=False)
        x_train, y_train, x_test, y_test = DataService().test_train_split(x, y)
        classifier = ClassifierService().construct_classifier()
        classifier.fit(x_train, y_train)
        y_pred_prob = classifier.predict_proba(x_test)

        print("\nPosterior probabilities multi-class:")
        print('\t', y_pred_prob)
コード例 #4
0
    def experiment_binary(self):
        """Run the binary classification experiment end to end: read the
        sentiment-labelled corpus, split, fit, predict, and print scores."""
        texts, labels = DataService().read_corpus('trainset.txt', use_sentiment=True)

        # The service splits the whole dataset into 75% training / 25% test
        # via an internal splitpoint.
        train_x, train_y, test_x, test_y = DataService().test_train_split(texts, labels)

        pipeline = ClassifierService().construct_classifier()

        # The classifier is a scikit-learn Pipeline whose final estimator is
        # a MultinomialNB. fit() trains it in place on the training sentences
        # and their labels; it returns nothing we need here.
        pipeline.fit(train_x, train_y)

        # predict() runs the fitted pipeline over the held-out sentences and
        # yields one sentiment label per test sample.
        predictions = pipeline.predict(test_x)

        print("\nPrinting scores for binary problem")
        PrintScores().print_precision_score(y_test=test_y, y_pred=predictions)
        PrintScores().print_recall_score(y_test=test_y, y_pred=predictions)
        PrintScores().print_f1_score(y_test=test_y, y_pred=predictions)

        print("\nPrinting accuracy score")
        PrintScores().print_accuracy_score(y_test=test_y, y_pred=predictions)

        PrintScores().print_confusion_matrix(y_test=test_y, y_pred=predictions)
コード例 #5
0
ファイル: DataAdd.py プロジェクト: langqy/OrcTestToolsKit
    def __init__(self):
        """Build the data-add view: initialize the base view, attach the
        backing DataService, and connect the submit signal to saving.

        :return:
        """
        ViewAdd.__init__(self, def_view_data)

        # Service used to persist the record; __id tracks the edited row id.
        self.__service = DataService()
        self.__id = None

        # Persist the form contents when the submit signal fires.
        self.sig_submit.connect(self.__save)
コード例 #6
0
    def experiment_multi_class(self):
        """Run the multi-class classification experiment end to end:
        read the corpus without sentiment labels, split, fit, predict,
        and print the score report."""
        texts, labels = DataService().read_corpus('trainset.txt', use_sentiment=False)

        train_x, train_y, test_x, test_y = DataService().test_train_split(texts, labels)

        pipeline = ClassifierService().construct_classifier()
        pipeline.fit(train_x, train_y)
        predictions = pipeline.predict(test_x)

        print("\nPrinting scores for multi-class problem")
        PrintScores().print_precision_score(y_test=test_y, y_pred=predictions)
        PrintScores().print_recall_score(y_test=test_y, y_pred=predictions)
        PrintScores().print_f1_score(y_test=test_y, y_pred=predictions)

        print("\nPrinting accuracy score")
        PrintScores().print_accuracy_score(y_test=test_y, y_pred=predictions)

        PrintScores().print_confusion_matrix(y_test=test_y, y_pred=predictions)
コード例 #7
0
    def experimentBestModel(self, trainset, testset):
        """Train the pre-tuned "best" SVM on the training split and report
        macro F1 on the held-out test split.

        Cleanup: removed dead code — ``dev_sets`` (the cross-validation
        split was computed but never iterated), ``best_accuracy`` and
        ``best_classifier`` (assigned but never read).
        """
        print("Reading data")
        x, y = DataService().read_corpus(trainset)
        clf = SVM().construct_best_classifier()

        # Vectorize the text data and return an (n_samples, n_features) matrix.
        x_vec = DataService().vectorize_input(x)
        conversion_dict, y = DataService().labels_string_to_float(y)

        x_train, y_train, x_dev, y_dev, x_test, y_test = DataService(
        ).test_dev_train_split(x_vec, y)

        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)

        print("F1 score (macro):",
              f1_score(y_pred=y_pred, y_true=y_test, average='macro'))
コード例 #8
0
def main(params):
    """Tiny CLI dispatcher.

    ``params`` is an argv-like list: ``swap <env> <db>`` rewires an
    environment to a database; ``which <env>`` prints its current database.
    """
    data_service = DataService()
    env_manager = EnvManager(data_service)

    command = params[1]

    if command == "swap":
        env_manager.swap_db(params[2], params[3])
        print(params[2] + " swapped to: " + params[3])

    if command == "which":
        current = env_manager.get_current(params[2])
        msg = current["name"] + " - " + current["conf_data"]["DB_HOST"]
        print("current db: " + msg)
コード例 #9
0
    def experimentCombinatorialCrossValidation(self, trainset, testset):
        """Grid-search gamma and C for an RBF SVM with k-fold cross
        validation, then evaluate the best-scoring fold model on the test
        split.

        Bug fix: results were stored as ``cv_results[[C, gamma]]`` — a list
        is unhashable, so every grid iteration raised TypeError. A tuple key
        ``(C, gamma)`` is used instead. The loop variable ``set`` was also
        renamed to ``fold`` to stop shadowing the builtin.
        """
        print("Reading data")
        x, y = DataService().read_corpus(trainset)
        clf = SVM().construct_classifier("linear", 1.0)

        # Vectorize the text data and return an (n_samples, n_features) matrix.
        x_vec = DataService().vectorize_input(x)
        conversion_dict, y = DataService().labels_string_to_float(y)

        x_train, y_train, x_dev, y_dev, x_test, y_test = DataService(
        ).test_dev_train_split(x_vec, y)

        dev_sets = DataService().cross_validation_split(x_train, y_train)

        best_accuracy = -inf
        best_classifier = None

        cv_results = {}
        for gamma in arange(0.5, 1.4, 0.15):
            for C in arange(0.5, 2.1, 0.25):
                print("\nProcessing Gamma:", gamma, "C:", C)
                average_score = []
                for fold in dev_sets:
                    clf = SVM().construct_rbf_classifier(kernel='rbf',
                                                         gamma=gamma,
                                                         C=C)
                    validation_set = fold
                    union_set = DataService().construct_union_set(
                        fold.copy(), dev_sets.copy())

                    # fit on the rest of the data
                    clf.fit(union_set[0], union_set[1])

                    # validate on validation set
                    y_pred = clf.predict(validation_set[0])

                    score = f1_score(y_true=validation_set[1],
                                     y_pred=y_pred,
                                     average='binary')
                    average_score.append(score)
                score = round(mean(average_score), 3)
                # Tuple key: the original list key was unhashable.
                cv_results[(C, gamma)] = score
                print("Average F1 score for C:", str(C) + ".", score)

                # save the best model and use that to classify the testset
                if score > best_accuracy:
                    best_accuracy = score
                    best_classifier = clf

        y_pred = best_classifier.predict(x_test)
        print("F1 score (macro):",
              f1_score(y_pred=y_pred, y_true=y_test, average='macro'))
コード例 #10
0
    def experimentLinearKernel(self, trainset, testset):
        """Cross-validate the C parameter for a linear-kernel SVM and
        evaluate the best model on the test split.

        Bug fix: the original selected the best model via
        ``round(mean(average_score2), 3)`` — but ``average_score2`` was
        never appended to, so the mean of an empty list failed (or was
        NaN), no model was ever selected, and ``best_classifier`` stayed
        ``None``, crashing at predict time. Selection now uses the fold
        scores that are actually collected. Dead ``cv_results2`` /
        second-classifier remnants were removed, and the loop variable
        ``set`` renamed to ``fold`` (shadowed builtin).
        """
        print("Reading data")
        x, y = DataService().read_corpus(trainset)

        # Vectorize the text data and return an (n_samples, n_features) matrix.
        x_vec = DataService().vectorize_input(x)
        conversion_dict, y = DataService().labels_string_to_float(y)

        x_train, y_train, x_dev, y_dev, x_test, y_test = DataService(
        ).test_dev_train_split(x_vec, y)

        dev_sets = DataService().cross_validation_split(x_train, y_train)

        best_accuracy = -inf
        best_classifier = None

        cv_results = {}
        for C in arange(0.5, 2.25, 0.25):
            print("\nProcessing C:", C)
            average_score = []
            for fold in dev_sets:
                clf = SVM().construct_linear_classifier(penalty='l2', C=C)
                validation_set = fold
                union_set = DataService().construct_union_set(
                    fold.copy(), dev_sets.copy())

                # fit on the rest of the data
                clf.fit(union_set[0], union_set[1])

                # validate on validation set
                y_pred = clf.predict(validation_set[0])

                score = f1_score(y_true=validation_set[1],
                                 y_pred=y_pred,
                                 average='binary')
                average_score.append(score)
            cv_results[C] = mean(average_score)

            score = round(mean(average_score), 3)
            print("Average F1 score for C:", str(C) + ".", score)

            # save the best model and use that to classify the testset
            if score > best_accuracy:
                best_accuracy = score
                best_classifier = clf

        y_pred = best_classifier.predict(x_test)
        print("F1 score (macro):",
              f1_score(y_pred=y_pred, y_true=y_test, average='macro'))
コード例 #11
0
ファイル: Window.py プロジェクト: Sheep83/ExcelApp
        search['values'] = self.populateMenu(self.deptList)
        # menuChoices = ['Dept 1', 'Dept 2']
        # tkvar.set(menuChoices[0])
        print(menuChoices[1])
        # popupMenu = OptionMenu(root, tkvar, *menuChoices)
        Label(root, text="Choose a Dept").place(x=50, y=250)
        # popupMenu.place(x=50, y=300)
        search.place(x=50, y=250)
        sumText = Label(self, text=("Dept Total" + (self.getDeptTotal)))
        sumText.place(x=200, y=550)

        def change_dropdown(*args):
            print(self.tkvar.get())

        # link function to change dropdown
        self.tkvar.trace('w', change_dropdown)
        print(filename)

    def getDeptTotal(self):
        """Sum the currently selected department's values on Sheet1 and
        return the total as a display string.

        Relies on the module-level ``DataService`` instance created below
        the class.
        """
        sheet = self.currentFile["Sheet1"]
        selected_dept = self.tkvar.get()
        return str(DataService.sumDept(selected_dept, sheet))


# Script entry: build the root window and launch the application.
root = Tk()
root.geometry("800x600")
# NOTE(review): this rebinds the imported DataService *class* name to an
# instance; getDeptTotal above depends on this module-level instance, but
# the class can no longer be re-instantiated afterwards — consider renaming.
DataService = DataService()
app = Window(root)
app.mainloop()
コード例 #12
0
    def experimentFeatures(self, trainset, testset):
        """Train a linear SVM, pull the 200 most positive and 200 most
        negative features from its coefficients, rebuild the corpus with
        only those features, retrain, and report scores for both runs.

        Fixes: the first "Fitting training data on N Samples" message
        reported ``len(x_dev_train)`` although the model is fitted on
        ``x_train``; the unused ``non_zero`` locals were removed; the
        manual 200-iteration append loops became slices.
        """
        print("Reading data")
        x, y = DataService().read_corpus(trainset)
        clf = SVM().construct_classifier("linear", 1.0)

        # Vectorize the text data and return an (n_samples, n_features) matrix.
        x_vec = DataService().vectorize_input(x)
        conversion_dict, y = DataService().labels_string_to_float(y)

        x_train, y_train, x_dev, y_dev, x_test, y_test = DataService(
        ).test_dev_train_split(x_vec, y)
        x_dev_train, y_dev_train, x_dev_test, y_dev_test = DataService(
        ).test_train_split(x_dev, y_dev)

        start_time = datetime.utcnow()
        print('Fitting training data on', len(x_train), 'Samples')
        clf.fit(x_train, y_train)
        training_time = (datetime.utcnow() - start_time).seconds
        print("Training took", training_time, 'seconds..')

        y_pred = clf.predict(x_dev_test)
        print("Accuracy score:",
              accuracy_score(y_pred=y_pred, y_true=y_dev_test))
        print("F1 score (macro):",
              f1_score(y_pred=y_pred, y_true=y_dev_test, average='macro'))

        coef = clf.coef_

        def identity(tokens):
            """No-op preprocessor/tokenizer: the corpus is pre-tokenized."""
            return tokens

        # Re-vectorize so coefficient indices line up with feature names.
        vec = TfidfVectorizer(preprocessor=identity, tokenizer=identity)
        vec.fit_transform(x)
        names = vec.get_feature_names()
        coefs_and_features = list(zip(coef[0], names))
        list_sorted_pos = sorted(coefs_and_features,
                                 key=lambda pair: pair[0],
                                 reverse=True)
        list_sorted_neg = sorted(coefs_and_features, key=lambda pair: pair[0])
        # Keep the 200 strongest features from each end of the ranking.
        features = [name for _, name in list_sorted_pos[:200]]
        features += [name for _, name in list_sorted_neg[:200]]
        print("\nneg", list_sorted_neg[:100], "\npos", list_sorted_pos[:100])

        new_data = DataService().get_features_from_data(x, features)

        clf2 = SVM().construct_classifier("linear", 1.0)
        # Vectorize the text data and return an (n_samples, n_features) matrix.
        x_vec = DataService().vectorize_input(new_data)
        conversion_dict, y = DataService().labels_string_to_float(y)
        x_train, y_train, x_dev, y_dev, x_test, y_test = DataService(
        ).test_dev_train_split(x_vec, y)
        x_dev_train, y_dev_train, x_dev_test, y_dev_test = DataService(
        ).test_train_split(x_dev, y_dev)
        start_time = datetime.utcnow()
        print("\nTRIMMED DATA SET\n----------")
        print('Fitting training data on', len(x_dev_train), 'Samples')
        clf2.fit(x_dev_train, y_dev_train)

        training_time = (datetime.utcnow() - start_time).seconds
        print("Training took", training_time, 'seconds..')

        y_pred = clf2.predict(x_dev_test)
        print("Accuracy score:",
              accuracy_score(y_pred=y_pred, y_true=y_dev_test))
        print("F1 score (macro):",
              f1_score(y_pred=y_pred, y_true=y_dev_test, average='macro'))
コード例 #13
0
 def __init__(self, window, dataservice):
     """Store the window handle and the injected data service.

     Bug fix: the ``dataservice`` argument was accepted but ignored — a
     fresh ``DataService()`` was constructed instead, defeating the
     dependency injection. The injected instance is now used.
     """
     self.window = window
     self.dataservice = dataservice
コード例 #14
0
from DataService import DataService
from RecommenderEngine import RecommenderEngine
from Server import Server

# Wire the service graph (data layer -> recommender -> HTTP server) and run.
data_service = DataService()
engine = RecommenderEngine(data_service)
app_server = Server(data_service, engine)
app_server.run()
コード例 #15
0
ファイル: DataDef.py プロジェクト: langqy/OrcTestToolsKit
    def __init__(self):
        """Initialize the table model and attach a DataService backend."""

        ModelTable.__init__(self)

        # Register the service the base table uses for data access.
        service = DataService()
        self.usr_set_service(service)
コード例 #16
0
#!/usr/bin/python
import sys
from tkinter import *
from DataService import DataService
from EnvManager import EnvManager
from GuiService import GuiService

# Build the Tk root window and the service layer behind the GUI.
master = Tk()
data_service = DataService()
env_manager = EnvManager(data_service)
gui_service = GuiService(master, env_manager)

# Available database configurations; only their names feed the menus.
configs = data_service.get_db_opts()
values = [x["name"] for x in configs]

envs = data_service.get_envs()

gui_service.create_main_menu()

# One menu row per environment, pre-selected to its current database.
for i, item in enumerate(envs):
    item["current"] = env_manager.get_current(item["name"])["name"]
    gui_service.init_env_menu(item, values, i)

# Action buttons sit on the row after the last environment entry.
Button(master, text='Save',
       command=lambda: gui_service.save_changes(envs)).grid(row=len(envs),
                                                            column=1,
                                                            sticky=W,
                                                            pady=4)
Button(master, text='Reload',
       command=lambda: gui_service.reload_state(envs)).grid(row=len(envs),
                                                            column=2,