Ejemplo n.º 1
0
    def make_use_w2v_big(self):
        """Evaluate a voting ensemble on the "big" Word2Vec vectors.

        The ensemble is built from a logistic regression, an RBF-kernel SVC
        and a distance-weighted k-nearest-neighbours classifier. The metrics
        returned by ``Evaluator.evaluate`` are forwarded to the visualizer.
        """
        features = self.__vectors_provider.get_w2v_big_vectors()
        labels = self.__data_source.get_y()

        # TODO here grid search

        # Members are instantiated in the same order they are voted on.
        ensemble = VotingClassifier([
            ('logistic', LogisticRegression(C=0.5, solver='liblinear')),
            ('svc', SVC(C=10, kernel='rbf')),
            ('knn', KNeighborsClassifier(algorithm='auto',
                                         metric='minkowski',
                                         weights='distance')),
        ])

        (cv_accuracy, cv_f1,
         fit_accuracy, fit_f1,
         holdout_accuracy, holdout_f1,
         expected, predicted) = Evaluator.evaluate(ensemble, features, labels)

        self.__visualizer.show_results(
            self.__CLASSIFIER_NAME, "model1", "Word2VecBig",
            cv_accuracy, cv_f1,
            fit_accuracy, fit_f1,
            holdout_accuracy, holdout_f1,
            expected, predicted)
Ejemplo n.º 2
0
    def make_use_w2v_with_tfidf(self):
        """Evaluate a linear-kernel SVC on the Word2Vec/TF-IDF vectors.

        The metrics returned by ``Evaluator.evaluate`` are handed to the
        visualizer together with a description of the model parameters.
        """
        features = self.__vectors_provider.get_w2v_tfidf_vectors()
        labels = self.__data_source.get_y()

        # TODO here grid search

        classifier = SVC(C=1, kernel='linear', probability=True)

        (cv_accuracy, cv_f1,
         fit_accuracy, fit_f1,
         holdout_accuracy, holdout_f1,
         expected, predicted) = Evaluator.evaluate(classifier, features, labels)

        self.__visualizer.show_results(
            self.__CLASSIFIER_NAME,
            "(C=1, kernel='linear', probability=True)",
            "Word2Vec&TF-IDF",
            cv_accuracy, cv_f1,
            fit_accuracy, fit_f1,
            holdout_accuracy, holdout_f1,
            expected, predicted)
Ejemplo n.º 3
0
    def make_use_w2v_with_tfidf(self):
        """Evaluate a 'sag' logistic regression on the Word2Vec/TF-IDF vectors.

        The metrics returned by ``Evaluator.evaluate`` are handed to the
        visualizer together with a description of the model parameters.
        """
        features = self.__vectors_provider.get_w2v_tfidf_vectors()
        labels = self.__data_source.get_y()

        # TODO here grid search

        classifier = LogisticRegression(C=1.0, solver='sag', n_jobs=-1)

        (cv_accuracy, cv_f1,
         fit_accuracy, fit_f1,
         holdout_accuracy, holdout_f1,
         expected, predicted) = Evaluator.evaluate(classifier, features, labels)

        self.__visualizer.show_results(
            self.__CLASSIFIER_NAME,
            "(C=1.0, solver='sag')",
            "Word2Vec&TF-IDF",
            cv_accuracy, cv_f1,
            fit_accuracy, fit_f1,
            holdout_accuracy, holdout_f1,
            expected, predicted)
Ejemplo n.º 4
0
    def make_use_w2v_with_tfidf(self):
        """Evaluate a distance-weighted k-NN on the Word2Vec/TF-IDF vectors.

        The metrics returned by ``Evaluator.evaluate`` are handed to the
        visualizer together with a description of the model parameters.
        """
        features = self.__vectors_provider.get_w2v_tfidf_vectors()
        labels = self.__data_source.get_y()

        # TODO here grid search

        classifier = KNeighborsClassifier(
            algorithm='auto',
            metric='minkowski',
            weights='distance',
        )

        (cv_accuracy, cv_f1,
         fit_accuracy, fit_f1,
         holdout_accuracy, holdout_f1,
         expected, predicted) = Evaluator.evaluate(classifier, features, labels)

        self.__visualizer.show_results(
            self.__CLASSIFIER_NAME,
            "(algorithm='auto', metric='minkowski', weights='distance')",
            "Word2Vec&TF-IDF",
            cv_accuracy, cv_f1,
            fit_accuracy, fit_f1,
            holdout_accuracy, holdout_f1,
            expected, predicted)
Ejemplo n.º 5
0
    def make_use_w2v_old(self):
        """Train on the old dataset using w2v trained on the new one.

        A linear-kernel SVC is evaluated and the metrics returned by
        ``Evaluator.evaluate`` are handed to the visualizer.
        """
        features = self.__vectors_provider.get_w2v_old_vectors()
        labels = self.__data_source.get_y()

        # TODO here grid search

        classifier = SVC(C=1, kernel='linear', probability=True)

        (cv_accuracy, cv_f1,
         fit_accuracy, fit_f1,
         holdout_accuracy, holdout_f1,
         expected, predicted) = Evaluator.evaluate(classifier, features, labels)

        self.__visualizer.show_results(
            self.__CLASSIFIER_NAME,
            "(C=1, kernel='linear', probability=True)",
            "Word2VecNewOld",
            cv_accuracy, cv_f1,
            fit_accuracy, fit_f1,
            holdout_accuracy, holdout_f1,
            expected, predicted)
Ejemplo n.º 6
0
    def make_use_w2v_old(self):
        """Train on the old dataset using w2v trained on the new one.

        A logistic regression with the 'sag' solver is evaluated and the
        metrics returned by ``Evaluator.evaluate`` are handed to the
        visualizer.
        """
        features = self.__vectors_provider.get_w2v_old_vectors()
        labels = self.__data_source.get_y()

        # TODO here grid search

        classifier = LogisticRegression(C=1.0, solver='sag', n_jobs=-1)

        (cv_accuracy, cv_f1,
         fit_accuracy, fit_f1,
         holdout_accuracy, holdout_f1,
         expected, predicted) = Evaluator.evaluate(classifier, features, labels)

        self.__visualizer.show_results(
            self.__CLASSIFIER_NAME,
            "(C=1.0, solver='sag')",
            "Word2VecNewOld",
            cv_accuracy, cv_f1,
            fit_accuracy, fit_f1,
            holdout_accuracy, holdout_f1,
            expected, predicted)
Ejemplo n.º 7
0
    def make_use_w2v_old(self):
        """Train on the old dataset using w2v trained on the new one.

        A distance-weighted k-nearest-neighbours classifier is evaluated and
        the metrics returned by ``Evaluator.evaluate`` are handed to the
        visualizer.
        """
        features = self.__vectors_provider.get_w2v_old_vectors()
        labels = self.__data_source.get_y()

        # TODO here grid search

        classifier = KNeighborsClassifier(
            algorithm='auto',
            metric='minkowski',
            weights='distance',
        )

        (cv_accuracy, cv_f1,
         fit_accuracy, fit_f1,
         holdout_accuracy, holdout_f1,
         expected, predicted) = Evaluator.evaluate(classifier, features, labels)

        self.__visualizer.show_results(
            self.__CLASSIFIER_NAME,
            "(algorithm='auto', metric='minkowski', weights='distance')",
            "Word2VecNewOld",
            cv_accuracy, cv_f1,
            fit_accuracy, fit_f1,
            holdout_accuracy, holdout_f1,
            expected, predicted)
Ejemplo n.º 8
0
    def make_use_w2v(self):
        """Evaluate a 'sag' logistic regression on the Word2Vec vectors.

        ``Evaluator.cross_probabilities`` is invoked first with the model,
        the data and the ``'w2v'`` method tag; afterwards the metrics returned
        by ``Evaluator.evaluate`` are handed to the visualizer.
        """
        features = self.__vectors_provider.get_w2v_vectors()
        labels = self.__data_source.get_y()

        # TODO here grid search

        classifier = LogisticRegression(C=1.0, solver='sag', n_jobs=-1)

        # Must run before evaluate(): both calls receive the same model object.
        Evaluator.cross_probabilities(
            model=classifier,
            x_all=features,
            y_all=labels,
            data_source=self.__data_source,
            method='w2v',
        )

        (cv_accuracy, cv_f1,
         fit_accuracy, fit_f1,
         holdout_accuracy, holdout_f1,
         expected, predicted) = Evaluator.evaluate(classifier, features, labels)

        self.__visualizer.show_results(
            self.__CLASSIFIER_NAME,
            "(C=1.0, solver='sag')",
            "Word2Vec",
            cv_accuracy, cv_f1,
            fit_accuracy, fit_f1,
            holdout_accuracy, holdout_f1,
            expected, predicted)