Ejemplo n.º 1
0
    def test_maxent_iteration(self):
        """Evaluate MaxEntClassifier and write the accuracy of each result to an .xls file.

        Prints the run configuration, calls ``MaxEntClassifier.test`` with the
        training data, training labels, best words and test data, then scores
        every element of the returned sequence against ``self.test_labels``
        with ``get_accuracy``. Elements whose scoring raises
        ``ZeroDivisionError`` are reported on stdout and left out of the
        output. The collected scores are written with
        ``Write2File.write_contents``.

        NOTE(review): each element of the returned sequence is presumably the
        predicted labels after one training iteration -- confirm against
        ``MaxEntClassifier.test``.
        """
        print("MaxEntClassifier iteration")
        print("---" * 45)
        print("Train num = %s" % self.train_num)
        print("Test num = %s" % self.test_num)
        print("maxiter = %s" % self.max_iter)

        from spa.classifiers import MaxEntClassifier

        classifier = MaxEntClassifier(self.max_iter)
        iter_results = classifier.test(
            self.train_data,
            self.train_labels,
            self.best_words,
            self.test_data,
        )

        # The timestamp is taken after classification so the file name reflects
        # when the results became available.
        name_pattern = "f_runout/MaxEnt-iteration-%s-train-%d-test-%d-f-%d-maxiter-%d-%s.xls"
        name_fields = (
            self.type,
            self.train_num,
            self.test_num,
            self.feature_num,
            self.max_iter,
            datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"),
        )
        filepath = name_pattern % name_fields

        results = []
        for predicted_labels in iter_results:
            try:
                score = get_accuracy(self.test_labels, predicted_labels,
                                     self.parameters)
            except ZeroDivisionError:
                print("ZeroDivisionError")
                continue
            results.append(score)

        Write2File.write_contents(filepath, results)
Ejemplo n.º 2
0
    def test_maxent_iteration(self):
        """Run MaxEntClassifier and dump the per-result accuracy rows to an .xls file.

        The method prints its configuration, obtains a sequence of results
        from ``MaxEntClassifier.test``, scores each entry against
        ``self.test_labels`` via ``get_accuracy`` and hands the list of scores
        to ``Write2File.write_contents``. An entry whose scoring raises
        ``ZeroDivisionError`` only produces a console message and is skipped.
        """
        print("MaxEntClassifier iteration")
        print("---" * 45)
        print("Train num = %s" % self.train_num)
        print("Test num = %s" % self.test_num)
        print("maxiter = %s" % self.max_iter)

        from spa.classifiers import MaxEntClassifier

        iter_results = MaxEntClassifier(self.max_iter).test(
            self.train_data, self.train_labels, self.best_words, self.test_data)

        # Output name encodes the run settings plus a timestamp taken after
        # the classifier has finished.
        filepath = (
            "f_runout/MaxEnt-iteration-%s-train-%d-test-%d-f-%d-maxiter-%d-%s.xls"
            % (
                self.type,
                self.train_num,
                self.test_num,
                self.feature_num,
                self.max_iter,
                datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"),
            )
        )

        results = []
        for _, labels in enumerate(iter_results):
            try:
                accuracy = get_accuracy(self.test_labels, labels, self.parameters)
            except ZeroDivisionError:
                print("ZeroDivisionError")
            else:
                results.append(accuracy)

        Write2File.write_contents(filepath, results)
Ejemplo n.º 3
0
    def write(self, filepath, classify_labels, i=-1):
        """Score ``classify_labels`` and write the resulting metrics to ``filepath``.

        :param filepath: destination passed to ``Write2File.write_contents``.
        :param classify_labels: predicted labels, compared against
            ``self.test_labels`` by ``get_accuracy``.
        :param i: when non-negative, row index into ``self.precisions`` that
            receives two values taken from the metrics, each divided by 100.
        """
        metrics = get_accuracy(self.test_labels, classify_labels, self.parameters)

        if i >= 0:
            # NOTE(review): rows 10 and 7 of the metrics table are presumably
            # percentages being rescaled to fractions -- confirm against
            # get_accuracy.
            first_value = metrics[10][1] / 100
            second_value = metrics[7][1] / 100
            self.precisions[i][0] = first_value
            self.precisions[i][1] = second_value

        Write2File.write_contents(filepath, metrics)
Ejemplo n.º 4
0
    def write(self, filepath, classify_labels, i=-1):
        """Compute accuracy metrics for ``classify_labels`` and save them.

        The metrics come from ``get_accuracy`` (using ``self.test_labels`` and
        ``self.parameters``) and are always written to ``filepath`` through
        ``Write2File.write_contents``. If ``i`` is zero or greater, entries
        ``[10][1]`` and ``[7][1]`` of the metrics, divided by 100, are first
        stored in ``self.precisions[i][0]`` and ``self.precisions[i][1]``.
        """
        scored = get_accuracy(self.test_labels, classify_labels, self.parameters)

        # Negative index: nothing to record, just persist the metrics.
        if i < 0:
            Write2File.write_contents(filepath, scored)
            return

        self.precisions[i][0] = scored[10][1] / 100
        self.precisions[i][1] = scored[7][1] / 100
        Write2File.write_contents(filepath, scored)
Ejemplo n.º 5
0
def test_dict():
    """
    Test the classifier based on the sentiment dictionary.

    Runs ``DictClassifier.analysis_file`` over the positive and the negative
    waimai corpus files (``start=3000``, ``end=3999`` for each), scores the
    concatenated predictions with ``get_accuracy`` against 1000 positive
    labels followed by 1000 negative labels, and writes the metrics to a
    timestamped ``.xls`` file under ``f_runout/``.

    The negative-corpus run used to be commented out while ``neg_results`` was
    still referenced, so the function always failed with ``NameError``; it is
    restored here.
    """
    print("DictClassifier")
    print("---" * 45)

    from spa.classifiers import DictClassifier

    ds = DictClassifier()

    # Sentiment analysis of a single sentence (kept for manual experiments).
    # The trailing notes give the result value before / after the classifier
    # was modified.
    # a_sentence = "剁椒鸡蛋好咸,土豆丝很好吃"    # result: before (1) / after (1)
    # a_sentence = "要是米饭再多点儿就好了"    # result: before (1) / after (0)
    # a_sentence = "要是米饭再多点儿就更好了"    # result: before (0) / after (0)
    # a_sentence = "不太好吃,相当难吃,要是米饭再多点儿就好了"    # result: before (1) / after (0)
    # result = ds.analyse_sentence(a_sentence)
    # print(result)

    # Sentiment analysis of the corpus stored in a file: positive samples.
    corpus_filepath = "D:/My Data/NLP/SA/waimai/positive_corpus_v1.txt"
    runout_filepath_ = "f_runout/f_dict-positive_test.txt"
    pos_results = ds.analysis_file(corpus_filepath,
                                   runout_filepath_,
                                   start=3000,
                                   end=4000 - 1)

    # Same slice of the negative corpus; required because neg_results is
    # concatenated with pos_results below.
    corpus_filepath = "D:/My Data/NLP/SA/waimai/negative_corpus_v1.txt"
    runout_filepath_ = "f_runout/f_dict-negative_test.txt"
    neg_results = ds.analysis_file(corpus_filepath,
                                   runout_filepath_,
                                   start=3000,
                                   end=4000 - 1)

    # NOTE(review): assumes each analysis_file call yields exactly 1000
    # labels for the 3000..3999 range -- confirm against DictClassifier.
    origin_labels = [1] * 1000 + [0] * 1000
    classify_labels = pos_results + neg_results

    filepath = "f_runout/Dict-waimai-%s.xls" % (
        datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
    results = get_accuracy(origin_labels, classify_labels, [1000, 1000, 0])

    Write2File.write_contents(filepath, results)