Example #1
    def q3(self):
        print_question('3')

        # compare variances
        svd = self.svd
        svd.fit(self.tfidf)
        variances = svd.explained_variance_ratio_.cumsum().tolist()
        plt.plot(range(1, len(variances) + 1), variances, label="SVD")
        plt.xlabel('r')
        plt.ylabel('cumulative explained variance ratio')
        plt.title('Explained variance ratio vs. r')
        plt.show()

        # compare different r
        nmf_res = []
        svd_res = []
        all_r = [1, 2, 3, 5, 10, 20, 50, 100, 300]
        for r in all_r:
            km_svd = KMeans(n_clusters=2, max_iter=100, n_init=3)
            svd = TruncatedSVD(n_components=r, random_state=0)
            km_svd.fit(svd.fit_transform(self.tfidf))
            msg = 'svd with r=%s' % str(r)
            res_svd = self.show_result(km_svd.labels_, msg)

            km_nmf = KMeans(n_clusters=2, max_iter=100, n_init=3)
            nmf = NMF(n_components=r, init='random', random_state=0)
            km_nmf.fit(nmf.fit_transform(self.tfidf))
            msg = 'nmf with r=%s' % str(r)
            res_nmf = self.show_result(km_nmf.labels_, msg)

            nmf_res.append(res_nmf)
            svd_res.append(res_svd)

        plot_res(svd_res, all_r, 'SVD Result')
        plot_res(nmf_res, all_r, 'NMF Result')
    def e(self):
        print_question('e')
        hard_classifier = svm.LinearSVC(C=1000, random_state=42)
        soft_classifier = svm.LinearSVC(C=0.001, random_state=42)
        self.show_result(*self.svm_classify_SVD(hard_classifier, 'Hard Margin SVM'))
        self.show_result(*self.svm_classify_SVD(soft_classifier, 'Soft Margin SVM'))
        self.show_result(*self.svm_classify_NMF(hard_classifier, 'Hard Margin SVM'))
        self.show_result(*self.svm_classify_NMF(soft_classifier, 'Soft Margin SVM'))
    def i(self):
        print_question('i')
        params = [0.001, 0.1, 1, 10, 1000]
        penalties = ['l1', 'l2']

        for p in penalties:
            for c in params:
                lg = LogisticRegression(C=c, penalty=p, solver='liblinear')
                msg = 'Logistic Regression Classifier with c=%s, penalty=%s' % (str(c), p)
                self.show_result(*self.prob_classify_NMF(lg, msg))
                self.show_result(*self.prob_classify_SVD(lg, msg))
    def a(self):
        # count the documents in each category
        print_question('a')
        count = defaultdict(int)
        for d in self.train_data.target:
            count[d] += 1

        colors = rand_color_arr(8)
        counts = [count[i] for i in range(len(self.train_data.target_names))]
        plt.barh(self.train_data.target_names, counts, alpha=0.8, color=colors)
        plt.xlabel('Number of Documents')
        plt.ylabel('Class')
        plt.title('Number of Documents per Class')
        plt.show()
    def c(self):
        print_question('c')
        allDoc = []
        for cat in allCat:
            data = fetch_data([cat], 'train').data
            # merge all documents of this category into a single string
            allDoc.append(" ".join(data))

        vectors_full = self.to_vec(allDoc)
        tficf_train = self.to_tfidf(vectors_full)
        tficf_train_copy = tficf_train.copy()
        features = self.vectorizer.get_feature_names()
        for i in range(4):
            words = []
            for j in range(10):
                doc = tficf_train_copy[i]
                max_index = np.argmax(doc)
                words.append(features[max_index])
                tficf_train_copy[i, max_index] = 0
            print(allCat[i], words)
    def f(self):
        print_question('f')
        best_score = 0
        best_gamma = 0
        # note: the swept value is used as the LinearSVC penalty parameter C
        for gamma in [0.001, 0.01, 0.1, 1, 10, 100, 1000]:
            classifier = svm.LinearSVC(C=gamma, random_state=42)
            classifier.fit(self.tfidf_SVD, self.train_labels)
            scores = (cross_val_score(classifier, self.tfidf_SVD, self.train_labels, cv=5))
            score = scores.mean()
            if score > best_score:
                best_score = score
                best_gamma = gamma

            print("Accuracy: %.5f | gamma: %s" % (score, gamma))

        print("Best Accuracy: %.5f | gamma: %s" % (best_score, best_gamma))

        classifier = svm.LinearSVC(C=best_gamma, random_state=42)
        self.show_result(*self.svm_classify_SVD(classifier, 'Best SVM'))

        best_score = 0
        best_gamma = 0
        for gamma in [0.001, 0.01, 0.1, 1, 10, 100, 1000]:
            classifier = svm.LinearSVC(C=gamma, random_state=42)
            classifier.fit(self.tfidf_NMF, self.train_labels)
            scores = (cross_val_score(classifier, self.tfidf_NMF, self.train_labels, cv=5))
            score = scores.mean()
            if score > best_score:
                best_score = score
                best_gamma = gamma

            print("Accuracy: %.5f | gamma: %s" % (score, gamma))

        print("Best Accuracy: %.5f | gamma: %s" % (best_score, best_gamma))

        classifier = svm.LinearSVC(C=best_gamma, random_state=42)
        self.show_result(*self.svm_classify_NMF(classifier, 'Best SVM'))
    def j(self):
        print_question('j')
        # build training data
        train = fetch_data(cat_4, 'train')
        vectors = self.to_vec(train.data)
        tfidf = self.to_tfidf(vectors)
        nmf = self.to_NMF(tfidf)

        # build testing data
        test = fetch_data(cat_4, 'test')
        vectors_test = self.vectorizer.transform(test.data)
        tfidf_test = self.tfidf_transformer.transform(vectors_test)
        nmf_test = self.nmf.transform(tfidf_test)

        # build classifiers
        svc = svm.LinearSVC(C=1, random_state=42)
        nb = MultinomialNB()
        ovo = OneVsOneClassifier(svc)
        ovr = OneVsRestClassifier(svc)

        # train and test
        self.multi_classify(nb, nmf, nmf_test, train.target, test.target, 'naive bayes')
        self.multi_classify(ovo, nmf, nmf_test, train.target, test.target, 'one vs one')      
        self.multi_classify(ovr, nmf, nmf_test, train.target, test.target, 'one vs rest')
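
The q3 method above also calls a plot_res helper that is not part of the snippet. Purely as an illustration, here is a minimal sketch of what such a helper could look like, assuming show_result returns a single scalar score (e.g. a clustering metric) per tested r; the name, signature, and scalar-score assumption are guesses, not the original implementation.

import matplotlib.pyplot as plt

def plot_res(results, all_r, title):
    # Hypothetical sketch: plot one score per tested r.
    # 'results' is assumed to be a list of scalar scores returned by show_result.
    plt.plot(all_r, results, marker='o')
    plt.xscale('log')  # r spans several orders of magnitude (1 to 300)
    plt.xlabel('r (number of components)')
    plt.ylabel('score')
    plt.title(title)
    plt.show()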
Example #8
#!/usr/bin/env python3

import DHUPythonDev
from utils import print_question

print_question("""
Question 1.
Implement a class that satisfies the requirements below.

1-2.
[Use the defined module]
Implement a program that uses print to output the value tau defined in const above.
""")

print(DHUPythonDev.const.τ)

print_question("""
1-5.
[Use the defined module]
As a test of everything so far, implement a program that reads a number with input, treats it as a radius, and prints the area of the circle.
""")

print(DHUPythonDev.math.Math.circleArea(10))
Example #9
from sqlalchemy.orm import sessionmaker
from model import engine
from utils import print_questions, select_question, print_question

if __name__ == '__main__':
    Session = sessionmaker(bind=engine, autoflush=False, autocommit=False)
    session = Session()

    while True:
        question = select_question(session)
        if not question: continue
        print_question(question)
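
The utils module used in this example (and in Example #12 below) is not shown. A minimal sketch of what its print_question helper might look like follows; the Question attribute names (id, text, answers) and the return value are assumptions inferred from how it is called, e.g. last_id = print_question(question) in Example #12.

def print_question(question):
    # Hypothetical sketch: print a Question row and its attached answers,
    # then return the question id so the caller can refer back to it.
    print('[%s] %s' % (question.id, question.text))
    for answer in getattr(question, 'answers', []):
        print('    (%s) %s' % (answer.id, answer.text))
    return question.id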
Example #10
    def q2(self):
        print_question('2')
        km = KMeans(n_clusters=2, max_iter=200, n_init=5)
        km.fit(self.tfidf)
        self.show_result(km.labels_, 'question 2')
Example #11
    def q1(self):
        print_question('1')
        print("dimensions: ", self.tfidf.shape)
Example #12
if __name__ == '__main__':
    Session = sessionmaker(bind=engine)
    session = Session()

    subject = select_subject(session)
    selected_topic = select_topic(session, subject)

    questions = list()
    try:
        # optional command-line argument: resume from a given question id
        start_id = int(sys.argv[1])
        questions = session.query(Question).filter(Question.topic == selected_topic).filter(Question.id >= start_id).all()
    except (IndexError, ValueError):
        questions = session.query(Question).filter(Question.topic == selected_topic).all()

    for question in questions:
        last_id = print_question(question)
        if safe_prompt(session, "Edit this question? ") in ("Y",'y','Yes','yes'):
            while True:
                print_available_answers(session, last_id)
                command = re.match(match_command, safe_prompt(session, "Command: "))
                if not command: break
                if command.group('command') == 'u':
                    id_selected = int(command.group('ans_id'))
                    answer = session.query(Answer).filter(Answer.id == id_selected).first()
                    answer.question = None
                    session.add(answer)
                    session.commit()
                if command.group('command') == 'a':
                    ans_id = int(command.group('ans_id'))
                    q_id = int(command.group('q_id'))
                    answer = session.query(Answer).filter(Answer.id == ans_id).first()
    def h(self):
        print_question('h')
        lg = LogisticRegression()
        self.show_result(*self.prob_classify_NMF(lg, 'Logistic Regression Classifier'))
        self.show_result(*self.prob_classify_SVD(lg, 'Logistic Regression Classifier'))
    def g(self):
        print_question('g')
        nb = MultinomialNB()
        self.show_result(*self.prob_classify_NMF(nb, 'Naive Bayes Classifier'))
        self.show_result(*self.prob_classify_SVD(nb, 'Naive Bayes Classifier'))
    def d(self):
        print_question('d')
        print('SVD shape: %s' % str(self.tfidf_SVD.shape))
        print('NMF shape: %s' % str(self.tfidf_NMF.shape))
    def b(self):
        print_question('b')
        vectorizer_2 = CountVectorizer(analyzer='word', stop_words=stop_words, min_df=2, tokenizer=stemTokenizer)
        vectors_2 = vectorizer_2.fit_transform(self.train_data.data)
        print("terms num when min_df = 2: %d" % vectors_2.shape[1])
        print("terms num when min_df = 5: %d" % self.vectors.shape[1])
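
The b method references a stop_words list and a stemTokenizer callable defined elsewhere in the class's module. As a rough, hypothetical sketch of what such a tokenizer commonly looks like (the original may differ), one option based on NLTK's Snowball stemmer is:

import re
from nltk.stem.snowball import SnowballStemmer

stemmer = SnowballStemmer('english')

def stemTokenizer(text):
    # Hypothetical stand-in: keep alphabetic tokens, lowercase, and stem each one.
    tokens = re.findall(r'[a-z]+', text.lower())
    return [stemmer.stem(t) for t in tokens]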