Python dict2matrix 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: tools.util

메소드/함수: dict2matrix

hotexamples.com에서의 예제들: 2

Python dict2matrix - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python tools.util.dict2matrix의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 각 예제를 평가하시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

def classificationTest(
    train_set,
    train_label,
    test_set,
    test_label,
    lowFreqK=2,
    classifier=None):
    """Fit a bag-of-words classifier on the training texts and score it.

    Every text (train and test) is lower-cased, stripped of ',' and '.',
    split on whitespace and filtered against a small English stop-word
    list. Tokens whose count over the combined train + test texts is not
    greater than ``lowFreqK`` are then dropped. The vocabulary is built
    from the training documents only.

    Args:
        train_set: Training texts. Must support ``train_set[:].tolist()``
            (presumably a NumPy array or pandas Series -- confirm against
            callers).
        train_label: Labels for ``train_set``; passed to ``classifier.fit``.
        test_set: Iterable of test texts.
        test_label: True labels for ``test_set``.
        lowFreqK: Frequency threshold; only tokens that occur more than
            this many times are kept.
        classifier: Estimator exposing ``fit`` and ``predict``. When
            omitted, a new ``MultinomialNB()`` is created for this call, so
            no estimator instance is shared between calls. (The original
            author noted ``RandomForestClassifier(n_estimators=100)`` as an
            alternative.)

    Returns:
        A list of seven scores, in this order: accuracy, precision for
        label 1, precision for label 0, recall for label 1, recall for
        label 0, F1 for label 1, F1 for label 0.

    Note:
        The metrics are requested with ``pos_label=1`` and ``pos_label=0``,
        so the labels are assumed to be the binary classes 1 and 0 --
        adjust if the real label set differs.
    """
    # Single-argument parenthesised form prints the same text on Python 2
    # (print statement) and Python 3 (print function).
    print('classification test processing ...')

    # Build the default per call instead of in the signature, where it
    # would be created once at definition time and reused by every call.
    if classifier is None:
        classifier = MultinomialNB()

    num_train = len(train_set)
    documents = train_set[:].tolist()
    documents.extend(test_set)

    # Remove stop words.
    stoplist = set('for a of the and to in'.split())
    allTexts = [[
        word
        for word in text.lower().replace(',', '').replace('.', '').split()
        if word not in stoplist
    ] for text in documents]

    # Remove low-frequency words (counted over train and test together).
    frequency = defaultdict(int)
    for text in allTexts:
        for token in text:
            frequency[token] += 1
    allTexts = [[token for token in text if frequency[token] > lowFreqK]
                for text in allTexts]

    # Build the dictionary from the training documents only.
    dictionary = corpora.Dictionary(allTexts[:num_train])

    # How to turn the bag-of-words dicts into list-form vectors:
    # http://www.mamicode.com/info-detail-1518042.html
    num_terms = len(dictionary.keys())
    all_features = dict2matrix([dictionary.doc2bow(text) for text in allTexts],
                               num_terms).toarray()
    train_data_features = all_features[:num_train]
    test_data_features = all_features[num_train:]

    classifier = classifier.fit(train_data_features, train_label)
    result = classifier.predict(test_data_features)

    return [
        accuracy_score(test_label, result),
        precision_score(test_label, result, pos_label=1),
        precision_score(test_label, result, pos_label=0),
        recall_score(test_label, result, pos_label=1),
        recall_score(test_label, result, pos_label=0),
        f1_score(test_label, result, pos_label=1),
        f1_score(test_label, result, pos_label=0),
    ]

예제 #2

파일 보기

def classify_test_21(train_set,
                     train_label,
                     test_set,
                     test_label,
                     reverseVetorize=False,
                     lowFreqK=2):
    """Train a MultinomialNB bag-of-words classifier and print its results.

    Every text (train and test) is lower-cased, stripped of ',' and '.',
    split on whitespace and filtered against a small English stop-word
    list. Tokens whose count over the combined train + test texts is not
    greater than ``lowFreqK`` are then dropped. The vocabulary is built
    from the training documents only.

    Args:
        train_set: Training texts. Must support ``train_set[:].tolist()``
            (presumably a NumPy array or pandas Series -- confirm against
            callers).
        train_label: Labels for ``train_set``; passed to ``fit``.
        test_set: Iterable of test texts.
        test_label: True labels for ``test_set``.
        reverseVetorize: Not read by this function; kept so existing
            callers that pass it keep working.
        lowFreqK: Frequency threshold; only tokens that occur more than
            this many times are kept. Defaults to 2, the value that was
            previously hard-coded.

    Returns:
        None. The predictions, the accuracy, the label order ``[1, 0]`` and
        the confusion matrix for that label order are printed.
    """
    # Kept function-local, as in the original, so the block does not rely
    # on a module-level import being present.
    from sklearn.metrics import accuracy_score, confusion_matrix

    # Single-argument parenthesised prints produce the same text on
    # Python 2 (print statement) and Python 3 (print function).
    print('final_sa_method:classify_test')

    num_train = len(train_set)
    documents = train_set[:].tolist()
    documents.extend(test_set)

    # Remove stop words.
    stoplist = set('for a of the and to in'.split())
    allTexts = [[
        word
        for word in text.lower().replace(',', '').replace('.', '').split()
        if word not in stoplist
    ] for text in documents]

    # Remove low-frequency words (counted over train and test together).
    frequency = defaultdict(int)
    for text in allTexts:
        for token in text:
            frequency[token] += 1
    allTexts = [[token for token in text if frequency[token] > lowFreqK]
                for text in allTexts]

    # Build the dictionary from the training documents only.
    dictionary = corpora.Dictionary(allTexts[:num_train])

    # How to turn the bag-of-words dicts into list-form vectors:
    # http://www.mamicode.com/info-detail-1518042.html
    num_terms = len(dictionary.keys())
    all_features = dict2matrix([dictionary.doc2bow(text) for text in allTexts],
                               num_terms).toarray()
    train_data_features = all_features[:num_train]
    test_data_features = all_features[num_train:]

    # The original author noted RandomForestClassifier(n_estimators=100)
    # as an alternative estimator.
    model = MultinomialNB()
    model = model.fit(train_data_features, train_label)

    print("Predicting test labels...")
    result = model.predict(test_data_features)
    # Python 2's ``print 'result: ', result`` emits the literal, a
    # separating space, then str(result). Formatting it into one string
    # keeps that exact output on both Python 2 and Python 3.
    print('result:  %s' % (result,))

    print(accuracy_score(test_label, result))
    printlabels = [1, 0]  # must correspond to the actual class labels
    print(printlabels)
    print(confusion_matrix(test_label, result, labels=printlabels))