Example #1
0
def test_transformer_idf_setter():
    """Check that a transformer given only ``idf_`` matches a fitted one.

    A ``TfidfTransformer`` is fitted on the counts of ``JUNK_FOOD_DOCS``;
    a second, unfitted transformer receives the first one's ``idf_`` by
    plain attribute assignment. Both must transform the counts to equal
    dense arrays.
    """
    counts = CountVectorizer().fit_transform(JUNK_FOOD_DOCS)
    fitted = TfidfTransformer().fit(counts)

    # The second transformer is never fitted; only its idf_ is assigned.
    clone = TfidfTransformer()
    clone.idf_ = fitted.idf_

    actual = clone.transform(counts).toarray()
    expected = fitted.transform(counts).toarray()
    assert_array_equal(actual, expected)
Example #2
0
def test_transformer_idf_setter():
    """Verify that assigning ``idf_`` is enough to reproduce a fitted transformer.

    The reference transformer is fitted on term counts built from
    ``JUNK_FOOD_DOCS``. Its ``idf_`` is then assigned to a fresh
    ``TfidfTransformer`` and the two dense outputs are compared.
    """
    term_counts = CountVectorizer().fit_transform(JUNK_FOOD_DOCS)
    reference = TfidfTransformer().fit(term_counts)

    # No fit() call here: the idf_ assignment is the behaviour under test.
    candidate = TfidfTransformer()
    candidate.idf_ = reference.idf_

    candidate_dense = candidate.transform(term_counts).toarray()
    reference_dense = reference.transform(term_counts).toarray()
    assert_array_equal(candidate_dense, reference_dense)
Example #3
0
def tfidf_test(data, tf_vocab, idf_diag):
    """Convert ``data`` to a dense TF-IDF array using preset vocabulary and IDF.

    Args:
        data: Input passed to ``CountVectorizer.transform`` (sentences,
            according to the original note).
        tf_vocab: Used as the ``vocabulary`` of the ``CountVectorizer``.
        idf_diag: Assigned to ``idf_`` of a new ``TfidfTransformer``.

    Returns:
        The result of ``toarray()`` on the transformed counts.
    """
    # The vectorizer returns a sparse representation.
    term_counts = CountVectorizer(vocabulary=tf_vocab).transform(data)

    weigher = TfidfTransformer()
    weigher.idf_ = idf_diag
    return weigher.transform(term_counts).toarray()
def model_forTypeFinal(tags_final):
    """Predict a category for a "、"-separated tag string with the stored model.

    Steps:
      1. Keep only the tags that also appear in the stored feature-name list.
      2. Count them with a ``CountVectorizer`` restricted to the stored
         vocabulary.
      3. Weight the counts with a ``TfidfTransformer`` whose ``idf_`` is
         set from the stored IDF values.
      4. Run the pickled classifier and map its first prediction through
         the module-level ``id2c_mapping``.

    Args:
        tags_final: String of tags joined by the "、" character.

    Returns:
        ``id2c_mapping[prediction]`` for the single input document.

    Raises:
        OSError: If any of the model files under ``/home/stu/model`` cannot
            be opened.
    """
    feature_names = set(
        _eval_text_file('/home/stu/model/new_feature_names1.txt'))

    # Drop tags that are not in the stored feature-name list.
    tags_setted = list(set(tags_final.split("、")) & feature_names)
    # One space-joined string, i.e. a single-document corpus.
    x_test = [' '.join(tags_setted)]

    vocab = _eval_text_file('/home/stu/model/new_vocabulary.txt')
    idf_all = np.asarray(_eval_text_file('/home/stu/model/new_idf_all.txt'))

    count_v2 = CountVectorizer(vocabulary=vocab)
    counts_test = count_v2.transform(x_test)

    # The transformer is not fitted; the stored IDF values are assigned
    # directly instead.
    tfidftransformer = TfidfTransformer()
    tfidftransformer.idf_ = idf_all
    x_test = tfidftransformer.transform(counts_test)

    model_path = '/home/stu/model/new_clf.pickle'
    # SECURITY: unpickling can execute arbitrary code; the model file must
    # come from a trusted source.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)

    preds = model.predict(x_test).tolist()
    # Only one document was submitted, so the first prediction is the answer.
    return id2c_mapping[preds[0]]


def _eval_text_file(path):
    """Read the UTF-8 text file at ``path`` and return ``eval`` of its text."""
    with open(path, 'r', encoding='UTF-8') as handle:
        text = handle.read()
    # SECURITY: eval() runs whatever expression the file contains, so the
    # file must be trusted. ast.literal_eval would be safer if the files
    # are known to hold only Python literals.
    return eval(text)