# Esempio n. 1 (Example 1) — score: 0
import MongodbConn
import cutWordsproject
import random
from sklearn import linear_model
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import roc_auc_score
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
# Python 2-only hack: reload(sys) restores the setdefaultencoding attribute
# (deleted from the sys module at interpreter startup) so the default
# str<->unicode codec can be switched to UTF-8 for the Chinese text below.
# NOTE(review): this does not exist in Python 3 — the script is Py2-only.
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

# Loader for the experiment collections stored in the 'new_shiyan' database.
load = cutWordsproject.DataParser()

# Raw negative/positive samples for the two data sources ('111' and 'kad').
data_nagitive_111 = load.loadData('new_shiyan', 'nagitive_111')
data_positive_111 = load.loadData('new_shiyan', 'positive_111')
data_positive_kad = load.loadData('new_shiyan', 'positive_kad')
data_nagitive_kad = load.loadData('new_shiyan', 'nagitive_kad')

# Per-source sample buckets — presumably filled by later code (truncated
# in this view); names are kept as-is for that reason.
positive_111 = []
nagitive_111 = []
positive_kad = []
nagitive_kad = []

# Combined training features and labels.
train = []
train_classify = []
# Esempio n. 2 (Example 2) — score: 0
import random
import MongodbConnLocal
import cutWordsproject
import MongodbConn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD
from scipy.io import mmwrite, mmread
from sklearn import feature_extraction
#<--------------------- Honestly, if Python doesn't get friendlier to Chinese soon, I'll turn these lines into a macro -------------------------------->
# Python 2-only hack: reload(sys) restores the setdefaultencoding attribute
# (deleted from the sys module at startup) so the default str<->unicode
# codec can be switched to UTF-8. Not available in Python 3.
import sys

reload(sys)
sys.setdefaultencoding('utf-8')
#<--------------------- Honestly, if Python doesn't get friendlier to Chinese soon, I'll turn these lines into a macro -------------------------------->

load = cutWordsproject.DataParser()  # instantiate the data-loading class
webParser = cutWordsproject.ContextExtraction()  # instantiate the main-text (web page body) extraction class
wordsParser = cutWordsproject.CutWords()  # instantiate the word-segmentation class
words2DataParser = cutWordsproject.Words2Data()  # instantiate the words-to-vector (quantization) class


def method1():
    conn = MongodbConn.MongoPipeline()
    conn.open_connection("new_shiyan")
    # wordsParser.loadUsrWordsTable('wordsTable.txt')
    datas = load.loadData('new_shiyan', 'kangduoai_n_1')
    dataSet = list()
    # f = open('corups.txt', 'a')
    num = 0
    for data in datas:
        if num % 100 == 0: