Esempio n. 1
0
def get_delete():
    """Delete a user account after verifying their credentials.

    Reads ``username`` and ``passwd`` from the POSTed form, validates
    them with ``check_login`` and, on success, removes the user through
    the SQL dispatcher (op code 9).

    Returns:
        str: an HTML snippet describing the outcome.
    """
    username = request.forms.get('username')
    passwd = request.forms.get('passwd')
    if check_login(username, passwd):
        sql.main(9, **{'name': username})
        return '<p><b>User has been deleted from database!</b></p>'
    else:
        # Fixed: the failure message left its <b> tag unclosed.
        return '<p><b>Password wrong or User wrong , delete user Failed!</b></p>'
Esempio n. 2
0
def blog_delete(md5url):
    """Remove the blog post identified by *md5url* (op code 10).

    Comments attached to the post are deliberately kept: they are
    valuable data and must not be dropped together with the article.
    """
    import json
    # Allow cross-origin calls from the frontend.
    response.set_header('Access-Control-Allow-Origin', '*')
    sql.main(10, md5url=md5url)
    payload = {'delete': True}
    return json.dumps(payload, ensure_ascii=False, skipkeys=True)
Esempio n. 3
0
def blog_comment():
    """Store a user's grade (levels 1-5) for a blog post.

    The request body is expected to contain a dict literal such as
    ``{'md5url': ..., 'grade': ...}``; the pair is persisted via the SQL
    dispatcher (op code 7).

    Returns:
        str: JSON acknowledgement ``{"done": true}``.
    """
    import json
    import ast
    response.set_header('Access-Control-Allow-Origin','*')
    # SECURITY FIX: the body is untrusted client input.  The original code
    # passed it to eval(), which executes arbitrary Python.  literal_eval
    # accepts the same dict/str/number literals but cannot run code.
    data = ast.literal_eval(request.body.readlines()[0].decode('utf8'))
    sql.main(7, **{'md5url': data['md5url'], 'grade': data['grade']})
    return json.dumps({'done': True}, ensure_ascii=False, skipkeys=True)
Esempio n. 4
0
def crawl_sample(filename, ans, flag):
    """Parse a crawled CSDN blog page and extract its feature vector.

    Args:
        filename: md5 of the page URL, used as the sample's key.
        ans: raw HTML of the fetched page.
        flag: non-zero -> persist the features (op code 6) and the
            prettified page body (op code 4); zero -> just return the
            feature dict to the caller (test pages).

    Returns:
        dict of extracted features, or None when the page is not a
        blog article at all.
    """
    soup = BeautifulSoup(ans, 'lxml')
    # The article body, to be re-emitted as rich text.
    main = soup.article
    feature = {}
    if main is None:  # fixed: compare against None with `is`
        main = soup.find(id='article_content')
        if main is None:
            print('目标URL不是博文页面')
            return
        else:
            # Older CSDN theme: the counters live in different elements.
            feature['md5url'] = filename
            feature['size'] = len(main.get_text())
            feature['number_like'] = int(soup.find(id='btnDigg').dd.string)
            feature['number_reader'] = int(''.join(
                filter(str.isdigit,
                       soup.find(class_='link_view').string)))
            feature['number_code'] = len(main.find_all(name='pre'))
            feature['number_photo'] = len(main.find_all(name='img'))
            feature['number_link'] = len(main.find_all(name='a'))
    else:
        feature['md5url'] = filename
        feature['size'] = len(main.get_text())
        feature['number_like'] = int(
            soup.find(class_='left_fixed').find(class_='txt').string)
        feature['number_reader'] = int(
            soup.find(class_='btn-noborder').find(class_='txt').string)
        # The grade / analyse_grade columns are filled later by the user
        # and by the NLP analysis, not here.
        feature['number_code'] = len(main.find_all(name='pre'))
        feature['number_photo'] = len(main.find_all(name='img'))
        print('样本入数据库结束')
        # Strip the toolbar before counting links so its anchors are not
        # mistaken for article links.
        noise = main.find_all(class_='article_bar clearfix')[0]
        noise.decompose()  # remove the tag from the tree
        feature['number_link'] = len(main.find_all(name='a'))

    if flag != 0:
        sql.main(6, **feature)  # only non-test pages are stored in the aifeature table
    else:
        return feature  # hand the features back to the backend route
    k = main.prettify().replace('"', '\'')
    # To keep code blocks highlighted, the page is wrapped so CSDN's CSS
    # can be re-attached via the head element.
    ans = "<html><head><meta charset='utf8'></head><body>" + k + '</body></html>'
    pages = {'md5url': filename, 'content': ans}
    sql.main(4, **pages)
    print('文本入数据库结束')
    feature['blog_name'] = soup.title.string
    return feature
Esempio n. 5
0
 def loaddataset(self, filename):
     """Load every sample row from the database (op code 5).

     Fills ``self.datamat`` (six feature columns plus a constant bias
     column) and ``self.classlabels`` (column 4 of each row).  The
     *filename* parameter is unused and kept for interface
     compatibility.
     """
     rows = sql.main(5)
     # Feature columns 1,2,3,5,6,7 plus a 1.0 bias term per sample.
     self.datamat = np.mat(
         [[r[1], r[2], r[3], r[5], r[6], r[7], 1.0] for r in rows])
     self.classlabels = [r[4] for r in rows]
     self.nSampNum, self.nSampDim = np.shape(self.datamat)
     # The bias column does not count as a real feature dimension.
     self.nSampDim -= 1
Esempio n. 6
0
def load_data_for_grade(inx=None):
    """Build the discretised sample matrix used to learn the grade.

    Args:
        inx: optional single raw sample row
            ``[md5url, size, reader, like, grade, code, photo, ...]``;
            when None every row is pulled from the database (op code 5).

    Returns:
        tuple: ``(data, label)`` where each row of ``data`` is
        ``[size, reader, like, code, photo, link, grade]`` with every
        feature mapped to a small ordinal bucket (grade kept as-is),
        and ``label`` names the six feature columns.
    """
    if inx is None:  # fixed: identity comparison with None
        save = sql.main(5)    # every stored sample
    else:
        # Fixed: pad a copy instead of mutating the caller's list with
        # inx.append(0).
        save = [list(inx) + [0]]
    print('save', save)
    data = []
    for i in save:
        # Discretise every numeric column into ordinal buckets.
        p = []
        # content size
        if 0 <= i[1] < 1000: p.append(1)
        elif 1000 <= i[1] < 2000: p.append(2)
        elif 2000 <= i[1] < 3000: p.append(3)
        elif 3000 <= i[1] < 5000: p.append(4)
        else: p.append(5)
        # number_reader
        if 0 <= i[2] < 200: p.append(1)
        elif 200 <= i[2] < 400: p.append(2)
        elif 400 <= i[2] < 600: p.append(3)
        elif 600 <= i[2] < 800: p.append(4)
        elif 800 <= i[2] < 1700: p.append(5)
        else: p.append(6)
        # number_like
        if i[3] == 0: p.append(1)
        elif 1 <= i[3] <= 3: p.append(2)
        else: p.append(3)
        # number_code
        if i[5] == 0: p.append(1)
        elif 1 <= i[5] <= 3: p.append(2)
        else: p.append(3)
        # number_photo
        if i[6] == 0: p.append(1)
        elif 1 <= i[6] <= 2: p.append(2)
        elif 3 <= i[6] <= 4: p.append(3)
        else: p.append(4)
        # number_link
        if i[7] == 0: p.append(1)
        elif 1 <= i[7] <= 3: p.append(2)
        else: p.append(3)
        # target: the grade, un-bucketed
        p.append(int(i[4]))
        data.append(p)
    label = ['content size', 'number_reader', 'number_like', 'number_code', 'number_photo', 'number_link']
    return data, label
Esempio n. 7
0
def main(ans):
    """Classify *ans* against the stored samples with k-NN.

    Every stored row (op code 5) becomes a training vector
    ``[size, like, grade, code, photo, link]``; column 2 (the reader
    count) is used as the training label.
    """
    k = 20  # number of neighbours considered
    rows = sql.main(5)
    # Feature vector: the size column plus every column after the target.
    train = [[row[1], *row[3:]] for row in rows]
    label = [row[2] for row in rows]
    query = [
        ans['size'], ans['number_like'], ans['grade'], ans['number_code'],
        ans['number_photo'], ans['number_link']
    ]
    return classify(np.array(train), query, label, k)
Esempio n. 8
0
def history():
    """Return every stored sample row as a JSON array of objects."""
    import json
    response.set_header('Access-Control-Allow-Origin', '*')
    # Column order matches the aifeature table layout used elsewhere.
    fields = ('md5url', 'size', 'number_reader', 'number_like',
              'grade', 'number_code', 'number_photo', 'number_link')
    records = [dict(zip(fields, row)) for row in sql.main(5)]
    return json.dumps(records, ensure_ascii=False, skipkeys=True)
Esempio n. 9
0
def load_data_for_reader(inx=None):
    """Build the discretised sample matrix used to learn the reader count.

    Same decision-tree input as ``load_data_for_grade``, but the target
    (last column of every row) is the bucketed reader count instead of
    the grade.

    Args:
        inx: optional single raw sample row, already 8 columns wide;
            when None all rows come from the database (op code 5).

    Returns:
        tuple: ``(data, label)`` where each row of ``data`` is
        ``[size, like, grade, code, photo, link, reader]`` (all bucketed)
        and ``label`` names the feature columns.
    """
    if inx is None:  # fixed: identity comparison with None
        save = sql.main(5)
    else:
        save = [inx]
        print(save)
    data = []
    for i in save:
        p = []
        # content size
        if 0 <= i[1] < 1000: p.append(1)
        elif 1000 <= i[1] < 5000: p.append(2)
        else: p.append(3)
        # number_like
        if i[3] == 0: p.append(1)
        elif 1 <= i[3] < 5: p.append(2)
        else: p.append(3)
        # grade
        if 0 <= i[4] < 60: p.append(1)
        elif 60 <= i[4] < 80: p.append(2)
        else: p.append(3)
        # number_code
        if i[5] == 0: p.append(1)
        elif 1 <= i[5] <= 3: p.append(2)
        else: p.append(3)
        # number_photo
        if i[6] == 0: p.append(1)
        elif 1 <= i[6] <= 3: p.append(2)
        else: p.append(3)
        # number_link
        if i[7] == 0: p.append(1)
        elif 1 <= i[7] <= 3: p.append(2)
        else: p.append(3)
        # target: bucketed number_reader
        if 0 <= i[2] < 500: p.append(1)
        elif 500 <= i[2] < 1000: p.append(2)
        else: p.append(3)
        data.append(p)
    # NOTE(review): this label list mirrors load_data_for_grade, but the
    # actual feature order here is [size, like, grade, code, photo, link];
    # 'number_reader' as the 2nd name looks copy-pasted — confirm with the
    # decision-tree consumer before changing it.
    label = ['content size', 'number_reader', 'number_like', 'number_code', 'number_photo', 'number_link']
    return data, label
Esempio n. 10
0
def check_login(username, passwd):
    """Check *passwd* against the stored password for *username*.

    Looks the user up via the SQL dispatcher (op code 1) and compares
    column 1 of the returned row.

    Returns:
        bool: True when the user exists and the password matches.
    """
    print(username, passwd)
    save = sql.main(1, **{'name': username})
    # Fixed: an unknown user used to crash on save[1] instead of
    # failing the login cleanly.
    if not save:
        return False
    # SECURITY NOTE: passwords appear to be stored and compared in plain
    # text (see get_signup) — they should be hashed; confirm the schema.
    return save[1] == passwd
Esempio n. 11
0
def blog_open(md5url):
    """Return the stored rich-text HTML of the post keyed by *md5url*."""
    response.set_header('Access-Control-Allow-Origin', '*')
    rows = sql.main(3, md5url=md5url)
    # Column 1 of the first row holds the page content as bytes.
    return rows[0][1].decode('utf8')
Esempio n. 12
0
def get_signup():
    """Create a new account from the POSTed username/password form."""
    name = request.forms.get('username')
    password = request.forms.get('passwd')
    # NOTE(review): the password is forwarded as-is — presumably stored
    # in plain text; confirm and hash it before check_login compares it.
    sql.main(2, name=name, passwd=password)
    return '<p><b>Create login successfully!</b></p>'