def get_delete():
    """Delete a user account after verifying the supplied credentials.

    Reads ``username``/``passwd`` from the posted form; only deletes when
    check_login confirms the pair. Returns a small HTML status fragment.
    """
    username = request.forms.get('username')
    passwd = request.forms.get('passwd')
    if check_login(username, passwd):
        sql.main(9, **{'name': username})  # opcode 9: delete user row
        return '<p><b>User has been deleted from database!</b></p>'
    # fix: original failure message left the <b> tag unclosed
    return '<p><b>Password wrong or User wrong , delete user Failed!</b></p>'
def blog_delete(md5url):
    """Remove a blog post by its md5 URL key.

    Comments on the post are deliberately kept — they are valuable data
    and must not be deleted along with the article.
    """
    import json
    response.set_header('Access-Control-Allow-Origin', '*')
    sql.main(10, **{'md5url': md5url})  # opcode 10: delete the post row
    payload = {'delete': True}
    return json.dumps(payload, ensure_ascii=False, skipkeys=True)
def blog_comment():
    """Store a user's grade for a blog post.

    The posted body carries a literal dict with ``md5url`` and ``grade``
    (grade is one of the levels 1-5).
    """
    import json
    import ast
    response.set_header('Access-Control-Allow-Origin', '*')
    # security fix: eval() on a request body executes arbitrary code sent
    # by the client; ast.literal_eval parses the same dict/str/int literals
    # but cannot run code.
    data = ast.literal_eval(request.body.readlines()[0].decode('utf8'))
    sql.main(7, **{'md5url': data['md5url'], 'grade': data['grade']})
    return json.dumps({'done': True}, ensure_ascii=False, skipkeys=True)
def crawl_sample(filename, ans, flag): soup = BeautifulSoup(ans, 'lxml') # 提取正文,富文本化 main = soup.article feature = {} if main == None: main = soup.find(id='article_content') if main == None: print('目标URL不是博文页面') return else: # 此时进入csdn的另外的主题里面 feature['md5url'] = filename feature['size'] = len(main.get_text()) feature['number_like'] = int(soup.find(id='btnDigg').dd.string) feature['number_reader'] = int(''.join( filter(str.isdigit, soup.find(class_='link_view').string))) feature['number_code'] = len(main.find_all(name='pre')) feature['number_photo'] = len(main.find_all(name='img')) feature['number_link'] = len(main.find_all(name='a')) else: feature['md5url'] = filename feature['size'] = len(main.get_text()) feature['number_like'] = int( soup.find(class_='left_fixed').find(class_='txt').string) feature['number_reader'] = int( soup.find(class_='btn-noborder').find(class_='txt').string) # feature['number_comment'] = int(soup.find(class_ = 'load_comment').span.string) # 之后的 grade and analyse_grade 都是用户的添加和我们的自然语言文本分析的结果 feature['number_code'] = len(main.find_all(name='pre')) feature['number_photo'] = len(main.find_all(name='img')) # feaure 存储进入数据库 print('样本入数据库结束') noise = main.find_all(class_='article_bar clearfix')[0] noise.decompose() # 删除标签 feature['number_link'] = len(main.find_all(name='a')) if flag != 0: sql.main(6, **feature) # 只有不是测试网而言的网页才一会进入aifeature表中存储对应的网页样本信息 else: return feature # 信息返还给我们的后端的对应路由 k = main.prettify().replace('"', '\'') # 为了高亮代码块,我们需要将csdn的css样式表插入进来,head头需要包含 ans = "<html><head><meta charset='utf8'></head><body>" + k + '</body></html>' pages = {'md5url': filename, 'content': ans} sql.main(4, **pages) print('文本入数据库结束') feature['blog_name'] = soup.title.string return feature
def loaddataset(self, filename):
    """Load every stored sample into the model's data and label sets.

    Each row contributes [size, reader, like, code, photo, link, 1.0]
    (the trailing 1.0 is the bias column) to ``datamat`` and its grade
    (column 4) to ``classlabels``.
    """
    rows = sql.main(5)
    self.datamat = np.mat(
        [[r[1], r[2], r[3], r[5], r[6], r[7], 1.0] for r in rows])
    self.classlabels = [r[4] for r in rows]
    self.nSampNum, self.nSampDim = np.shape(self.datamat)
    self.nSampDim -= 1  # the bias column is not a real feature dimension
def load_data_for_grade(inx=None):
    """Discretize samples for supervised learning on the grade target.

    When ``inx`` is None every sample is pulled from the database;
    otherwise ``inx`` is a single raw sample row (indices 1..6 used,
    a 0 is appended as the missing number_link column).

    Returns (data, label): data rows are
    [size, number_reader, number_like, number_code, number_photo,
     number_link, grade] with each feature mapped to a small ordinal
    bucket; label names the six feature columns.
    """
    from bisect import bisect_right

    def bucket(value, bounds):
        # 1-based index of the interval `value` falls into, given the
        # sorted upper bounds — replaces the repetitive if/elif chains.
        return bisect_right(bounds, value) + 1

    if inx is None:
        save = sql.main(5)  # fetch the whole sample space
    else:
        # fix: build a fresh list instead of inx.append(0), which
        # mutated the caller's list as a side effect.
        save = [inx + [0]]
    print('save', save)
    data = []
    for i in save:
        data.append([
            bucket(i[1], [1000, 2000, 3000, 5000]),    # size
            bucket(i[2], [200, 400, 600, 800, 1700]),  # number_reader
            bucket(i[3], [1, 4]),                      # number_like
            bucket(i[5], [1, 4]),                      # number_code
            bucket(i[6], [1, 3, 5]),                   # number_photo
            bucket(i[7], [1, 4]),                      # number_link
            int(i[4]),                                 # grade (target)
        ])
    label = ['content size', 'number_reader', 'number_like',
             'number_code', 'number_photo', 'number_link']
    return data, label
def main(ans):
    """Classify one page's feature dict with k-NN over the stored samples.

    The training vectors drop column 2 (number_reader), which becomes the
    label; ``ans`` supplies the query point in the same column order.
    """
    k = 20  # neighbour count (DB holds only ~10 samples by default)
    rows = sql.main(5)
    test = []
    label = []
    for row in rows:
        test.append([row[1], *row[3:]])
        label.append(row[2])
    ansp = [
        ans['size'],
        ans['number_like'],
        ans['grade'],
        ans['number_code'],
        ans['number_photo'],
        ans['number_link'],
    ]
    return classify(np.array(test), ansp, label, k)
def history():
    """Return every stored sample as a JSON array of feature dicts."""
    import json
    response.set_header('Access-Control-Allow-Origin', '*')
    # Column order matches the sample table layout.
    keys = ('md5url', 'size', 'number_reader', 'number_like',
            'grade', 'number_code', 'number_photo', 'number_link')
    records = [dict(zip(keys, row)) for row in sql.main(5)]
    return json.dumps(records, ensure_ascii=False, skipkeys=True)
def load_data_for_reader(inx=None):
    """Discretize samples for a decision tree predicting number_reader.

    Same machinery as load_data_for_grade, but the decision target is
    the reader count (last column) instead of the grade.

    When ``inx`` is None all samples come from the database; otherwise
    ``inx`` is a single raw row (indices 1..7 used).

    Returns (data, label): data rows are
    [size, number_like, grade, number_code, number_photo, number_link,
     number_reader] as ordinal buckets; label names the feature columns.
    """
    from bisect import bisect_right

    def bucket(value, bounds):
        # 1-based index of the interval `value` falls into.
        return bisect_right(bounds, value) + 1

    if inx is None:
        save = sql.main(5)
    else:
        save = [inx]
    print(save)
    data = []
    for i in save:
        data.append([
            bucket(i[1], [1000, 5000]),  # size
            bucket(i[3], [1, 5]),        # number_like
            bucket(i[4], [60, 80]),      # grade
            bucket(i[5], [1, 4]),        # number_code
            bucket(i[6], [1, 4]),        # number_photo
            bucket(i[7], [1, 4]),        # number_link
            bucket(i[2], [500, 1000]),   # number_reader (target)
        ])
    # fix: the original label list was copy-pasted from
    # load_data_for_grade and did not match this function's feature
    # columns (it lacked 'grade' and wrongly listed 'number_reader').
    label = ['content size', 'number_like', 'grade',
             'number_code', 'number_photo', 'number_link']
    return data, label
def check_login(username, passwd):
    """Return True when the stored password for ``username`` matches.

    Looks the user up via sql.main(1); an unknown user now yields False
    instead of blowing up on the missing row.
    """
    print(username, passwd)  # NOTE(review): this logs credentials — consider removing
    save = sql.main(1, **{'name': username})
    # fix: an unknown user returns an empty/None row — presumably; the
    # original save[1] would then raise instead of failing the login.
    if not save:
        return False
    # NOTE(review): plaintext password comparison — should be hashed.
    return save[1] == passwd
def blog_open(md5url):
    """Return the stored rich-text HTML for the given blog post key."""
    response.set_header('Access-Control-Allow-Origin', '*')
    record = sql.main(3, **{'md5url': md5url})
    # First row, second column holds the page bytes.
    return record[0][1].decode('utf8')
def get_signup():
    """Create a new login from the posted ``username``/``passwd`` fields."""
    username = request.forms.get('username')
    passwd = request.forms.get('passwd')
    # fix: request.forms.get returns None for a missing field; the
    # original stored a None user/password row without complaint.
    if not username or not passwd:
        return '<p><b>Username or password missing, signup failed!</b></p>'
    # NOTE(review): password is stored in plaintext — should be hashed.
    sql.main(2, **{'name': username, 'passwd': passwd})
    return '<p><b>Create login successfully!</b></p>'