def create():
    """Handle submission of the Add Car form.

    A ``CarForm`` is built from ``request.form`` with CSRF disabled. If it
    validates, a ``Car`` is constructed from the form and its name, image
    and top speed are inserted into the ``cars`` table.

    Returns:
        A redirect to ``/?add=success`` after a successful insert, or to
        ``/add-car?add=error`` when validation fails.
    """
    submitted = CarForm(request.form, csrf_enabled=False)

    # Bail out early so the happy path below stays unindented.
    if not submitted.validate_on_submit():
        return redirect('/add-car?add=error')

    new_car = Car(submitted)

    # Values are bound by the driver rather than formatted into the SQL text.
    insert_sql = "INSERT INTO `cars` (`name`, `image`, `topspeed`) VALUES (%s, %s, %s)"
    mycursor.execute(insert_sql, (new_car.name, new_car.image, new_car.topspeed))
    mydb.commit()

    return redirect('/?add=success')
def update():
    """Handle submission of the Edit Car form.

    An ``EditCarForm`` is built from ``request.form`` with CSRF disabled and
    the id of the car being edited is read from its ``id`` field. If the
    form validates, a ``Car`` is constructed from it and the matching row of
    the ``cars`` table is updated.

    Returns:
        A redirect to ``/?edit=success`` after a successful update, or to
        ``/edit-object?edit=error&id=<id>`` when validation fails, so the
        edit page can reload the same car.
    """
    form = EditCarForm(request.form, csrf_enabled=False)

    # Read before validation because the error redirect needs it as well.
    car_id = form.id.data

    if not form.validate_on_submit():
        return redirect(f"/edit-object?edit=error&id={car_id}")

    car = Car(form)

    # The id comes straight from POST data, so it is bound as a parameter
    # like the other values instead of being formatted into the statement.
    query = "UPDATE `cars` SET name=%s, image=%s, topspeed=%s WHERE id=%s"
    mycursor.execute(query, (car.name, car.image, car.topspeed, car_id))
    mydb.commit()

    return redirect('/?edit=success')
def crawl(url, webid):
    """Render *url* in a browser and refresh stored article titles.

    The page is loaded through the driver returned by ``get_driver()``,
    scrolled so that lazily loaded content is present in the page source,
    and parsed with BeautifulSoup. For every item produced by
    ``getdata(soup, webid)`` the ``article`` row whose ``url`` equals
    ``item[1]`` has its ``title`` set to ``item[0]``.

    Args:
        url: Address of the page to load.
        webid: Passed through unchanged to ``getdata``.

    Raises:
        Whatever the driver or parser raises while loading the page; only
        per-item database errors are caught (and printed).
    """
    driver = get_driver()
    try:
        driver.get(url)
        driver.implicitly_wait(30)

        js_down = "var q=document.documentElement.scrollTop=100000"
        js_up = "var q=document.documentElement.scrollTop=0"

        # NOTE(review): assumes only the scroll-down/sleep pair repeats and
        # the scroll back to the top happens once afterwards -- confirm.
        for _ in range(3):
            driver.execute_script(js_down)
            time.sleep(2)
        driver.execute_script(js_up)
        time.sleep(1)

        soup = BeautifulSoup(driver.page_source, 'lxml')

        for item in getdata(soup, webid):
            # One failing row must not abort the rest of the batch.
            try:
                print(item)
                cursor.execute(
                    'update article set title=%s where url=%s',
                    (item[0], item[1]),
                )
                mydb.commit()
            except Exception as e:
                print(str(e))
    finally:
        # Always release the browser, even when loading or parsing fails.
        driver.quit()
def delete():
    """Handle submission of the Delete Car form.

    A ``DeleteCarForm`` is built with CSRF disabled and the id of the car to
    remove is read from its ``id`` field. If the form validates, the
    matching row is deleted from the ``cars`` table.

    Returns:
        A redirect to ``/?delete=success`` after the delete is committed, or
        to ``/?delete=error`` when validation fails.
    """
    form = DeleteCarForm(csrf_enabled=False)
    car_id = form.id.data

    if not form.validate_on_submit():
        return redirect('/?delete=error')

    # The id is user-supplied, so it is bound as a parameter rather than
    # formatted into the statement text.
    query = "DELETE FROM `cars` WHERE id=%s"
    mycursor.execute(query, (car_id,))
    mydb.commit()

    return redirect("/?delete=success")
def video_crawler(url, webid):
    """Render *url* in a browser and store the videos found on it.

    The page is loaded through the driver returned by ``get_driver()``,
    scrolled down and back up once, and parsed with BeautifulSoup. Each item
    from ``get_data(soup, webid)`` is printed and then inserted into the
    ``video`` table. When the insert fails, the existing row whose ``url``
    equals ``item[0]`` has its ``fever`` set to ``item[4]`` instead, and the
    item is counted as already present. A summary line is printed at the
    end.

    Args:
        url: Address of the page to load.
        webid: Passed through unchanged to ``get_data``.

    Any ``Exception`` raised along the way is printed rather than
    propagated.
    """
    driver = get_driver()
    try:
        cursor = mydb.cursor()
        driver.get(url)
        time.sleep(6)
        driver.implicitly_wait(30)

        js_down = "var q=document.documentElement.scrollTop=100000"
        js_up = "var q=document.documentElement.scrollTop=0"
        driver.execute_script(js_down)
        time.sleep(2)
        driver.execute_script(js_up)
        time.sleep(1)

        soup = BeautifulSoup(driver.page_source, 'lxml')

        videos = get_data(soup, webid)
        for video in videos:
            print(video)

        insert_sql = (
            'insert into video(url, webid, source, date, fever, authorimgurl, title) '
            'VALUES (%s,%s,%s,%s,%s,%s,%s)'
        )
        # Same %s placeholder style as the insert: the two statements run on
        # the same cursor, so a '?' marker would not be bound.
        update_sql = 'update video set fever=%s where url=%s'

        already_present = 0
        for video in videos:
            try:
                cursor.execute(insert_sql, video)
                mydb.commit()
            except Exception as e:
                print(e)
                # Insert failed (presumably a duplicate url -- confirm the
                # table constraint); refresh the stored fever instead.
                try:
                    cursor.execute(update_sql, (video[4], video[0]))
                    # Commit so the refreshed value is actually persisted.
                    mydb.commit()
                except Exception as e:
                    print(e)
                already_present += 1

        print(datetime.datetime.now(), ' 更新视频,成功:%d 个, 已存在:%d 个。' % (len(videos) - already_present, already_present))
    except Exception as e:
        print(str(e))
    finally:
        # Release the browser on every exit path, including interrupts.
        driver.quit()
def crawl(url, webid):
    """Render *url* in a browser, store new articles and index keywords.

    Steps:
      1. Load the page through the driver from ``get_driver()``, scroll it,
         and parse the resulting source with BeautifulSoup.
      2. Insert every item from ``getdata(soup, webid)`` into ``article``;
         a failed insert is counted as an already existing article. A
         summary line is printed.
      3. Take the 100 rows of ``article`` with the highest ``articleid``,
         segment each title with ``jieba.cut(..., cut_all=True)``, insert
         each segment into ``article_keyword`` and increment ``fever`` on
         the matching ``keyword`` row.

    Args:
        url: Address of the page to load.
        webid: Passed through unchanged to ``getdata``.

    Any ``Exception`` raised along the way is printed rather than
    propagated.
    """
    driver = get_driver()
    try:
        cursor = mydb.cursor()
        driver.get(url)
        time.sleep(6)
        driver.implicitly_wait(30)

        js_down = "var q=document.documentElement.scrollTop=100000"
        js_up = "var q=document.documentElement.scrollTop=0"

        # NOTE(review): assumes only the scroll-down/sleep pair repeats and
        # the scroll back to the top happens once afterwards -- confirm.
        for _ in range(6):
            driver.execute_script(js_down)
            time.sleep(2)
        driver.execute_script(js_up)
        time.sleep(1)

        soup = BeautifulSoup(driver.page_source, 'lxml')

        articles = getdata(soup, webid)
        already_present = 0
        for article in articles:
            try:
                cursor.execute(
                    "insert into article(title,url,webid,source,imgurl,date) values (%s,%s,%s,%s,%s,%s)",
                    article,
                )
                mydb.commit()
            except Exception:
                # Narrowed from a bare except so Ctrl-C / SystemExit still
                # stop the crawl; a failed insert counts as "already exists".
                already_present += 1
        print(datetime.datetime.now(), ' 更新文章,成功:%d 篇, 已存在:%d 篇。' % (len(articles) - already_present, already_present))

        cursor.execute('select articleid,title from article ORDER BY articleid DESC LIMIT 100')
        recent = cursor.fetchall()
        for article_id, title in recent:
            for word in jieba.cut(title, cut_all=True):
                try:
                    cursor.execute(
                        'insert into article_keyword(articleid, keyword) VALUES (%s,%s)',
                        (article_id, word),
                    )
                    mydb.commit()
                    cursor.execute('update keyword set fever=fever+1 where name=%s', (word,))
                    mydb.commit()
                except Exception:
                    # Best effort: a rejected pair skips the fever bump for
                    # that word and the loop moves on (presumably pairs seen
                    # on an earlier run -- confirm the table constraint).
                    continue
        print(datetime.datetime.now(), ' 更新关键词完成')
    except Exception as e:
        print(str(e))
    finally:
        # Release the browser on every exit path, including interrupts.
        driver.quit()
def user_keyword_update(action):
    """Rebuild ``user_keyword`` rows from the click views.

    Args:
        action: ``1`` selects the daily path (reads
            ``user_keyword_daily_view`` and clears every ``user_click``
            row); any other value selects the instant path (reads
            ``user_keyword_update_instant_view`` and only touches clicks
            with ``times>10``).

    Flow: read the view, delete the ``user_keyword`` rows of the affected
    users, weight every view row, sum the weights per (userid, keyword),
    insert each user's 20 highest-weighted keywords, then delete the
    processed ``user_click`` rows. Returns early, after the
    ``user_keyword`` delete, when the view is empty.
    """
    cursor = mydb.cursor()

    is_daily = action == 1
    view_sql = (
        'select * from user_keyword_daily_view'
        if is_daily
        else 'select * from user_keyword_update_instant_view'
    )
    purge_sql = (
        'delete from user_keyword where exists(select * from user_click where user_click.userid=user_keyword.userid)'
        if is_daily
        else 'delete from user_keyword where exists(select * from user_click where user_click.userid=user_keyword.userid and times>10)'
    )

    cursor.execute(view_sql)
    rows = cursor.fetchall()
    cursor.execute(purge_sql)
    mydb.commit()

    if not rows:
        print(datetime.datetime.now(), '没有可更新的用户')
        return

    # Columns 0..3 are used as userid, keyword, date and times (the view
    # schema is not visible here -- confirm). Weight is
    # log((1 + date) / MAX_DATA) / log(1 / 60) * times.
    records = [
        {
            'userid': row[0],
            'keyword': row[1],
            'fever': math.log((1 + row[2]) / MAX_DATA) / math.log(1 / 60) * row[3],
        }
        for row in rows
    ]

    frame = pandas.DataFrame(records)
    summed = frame.groupby(['userid', 'keyword']).sum()

    insert_sql = 'insert into user_keyword(userid, keyword, fever) VALUES (%s,%s,%s)'
    user_count = 0
    for uid, group in summed.groupby('userid'):
        user_count += 1
        # Keys of this mapping are (userid, keyword) index tuples.
        fever_by_key = group.to_dict()['fever']
        ranked = sorted(
            ((uid, key[1], fever) for key, fever in fever_by_key.items()),
            key=lambda entry: entry[2],
            reverse=True,
        )
        for entry in ranked[:20]:
            cursor.execute(insert_sql, entry)
            mydb.commit()

    cleanup_sql = 'delete from user_click' if is_daily else 'delete from user_click where times>10'
    cursor.execute(cleanup_sql)
    mydb.commit()

    print(datetime.datetime.now(), ' 更新用户模型成功,共更新%d个用户' % user_count)
print('-' * 60) with open(file_name, 'rb') as f: data = f.read() print("词库名:", byte2str(data[0x130:0x338])) # .encode('GB18030') print("词库类型:", byte2str(data[0x338:0x540])) print("描述信息:", byte2str(data[0x540:0xd40])) print("词库示例:", byte2str(data[0xd40:startPy])) getPyTable(data[startPy:startChinese]) getChinese(data[startChinese:]) if __name__ == '__main__': # scel所在文件夹路径 in_path = '/Users/zhusa/Downloads/scel' # 输出词典所在文件夹路径 out_path = "coal_dict.txt" fin = [fname for fname in os.listdir(in_path) if fname[-5:] == ".scel"] for f in fin: f = os.path.join(in_path, f) scel2txt(f) # 保存结果 for count, py, word in GTable: print(word) cursor.execute('insert into keyword(name) values (%s)', (word, )) mydb.commit()