def testRmTags():
    """Fetch one value from ``url_tb`` and print it after ``rm_tags`` processing.

    Opens a database connection with hard-coded settings, runs a single
    SELECT, and passes the first column of the first returned row to
    ``rm_tags``. Nothing is printed when the result is empty or that first
    value is falsy. The function returns ``None`` in every case.
    """
    # NOTE(review): connection settings (host, user, password) are hard-coded
    # here; consider loading them from configuration instead.
    row_id = 1
    db = DB("47.100.50.2", "root", "123", "ruanjian", charset="utf8")

    # NOTE(review): the statement contains no placeholder although a
    # one-element parameter tuple is passed, and it selects `id` rather than
    # page content -- confirm the intended query against DB.executeSelectSql.
    res_content = db.executeSelectSql(
        "SELECT id FROM url_tb WHERE id>6188 ORDER BY id",
        (row_id,),
    )

    # Guard: bail out unless there is a first row with a truthy first column.
    if not res_content or not res_content[0] or not res_content[0][0]:
        return

    first_value = res_content[0][0]
    print(rm_tags(first_value))
import time
import datetime
import json
import re
from Common import encryption
from Common import Reids
from Common import DB
from Common import Read_config
from Common.BaseCase import BaseCase

# ---------------------------------------------------------------------------
# Test data preparation: shared fixtures, evaluated once at import time.
# ---------------------------------------------------------------------------
cf = Read_config.ReadConfig()
db = DB.DB_enterprise()
# NOTE(review): this rebinds the name `re`, so the standard-library `re`
# module imported above is no longer reachable under that name in this module.
re = Reids.Redis()

logintoken = cf.get_userdatainfo("logintoken")
worksurfaceid = cf.get_project("worksurfaceid")
addemployeeid2 = cf.get_employeeinfo("addemployeeid2")
addemployeeid = cf.get_employeeinfo("addemployeeid")

# Current local date formatted as YYYY-MM-DD.
daytime = time.strftime("%Y-%m-%d", time.localtime(time.time()))
# Current UTC time as an ISO 8601 string (naive: no timezone offset attached).
utctime = datetime.datetime.utcnow().isoformat()


class test_project_team(BaseCase):
    """Test cases for the project team endpoints."""

    def test_add_teamemployee(self):
        """Add a member to the project team.

        Sends the case data registered under ``"test_add_teamemployee"`` with
        the login token as the header variable and
        ``"<addemployeeid2>,<projectid>"`` as the data variable.
        """
        # The configuration is re-read here rather than reusing the
        # module-level `cf`, so `projectid` reflects the file at call time.
        cf = Read_config.ReadConfig()
        projectid = cf.get_project("projectid")

        data_value = ",".join([addemployeeid2, projectid])
        data = self.get_case_data("test_add_teamemployee")
        check = self.send_request(
            data,
            headersvariable=logintoken,
            datavariable=data_value,
        )
visited_dict[href] = True visit_list.append(href) except Exception as e: print(repr(e)) pointingTo = ",".join(pointingTo) # print(pointingTo) db_rwlock.writer_lock.acquire() saveHtmlToDb(db, url_node, html, pointingTo) db_rwlock.writer_lock.release() if __name__ == "__main__": db = DB(DbParams["ip"], DbParams["user"], DbParams["password"], DbParams["db_name"], charset=DbParams["charset"]) pool = threadpool.ThreadPool(num_workers=3) # 建立一个拥有十个线程的线程池 count = 0 while len(visit_list) != 0: # 获得当前队列中的所有url链接,对它们的处理交给线程池多线程处理 # url_node = visit_list.pop() # print("ok") temp_url_list = visit_list.copy() visit_list.clear() args = [((db, url), {}) for url in temp_url_list] # print(args) reqs = threadpool.makeRequests(dealPage, args) [pool.putRequest(req) for req in reqs] pool.wait() # 等待将该层页面全部爬完
G.add_edge(id_, pointing_num_id) print(id_) def buildDiGraph(db): G = nx.DiGraph() id_list = get_url_tb_ids(db) print(id_list) args = [((db, G, id_list, id_), {}) for id_ in id_list] pool = threadpool.ThreadPool(num_workers=5) reqs = threadpool.makeRequests(addEdge, args) [pool.putRequest(req) for req in reqs] pool.wait() return G # layout = nx.spring_layout(G) # nx.draw(G, pos=layout, with_labels=True, hold=False) # plt.show() # for index in G.edges(data=True): # print(index) #输出所有边的节点关系和权重 if __name__ == "__main__": db = DB(DbParams["ip"], DbParams["user"], DbParams["password"], DbParams["db_name"]) G = buildDiGraph(db) pr = nx.pagerank_numpy(G) for key, value in pr.items(): db.executeUpdateSql("INSERT INTO url_pr(url_id,pr) VALUES(%s,%s)", (key, value))
from Common import Read_config
from Common import DB

cf = Read_config.ReadConfig()
db_en = DB.DB_enterprise()
db_user = DB.DB_user()

# ---------------------------------------------------------------------------
# Variables read from the configuration
# ---------------------------------------------------------------------------
notphone = cf.get_userdatainfo("not_exist_phone")
projectid = cf.get_project("projectid")
corporation_master_id = cf.get_userdatainfo("corporation_master_id")
enterpriseid = cf.get_enterpriseinfo("enterpriseid")
buildenterpriseid = cf.get_enterpriseinfo("buildenterpriseid")
checkenterpriseid = cf.get_enterpriseinfo("checkenterpriseid")

# ---------------------------------------------------------------------------
# SQL to execute (administrator user)
# ---------------------------------------------------------------------------
del_Adminuser_sql = " DELETE FROM tb_user WHERE phone = %s "

# ---------------------------------------------------------------------------
# SQL to execute (company / department)
# ---------------------------------------------------------------------------
del_company_sql = " DELETE FROM `company` WHERE corporation_master_id = %s "
# Includes both the default-created records and the ones added by tests.
del_employee_sql = " DELETE FROM `employee` WHERE company_id = %s "
del_department_sql = " DELETE FROM `department` WHERE company_id = %s "

# ---------------------------------------------------------------------------
# SQL to execute (engineering project)
# ---------------------------------------------------------------------------
del_project_sql = " DELETE FROM project WHERE id = %s "
del_project_company_sql = " DELETE FROM project_company WHERE project_id = %s "