def gtaskManager(self, urls, extractSearchResults, proxy_flag=0, ua_flag=0):
    """Set up the shared proxy/user-agent globals and run the URL tasks.

    Args:
        urls: Passed through unchanged to ``AccessUrls.taskGenerator``.
        extractSearchResults: Passed through unchanged to
            ``AccessUrls.taskGenerator``.
        proxy_flag: When equal to 1 a ``ProxyManager`` is created; any
            other value leaves the global ``proxymgr`` set to ``None``.
        ua_flag: When equal to 1 the user-agent list is fetched with
            ``get_useragents``; any other value uses ``[None]``.

    Side effects:
        Rebinds the module globals ``proxymgr`` and ``useragents``, prints
        ``proxymgr`` to stdout, and hands the task generator to
        ``gtaskpool.runtasks``.
    """
    global proxymgr
    global useragents

    # NOTE(review): None presumably means "no log file" -- confirm against
    # gtaskpool.setlogging.
    task_log = None
    gtaskpool.setlogging(logging.INFO, task_log)

    proxy_list_urls = ["http://192.168.120.17:8014/proxy/get_http_proxy_list"]
    useragent_list_url = "http://192.168.120.17:8014/proxy/get_useragent_list"
    # NOTE(review): the meaning of the second tuple element (0) is defined by
    # ProxyManager and is not visible here.
    limited_urls = [('^https{0,1}://', 0)]

    if proxy_flag == 1:
        # NOTE(review): 30 * 60 and 8 * 60 look like durations in seconds --
        # confirm against ProxyManager.
        proxymgr = ProxyManager(
            get_http_proxies,
            limited_urls,
            {'refresh': True, 'interval': 30 * 60, 'delay': 8 * 60},
            *proxy_list_urls
        )
    else:
        proxymgr = None
    # Parenthesised form prints the same text on Python 2 and stays valid
    # syntax on Python 3.
    print(proxymgr)

    useragents = get_useragents(useragent_list_url) if ua_flag == 1 else None
    # Fall back to a single "no user agent" entry when nothing usable was
    # obtained (flag off, empty list, or None).
    if not useragents:
        useragents = [None]

    gtaskpool.runtasks(
        AccessUrls.taskGenerator(self, urls, extractSearchResults)
    )
def gtaskmanager(self, engine_type):
    """Set up the shared proxy/user-agent globals and run the engine tasks.

    Args:
        engine_type: Passed through unchanged to ``self.task_generator``.

    Side effects:
        Rebinds the module globals ``proxymgr`` and ``useragents`` and hands
        the task generator to ``gtaskpool.runtasks``.
    """
    global proxymgr
    global useragents

    # Swap in 'task_log.log' here to pass a log file name to setlogging
    # instead of None.
    task_log = None
    gtaskpool.setlogging(logging.INFO, task_log)

    proxy_list_urls = ["http://192.168.120.185:5500/get_google_http_proxy_list"]
    useragent_list_url = "http://192.168.120.17:8014/proxy/get_useragent_list"
    # Raw string keeps the regex escapes intact without relying on Python
    # passing unknown escape sequences through unchanged.
    # NOTE(review): the meaning of the second tuple element (1) is defined by
    # ProxyManager and is not visible here.
    limited_urls = [(r'^https://search\.disconnect\.me', 1)]

    # NOTE(review): 30 * 60 and 8 * 60 look like durations in seconds --
    # confirm against ProxyManager.
    proxymgr = ProxyManager(
        get_http_proxies,
        limited_urls,
        {'refresh': True, 'interval': 30 * 60, 'delay': 8 * 60},
        *proxy_list_urls
    )

    useragents = get_useragents(useragent_list_url)
    # Fall back to a single "no user agent" entry when the fetch yields
    # nothing usable (empty list or None).
    if not useragents:
        useragents = [None]

    # NOTE(review): ``self`` is passed explicitly on top of the attribute
    # lookup on ``self``; this is only correct if task_generator is not an
    # ordinary bound method -- confirm against its definition.
    gtaskpool.runtasks(self.task_generator(self, engine_type))
def gtaskmanager(self, engine_type):
    """Configure logging, proxies and user agents, then run the task pool.

    Args:
        engine_type: Forwarded as-is to ``self.task_generator``.

    Side effects:
        Assigns the module-level ``proxymgr`` and ``useragents`` globals and
        passes the generated tasks to ``gtaskpool.runtasks``.
    """
    global proxymgr
    global useragents

    # Replace None with 'task_log.log' to hand setlogging a log file name.
    task_log = None
    gtaskpool.setlogging(logging.INFO, task_log)

    purl1 = ["http://192.168.120.185:5500/get_google_http_proxy_list"]
    uurl1 = "http://192.168.120.17:8014/proxy/get_useragent_list"
    # A raw string is used so the backslash escapes reach the regex engine
    # verbatim and no invalid-escape warning is raised.
    # NOTE(review): what the trailing 1 in each tuple means is decided by
    # ProxyManager and cannot be confirmed from this function.
    limited_urls = [(r'^https://search\.disconnect\.me', 1)]

    # NOTE(review): interval/delay are presumably seconds (30 min / 8 min) --
    # verify against ProxyManager.
    refresh_options = {
        'refresh': True,
        'interval': 30 * 60,
        'delay': 8 * 60
    }
    proxymgr = ProxyManager(
        get_http_proxies, limited_urls, refresh_options, *purl1
    )

    # Use [None] as the fallback whenever no user agents came back, whether
    # that is an empty list or None.
    useragents = get_useragents(uurl1) or [None]

    # NOTE(review): task_generator is looked up on ``self`` and also receives
    # ``self`` as its first argument; verify it is not a regular bound method.
    gtaskpool.runtasks(self.task_generator(self, engine_type))
gtaskpool.setlogging(logging.INFO)

# Two alternative proxy / user-agent endpoints; only the "2" variants are
# used below.
purl1 = ["http://192.168.120.17:8014/proxy/get_http_proxy_list"]
purl2 = ["http://192.168.1.14:5500/get_http_proxy_list"]
uurl1 = "http://192.168.120.17:8014/proxy/get_useragent_list"
uurl2 = "http://192.168.1.14:5500/get_useragent_list"

# Create a ProxyManager if you need one.
# The pattern is a raw string so the regex escapes are passed through
# verbatim instead of relying on Python tolerating unknown escapes.
limited_urls = [(r"^http://www\.baidu\.com/s\?wd=apple&pn=\d+$", 1)]
proxymgr = ProxyManager(
    get_http_proxies,
    limited_urls,
    {"refresh": True, "interval": 30 * 60, "delay": 8 * 60},
    *purl2
)
# Or, if you don't want to refresh proxies periodically:
# proxymgr = ProxyManager(get_http_proxies, limited_urls,
#                         {'refresh': False}, *purl2)

# A user-agent list for HTTP requests, if you need one.
useragents = get_useragents(uurl2)
# Fall back to a single "no user agent" entry when nothing usable came back
# (empty list or None).
if not useragents:
    useragents = [None]

# The with-statement guarantees both output files are closed even when
# runtasks raises; the names stay bound at this scope while tasks run.
with open("result.txt", "w") as fresult, open("left.txt", "w") as fleft:
    # Optional args:
    #   @max_ongoing_tasks (default to 1000)
    gtaskpool.runtasks(task_generator())