import re
import sys
import socket
from random import choice
from urlparse import urlparse

import requests
import socks  # PySocks

# `logger`, `download_poc`, the parse_* helpers, `_ID_REGEX`, and the
# GoogleSearchLimitError/GoogleSearchInitError exceptions are provided
# elsewhere in this project.


def init_proxy(proxy):
    res = urlparse(proxy)
    use_proxy = True
    if res.scheme == 'socks4':
        mode = socks.SOCKS4
    elif res.scheme == 'socks5':
        mode = socks.SOCKS5
    elif res.scheme == 'http':
        mode = socks.HTTP
    else:
        use_proxy = False
        logger.warning('Unknown proxy "%s", starting without proxy...' % proxy)
    if use_proxy:
        # Route every new socket through the proxy by monkey-patching
        # socket.socket with PySocks.
        socks.set_default_proxy(mode, res.hostname, res.port)
        socket.socket = socks.socksocket
        logger.info('Using proxy "%s"' % proxy)
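# Example (the address below is a placeholder, not part of this project):
# once init_proxy() has monkey-patched socket.socket, requests and every
# other socket-based client transparently goes through the proxy.
#
#   init_proxy('socks5://127.0.0.1:1080')
#   requests.get('http://beebeeto.com/pdb/?page=1')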
def download_work(args):
    if args.PROXY:
        init_proxy(args.PROXY)
    cookie = args.COOKIE if args.COOKIE else None
    n_success = 0
    n_fail = 0
    if args.poc != 'all':
        poc_id = args.poc
        if not re.search(_ID_REGEX, poc_id):
            logger.error('Invalid PoC id format, please re-enter.')
        else:
            if download_poc(poc_id, cookie):
                n_success += 1
            else:
                n_fail += 1
    else:
        logger.info('Downloading all PoCs from "beebeeto.com"')
        logger.warning('Existing PoCs will be overwritten, press [Enter] to continue.')
        raw_input()
        crawl_dic = {'http://beebeeto.com/pdb/?page=1': False}
        while False in crawl_dic.values():
            # Pick a random page that has not been crawled yet.
            crawl_url = choice([link for link, crawled in crawl_dic.items()
                                if not crawled])
            try:
                content = requests.get(crawl_url).content
                crawl_dic[crawl_url] = True
            except Exception as e:
                logger.error('Exception occurred "%s" (%s)' % (type(e).__name__, e))
                break
            if content:
                # Discover new pagination links, then download every PoC
                # listed on the current page.
                crawl_dic = parse_page_from_content(content, crawl_dic)
                ids = parse_poc_id_from_content(content)
                for poc_id in ids:
                    if download_poc(poc_id, cookie):
                        n_success += 1
                    else:
                        n_fail += 1
            else:
                # Empty response; stop crawling.
                break
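# download_work() leans on helpers defined elsewhere in this project:
# _ID_REGEX, download_poc(), parse_page_from_content() and
# parse_poc_id_from_content(). A minimal sketch of the two parsers,
# assuming PoC ids of the form "poc-2014-0001" and pagination links of
# the form "/pdb/?page=N" (both patterns are assumptions, not the
# project's actual ones):

_ID_REGEX_SKETCH = r'poc-\d{4}-\d+'

def parse_page_from_content_sketch(content, crawl_dic):
    """Merge newly discovered pagination links into the crawl dict."""
    for path in re.findall(r'href="(/pdb/\?page=\d+)"', content):
        crawl_dic.setdefault('http://beebeeto.com' + path, False)
    return crawl_dic

def parse_poc_id_from_content_sketch(content):
    """Return the de-duplicated PoC ids mentioned on one listing page."""
    return list(set(re.findall(_ID_REGEX_SKETCH, content)))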
def fetch_results(self, query):
    url_collection = []
    start = 0
    logger.info('Starting search with google: %s' % query)
    logger.warning('You can interrupt this process with [Ctrl+C]')
    next_url = None
    while True:
        try:
            if next_url:
                content = self.access(next_url)
            else:
                content = self.search(query, page_num=100, start=start)
        except GoogleSearchLimitError as e:
            logger.error('%s' % e)
            return url_collection
        except GoogleSearchInitError as e:
            logger.error('%s' % e)
            return url_collection
        except KeyboardInterrupt:
            return url_collection
        except Exception:
            continue
        if content:
            next_url = parse_next_url_from_content(content)
            temp_urls = parse_url_from_content(content)
            if len(temp_urls) > 0:
                url_collection.extend(temp_urls)
                logger.info('Collected %d results so far' % len(url_collection))
                start += 100
            else:
                logger.warning('No more results found, no longer continuing the search')
                return url_collection
            if not next_url:
                logger.warning('No more results found, no longer continuing the search')
                return url_collection


def _redirect_process(self, response):
    location = response.headers['location']
    if 'sorry' not in location:
        # Google rotated us to a new host; keep using it from now on.
        host = urlparse(location).scheme + '://' + urlparse(location).netloc
        self.host = host
        if self.debug:
            logger.warning('Host redirect detected: "%s"' % self.host)
            logger.info('New host (%s) used' % self.host)
    else:
        # A redirect to a ".../sorry" page means Google is demanding a captcha.
        if self.debug:
            logger.warning("Captcha verification detected, can't load a plugin to process it...")
            logger.warning('Exit...')
        sys.exit()


def fetch_work(args):
    if args.PROXY:
        init_proxy(args.PROXY)
    debug = not args.QUIET
    outfile = args.OUTFILE
    query = args.query
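# fetch_results() relies on parse_next_url_from_content() and
# parse_url_from_content(), defined elsewhere in this project. A rough
# sketch of the idea, assuming Google's classic result markup
# (<h3 class="r"><a href="...">) and a "Next" pagination anchor; the
# real markup and parsers may well differ:

def parse_url_from_content_sketch(content):
    """Pull the result URLs out of one Google results page."""
    return re.findall(r'<h3 class="r"><a href="([^"]+)"', content)

def parse_next_url_from_content_sketch(content):
    """Return the absolute URL of the next results page, or None."""
    match = re.search(r'<a href="(/search[^"]+)"[^>]*>Next', content)
    return 'https://www.google.com' + match.group(1) if match else None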