def test_6():
    """Smoke test: open the local test server's root page with Ghost."""
    from ghost import Ghost
    base_url = 'http://127.0.0.1:8888'
    ghost = Ghost()
    ghost.open(base_url)
def __init__(self, keyword): self.ghost = Ghost() self.kw = urllib.quote(keyword) self.site = { 'td': 'https://s.taobao.com/search?&q=', # 淘宝 'jd': 'http://search.jd.com/Search?keyword=', # 京东 'yd': 'http://search.yhd.com/c0-0/k', # 一号店 'yx': 'http://searchex.yixun.com/html?key=u', # 易迅 'lf': 'http://search.lefeng.com/search/noresult?keyWord=', # 乐蜂网 'an': 'http://www.amazon.cn/s/ref=nb_sb_noss_1/475-4397139-2107651?field-keywords=', # 亚马逊 'vl': 'http://s.vancl.com/search?k=', # 凡客诚品 'ge': 'http://search.gome.com.cn/search?question=', # 国美在线 'sg': 'http://search.suning.com/' # 苏宁易购 } self.list = ['td', 'jd', 'yx', 'lf', 'an', 'vl', 'ge', 'sg'] for s in self.list: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) ' 'AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/43.0.2357.132 Safari/537.36', 'Referer': urlparse(self.site[s]).scheme + '://' + urlparse(self.site[s]).netloc, 'Host': urlparse(self.site[s]).netloc } # print s, len(requests.get(self.site[s], headers=headers).content) if s == 'yx': # result = requests.get(self.site[s], headers=headers) with self.ghost.start() as session: session.open(self.site[s], headers=headers) page, resources = session.wait_for_page_loaded() print repr(page.content)
def test_1():
    """Open baidu.com and print the HTTP status plus the page content."""
    from ghost import Ghost
    ghost = Ghost()
    page, extra_res = ghost.open('http://www.baidu.com')
    print(page.http_status)
    print(ghost.content)
def load_map(self, file_name):
    """Load a level file and set up the player and the ghosts."""
    # Initialise the map class
    self.map = Map(file_name)
    # Place the player if the map defines a starting position
    player_pos = self.map.get_player_pos()
    if player_pos is not None:
        self.player = Player(player_pos)
    # Convert the level matrix for the A* pathfinding algorithm
    map_matrix = self.map.pathfinding_matrix()
    # Create one Ghost per (position, type) pair reported by the map.
    # Idiom fix: iterate the pairs directly instead of range(len(...)).
    ghosts = []
    for pos, map_type in self.map.get_ghost_pos():
        ghost = Ghost(pos, map_type)
        # Give the ghost the pathfinding matrix it navigates with
        ghost.load_matrix(map_matrix)
        ghosts.append(ghost)
    self.ghosts = ghosts
def break_protection(self, target):
    """Probe *target* for clickjacking protection.

    Writes a throwaway HTML page framing the target, loads it with Ghost,
    then checks the Ghost log for the X-Frame-Options refusal and the
    frame's final location for frame-busting scripts.
    """
    print('\nAttempting ClickJacking... \n')
    html = '''
    <html>
    <body>
    <iframe src="''' + target + '''" height='600px' width='800px'></iframe>
    </body>
    </html>'''
    html_filename = 'clickjack.html'
    # FIX: files were opened without being closed; use context managers.
    with open(html_filename, 'w+') as f:
        f.write(html)
    abs_path = os.path.abspath(os.path.dirname(__file__))
    log_filename = abs_path + '/../../Logs/' + 'Clickjacking.log'
    fh = logging.FileHandler(log_filename)
    ghost = Ghost(log_level=logging.INFO, log_handler=fh)
    with ghost.start() as session:
        session.wait_timeout = 50
        page, resources = session.open(html_filename)
        with open(log_filename, 'r') as log:
            log_text = log.read()
        if 'forbidden by X-Frame-Options.' in log_text:
            print('Clickjacking mitigated via X-FRAME-OPTIONS')
        else:
            href = session.evaluate('document.location.href')[0]
            if html_filename not in href:
                print('Frame busting detected')
            else:
                print(colored('Frame busting not detected, page is likely ' +
                              'vulnerable to ClickJacking', 'red'))
    logging.getLogger('ghost').handlers[0].close()
    os.unlink(log_filename)
    os.unlink(html_filename)
def init_monster_in_game(monster_name, monster_number, ground, lives, IA):
    """Create monster 1 or 2 of the requested type for the given ground.

    Monster 1 spawns at the left x with direction 1; monster 2 spawns at
    the right x with direction -1.
    """
    # Per-monster class and spawn height (y coordinate); unknown names
    # fall back to Goblin, matching the original else branches.
    monster_types = {
        "ghost": (Ghost, 417),
        "golem": (Golem, 410),
        "human": (Human, 420),
        "goblin": (Goblin, 430),
    }
    if ground == "1":
        start_x_monster_1, start_x_monster_2, end = 60, 850, 1210
    elif ground == "2":
        start_x_monster_1, start_x_monster_2, end = 60, 740, 1100
    else:
        start_x_monster_1, start_x_monster_2, end = 350, 650, 1050
    cls, y = monster_types.get(monster_name, monster_types["goblin"])
    if monster_number == 1:
        return cls(start_x_monster_1, y, end, lives, 0, IA, 1)
    # BUG FIX: the second Ghost previously received direction 1 while every
    # other second monster received -1 — an apparent copy-paste slip.
    return cls(start_x_monster_2, y, end, lives, 0, IA, -1)
def __init__(self, environment):
    """Set up game state: image placeholders, Pacman, and four ghosts."""
    self.run = True
    # Rendering resources; populated later by initGame().
    self.display = None
    self.pacmanImgs = None
    self.pacmanRight_image = None
    self.pacmanDown_image = None
    self.pacmanLeft_image = None
    self.pacmanUp_image = None
    self.ghost_image = None
    self.block_image = None
    self.dot_image = None
    # Environment (Environment) and Pacman
    self.environment = environment
    self.pacman = PacmanAgent(self.environment)
    self.wonGame = 0
    # Four ghosts placed at the corners of the central box.
    self.ghosts = []
    for gx, gy in ((6, 6), (6, 8), (8, 8), (8, 6)):
        self.ghosts.append(Ghost(self.environment, self.pacman, gx, gy))
    self.initGame()
def grab_url_screenshot(url):
    """
    Grab a screenshot of *url* and a 550x500 thumbnail of it.

    The file name is the SHA256 of the url.
    :param url:
    :return: public URL of the thumbnail, or None on failure
    """
    ret = None
    try:
        # Normalise the url (assume http:// when the scheme is missing)
        url_res = urlparse(url)
        if not url_res.scheme:
            url = "http://" + url
        # TODO: could Ghost be a singleton?
        ghost = Ghost()
        page, res = ghost.open(url)
        # Idiom fix: was `not page is None`.
        if page is not None and page.http_status == 200:
            url_sha256 = hashlib.sha256(url).hexdigest()
            image_path = os.path.join('url_previews', url_sha256 + ".png")
            full_path = os.path.join(settings.MEDIA_ROOT, image_path)
            ghost.capture_to(full_path)
            image_path = image_path.replace(".png", ".thumb.png")
            thumb_full_path = os.path.join(settings.MEDIA_ROOT, image_path)
            resize_and_crop(full_path, thumb_full_path, (550, 500))
            ret = urljoin(settings.BASE_URL, "uploads/" + image_path)
        else:
            logger.error("Failed to capture screenshot for {0}".format(url))
    except Exception as e:  # compat fix: was `except Exception, e`
        logger.exception(e)
    # BUG FIX: the computed thumbnail URL was never returned.
    return ret
def ghost_img(src):
    """Open *src* in Ghost and return the src attribute of #largeImg."""
    ghost = Ghost()
    page, resources = ghost.open(src)
    ghost.wait_for_page_loaded()
    result, resources = ghost.evaluate(
        "document.getElementById('largeImg').getAttribute('src');")
    del resources
    return result
def ghost_process(self, request):
    """Render *request*'s URL with Ghost and parse the resulting page.

    :return: the result of parse_one_news(), or [] when rendering fails.
    """
    # NOTE: removed the unused local helpers do_counts()/do_item() — they
    # were defined here but never called.
    url = request.url
    try:
        ghost = Ghost()
        with ghost.start() as session:
            page, extra_resouce = session.open(url)
            response = HtmlResponse(url, body=page.content.encode('UTF-8'),
                                    request=request)
    except Exception as e:
        # BUG FIX: logging takes %-style lazy arguments; the extra
        # positionals previously raised/garbled the log call.
        self.logger.error("ghost.py error: %s %s", e, url)
        return []
    return self.parse_one_news(response)
def clickjack(url):
    """Check whether *url* can be framed (clickjacking).

    Loads the target in an iframe and scans the ghost log for the
    X-Frame-Options refusal message.
    """
    html = '''
    <html>
    <body>
    <iframe src="''' + url + '''"></iframe>
    </body>
    </html>'''
    html_file = 'clickjack.html'
    log_file = 'test.log'
    # FIX: files were left open; use context managers.
    with open(html_file, 'w+') as f:
        f.write(html)
    logging.basicConfig(filename=log_file)
    logger = logging.getLogger('ghost')
    logger.propagate = False
    ghost = Ghost(log_level=logging.INFO)
    page, resources = ghost.open(html_file)
    ghost.exit()
    with open(log_file, 'r') as log:
        if 'forbidden by X-Frame-Options.' in log.read():
            print('Clickjacking mitigated')
        else:
            print('Clickjacking successful')
            print(os.getcwd())
class GhostVisitor:
    """Crawler that visits pages with Ghost and yields follow-up actions."""

    def __init__(self):
        self.ghost = Ghost()

    def GET(self, url):
        """Open *url*; yield link visits, then a form submission if any."""
        page, resources = self.ghost.open(url)
        if page.http_status != 200:
            print('Status', page.http_status, 'for', url)
            return
        yield from self.parse(page)
        js = 'document.getElementsByTagName("form").length'
        form_count, resources = self.ghost.evaluate(js)
        if form_count:
            yield self.submit_form, url

    def parse(self, page):
        """Yield (GET, absolute_url) pairs for every anchor on the page."""
        html = etree.HTML(self.ghost.content)
        for link in html.findall('.//a[@href]'):
            yield self.GET, urljoin(page.url, link.attrib['href'])

    def submit_form(self, url):
        """Re-open *url*, submit its first form, and parse the result."""
        page, resources = self.ghost.open(url)
        page, resources = self.ghost.fire_on("form", "submit",
                                             expect_loading=True)
        yield from self.parse(page)
def getHtml(self):
    """Fetch self.url in a Ghost session.

    Returns the rendered page content on HTTP 200, False otherwise.
    """
    ghost = Ghost()
    with ghost.start() as session:
        page, extra_resources = session.open(
            self.url, 'get', {}, None, None, None, True, 30)
        if page.http_status == 200:
            return session.content
    return False
def __init__(self, solutionFilePath, logFilePath, height, width, pillDensity,
             wallDensity, fruitProbability, fruitScore, timeMultiplier):
    """Initialise a game world with Pacman, three ghosts, and score state."""
    # Pacman starts bottom-left; the ghosts start top-right with ids 1..3.
    self.pacman1 = Pacman(0, height - 1)
    self.ghost1 = Ghost(width - 1, 0, 1)
    self.ghost2 = Ghost(width - 1, 0, 2)
    self.ghost3 = Ghost(width - 1, 0, 3)
    self.solutionFilePath = solutionFilePath
    self.logFilePath = logFilePath
    self.height = height
    self.width = width
    self.pillDensity = pillDensity
    self.wallDensity = wallDensity
    self.fruitProbability = fruitProbability
    self.fruitScore = fruitScore
    self.timeMultiplier = timeMultiplier
    # Total game time scales with the board area.
    self.time = timeMultiplier * width * height
    self.grid = [[]]
    self.hasFruit = False
    self.logger = Logger(self.solutionFilePath, self.logFilePath)
    self.numPills = 0  # pills pacman currently has
    self.totalPills = 0
    self.didSpawn = False
    self.fruitx = -1  # fruit position
    self.fruity = -1
    self.solution = []
def __init__(self, url=None):
    """Bind *url* (or the class-level default) and prepare the client."""
    if url:
        self.url = url
    assert self.url, "All clients must have a URL attribute"
    self._attributes = self._collect_attributes()
    self._class_model = self._setup_class_model()
    self._ghost = Ghost()
def process_request(self, request, spider):
    """Render flagged non-form requests through a Ghost/WebKit session.

    Returns an HtmlResponse carrying the post-JavaScript page body, or
    None to let the framework handle the request normally.
    """
    if spider.name in zhcw_scrape.settings.WEBKIT_DOWNLOADER:
        if type(request) is not FormRequest:
            # FIX: dict.has_key() was removed in Python 3; use `in`
            # (also valid in Python 2).
            if 'flag' in request.meta and request.meta['flag'] == 1:
                ghost = Ghost()
                session = ghost.start()
                # Neutralise storage / WebRTC fingerprinting surfaces.
                session.evaluate('window.localStorage=undefined')
                session.evaluate('window.sessionStorage=undefined')
                session.evaluate('window.RTCPeerConnection=undefined')
                session.evaluate(
                    'window.webkitRTCPeerConnection=undefined')
                session.evaluate('window.mozRTCPeerConnection=undefined')
                try:
                    session.open(request.url)
                except Exception:
                    # FIX: was a bare except; best-effort open — the page
                    # content is still read back below on timeout.
                    pass
                session.page = None
                result, resource = session.evaluate(
                    'document.documentElement.innerHTML')
                # Keep the session on the spider so it can run more JS later.
                spider.webkit_session = session
                renderedBody = result.encode('utf8')
                # The rendered body is the page after JavaScript execution.
                return HtmlResponse(request.url, body=renderedBody)
    return None
def index(request):
    """Django view: screenshot the requested URL, return a JPEG thumbnail."""
    dir_name = os.path.dirname(__file__)
    img_name = os.path.join(dir_name, "pic.jpg")
    if request.method == "POST":
        url = request.POST.get("url", "")
    else:
        url = request.GET.get("url", "")
    if not url:
        url = "http://www.bbc.uk.com"
    display = Display()
    display.start()
    try:
        # First pass: measure the rendered page dimensions.
        ghost = Ghost()
        ghost.open(url)
        width = int(ghost.evaluate("document.body.clientWidth")[0])
        height = int(ghost.evaluate("document.body.clientHeight")[0])
        # Second pass: capture at the measured viewport size.
        ghost = Ghost(viewport_size=(width, height))
        ghost.open(url)
        ghost.capture_to(img_name, selector="body")
        image = Image.open(img_name)
        image.thumbnail((128, 128), Image.ANTIALIAS)
        response = HttpResponse(mimetype="image/jpeg")
        image.save(response, "jpeg")
    finally:
        # FIX: the virtual display leaked whenever any step above raised.
        display.stop()
    return response
def scan_xss(method, vulns, url, fuzz, cookie, useragent, data, refer):
    """Inject an XSS payload into parameter *fuzz* (GET or POST body) and
    record a finding in *vulns* when an alert/confirm dialog fires."""
    # Alternative payload kept for reference:
    # payload = 'javascript://\'/</Title></sTyle></teXtarea></scRipt>--><svg" %0Aonload=confirm(42)//>*/prompt(42)/*<details/open/ontoggle=confirm`42` >'
    payload = 'jaVasCript:alert(1)//" name=alert(1) onErrOr=eval(name) src=1 autofocus oNfoCus=eval(name)><marquee><img src=x onerror=alert(1)></marquee>" ></textarea\></|\><details/open/ontoggle=prompt`1` ><script>prompt(1)</script>@gmail.com<isindex formaction=javascript:alert(/XSS/) type=submit>\'-->" ></script><sCrIpt>confirm(1)</scRipt>"><img/id="confirm( 1)"/alt="/"src="/"onerror=eval(id&%23x29;>\'"><!--'
    try:
        ghost = Ghost()
        with ghost.start() as x:
            result = 0
            # Optional request headers; this replaces the original four-way
            # branch over (cookie, refer) combinations.
            headers = {}
            if cookie and cookie != '':
                headers['Cookie'] = cookie
            if refer and refer != '':
                headers['Referer'] = refer
            # POST: fill and submit the form with the poisoned field.
            if method == 'POST' and fuzz != '':
                inject = dict(data)
                inject[fuzz] = inject[fuzz] + payload
                del inject['']
                page, extra_resources = x.open(
                    url, headers=headers, user_agent=useragent, method='post')
                result, resources = x.fill("form", inject)
                page, resources = x.call("form", "submit",
                                         expect_loading=True)
                result, resources = x.wait_for_alert(1)
                inject = url + ":" + fuzz + ":" + inject[fuzz]
            # GET: substitute the payload straight into the query string.
            if method == 'GET':
                inject = url.replace(fuzz + "=", fuzz + "=" + payload)
                page, extra_resources = x.open(
                    inject, headers=headers, user_agent=useragent)
                result, resources = x.wait_for_alert(1)
            # An alert fired -> reflected XSS confirmed.
            if result == '1':
                print("\t\t\033[93mXSS Detected\033[0m for ", fuzz, " with the payload :", payload)
                vulns['xss'] += 1
                vulns['list'] += 'XSS|TYPE|' + inject + '|DELIMITER|'
            else:
                pass
    except Exception as e:
        # ghost raises when a confirm() dialog interrupts the load — treat
        # that as a (possibly false-positive) detection.
        if "confirm" in str(e):
            print("\t\t\033[93mXSS Detected (False positive ?)\033[0m for ", fuzz, " with the payload :", payload)
            inject = url + ":" + fuzz + ":" + payload
            vulns['xss'] += 1
            vulns['list'] += 'XSS|TYPE|' + inject + '|DELIMITER|'
        else:
            pass
def _create_ghost(self, ghost_number, row_number):
    """Create one ghost and position it within its grid row and column."""
    ghost = Ghost(self)
    ghost_width, ghost_height = ghost.rect.size
    # Horizontal slot: one ghost-width margin plus two widths per column.
    ghost.x = ghost_width + 2 * ghost_width * ghost_number
    ghost.rect.x = ghost.x
    # Vertical slot computed the same way from the row index.
    ghost.rect.y = ghost.rect.height + 2 * ghost.rect.height * row_number
    self.ghosts.add(ghost)
def start(timeout=10000):
    """Create a headless Ghost session and wrap it in the crawler."""
    ghost = Ghost()
    session = ghost.start(
        download_images=False,
        show_scrollbars=False,
        wait_timeout=timeout,
        display=False,
        plugins_enabled=False)
    return ConstitutionalCourtCrawler(session)
def gethtml(URL):
    """Fetch *URL* with Ghost and return it parsed by BeautifulSoup.

    Returns None on failure. BUG FIX: previously `html` was unbound when
    open()/parsing raised, so the final `return html` itself raised
    NameError instead of reporting the original problem.
    """
    ghost = Ghost(wait_timeout=120)
    html = None
    try:
        page, resources = ghost.open(URL)
        html = BeautifulSoup(ghost.content)
    except Exception as EX:
        print(EX)
    return html
def __init__(self):
    """Start a Ghost session for the middleware."""
    # Pick a random User-Agent for this instance.
    # NOTE(review): `ua` is never used below — presumably it was meant to
    # be passed into the Session; confirm.
    ua = random.choice(self.user_agent_list)
    self.ghost = Ghost()
    self.se = Session(self.ghost, display=False, wait_timeout=60,
                      download_images=False)
    super(GhostMiddleware, self).__init__()
def vision(ghost):
    """Play the dream sequence: a ghostly figure delivers its message.

    Only triggers when *ghost* has reached heaven; otherwise does nothing.
    """
    # Idiom fix: was `if ghost.inHeaven == True:`.
    if ghost.inHeaven:
        ghost = Ghost()
        print("While sleeping, you have a strange dream....")
        print("A ghostly figure appears in front of you and begins to speak.\n")
        ghost.display_message()
def __init__(self, **kwargs):
    """Pop connection settings from kwargs; the rest configure Ghost."""
    # Default the page-load timeout unless the caller overrode it.
    kwargs.setdefault('wait_timeout', 60)
    self.user_id = kwargs.pop('user_id')
    self.access_token = kwargs.pop('access_token')
    self.server_host = kwargs.pop('server_host')
    self.server_port = kwargs.pop('server_port')
    # Remaining kwargs are forwarded verbatim to Ghost.
    self.ghost = Ghost(**kwargs)
    self.callbacks = {}
class Shazam(object):
    """ Provides methods for downloading Shazam history """

    def __init__(self, fb_email=None, fb_password=None):
        self.session = Ghost().start()
        self.facebook = Facebook(self.session, fb_email, fb_password)
        self.login_successful = False
        self.fat = None  # Facebook access token

    def login(self):
        """ Performs Shazam login via the Facebook login button.

        :return: bool - True if success, False - otherwise
        """
        if self.login_successful:
            return True
        if not self.facebook.login():
            return False
        # NOTE: the dead commented-out access-token/POST login flow that
        # used to live here was removed; only the button flow remains.
        myshazam_url = "http://www.shazam.com/myshazam"
        try:
            self.session.open(myshazam_url)
        # COMPAT FIX: `except Exception, e` is Python-2-only syntax; the
        # `as e` form works on Python 2.6+ and Python 3.
        except Exception as e:
            logging.error("Shazam login failed. Couldn't open myshazam page.")
            logging.error(str(e))
            return False
        try:
            self.session.click(".js-fblogin")
        except Exception as e:
            logging.error("Shazam login failed. Couldn't click login button.")
            logging.error(str(e))
            return False
def main(): """ Go to across pages """ if not b_extraction: global ghost ghost = Ghost() global session session = ghost.start(download_images=False, show_scrollbars=False, wait_timeout=5000, display=False, plugins_enabled=False) logger.info("Start - ČAK") session.open(url) list_of_links = None if session.exists("#mainContent_btnSearch") and not b_extraction: session.click("#mainContent_btnSearch", expect_loading=True) if session.exists("#mainContent_gridResult"): value, resources = session.evaluate("document.getElementById('mainContent_lblResult').innerHTML") logger.debug(value) records, pages = how_many(value, 50) #pages = 99 # hack for testing page_from = 1 logger.info("Checking records...") list_of_links = check_records(page_from, pages) if len(list_of_links) > 0: logger.info("Dowload new records") for record in list_of_links: #print(record)#,record["url"],record["id"]) # may it be wget? if not os.path.exists(join(documents_dir_path, record["id"] + ".html")): import urllib.request try: urllib.request.urlretrieve(record["url"], join(documents_dir_path, record["id"] + ".html")) except urllib.request.HTTPError as ex: logger.error(ex.msg, exc_info=True) #session.open(record["url"]) #response = str(urlopen(record["url"]).read()) #response = session.content #extract_data(response, record["id"]+".html") logger.info("Download - DONE") session.exit() ghost.exit() else: logger.info("Not found new records") list_of_links = None if list_of_links is not None: logger.info("Extract information...") extract_information(list_of_links) logger.info("Extraction - DONE") else: if b_extraction: logger.info("Only extract information...") extract_information(list_of_links=None) logger.info("Extraction - DONE") return True
def gethtml(URL):
    """Fetch *URL* with Ghost and return it parsed by BeautifulSoup.

    Returns None on failure. BUG FIX: on error, `html` was unbound, so
    both the `print(html)` in the handler and the final `return html`
    raised NameError and masked the real exception.
    """
    ghost = Ghost(wait_timeout=120)
    html = None
    try:
        page, resources = ghost.open(URL)
        html = BeautifulSoup(ghost.content)  # ,from_encoding='GB18030')
    except Exception as EX:
        print(EX)
        print(html)
    return html
class Client(object):
    """Base scraper client.

    Loads self.url in Ghost, evaluates each declared Attribute's JS query,
    and hands the raw results to _make_objects().
    """

    def __init__(self, url=None):
        if url:
            self.url = url
        assert self.url, "All clients must have a URL attribute"
        self._attributes = self._collect_attributes()
        self._class_model = self._setup_class_model()
        self._ghost = Ghost()

    def process(self):
        """Load the page, evaluate all attributes, build result objects."""
        self._load_ghost()
        attribute_results = self._process_attributes()
        self._object_results = self._make_objects(attribute_results)
        return self._object_results

    def _setup_class_model(self):
        """Build a namedtuple '<ClassName>Response' over attribute names."""
        class_name = self.__class__.__name__
        return namedtuple(class_name + "Response", self._attributes.keys())

    def _process_attributes(self):
        """Evaluate each attribute's query, unwrapping selected node data."""
        results = []
        for attribute_name, attribute in self._attributes.iteritems():
            result, resources = self._ghost.evaluate(attribute.query)
            # If a node was selected, return it's data
            if isinstance(result, dict):
                if 'data' in result:
                    result = result['data']
                elif 'selector' in result:
                    raise TypeError(
                        "The attribute {} returned a selector"
                        " instead of a node.".format(attribute_name))
            results.append(result)
        return results

    def _make_objects(self, attribute_results):
        """Subclasses turn raw attribute results into response objects."""
        raise NotImplementedError()

    def _collect_attributes(self):
        """Collect every Attribute instance declared on this class."""
        attrs = [(attr_name, attr)
                 for (attr_name, attr) in inspect.getmembers(self)
                 if isinstance(attr, Attribute)]
        return dict(attrs)

    def _load_ghost(self):
        """Open self.url and inject the bundled jQuery into the page."""
        page, extra_resources = self._ghost.open(self.url)
        # For local testing, page is None
        if page:
            # TODO should error better
            assert page.http_status < 400
        # Load jquery
        jquery_path = os.path.join(os.path.abspath(os.curdir), 'zester',
                                   'fixtures', 'jquery.min.js')
        jquery_text = open(jquery_path, 'r').read()
        result, resources = self._ghost.evaluate(jquery_text)
# Crawls the US search results with a global Ghost session: submits the
# date-range search, paginates via walk_pages() (resuming from
# current_page.ini when present), then runs extract_information() and
# clears the resume file on success.
# NOTE(review): this line was flattened by an earlier tool; the original
# indentation (and thus the exact if/else nesting) is not recoverable here,
# so the code is left byte-identical.
def main(): print(U"Start US") global ghost ghost = Ghost() global session session = ghost.start(download_images=False, show_scrollbars=False, wait_timeout=main_timeout, display=False, plugins_enabled=False) session.open(search_url) # print(session.content) records_per_page = 20 if view_data(date_from, records_per_page, date_to, days): response = session.content if b_screens: session.capture_to( join(screens_dir_path, "errors.png"), selector=".searchValidator") records = 0 if not session.exists("#ctl00_MainContent_lbError"): pages, records = how_many(response, records_per_page) # print(pages) logger.info("pages: %s, records %s" % (pages, records)) page_from = 0 # pages = 20 # load starting page if os.path.exists(join(out_dir, "current_page.ini")): with codecs.open(join(out_dir, "current_page.ini"), "r") as cr: page_from = int(cr.read().strip()) logger.debug("Start on page %d" % page_from) if pages is not None and records is not None: if (page_from + 1) > pages: logger.debug( "Loaded page number is greater than count of pages") page_from = 0 if pages != (page_from + 1): # parameter page is from zero last_page = page_from while (last_page + 1) != pages: last_page = walk_pages(last_page, pages) print("\n") logger.info("DONE - download") else: logger.debug("I am complete!") else: logger.error("View error - 'pages' or 'records' are None") return False else: logger.info("Not found new records") logger.info("Extract information...") result = extract_information(records) if result and os.path.exists(join(out_dir, "current_page.ini")): logger.info("DONE - extraction") os.remove(join(out_dir, "current_page.ini")) return True
def __init__(self, id, pw, displayFlag=False, download_images=False,
             prevent_download=["css"]):
    """Create a fresh Ghost instance and log in with the given credentials."""
    # NOTE(review): `prevent_download` has a mutable default and, together
    # with `download_images`, is never used in this constructor — confirm
    # whether they were meant to be forwarded to Ghost().
    self.ghost = Ghost(display=displayFlag, wait_timeout=60)
    self.currentPage = None
    self.login(id, pw)
def signNeu6():
    """Log in to bt.neu6.edu.cn and return the authenticated session."""
    ghost = Ghost()
    session = ghost.start()
    resp = session.open("http://bt.neu6.edu.cn/member.php")
    # Fill the login form; credentials come from module-level globals.
    session.set_field_value("[name=username]", 用户名)
    session.set_field_value("[name=password]", 密码)
    session.click("[type=checkbox]")
    session.click("[type=submit]")
    # Reload the member page now that we are logged in.
    session.open("http://bt.neu6.edu.cn/member.php")
    return session
def main(): ghost = Ghost() page, resources = ghost.open('http://dl.acm.org/citation.cfm?id=511146#') links = ghost.evaluate(khhkj) print links sys.stdout.flush()
def getCartoonUrl(self, url):
    """Open *url* in Ghost and return the cartoon image's src attribute.

    Returns False when *url* is None.
    """
    if url is None:
        # BUG FIX: was `return false`, which raised NameError — Python's
        # boolean literal is `False`.
        return False
    # TODO: validate the url more thoroughly
    ghost = Ghost()  # open webkit
    ghost.open(url)
    # Execute javascript in the page and pull out the image source.
    result, resources = ghost.evaluate(
        "document.getElementById('cp_image').getAttribute('src');")
    del resources
    return result
class Client(object):
    """Base scraper client: load self.url in Ghost and evaluate each
    declared Attribute's JavaScript query into response objects."""

    def __init__(self, url=None):
        if url:
            self.url = url
        assert self.url, "All clients must have a URL attribute"
        self._attributes = self._collect_attributes()
        self._class_model = self._setup_class_model()
        self._ghost = Ghost()

    def process(self):
        """Run the full pipeline: load, evaluate, and package results."""
        self._load_ghost()
        attribute_results = self._process_attributes()
        self._object_results = self._make_objects(attribute_results)
        return self._object_results

    def _setup_class_model(self):
        """Return a namedtuple type named '<ClassName>Response'."""
        class_name = self.__class__.__name__
        return namedtuple(class_name + "Response", self._attributes.keys())

    def _process_attributes(self):
        """Evaluate every attribute query, unwrapping node data dicts."""
        results = []
        for attribute_name, attribute in self._attributes.iteritems():
            result, resources = self._ghost.evaluate(attribute.query)
            # If a node was selected, return it's data
            if isinstance(result, dict):
                if 'data' in result:
                    result = result['data']
                elif 'selector' in result:
                    raise TypeError("The attribute {} returned a selector"
                                    " instead of a node.".format(attribute_name))
            results.append(result)
        return results

    def _make_objects(self, attribute_results):
        """Subclasses convert raw attribute results into objects."""
        raise NotImplementedError()

    def _collect_attributes(self):
        """Gather all Attribute instances declared on the class."""
        attrs = [(attr_name, attr)
                 for (attr_name, attr) in inspect.getmembers(self)
                 if isinstance(attr, Attribute)]
        return dict(attrs)

    def _load_ghost(self):
        """Open self.url and inject the bundled jQuery into the page."""
        page, extra_resources = self._ghost.open(self.url)
        # For local testing, page is None
        if page:
            # TODO should error better
            assert page.http_status < 400
        # Load jquery
        jquery_path = os.path.join(os.path.abspath(os.curdir), 'zester',
                                   'fixtures', 'jquery.min.js')
        jquery_text = open(jquery_path, 'r').read()
        result, resources = self._ghost.evaluate(jquery_text)
def run(self):
    """Worker loop: execute pipe actions in one Ghost session until a
    None sentinel arrives."""
    ghost = Ghost()
    with ghost.start(download_images=False) as session:
        while True:
            action = self.pipe.recv()
            if action is None:  # sentinel: shut down the worker
                break
            page = action.do(session)
            self.updatePage(session, page)
            self.sendResult(session, self.currentPage)
def getContent(self, url):
    """Return the content of *url*, falling back to the static fetcher
    when the Ghost page load times out.

    Lazily creates the shared Ghost instance and page on first use.
    """
    if self.gh is None:
        self.gh = Ghost()
        self.page, self.page_name = self.gh.create_page(600)
    try:
        self.page_resource = self.page.open(url, wait_onload_event=True)
    # COMPAT FIX: `except Exception, e` is Python-2-only syntax; the bound
    # exception was unused, so it is dropped entirely.
    except Exception:
        LelianLogger.log('main', logging.ERROR, u"Timeout to get page: %s", url)
        # Fall back to the plain (non-JS) page fetcher.
        self.content = self.staticPageContent.getContent(url)
        return self.content
# Runs one Pacman game to completion: each tick all players choose a move,
# the world advances, and the loop exits on ghost collision, all pills
# eaten (won), or time running out; a time bonus is added on a win.
# NOTE(review): this line was flattened by an earlier tool; the exact
# placement of the post-loop snapshot relative to the win-bonus block is
# not recoverable, so the code is left byte-identical.
def runGame(self): players = [ PacMan(self.world.pacManCoords, self.pacManSolution), Ghost(self.world.ghostCoords[0], self.ghostSolution), Ghost(self.world.ghostCoords[1], self.ghostSolution), Ghost(self.world.ghostCoords[2], self.ghostSolution) ] gameRunning = True won = False while gameRunning: oldPlayerCoords = [self.world.pacManCoords ] + self.world.ghostCoords decisions = [] for player in range(len(players)): currPlayer = players[player] validMoves = self.world.validMoves[player] choice = currPlayer.move(validMoves, self.world) decisions.append(choice) self._movePlayers(players, decisions) self.world.time -= 1 newPlayerCoords = [self.world.pacManCoords ] + self.world.ghostCoords if self._pacManGhostCollision(oldPlayerCoords, newPlayerCoords): gameRunning = False break self._removePills() self._removeFruit() if len(self.world.pillCoords) == 0: won = True gameRunning = False break if self.world.time == 0: gameRunning = False break self._addFruit(self.world) self.worldTracker.addSnapshot(self.world) self.world.validMoves = self.worldTracker.getValidMoves(self.world) if won and self.world.time != 0: percentTime = int((self.world.time / self.world.timeStart) * 100) score = self.world.score + percentTime self.world.score = score self.worldTracker.addSnapshot(self.world) self.score = self.world.score
def main(argv): if len(argv) < 2: sys.stderr.write("Usage: %s <url>\n" % (argv[0],)) return 1 ghost = Ghost(viewport_size=(1280, 1024)) page, resources = ghost.open(argv[1]) assert page.http_status==200 and 'bbc' in ghost.content ghost.capture_to('screenshot2.png') for r in resources: print r.url
def create_ghosts(self):
    """Create the four ghosts (ids 0-3) and add each to the group."""
    for ghost_id in range(4):
        self.ghost = Ghost(self.screen, self.settings, self.maze,
                           self.stats, ghost_id)
        self.ghosts.add(self.ghost)
def set_environment(view):
    """Create the global Ghost browser session; show the window if *view*."""
    global ghost
    ghost = Ghost()
    global session
    session = ghost.start(download_images=False, java_enabled=False,
                          show_scrollbars=False, wait_timeout=main_timeout,
                          display=False, plugins_enabled=False)
    if view:
        # Make the (normally headless) session visible for debugging.
        session.display = True
        session.show()
def test_main_page_ghost(self):
    """Serve the app on a local test server and fetch '/' with Ghost."""
    from ghost import Ghost
    import logging
    logging.basicConfig(level=logging.DEBUG)
    router = get_static_routes()
    router.add_handler('/', get_routes())
    with run_test_server(self.loop, router=router, port=9999) as httpd:
        url = httpd.url("/")
        print(url)
        meth = 'get'
        ghost = Ghost()
        page, extra_resources = ghost.open(url)
        assert page.http_status == 200
def spawn_ghosts(self):
    """Create the four named ghosts at their start positions."""
    # (x, y, sprite path, name) for each ghost, in spawn order.
    specs = (
        (-35, 260, 'images/ghost/inkey/inkey1.png', "Inkey"),
        (0, 260, 'images/ghost/pinky/pinky1.png', "Pinky"),
        (0, 204, 'images/ghost/blinky/blinky1.png', "Blinky"),
        (35, 260, 'images/ghost/clyde/clyde1.png', "Clyde"),
    )
    for x, y, image, name in specs:
        self.ghosts.add(Ghost(self.screen, x, y, image, name))
def download(url, timeout=30):
    """Render *url* with Ghost, reveal the play-count widget, and return
    the resulting page HTML."""
    from ghost import Ghost
    ghost = Ghost(wait_timeout=timeout, download_images=False, display=False)
    ghost.open(url)
    # Click the play-count element and wait for its data to appear.
    ghost.click(b'.playNums')
    ghost.wait_for_text("播放量数据")
    content = ghost.content
    ghost.exit()
    return content
def test1():
    """Search ebay for 'plane' and capture the results page to a PNG."""
    url = "http://www.ebay.com/"
    gh = Ghost()
    # Create a new page and load ebay's landing page.
    page, page_name = gh.create_page()
    page_resource = page.open(url, wait_onload_event=True)
    # Fill the search box and click the search button.
    page.set_field_value("#gh-ac", "plane")
    page.click("#gh-btn")
    # Wait for the results page, then save a screenshot.
    page.wait_for_selector("#e1-15")
    page.capture_to("plane.png")
def signYaohuo():
    """Log in to yaohuo.me, perform the daily sign-in, and return the
    evaluated head nodes of the final page."""
    ghost = Ghost()
    session = ghost.start()
    resp = session.open("http://yaohuo.me/waplogin.aspx")
    # Log in; credentials come from module-level globals.
    session.set_field_value("[name=logname]", 用户名)
    session.set_field_value("[name=logpass]", 密码)
    session.click("[type=submit]", btn=0)
    session.wait_for_selector("[href='/signin/signin.aspx']")
    # Visit the sign-in page and submit the daily message.
    session.open("http://yaohuo.me/signin/signin.aspx")
    session.set_field_value("[name=content]", "today is a new day")
    session.click("[name=g]")
    session.wait_for_selector(".tip")
    return session.evaluate(
        "document.documentElement.getElementsByTagName('head')")
def screenshot(url,target): ghost = Ghost(wait_timeout=4) print "Do u want to provide any credentials \n" choice=raw_input() if choice.lower()=='y': print colored("[-] Enter Username and Password ",'green') username=raw_input() password=raw_input() ghost.open(url,auth=(username,password)) ghost.capture_to(str(time.time())+'.png') os.system('mv *.png ./screenshots') print colored("[-] Screenshot Succesfull catpured and Saved @ %s/screen.png"%(os.getcwd()),'green') else: pass
def login(self, url, userName, password, headers):
    """Drive the site's login dialog with Ghost, leaving the authenticated
    session on self.s."""
    from ghost import Ghost
    self.s = Ghost().start()
    self.s.open(url, user_agent=settings.USER_AGENT)
    # Open the login dialog.
    self.s.wait_for_selector('#loginUrl')
    self.s.click('#loginUrl')
    self.s.wait_for_selector('#login-user-btn', 10)
    # Fill credentials and submit.
    self.s.set_field_value('#user_name', userName)
    self.s.set_field_value('#user_password', password)
    self.s.click('#login-user-btn')
    # Wait until the logged-in user name appears in the header.
    self.s.wait_for_selector('span.user-name')
    self.s.show()
    self.s.sleep(13)
def make_pages(pn, c):
    """Render the markup for page *pn* and cache a JPEG capture of it."""
    ghost = Ghost(download_images=True)
    root = app_root + '/cache'
    # Renamed from `file` to avoid shadowing the builtin.
    filename = "page_" + pn + "_" + str(c) + ".jpg"
    markup = get_markup(pn, True)
    # Load the markup directly into the frame and wait for it to settle.
    ghost.main_frame.setHtml(markup)
    ghost.wait_for_page_loaded()
    ghost.capture_to(root + '/' + filename)
def init(details): d=details.copy() del d["Keywords"] del d["Gender"] print "[INFO] Starting Google+ Engine" from selenium import webdriver print "[INFO] Runnning Ghost !" from ghost import Ghost ghost = Ghost(wait_timeout=60,download_images=False) print "[INFO] Ghost started !" page, resources = ghost.open('https://plus.google.com/s/'+" ".join(d.values())+'/people') print "[INFO]Starting Parsing" from bs4 import BeautifulSoup as bs print bs(page.content).findAll("div",{"class":"Osd Hfb"}) browser.quit()
def cmd_down_liked(self, event):
    """Start downloading the user's liked tracks from douban.fm."""
    self.cmdDown.config(state='disabled')
    self.lbl_status.config(text=u'正在获取曲目, 请稍后 ...')
    # Fetch the page and pull out its inline <script> blocks.
    html_doc = urllib2.urlopen(
        'http://douban.fm/mine#!type=liked&start=0').read().decode('utf-8')
    re_scripts = re.compile(r'<script>([\s\S]+?)</script>')
    scripts = re_scripts.findall(html_doc)
    script = scripts[-2]
    # Evaluate the script in Ghost to recover window.user_id_sign.
    ghost = Ghost()
    self.douban_user_id_sign, res = ghost.evaluate(
        script + ';window.user_id_sign;')
    # The 'bid' cookie combined with the sign forms the spbid token.
    for c in self.cookie:
        if c.name == 'bid':
            self.douban_bid = c.value.strip('"')
    self.douban_spbid = self.douban_user_id_sign + self.douban_bid
    # Hand the actual downloading off to a worker thread.
    threading.Thread(target=self.down_liked_master).start()
    return
def _get_html_page(host):
    """Open the SSL Labs analysis page for *host* and wait for the result."""
    ghost = Ghost()
    with ghost.start() as session:
        start = time.time()
        url = SSL_LABS_URL + host
        logger.debug("Request url: %s", url)
        page, extra_resources = session.open(
            url,
            headers={"User-Agent": CHROME_USER_AGENT},
            user_agent=CHROME_USER_AGENT,
            wait=True)
        logger.info("[%s] Html page retrieved in %.2f seconds,"
                    " starting wait for test completion",
                    host, time.time() - start)
        # Block until the in-page test has finished and return its result.
        return _wait_for_result(session, page)
class Shotter(object):
    """Screenshot helper: uses a Ghost headless browser when available and
    falls back to the external ``cutycapt`` command otherwise."""

    def __init__(self,):
        # Prefer Ghost; when it is unavailable, screenshot() falls back to
        # cutycapt (self.ghost stays None).
        try:
            #self.ghost = Ghost(wait_timeout=10)
            self.ghost = Ghost()
        except ImportError:
            self.ghost = None

    def cuty_shot(self, url, filename, x11=True, width=1024, height=768, colorbits=24):
        """Capture *url* to *filename* by shelling out to cutycapt.

        :param x11: when False, wrap the call in xvfb-run with a virtual
            screen of width x height x colorbits.
        :return: True when the subprocess was spawned and waited on,
            False when spawning it failed.
        """
        #TODO: add check if cutycapt installed
        # SECURITY: url and filename are interpolated into a shell string
        # (shell=True) — do not pass untrusted values here.
        if x11:
            cmd = 'cutycapt --url="%s" --out=%s' % (url, filename)
        else:
            cmd = 'xvfb-run --server-args="-screen 0, %ix%ix%i" cutycapt --url="%s" --out=%s' % \
                (url, filename, width, height, colorbits)
        try:
            Popen(cmd, shell=True).wait()
            return True
        except Exception:  # narrowed from bare except; still best-effort
            return False

    def ghost_shot(self, url, filename, ignore_errors=True):
        """Capture *url* to *filename* with the Ghost browser.

        :param ignore_errors: when True, capture regardless of HTTP status
            or page size; otherwise require status 200 and a non-empty page.
        :return: True on capture, False otherwise.
        """
        #print('ghost_shot(%s)' % url)
        try:
            page, resources = self.ghost.open(url)
            if ignore_errors:
                self.ghost.capture_to(filename)
                return True
            elif page.http_status == 200 and page.totalBytes() != 0:
                self.ghost.capture_to(filename)
                return True
            else:
                return False
        except Exception:  # narrowed from bare except (it swallowed
            # KeyboardInterrupt/SystemExit too); failures stay best-effort.
            print(exc_info())
            return False

    def screenshot(self, url, filename, overwrite=False):
        """Capture *url* to *filename*, skipping existing files unless
        *overwrite* is set; dispatches to ghost_shot or cuty_shot."""
        if path.exists(filename) and not overwrite:
            print('%s exists, skipping' % filename)
            return
        print('[SCREENSHOT] %s -> %s' % (url, filename))
        if self.ghost is not None:
            self.ghost_shot(url, filename)
        else:
            self.cuty_shot(url, filename)
class searchData:
    """Query several Chinese e-commerce sites for a keyword.

    On construction the keyword is URL-quoted and, for the 'yx' (Yixun)
    site only, the search page is fetched through a Ghost browser session
    and its raw content printed.
    """

    def __init__(self, keyword):
        # Headless browser for sites that need JavaScript rendering.
        self.ghost = Ghost()
        # URL-quote the keyword so it can be appended to search URLs.
        self.kw = urllib.quote(keyword)
        # Search-URL prefixes keyed by short site code.
        self.site = {
            'td': 'https://s.taobao.com/search?&q=',  # Taobao
            'jd': 'http://search.jd.com/Search?keyword=',  # JD.com
            'yd': 'http://search.yhd.com/c0-0/k',  # Yihaodian
            'yx': 'http://searchex.yixun.com/html?key=u',  # Yixun
            'lf': 'http://search.lefeng.com/search/noresult?keyWord=',  # Lefeng
            'an': 'http://www.amazon.cn/s/ref=nb_sb_noss_1/475-4397139-2107651?field-keywords=',  # Amazon China
            'vl': 'http://s.vancl.com/search?k=',  # Vancl
            'ge': 'http://search.gome.com.cn/search?question=',  # Gome
            'sg': 'http://search.suning.com/'  # Suning
        }
        # Sites iterated below — NOTE(review): 'yd' is in self.site but
        # absent here; confirm whether that omission is intentional.
        self.list = ['td', 'jd', 'yx', 'lf', 'an', 'vl', 'ge', 'sg']
        for s in self.list:
            # Browser-like headers; Referer and Host derived per site URL.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/43.0.2357.132 Safari/537.36',
                'Referer': urlparse(self.site[s]).scheme + '://' + urlparse(self.site[s]).netloc,
                'Host': urlparse(self.site[s]).netloc
            }
            # print s, len(requests.get(self.site[s], headers=headers).content)
            # Only the Yixun branch currently does anything: it renders the
            # search page in Ghost and dumps the content for inspection.
            if s == 'yx':
                # result = requests.get(self.site[s], headers=headers)
                with self.ghost.start() as session:
                    session.open(self.site[s], headers=headers)
                    page, resources = session.wait_for_page_loaded()
                    print repr(page.content)
def __init__(self, target_url): self.max_depth = 0 #0为单页面 self.ghost = Ghost(wait_timeout=page_timeout, display=True, download_images=False) dvwa_security(self.ghost, 'low') self.base_url = base(target_url) self.result = {target_url: []} #字典保存所有url及其参数 self.__page_crawler(target_url, 0)
def __init__(self, email, password): SGMLParser.__init__(self) self.h3 = False self.h3_is_ready = False self.div = False self.h3_and_div = False self.a = False self.depth = 0 self.names = "" self.dic = {} self.email = email self.password = password self.domain = 'renren.com' self.file = None self.friend_file = None self.ghost = Ghost() self.cookie = None self.group_url = "http://friend.renren.com/groupsdata" self.group_home = "http://friend.renren.com/managefriends" self.file_url = "renren_cookie.txt" self.mongodb = MongoClient("127.0.0.1", 27017) try: self.cookie = cookielib.LWPCookieJar(self.file_url) cookieProc = urllib2.HTTPCookieProcessor(self.cookie) except: raise else: opener = urllib2.build_opener(cookieProc) urllib2.install_opener(opener) print "init finished successfully!!"
class spidder(SGMLParser): def __init__(self, email, password): self.email = email self.password = password self.domain = "renren.com" self.ghost = Ghost() self.log_url = "http://www.renren.com/SysHome.do" self.log_auth_url = "http://www.renren.com/PLogin.do" self.frd_url = "http://friend.renren.com/managefriends" self.posttest_url = "http://localhost/print_post.php" self.post_data = { 'email': self.email, 'password': self.password, 'domain': self.domain } def ghost_test(self): with self.ghost.start() as session: #执行一个post请求,登录页面 page, resources = None, None try: req = urllib2.Request(self.posttest_url, urllib.urlencode(self.post_data)) print req page, resources = session.open(self.posttest_url, method='post', body=req.data) except: print sys.exc_info()[0], sys.exc_info()[1] else: print "we get the string!"