def main():
    """Print the image nodes of one chapter page reachable from a start URL.

    The start URL is read from the first command-line argument. The page is
    opened in Chrome, the browser cookies are copied into the requests
    session, the last link of the chapter listing is fetched with requests
    and the matching ``<img>`` nodes are printed.

    Exits with status 1 when no URL argument is supplied. After a
    successful run the process exits with status 2, as the original script
    did (NOTE(review): this looks like a debugging stop - confirm).
    """
    try:
        start = sys.argv[1]
    except IndexError:
        print('ERROR: Requires URL as the first argument.')
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    # Constants
    ALLDROPDOWN = '//*[@id="selectReadType"]/option[2]'  # not used yet
    ACTUALIMAGES = '//*[@id="divImage"]//img'
    IMGGROUPS = '.listing a'
    TITLE = '.bigChar'
    NEXT = '//*[(@id = "btnNext")]//src'  # not used yet

    s = Session(
        webdriver_path='C:\\Webdrivers\\chromedriver',
        browser='chrome'
    )
    # ,webdriver_options={'arguments': ['headless', 'disable-gpu']}
    try:
        s.driver.get(start)
        s.driver.ensure_element_by_css_selector(TITLE)
        title = s.driver.find_element_by_css_selector(TITLE).text  # currently unused
        groups = s.driver.find_elements_by_css_selector(IMGGROUPS)
        s.transfer_driver_cookies_to_session()

        # The last entry of the listing is the page that gets fetched.
        begin = to_attribute_list(groups, 'href').pop()
        response = s.get(begin).xpath(ACTUALIMAGES)
        print(response)
    finally:
        # Release the session even when a lookup or request above fails.
        s.close()
    sys.exit(2)
username, password = session.driver.find_elements_by_class_name('form-control') username.send_keys(credentials[0]) password.send_keys(credentials[1]) button.click() session.driver.get( f'{root}/_layouts/15/HudStuPortal/simpleCPIP.aspx?sys=timetable' ) session.driver.ensure_element_by_tag_name('td') lessons: List[WebElement] = [ *session.driver.find_elements_by_class_name('lect'), *session.driver.find_elements_by_class_name('prac') ] session.close() timetable: List[List[Any]] = [[], [], [], [], []] def search(pattern: str, source: str) -> str: """Return the first group matched by a pattern.""" match = re.search(pattern, source) if match is not None: return match.group(1) else: return str() for lesson in lessons: day = -1
class Driver(object):
    """Scraper for qixin.com built on a requestium ``Session``.

    The session exposes both a Selenium driver (used to log in) and a
    requests-style HTTP client (used to fetch the search result pages).
    """

    def __init__(self):
        # Use requestium's Session, which combines requests and Selenium.
        # (The headless option below is currently commented out.)
        self.s = Session(
            webdriver_path='./chromedriver',
            browser='chrome',
            default_timeout=15,
            # webdriver_options={'arguments': ['headless']}
        )
        self.category_mapping = None
        path = os.path.join(os.getcwd(), FILENAME)
        if os.path.exists(path):
            # Context manager so the cache file handle is always closed.
            with open(path) as cache:
                self.category_mapping = ujson.load(cache)
        # pprint(self.category_mapping)

    @staticmethod
    def _strip_or_none(value):
        """Return ``value`` stripped of surrounding whitespace, or None if it is None."""
        return value.strip() if value is not None else None

    def close(self):
        """Quit the browser and close the session; a no-op when there is no session."""
        if self.s is None:
            return
        try:
            if self.s.driver is not None:
                self.s.driver.quit()
        finally:
            # Close the session even if quitting the browser fails.
            self.s.close()

    def login(self):
        """
        Log in to Qixinbao (qixin.com) through the Selenium driver.
        """
        login_url = 'http://www.qixin.com/auth/login?return_url=%2F'
        self.s.driver.get(login_url)

        # Locate the elements with requestium's ensure_* methods.
        username_xpath = '//input[@class="form-control input-lg input-flat input-flat-user"]'
        user_element = self.s.driver.ensure_element_by_xpath(username_xpath)
        for c in USERNAME:
            # Type the username and password one character at a time,
            # pausing between keystrokes.
            user_element.send_keys(c)
            time.sleep(random.randint(0, 2))

        password_xpath = '//input[@class="form-control input-lg input-flat input-flat-lock"]'
        password_element = self.s.driver.ensure_element_by_xpath(
            password_xpath)
        for c in PASSWORD:
            password_element.send_keys(c)
            time.sleep(random.random())
        password_element.send_keys(Keys.ENTER)
        self.s.driver.implicitly_wait(10)

    def process_cookies(self):
        """
        Prepare the requests session for fetching pages.

        Copies the driver's cookies and user agent into the session and,
        when no category mapping is loaded yet, scrapes it from the home
        page and writes it to ``FILENAME`` in the current directory.
        """
        # Hand the driver's cookies over to the requests session.
        tmp_url = 'http://www.qixin.com/search?area.province=12&page=1&scope[]=1'
        self.s.driver.get(tmp_url)
        self.s.transfer_driver_cookies_to_session()
        self.s.copy_user_agent_from_driver()

        # Only build the category mapping when it does not exist yet.
        if self.category_mapping is None:
            req = self.s.get('http://www.qixin.com')
            self.category_mapping = {}
            for element in req.xpath('//div[@class="grid-item"]'):
                category_l1 = element.xpath('./div/text()').extract_first()
                if category_l1 is None:
                    # A grid item without a heading cannot be used as a key.
                    continue
                category_l2 = element.xpath('./a/text()').extract()
                self.category_mapping[category_l1.strip()] = category_l2
            with open(os.path.join(os.getcwd(), FILENAME), 'w') as cache:
                ujson.dump(self.category_mapping, cache)

    def fetch_page(self):
        """
        Fetch search result pages 1-10 and return the parsed companies.

        :return: list of dicts with the keys ``title``, ``legal_owner``,
            ``status``, ``capital``, ``date`` and ``url``; a value is None
            when the corresponding node is missing from the page.
        """
        # With the cookies in place, scrape through the requests session.
        result = []
        self.s.proxies.update({
            'http': 'http://forward.xdaili.cn:80',
            'https': 'https://forward.xdaili.cn:80'
        })
        clean = self._strip_or_none
        for page in range(1, 11):
            url = 'http://www.qixin.com/search?area.province=12&page=%s&scope[]=1&sorter=4' % page
            # The proxy signature is recomputed for every request.
            self.s.headers.update({'Proxy-Authorization': sign()})
            req = self.s.get(url)
            for element in req.xpath(
                    "//div[contains(@class, 'company-item')]"):
                result.append({
                    'title': clean(element.xpath(
                        ".//div[@class='company-title']/a/text()"
                    ).extract_first()),
                    'legal_owner': clean(element.xpath(
                        ".//div[@class='legal-person'][1]/text()"
                    ).re_first(r'法定代表人:(\w*)')),
                    'status': clean(element.xpath(
                        ".//div[@class='company-tags']/span[1]/text()"
                    ).extract_first()),
                    'capital': clean(element.xpath(
                        ".//div[contains(@class, 'col-3-1')]/text()"
                    ).extract_first()),
                    'date': clean(element.xpath(
                        ".//div[contains(@class, 'col-3-2')]/text()"
                    ).extract_first()),
                    'url': clean(element.xpath(
                        ".//div[@class='company-title']/a/@href"
                    ).extract_first()),
                })
            # Pause between result pages.
            time.sleep(10)
        return result

    def process_search_condition(self):
        """
        Build the search conditions (not implemented yet).

        * URL: http://www.qixin.com/search?
        * param region: area.province=12, area.district=120101-120119
        * param search scope: scope[]=1
        * param sort order: sorter=3 | 4
        * param registered capital: capital: 1-5
        * param industry: industry.l1 first-level industry, industry.l2 second-level industry
        * param registration year: year: 1-5
        * param page: page number, at most 500; only 5000 search results can be viewed

        http://www.qixin.com/search?area.district=120101&area.province=12&capital=2&industry.l1=%E5%86%9C%E3%80%81%E6%9E%97%E3%80%81%E7%89%A7%E3%80%81%E6%B8%94%E4%B8%9A&industry.l2=%E5%86%9C%E4%B8%9A&page=1&scope[]=1&sorter=4&year=5
        """
        pass
class CocktailEngineTest(LiveServerTestCase):
    """
    Test the web part of the project in live server
    """

    def setUp(self):
        """Start a headless Chrome session and create the database fixtures.

        Fixtures: six solenoid valves, six bottles (one per valve, bottle 4
        flagged empty), three cocktails and five bottle/cocktail links.
        """
        self.browser = Session(
            webdriver_path='/usr/lib/chromium-browser/chromedriver',
            browser='chrome',
            default_timeout=15,
            webdriver_options={
                'arguments': ['--headless', '--no-sandbox', '--disable-dev-shm-usage']
            }
        )

        for number in range(1, 7):
            SolenoidValve.objects.create(
                id=number, number=number, step=number * 10, first_pin=1, second_pin=2)

        bottles = {}
        for number in range(1, 7):
            # Only bottle 4 is created with empty=True; the others keep the
            # model's own default for that field.
            extra = {'empty': True} if number == 4 else {}
            bottles[number] = Bottle.objects.create(
                id=number, name='bottle{}'.format(number), solenoid_valve_id=number, **extra)

        cocktails = {}
        for number, word in enumerate(('one', 'two', 'three'), start=1):
            cocktails[number] = Cocktail.objects.create(
                id=number,
                name='cocktail' + word,
                description='cocktail {} description'.format(word))

        # (bottle id, cocktail id, dose)
        links = ((1, 1, 1), (2, 2, 2), (3, 2, 3), (4, 3, 4), (5, 3, 4))
        for bottle_id, cocktail_id, dose in links:
            BottlesBelongsCocktails(bottle=bottles[bottle_id],
                                    cocktail=cocktails[cocktail_id],
                                    dose=dose).save()

        self.client = Client()

    def tearDown(self):
        """
        Shut the browser down once the test is finished
        """
        try:
            # quit() ends the whole WebDriver session; close() only closes
            # the current window.
            self.browser.driver.quit()
        finally:
            self.browser.close()

    def _view_url(self, view_name, query=''):
        """Return the live-server URL of a named view, plus an optional query string."""
        return self.live_server_url + reverse(view_name) + query

    def _paragraph_text(self, element_id):
        """Return the innerText of the first <p> inside the element with this id."""
        container = self.browser.driver.find_element_by_id(element_id)
        return container.find_element_by_tag_name('p').get_attribute("innerText")

    def _post_ajax(self, view_name, data):
        """POST ``data`` to a named view with the XMLHttpRequest header set."""
        return self.client.post(self._view_url(view_name), data,
                                **{'HTTP_X_REQUESTED_WITH': 'XMLHttpRequest'})

    def test_solenoid_valve(self):
        """
        Test that the stored step of a valve has the expected value
        """
        solenoid_valve = SolenoidValve.objects.get(number=1)
        self.assertEqual(solenoid_valve.step, 10)

    def test_bottle(self):
        """
        Test if __str__ correctly return the name of the object in the database
        """
        bottle = Bottle.objects.get(name='bottle1')
        self.assertEqual(bottle.solenoid_valve_id, 1)
        self.assertEqual(str(bottle), 'bottle1')

    def test_bottle_belong_cocktails(self):
        """
        Test if the many-to-many relationship is working properly
        """
        bottle = BottlesBelongsCocktails.objects.get(bottle=1)
        cocktail = BottlesBelongsCocktails.objects.get(cocktail=1)
        self.assertEqual(bottle.bottle_id, 1)
        self.assertEqual(bottle.bottle_detail, 'bottle1')
        self.assertEqual(cocktail.cocktail_detail, 'cocktailone')
        self.assertEqual(cocktail.dose_detail, '1')
        self.assertEqual(str(cocktail), '1')

    def test_cocktail(self):
        """
        Test if the cocktail exists and meets the requirements
        """
        cocktail = Cocktail.objects.get(name="cocktailone")
        self.assertEqual(cocktail.description, 'cocktail one description')
        self.assertEqual(str(cocktail), 'cocktailone')
        cocktail_by_bottle = Cocktail.objects.get(bottlesbelongscocktails__bottle__name='bottle1')
        self.assertEqual(cocktail_by_bottle.description, 'cocktail one description')

    def test_cocktail_views(self):
        """
        Test if the cocktails are well displayed on the page of the view,
        or if the bottles are well in the list
        """
        page = self._view_url('engine:cocktail_views')
        response = self.client.get(page)
        self.assertEqual(response.status_code, 200)

        self.browser.driver.get(page)
        tags = self.browser.driver.find_elements_by_class_name('dropdown-item')
        cocktail1 = self._paragraph_text('cocktailone')
        cocktail2 = self._paragraph_text('cocktailtwo')
        self.assertEqual(cocktail1, 'Nom: cocktailone')
        self.assertEqual(cocktail2, 'Nom: cocktailtwo')
        self.assertListEqual([tag.get_attribute("text") for tag in tags],
                             ['bottle1', 'bottle2', "bottle3", "bottle4", "bottle5", "bottle6"])

    def test_view_cocktail_views_research(self):
        """
        Test the cocktail view if a search was made from a name of a cocktail
        """
        self.browser.driver.get(self._view_url('engine:cocktail_views', "?name=cocktailone"))
        self.assertEqual(self._paragraph_text('cocktailone'), 'Nom: cocktailone')

    def test_view_cocktail_views_bottle(self):
        """
        Test the view of cocktails when a bottle has been selected
        """
        self.browser.driver.get(self._view_url('engine:cocktail_views', "?bottle=2"))
        self.assertEqual(self._paragraph_text('cocktailtwo'), 'Nom: cocktailtwo')

    @patch('engine.views.make_cocktail.delay', lambda x: 0)
    @patch('engine.views.make_cocktail.app.control.inspect', MagicMock())
    def test_view_make_cocktail(self):
        """
        Test the creation of a cocktail selected in a view
        """
        # Cocktail 1 is expected to yield a real task id.
        response = self._post_ajax('engine:make_the_cocktail', {"cocktail_id": "1"})
        response_json = json.loads(response.content.decode('utf-8'))
        self.assertEqual(response.status_code, 200)
        self.assertNotEqual(response_json['task_id'], 'error')

        # Ask for the state of that task.
        response = self._post_ajax('engine:make_the_cocktail',
                                   {"task_id": response_json['task_id']})
        response_json = json.loads(response.content.decode('utf-8'))
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response_json['task_info'], 0)

        # Cocktail 3 is expected to be refused (it uses bottle 4, created empty).
        response = self._post_ajax('engine:make_the_cocktail', {"cocktail_id": "3"})
        response_json = json.loads(response.content.decode('utf-8'))
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response_json['task_id'], 'error')

    def test_bottle_admin(self):
        """
        Test the addition of a bottle or the removal of it in the page
        "administration of bottles"
        """
        admin_page = self._view_url('engine:bottle_engine_admin')
        self.browser.driver.get(admin_page)
        self.assertEqual(self._paragraph_text('bottle_6'), 'Nom: bottle6')

        response = self.client.get(admin_page + '?deleteBottle=6')
        self.assertEqual(response.status_code, 302)

        # The original discarded this response and re-asserted the GET
        # response above. Check the POST itself, accepting either a rendered
        # page or a redirect because the view's reply is not visible here.
        response = self.client.post(admin_page,
                                    {'solenoidValve': 6, 'name': 'bottle7', 'empty': 'False'})
        self.assertIn(response.status_code, (200, 302))

        self.browser.driver.get(admin_page)
        self.assertEqual(self._paragraph_text('bottle_6'), 'Nom: bottle7')

    def test_bottle_admin_modify_bottle(self):
        """
        Test the modification of a bottle if it is empty or when it is
        not well synchronized under the valve
        """
        response = self._post_ajax('engine:bottle_modify_parameter',
                                   {"step": 61, "solenoidValve": 6})
        self.assertEqual(response.status_code, 200)
        solenoid_valve = SolenoidValve.objects.get(id=6)
        self.assertEqual(solenoid_valve.step, 61)

        self._post_ajax('engine:bottle_modify_parameter',
                        {"empty": 'true', "solenoidValve": 6})
        bottle = Bottle.objects.get(solenoid_valve__number=6)
        # assertTrue's second argument is the failure message, not an
        # expected value, so only the value under test is passed.
        self.assertTrue(bottle.empty)

    def test_cocktail_admin_add_cocktail(self):
        """
        Test the addition of a cocktail in the page "administration of cocktails"
        """
        admin_page = self._view_url('engine:cocktail_engine_admin')
        response = self.client.post(admin_page,
                                    {'name': 'cocktailfive',
                                     'description': 'cocktail five',
                                     'image': [''],
                                     'form-TOTAL_FORMS': ['1'],
                                     'form-MIN_NUM_FORMS': ['0'],
                                     'form-MAX_NUM_FORMS': ['6'],
                                     'form-INITIAL_FORMS': ['0'],
                                     'form-0-dose': ['2'],
                                     'form-0-bottle': ['bottle1'],
                                     'form-1-dose': ['3'],
                                     'form-1-bottle': ['bottle2'],
                                     })
        self.assertEqual(response.status_code, 200)

        self.browser.driver.get(admin_page)
        cocktail1 = self._paragraph_text('cocktailone')
        cocktail2 = self._paragraph_text('cocktailtwo')
        cocktail5 = self._paragraph_text('cocktailfive')

        response = self.client.get(admin_page + '?deleteCocktail=3')
        self.assertEqual(response.status_code, 302)
        self.assertEqual(cocktail1, "Nom: cocktailone")
        self.assertEqual(cocktail2, "Nom: cocktailtwo")
        self.assertEqual(cocktail5, 'Nom: cocktailfive')
def torrent_form():
    """Render the torrent search form and, on POST, the best match found.

    For a POST the submitted ``name`` is typed into the search box of
    thepiratebay.org through a headless Chrome session; the first result's
    title, detail link, seed count and magnet link are passed to the
    ``torrent_form.html`` template. Any other method renders the empty form.

    Any failure (missing form field, no search result, network error) is
    reported to the user as "Torrent not found".
    """
    try:
        if request.method != 'POST':
            return render_template('torrent_form.html')

        name = request.form['name']
        s = Session(webdriver_path=r"./chromedriver.exe",
                    browser='chrome',
                    webdriver_options={
                        'arguments': [
                            'disable-dev-shm-usage', 'headless', 'no-sandbox'
                        ]
                    })
        try:
            # NOTE: a second source (torlock2.com) and the comparison of
            # seed counts between both sources used to live here as
            # commented-out code; only thepiratebay.org is queried.
            url2 = 'https://thepiratebay.org'
            s.driver.get(url2)
            s.driver.ensure_element_by_tag_name('input').send_keys([name, Keys.ENTER])

            # Re-fetch the result page with requests and parse it.
            x = requests.get(s.driver.current_url)
            soup2 = BeautifulSoup(x.text, 'lxml')
            first_hit = soup2.find(class_='detLink')
            torname = first_hit.getText()
            baylink = url2 + first_hit.get('href')
            seeds2 = soup2.find('td', {'align': 'right'}).getText()

            # Open the detail page and pull the magnet link out of it.
            s.driver.get(baylink)
            w = requests.get(baylink)
            soup3 = BeautifulSoup(w.text, 'lxml')
            magnet2 = soup3.find(class_='download').find('a').get('href')
        finally:
            # Always release the browser/session, including when a lookup
            # above fails and the "not found" page is rendered.
            s.close()

        # NOTE: persisting the search to the MySQL `search` / `usersearch`
        # tables used to follow here as commented-out code and is disabled.
        return render_template('torrent_form.html',
                               url=baylink,
                               stats=seeds2,
                               torname=torname,
                               magnet=magnet2)
    except Exception:
        message = "Torrent not found"
        return render_template('torrent_form.html', message=message)
class Driver(object):
    """Scraper for qixin.com built on a requestium ``Session``.

    The session exposes both a Selenium driver (used to log in) and a
    requests-style HTTP client (used to fetch pages).
    """

    # Default for instances on which no mapping has been set yet;
    # process_cookies() tests this attribute before building the mapping.
    category_mapping = None

    def __init__(self):
        # Use requestium's Session, which combines requests and Selenium.
        # (The headless option below is currently commented out.)
        self.s = Session(
            webdriver_path='./chromedriver',
            browser='chrome',
            default_timeout=15,
            # webdriver_options={'arguments': ['headless']}
        )
        # process_cookies() reads this attribute, so it must always exist.
        self.category_mapping = None
        # Loading a previously cached mapping is disabled:
        # path = os.path.join(os.getcwd(), FILENAME)
        # if os.path.exists(path):
        #     self.category_mapping = ujson.load(open(path))
        # pprint(self.category_mapping)

    def close(self):
        """Quit the browser and close the session; a no-op when there is no session."""
        if self.s is None:
            return
        try:
            if self.s.driver is not None:
                self.s.driver.quit()
        finally:
            # Close the session even if quitting the browser fails.
            self.s.close()

    def login(self):
        """
        Log in to Qixinbao (qixin.com) through the Selenium driver.
        """
        login_url = 'http://www.qixin.com/auth/login?return_url=%2F'
        self.s.driver.get(login_url)

        # Locate the elements with requestium's ensure_* methods.
        user_element = self.s.driver.ensure_element_by_xpath(
            LOGIN_XPATH['username'])
        for c in USERNAME:
            # Type the username and password one character at a time,
            # pausing between keystrokes.
            user_element.send_keys(c)
            time.sleep(random.randint(0, 2))

        password_element = self.s.driver.ensure_element_by_xpath(
            LOGIN_XPATH['password'])
        for c in PASSWORD:
            password_element.send_keys(c)
            time.sleep(random.random())
        password_element.send_keys(Keys.ENTER)
        self.s.driver.implicitly_wait(20)

    def process_cookies(self):
        """
        Prepare the requests session for fetching pages.

        Copies the driver's cookies and user agent into the session and,
        when no category mapping is set yet, scrapes it from the home page
        and writes it to ``FILENAME`` in the current directory.
        """
        # Hand the driver's cookies over to the requests session.
        tmp_url = 'http://www.qixin.com/search?area.province=12&page=1&scope[]=1'
        self.s.driver.get(tmp_url)
        self.s.transfer_driver_cookies_to_session()
        self.s.copy_user_agent_from_driver()

        # Only build the category mapping when it does not exist yet.
        if self.category_mapping is None:
            req = self.s.get('http://www.qixin.com')
            self.category_mapping = {}
            for element in req.xpath(CATEGORY_XPATH['info']):
                category_l1 = element.xpath(
                    CATEGORY_XPATH['l1']).extract_first()
                if category_l1 is None:
                    # An item without a first-level name cannot be used as a key.
                    continue
                category_l2 = element.xpath(CATEGORY_XPATH['l2']).extract()
                self.category_mapping[category_l1.strip()] = category_l2
            # Context manager so the cache file is flushed and closed.
            with open(os.path.join(os.getcwd(), FILENAME), 'w') as cache:
                ujson.dump(self.category_mapping, cache)

    def fetch_page_with_chrome(self, url):
        """Copy the session cookies into the driver and open ``url`` in the browser."""
        self.s.transfer_session_cookies_to_driver()
        self.s.driver.get(url)

    def fetch_page_with_requests(self, url):
        """
        Fetch ``url`` through the proxy with the requests session and parse it.

        Example:
        url = 'http://www.qixin.com/search?area.province=12&page=%s&scope[]=1&sorter=4' % page

        :param url: the URL to request
        :return: whatever ``parse_list`` produces for the response
            (a list, according to the original docstring)
        """
        # With the cookies in place, scrape through the requests session.
        self.s.proxies.update({
            'http': 'http://forward.xdaili.cn:80',
            'https': 'https://forward.xdaili.cn:80'
        })
        self.s.headers.update({'Proxy-Authorization': sign()})
        req = self.s.get(url)
        result = parse_list(req)
        return result