Beispiel #1
0
class KeywordTool(object):
    sources = {'google', 'youtube', 'bing', 'amazon', 'ebay', 'app-store'}

    def __init__(self, source='google', timeout=5):
        self.source = source
        self.base_url = None
        self.timeout = timeout
        self.driver = PhantomJS()
        self.driver.get(self.base_url)

    def search(self, search_term):
        if self.current_url != self.base_url:
            self.source = self.source  # forces page load
        self.driver.find_element_by_xpath(
            '//input[@id="edit-keyword"]').send_keys(search_term)
        self.driver.find_element_by_xpath(
            '//button[@id="edit-submit"]').click()
        """Wait for at least one element to load. In practice, most of them load. You can't get them all without scrolling."""
        element_not_present = EC.invisibility_of_element_located(
            (By.XPATH, '//td[@class="col-keywords"]//div'))
        WebDriverWait(self.driver, self.timeout).until(element_not_present)

    def parse(self):
        tree = html.fromstring(self.driver.page_source)
        L = tree.xpath('//td[@class="col-keywords"]//text()')
        L = map(lambda s: s.strip(), ''.join(L).split('\n'))
        return [s for s in L if s]

    def get_keywords(self, search_term, source='google'):
        if self.source != source:
            self.source = source
        self.search(search_term)
        return self.parse()

    @property
    def source(self):
        return self._source

    @source.setter
    def source(self, val):
        self._source = val if val in self.sources else 'google'
        if 'driver' in self.__dict__:
            self.driver.get(self.base_url)

    @property
    def base_url(self):
        return ''.join(['https://keywordtool.io/', self.source])

    @base_url.setter
    def base_url(self, val):
        pass

    @property
    def current_url(self):
        return self.driver.current_url

    @current_url.setter
    def current_url(self, val):
        pass
Beispiel #2
0
def start(n, comic_url):
    urllists.append(comic_url)
    driver = PhantomJS()
    driver.get(comic_url)
    get_images_url(n, driver, comic_url)
    while True:
        try:
            driver.find_element_by_xpath(
                "//li[@id='next_item']/a[@id='mainControlNext']").click()
            comic_url = driver.current_url
            if comic_url not in urllists:
                urllists.append(comic_url)
                get_images_url(n, driver, comic_url)
                driver.find_element_by_xpath(
                    "//li[@id='next_item']/a[@id='mainControlNext']").click()
                # print n + '\t' + comic_url
        except:
            print 'All done!'
            break
Beispiel #3
0
class AdvertisementAdvancedViewTests(LiveServerTestCase):
    def setUp(self):
        self.driver = PhantomJS()

        self.user = User.objects.create_user('admin', '*****@*****.**', 'pass')
        self.user.save()

        self.provider = Provider(
            name='provider',
            user=self.user,
        )
        self.provider.save()

        self.provider_adverts = mommy.make(Advertisement, _quantity=20, provider=self.provider)

    def tearDown(self):
        self.driver.quit()

    def open(self, url):
        self.driver.get("%s%s" % (self.live_server_url, url))

    def test_side_ad_display(self):
        """
        Test that the side ads display properly
        """
        self.open(reverse('advertisements.views.side_ads'))

        self.assertEqual(len(self.driver.find_elements_by_xpath("//a")), 4)

        self.driver.find_element_by_xpath("//a[1]/img")
        self.driver.find_element_by_xpath("//a[2]/img")
        self.driver.find_element_by_xpath("//a[3]/img")
        self.driver.find_element_by_xpath("//a[4]/img")

        self.assertNotEqual(self.driver.find_element_by_xpath("//a[1]").get_attribute("href"), '')
        self.assertNotEqual(self.driver.find_element_by_xpath("//a[2]").get_attribute("href"), '')
        self.assertNotEqual(self.driver.find_element_by_xpath("//a[3]").get_attribute("href"), '')
        self.assertNotEqual(self.driver.find_element_by_xpath("//a[4]").get_attribute("href"), '')

    def test_top_ad_display(self):
        """
        Test that the top ad displays properly
        """
        self.open(reverse('advertisements.views.top_ad'))

        self.assertEqual(len(self.driver.find_elements_by_xpath("//a")), 1)
        self.driver.find_element_by_xpath("//a/img")
        self.assertNotEqual(self.driver.find_element_by_xpath("//a").get_attribute("href"), '')
Beispiel #4
0
class Premiumgeneratorlink(object):
	def __init__(self, url):
		self.url = url
		self.browser = PhantomJS()

	def get_link(self):
		try:
			self.browser.get('http://premiumgeneratorlink.com/')
			self.browser.find_element_by_name('link').send_keys(self.url)
			self.browser.find_element_by_xpath('//a[@class="input"]').click()
			wdw = WebDriverWait(self.browser, 10)
			wdw.until(EC.element_to_be_clickable((By.ID, 'check'))).click()
			wdw.until(EC.element_to_be_clickable((By.ID, 'generate'))).click()
			link = wdw.until(EC.visibility_of_element_located((By.XPATH, '//form[@class="center"]'))).get_attribute('action')
		except (WebDriverException, NoSuchElementException, TimeoutException):
			return False
		finally:
			self.browser.quit()
		return link
Beispiel #5
0
def run_get_logic(driver: PhantomJS, command_id, token):
    if not token:
        return {"code": 103, "public": "Session troubles!"}
    driver.add_cookie({
        'name': 'token',
        'value': token,
        'domain': "." + command_id.split(":")[0],
        'path': '/'
    })
    driver.get("http://{}/cabinet".format(command_id))
    try:
        flag_there = driver.find_element_by_xpath('//html//body//div//h5//i')
        flag_container = flag_there.get_attribute('innerHTML')
        return flag_container
    except NoSuchElementException as e:
        return "error_no_flag_in_cabinet"
Beispiel #6
0
def run_get_logic(driver: PhantomJS, comand_id, post, flag, cookies):
    if 'sessions' not in cookies:
        return {"code": MUMBLE, "public": "Session troubles!"}
    driver.add_cookie({
        'name': 'sessions',
        'value': cookies['sessions'],
        'domain': "." + comand_id.split(":")[0],
        'path': '/'
    })
    driver.get("http://{}/{}".format(comand_id, post))
    try:
        flag_there = driver.find_element_by_xpath('//li/a[@href="#"]')
        flag_container = flag_there.get_attribute('innerHTML')
        if flag in flag_container:
            return {"code": OK}
        else:
            return {"code": CORRUPT, "public": "Can't find my private data!"}
    except NoSuchElementException:
        return {"code": CORRUPT, "public": "Can't find my private data!"}
def render(gist_id, commit):
    block_url = 'http://bl.ocks.org/' + gist_id
    d3_block_rec = {'gist_id': gist_id}
    try:
        driver = PhantomJS()
        driver.get(block_url)
        time.sleep(RENDER_DELAY)  # let it render
        fullpage_im = Image.open(BytesIO(driver.get_screenshot_as_png()))
        fimb = BytesIO()
        fullpage_im.save(fimb, 'png')
        d3_block_rec['fullpage_base64'] = base64.b64encode(fimb.getvalue())
        d3_block_rec['block_url'] = driver.current_url
    except Exception as e:
        # we got nothing
        with LittlePGer('dbname=' + DB_NAME, commit=commit) as pg:
            d3_block_rec['error'] = str(e)
            pg.insert('d3_block', values=d3_block_rec)
        exit(10)

    try:
        f = driver.find_element_by_xpath('//iframe')
        x, y = int(f.location['x']), int(f.location['y'])
        w, h = x + int(f.size['width']), y + int(f.size['height'])
        block_im = fullpage_im.crop((x, y, w, h))
        bimb = BytesIO()
        block_im.save(bimb, 'png')
        d3_block_rec['block_base64'] = base64.b64encode(bimb.getvalue())
        d3_block_rec['block_size'] = list(block_im.size)
    except Exception as e:
        # at least we got the fullpage im, save it
        with LittlePGer('dbname=' + DB_NAME, commit=commit) as pg:
            d3_block_rec['error'] = str(e)
            pg.insert('d3_block', values=d3_block_rec)
        exit(11)

    # all good, save everything
    with LittlePGer('dbname=' + DB_NAME, commit=commit) as pg:
        pg.insert('d3_block', values=d3_block_rec)
Beispiel #8
0
class CNStock(SentimentCrawler):
    def __init__(self):
        super().__init__(init=False)
        self.driver = PhantomJS()
        self.driver.maximize_window()
        self.wait = WebDriverWait(self.driver, 15)
        self.url = 'http://www.cnstock.com/'
        self.name = '中国证券网'

    def crawl_main_page(self, keyword):
        self.driver.set_page_load_timeout(10)
        try:
            self.driver.get(self.url)
        except TimeoutException:
            self.driver.execute_script('window.stop();')

        try:
            self.wait.until(
                ec.presence_of_element_located((By.ID, 'nav_keywords')))
        except:
            CustomLogging.log_to_file('中国证券网打开失败', LogType.ERROR)

        self.driver.find_element_by_id('nav_keywords').clear()
        self.driver.find_element_by_id('nav_keywords').send_keys(keyword +
                                                                 Keys.ENTER)

        return self.crawl_search_results()

    def crawl_search_results(self):
        search_results = []
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.driver.maximize_window()

        exit_flag = 0
        while True:
            try:
                self.wait.until(
                    ec.presence_of_element_located(
                        (By.CLASS_NAME, 'result-cont')))
            except TimeoutException:
                CustomLogging.log_to_file('中国证券网搜索结果页错误', LogType.ERROR)
                break

            try:
                result_articles = self.driver.find_elements_by_class_name(
                    'result-article')

                for each_article in result_articles:
                    item = Entity()

                    publish_date = each_article.find_element_by_class_name(
                        'g').text
                    item.publish_date = re.search(
                        re.compile(
                            '[1-9]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])\s+(20|21|22|23|[0-1]\d):[0-5]\d'
                        ), publish_date).group()

                    if not in_date_range(
                            conv_pub_date(item.publish_date, 'cnstock'),
                            self.year_range):
                        exit_flag = 1
                        # 跳出for循环
                        break
                    item.short_description = each_article.find_element_by_class_name(
                        'des').text
                    item.title = each_article.find_element_by_tag_name(
                        'a').text
                    if self.keyword not in item.short_description and self.keyword not in item.title:
                        continue

                    if item.title in self.titles:
                        continue
                    else:
                        self.titles.append(item.title)

                    item.url = each_article.find_element_by_tag_name(
                        'a').get_attribute('href')
                    threading.Thread(target=super().download_and_save_item,
                                     args=(item, )).start()

                if exit_flag == 1:
                    break
            except NoSuchElementException:
                CustomLogging.log_to_file('没有搜索结果', LogType.INFO)
                break

            try:
                next_page = self.driver.find_element_by_xpath(
                    '//div[@class="pagination pagination-centered"]//a[contains(text(), "下一页")]'
                )
                self.driver.get(next_page.get_attribute('href'))
                # next_page.click()
            except NoSuchElementException:
                break

        return search_results

    def parse_html(self, url, html):
        bs = BeautifulSoup(html, 'lxml')
        try:
            full_content = bs.find('div', attrs={'id': 'qmt_content_div'}).text
            return full_content
        except Exception:
            CustomLogging.log_to_file('页面解析错误: {0}|{1}'.format(self.name, url),
                                      LogType.ERROR)
            pass
Beispiel #9
0
from selenium.webdriver import PhantomJS

driver = PhantomJS(
    executable_path=
    r'E:\Documents\Apps\phantomjs-2.1.1-windows\bin\phantomjs.exe')
url = 'http://cxwh.kexing100.com:82/?app_act=detail&id=328&from=groupmessage'
driver.get(url)
while True:
    driver.refresh()
    print driver.find_element_by_xpath("//div[@class='xinfo']").text
    # driver.execute_script("return localStorage.setItem('toupiao','0')")
    driver.execute_script("return localStorage.removeItem('toupiao')")
    driver.delete_all_cookies()
    driver.refresh()
    vote = driver.find_element_by_xpath("//span/input[@class='btn1']").click()
    # break
Beispiel #10
0
    def get_applications_in_page(self, scroll_script):
        applications = []
        driver = None
        try:
            desired_capabilities = dict(DesiredCapabilities.PHANTOMJS)
            desired_capabilities["phantomjs.page.settings.userAgent"] = useragent.get_random_agent(google_prop.user_agent_list_url)
            service_args = ['--load-images=no', '--proxy=%s' % (proxy.get_random_proxy(google_prop.proxy_list_url))]
            driver = PhantomJS(desired_capabilities=desired_capabilities, service_args=service_args)
            # driver = Firefox(firefox_profile=self.fp, proxy=self.proxy)

            if self.proxy_test:
                driver.get('http://curlmyip.com/')
                ip = driver.find_element_by_xpath('//body//pre').text
                print('ip : [ ' + ip + ' ]')
                pass
            else:
                driver.get(self.url)
                driver.execute_script(scroll_script)

                acknowledge = 0
                done = False
                while not done:
                    scroll_finished = driver.execute_script("return scraperLoadCompleted")
                    if scroll_finished:
                        if acknowledge == self.acknowledgements:
                            done = driver.execute_script("return scraperLoadCompleted")
                            pass
                        else:
                            acknowledge += 1
                            pass
                        pass
                    else:
                        acknowledge = 0
                        pass
                    time.sleep(5)  # Wait before retry
                    pass

                product_matrix = driver.find_elements_by_class_name("card")
                for application in product_matrix:
                    extracted_application = self.extract_application_data(application)
                    # if extracted_application['app_price'] != -1:
                    applications.append(extracted_application)
                    #pass
                    pass
                pass
            driver.quit()
            pass

        except Exception as e:
            if driver is not None:
                driver.quit()
                pass

            if self.attempt < self.retries:
                self.attempt += 1
                time.sleep(10)
                print 'retry : url [ ' + self.url + ' ] + | attempt [ ' + str(self.attempt) + ' ] | error [ ' + str(e) + ' ]'
                applications = self.get_applications_in_page(scroll_script)
                pass
            else:
                print('fail : url [ ' + self.url + ' ] | error [ ' + str(e) + ' ]')
                pass
            pass
        return applications
        pass
Beispiel #11
0
def get_url_files(retail, invoice_doc_type, invoice_id, invoice_date,
                  invoice_amount):
    retail_invoice_url = RETAIL_INVOICE_URL[retail]

    driver = PhantomJS()
    driver.get(retail_invoice_url)

    # 1 Set doc_type 'select'
    try:
        select_doc_type = Select(driver.find_element_by_name('txtTipoDte'))
        value = RETAIL_INVOICE_DOC_TYPES[retail][invoice_doc_type]['value']
        select_doc_type.select_by_value(value)
        # name = RETAIL_INVOICE_DOC_TYPES[retail][invoice_doc_type]['name']
        # select_doc_type.select_by_visible_text(name)
    except Exception:
        print 'ERROR: set doc_type select as Boleta'
        driver.save_screenshot('screen.png')
        return '', ''

    time.sleep(5)

    # 2 Get recaptcha img url
    try:
        recaptcha_img = driver.find_element_by_id('recaptcha_challenge_image')
        recaptcha_img_url = recaptcha_img.get_attribute('src')
    except Exception:
        print 'ERROR: get recaptcha image url'
        driver.save_screenshot('screen.png')
        return '', ''

    # 3 Solve recaptcha
    v = VisionApi()
    recaptcha_value = v.detect_text_from_url(recaptcha_img_url)

    if recaptcha_value is None:
        print 'ERROR: solving recaptcha image'
        driver.save_screenshot('screen.png')
        return '', ''

    # 4 Fill form
    script = u"""
        document.getElementsByName('txtFolio')[0].value = '{invoice_id}';
        document.getElementsByName('txtFechaEmision')[0].value = '{invoice_date}';
        document.getElementsByName('txtMontoTotal')[0].value = '{invoice_amount}';
        document.getElementsByName('recaptcha_response_field')[0].value = '{recaptcha_value}';
    """.format(
        invoice_id=invoice_id,
        invoice_date=invoice_date,
        invoice_amount=invoice_amount,
        recaptcha_value=recaptcha_value,
    )
    driver.execute_script(script)

    # 5 Submit form
    try:
        driver.find_element_by_name('frmDatos').submit()
    except Exception:
        print 'ERROR: submitting form'
        driver.save_screenshot('screen.png')
        return '', ''

    # 6 Get url files
    try:
        xml_a_tag = driver.find_element_by_xpath(
            '//*[@id="Tabla_01"]/tbody/tr[1]/td[2]/p/a[2]')
        pdf_a_tag = driver.find_element_by_xpath(
            '//*[@id="Tabla_01"]/tbody/tr[1]/td[2]/p/a[1]')

        xml_url = xml_a_tag.get_attribute('href')
        pdf_url = pdf_a_tag.get_attribute('href')
    except Exception:
        print 'ERROR: getting url files'
        driver.save_screenshot('screen.png')
        return '', ''

    # 8 Delete driver session
    driver.close()
    driver.quit()

    return xml_url, pdf_url
Beispiel #12
0
class plugin:
    def __init__(self):
        APP_ROOT = os.path.dirname(os.path.abspath(__file__))
        print(APP_ROOT)
        self.req = 0
        self.driver = PhantomJS(APP_ROOT + "/phantomjs",
                                service_log_path=os.path.devnull)
        self.driver.implicitly_wait(3)

    def restart(self):
        self.__init__()

    def frame_search(self, path):
        framedict = {}
        for child_frame in self.driver.find_elements_by_tag_name('frame'):
            child_frame_name = child_frame.get_attribute('name')
            framedict[child_frame_name] = {'framepath': path, 'children': {}}
            xpath = '//frame[@name="{}"]'.format(child_frame_name)
            self.driver.switch_to.frame(
                self.driver.find_element_by_xpath(xpath))
            framedict[child_frame_name]['children'] = self.frame_search(
                framedict[child_frame_name]['framepath'] + [child_frame_name])

            self.driver.switch_to.default_content()
            if len(framedict[child_frame_name]['framepath']) > 0:
                for parent in framedict[child_frame_name]['framepath']:
                    parent_xpath = '//frame[@name="{}"]'.format(parent)
                    self.driver.switch_to.frame(
                        self.driver.find_element_by_xpath(parent_xpath))
        return framedict

    def tmon(self):
        self.driver.get(
            "https://login.ticketmonster.co.kr/user/loginform?return_url=")
        self.driver.find_element_by_name('userid').send_keys(
            config['ACCOUNT']['tmon_id'])
        self.driver.find_element_by_name('password').send_keys(
            config['ACCOUNT']['tmon_pw'])
        self.driver.find_element_by_xpath('//*[@id="loginFrm"]/a[2]').click()
        self.driver.get(
            'http://m.benefit.ticketmonster.co.kr/promotions/page/attendance?view_mode=app'
        )
        self.driver.find_element_by_xpath(
            '//*[@id="attn_wrap"]/div/div/div[3]/div[2]/div[1]/button').click(
            )

        print(self.driver.find_element_by_class_name('content').text)
        self.tmon_ret = self.driver.find_element_by_class_name('content').text

    def ondisk(self):
        try:
            self.driver.get("http://ondisk.co.kr/index.php")
            self.driver.implicitly_wait(3)
            self.driver.find_element_by_xpath('//*[@id="mb_id"]').send_keys(
                config['ACCOUNT']['ondisk_id'])
            self.driver.find_element_by_xpath(
                '//*[@id="page-login"]/form/div[2]/p[2]/input').send_keys(
                    config['ACCOUNT']['ondisk_pw'])
            self.driver.find_element_by_xpath(
                '//*[@id="page-login"]/form/div[2]/p[3]/input').click()
            self.driver.get(
                "http://ondisk.co.kr/index.php?mode=eventMarge&sm=event&action=view&idx=746&event_page=1"
            )
            self.driver.switch_to_frame(1)
            self.driver.execute_script(
                "window.alert = function(msg){ window.msg = msg; };")
            self.driver.find_element_by_class_name('button').click()

            alert_text = self.driver.execute_script("return window.msg;")
            print(alert_text)
        except:
            print("ERR")
            print(self.driver.page_source)
        self.ondisk_ret = alert_text

    def ok_cash_bag(self):
        today = datetime.datetime.now().strftime("%Y%m%d")
        sess = requests.session()
        getdata = sess.get(
            "https://www.facebook.com/login.php?login_attempt=1&next=https%3A%2F%2Fwww.facebook.com%2Fv2.6%2Fdialog%2Foauth%3Fredirect_uri%3Dhttps%253A%252F%252Fmember.okcashbag.com%252Focb%252FsocialId%252FfacebookProcessor%26scope%3Dpublic_profile%252Cuser_birthday%252Cemail%26client_id%3D645711852239977%26ret%3Dlogin%26logger_id%3D91698e1d-fe1e-b325-4c13-b62636843a9e&lwv=101"
        )
        param = {
            "lsd": "AVpmy4vJ",
            "api_key": "645711852239977",
            "cancel_url":
            "https://member.okcashbag.com/ocb/socialId/facebookProcessor?error=access_denied&error_code=200&error_description=Permissions+error&error_reason=user_denied#_=_",
            "display": "page",
            "enable_profile_selector": "",
            "isprivate": "",
            "legacy_return": "0",
            "profile_selector_ids": "",
            "return_session": "",
            "skip_api_login": "******",
            "signed_next": "1",
            "trynum": "1",
            "timezone": "-540",
            "lgndim":
            "eyJ3IjoxOTIwLCJoIjoxMDgwLCJhdyI6MTkyMCwiYWgiOjEwNDAsImMiOjI0fQ==",
            "lgnrnd": "173648_UqkK",
            "lgnjs": "1528418208",
            "email": config['ACCOUNT']['fb_id'],
            "pass": config['ACCOUNT']['fb_pw'],
            "prefill_contact_point": config['ACCOUNT']['fb_id'],
            "prefill_source": "last_login",
            "prefill_type": "contact_point",
            "first_prefill_source": "last_login",
            "first_prefill_type": "contact_point",
            "had_cp_prefilled": "true",
            "had_password_prefilled": "false"
        }
        postdata = sess.post(
            "https://www.facebook.com/login.php?login_attempt=1&next=https%3A%2F%2Fwww.facebook.com%2Fv2.6%2Fdialog%2Foauth%3Fredirect_uri%3Dhttps%253A%252F%252Fmember.okcashbag.com%252Focb%252FsocialId%252FfacebookProcessor%26scope%3Dpublic_profile%252Cuser_birthday%252Cemail%26client_id%3D645711852239977%26ret%3Dlogin%26logger_id%3D91698e1d-fe1e-b325-4c13-b62636843a9e&lwv=101",
            data=param)

        # print(postdata.text)
        postdata = sess.post(
            "https://member.okcashbag.com//ocb/socialId/socialIdLoginProcess/42100/687474703A2F2F7777772e6f6b636173686261672e636f6d2F696e6465782e646f3F6c6f67696e3D59"
        )
        samlResponse = postdata.text.split("samlResponse.value = \"")[1].split(
            "\"")[0]
        # print(samlResponse)
        param = {"samlResponse": samlResponse, "sst_cd": "", "return_url": ""}
        postdata = sess.post("http://www.okcashbag.com/index.do?login=Y",
                             data=param)
        print(
            postdata.text.split('<span id="profileNickname" class="name">')
            [1].split("</span>")[0] + "님 로그인")
        print(
            postdata.text.split('<span id="spanUsablePoint">')[1].split(
                '</span>')[0] + "포인트")
        getdata = sess.get(
            "http://www.okcashbag.com/life/event/attend/attendMain.do")
        param = {"method": "", "myUrl": "", "recommUser": "", "today": today}
        postdata = sess.post(
            "http://www.okcashbag.com/life/event/attend/attend.do", data=param)
        print(postdata.text)
        if len(postdata.text.split('<i class="win-point">')) > 1:
            print(postdata.text.split('<i class="win-point">')[1] + "포인트 적립")
        elif len(postdata.text.split("success")) > 1:
            print("출석체크 완료 ")
            self.ok_ret = "출석체크 완료"
        else:
            print('이미 출석체크 완료')
            self.ok_ret = "이미 출석체크 완료"
class InspectAddress(object):
    def __init__(self):
        dcap = dict(DesiredCapabilities.PHANTOMJS)  # 设置userAgent
        dcap["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/25.0 "
        )
        self.driver = PhantomJS(
            executable_path=r'phantomjs-2.1.1-windows\bin\phantomjs.exe',
            desired_capabilities=dcap)

    def get_dev_cookie(self):
        logurl = 'https://www.bidinghuo.cn/api/backend/login.json'
        # jsondata_url = 'https://www.bidinghuo.cn/api/backend/platform/query.json'
        headers = {
            'Content-Type':
            'application/json;charset=UTF-8',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'
        }
        data = {
            u'username': config.developers_account[0],
            u'password': config.developers_account[1]
        }
        value = ''
        try:
            res = requests.post(logurl, data=data)
            if res.status_code == 200:
                print u'开发平台账户登录-成功'
                value = res.cookies['laravel_session']
            else:
                print u'开发平台账户登录-失败'

        except:
            print u'开发平台账户登录-失败'

        cookies = {
            u'domain': u'.bidinghuo.cn',
            u'secure': False,
            u'value': value,
            u'expiry': None,
            u'path': u'/',
            u'httpOnly': True,
            u'name': u'laravel_session'
        }
        return cookies

    def get_brand_cookie(self):
        logurl = 'https://pyf123.bidinghuo.cn/api/admin/login.json'
        headers = {
            'Content-Type':
            'application/json;charset=UTF-8',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'
        }
        data = {
            u'username': config.brand_user[0],
            u'password': config.brand_user[1]
        }
        value = ''
        try:
            res = requests.post(logurl, data=data)
            if res.status_code == 200:
                print u'品牌商账户登录-成功'
                value = res.cookies['laravel_session']
            else:
                print u'品牌商账户登录-失败'
        except:
            print u'品牌商账户登录-失败'
        cookies = {
            u'domain': u'.bidinghuo.cn',
            u'secure': False,
            u'value': value,
            u'expiry': None,
            u'path': u'/',
            u'httpOnly': True,
            u'name': u'laravel_session'
        }
        return cookies

    def developer_platform(self):
        '''访问品牌商管理开发平台'''
        url = config.developers_platform
        try:
            self.driver.add_cookie(self.get_dev_cookie())
            self.driver.get(url)
            self.driver.set_page_load_timeout(30)
        except:
            print u'访问品牌商管理开发平台-异常'

        try:
            page = self.driver.page_source
            page_soup = BeautifulSoup(page)
            username = page_soup.find_all(class_='user-name')[0]
            assert username.string == config.developers_account[0]
            print u'品牌商管理开发平台-访问正常'
        except:
            print u'品牌商管理开发平台-访问异常'

    def brand_platform(self):
        '''访问品牌商后台'''
        url = config.brand_platform
        try:
            self.driver.add_cookie(self.get_brand_cookie())
            self.driver.get(url)
            self.driver.set_page_load_timeout(30)
            bdh_title = BeautifulSoup(
                self.driver.page_source).find_all(class_='ovh')[0].h2.string
            nsgj_title = BeautifulSoup(
                self.driver.page_source).find_all(class_='ovh')[1].h2.string
            assert bdh_title == u'必订火'
            assert nsgj_title == u'内审管家'
            print u'访问品牌商后台-正常'
        except:
            print u'访问品牌商后台-异常'
        try:
            page = self.driver.page_source
            nsgj_href = self.driver.find_element_by_xpath(
                '//*[@id="app"]/div[2]/div/div[2]/div/div[2]/a').get_attribute(
                    'href')
            bdh_href = self.driver.find_element_by_xpath(
                '//*[@id="app"]/div[2]/div/div[1]/div/div[2]/a').get_attribute(
                    'href')
            assert requests.get(bdh_href).status_code == 200
            self.driver.get(bdh_href)
            self.driver.set_page_load_timeout(30)
            dhh_title = BeautifulSoup(self.driver.page_source).find_all(
                class_='meeting-name text-overflow')[0].string
            assert dhh_title == u'测试订货会'
            print u'访问品牌商订货会-正常'
        except:
            print u'访问品牌商订货会-异常'
        try:
            assert requests.get(bdh_href).status_code == 200
            self.driver.get(nsgj_href)
            self.driver.set_page_load_timeout(30)
            nsh_title = BeautifulSoup(self.driver.page_source).find_all(
                class_='meeting-name text-overflow')[0].string
            assert nsh_title == u'认同与人体'
            print u'访问品牌商内审管家-正常'
        except:
            print u'访问品牌商内审管家-异常'
Beispiel #14
0
class ProviderAdvancedViewTests(LiveServerTestCase):
    def setUp(self):
        self.driver = PhantomJS()

        self.user = User.objects.create_user('admin', '*****@*****.**', 'password')
        self.user.save()

        self.provider = Provider(
            name='provider',
            user=self.user,
        )
        self.provider.save()

        self.provider_adverts = mommy.make(Advertisement, _quantity=20, provider=self.provider)

        self.login()

    def tearDown(self):
        self.driver.quit()

    def open(self, url):
        self.driver.get("%s%s" % (self.live_server_url, url))

    def login(self):
        self.open(settings.LOGIN_URL)
        self.driver.find_element_by_id("id_username").send_keys("admin")
        self.driver.find_element_by_id("id_password").send_keys("password")
        self.driver.find_element_by_css_selector("button.btn.btn-default").click()

        self.assertEqual(
            self.driver.current_url,
            self.live_server_url + reverse('advertisements.views.view_provider_statistics', args=[self.provider.pk]),
        )

    def test_can_login(self):
        """
        Test that the user can login
        """
        pass

    def test_provider_page_has_all_data(self):
        """
        Test that the provider statistics page has all the correct data
        """
        self.open(reverse('advertisements.views.view_provider_statistics', args=[self.provider.pk]))

        self.assertEqual("Open Ads", self.driver.title)
        self.assertIn(
            "{0} advertisements".format(self.provider.name),
            self.driver.find_element_by_css_selector("h1.page-header").text
        )
        self.assertIn(
            "{0} advertisements in rotation".format(20),
            self.driver.find_element_by_css_selector("h1.page-header").text
        )

    def test_advertisement_page_has_all_data(self):
        """
        Test that the advertisement page has all the correct data
        """

        for advert in self.provider_adverts:
            self.open(reverse('advertisements.views.view_advert_statistics', args=[advert.pk]))

            self.assertIn(
                "ID number: {0}".format(advert.pk),
                self.driver.find_element_by_css_selector("h1.page-header").text,
            )
            self.driver.find_element_by_css_selector("img")
            self.assertEqual("Active", self.driver.find_element_by_xpath("//td[2]/span").text)
            self.assertEqual(advert.url, self.driver.find_element_by_link_text(advert.url).text)
            self.driver.find_element_by_link_text("Edit URL").click()
            self.assertEqual(advert.url, self.driver.find_element_by_id("id_url").get_attribute("value"))
Beispiel #15
0
def get_url_files(retail, invoice_doc_type, invoice_id, invoice_date, invoice_amount):
    retail_invoice_url = RETAIL_INVOICE_URL[retail]

    driver = PhantomJS()
    driver.get(retail_invoice_url)

    # 1 Set doc_type 'select'
    try:
        select_doc_type = Select(driver.find_element_by_name('txtTipoDte'))
        value = RETAIL_INVOICE_DOC_TYPES[retail][invoice_doc_type]['value']
        select_doc_type.select_by_value(value)
        # name = RETAIL_INVOICE_DOC_TYPES[retail][invoice_doc_type]['name']
        # select_doc_type.select_by_visible_text(name)
    except Exception:
        print 'ERROR: set doc_type select as Boleta'
        driver.save_screenshot('screen.png')
        return '', ''

    time.sleep(5)

    # 2 Get recaptcha img url
    try:
        recaptcha_img = driver.find_element_by_id('recaptcha_challenge_image')
        recaptcha_img_url = recaptcha_img.get_attribute('src')
    except Exception:
        print 'ERROR: get recaptcha image url'
        driver.save_screenshot('screen.png')
        return '', ''

    # 3 Solve recaptcha
    v = VisionApi()
    recaptcha_value = v.detect_text_from_url(recaptcha_img_url)

    if recaptcha_value is None:
        print 'ERROR: solving recaptcha image'
        driver.save_screenshot('screen.png')
        return '', ''

    # 4 Fill form
    script = u"""
        document.getElementsByName('txtFolio')[0].value = '{invoice_id}';
        document.getElementsByName('txtFechaEmision')[0].value = '{invoice_date}';
        document.getElementsByName('txtMontoTotal')[0].value = '{invoice_amount}';
        document.getElementsByName('recaptcha_response_field')[0].value = '{recaptcha_value}';
    """.format(
        invoice_id=invoice_id,
        invoice_date=invoice_date,
        invoice_amount=invoice_amount,
        recaptcha_value=recaptcha_value,
    )
    driver.execute_script(script)

    # 5 Submit form
    try:
        driver.find_element_by_name('frmDatos').submit()
    except Exception:
        print 'ERROR: submitting form'
        driver.save_screenshot('screen.png')
        return '', ''

    # 6 Get url files
    try:
        xml_a_tag = driver.find_element_by_xpath('//*[@id="Tabla_01"]/tbody/tr[1]/td[2]/p/a[2]')
        pdf_a_tag = driver.find_element_by_xpath('//*[@id="Tabla_01"]/tbody/tr[1]/td[2]/p/a[1]')

        xml_url = xml_a_tag.get_attribute('href')
        pdf_url = pdf_a_tag.get_attribute('href')
    except Exception:
        print 'ERROR: getting url files'
        driver.save_screenshot('screen.png')
        return '', ''

    # 8 Delete driver session
    driver.close()
    driver.quit()

    return xml_url, pdf_url
# -*- coding: utf-8 -*-
"""
另类爬虫selenium  @author: 肖   
"""
# 首先进行安装 执行: pip install -U selenium  /  pip install selenium
# test1:
from selenium.webdriver import PhantomJS
import re
web = PhantomJS(executable_path='F:/phantomjs-2.1.1-windows/bin/phantomjs'
                )  # 需要设置PhantomJS的路径,否则无法运行

web.get('http://www.baidu.com')  # 获取网页
print(web.page_source)  # 打印网页源代码
print(web.page_source[0:300])  # 打印网页部分源代码,前面300个元素
web.get_screenshot_as_file(
    './baidu.png')  # 网页截图,可以看到一个网页图片,以png的格式保存到指定位置,名称为baidu.png

# test2:
element = web.find_element_by_xpath('//*[@id="kw"]')  # 通过xpath方式进行获取,定位百度的输入栏
element.send_keys('python')  # 输入关键字
element = web.find_element_by_xpath(
    '//*[@id="su"]')  # 定位百度的百度一下按钮,定位方式都是通过右键copy xpath即可,不是以往的xpath定位
element.click()  # 促发按钮点击事件
web.get_screenshot_as_file(
    './baidusearch.png')  # 网页截图,可以看到一个网页图片,以png的格式保存到指定位置,名称为baidusearch.png

print(element)  # 打印此时的element
print(web.page_source)  # 打印源代码
re.compile('<title>(.*?)</title>').findall(web.page_source)  # 正则匹配而已