コード例 #1
0
def get_bookmark(url, oid, owner):
    """Resolve *url*, scrape its title/description and upload a screenshot.

    Returns a dict with keys: 'original-url', 'url' (after redirects),
    'title', 'description', and 'screenshot' (the S3 upload result).
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    result = {}
    result['original-url'] = url
    req = requests.get(url)
    result['url'] = req.url

    soup = BeautifulSoup(req.text, 'lxml')
    result['title'] = soup.title.string.encode('utf-8')
    result['description'] = soup.findAll(attrs={"name":"description"})[0]['content'].encode('utf-8')

    filename = oid + '.png'
    # Screenshot lives next to this module, not in the current directory.
    fname = os.path.join(os.path.dirname(__file__), filename)

    driver = webdriver.Firefox()
    try:
        driver.get(req.url)
        driver.get_screenshot_as_file(fname)
        # NOTE(review): the upload is given the bare `filename`, not `fname`
        # — presumably it is used as an S3 key; confirm it does not re-open
        # the file relative to the cwd.
        result['screenshot'] = upload_image_as_png_to_s3(filename, owner)
    finally:
        # BUG FIX: quit (not close) so the driver process ends, stop the
        # display, and unlink the actual screenshot path `fname` — the
        # original unlinked `filename` relative to the cwd, leaking the
        # file (or raising) whenever cwd != module directory.
        driver.quit()
        display.stop()
        if os.path.exists(fname):
            os.unlink(fname)

    return result
コード例 #2
0
ファイル: __init__.py プロジェクト: ahinz/OpenTreeMap-cloud
class UITestCase(LiveServerTestCase):
    """Live-server test base that falls back to a virtual X display (xvfb)
    when no real display is available for the browser."""

    def use_xvfb(self):
        # Start an xvfb-backed virtual display, then retry the browser.
        from pyvirtualdisplay import Display
        self.display = Display('xvfb', visible=1, size=(1280, 1024))
        self.display.start()
        self.driver = WebDriver()

    def setUp(self):
        try:
            self.driver = WebDriver()
        except WebDriverException:
            # No usable display — run the browser inside xvfb instead.
            self.use_xvfb()

        self.driver.implicitly_wait(10)
        super(UITestCase, self).setUp()

    def tearDown(self):
        self.driver.quit()
        if hasattr(self, 'display'):
            self.display.stop()

        super(UITestCase, self).tearDown()
コード例 #3
0
ファイル: cl_bot.py プロジェクト: vulpineblaze/cl_bot
def main():
    '''business logic for when running this module as the primary one!'''
    display = Display(visible=0, size=(1024, 768))
    display.start()

    fresh_cl_post = find_cl_post()
    prev_cl_post = {"title":"","link":""}
    old_cl_post = {"title":"","link":""}
    
    # find_cl_post()
    while True:
        # print "TEST" + str(datetime.date.today())
        fresh_cl_post = find_cl_post()
        
        try:
            if fresh_cl_post['title'] != prev_cl_post['title']:
            
                old_cl_post = prev_cl_post
                prev_cl_post = fresh_cl_post
            
                send_cl_email(fresh_cl_post)

        except:
            print "Failed to test & send mail at: "+str(datetime.datetime.now())

        gc.collect()
        time.sleep(SLEEP_SECONDS)
        
    
    
    display.stop()
コード例 #4
0
ファイル: views.py プロジェクト: mastinux/tebRotarapmoc
def retrieveTTdata(url):
    print "processing tenTeb ..."

    display = Display(visible=0, size=(1024, 1024))
    display.start()

    # driver = webdriver.Firefox()

    # http://stackoverflow.com/questions/8255929/running-webdriver-chrome-with-selenium
    driver = webdriver.Chrome()
    driver.get(url)
    sleep(5)
    html = driver.page_source
    driver.quit()

    display.stop()

    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(html), parser)

    for div_element in tree.getiterator("div"):
        if "class" in div_element.keys() and div_element.attrib["class"] == "types_bg":
            tree = div_element

    for div_element in tree.getiterator("div"):
        if "class" in div_element.keys() and div_element.attrib["class"] == "bets ml":
            parse_div_element(div_element)
コード例 #5
0
ファイル: utils.py プロジェクト: Cruel/Anondex
def webthumb(url, filename, is_flash=False):
    script = """
        var s = document.createElement('script');
        s.src = 'http://cruels.net/sb/flashfix.js';
        document.body.appendChild(s);
    """
    print "webthumb(%s, %s)" % (url, filename)
    display = Display(visible=0, size=(1200, 900))
    display.start()
    browser = webdriver.Firefox()
    browser.get(url)
    if is_flash:
        time.sleep(1)
    else:
        browser.execute_script(script)
        time.sleep(6)
    tmpfile = "%s.tmp" % filename
    browser.get_screenshot_as_file(tmpfile)
    img = pil.open(tmpfile)
    width, height = img.size
    if is_flash:
        resized = img.resize((LIBRARYFILE_THUMB_WIDTH, LIBRARYFILE_THUMB_HEIGHT), pil.ANTIALIAS)
    else:
        ratio = float(width) / float(height)
        resized = img.resize((LIBRARYFILE_THUMB_WIDTH, int(LIBRARYFILE_THUMB_WIDTH / ratio)), pil.ANTIALIAS)
    resized.save(filename)
    os.remove(tmpfile)
    print "Saved %s." % filename
    browser.quit()
    display.stop()
    return True
コード例 #6
0
class BCCVLTestCase(unittest.TestCase):
    """Base Selenium test case for the BCCVL site.

    Connection settings come from BCCVL_TEST_* environment variables
    (with dev-environment defaults); the browser can optionally run
    inside a virtual display (xvfb).
    """

    def setUp(self):
        # acquire URL, username and password from environment variables, or use default values for dev env.
        self.username = os.getenv("BCCVL_TEST_USERNAME", "admin")
        self.password = os.getenv("BCCVL_TEST_PASSWORD", "admin")
        self.url = os.getenv("BCCVL_TEST_URL", "https://192.168.100.200/")

        # The amount of time selenium will potentially wait in searching for elements. This is blocking.
        implicit_wait = int(os.getenv("BCCVL_TEST_IMPLICIT_WAIT", "15"))

        # Run tests in a virtual display (xvfb) when requested.
        virtual_display = os.getenv("BCCVL_TEST_VIRTUAL_DISPLAY", "false") == "true"

        if virtual_display:
            self.display = Display(visible=0, size=(1920, 1080))
            self.display.start()
        else:
            self.display = None

        # Setup the Firefox webdriver.
        self.driver = webdriver.Firefox()
        self.driver.implicitly_wait(implicit_wait)

        # Use a fixed window size rather than maximizing.
        self.driver.set_window_size(1200, 800)

        # Go to the bccvl homepage.
        self.driver.get(self.url)

    def tearDown(self):
        # BUG FIX: quit the browser *before* stopping the virtual display;
        # the original stopped xvfb first, which can make driver.quit()
        # hang or error because the browser's X server is already gone.
        self.driver.quit()
        if self.display:
            self.display.stop()
コード例 #7
0
ファイル: scrapeinfo.py プロジェクト: zheverson/videoitem
def getupc(data, sleeptime):
    """Look up a UPC code on Google for every item in *data*.

    For each dict in ``data`` (each is expected to carry a ``'name'``
    key — TODO confirm against callers), searches Google for
    "<name> upc" and, if a result links into upcitemdb.com, stores the
    final URL path segment under the item's ``'upc'`` key.  Items with
    no match are left untouched.  ``sleeptime`` throttles each page
    interaction.  Returns the same ``data`` list, mutated in place.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    a = webdriver.Firefox()
    # /ncr = "no country redirect", so result markup is consistent.
    a.get('https://www.google.com/ncr')
    time.sleep(sleeptime)
    search = WebDriverWait(a, 5).until(EC.element_to_be_clickable((By.XPATH, "//input[@type='text']")))
    for i in data:
        ActionChains(a).move_to_element(search).click(search).send_keys(i['name'] + ' upc', Keys.ENTER).perform()
        time.sleep(sleeptime)
        # Each organic search result is rendered as a div.g element.
        contents = WebDriverWait(a, 5).until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='g']")))
        try:
            # First result whose anchor points at upcitemdb.com; the UPC is
            # the last '/'-separated component of that href.
            upc = next(
                    (re.split(r'/', href.find_element_by_tag_name('a').get_attribute('href'))[-1] for
                     href in contents if
                     href.find_element_by_tag_name('a').get_attribute('href').startswith(
                             'http://www.upcitemdb.com/upc')))
            i['upc'] = upc
        except StopIteration:
            # No upcitemdb result for this item — leave it without a 'upc'.
            pass

        # Re-locate the search box (the page has changed) and clear it
        # before the next query.
        search = WebDriverWait(a, 5).until(EC.element_to_be_clickable((By.XPATH, "//input[@type='text']")))
        search.clear()
    a.close()
    display.stop()
    return data
コード例 #8
0
ファイル: rzhd_3.py プロジェクト: dmitrybezb/rzd
def rzhd():
    """Poll RZD ticket pages until a platskart seat is found for every
    requested direction, sending an email alert for each hit.

    Interactive: prompts on stdin for additional directions and for
    purchase confirmation.  Returns True once every direction is bought;
    otherwise keeps polling every ``n`` seconds.
    """
    directions=[create_url(),]

    while raw_input('Want to add more directions? y/n ')=='y':
        directions.append(create_url())
        print "------------------"
    # n=raw_input('Check tickets every ...(seconds)? ')
    n = 60  # seconds between polling rounds

    place=choose_place()
    i = 0
    display = Display(visible=0, size=(5, 5))
    display.start() # start the virtual display
    while len(directions)!=0:
        i+=1
        print
        print "----------------->Searching for PLATSKART<-----------------"

        print "try #",i
        print time.asctime()
        print

        for url in directions:
            if find_train(url, place)==True:
                send_email('*****@*****.**', url)
                if raw_input('Did you buy ticket? y/n ')=='y':
                    directions.remove(url)
                    if len(directions) == 0:
                        print "Successfully bought all tickets!"
                        return True                
            print str(n)+" seconds until next try..."
            time.sleep(float(n)) # give the browser time to shut down cleanly
    display.stop() # close the virtual display
コード例 #9
0
ファイル: keepUp.py プロジェクト: Fal34/PFG-NFC-Android
 def load(self):
     """Keep a Cloud9 workspace alive by logging in and visiting it headlessly.

     Re-schedules itself via threading.Timer at a random interval between
     one and two hours, then drives a virtual-display Firefox through the
     c9.io login flow, screenshots each stage into ``self.directory_img``,
     and records the workspace page via ``self.log``.
     """
     min_time = 3600 # 1 hour in seconds
     max_time = 7179 # 2 hours in seconds (less 21)
     tasktime = randint(min_time, max_time)
     # Schedule the next run before doing any slow browser work.
     threading.Timer(tasktime, self.load).start()
     tasktime_m , tasktime_s = divmod( tasktime , 60)
     tasktime_h , tasktime_m = divmod( tasktime_m , 60) 
     output_content = "Load execution - waiting %dh %02dmin %02dsec for the next time." % (tasktime_h, tasktime_m, tasktime_s)
     print "[KeepUp]" , output_content
     
     from selenium import webdriver
     from selenium.webdriver.common.by import By
     from selenium.webdriver.support.ui import WebDriverWait
     from selenium.webdriver.support import expected_conditions as ec
     from selenium.webdriver.common.keys import Keys
     from pyvirtualdisplay import Display
     
     # Initial: headless display plus a cache-disabled Firefox profile.
     display = Display(visible=0, size=(1600, 900))
     display.start()
     profile = webdriver.FirefoxProfile()
     profile.set_preference("browser.cache.disk.enable", False)
     profile.set_preference("browser.cache.memory.enable", False)
     profile.set_preference("browser.cache.offline.enable", False)
     profile.set_preference("network.http.use-cache", False)
     # NOTE(review): the profile above is built but never passed to
     # webdriver.Firefox() — likely a bug; confirm intent.
     driver = webdriver.Firefox()
     driver.get("https://c9.io/dashboard.html")
     driver.save_screenshot(self.directory_img + 'login.png')
     
     #Username
     username = driver.find_element_by_id("id-username")
     username.click()
     username.clear()
     username.send_keys(self.user, Keys.ARROW_DOWN)
     
     #Password
     password = driver.find_element_by_id("id-password")
     password.click()
     password.clear()
     password.send_keys(self.password, Keys.ARROW_DOWN)
     
     #Submit
     submit_button = driver.find_element_by_css_selector("button[type=submit]")
     # print submit_button.text
     
     # Click submition
     submit_button.click();
     time.sleep(5)
     driver.save_screenshot(self.directory_img + 'user_profile.png')
     
     # Target dir: visit the workspace itself to keep it warm.
     driver.get(self.target_workspace)
     time.sleep(10)
     
     self.log({'log_html': driver.page_source, 'log_file': output_content}) #make log
     driver.save_screenshot(self.directory_img + 'final_workspace.png')
     
     # End
     driver.quit()
     display.stop()
コード例 #10
0
def main(param):

    if len(param) != 2:
        sys.exit(-9)
    if len(param[1]) <= 0:
        sys.exit(-8)
    paths = param[0]
    shotsdir = paths.get('path', 'output.shotsdir').lstrip('"').rstrip('"')
    targets = param[1]

    display = Display(visible=0, size=(800, 600))
    display.start()

    binary = FirefoxBinary('/opt/firefox/firefox')
    browser = webdriver.Firefox(firefox_binary=binary)

    tgt_len = len(targets)
    for i, tgt in enumerate(targets):
        browser.get(tgt[0])
        browser.save_screenshot(shotsdir+'/'+tgt[1]+'.png')
        print '( %3d / %3d ) Took %s.png' % (i+1, tgt_len, tgt[1])

    browser.quit()

    display.stop()
コード例 #11
0
ファイル: utils.py プロジェクト: aaccomazzi/adsabs
class TestContext(object):
    """Thin wrapper around a Selenium Firefox session used by the tests.

    Tracks the most recently requested URL in ``self.url`` and can run
    the browser inside a virtual display when the test config asks for it.
    """
    
    def open_browser(self):
        # Create the Firefox session; optionally inside a virtual display.

#         if test_config.SELENIUM_USE_REMOTE:
#             dc = getattr(DesiredCapabilities, self.driver.upper())
#             dc['name'] = test_config.SELENIUM_TEST_NAME
#             cmd_exec = test_config.SELENIUM_REMOTE_CMD_EXEC
#             self.browser = webdriver.Remote(desired_capabilities=dc, command_executor=cmd_exec)

        if test_config.SELENIUM_USE_VIRTUALDISPLAY:
            # NOTE(review): relies on Display(...).start() returning the
            # Display instance (not None) — verify with the installed
            # pyvirtualdisplay version, or close() will fail.
            self.virtualdisplay = Display(backend=test_config.SELENIUM_VIRTUALDISPLAY_BACKEND, size=(600, 800)).start()

        self.browser = webdriver.Firefox(firefox_binary=FirefoxBinary(test_config.SELENIUM_FIREFOX_PATH))
        self.browser.implicitly_wait(test_config.SELENIUM_PAGE_WAIT)
        
    def close(self):
        # Quit the browser first, then the display it may be running in.
        self.browser.quit()
        if hasattr(self, 'virtualdisplay'):
            self.virtualdisplay.stop()
            
    def get(self, url):
        # Navigate and remember where we are.
        self.browser.get(url)
        self.url = url
    
    def follow_link(self, link):
        # Click a located element and resync our URL bookkeeping.
        link.click()
        self.url = self.browser.current_url
        
    def wait_for(self, by, thing):
        # Block until the element is present or the configured wait expires.
        wait = WebDriverWait(self.browser, test_config.SELENIUM_PAGE_WAIT)
        wait.until(EC.presence_of_element_located((by, thing)))
コード例 #12
0
ファイル: test.py プロジェクト: jaredbischof/patric-tests
def main(args):
    """Drive a logged-in smoke test of the PATRIC web UI.

    Parses credentials from the command line (the incoming ``args`` is
    immediately replaced by argparse's result), logs in, screenshots the
    home page, then loads the user's workspace twice — the first load is
    often empty.  Returns 0 on completion.
    """
    parser = argparse.ArgumentParser(description="Program for running tests on the PATRIC web interface.")
    parser.add_argument("user", metavar="user", help="Patric login username.")
    parser.add_argument("passwd", metavar="passwd", help="Patric login password.")
    parser.add_argument("--firebug", action="store_true", help="Open Firebug during test.")
    args = parser.parse_args()

    fp = webdriver.FirefoxProfile()
    if args.firebug:
        fp.add_extension(extension='extras/firebug-2.0.9.xpi')
        fp.set_preference("extensions.firebug.currentVersion", "2.0.9") #Avoid startup screen
        fp.set_preference("extensions.firebug.console.enableSites", "true")
        fp.set_preference("extensions.firebug.net.enableSites", "true")
        fp.set_preference("extensions.firebug.script.enableSites", "true")
        fp.set_preference("extensions.firebug.allPagesActivation", "on")

    # Create virtual display
    display = Display(visible=0, size=(1400, 950))
    display.start()

    # Create webdriver and retrieve url
    driver = webdriver.Firefox(firefox_profile=fp)
    driver.get(SITE_URL + '/login')

    # Wait for username input box to appear
    WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.ID, "dijit_form_TextBox_0")))

    # Set username and password, click login button
    userElement = driver.find_element_by_id("dijit_form_TextBox_0")
    pwdElement = driver.find_element_by_id("dijit_form_TextBox_1")
    userElement.send_keys(args.user)
    pwdElement.send_keys(args.passwd)
    loginElement = driver.find_element_by_id("dijit_form_Button_1")
    loginElement.click()
    time.sleep(3)

    # Retrieve home page, wait for an expected page element to load, take a screenshot
    driver.get(SITE_URL + '/portal/portal/patric/Home')
    WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.ID, "cart")))
    driver.set_window_size(1400, 950)
    driver.execute_script("window.scrollTo(0,0);")
    driver.get_screenshot_as_file("homepage_after_login.jpg")
    print "Saved screenshot to: homepage_after_login.jpg\n"

    # Retrieve ws url, wait for create folder button to appear
    ws_url = SITE_URL + '/workspace/' + args.user + '@patricbrc.org/home'
    driver.get(ws_url)
    WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.CLASS_NAME, "ActionButtonContainer")))
    time.sleep(5)

    # Have to reload page, because often time the workspace is empty on first load
    driver.get(ws_url)
    WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.CLASS_NAME, "ActionButtonContainer")))
#    createFolderButton = driver.find_element_by_class_name("ActionButton fa icon-folder-plus fa-2x")
#    createFolderButton.click()
    time.sleep(30)

    driver.quit()
    display.stop()
    return 0
コード例 #13
0
ファイル: get_ip2.py プロジェクト: alexandrtkachuk/python
def loadSite(url):
    """Load *url* through an HTTP proxy in headless Firefox and print the
    text of every cell in the page's network-info table.  Returns 1."""
    profile = webdriver.FirefoxProfile()
    profile.set_preference("network.proxy.type", 1)
    profile.set_preference("network.proxy.http", "74.84.131.34")
    profile.set_preference("network.proxy.http_port", int('80'))
    profile.update_preferences()

    display = Display(visible=0, size=(800, 600))
    display.start()

    driver_path = '/home/alexandr/www/html/python/prs/files/geckodriver'
    browser = webdriver.Firefox(firefox_profile=profile,
                                executable_path=driver_path)

    browser.delete_all_cookies()
    browser.get(url)
    tree = etree.HTML(browser.page_source)

    browser.close()
    display.stop()

    for cell in tree.xpath('//table[@class="network-info"]//tr/td'):
        print(cell.text)
    return 1
コード例 #14
0
ファイル: base.py プロジェクト: doanchienthang/superlists
class FunctionalTest(StaticLiveServerTestCase):
    """Selenium functional-test base: runs against an external server when
    a ``liveserver`` argument is supplied, otherwise the local one, with
    the browser inside a virtual display."""

    @classmethod
    def setUpClass(cls):
        for arg in sys.argv:
            if 'liveserver' in arg:
                # Target an externally provided server; skip local startup.
                cls.server_url = 'http://' + arg.split('=')[1]
                return
        super().setUpClass()
        cls.server_url = cls.live_server_url

    @classmethod
    def tearDownClass(cls):
        # Only tear down the local server if we actually started one.
        if cls.server_url == cls.live_server_url:
            super().tearDownClass()

    def setUp(self):
        self.display = Display(visible=0, size=(1024, 768))
        self.display.start()
        self.browser = webdriver.Firefox()

    def tearDown(self):
        self.browser.quit()
        self.display.stop()

    def check_for_row_in_list_table(self, row_text):
        rows = (self.browser
                .find_element_by_id('id_list_table')
                .find_elements_by_tag_name('tr'))
        self.assertIn(row_text, [row.text for row in rows])
コード例 #15
0
class TestCase(unittest.TestCase):
    """Flask + Selenium test case: in-memory SQLite database plus a
    headless Firefox for scraping tests."""

    def setUp(self):
        # Configure the Flask app for testing against a throwaway DB.
        app.config['TESTING'] = True
        app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///:memory:'
        self.app = app.test_client()
        db.create_all()

        # Browser runs inside a virtual display so tests work headlessly.
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.driver = webdriver.Firefox()

    def tearDown(self):
        db.session.remove()
        db.drop_all()

        self.driver.quit()
        self.display.stop()

    def test_extract_funds(self):
        # Live scrape; expects the real site to list well over 110 funds.
        funds = extract_funds(
            # some javascript going on that I can't figure out how to mock
            #'file:///%s/t/test_files/list_mutual_funds.html' % basedir,

            self.driver
        )

        self.assertTrue(len(funds) > 110)
コード例 #16
0
def work():
    """Log in and save the weekly Tower screenshot to DEFAULT_SAVE_PATH.

    Returns True on success, False when the config is missing or the
    screenshot step fails.
    """
    logging.info("start weeklys screenshot work")
    print ("start ... ")

    display = None
    if not DISPLAY:
        print ("hide display ... ")
        display = Display(visible=0, size=(1366, 768))
        display.start()

    try:
        config = getConfigObj()
        if config is None:  # was `== None`
            return False
        userName = config.get("USER", "UserName")
        userPWD = config.get("USER", "userPWD")

        ret = getTowerWeeklyScreenshot(userName, userPWD, DEFAULT_SAVE_PATH)

        if not ret:
            print ('Error, abort. Please check the log file "%s"' % LOG_FILE)
            return False

        logging.info("finish all work, exit.")
        return True
    finally:
        # BUG FIX: the early `return False` paths used to leak the
        # virtual display; stop it on every exit path.
        if display is not None:
            display.stop()
コード例 #17
0
ファイル: f.py プロジェクト: kamekame/alpha
def get_news():
    """Scrape headline <h3> texts from the deutschlandfunk.de front page.

    Returns a list of headline strings, or None (after printing an error)
    when there is no internet connection.
    """
    if not check_wlan():
        print("Error: Not connected to the internet")
        return

    from pyvirtualdisplay import Display
    import re

    display = Display(visible=0, size=(800, 600))
    display.start()

    driver = webdriver.Firefox()
    try:
        driver.get("http://www.deutschlandfunk.de/")
        # Article list lives in the second <section> of the page wrapper.
        source = driver.find_element_by_xpath('//*[@id="wrapper"]/div/section[2]/div[1]').get_attribute('innerHTML')

        n_articles = source.count('<article')
        print(str(n_articles) + " articles found.")

        return re.findall('<h3>(.+)</h3>', source)
    finally:
        # BUG FIX: browser and display leaked whenever scraping raised;
        # also quit() (not close()) so the driver process actually ends.
        driver.quit()
        display.stop()
コード例 #18
0
def run_selenium(landmark):
    """Visit http://www.<landmark> in headless Firefox while capturing a
    tcpdump trace, and pickle window.performance.timing to EXP_DIR.

    BUG FIX: the original mixed tabs and 8-space indentation (a TabError
    under Python 3) and leaked two file handles; normalized to spaces and
    used context managers.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    logTo(TEST_LOG, 'Selenium : Starting Selenium  for '+landmark, 'INFO', 'a')
    # Network interface to sniff on, recorded by the test-suite setup.
    with open(HOME_DIR+'/Desktop/one-time-test-suite/iface.txt', 'r') as iface_file:
        iface = iface_file.readline().split('\n')[0]
    tmpstmp = datetime.now().strftime("%s")
    profile = webdriver.FirefoxProfile()
    profile.update_preferences()
    browser = webdriver.Firefox(firefox_profile=profile)  # assign profile to browser
    browser.delete_all_cookies()
    logTo(TEST_LOG, ' Selenium : Starting tcpdump .. ', 'INFO', 'a')
    tcpcmd = 'tcpdump -i '+iface+' -w '+EXP_DIR+'/'+'tcpdump_'+landmark.split('.')[0]+'_'+tmpstmp
    args = shlex.split(tcpcmd)
    ptcpdmp = sub.Popen((args))
    time.sleep(10)
    logTo(TEST_LOG, ' Selenium : Starting get '+landmark, 'INFO', 'a')
    browser.get('http://www.'+landmark)
    time.sleep(5)
    perfData = browser.execute_script('return window.performance.timing')
    fname = EXP_DIR+'/'+'perfdata_'+landmark.split('/')[0]
    fname = fname.replace('.', '-')
    # BUG FIX: the pickle file handle was never closed.
    with open(fname, 'wb') as perf_file:
        pickle.dump(perfData, perf_file)
    logTo(TEST_LOG, 'Selenium : Writing done to '+EXP_DIR+'/perfdata_'+landmark, 'INFO', 'a')
    browser.quit()
    display.stop()
    ptcpdmp.terminate()
    logTo(TEST_LOG, 'Finished Selenium for '+landmark, 'INFO', 'a')
コード例 #19
0
class AdminTestCase(LiveServerTestCase):
    """End-to-end payment-flow test driven through a headless Firefox."""

    def setUp(self):
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.selenium = webdriver.Firefox()
        super(AdminTestCase, self).setUp()

    def tearDown(self):
        self.selenium.quit()
        self.display.stop()
        super(AdminTestCase, self).tearDown()

    def test_payment(self):
        """
        payment will be successful.
        """
        browser = self.selenium
        browser.get("%s/pay" % self.live_server_url)
        browser.implicitly_wait(20)
        browser.maximize_window()

        # Enter the amount and submit the payment form.
        browser.find_element_by_name("amount").send_keys("100000")
        browser.find_element_by_xpath('//input[@value="pay"]').click()

        # Come back from the gateway and verify the success message.
        browser.find_element_by_id("btn3").click()
        self.assertIn("successful", browser.page_source)
コード例 #20
0
def get_all_items():
    """Scrape every report-form entry from the Federal Reserve site.

    Iterates the report-form drop-down, submits each option, and collects
    the detail fields into one dict per form.  Returns a list of dicts,
    each with keys Description/OMB/Background/RespondentPanel/Frequency/
    PublicRelease (None when absent) plus 'FormNumber'.
    """
    # List to store all scraped data.
    all_items = list()

    # Headless display for the browser (see pyvirtualdisplay docs).
    display = Display(visible=0, size=(1024, 768))
    display.start()
    driver = webdriver.Firefox()

    # Starting page being scraped.
    driver.get("http://www.federalreserve.gov/apps/reportforms/default.aspx")
    # The report-form drop-down is located by its CSS id.
    main_menu = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CSS_SELECTOR,"#MainContent_ddl_ReportForms")))
    # The drop-down is an HTML list of <option> rows.
    form_options = main_menu.find_elements_by_tag_name("option")
    option_count = len(form_options)
    # Loop over every option — like scrolling the menu and picking each form.
    for form_i in xrange(1,option_count):
        form = form_options[form_i]
        form.click()
        # Keep the form number for the output row.
        form_id = form.text
        # Submit button, located by CSS selector.
        submit_button = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.CSS_SELECTOR,"#MainContent_btn_GetForm")))
        submit_button.click()
        # Fields of interest on the detail page.
        a = dict.fromkeys(['Description','OMB','Background','RespondentPanel','Frequency','PublicRelease'])
        for el in a.keys():
            try:
                item = driver.find_element_by_css_selector("#MainContent_lbl_"+el+"_data") 
                a[el] = item.text 
            except Exception:
                # BUG FIX: was a bare `except:` (also swallowed
                # KeyboardInterrupt).  Missing field — leave it as None.
                pass
        a['FormNumber'] = form_id
        all_items.append(a)

        # Reload and re-locate the options each iteration; reusing the old
        # elements raises StaleElementReferenceException after navigation.
        driver.get("http://www.federalreserve.gov/apps/reportforms/default.aspx")
        main_menu = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CSS_SELECTOR,"#MainContent_ddl_ReportForms")))
        form_options = main_menu.find_elements_by_tag_name("option")

    driver.close()
    display.stop()

    return all_items
コード例 #21
0
	def get_image(self):
		"""Scrape ``self.scrape_site`` for a usable image.

		Picks the first non-ad <img> whose id passes ``_check_id`` and whose
		dimensions pass ``_check_ratios``, records its id/description on
		``self`` and saves the HD version.  Raises if nothing suitable is
		found.
		"""
		## Uses supplied scrape site to find new pictures
		url = self.scrape_site
		# virtual display for headless runs
		display = Display(visible=0, size=(800, 600))
		display.start()

		with closing(Firefox()) as browser:
			browser.get(url)
			time.sleep(5) # TODO: fix with something less static, but still
			# multipurpose considering scrape_site as a db var
			imgs = browser.find_elements_by_tag_name('img')
			# TODO: fix this temporary workaround that prevents ad server data
			# from reaching the image checks
			no_ad_imgs = [i for i in imgs if 'adsrvr' not in \
				i.get_attribute('src')]
			for img in no_ad_imgs:
				src = img.get_attribute('src')
				alt = img.get_attribute('alt')
				# Image id is the path segment after "/photo/" in the src URL.
				image_id = re.findall("/photo/(.+?)/", src)[0]
				if(self._check_id(image_id) and self._check_ratios(src)):
					self.img_id = image_id
					self.description = alt
					self._save_hd_image()
					break
		display.stop()
		if (self.img_id):
			return
		raise Exception('Failed to find a suitable image: all out or bugged')
コード例 #22
0
ファイル: getallinone.py プロジェクト: junjunqian/cb
def openurl(companyname=first_arg):
    """Google "<companyname> crunchbase", open the top hit, archive its HTML.

    On success the page source is appended to 0515<name>.html and that
    filename is recorded in smallname.txt; on any failure the company name
    is recorded in missedname.txt instead.  Returns the page HTML, or the
    string 'none' on failure.
    """
    display = Display(visible=0, size=(1024, 768))
    display.start()
    browser = webdriver.Firefox()
    time.sleep(randint(8,10))
    try:
        browser.get('http://www.google.com')
        time.sleep(5)
        search = browser.find_element_by_name('q')
        input_text = companyname + str(" crunchbase")
        search.send_keys(input_text)
        time.sleep(randint(10,15))
        search.send_keys(Keys.RETURN)
        time.sleep(randint(10,15))
        # First result heading; strip the " | ..." suffix and spaces to
        # build the output filename.
        gn = browser.find_element_by_tag_name('h3').text
        gnc = str(gn).split(' | ')[0].replace(" ","")
        output_file = '0515' + gnc + '.html'
        browser.find_element_by_link_text(gn).click()
        time.sleep(randint(55,60))
        company_html = browser.page_source
        time.sleep(randint(5,10))
        with open("smallname.txt", 'a') as myfile:
            json.dump(output_file,myfile)
        with open(output_file, 'a+') as myfile:
            myfile.write(company_html)
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; narrowed to Exception.
        company_html = 'none'
        with open("missedname.txt", "a") as myfile:
            json.dump(companyname,myfile)
    time.sleep(1)
    browser.close()
    time.sleep(1)
    display.stop()
    return company_html
コード例 #23
0
class BrowserManager:
    """Owns a single splinter Browser inside a virtual display and hands it
    out one caller at a time via a simple busy-wait flag."""

    def __init__(self):
        self._lock = False

    def bootup(self):
        """Start the virtual display and the browser, honouring HTTP_PROXY."""
        self._display = Display(visible=0, size=(1024, 768))
        self._display.start()
        prefs = {}
        if 'HTTP_PROXY' in os.environ:
            proxy_url = os.environ['HTTP_PROXY']
            host = proxy_url.split(':')[1][2:]
            port = proxy_url.split(':')[-1]
            prefs['network.proxy.type'] = 1
            prefs['network.proxy.http'] = host
            prefs['network.proxy.http_port'] = port
            prefs['network.proxy.https'] = host
            prefs['network.proxy.https_port'] = port
        self.browser = Browser(profile_preferences=prefs)

    def obtain(self, background):
        """Block (politely, via background.wait) until the browser is free,
        then claim it."""
        while self._lock:
            background.wait('Browser lock', 15)
        self._lock = True
        return self.browser

    def release(self, background):
        """Give the browser back."""
        self._lock = False

    def shutdown(self):
        self.browser.quit()
        self._display.stop()
コード例 #24
0
ファイル: fb_auto_commenter.py プロジェクト: maomaotp/fb_post
def main():
    if (len(sys.argv) < 2):
        print "./fb_auto_commenter.py Brazil/English/French"
        return

    fb_auto_mail.write_file("---------------------------" + sys.argv[1] + "\n")

    try:
        display = Display(visible=0, size=(800,600))
        display.start()
        #打开区域权限
        logging.info(">>>>>>>open limit")
        open_limit()
        #读取googledocs 群组信息
        logging.info(">>>>>>>read from google docs")
        global french_groups_id
        french_groups_id = read_from_googledocs()
        #french_groups_id = ['309490585766406', '745769152175443', '1393190844256106', '1384933575085078', '1458512047714028', '1581747275377893', '778025652245798', '252563551503667', '1468450793419237']
        logging.info(french_groups_id)

        #打开任务进程
        logging.info(">>>>>>>start post task")
        start_task_process()

        #关闭权限
        logging.info(">>>>>>>close limit")
        close_limit()

        logging.info(">>>>>>>send result mail")
        fb_auto_mail.send_mail()
    except Exception as e:
        logging.error(e)
    finally:
        logging.info("end")
        display.stop()
コード例 #25
0
ファイル: etradepy.py プロジェクト: Anhmike/etradePythonAPI
def authorizeToken(requestTokenResponse):
  """
  Given a dict requestTokenResponse with the temporary oauth_token and oauth_token_secret,
  we generate a login link that a user should interact with to obtain an authCode <str>
  This process is automated with Splinter and pyvirtualdisplay

  Returns the authCode string scraped from the first <input> on the final
  authorization page.  NOTE(review): if the login form is never detected,
  `authCode` is unbound and the final return raises — confirm intended.
  """

  resource_owner_key = requestTokenResponse['oauth_token']
  resource_owner_secret = requestTokenResponse['oauth_token_secret']
  redirect_response = 'https://us.etrade.com/e/t/etws/authorize?key={}&token={}'.format(client_Consumer_Key,resource_owner_key)
  

  # print 'go to this link for authorization:', redirect_response

  # cannot parse redirect_response without a browser because the response is not pure json
  # oauth_response = oauth.parse_authorization_response(redirect_response)

  # Open URL in a new tab, if a browser window is already open.
  # webbrowser.open_new_tab(redirect_response)

  # Display allows the script to run in a linux cloud without a screen
  display = Display(visible=0, size=(1024, 768))
  display.start()


  # create a browser using Splinter library and simulate the workflow of a user logging in
  # various time.sleep(n) is inserted here to make sure login is successful even on slower connections
  with Browser() as browser:
    # Visit URL
    url = redirect_response
    browser.visit(url)
    
    if browser.is_element_present_by_name('txtPassword', wait_time=0):
      
      browser.fill('USER', etrade_settings.username)
      time.sleep(3)


      browser.find_by_name('txtPassword').click()
      
      time.sleep(3)
      # pprint(browser.html)

      browser.fill('PASSWORD', etrade_settings.userpass)
      # Find and click the 'logon' button
      browser.find_by_name('Logon').click()
      time.sleep(3)
      # Some accounts get an interstitial "continue" page first.
      if browser.is_element_present_by_name('continueButton', wait_time=2):
        browser.find_by_name('continueButton').click()

      # Accept the authorization request, then read the code off the page.
      browser.find_by_value('Accept').click()
      time.sleep(3)
      # authCode = browser.find_by_xpath("//@type='text'").first.value
      authCode = browser.find_by_tag("input").first.value
      time.sleep(3)


  display.stop()
  
  return authCode
コード例 #26
0
ファイル: soniclogin.py プロジェクト: XChikuX/hacker-scripts
def main() :
	"""Log in to the SonicWall captive portal at firewall.amritanet.edu and
	fetch Microsoft's NCSI probe, presumably to confirm/complete network
	access.  Runs headless via a pyvirtualdisplay Xvfb display.

	NOTE(review): Python 2 code (print statements); credentials below are
	hard-coded placeholders and must be filled in before use.
	"""
	# Virtual display so Firefox can run on a machine without a screen.
	display = Display(visible=0, size=(800, 600))
	display.start()
	authurl = "https://firewall.amritanet.edu:8443/auth1.html"
	delay = 3
	# NOTE(review): `delay` is assigned but never used below.
	print "\n\n[*]  Opening a New Session.."
	driver = webdriver.Firefox()
	driver.get(authurl)

	# Sanity check that we actually landed on the SonicWall login page.
	assert "Sonic" in driver.title

	print "\n\n[*] Enumerating Login Page.."
	user = driver.find_element_by_name("userName")
	passwd = driver.find_element_by_name("pwd")

	print "\n\n[*] Sending Credentials .. "
	user.send_keys("<user_name_here>")
	passwd.send_keys("<password_here>")
	passwd.send_keys(Keys.RETURN)

	# Fetch the MS connectivity-check URL -- presumably to verify the
	# login took effect; TODO confirm.
	driver.get("http://www.msftncsi.com/ncsi.txt")

	print "\n\n[*] Login Done!"
	driver.quit()
	display.stop()
コード例 #27
0
ファイル: views.py プロジェクト: mastinux/tebRotarapmoc
def retrieveYRdata(url):
    """Render `url` in headless Chrome, parse the resulting HTML with lxml,
    and pass every <table> that carries id/class/style attributes -- except
    the first such table -- to parse_table_element().

    NOTE(review): Python 2 code (print statement); needs Xvfb via
    pyvirtualdisplay and a chromedriver on PATH.
    """
    print "processing yddapRewop ..."

    # Virtual display lets Chrome run on a screenless host.
    display = Display(visible=0, size=(1024, 1024))
    display.start()

    # driver = webdriver.Firefox()

    # http://stackoverflow.com/questions/8255929/running-webdriver-chrome-with-selenium
    driver = webdriver.Chrome()
    driver.get(url)
    html = driver.page_source
    driver.quit()

    display.stop()

    # Parse the JS-rendered markup (a plain HTTP fetch would miss it).
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(html), parser)

    # `first` is an int used as a flag: the first qualifying table is
    # skipped (header/layout table, presumably -- TODO confirm).
    first = 1

    table_list = list()

    for table_element in tree.getiterator("table"):
        if "id" in table_element.keys() and "class" in table_element.keys() and "style" in table_element.keys():
            if not first:
                table_list.append(table_element)
            first = 0

    for table_element in table_list:
        parse_table_element(table_element)
コード例 #28
0
ファイル: runner.py プロジェクト: benvand/profiler
class SeleniumRunner(object):
    """Provision a headless Chrome session for the duration of a call.

    Works two ways: as a context manager (``with runner as driver:``) it
    starts an Xvfb display plus a Chrome driver and tears both down on
    exit; as a decorator it injects the live driver as the second
    positional argument of the wrapped method.
    """

    def __call__(self, f):
        # Decorator form: open the browser around each invocation and
        # hand the driver to the wrapped function.
        @functools.wraps(f)
        def wrapper(_self, *args, **kwargs):
            with self as driver:
                return f(_self, driver, *args, **kwargs)

        return wrapper

    def __enter__(self):
        # The virtual display lets Chrome run on machines with no screen.
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.driver = webdriver.Chrome()
        return self.driver

    def __exit__(self, *args, **kwargs):
        # Teardown is best-effort: either attribute may have been removed
        # by outside code, so only AttributeError is swallowed.
        try:
            self.driver.quit()
        except AttributeError:
            pass
        try:
            self.display.stop()
        except AttributeError:
            pass
コード例 #29
0
class Collab(threading.Thread):
    """Worker thread that opens a headless Chrome session on the
    collaborative document at URL + DOCID (module globals) and blocks in
    __init__ until an element with class `selector` is present.
    """
    def __init__(self, selector):
        threading.Thread.__init__(self)
        # Name-mangled attributes keep the display/driver private to this class.
        self.__display = Display(visible=0, size=(800, 600))
        self.__display.start()
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--no-sandbox")
        chrome_options.binary_location = CHROME_LOCATION
        self.__driver = webdriver.Chrome("/opt/selenium/chromedriver",
                                         chrome_options=chrome_options,
                                         service_args=["--verbose",
                                                       "--log-path=/home/log"])
        self.__driver.get(URL + DOCID)
        self.content_editor = ""
        self.alive = False
        self.select = None
        # Wait (20s implicit wait per attempt) until the target element is
        # found.  NOTE(review): find_element_by_class_name raises when the
        # element is absent, so this loop rarely gets a second iteration
        # -- confirm intended.
        while self.select is None:
            self.__driver.implicitly_wait(20)
            self.select = self.__driver.find_element_by_class_name(
                selector)

    def stop(self):
        # Flag the thread as done (run() is presumably defined elsewhere),
        # then tear down the browser window and the virtual display.
        self.alive = False
        self.__driver.close()
        self.__display.stop()
コード例 #30
0
ファイル: screenshot.py プロジェクト: Aypak/ka-lite
def process_screenshots(app, env):
    """Render all screenshots collected in ``env`` by spawning the external
    screenshot subprocess, optionally inside a virtual X display.

    No-op when no screenshots were collected, when screenshot creation is
    disabled in the Sphinx app config, or during a pure gettext (message
    extraction) build.

    Raises:
        Exception: if the screenshot subprocess exits with a nonzero code.
    """
    if not hasattr(env, 'screenshot_all_screenshots'):
        return

    if not app.config['screenshots_create']:
        print("Not doing screenshots on maggies farm no more")
        return

    # Don't bother building screenshots if we're just collecting messages.
    # Just checks if we invoked the build command with "gettext" in there somewhere.
    # Checked *before* starting the virtual display so this early return
    # cannot leak a running Xvfb process (the original started it first).
    if "gettext" in sys.argv:
        return

    if 'SPHINX_SS_USE_PVD' in os.environ.keys() and os.environ['SPHINX_SS_USE_PVD'] == "true":
        from pyvirtualdisplay import Display
        # Start a virtual headless display
        display = Display(visible=0, size=(1024, 768))
        display.start()
    else:
        display = None

    # list() so json.dumps works on Python 3, where map() is lazy.
    all_args = list(map(lambda x: x['from_str_arg'], env.screenshot_all_screenshots))
    # If building in a different language, start the server in a different language
    command = SCREENSHOT_COMMAND + SCREENSHOT_COMMAND_OPTS + \
              [re.sub(r"\s", r"", "--from-str={0}".format(json.dumps(all_args)))]
    language = env.config.language
    if language:
        command += ["--lang={0}".format(language)]
    # Named 'proc' -- the original local was called 'subprocess', which
    # shadowed the subprocess module.
    proc = Popen(command)
    proc.wait()
    try:
        if proc.returncode:
            raise Exception("Screenshot process had nonzero return code: {0}".format(proc.returncode))
    finally:
        if display:
            display.stop()
コード例 #31
0
def run():
    """Launch one ABR video-streaming experiment: start the ABR server
    subprocess, open the test page in Chrome (optionally over QUIC and/or
    inside a virtual display), wait for the video to finish, then tear
    everything down.  Reads all configuration from the module-level `args`.
    """
    #read input variables
    ABR_ALG = args.abr_alg  #abr algorithm to execute
    TIME = args.time_seconds  # time to sleep ins seconds
    SERVER_ADDR = args.server_addr  #server address to open
    STREAM_ID = str(args.stream_id)
    TRACE = args.trace
    EXP_ID = args.result_dir + '/log_' + ABR_ALG + '_' + TRACE + '_' + STREAM_ID  #path to logsile

    #print >> sys.stderr, 'udp', args.udp
    if args.udp:
        url = 'http://localhost/' + 'myindex_' + ABR_ALG + '_udp.html'
    else:
        url = 'http://localhost/' + 'myindex_' + ABR_ALG + '.html'

    # timeout signal
    # Hard safety net: SIGALRM fires 30s after the experiment should have
    # ended, so a hung browser/server cannot wedge the run forever.
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(TIME + 30)

    try:
        # One server port per parallel stream.
        port = ABR_SERVER_PORT_OFFSET + args.stream_id

        # Note: all the video servers have to take the same params
        #
        #

        log_file_dir_abr_server = os.path.join(args.result_dir, 'result')
        if not os.path.exists(log_file_dir_abr_server):
            os.makedirs(log_file_dir_abr_server, 0o777)

        python_v = 'python3'
        command = [
            python_v, args.server_module,
            str(port), ABR_ALG, EXP_ID,
            str(TIME), args.result_dir, STREAM_ID
        ]

        if args.debug:
            command.append('--debug')
        if args.display:
            command.append('--display')

        # `proc` is global so the module-level timeout handler can kill it.
        global proc

        # Flatten the argv list into one string because Popen is invoked
        # with shell=True below.
        cmd = ''
        for x in command:
            cmd += x + ' '

        logger.info("Starting the server located at {}".format(command[1]))
        proc = subprocess.Popen([cmd], stdout=subprocess.PIPE, shell=True)

        sleep(10)

        url += '?p=' + str(port)

        print(
            port
        )  # This has to be the only print statement up to this point. This is because every time we call print,
        # its string is passed to competitive_tests.py using pipes
        sys.stdout.flush()

        #r = requests.post('http://localhost:' + str(port), json={'suggested_bitrate': 4300})

        # to not display the page in browser (unless -b option)
        # NOTE(review): the flag name suggests the opposite of what happens
        # here -- when show_browser is truthy the browser is hidden inside
        # a virtual display; confirm intended semantics.
        if args.show_browser:
            logger.info("Not displaying the browser")
            display = Display(visible=0, size=(300, 400))
            display.start()

        #init chrome driver
        '''
        default_chrome_user_dir = 'abr_browser_dir/chrome_data_dir'
        chrome_user_dir = '/tmp/chrome_user_dir_id_'
        os.system('rm -r ' + chrome_user_dir)
        os.system('cp -r ' + default_chrome_user_dir + ' ' + chrome_user_dir)
        chrome_driver = 'abr_browser_dir/chromedriver'
	'''

        # Cache/autoplay flags keep the playback deterministic between runs.
        options = Options()
        options.add_argument('--ignore-certificate-errors')
        options.add_argument('--autoplay-policy=no-user-gesture-required')
        options.add_argument("--disable-infobars")
        options.add_argument('--disable-application-cache')
        options.add_argument('--media-cache-size=1')
        options.add_argument("--disk-cache-size=1")
        options.add_argument(
            "--disable-web-security"
        )  # only needed when running tests over the UDP proxy
        options.add_argument("--explicitly-allowed-ports=6000")
        options.add_argument("--auto-open-devtools-for-tabs")

        logger.info("Options have been added to chrome driver")

        #enable quic
        if args.quic:
            logger.info("Enabling quic")
            options.add_argument('--no-proxy-server')
            options.add_argument('--enable-quic')
            options.add_argument('--quic-version=QUIC_VERSION_39')
            options.add_argument('--quic-host-whitelist="https://' +
                                 SERVER_ADDR + '" "https://' + SERVER_ADDR +
                                 '"')
            options.add_argument('--origin-to-force-quic-on=' + SERVER_ADDR)

        # start chrome
        #driver=webdriver.Chrome(chrome_driver, chrome_options=options)
        driver_path = './src/chromedriver'
        driver = webdriver.Chrome(chrome_options=options,
                                  executable_path=driver_path)
        driver.set_page_load_timeout(30)
        driver.get(url)

        logger.info("Chrome driver started")

        #run for @TIME seconds
        wait_for_video_end(pipe_out=proc.stdout, timeout=TIME)
        logger.info("Video ended")
        driver.quit()
        logger.info("Driver quitted")
        if args.show_browser:
            logger.info("Stopping display")
            display.stop()

        logger.info("Sending SIGINT to the video server")
        proc.kill()
        proc.wait()

    except Exception as e:
        # Best-effort cleanup: each resource may or may not exist depending
        # on where the failure occurred, so each teardown gets its own try.
        logging.error(traceback.format_exc())
        try:
            display.stop()
        except:
            logging.error(traceback.format_exc())
        try:
            driver.quit()
        except:
            logging.error(traceback.format_exc())
        try:
            proc.kill()
            proc.wait()
        except:
            logging.error(traceback.format_exc())
コード例 #32
0
    print i + 1, str(webslist[i])
    log.write(str(i + 1) + str(webslist[i]) + '\n')
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))

    driver = webdriver.Chrome(chrome_options=chrome_options)
    driver.set_page_load_timeout(60)
    proxy.new_har()

    web = webslist[i]

    try:
        driver.get(web)
    except:
        print "Error"
        log.write('Error\n')
        driver.quit()
        continue
    else:
        proxy.har
        filename = 'httpdata22/' + str(i) + '.har'

        out_f = open(filename, 'w')
        json.dump(proxy.har, out_f)
        out_f.close()
        driver.quit()

# End-of-crawl teardown: stop the HAR-capturing proxy server and the
# virtual display (presumably started earlier in this script -- confirm
# upstream), then close the log file.
server.stop()
display.stop()
log.close()
コード例 #33
0
class PassQuiz:
    """Repeatedly attempts and immediately submits quizzes on the
    elearning.surgeons.org Moodle course, driving headless Firefox inside
    a virtual X display.
    """

    def __init__(self):
        # Course landing page listing the sections whose quizzes are taken.
        self.url = 'http://elearning.surgeons.org/course/view.php?' \
            'id=127&section=0'
        # Virtual display lets Firefox run on a machine without a screen.
        self.display = Display(visible=0, size=(1024, 768))
        self.display.start()
        self.driver = webdriver.Firefox()

    def sign_in(self):
        """
        Authorization
        """
        # Credentials come from the environment.  NOTE(review): the
        # password is read from the PASSWORD variable, not EMAIL_PASSWORD.
        EMAIL = os.getenv('EMAIL')
        EMAIL_PASSWORD = os.getenv('PASSWORD')

        # find and fill inputs in form
        username = self.driver.find_element_by_name("username")
        username.send_keys(EMAIL)
        password = self.driver.find_element_by_name("password")
        password.send_keys(EMAIL_PASSWORD)
        # send form data
        self.driver.find_element_by_id("regularsubmit").click()

    def get_body(self):
        """Return the HTML source of the page currently loaded in the driver."""
        return self.driver.page_source

    def pass_quiz(self):
        """Sign in, then for sections 1-4 open each of the first three
        activities and attempt/submit its quiz up to 500 times.
        """
        driver = self.driver
        driver.get(self.url)

        self.sign_in()

        for s in range(1, 5):
            for j in range(1, 4):
                driver.get(
                    'http://elearning.surgeons.org/course/view.php?id=127&section=0'
                )

                category = driver.find_element_by_xpath('//li[@id="section-' +
                                                        str(s) +
                                                        '"]/div/ul/li[' +
                                                        str(j) +
                                                        ']/div/div/div/div/a')
                category.click()

                # Attempt the quiz many times; `_` since the index is unused.
                for _ in range(0, 500):
                    re_attempt = driver.find_element_by_xpath(
                        "//input[@value='Re-attempt quiz'] | "
                        "//input[@value='Attempt quiz now']")
                    re_attempt.click()

                    finish_quiz = driver.find_element_by_class_name(
                        "endtestlink")
                    finish_quiz.click()

                    finish_all = driver.find_element_by_xpath(
                        "//input[@value='Submit all and finish']")
                    finish_all.click()

                    time.sleep(2)

                    confirm = driver.find_element_by_xpath(
                        "//div[@class='confirmation-dialogue']/div/"
                        "input[@value='Submit all and finish']")
                    confirm.click()

                    finish_review = driver.find_element_by_partial_link_text(
                        'Finish review')
                    finish_review.click()

    def __del__(self):
        # Best-effort cleanup (stray leftover `pass` statement removed).
        self.driver.delete_all_cookies()
        self.driver.close()
        self.display.stop()
コード例 #34
0
def main():
    """Entry point for the consent-management (CMP) crawler.

    Parses docopt CLI arguments, filters the requested site list,
    configures OpenWPM browsers/logging/database paths, runs the chosen
    CMP extraction (cookiebot / onetrust / termly / all) plus a browse
    step over every site, and shuts everything down.

    Returns:
        0 on success, 1 when the filtered site list is empty.
    """
    argv = None

    ## Set of test arguments, uncomment to try the crawler
    # argv = ["cookiebot", "-u", "https://purplemath.com/", "-u", "https://gamefly.com/", "-n", "2"]
    # argv = ["onetrust", "-n", "5", "-u", "https://www.metabomb.net/", "-u", "https://www.maytag.com/", "-u", "https://www.aveda.com/", "-u", "https://www.equipmenttrader.com/", "-u", "https://www.tiffany.com/"]
    # argv = ["all", "-n", "1", "-u", "https://www.equipmenttrader.com/"]

    # parse usage docstring and get arguments
    cargs = docopt(__doc__, argv=argv)
    sites: Set[str] = retrieve_cmdline_urls(cargs)
    filtered_sites: List[str] = filter_bad_urls_and_sort(sites)

    # safety check
    if len(filtered_sites) == 0:
        print("Website crawl list is empty. Aborting...", file=sys.stderr)
        return 1

    # set up OpenWPM
    num_browsers = int(cargs["--num_browsers"])
    manager_params, browser_params = TaskManager.load_default_params(
        num_browsers)
    for i in range(num_browsers):
        setup_browser_config(browser_params[i])

    # define output directories
    manager_params["output_format"] = "local"

    manager_params["log_directory"] = "./logs/"
    os.makedirs(manager_params["log_directory"], exist_ok=True)

    # define log file and database paths
    # Timestamped names so repeated runs never clobber each other.
    now = datetime.now().strftime('%Y%m%d_%H%M%S')
    manager_params["log_file"] = f"crawl_{now}.log"

    # Database filename
    if cargs["--use_db"]:
        db_path, db_fn = os.path.split(cargs["--use_db"])
        manager_params["data_directory"] = db_path
        manager_params["database_name"] = db_fn
    else:
        manager_params["data_directory"] = "./collected_data/"
        manager_params["database_name"] = f"crawl_data_{now}.sqlite"
    os.makedirs(manager_params["data_directory"], exist_ok=True)

    # activate pyvirtualdisplay
    # Lets the OpenWPM browsers run on a headless (screenless) host.
    disp = Display(backend="xvfb")
    disp.start()

    # prevent shutdown due to failures
    manager_params["failure_limit"] = 16384

    # setting up the TaskManager creates the logger. then we can retrieve a sub-logger, and set it up.
    manager = TaskManager.TaskManager(manager_params, browser_params)
    logger = manager.logger

    total_commands = len(filtered_sites)

    # callback, executed once command sequence completes
    # NOTE: `completed`/`interrupted` are module-level counters shared
    # across callbacks.
    def progress_report(success: bool):
        global completed, interrupted
        if success:
            completed += 1
            logger.info("Command sequence completed.")
        else:
            interrupted += 1
            logger.warning("Command sequence has been interrupted!")
        logger.info("%i/%i completed, %i/%i interrupted" %
                    (completed, total_commands, interrupted, total_commands))

    # crawl each site
    # Can alter some parameters here if needed
    for j, site in enumerate(filtered_sites):
        command_sequence = CommandSequence.CommandSequence(
            site,
            site_rank=j,
            reset=True,
            blocking=False,
            callback=progress_report)

        if cargs["all"]:
            # CMP crawl and Browse functions consolidated into the same command
            # this is done such that browse can be aborted early if CMP is not found
            command_sequence.run_consent_crawl(num_links=10,
                                               sleep=1.0,
                                               timeout=180,
                                               abort_browse_early=True,
                                               subpage_timeout=10.0)
        else:
            # legacy variants of the consent crawler commands. Only a single CMP active.
            if cargs["cookiebot"]:
                command_sequence.try_extract_cookiebot_data(sleep=1.0,
                                                            timeout=60)
            elif cargs["onetrust"]:
                command_sequence.try_extract_onetrust_data(sleep=1.0,
                                                           timeout=60)
            elif cargs["termly"]:
                command_sequence.try_extract_termly_data(sleep=1.0, timeout=60)

            # browse the page to retrieve additional cookies
            command_sequence.browse(num_links=20,
                                    sleep=1.0,
                                    timeout=120,
                                    subpage_timeout=10.0)

        # Execute the two commands
        manager.execute_command_sequence(command_sequence)

    # shuts down the browsers and waits for the data to finish logging
    manager.close()
    disp.stop()

    return 0
コード例 #35
0
ファイル: EyeWitness.py プロジェクト: ziednamouchi/EyeWitness
def multi_mode(cli_parsed):
    """Multi-target EyeWitness scan: screenshots web targets with a pool of
    worker processes (optionally cycling User-Agents), then captures
    VNC/RDP targets via a Qt4/twisted reactor, and finally writes reports.
    Progress is persisted in an sqlite DB so scans can be resumed.

    NOTE(review): Python 2 code (print statements, xrange, iteritems).
    """
    dbm = db_manager.DB_Manager(cli_parsed.d + '/ew.db')
    dbm.open_connection()
    if not cli_parsed.resume:
        dbm.initialize_db()
    dbm.save_options(cli_parsed)
    # multiprocessing.Manager primitives shared by the worker pool.
    m = Manager()
    targets = m.Queue()
    lock = m.Lock()
    multi_counter = m.Value('i', 0)
    display = None

    # SIGINT handler: persist state and tell the user how to resume.
    def exitsig(*args):
        dbm.close()
        if current_process().name == 'MainProcess':
            print ''
            print 'Resume using ./EyeWitness.py --resume {0}'.format(
                cli_parsed.d + '/ew.db')
        os._exit(1)

    signal.signal(signal.SIGINT, exitsig)
    if cli_parsed.resume:
        pass
    else:
        # Fresh scan: seed the DB with every target parsed from the input.
        url_list, rdp_list, vnc_list = target_creator(cli_parsed)
        if any((cli_parsed.web, cli_parsed.headless)):
            for url in url_list:
                dbm.create_http_object(url, cli_parsed)
        for rdp in rdp_list:
            dbm.create_vnc_rdp_object('rdp', rdp, cli_parsed)
        for vnc in vnc_list:
            dbm.create_vnc_rdp_object('vnc', vnc, cli_parsed)

    if any((cli_parsed.web, cli_parsed.headless)):
        # Hide the real browser behind Xvfb unless the user asked to see it.
        if cli_parsed.web and not cli_parsed.show_selenium:
            display = Display(visible=0, size=(1920, 1080))
            display.start()

        multi_total = dbm.get_incomplete_http(targets)
        if multi_total > 0:
            if cli_parsed.resume:
                print 'Resuming Web Scan ({0} Hosts Remaining)'.format(
                    str(multi_total))
            else:
                print 'Starting Web Requests ({0} Hosts)'.format(
                    str(multi_total))

        # Never spawn more workers than there are targets.
        if multi_total < cli_parsed.threads:
            num_threads = multi_total
        else:
            num_threads = cli_parsed.threads
        # One None sentinel per worker signals end-of-queue.
        for i in xrange(num_threads):
            targets.put(None)
        try:
            workers = [
                Process(target=worker_thread,
                        args=(cli_parsed, targets, lock, (multi_counter,
                                                          multi_total)))
                for i in xrange(num_threads)
            ]
            for w in workers:
                w.start()
            for w in workers:
                w.join()
        except Exception as e:
            print str(e)

        # Set up UA table here
        if cli_parsed.cycle is not None:
            ua_dict = get_ua_values(cli_parsed.cycle)
            if not cli_parsed.ua_init:
                # First UA pass: build one UA row per successfully
                # captured host per browser string.
                dbm.clear_table("ua")
                completed = dbm.get_complete_http()
                completed[:] = [x for x in completed if x.error_state is None]
                for item in completed:
                    for browser, ua in ua_dict.iteritems():
                        dbm.create_ua_object(item, browser, ua)

                cli_parsed.ua_init = True
                dbm.clear_table("opts")
                dbm.save_options(cli_parsed)

            # Re-screenshot each host once per configured User-Agent.
            for browser, ua in ua_dict.iteritems():
                targets = m.Queue()
                multi_counter.value = 0
                multi_total = dbm.get_incomplete_ua(targets, browser)
                if multi_total > 0:
                    print(
                        "[*] Starting requests for User Agent {0}"
                        " ({1} Hosts)").format(browser, str(multi_total))
                if multi_total < cli_parsed.threads:
                    num_threads = multi_total
                else:
                    num_threads = cli_parsed.threads
                for i in xrange(num_threads):
                    targets.put(None)
                workers = [
                    Process(target=worker_thread,
                            args=(cli_parsed, targets, lock,
                                  (multi_counter, multi_total), (browser, ua)))
                    for i in xrange(num_threads)
                ]
                for w in workers:
                    w.start()
                for w in workers:
                    w.join()

    if any((cli_parsed.vnc, cli_parsed.rdp)):
        # VNC/RDP capture runs on a Qt event loop driven by twisted.
        log._LOG_LEVEL = log.Level.ERROR
        multi_total, targets = dbm.get_incomplete_vnc_rdp()
        if multi_total > 0:
            print ''
            print 'Starting VNC/RDP Requests ({0} Hosts)'.format(
                str(multi_total))

            app = QtGui.QApplication(sys.argv)
            # Idle timer keeps the Qt loop responsive to Python signals.
            timer = QTimer()
            timer.start(10)
            timer.timeout.connect(lambda: None)

            # add qt4 reactor
            import qt4reactor
            qt4reactor.install()
            from twisted.internet import reactor

            for target in targets:
                if os.path.dirname(cli_parsed.d) != os.path.dirname(
                        target.screenshot_path):
                    target.set_paths(cli_parsed.d)
                tdbm = db_manager.DB_Manager(cli_parsed.d + '/ew.db')
                if target.proto == 'vnc':
                    reactor.connectTCP(
                        target.remote_system, target.port,
                        vnc_module.RFBScreenShotFactory(
                            target.screenshot_path, reactor, app, target,
                            tdbm))
                else:
                    reactor.connectTCP(
                        target.remote_system, int(target.port),
                        rdp_module.RDPScreenShotFactory(
                            reactor, app, 1200, 800, target.screenshot_path,
                            cli_parsed.timeout, target, tdbm))
            reactor.runReturn()
            app.exec_()

    # Teardown and report generation.
    if display is not None:
        display.stop()
    results = dbm.get_complete_http()
    vnc_rdp = dbm.get_complete_vnc_rdp()
    dbm.close()
    m.shutdown()
    write_vnc_rdp_data(cli_parsed, vnc_rdp)
    sort_data_and_write(cli_parsed, results)
    if cli_parsed.ocr:
        # OCR pass over RDP screenshots; missing files are skipped.
        for target in targets:
            try:
                rdp_module.parse_screenshot(cli_parsed.d, target)
            except IOError:
                pass
0
ファイル: EyeWitness.py プロジェクト: ziednamouchi/EyeWitness
def single_mode(cli_parsed):
    """Screenshot a single target URL with selenium (web) or phantomjs
    (headless), optionally repeating the capture once per configured
    User-Agent, and write an HTML report into cli_parsed.d.

    NOTE(review): Python 2 code (print statements, iteritems).
    """
    display = None
    if cli_parsed.web:
        create_driver = selenium_module.create_driver
        capture_host = selenium_module.capture_host
        # Hide the real browser behind Xvfb unless explicitly shown.
        if not cli_parsed.show_selenium:
            display = Display(visible=0, size=(1920, 1080))
            display.start()
    elif cli_parsed.headless:
        if not os.path.isfile(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'bin', 'phantomjs')):
            print(" [*] Error: You are missing your phantomjs binary!")
            print(" [*] Please run the setup script!")
            sys.exit(0)
        create_driver = phantomjs_module.create_driver
        capture_host = phantomjs_module.capture_host

    url = cli_parsed.single
    http_object = objects.HTTPTableObject()
    if cli_parsed.active_scan:
        http_object._active_scan = True
    http_object.remote_system = url
    http_object.set_paths(cli_parsed.d,
                          'baseline' if cli_parsed.cycle is not None else None)
    # NOTE(review): duplicate of the active_scan assignment above --
    # harmless, but presumably a copy/paste leftover.
    if cli_parsed.active_scan:
        http_object._active_scan = True

    web_index_head = create_web_index_head(cli_parsed.date, cli_parsed.time)

    if cli_parsed.cycle is not None:
        print 'Making baseline request for {0}'.format(
            http_object.remote_system)
    else:
        print 'Attempting to screenshot {0}'.format(http_object.remote_system)
    driver = create_driver(cli_parsed)
    result, driver = capture_host(cli_parsed, http_object, driver)
    result = default_creds_category(result)
    if cli_parsed.resolve:
        result.resolved = resolve_host(result.remote_system)
    driver.quit()
    # UA cycling: re-capture the host once per User-Agent string and
    # attach each capture to the baseline result.
    if cli_parsed.cycle is not None and result.error_state is None:
        ua_dict = get_ua_values(cli_parsed.cycle)
        for browser_key, user_agent_value in ua_dict.iteritems():
            print 'Now making web request with: {0} for {1}'.format(
                browser_key, result.remote_system)
            ua_object = objects.UAObject(browser_key, user_agent_value)
            ua_object.copy_data(result)
            driver = create_driver(cli_parsed, user_agent_value)
            ua_object, driver = capture_host(cli_parsed, ua_object, driver)
            ua_object = default_creds_category(ua_object)
            result.add_ua_data(ua_object)
            driver.quit()
    if display is not None:
        display.stop()
    html = result.create_table_html()
    with open(os.path.join(cli_parsed.d, 'report.html'), 'w') as f:
        f.write(web_index_head)
        f.write(create_table_head())
        f.write(html)
        f.write("</table><br>")
0
ファイル: scrape.py プロジェクト: cmb1908/graze
class Scrape(object):
    """Context-managed headless Chrome scraper used to bulk-download NSW
    revenue (.slk) exports; downloads land in the current working directory.
    """
    def __init__(self, log=False):
        # `log` is forwarded to the Captcha helper (verbosity, presumably).
        self.log = log
        # Tall virtual display so long pages render without scrolling.
        self.display = Display(visible=0, size=(800, 2400))
        self.display.start()
        logging.info('Initialized virtual display..')

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--no-sandbox')

        # Auto-download into cwd without a save-dialog prompt.
        chrome_options.add_experimental_option(
            'prefs', {
                'download.default_directory': os.getcwd(),
                'download.prompt_for_download': False,
            })
        logging.info('Prepared chrome options..')

        self.browser = webdriver.Chrome(chrome_options=chrome_options)
        logging.info('Initialized chrome browser..')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Quit the browser before tearing down its X display.
        self.browser.quit()
        self.display.stop()

    def click_through(self, button):
        """Click `button` and wait until the element goes stale, i.e. the
        page it belonged to has been replaced."""
        button.click()

        def link_has_gone_stale():
            try:
                # poll the link with an arbitrary call
                button.find_elements_by_id('doesnt-matter')
                return False
            except exceptions.StaleElementReferenceException:
                return True

        wait_for(link_has_gone_stale)

    def scrape(self, url):
        """Navigate the browser to `url` and log the resulting title."""
        self.browser.get(url)
        logging.info('Accessed %s ..', url)

        logging.info('Page title: %s', self.browser.title)

    def download_nsw(self, year=1989, retry=5):
        '''
        @param retry Number of retry attempts on captcha
        '''

        self.scrape(
            'https://www.apps08.osr.nsw.gov.au/erevenue/ucm/ucm_list.php')
        # Solve/bypass the entry captcha before the form is usable.
        cobj = Captcha("//img[@id='captcha']",
                       "//input[@id='gd_securityCode']",
                       "//button[@id='captcha']", retry, self.log)
        cobj.bypass(self.browser)

        er = self.browser.find_element_by_xpath("//select[@name='g_range']")
        select = Select(er)
        select.select_by_value('6')

        # Two exports per year (January and July), from `year` up to 2018;
        # 2018 gets only the January export.
        for y in range(year, 2019):
            for q in range(1, 8, 6):
                if q == 7 and y == 2018:
                    break
                ed = self.browser.find_element_by_xpath(
                    "//input[@id='g_date']")
                # Select-all then type, replacing the previous date value.
                ed.send_keys(Keys.CONTROL + 'a')
                ed.send_keys('01/0%d/%d' % (q, y))
                esb = self.browser.find_element_by_xpath(
                    "//button[@id='g_submit']")
                esb.click()
                time.sleep(1)
                eeb = self.browser.find_element_by_xpath(
                    "//button[@id='OpenResultDialog']")
                eeb.click()
                time.sleep(1)
                eeb = self.browser.find_element_by_xpath(
                    "//button[@name='export_download']")
                eeb.click()
                # Fixed 20s wait for the download to finish before renaming
                # the exported file to a dated name.
                time.sleep(20)
                os.rename("ucmlist.slk", "ucmlist-%d-0%d-01.slk" % (y, q))
コード例 #38
0
ファイル: scraper.py プロジェクト: awcrosby/media-search
class Scraper():
    """Scraper parent class, child classes are media streaming sites.

    Child classes are expected to provide ``self.source`` (a dict template
    for the streaming-source records) -- TODO confirm against subclasses.
    """
    def __init__(self):
        """Sets creds for each instance (loaded from ./creds.json)."""
        with open('creds.json', 'r') as f:
            self.creds = json.loads(f.read())

    def start_driver(self, window_size='--window-size=1920,1080'):
        """Starts headless chrome browser/driver.

        window_size: a Chrome CLI flag string, e.g. '--window-size=1920,1080'.
        """
        logging.info('starting driver')
        self.display = Display(visible=0)
        # self.display = Display(visible=0, size=(1920, 1080))
        self.display.start()

        options = Options()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')  # likely necessary
        options.add_argument(window_size)
        self.driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
        self.driver.implicitly_wait(10)  # seconds

    def stop_driver(self):
        """Stops headless browser/driver.

        Fix: the browser is quit *before* the virtual display is stopped;
        the original stopped the display first, which can make the driver
        hang or error on quit once its X display is gone.
        """
        logging.info('stopping driver')
        self.driver.quit()
        self.display.stop()

    def lookup_and_write_medias(self, medias, mtype):
        """Takes list of movies or shows, searches themoviedb,
        creates object to write to database, then inserts if new
        or updates timestamp if not new.
        """
        logging.info('len(medias) before take unique: {}'.format(len(medias)))
        # get unique: list of dict into list of tuples, set, back to dict
        medias = [dict(t) for t in set([tuple(d.items()) for d in medias])]
        logging.info('len(medias) after take unique: {}'.format(len(medias)))

        for m in medias:
            source_to_write = dict(self.source)

            # if media link exists, set source link, try link db lookup / update
            if 'link' in m.keys():
                source_to_write['link'] = m['link']
                full_media = flaskapp.db_lookup_via_link(m['link'])
                if full_media:
                    # logging.info(u'db media link found: {}'.format(m['title']))
                    flaskapp.update_media_with_source(full_media,
                                                      source_to_write)
                    continue

            # link url was not in database, therefore do themoviedb search
            sleep(0.2)  # small delay between remote tmdb queries
            year = m.get('year', '')

            results = flaskapp.themoviedb_search(m['title'], mtype, year=year)

            # exit iteration if search not complete or no results
            if 'total_results' not in results:
                logging.error(u'tmdb search not complete for {}: {} {}'.format(
                    mtype, m['title'], year))
                continue
            if results['total_results'] < 1:
                logging.warning(u'tmdb 0 results for {}: {} {}'.format(
                    mtype, m['title'], year))
                # empty media for db write, prevent re-searching
                full_media = dict()
                full_media['title'] = m['title']
                full_media['mtype'] = mtype
                full_media['year'] = year
                full_media['id'] = m['link']
                full_media['sources'] = []
            else:
                # assume top result is best match and use it
                full_media = results['results'][0]

                # append data so dict can be saved to database
                full_media['mtype'] = mtype
                full_media['sources'] = []
                if mtype == 'movie':
                    full_media['year'] = full_media['release_date'][:4]
                else:
                    # TV results use 'name'/'first_air_date' field names.
                    full_media['title'] = full_media['name']
                    full_media['year'] = full_media['first_air_date'][:4]

                # check if titles are not exact match, in future may not append these
                if not flaskapp.doTitlesMatch(m['title'], full_media['title']):
                    logging.warning(u'not exact titles: {} | {}'.format(
                        m['title'], full_media['title']))

            # write db media if new
            flaskapp.insert_media_if_new(full_media)

            # update db media with source
            flaskapp.update_media_with_source(full_media, source_to_write)

    def update_watchlist_amz(self):
        """For watchlist items check if amazon prime and amazon pay
        are sources and add to db"""
        wl_unique = flaskapp.get_all_watchlist_in_db()
        for m in wl_unique:
            media = flaskapp.themoviedb_lookup(m['mtype'], m['id'])
            flaskapp.amz_prime_check(media)
            sleep(2.5)  # be polite to the remote service
            flaskapp.amz_pay_check(media)
            sleep(2.5)
コード例 #39
0
ファイル: slack_spyder.py プロジェクト: snarang2/SNA4Slack
class SlackSpider():
    """Selenium-driven crawler for slackarchive.io.

    Walks every channel of a Slack team's public archive, scrapes each
    message and persists it as a SlackArchive row.

    NOTE(review): depends on module-level names imported elsewhere in the
    original file (webdriver, Display, sleep, randint, uuid, dateutil,
    Utils, SlackArchive, sync_table) — confirm against the full module.
    """
    def __init__(self):
        # Crawl state: discovered channel names, per-channel page count,
        # and the work queue of [team, channel, page-url] triples.
        #self.all_items = []
        self.channelList = []
        self.dataList = []
        self.pageSize = 0
        self.urlsToHit = []
        self.TeamName = ''
        self.ChannelName = ''

    # Open headless chromedriver
    def start_driver(self):
        """Start a virtual display and a Chrome driver inside it."""
        print('starting driver...')
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.driver = webdriver.Chrome("/var/chromedriver/chromedriver")
        # random pause: let the browser settle and avoid a fixed request cadence
        sleep(randint(9, 10))

    # Close chromedriver
    def close_driver(self):
        """Stop the virtual display and quit the driver."""
        print('closing driver...')
        self.display.stop()
        self.driver.quit()
        print('closed!')

    # Tell the browser to get a page
    def get_page(self, url):
        """Navigate to *url* and pause so the page can finish loading."""
        print('getting page...{0}'.format(url))
        self.driver.get(url)
        sleep(randint(9, 10))

    # Grab items from divisions
    def grab_list_items(self):
        """Scrape every message <li> on the current page.

        Returns a list of SlackArchive objects.  The last non-empty avatar
        is carried forward so consecutive messages from the same sender
        (which omit the avatar) still get one.
        """
        print('grabbing list of items...')
        senderAvatar = ''
        all_items = []
        for div in self.driver.find_elements_by_xpath(
                '//ul[@class="messages"]//li'):
            data = self.process_elements(div, senderAvatar)

            if data:
                all_items.append(data)
                if data.senderAvatar != '':
                    senderAvatar = data.senderAvatar
        return all_items

    # Process division elements
    def process_elements(self, div, senderAvatar):
        """Build one SlackArchive object from a message element.

        Falls back to *senderAvatar* (previous message's avatar) when the
        element has none of its own; returns None when sender, time or
        body is missing.
        """
        msg_sender_avatar = ''
        try:
            msg_sender = div.find_element_by_class_name(
                "msg-user").get_attribute('innerText')
            msg_time = div.find_element_by_class_name(
                "msg-time").get_attribute('innerText')
            msg_body = div.find_element_by_class_name(
                "msg-body").get_attribute('innerText')
        except Exception as error:
            print 'element not found exception'
            return None

        try:
            avatar = div.find_element_by_xpath('.//*[@class="msg-avatar"]')
            msg_sender_avatar = avatar.find_element_by_class_name(
                'msg-thumb').get_attribute('src')
        except Exception as error:
            # no avatar on this message: reuse the previous sender's
            msg_sender_avatar = senderAvatar

        if msg_sender and msg_time and msg_body:
            archiveObj = SlackArchive()
            archiveObj.teamName = self.TeamName
            archiveObj.channelName = self.ChannelName
            archiveObj.messageBody = msg_body
            archiveObj.senderAvatar = msg_sender_avatar
            archiveObj.messageTime = msg_time
            archiveObj.messageSender = msg_sender
            return archiveObj

        else:
            return None

    # Parse the URL
    def parse(self, url):
        """Load *url* and return its scraped message list."""
        self.get_page(url)
        return self.grab_list_items()
        pass

    # Get list of channels in a team
    def getChannelList(self):
        """Collect the sidebar channel names into self.channelList."""
        for channelName in self.driver.find_elements_by_xpath(
                '//ul[@class="channels-list"]//li//a'):
            self.channelList.append(channelName.text)
        pass

    # Get the total number of pages in each channel in each page
    def getPageSize(self, url_Template):
        """Read the active pagination entry into self.pageSize.

        NOTE(review): *url_Template* is unused — the size is read from the
        page currently loaded in the driver.
        """
        for page in self.driver.find_elements_by_xpath(
                '//ul[@class="pagination pagination-vertical"]//li[@class="page-item active"]'
        ):
            self.pageSize = int(page.text)
        pass

    # Build the list of URL's to hit
    def buildTarget(self, teamName):
        """Fill self.urlsToHit with [team, channel, page-url] triples."""
        url_Template = "https://{0}.slackarchive.io/".format(teamName)
        self.get_page(url_Template)
        self.getChannelList()
        # special case: skip the first 7 channels of this team
        if teamName == 'buffercommunity':
            self.channelList = self.channelList[7:]
        for channel in self.channelList:
            # channel names come prefixed with '#'; drop it
            channelName = channel[1:].strip()
            urlA = url_Template + channelName + "/"
            self.get_page(urlA)
            self.getPageSize(urlA)
            print 'Page size: {0}'.format(self.pageSize)
            for i in range(1, self.pageSize + 1):
                urlObject = []
                urlObject.append(teamName)
                urlObject.append(channelName)
                urlObject.append(urlA + "page-" + str(i))
                self.urlsToHit.append(urlObject)
        pass

    # Run the crawler
    def runSpider(self, teamName):
        """Crawl every queued page of *teamName* and save rows to the DB."""
        self.buildTarget(teamName)
        Utils.get_Connection_SNA4Slack()
        sync_table(SlackArchive)

        for url in self.urlsToHit:
            self.TeamName = url[0]
            self.ChannelName = url[1]
            count = 0
            for data in self.parse(url[2]):
                if data:
                    count += 1
                    node_object = SlackArchive(
                        id=uuid.uuid1(),
                        teamName=data.teamName,
                        channelName=data.channelName,
                        messageSender=data.messageSender.rstrip().lstrip(),
                        messageBody=data.messageBody.rstrip().lstrip(),
                        senderAvatar=data.senderAvatar,
                        messageTime=dateutil.parser.parse(data.messageTime))
                    node_object.save()
            if count > 0:
                print '{0} rows saved'.format(count)

            else:
                print url[2]
                print 'No data found'

    pass
コード例 #40
0
ファイル: webdriver.py プロジェクト: SazhnevWIS/noodles_blog
def wd_instance(driver_name, time_to_wait=30):
    """Create and return a configured Selenium WebDriver instance.

    :param driver_name: 'firefox', 'chrome', 'mobile_chrome' or any other
        key of the module-level ``user_agents`` mapping (selects a spoofed
        user agent for Chrome)
    :param time_to_wait: Sets a sticky timeout to implicitly wait for an
        element to be found
    :returns: ready driver; when a virtual display was started, its stop
        callable is exposed as ``driver.display_stop`` for teardown
    :raises Exception: when the driver cannot be started or reached, or
        when *driver_name* is unknown
    """

    display = None
    remurl = 'http://%s:%s' \
             % (get_config('CHROMEDRIVER_HOST'),
                get_config('CHROMEDRIVER_PORT'))
    if driver_name == 'firefox':
        driver = webdriver.Firefox()
    elif driver_name in user_agents:
        capabilities = {}
        if driver_name == 'mobile_chrome':
            # copy() so we never mutate the shared class-level
            # DesiredCapabilities.CHROME dict across calls
            capabilities = selenium.webdriver.DesiredCapabilities.CHROME.copy()
            capabilities["chromeOptions"] = {
                'args': ['user-agent=%s' % user_agents[driver_name]],
                'extensions': []
            }
        elif driver_name != 'chrome':
            capabilities["chromeOptions"] = {
                'args': ["user-agent=%s" % user_agents[driver_name]],
                'extensions': []
            }
        # ensure chromeOptions/args exist before appending common flags
        capabilities.setdefault('chromeOptions', {'args': []})
        capabilities['chromeOptions']['args'].append('--test-type')

        if get_config('CHROMEDRIVER_VIRTUAL_DISPLAY'):
            # run chromedriver locally inside a virtual X display
            chromedriver = get_config('CHROMEDRIVER_BINARY')
            os.environ['webdriver.chrome.driver'] = chromedriver
            display = Display(visible=0, size=DEFAULT_DISPLAY_SIZE)
            display.start()

            options = selenium.webdriver.ChromeOptions()
            if driver_name in user_agents:
                options.add_argument(
                    '--user-agent="%s"' % user_agents[driver_name])

            try:
                driver = selenium.webdriver.Chrome(
                    chromedriver,
                    chrome_options=options,
                    desired_capabilities=capabilities)
                driver.set_window_size(*DEFAULT_DISPLAY_SIZE)
                driver.set_window_position(0, 0)
            except Exception as expt:
                print(expt)
                print('-' * 80)
                raise Exception(
                    'failed to instantiate webdriver '
                    'with binary path %s' % chromedriver)
        else:
            # talk to an already-running chromedriver / selenium hub
            try:
                if get_config('CHROMEDRIVER_HOST') not in ['localhost',
                                                           '127.0.0.1']:
                    # remote hub: use the hub endpoint and default Chrome
                    # capabilities (copied, not aliased)
                    remurl += '/wd/hub'
                    capabilities = \
                        selenium.webdriver.DesiredCapabilities.CHROME.copy()
                driver = selenium.webdriver.Remote(
                    remurl, desired_capabilities=capabilities)
                driver.set_window_size(*DEFAULT_DISPLAY_SIZE)
                driver.set_window_position(0, 0)
            except Exception as expt:
                print('-' * 80)
                raise Exception('could not connect to selenium at %s; '
                                'CHECK THAT YOU HAVE CHROMEDRIVER RUNNING - '
                                'http://code.google.com/p/chromedriver/'
                                'downloads/list'
                                'Exception: %s' % (remurl, str(expt)))
    else:
        raise Exception('Driver not defined!')
    if display is not None:
        # give callers a handle to stop the virtual display on teardown
        driver.display_stop = display.stop
    driver.implicitly_wait(time_to_wait)
    return driver
コード例 #41
0
class LexisNexisSpider(scrapy.Spider):
    """Scrapy spider that drives a Firefox (in a virtual display) through
    LexisNexis search results and yields LexisnexisArticleItem objects.

    NOTE(review): Python 2 code (print statements, ``except Exception, e``);
    depends on module-level names imported elsewhere in the original file
    (webdriver, Display, Rake, LexisnexisArticleItem, datetime, time, ...).
    """
    name = 'lexisnexis'
    start_urls = []

    # class-level crawl state / placeholders
    s_date = ''
    e_date = ''
    c_date = ''
    page_cnt = 1
    dont_filter = True
    agency_list = []
    '''
    today = datetime.now() + timedelta(days = -3)
    date = str(today)[0:10]
    year = date[0:4]
    month = date[5:7]
    day = date[8:10]
    '''
    '''
    Constructor
    '''
    def __init__(self, keyword='nation', *args, **kwargs):
        # *keyword* drives the LexisNexis search query
        self.keyword = keyword
        self.start_urls = ['http://www.google.com']
        super(LexisNexisSpider, self).__init__(*args, **kwargs)

        # headless Firefox inside a virtual X display
        self.display = Display(visible=0, size=(1280, 1024))
        self.display.start()
        profile = webdriver.FirefoxProfile()
        profile.native_events_enabled = True
        self.driver = webdriver.Firefox(profile)
        # self.driver2 = webdriver.Firefox(profile)
        self.driver.get(self.get_query_url(self.keyword))
        time.sleep(3)

    def __del__(self):
        # tear down browser and virtual display when the spider is GC'd
        self.driver.close()
        self.driver.quit()
        self.display.stop()
        print '************************************************************************'
        print 'CLOSED!!!'

    '''
    Get the query url
    '''

    def get_query_url(self, keyword):
        """Build the LexisNexis search URL for *keyword*, restricted to
        articles from the last 25 days across a fixed set of sources."""
        today = datetime.now() + timedelta(days=-25)
        date = str(today)[0:10]
        year = date[0:4]
        month = date[5:7]
        day = date[8:10]
        return 'http://www.lexisnexis.com/lnacui2api/api/version1/sr?sr=%28' + keyword + '%29%20and%20Date%28geq%28' + month + '/' + day + '/' + year + '%29%29&csi=8006%2C6742%2C8213%2C8142%2C8075%2C11810%2C306884%2C247189%2C163823%2C301477&oc=00006&hgn=t&hl=t&hes=t&hnsl=t&hsl=t&hdym=t&hfb=t&ssl=f&stp=bool&icvrpg=true'
        '''
					#The New York Times
					+'%2C6742' \
					# USA TODAY
					+'%2C8213' \
					#Wall Street Journal Abstracts
					+'%2C8142' \
					#The Washington Post
					+'%2C8075' \
					#Post-Dispatch
					+'%2C11810' \
					#The Baltimore Sun
					+'%2C306884' \
					#The Philadelphia Inquirer
					+'%2C247189' \
					#Chicago Daily Herald
                    +'%2c163823'
					#Arizona Capitol Times
                    +'%2c301477'
		'''
        #return 'http://www.lexisnexis.com/lnacui2api/api/version1/sr?sr=%28' + keyword + '%29%20and%20Date%28geq%28'+ month + '/' + day + '/' + year + '%29%29&csi=8006%2C6742%2C8213%2C8142%2C8075&oc=00006&hgn=t&hl=t&hes=t&hnsl=t&hsl=t&hdym=t&hfb=t&ssl=f&stp=bool&icvrpg=true'
        #return 'http://www.lexisnexis.com/lnacui2api/api/version1/sr?sr=%28' + keyword + '%29%20and%20Date%28geq%284/5/2011%29%29&csi=8006%2C6742%2C8213%2C8142%2C8075&oc=00006&hgn=t&hl=t&hes=t&hnsl=t&hsl=t&hdym=t&hfb=t&ssl=f&stp=bool&icvrpg=true'

    def next_page(self, start_index):
        """Advance the driver to the result page starting at *start_index*.

        Returns False when there is no 'next' pagination button, True after
        the next page's result frame has been loaded.
        """
        try:
            next_button = self.driver.find_element_by_xpath(
                '//table//table//table//table//table//table//td[@align="right"]/a/img[@src="images/IconPaginationNext.gif"]'
            )
        except:
            return False
            pass
        # 'risb' is a session token required by the listview endpoint
        risb = self.driver.find_element_by_xpath(
            '//input[@name="risb"]').get_attribute("value")
        nexpage = "http://www.lexisnexis.com/lnacui2api/results/listview/listview.do?start=" + str(
            start_index) + "&sort=RELEVANCE&format=GNBLIST&risb=" + risb
        self.driver.get(nexpage)
        time.sleep(2)
        # results live inside a frame; navigate into its src directly
        source = self.driver.find_element_by_xpath(
            '//frame[@title="Results Content Frame"]')
        self.driver.get(source.get_attribute("src"))
        time.sleep(2)
        return True

    '''
    Starting point
    Retrieve the news link from the list of search results.
    Args:
     response - the response object pertaining to the search results page
    '''

    def parse(self, response):
        # dismiss the interstitial "continue" page if present
        button_continue = self.driver.find_element_by_xpath(
            '//a[@id="firstbtn"]')
        try:
            button_continue.click()
        except:
            print 'can' 't find continue button '
        source = self.driver.find_element_by_xpath(
            '//frame[@title="Results Content Frame"]')
        self.driver.get(source.get_attribute("src"))
        time.sleep(5)
        # phase 1: page through the result list collecting title/url/agency
        item_list = list()
        start_id = 1
        while self.next_page(start_id):
            noshade_list = self.driver.find_elements_by_xpath(
                '//tr[@class="noshaderow1st"]')
            shade_list = self.driver.find_elements_by_xpath(
                '//tr[@class="shaderow1st"]')
            for news in noshade_list + shade_list:
                button = news.find_element_by_xpath('.//a')
                news_title = button.text
                news_url = button.get_attribute("href")
                news_agency = news.find_element_by_xpath(
                    './/span[@class="notranslate"]').text

                article = LexisnexisArticleItem()
                article['title'] = news_title
                article['url'] = news_url
                article['agency'] = news_agency
                item_list.append(article)
            # result pages hold 25 entries each
            start_id += 25
            print "++++++++++++++++++", len(item_list)
        # phase 2: visit each article and extract body, date and keywords
        for article in item_list:
            self.driver.get(article['url'])
            time.sleep(2)
            try:
                source = self.driver.find_element_by_xpath(
                    '//frame[@title="Results Document Content Frame"]')
                self.driver.get(source.get_attribute('src'))
                time.sleep(2)
                date_str = self.driver.find_element_by_xpath(
                    '//span[@class="verdana"]/center').text
                news_date = self.parse_date(date_str)

                news_id = self.driver.find_element_by_xpath(
                    '//input[@name="docIdentifier"]')
                news_id = news_id.get_attribute('value')

                news_content_list = self.driver.find_elements_by_xpath(
                    '//span[@class="verdana"]/p[@class="loose"]')
                news_content_list = [n.text for n in news_content_list]
                news_content = '.'.join(news_content_list)

                #Get keywords
                rake = Rake()
                keywords_list = rake.run(news_content)
                keywords = '\n'.join(keywords_list)
                tag = rake.get_tagged_text()

                #article['keywords'] = keywords
                article['aid'] = news_id
                article['date'] = news_date
                article['contents'] = news_content
                article['keywords'] = keywords
                article['tagged_text'] = tag
            except Exception, e:
                # article still yielded below, just without the detail fields
                print 'ERROR!!!!!!!!!!!!!  URL :'
                print traceback.print_exc(file=sys.stdout)

            yield article
コード例 #42
0
class BrowserWebdriver(BrowserBase):
    """Webdriver-backed implementation of BrowserBase.

    Wraps a Selenium driver: navigation, page-load waiting, DOM helpers
    and canned scenarios (login).  Subclasses must implement the
    log-parsing hooks declared near the bottom of the class.
    """

    # netloc substrings whose requests are ignored while waiting for a page
    skip_urls = []

    def __init__(self, *args, **kwargs):
        BrowserBase.__init__(self, *args, **kwargs)
        # timestamp/netloc of the very first navigation (set in _http_get)
        self._first_navigation_ts = None
        self._first_navigation_netloc = None
        self._ts_offset = None

    def _skip_url(self, page, url):
        """Return True when *url* matches skip_urls but *page*'s own netloc
        does not (i.e. it is third-party traffic we don't wait for)."""
        if not url:
            return False

        _, req_netloc, _ = parse_url(url)

        for su in self.skip_urls:
            if su in req_netloc:
                _, page_netloc, _ = parse_url(page.url)
                if not any(x in page_netloc for x in self.skip_urls):
                    self.log_debug("skipping URL %s" % req_netloc)
                    return True
        return False

    def _browser_clear_caches(self):
        """Clear caches by restarting the whole browser process."""
        BrowserBase._browser_clear_caches(self)
        self.driver.quit()
        self.pid = self.browser_start()

    def _browser_navigate(self, location, cached=True, name=None):
        """Open *location* (Page object or URL string) and return a Page."""
        url = location.url if isinstance(location, Page) else location
        # real_navigation is False when the URL encoded an xpath click
        real_navigation = self._http_get(url)
        return Page(self,
                    url,
                    cached,
                    name=name,
                    real_navigation=real_navigation)

    def _browser_wait(self, page, timeout=None):
        """Block until *page* has fired onload and all requests completed.

        First half of the timeout polls window.performance for
        loadEventEnd; the remainder polls the event log for incomplete
        requests.  Timeouts are logged as errors, not raised.
        """
        self.log_info("_browser_wait()...")

        if timeout is None:
            timeout = self.nav_timeout

        start = time.time()
        while time.time() - start < timeout / 2:
            time.sleep(0.2)
            if self.driver.execute_script(
                    "return window.performance.timing.loadEventEnd"):
                break
            # onload event has not been processed yet, so need to wait and retry
            self.log_info("Waiting for loadEventEnd ... ")

        while time.time() - start < timeout:
            time.sleep(self.ajax_threshold)

            # hack. Execute something in browser context to flush logs...
            self.driver.execute_script(
                "return window.performance.timing.loadEventEnd")

            self._browser_get_events(page)

            ir = page.get_incomplete_reqs()
            if not ir:
                break
            self.log_info(
                "Waiting for incomplete requests:\n    %s" %
                ("\n    ".join(["%s - %s" % (r.id, r.url) for r in ir])))

        if time.time() - start >= timeout:
            if not self.driver.execute_script(
                    "return window.performance.timing.loadEventEnd"):
                self.log_error(
                    "Page '%s' load timeout, window.performance.timing.loadEventEnd = 0"
                    % page.url)

            ir = page.get_incomplete_reqs()
            if ir:
                self.log_error(
                    "Can't wait for page '%s' load completion, "
                    "see '%s' for details\nincomplete requests:\n    %s" %
                    (page.url, self.log_path, "\n    ".join(
                        ["%s - %s" % (r.id, r.url) for r in ir])))

        page.complete(self)

    def _browser_warmup_page(self, location, name=None):
        """Visit *location* once, uncached and unmeasured, to warm caches."""
        self.navigate_to(location, cached=False, stats=False, name=name)

    def _browser_display_init(self, headless, resolution):
        """Start a pyvirtualdisplay Display when *headless*, else None."""
        if headless:
            try:
                from pyvirtualdisplay import Display
            except ImportError as e:
                abort(e)
            self.display = Display(visible=0, size=resolution)
            self.display.start()
        else:
            self.display = None

    def _browser_execute_script(self, js):
        """Evaluate JS expression *js* in the browser and return its value."""
        val = self.driver.execute_script("return %s" % js)
        self.log_debug("%s = %s" % (js, val))
        return val

    def browser_get_name(self):
        """Return the browser name reported by webdriver capabilities."""
        c = self.driver.capabilities
        return c['browserName']

    def browser_get_version(self):
        """Return the browser version (key name varies across drivers)."""
        c = self.driver.capabilities
        return self._get_val(c, ['version', 'browserVersion'])

    def browser_get_platform(self):
        """Return the platform name (key name varies across drivers)."""
        c = self.driver.capabilities
        return self._get_val(c, ['platform', 'platformName'])

    def browser_get_screenshot_as_file(self, filename):
        """Save a screenshot of the current page to *filename*."""
        self.driver.get_screenshot_as_file(filename)

    def browser_get_page_timeline(self, page):
        """Collect window.performance.timing values for *page* and return
        them wrapped in a PageTimeline."""
        values = {}
        for t in PageTimeline.types:
            if t in PageTimeline.jstypes:
                js = "window.performance.timing.%s" % PageTimeline.jstypes[t]
                values[t] = self._browser_execute_script(js)

        return PageTimeline(page, values)

#    def browser_set_session(self, domain, session_id):
#        self._http_get(domain)
#        self.driver.add_cookie({'name': 'sessionid', 'value': session_id})

    def browser_get_current_url(self):
        """Return the URL currently shown by the browser."""
        return self.driver.current_url

    def browser_get_screenshot(self, filename):
        """Save a screenshot to *filename* (same as *_as_file variant)."""
        self.driver.get_screenshot_as_file(filename)

    def browser_stop(self):
        """Quit the driver and stop the virtual display, idempotently.

        URLError from an already-dead driver is deliberately ignored.
        """
        try:
            if self.driver:
                self.driver.quit()
                self.driver = None
            if self.display:
                self.display.stop()
                self.display = None
        except URLError:
            pass

    def _xpath_click(self, xpath):
        """Click the first element matching *xpath*.

        Tries the xpath with '%23' and '#' interchanged (URL-escaping of
        fragments).  Raises BrowserExc when nothing clickable was found.
        NOTE(review): 'exc' collects the last exception but the final
        ``raise`` uses loop variable 'e' — same object, confusing naming.
        """
        exc = None

        # take into account possible replacements of %23/#
        xpaths = [xpath]
        if "%23" in xpath:
            xpaths.append(xpath.replace("%23", "#"))
        if "#" in xpath:
            xpaths.append(xpath.replace("#", "%23"))

        for x in xpaths:
            self.log_debug("Looking for xpath: %s ..." % x)
            try:
                el = self.driver.find_element_by_xpath(x)
                el.click()
                self.log_debug("Looking for xpath: %s ... OK" % x)
                return
            except NoSuchElementException as e:
                self.log_debug(
                    "Looking for xpath: %s ... Failed, no such element" % x)
                exc = e
            except ElementNotVisibleException as e:
                self.log_warning(
                    "Looking for xpath: %s ... Failed, element not visible" %
                    x)
                exc = e

        self.log_error("NoSuchElementException, xpath: %s, see debug log" %
                       xpath)
        self.log_debug("page source:\n%s" %
                       self.driver.page_source.encode('ascii', 'ignore'))
        raise BrowserExc(e)

    def _http_get(self, url, validator=None):
        """Navigate the browser to *url*.

        A URL of the form 'page^xpath' means: click the xpath element
        instead of navigating; in that case returns False (no real
        navigation).  Returns True after a real driver.get().
        """
        self.log_debug("Execute GET request: %s" % url)

        if not self._first_navigation_ts:
            self._first_navigation_ts = time.time()
            _, self._first_navigation_netloc, _ = parse_url(url)

        ar = url.split("^")
        if len(ar) > 1:
            self._xpath_click(ar[1])
            return False

        try:
            self.driver.get(url)
        except WebDriverException as e:
            raise BrowserExc(e)
        return True

    @staticmethod
    def _get_val(d, keys):
        """Return the first of *keys* present in dict *d*, else 'unknown'."""
        for key in keys:
            if key in d:
                return d[key]
        return "unknown"

    def print_browser_info(self):
        """Print a short summary of browser, platform, PID and log path."""
        # NOTE(review): 'c' is assigned but unused here
        c = self.driver.capabilities
        self.print_stats_title("Browser summary")
        print("  - platform: %s" % self.browser_get_platform())
        print("  - browser:  %s %s" %
              (self.browser_get_name(), self.browser_get_version()))
        print("  - PID:      %d" % self.pid)
        print("  - log file: %s" % self.log_path)

    def print_log_file_path(self):
        """Print the path of the browser log file."""
        self.print_stats_title("Browser log file")
        print("  %s" % self.log_path)

    # === virtual methods that must be implemented in every webdriver-based browser === #

    def _browser_parse_logs(self, page, logs):
        # subclass hook: turn raw driver logs into page events
        raise BrowserExcNotImplemented()

    def _browser_get_events(self, page):
        # subclass hook: pull pending events for *page* from the driver
        raise BrowserExcNotImplemented()

    # === webdriver specific === #

    def dom_wait_element_stale(self, el, timeout_s=None, name=None):
        """Wait until DOM element *el* goes stale (page replaced).

        Raises BrowserExcTimeout when *el* is still attached after
        *timeout_s* (defaults to the navigation timeout).
        """
        start_time = time.time()

        if timeout_s is None:
            timeout_s = self.nav_timeout

        # http://www.obeythetestinggoat.com/how-to-get-selenium-to-wait-for-page-load-after-a-click.html
        while time.time() < start_time + timeout_s:
            try:
                el.find_elements_by_id('doesnt-matter')
                pass
            except StaleElementReferenceException:
                break
            time.sleep(0.1)

        if time.time() > start_time + timeout_s:
            msg = "DOM element '%s' click() timeout: %.1fs" % (
                name, time.time() - start_time)
            self.log_error(msg)
            raise BrowserExcTimeout(msg)

    def dom_click(self,
                  el,
                  timeout_s=None,
                  name=None,
                  wait_callback=None,
                  wait_callback_obj=None):
        """Click *el* and wait for the resulting page transition.

        Waits via *wait_callback* when given, otherwise waits for the old
        page's <html> element to go stale, then for ajax completion.
        """
        self.log_debug("dom_click(%s, %s)" % (str(el), str(name)))

        if timeout_s is None:
            timeout_s = self.nav_timeout

        p = Page(self,
                 self.browser_get_current_url(),
                 True,
                 name=name,
                 real_navigation=False)
        p.start()

        # 1. click on the element

        old_page = self.driver.find_element_by_tag_name('html')
        el.click()

        # 2. wait for selenium onclick completion

        if wait_callback:
            self.log_debug(
                "wait callback: %s, %s" %
                (str(wait_callback.__name__), str(wait_callback_obj)))
            wait_callback(wait_callback_obj, el, timeout_s, name)
        else:
            self.log_debug("wait stale: %s, %s, %s" % (el, timeout_s, name))
            self.dom_wait_element_stale(el, timeout_s, name)

        # 3. wait for ajax completion, because browser URL can be update only after that

        self._browser_wait(p, timeout=timeout_s)
        p.url = self.browser_get_current_url()

        time.sleep(0.2)

    def dom_find_element_by_id(self, id):
        """find_element_by_id wrapped to raise BrowserExc on miss."""
        try:
            return self.driver.find_element_by_id(id)
        except NoSuchElementException as e:
            raise BrowserExc(e)

    def dom_find_element_by_name(self, name):
        """find_element_by_name wrapped to raise BrowserExc on miss."""
        try:
            return self.driver.find_element_by_name(name)
        except NoSuchElementException as e:
            raise BrowserExc(e)

    def dom_find_element_by_xpath(self, xpath):
        """find_element_by_xpath wrapped to raise BrowserExc on miss."""
        try:
            return self.driver.find_element_by_xpath(xpath)
        except NoSuchElementException as e:
            raise BrowserExc(e)

    def dom_find_frames(self):
        """Return all <frame> and <iframe> elements on the current page."""
        frames = []
        for name in ("frame", "iframe"):
            try:
                frames += self.driver.find_elements_by_tag_name(name)
            except NoSuchElementException as e:
                pass
        return frames

    def dom_switch_to_frame(self, frame):
        """Switch the driver context into *frame*."""
        self.log_info("Switching to frame %s" % frame)
        return self.driver.switch_to.frame(frame)

    def dom_switch_to_default_content(self):
        """Switch the driver context back to the top-level document."""
        self.log_info("Switching to default content")
        return self.driver.switch_to.default_content()

    def dom_send_keys(self, el, keys):
        """Type *keys* into element *el*, character by character.

        Clears any pre-existing value first; when send_keys does not stick
        (flaky selenium), falls back to setting the value attribute via JS.
        Returns True on success, False when both approaches failed.
        """
        val = el.get_attribute('value')
        if val != '':  # clear initial value
            self.log_info("Element value is not empty, clear content...")
            self.driver.execute_script("arguments[0].value = ''", el)
            time.sleep(2.0)

        for ch in keys:
            el.send_keys(ch)
            time.sleep(0.2)
        val = el.get_attribute('value')
        if val == keys:
            return True

        self.log_warning("Bogus selenium send_keys(). Entered: '%s', "
                         "but see: '%s', using set_attribute()..." %
                         (keys, val))
        time.sleep(2.0)
        self.driver.execute_script("arguments[0].value = '%s'" % keys, el)
        time.sleep(2.0)
        val = el.get_attribute('value')
        if val == keys:
            self.log_info("Ok, set_attribute() works fine")
            return True

        self.log_error(
            "Bogus selenium send_keys() and set_attribute(), can't enter value into the element"
        )
        return False

    # === some predefined scenarios === #

    def _do_send_keys(self, title, keys, tag_names, tag_ids):
        """Enter *keys* into the first matching input field.

        Candidates are tried in order: by element name, by label xpath,
        then by element id; *title* is only used in log messages.
        Returns True when one candidate accepted the input.
        """
        for tag, name in tag_names:
            try:
                el = self.dom_find_element_by_name(name)
                if el.tag_name != tag:
                    continue
                if not self.dom_send_keys(el, keys):
                    self.log_error("Couldn't enter %s" % title)
                    return False
                return True
            except BrowserExc as e:
                pass
        for tag, name in tag_names:
            try:
                el = self.dom_find_element_by_xpath(
                    '//*[@label="{}"]'.format(name))
                if el.tag_name != tag:
                    continue
                if not self.dom_send_keys(el, keys):
                    self.log_error("Couldn't enter %s" % title)
                    return False
                return True
            except BrowserExc as e:
                pass

        for tag, id in tag_ids:
            try:
                el = self.dom_find_element_by_id(id)
                if el.tag_name != tag:
                    continue
                if not self.dom_send_keys(el, keys):
                    self.log_error("Couldn't enter %s" % title)
                    return False
                return True
            except BrowserExc as e:
                pass

        self.log_info("Couldn't find %s input field" % title)
        return False

    def _do_login(self, url, user, password, login_form, timeout_s=None):
        """Fill user/password fields and click the submit control.

        *login_form* describes candidate tags/ids/xpaths for each field.
        Success is detected by the submit control disappearing after the
        click.  Returns True on login, False otherwise.
        """
        if not self._do_send_keys('user name', user, login_form.user_tags,
                                  login_form.user_ids):
            return False

        time.sleep(1)

        if not self._do_send_keys('password', password, login_form.pass_tags,
                                  login_form.pass_ids):
            return False

        time.sleep(1)

        submit_form_found = False
        for tag, name in login_form.sbmt_tags:
            try:
                el = self.dom_find_element_by_name(name)
                if el.tag_name != tag:
                    continue
                submit_form_found = True
                self.dom_click(el, name=name, timeout_s=timeout_s)

                try:
                    el = self.dom_find_element_by_name(name)
                except BrowserExc:
                    self.log_info("Login succeed")
                    return True

            except BrowserExc as e:
                pass

        for tag, id in login_form.sbmt_ids:
            try:
                el = self.dom_find_element_by_id(id)
                if el.tag_name != tag:
                    continue
                submit_form_found = True
                self.dom_click(el, name=id, timeout_s=timeout_s)

                try:
                    el = self.dom_find_element_by_id(id)
                except BrowserExc:
                    self.log_info("Login succeed")
                    return True

            except BrowserExc as e:
                pass

        for x in login_form.sbmt_xpath:
            try:
                el = self.dom_find_element_by_xpath(x)
                submit_form_found = True
                self.dom_click(el, name=id, timeout_s=timeout_s)

                try:
                    el = self.dom_find_element_by_xpath(x)
                except BrowserExc:
                    self.log_info("Login succeed")
                    return True

            except BrowserExc as e:
                pass

        if not submit_form_found:
            self.log_info("Couldn't find login submit form")

        self.log_info("Login failed")
        return False

    def do_login(self, url, user, password, login_form, timeout_s=None):
        """Navigate to *url* and log in, retrying inside each frame.

        Returns True when login succeeded in the main document or any
        frame, False otherwise.
        """
        self.log_info("Trying to login to '%s' under user %s" % (url, user))
        self.navigate_to(url, cached=None)

        if self._do_login(url, user, password, login_form,
                          timeout_s=timeout_s):
            return True

        for frame in self.dom_find_frames():
            self.dom_switch_to_frame(frame)
            if self._do_login(url,
                              user,
                              password,
                              login_form,
                              timeout_s=timeout_s):
                return True

        self.log_info("Login to '%s' under user '%s' has been failed" %
                      (url, user))
        return False
コード例 #43
0
ファイル: Scraper.py プロジェクト: jmozah/scarface
class PinterestImages():
    """Headless Pinterest scraper: logs in once at construction time, then
    crawls search results and writes tab-separated "url<TAB>title" lines
    to a file named after the command-line arguments.
    """

    def __init__(self):
        # Run the browser inside an invisible virtual display (Xvfb).
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.srchurl = 'https://in.pinterest.com/search/pins/?q=%s'
        self.base_url = self.srchurl
        self.path_to_chromedriver = './chromedriver'
        # Single browser instance using the bundled chromedriver.
        # (The original code created a second, unconfigured
        # webdriver.Chrome() right after this one, discarding and leaking
        # the configured instance — removed.)
        self.browser = webdriver.Chrome(
            executable_path=self.path_to_chromedriver)
        self.browser.get('https://in.pinterest.com/login/')
        # SECURITY: credentials are hard-coded here; they should be moved
        # to configuration / environment variables.
        self.elem = self.browser.find_elements_by_name("username_or_email")
        self.elem[0].send_keys("*****@*****.**")
        self.elem = self.browser.find_elements_by_name("password")
        self.elem[0].send_keys("qawsedrf")
        self.elem = self.browser.find_elements_by_xpath(
            "/html/body/div[1]/div[1]/div[1]/div/div/div/form/div[4]/div/button"
        )
        self.elem[0].click()

        # NOTE(review): YUI ids like this are session-generated and almost
        # certainly stale; this locator is unused below.
        self.buton = '//*[@id="yui_3_5_1_1_1440135195051_1805"]'

    def crawl(self, qry):
        """Search Pinterest for *qry* (a sequence of words), scroll until
        no new images load (or 19 scrolls), then dump image URL/title
        pairs to '<argv[1:] joined by _>_Pinterest'.
        """
        def noImages(psource):
            # Count pin images in a page-source snapshot (0 when there is
            # no snapshot yet).
            if psource is None:
                return 0
            soup = BeautifulSoup(psource, 'lxml')
            imgs = soup.findAll('div', 'Image Module pinUiImage')
            return len(imgs)

        url = self.base_url % ('+'.join(qry))
        self.browser.get(url)
        time.sleep(1)
        pps = None  # previous page source
        cps = None  # current page source
        for i in range(1, 20):
            self.browser.execute_script("window.scrollTo(0, %d);" %
                                        (i * 10000))
            time.sleep(10)
            cps = self.browser.page_source
            # Stop once scrolling no longer yields additional images.
            if noImages(cps) < noImages(pps):
                break
            pps = cps

        pagesource = pps

        soup = BeautifulSoup(pagesource, 'lxml')
        imgs = soup.findAll('div', 'Image Module pinUiImage')
        extractedUrls = []
        for img in imgs:
            imgd = img.findAll('img')
            url = imgd[0]['src']
            title = imgd[0]['alt'].encode('ascii', 'ignore')
            # Swap the 236px thumbnail path for the 736px full-size one.
            extractedUrls.append(url.replace('236x', '736x') + '\t' + title)

        with open('_'.join(sys.argv[1:]) + '_Pinterest', 'w') as outfile:
            for x in extractedUrls:
                outfile.write(x + '\n')

    def stop(self):
        """Shut down the browser and the virtual display."""
        self.browser.quit()
        self.display.stop()
コード例 #44
0
class Order:
    """Automates a single Amazon purchase: sign in, add the product at
    *url* to the cart, check out and place the order — all inside a
    hidden virtual display.
    """

    def __init__(self, username, password, url):
        self.username = username
        self.password = password
        self.url = url
        # Hide the browser inside a virtual (Xvfb) display.
        self.display = Display(visible=0, size=(1920, 1080))
        self.display.start()
        self.browser = webdriver.Chrome()
        # Poll up to 15 s for elements before find_element_* raises.
        self.browser.implicitly_wait(15)

    def goToPage(self):
        """Open the product page."""
        self.browser.get(self.url)
        print(self.browser.title)

    def login(self):
        """Sign in via the account nav link unless already signed in."""
        loginButton = self.browser.find_element_by_css_selector(
            "a#nav-link-yourAccount span.nav-line-1")
        print(loginButton.text)
        if loginButton.text == "Hello. Sign in":
            loginButton.click()
            email = self.browser.find_element_by_id("ap_email")
            pw = self.browser.find_element_by_id("ap_password")
            email.clear()
            pw.clear()
            email.send_keys(self.username)
            pw.send_keys(self.password)
            submit = self.browser.find_element_by_id("signInSubmit")
            submit.click()
        else:
            print("Already logged in.")

        # Re-read the nav link to confirm the sign-in took effect.
        loginButton = self.browser.find_element_by_css_selector(
            "a#nav-link-yourAccount span.nav-line-1")
        print(loginButton.text)

    def placeOrder(self):
        """Add to cart, proceed to checkout and place the order, waiting
        on the page title between steps."""
        print(self.browser.title)
        print("Placing order.")
        wait = WebDriverWait(self.browser, 10)
        addToCart = self.browser.find_element_by_css_selector(
            "input#add-to-cart-button")
        addToCart.click()
        time.sleep(10)
        print(self.browser.title)
        wait.until(EC.title_contains('Amazon.com Shopping Cart'))
        checkout = self.browser.find_element_by_css_selector(
            "a#hlb-ptc-btn-native")
        checkout.click()
        time.sleep(10)
        print(self.browser.title)
        wait.until(EC.title_contains('Amazon.com Checkout'))
        placeOrder = self.browser.find_element_by_name("placeYourOrder1")
        placeOrder.click()
        time.sleep(20)
        print(self.browser.title)
        wait.until(EC.title_contains('Amazon.com Thanks You'))

    def kill(self):
        """Shut down the browser and the virtual display.

        quit() (not close()) is required here: close() only closes the
        current window and would leak the chromedriver process.
        """
        self.browser.quit()
        self.display.stop()

    def start(self):
        """Run the full purchase flow; always release resources."""
        try:
            self.goToPage()
            self.login()
            self.placeOrder()
        except Exception:
            print("Exception Raised")
            raise
        finally:
            self.kill()
コード例 #45
0
class TflCrawler():
    """Crawler for http://cycling.data.tfl.gov.uk/ that discovers the CSV
    files listed on the site, downloads them locally, and populates the
    webapp_* relations of a PostgreSQL database (bikes, stations,
    station-pair reference routes and rental routes) from their contents.
    """
    def __init__(self):
        '''
            Constructor method that instantiate the TflCrawler.
        '''
        self.__site = 'http://cycling.data.tfl.gov.uk/'
        self.__elements = {} # initialise an empty dictionary
        self._file_type = 'CSV file'
        self.__folder_dir = os.path.abspath(os.path.dirname(__file__))

    def _start_crawling(self, driver_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'chromedriver'))):
        '''
            Start crawling process, creating and invisible browser display, with 800 by 600 dimension. Additionally,
            the location of Chrome driver is specified.

        :param driver_dir: defines the location of Chrome driver. The directory of the driver is specified as a relative
                path of the user working directory.
        '''
        try:
            print('start driver...')
            self._display = Display( visible= 0, size = (800,600)) # create a chrome display with 800*600 dimension
            self._display.start() # starts the browser
            self._driver = webdriver.Chrome(driver_dir) # set the location of web driver
        except Exception as e:
            # NOTE(review): driver_dir points at the chromedriver file
            # itself, so os.listdir() here may raise a second error when
            # the path exists as a file — confirm intent.
            print(f'[No driver was identified. Identified files: {os.listdir(driver_dir)}]')

    def _stop_crawling(self):
        '''
            It closes the browser display that was initialised by the start_crawling method. The driver it also stops.
        '''
        print('closing driver...')
        self._display.stop()
        self._driver.quit()

    def _get_site(self, url):
        '''
            The current method performs a request on http://cycling.data.tfl.gov.uk/ server and gets a response. The content
            of the response is converted in HTML and is returned by the method.
        :param url: the url of http://cycling.data.tfl.gov.uk/'
        '''

        try:
            self._driver.get(url) # navigates to page
            sleep(5)  # stops the code execution so that the HTML content to be loaded (5 to 10 seconds)
            return self._driver.execute_script('return document.body.innerHTML') # load the HTML content
        except Exception as e:
            # Returns None implicitly on failure; callers must tolerate it.
            print(f'[Unable to reach {self.__site}. Error : {str(e)}]')

    def _populate_dictionary(self, html):
        '''
            The HTML structure that has been retrieved by __get_site method is analysed such that a dictionary of all
            csv files within the website is constructed. The dictionary is populated by each csv file that may be uploaded
            on TFL website. Each csv file is encapsulated as a dictionary, containing keys such that [name, url, date, size].
        :param html: the html structure that is created by the __get_site method
        '''
        try:
            print('get the content...')
            soup = BeautifulSoup(html, 'html.parser') # creates a soup object and defines how the HTML will be parsed
            # finds all tr elements with an attribute of data-level=3
            main_content = soup.find_all('tr', attrs= { 'data-level' : '3' })

            # iterate over the tr elements
            for i,item in enumerate(main_content):
                td = item.find_all('td') # retrieves the td elements within the tr

                # checks if the type of the 4th td element is CSV
                if (td[3].string == self._file_type):
                    # Populates the dictionary
                    self.__elements[i] ={
                            'name' : td[0].a.string,
                            'url' : td[0].a['href'],
                            'date' : td[1].string,
                            'size' : td[2].string
                    }
        except Exception as e:
            print(f'[Unable to parse the content of {self.__site}. Error: {str(e)}]')

    def parse(self):
        '''
            Performs the entire process to parse the TFL website. In particular, starts the Chrome driver, waits until the site
            to load the HTML content, and therefore performs a request to the website. Then, the response is parsed, populating
            a dictionary that maintains all the csv files that might exist on that site
        :param driver_dir: defines the Google driver relative directory
        '''

        self._start_crawling(os.path.join(self.__folder_dir,'chromedriver'))
        html = self._get_site(self.__site)
        self._populate_dictionary(html)
        self._stop_crawling()

    def retrieve_csv_files(self, DNS,rel_path):
        '''
            Iterates over the constructed dictionary and retrieves each csv file that is identified. The csv files are saved
            locally. Additionally, the corresponded relations of the DB are created
        :param DNS:  libpq connection string ("DSN") used to open the PostgreSQL connection.
        :param rel_path:  the relative path, which determines the location that the created csv file would be stored.
        '''

        def populate_stations_pairs_relation(df):
            # Insert missing (start, end) station pairs with a reference
            # route requested from the cyclestreets.net journey API.
            def insert(l):
                if len(l) > 1:
                    # adds a colon at the end of the statement
                    l[-1] = no_space_join([l[-1][:-1], ';'])
                    # joins the insert statements
                    statement = no_space_join(l)

                    # insert the query
                    execute(statement)
                    conn.commit()

            # Drops duplicate routes, that have a start-end station which already exists
            dfrout= df[['StartStation Id','EndStation Id']].drop_duplicates()
            # drop OD routes that started and ended at the same station

            dfrout = dfrout.drop(dfrout[(dfrout['StartStation Id'] == dfrout['EndStation Id'])].index)

            # Variables to avoid overheading
            execute = cur.execute
            fetchall = cur.fetchall

           # corresponds to the stations that already exists in the DB AND have a location
            execute('SELECT station_id,st_asText(location) FROM webapp_stations WHERE location IS NOT NULL')
            # gets the stations that have a location
            stations = dict([(station[0], station[1].replace('MULTIPOINT', '')) for station in fetchall()])

            # stations that in do not have a location in the database, are removed from the data frame
            sids = [s for s in stations.keys()]
            # NOTE(review): the == between the two isin() masks keeps rows
            # where BOTH or NEITHER station is located; presumably the
            # intent was to require both — confirm.
            dfrout = dfrout[dfrout['StartStation Id'].isin(sids) == dfrout['EndStation Id'].isin(sids)]

            # requests the pairs of stations that exist in the database
            execute('SELECT start_station_id,end_station_id FROM webapp_stations_pairs_routes')
            pairs_dict = dict([(pair,pair) for pair in fetchall()])

            # Variables that will used to construct the request url
            #plan = '&plan='
            #plan_options = ['fastest','balanced','quietest']
            plan = '&plan=balanced'
            default_url = 'https://www.cyclestreets.net/api/journey.json?key=112d0fc4c69f3951&itinerarypoints='
            nPairs = dfrout.shape[0]
            try:
                # Variables out of the for loop
                #l = ['INSERT INTO webapp_stations_pairs_routes(start_station_id,end_station_id,fastest_ref_dist,fastest_ref_time,fastest_ref_geom,balanced_ref_dist,balanced_ref_time,balanced_ref_geom,quietest_ref_dist,quietest_ref_time,quietest_ref_geom) VALUES ']
                l = ['INSERT INTO webapp_stations_pairs_routes(start_station_id,end_station_id,balanced_ref_dist,balanced_ref_time,balanced_ref_geom) VALUES ']
                comma_join = ','.join
                no_space_join = ''.join
                pipe_join = '|'.join

                for i_pair,pair in enumerate(dfrout.itertuples()):
                    # every 1000 requests, pause for 5 seconds (request policy)
                    if i_pair % 1000 == 0 and i_pair > 0:
                        sleep(5)
                        print(f'Pair : {i_pair+1} of {nPairs}')

                    start_station_id = int(pair[1])
                    end_station_id = int(pair[2])

                    # checks for OD pairs that do not exist in the DP (if the )
                    if (start_station_id,end_station_id) not in pairs_dict:
                        try:
                            start_coords = stations[start_station_id][1:-1].replace(' ',',')
                            end_coords = stations[end_station_id][1:-1].replace(' ',',')

                            #time,distance,coords = [],[],[]
                            #atime = time.append
                            #adistance = distance.append
                            #acoords = coords.append
                            #for option in plan_options:
                            # request the link from www.cyclestreet.com
                            response = requests.get(no_space_join([default_url, pipe_join([start_coords,end_coords]), plan])).json()['marker'][0]['@attributes']
                            # loads the json file into a python object(dictionary)
                            time = response['time']
                            distance = response['length']
                            coords = f"st_GeomFromText('LINESTRING({response['coordinates'].replace(' ','?').replace(',',' ').replace('?',',')})',4326)"
                            #response_json = loads(response)['marker'][0]['@attributes']
                            #atime(response['time'])
                            #adistance(response['length'])
                            #acoords(f"st_GeomFromText('LINESTRING({response['coordinates'].replace(' ','?').replace(',',' ').replace('?',',')})',4326)")

                        except (KeyError,AttributeError):
                            continue

                        # creates a statement of the current pair
                        #statement = no_space_join(['(',comma_join([str(start_station_id),str(end_station_id),distance[0],time[0],coords[0],distance[1],time[1],coords[1],distance[2],time[2],coords[2]]),'),'])
                        statement = no_space_join(['(',comma_join([str(start_station_id),str(end_station_id),distance,time,coords]),'),'])
                        l.append(statement)

                    if i_pair % 100 == 0:
                        # Flush the accumulated VALUES every 100 pairs.
                        insert(l)
                        l = ['INSERT INTO webapp_stations_pairs_routes(start_station_id,end_station_id,balanced_ref_dist,balanced_ref_time,balanced_ref_geom) VALUES ']
                        #l = ['INSERT INTO webapp_stations_pairs_routes(start_station_id,end_station_id,fastest_ref_dist,fastest_ref_time,fastest_ref_geom,balanced_ref_dist,balanced_ref_time,balanced_ref_geom,quietest_ref_dist,quietest_ref_time,quietest_ref_geom) VALUES ']

            except Exception as e:
                print('Error while data of webapp_stations_ref_routes were requested...')
            try:
                # Flush any remainder and hand the located stations back.
                insert(l)
                return stations
            except:
                print('Error while the INSERT statement was executed for the webapp_stations_ref_routes relation')

        def insert_values_db(values, table_attributes,relation,null_stations):
            # Build and execute one multi-row INSERT for *relation*.

            # Local Variables
            statement = [f"INSERT INTO {table_attributes} VALUES "]
            append = statement.append # its assign so that we avoid the overheating inside the loop
            replace = str.replace # its assign so that we avoid overheating inside the loop
            n = values.shape[0]-1 # number of observations

            # If the relation that is examined is the stations, receive the spatial location of each station
            if relation == 'webapp_stations':
                # stations_location =[(randint(0,89) + random() ,randint(0,89) + random()) for e in range(values.shape[0])]
                try:
                    stations_location, null_stations = get_station_location(driver_dir= os.path.join(self.__folder_dir,'chromedriver'), url ='https://api.tfl.gov.uk/swagger/ui/index.html?url=/swagger/docs/v1#!/BikePoint/BikePoint_Search' , stations =  values['StartStation Name'].values.tolist(), null_stations = null_stations)
                except Exception as e:
                    print('Error - line 228')
            elif relation == 'webapp_routes':
                stations = populate_stations_pairs_relation(values) # returns a dictionary, with all the stations that have a location
                cur.execute('SELECT id,start_station_id,end_station_id FROM webapp_stations_pairs_routes')
                pairs = dict([((pair[1],pair[2]),pair[0]) for pair in cur.fetchall()])

            # Iterate over each observation and create the corresponded INSERT statement
            for irow, row in enumerate(values.itertuples()):
                pk = row[1] # assign the value of pk to a local variable
                try:
                    if relation == 'webapp_bikes':
                            append(replace(f"({pk}),", "\\'", "''"))
                    elif relation == 'webapp_stations':
                        try:
                            append(replace(f"({pk},'{row[2]}', ST_GeomFromText('MULTIPOINT({stations_location[irow][0]} {stations_location[irow][1]})',4326)),", "\\'", "''"))
                        except:
                            continue
                    elif relation == 'webapp_routes':
                        # get only the routes that i) do not have the same starting and ending station and i) have a start or end station that contains a location in the db
                         if (row[6] != row[7]) and (row[6] in stations) and (row[7] in stations) :
                            pair_id = pairs[(row[6],row[7])]
                            append(replace(f"({pk},'{row[2]}','{row[3]}',{abs(row[4])},{row[5]},{pair_id}),", "\\'", "''"))

                except (ValueError,KeyError):
                    continue

            # Constructs the INSERT statement
            if len(statement) > 1:
                statement[-1] = ''.join([statement[-1][:-1] + ';'])
                statement = ''.join(statement)
                # INSERT the new values into the database
                sql_execute(statement)
                conn.commit() # commit the transaction
            if relation =='webapp_stations':
                return null_stations

        def populate_relation(df, df_main_all_names, relation, pk , table_attributes, null_stations):
            # Local variables
            def process_df(df, df_main_all_names,relation):
                # in order to avoid error in subsequent procedures, we need to receive the Id of the starting and ending stations
                if relation == 'webapp_stations':
                    start_stations_df = df[df_main_all_names[1]].dropna()
                    scol = start_stations_df.columns
                    end_stations_df = df[['EndStation Id','EndStation Name']].dropna()
                    end_stations_df.columns = [scol[0],scol[1]]
                    ndf = pd.concat([start_stations_df,end_stations_df], axis= 0).drop_duplicates([df_main_all_names[0]])
                else:
                    # drops the duplicates from the primary key for the webapp_routes and webapp_bikes relation
                    ndf = dataframe(df[df_main_all_names[1]]).drop_duplicates([df_main_all_names[0]]).dropna()
                return ndf

            new_values = []
            append = new_values.append
            dataframe = pd.DataFrame

            # Retrieves the csv sub-dataframe that defines a relation
            try:
                ndf = process_df(df,df_main_all_names,relation)
            except (TypeError,IndexError,KeyError):
                # Fall back to the canonical column layout when the CSV
                # header differs, then retry.
                df.columns = ['Rental Id','Duration','Bike Id','End Date','EndStation Id','EndStation Name','Start Date','StartStation Id', 'StartStation Name']
                ndf = process_df(df,df_main_all_names,relation)


            # Dimensions of the table
            n = ndf.shape[1]
            # Performs a SELECT query that will return current values within the db
            sql_execute(f"SELECT {pk[1]} FROM {relation};")
            # identify the pk of each entity - a dictionary is used for more efficient search
            stored_pks= dict([(e[pk[0]],e[pk[0]]) for e in cur.fetchall()])

            try:
                # Look for new values
                for row in ndf.itertuples():
                    if (row[1] not in stored_pks):
                        append(row[1])

                if len(stored_pks) != 0:
                    if n == 1: # 1 Dimensional relations
                        if len(new_values) > 0:
                            insert_values_db(dataframe({f'{df_main_all_names[0]}' : new_values}), table_attributes, relation,null_stations)

                    else: # n Dimensional relations
                         if len(new_values) > 0:
                            new_values_joined = dataframe({ df_main_all_names[0]: new_values}).merge(ndf,how='left',left_on= df_main_all_names[0], right_on = df_main_all_names[0])
                            if relation == 'webapp_stations':
                                null_stations = insert_values_db(new_values_joined[df_main_all_names[1]], table_attributes, relation,null_stations)
                                return null_stations
                            else:
                                insert_values_db(new_values_joined[df_main_all_names[1]], table_attributes, relation,null_stations)
                else:
                    # Empty relation: insert the whole de-duplicated frame.
                    if relation == 'webapp_stations':
                        null_stations = insert_values_db(dataframe(ndf[df_main_all_names[1]]), table_attributes, relation,null_stations)
                        return null_stations
                    else:
                        insert_values_db(dataframe(ndf[df_main_all_names[1]]), table_attributes, relation, null_stations)

            except psycopg2.InternalError:
                conn.rollback()
                process_df(df, df_main_all_names, relation)
            except Exception as e:
                print(f'Line 327 - {e}')
        #-------------------------------------------------------------------------------------------------------------------------

        try:
            # Local Variables
            join = os.path.join
            exists = os.path.exists
            size = os.path.getsize
            cd = self.__folder_dir # gives the directory of tflcrawler
            read_csv = pd.read_csv

            # establish a connection with o PostgreSQL database, based on the given DNS parameter
            conn = psycopg2.connect(DNS)
            cur = conn.cursor() # initialise a cursor
            sql_execute = cur.execute # cur.execute command is assigned as local variable (avoid dot overheating)
            null_stations = ['Bourne Street, Belgravia'] # list that will check if a station is null

            path = join(cd,rel_path) # Defines the path where the csv files will be stored
            print('starts to retrieve the csv files...')
            elements = self.__elements # assign the current dictionary to a local variable

            # iterate over the dictionary elements
            for value in tqdm(elements.values()):
                name = value['name'] # file name
                try:
                    csv_path = join(path, name) # assign a full path fof the file
                    print(csv_path)

                    # if the file does not exist or the file exists, having a size of zero (nothing within it)
                    if (not exists(csv_path)) or (exists(csv_path) and size(csv_path) == 0):

                        # request the csv file from the server
                        try:
                            response = requests.get(value['url'])
                        except (requests.ConnectionError, requests.ConnectTimeout, requests.HTTPError, requests.TooManyRedirects) as error:
                            print(str(error))
                        # NOTE(review): if the request above raised,
                        # 'response' is unbound here and the next line
                        # raises NameError (caught by the outer handler).
                        # convert the text to a generator
                        splitted_text = response.iter_lines(chunk_size= 512)

                        # opens and write the file
                        with open(csv_path, 'w') as file:
                            for line in splitted_text:
                                file.write(str(line)[2:-1] + '\n')
                            file.close()

                        # reads the created csv file
                        df = read_csv(filepath_or_buffer= csv_path, delimiter=',' ,encoding= 'utf-8')
                        # Populates the Bikes entity
                        populate_relation(df = df, df_main_all_names= ('Bike Id', 'Bike Id'), relation= 'webapp_bikes' , pk = (0,'bike_id'), table_attributes= 'webapp_bikes(bike_id)', null_stations = null_stations)

                        # Populates the Stations entity
                        condition = True # initialise a boolean variable that checks if the populate_relation function of stations has been correctly executed
                        while(condition):
                            try:
                                # populate the db with the corresponded values of stations
                                null_stations = populate_relation(df = df, df_main_all_names= ('StartStation Id', ['StartStation Id', 'StartStation Name']) , relation ='webapp_stations', pk = (0,'station_id'), table_attributes= 'webapp_stations(station_id,station_name,location)', null_stations = null_stations)

                                # set the condition to false and exit from the while loop
                                condition = False
                            except ValueError:  # If the function returns an error due to unsimilarity of the file, SKIP the file
                                condition = False
                            except Exception as e: # If the function returns any other error, execute the function again
                                # The function may do not executed correctly due to problems with the connection with the API and other requests
                                print('POPULATE_RELATION IS EXECUTED AGAIN...')
                                continue
                        # Populates the Routes entity
                        populate_relation(df = df, df_main_all_names=('Rental Id', ['Rental Id','Start Date','End Date', 'Duration','Bike Id','StartStation Id', 'EndStation Id']), relation= 'webapp_routes', pk =(0,'rental_id'), table_attributes='webapp_routes(rental_id,start_date,end_date,duration,bike_id,station_pairs_id)', null_stations = null_stations)

                except Exception as e:
                    print(f'[Error of file {name} - Inside the FOR loop]')
                    continue

        except Exception as e:
            # NOTE(review): if psycopg2.connect() itself failed, 'cur' and
            # 'conn' are unbound and these close() calls raise NameError.
            # Close the cursor and database connection as well
            cur.close()
            conn.close()
            print(f'[ Error while the files are retrieved. Error: {str(e)}]')

    @property
    def elements(self):
        # Read-only view of the discovered CSV-file dictionary.
        return self.__elements

    @property
    def site(self):
        # Read-only base URL of the crawled site.
        return self.__site
コード例 #46
0
ファイル: search.py プロジェクト: webshell520/Zeus-Scanner
def get_urls(query, url, verbose=False, warning=True, user_agent=None, proxy=None, **kwargs):
    """
      Bypass Google captchas and Google API by using selenium-webdriver to gather
      the Google URL. This will open a robot controlled browser window and attempt
      to get a URL from Google that will be used for scraping afterwards.

      Only downside to this method is that your IP and user agent will be visible
      until the application pulls the URL.

    :param query: search terms typed into the engine's 'q' field
    :param url: search-engine URL to open
    :param proxy: optional single-entry {scheme: address} mapping
    :return: the result URL selenium lands on after submitting the query
    """
    if verbose:
        logger.debug(set_color(
            "setting up the virtual display to hide the browser...", level=10
        ))
    ff_display = Display(visible=0, size=(800, 600))
    ff_display.start()
    logger.info(set_color(
        "firefox browser display will be hidden while it performs the query..."
    ))
    if warning:
        logger.warning(set_color(
            "your web browser will be automated in order for Zeus to successfully "
            "bypass captchas and API calls. this is done in order to grab the URL "
            "from the search and parse the results. please give selenium time to "
            "finish it's task...", level=30
        ))
    if verbose:
        logger.debug(set_color(
            "running selenium-webdriver and launching browser...", level=10
        ))

    if verbose:
        logger.debug(set_color(
            "adjusting selenium-webdriver user-agent to '{}'...".format(user_agent), level=10
        ))
    if proxy is not None:
        # list() is required: on Python 3 dict.keys() is a view and does
        # not support indexing, so proxy.keys()[0] would raise TypeError.
        proxy_type = list(proxy.keys())
        proxy_to_use = Proxy({
            "proxyType": ProxyType.MANUAL,
            "httpProxy": proxy[proxy_type[0]],
            "ftpProxy": proxy[proxy_type[0]],
            "sslProxy": proxy[proxy_type[0]],
            "noProxy": ""
        })
        if verbose:
            logger.debug(set_color(
                "setting selenium proxy to '{}'...".format(
                    ''.join(proxy_type) + "://" + ''.join(proxy.values())
                ), level=10
            ))
    else:
        proxy_to_use = None

    profile = webdriver.FirefoxProfile()
    profile.set_preference("general.useragent.override", user_agent)
    browser = webdriver.Firefox(profile, proxy=proxy_to_use)
    logger.info(set_color("browser will open shortly..."))
    browser.get(url)
    if verbose:
        logger.debug(set_color(
            "searching search engine for the 'q' element (search button)...", level=10
        ))
    search = browser.find_element_by_name('q')
    logger.info(set_color(
        "searching '{}' using query '{}'...".format(url, query)
    ))
    search.send_keys(query)
    search.send_keys(Keys.RETURN)  # hit return after you enter search text
    time.sleep(3)
    if verbose:
        logger.debug(set_color(
            "obtaining URL from selenium..."
        ))
    retval = browser.current_url
    if verbose:
        logger.debug(set_color(
            "found current URL from selenium browser '{}'...".format(retval), level=10
        ))
    logger.info(set_color(
        "closing the browser and continuing process.."
    ))
    # quit() (not close()) so the geckodriver process is not leaked.
    browser.quit()
    ff_display.stop()
    return retval
コード例 #47
0
class CraigslistBot:
    """Selenium bot that logs into Craigslist and publishes "for sale by
    owner" listings, handling Craigslist's email-verification flow via a
    ProtonMail inbox.

    NOTE(review): depends on module-level names defined elsewhere in this
    file (webdriver, Display, NoSuchElementException, spintax, os, time).
    """

    @staticmethod
    def debug(inString):
        # Bot-prefixed debug output; encoded to utf-8 so non-ASCII
        # listing text does not crash the print.
        print(" [BOT] - %s" % inString.encode('utf-8').strip())

    def __init__(self,
                 protonLogin="",
                 protonPassword="",
                 loginEmail="",
                 loginPass="",
                 contactNumber="",
                 contactName="",
                 postCode="",
                 listingsFile="",
                 waitTime=10,
                 waitTimeBetweenPosts=30):
        """Store credentials/contact details and launch Firefox.

        On non-Windows hosts a virtual display is started first so the
        browser can run on a headless server.

        :param protonLogin: ProtonMail username (for verification emails)
        :param protonPassword: ProtonMail password
        :param loginEmail: Craigslist account email
        :param loginPass: Craigslist account password
        :param contactNumber: phone number placed on each post
        :param contactName: contact name placed on each post
        :param postCode: ZIP code placed on each post
        :param listingsFile: path to the listings data file
        :param waitTime: seconds to sleep between page interactions
        :param waitTimeBetweenPosts: seconds to sleep between posts
        """
        self.display = ""

        if not os.name == 'nt':
            # Virtual display for headless (non-Windows) environments.
            self.display = Display(visible=1, size=(1248, 1000))  # 800x600
            self.display.start()

        self.client = webdriver.Firefox()
        self.isLoggedIn = False

        self.protonLogin = protonLogin
        self.protonPassword = protonPassword
        self.loginEmail = loginEmail
        self.loginPass = loginPass
        self.contactNumber = contactNumber
        self.contactName = contactName
        self.postCode = postCode
        self.listingsFile = listingsFile
        self.waitTime = waitTime
        self.waitTimeBetweenPosts = waitTimeBetweenPosts

        # Craigslist region code used to build the posting URL.
        # "nyc" asks for extra location data -- not implemented yet.
        self.locationCode = "chi"

    def __del__(self):
        """Stop the virtual display (if one was started) and quit Firefox."""
        if not os.name == 'nt':
            self.display.stop()

        self.client.quit()
        return 0  # NOTE(review): __del__ return values are ignored by Python

    def login(self, oneTimeLoginLink=""):
        """Log into Craigslist and set ``self.isLoggedIn`` on success.

        :param oneTimeLoginLink: optional one-time login URL (as emailed
            by Craigslist); when empty, the normal login page is used.
        """
        self.debug("Logging in...")

        if oneTimeLoginLink == "":
            self.client.get("https://accounts.craigslist.org/login")
        else:
            self.client.get(oneTimeLoginLink)

        self.waitForId("inputEmailHandle")

        #self.debug("Inputing information to login screen")

        self.client.find_element_by_css_selector(
            "#inputEmailHandle").send_keys(self.loginEmail)

        self.client.find_element_by_css_selector("#inputPassword").send_keys(
            self.loginPass)

        self.client.find_element_by_id("login").click()

        # if need activation:
        # otl = self.validatePostInEmail()
        # self.login(otl)
        # return

        # The ".tab" element is only present on the logged-in account page,
        # so its absence means the login failed.
        try:
            self.client.find_element_by_css_selector('.tab')
        except NoSuchElementException:
            self.debug("Not logged in")
            return

        self.debug("Successfully logged in!")

        self.isLoggedIn = True

    def createpost(self, listing):
        """Publish a single listing and return the new post's URL.

        Returns 0 when not logged in. ``listing`` is assumed to expose
        ``name``, ``description`` and ``imagePathList`` attributes --
        TODO confirm against the caller.
        """
        if not self.isLoggedIn:
            self.debug("ERROR: You're not logged in!")
            return 0

        #self.debug("Attempting to post this listing:")
        #self.debug(listing.tostring() + "\n")

        #self.debug("Navigating to post page")

        #self.debug("locationCode: " + self.locationCode)
        initialPostUrl = "https://post.craigslist.org/c/" + self.locationCode
        #self.debug("navigating to " + initialPostUrl)
        self.client.get(initialPostUrl)

        self.waitForCss("input[value='1']")

        self.client.find_element_by_css_selector("input[value='1']").click()

        # fso = for sale by owner
        # so  = service offered
        self.client.find_element_by_css_selector("input[value='fso']").click()
        time.sleep(self.waitTime)

        # 199 = computer parts
        # 7   = computers
        # 96  = electronics
        self.client.find_element_by_css_selector("input[value='96']").click()
        time.sleep(self.waitTime)
        """
        self.debug("Trying to fill in email")
        try:
            self.client.find_element_by_css_selector(
                '#FromEMail').send_keys(self.loginEmail)
        except NoSuchElementException:
            self.debug("Not avaliable")
        try:
            self.client.find_element_by_css_selector(
                '#FromEMail').send_keys(self.loginEmail)
        except NoSuchElementException:
            self.debug("Not avaliable")
        """

        #self.debug("Checking 'Okay to contact by phone'")
        self.waitForName("show_phone_ok")
        self.client.find_element_by_name("show_phone_ok").click()
        self.client.find_element_by_name("contact_phone_ok").click()

        #self.debug("Checking 'Okay to contact by text'")
        self.client.find_element_by_name("contact_text_ok").click()

        #self.debug("Filling in contact phone number")
        self.client.find_element_by_name("contact_phone").send_keys(
            self.contactNumber)

        #self.debug("Filling in contact name")
        self.client.find_element_by_name("contact_name").send_keys(
            self.contactName)

        #self.debug("Filling in post title")
        # spintax expands "{a|b}" spin syntax so repeated posts vary.
        spinName = spintax.spin(listing.name)
        self.client.find_element_by_name("PostingTitle").send_keys(spinName)

        #self.debug("Filling in zip code")
        self.client.find_element_by_id("postal_code").send_keys(self.postCode)

        #self.debug("Filling in post content")
        spinDescription = spintax.spin(listing.description)
        self.client.find_element_by_name("PostingBody").send_keys(
            spinDescription)

        #self.debug("Checking 'Okay to contact for other offers'")
        self.waitForName("contact_ok")
        self.client.find_element_by_name("contact_ok").click()

        # self.debug("Unchecking 'Want a map' if checked")
        # try:
        #    self.client.find_element_by_css_selector("#wantamap:checked")
        # except NoSuchElementException:
        #    self.debug("Not checked")
        # finally:
        #    self.client.find_element_by_css_selector("#wantamap:checked").click()
        # time.sleep(self.waitTime)

        #self.debug("Clicking continue")
        self.client.find_element_by_name("go").click()

        # if "editimage" in self.client.current_url:  # FIX tHIS
        #   self.debug("Clicking continue")
        #   self.client.find_element_by_css_selector('button.done').click()
        # else:
        #   self.debug(
        #      "Could not submit. Maybe a bad email address or phone number")

        #self.debug("Clicking publish")
        self.waitForClass("bigbutton")
        self.client.find_element_by_class_name('bigbutton').click()

        # determine if we need to switch to classic uploading
        time.sleep(self.waitTime)
        if len(self.client.find_elements_by_id('classic')) != 0:
            #self.debug("clicking use classic image uploader")
            self.waitForId("classic")
            time.sleep(self.waitTime)
            self.client.find_element_by_id('classic').click()
            time.sleep(self.waitTime
                       )  # must wait for classic to pop into the viewport

        #self.debug("uploading images")
        self.waitForName("file")
        for imagePath in listing.imagePathList:
            self.debug("Attempting to upload image: " + os.getcwd() + "/" +
                       imagePath)
            self.client.find_element_by_name("file").send_keys(os.getcwd() +
                                                               "/" + imagePath)
            time.sleep(self.waitTime)

        self.debug("Clicking done with images")
        self.waitForClass("bigbutton")
        self.client.find_element_by_class_name('bigbutton').click()

        self.debug("Click publish (again)")
        self.waitForName("go")
        self.client.find_element_by_name('go').click()

        # check if we need to verify the post
        self.debug("Check if the post needs verified")
        time.sleep(self.waitTime)
        htmlText = self.client.find_element_by_css_selector("body").text
        # self.debug(htmlText)
        if "FURTHER ACTION REQUIRED" in htmlText:
            # wait for the email to come through and then verify it
            self.debug("must verify post")
            time.sleep(45)
            self.validatePostInEmail()

        # First link in the confirmation list is the live post URL.
        return self.client.find_element_by_css_selector(
            "ul.ul").find_elements_by_css_selector("a")[0].get_attribute(
                "href")

    # region WaitFor methods
    # Each waitFor* helper polls every 2 seconds (up to ~60s total) until
    # at least one matching element exists, then returns.

    def waitForName(self, name):
        """Poll until an element with the given ``name`` attribute exists."""
        for i in range(0, 30):
            #self.debug("waiting for id \"" + name + "\"...")
            if len(self.client.find_elements_by_name(name)) != 0:
                break
            time.sleep(2)

    def waitForId(self, idName):
        """Poll until an element with the given id exists."""
        for i in range(0, 30):
            #self.debug("waiting for id \"" + idName + "\"...")
            if len(self.client.find_elements_by_id(idName)) != 0:
                break
            time.sleep(2)

    def waitForCss(self, css):
        """Poll until an element matching the CSS selector exists."""
        for i in range(0, 30):
            #self.debug("waiting for css selector \"" + css + "\"...")
            if len(self.client.find_elements_by_css_selector(css)) != 0:
                break
            time.sleep(2)

    def waitForClass(self, className):
        """Poll until an element with the given class name exists."""
        for i in range(0, 30):
            #self.debug("waiting for class \"" + className + "\"...")
            if len(self.client.find_elements_by_class_name(className)) != 0:
                break
            time.sleep(2)

    # endregion

    def validatePostInEmail(self):
        """Open ProtonMail, follow the Craigslist verification link in the
        newest email, and return the link to the newly created post.
        """
        self.debug("NOW, WE VALIDATE!")
        self.client.get("https://mail.protonmail.com/login")

        self.waitForId("username")
        self.client.find_element_by_id("username").send_keys(self.protonLogin)
        self.client.find_element_by_id("password").send_keys(
            self.protonPassword)
        self.client.find_element_by_id("login_btn").click()

        # we're looking for the first link (our craigslistBot email folder) in the first "menuItem-label" list
        self.waitForClass("menuLabel-item")
        labelItem = self.client.find_elements_by_class_name(
            "menuLabel-item")[0]
        labelLink = labelItem.find_elements_by_css_selector(
            "a")[0].get_attribute('href')
        self.client.get(labelLink)

        # click the newest email
        self.waitForClass("conversation")
        self.client.find_elements_by_class_name("conversation")[0].click()

        # find the newest message in that email
        self.waitForClass("message")
        correctMessage = self.client.find_elements_by_class_name("message")[-1]

        # get the one time link, typically the last link in the list
        self.waitForCss("a")
        oneTimeLink = correctMessage.find_elements_by_css_selector(
            "a")[-1].get_attribute('href')

        # if the last link is a support page, select the second to last link which should be our verification link
        if oneTimeLink == "https://www.craigslist.org/about/scams?lang=en&cc=us":
            oneTimeLink = correctMessage.find_elements_by_css_selector(
                "a")[-2].get_attribute('href')

        # navigate to the verification link
        self.client.get(oneTimeLink)

        # get the new post link. This may be the incorrect link, look into this.
        self.waitForCss("a")
        newPostLink = labelItem.find_elements_by_css_selector(
            "a")[0].get_attribute('href')

        time.sleep(2)

        return newPostLink
コード例 #48
0
class BaseCase(unittest.TestCase):
    '''
    A base test case that wraps methods for enhanced usage.
    You can also add your own methods here.
    '''

    def __init__(self, *args, **kwargs):
        super(BaseCase, self).__init__(*args, **kwargs)
        try:
            self.driver = WebDriver()
        except Exception:
            # Best-effort: a driver may be supplied later in setUp()
            # (e.g. via browser_launcher when running under pytest), so
            # a failure here is deliberately ignored.
            pass
        self.environment = None

    def open(self, url):
        """ Navigates the browser to the given URL, optionally waiting
            for document.readyState == "complete" per settings. """
        self.driver.get(url)
        if settings.WAIT_FOR_RSC_ON_PAGE_LOADS:
            self.wait_for_ready_state_complete()
        self._demo_mode_pause_if_active()

    def open_url(self, url):
        """ In case people are mixing up self.open() with open(),
            use this alternative. """
        self.open(url)

    def click(self, selector, by=By.CSS_SELECTOR,
              timeout=settings.SMALL_TIMEOUT):
        """ Waits for the element to be visible, then clicks it. """
        element = page_actions.wait_for_element_visible(
            self.driver, selector, by, timeout=timeout)
        self._demo_mode_scroll_if_active(selector, by)
        element.click()
        if settings.WAIT_FOR_RSC_ON_CLICKS:
            self.wait_for_ready_state_complete()
        self._demo_mode_pause_if_active()

    def click_chain(self, selectors_list, by=By.CSS_SELECTOR,
                    timeout=settings.SMALL_TIMEOUT, spacing=0):
        """ This method clicks on a list of elements in succession.
            'spacing' is the amount of time to wait between clicks. (sec) """
        for selector in selectors_list:
            self.click(selector, by=by, timeout=timeout)
            if spacing > 0:
                time.sleep(spacing)

    def click_link_text(self, link_text, timeout=settings.SMALL_TIMEOUT):
        """ Waits for a link with the given text to be visible, then
            clicks it. """
        element = self.wait_for_link_text_visible(link_text, timeout=timeout)
        element.click()
        if settings.WAIT_FOR_RSC_ON_CLICKS:
            self.wait_for_ready_state_complete()
        self._demo_mode_pause_if_active()

    def add_text(self, selector, new_value, timeout=settings.SMALL_TIMEOUT):
        """ The more-reliable version of driver.send_keys()
            Similar to update_text(), but won't clear the text field first. """
        element = self.wait_for_element_visible(selector, timeout=timeout)
        element.send_keys(new_value)
        self._demo_mode_pause_if_active()

    def send_keys(self, selector, new_value, timeout=settings.SMALL_TIMEOUT):
        """ Same as add_text() -> more reliable, but less name confusion. """
        self.add_text(selector, new_value, timeout=timeout)

    def update_text_value(self, selector, new_value,
                          timeout=settings.SMALL_TIMEOUT, retry=False):
        """ This method updates an element's text value with a new value.
            @Params
            selector - the selector with the value to update
            new_value - the new value for setting the text field
            timeout - how long to wait for the selector to be visible
            retry - if True, use jquery if the selenium text update fails
        """
        element = self.wait_for_element_visible(selector, timeout=timeout)
        element.clear()
        self._demo_mode_pause_if_active(tiny=True)
        element.send_keys(new_value)
        if (retry and element.get_attribute('value') != new_value and (
                not new_value.endswith('\n'))):
            logging.debug('update_text_value is falling back to jQuery!')
            selector = self.jq_format(selector)
            self.set_value(selector, new_value)
        self._demo_mode_pause_if_active()

    def update_text(self, selector, new_value,
                    timeout=settings.SMALL_TIMEOUT, retry=False):
        """ The shorter version of update_text_value(), which
            clears existing text and adds new text into the text field.
            We want to keep the old version for backward compatibility. """
        self.update_text_value(selector, new_value,
                               timeout=timeout, retry=retry)

    def is_element_present(self, selector, by=By.CSS_SELECTOR):
        """ Returns True if the element exists in the DOM. """
        return page_actions.is_element_present(self.driver, selector, by)

    def is_element_visible(self, selector, by=By.CSS_SELECTOR):
        """ Returns True if the element exists and is visible. """
        return page_actions.is_element_visible(self.driver, selector, by)

    def is_link_text_visible(self, link_text):
        """ Returns True if a link with the given text is visible. """
        return page_actions.is_element_visible(self.driver, link_text,
                                               by=By.LINK_TEXT)

    def is_text_visible(self, text, selector, by=By.CSS_SELECTOR):
        """ Returns True if the given text is visible within the element. """
        return page_actions.is_text_visible(self.driver, text, selector, by)

    def find_visible_elements(self, selector, by=By.CSS_SELECTOR):
        """ Returns the list of currently-visible elements that match. """
        return page_actions.find_visible_elements(self.driver, selector, by)

    def execute_script(self, script):
        """ Executes raw JavaScript in the browser and returns the result. """
        return self.driver.execute_script(script)

    def set_window_size(self, width, height):
        """ Resizes the browser window, then pauses if demo mode is active.
            (The demo-mode pause was previously unreachable dead code
            placed after the return statement.) """
        result = self.driver.set_window_size(width, height)
        self._demo_mode_pause_if_active()
        return result

    def maximize_window(self):
        """ Maximizes the browser window, then pauses if demo mode is active.
            (The demo-mode pause was previously unreachable dead code
            placed after the return statement.) """
        result = self.driver.maximize_window()
        self._demo_mode_pause_if_active()
        return result

    def activate_jquery(self):
        """ If "jQuery is not defined", use this method to activate it for use.
            This happens because jQuery is not always defined on web sites. """
        try:
            # Let's first find out if jQuery is already defined.
            self.driver.execute_script("jQuery('html')")
            # Since that command worked, jQuery is defined. Let's return.
            return
        except Exception:
            # jQuery is not currently defined. Let's proceed by defining it.
            pass
        self.driver.execute_script(
            '''var script = document.createElement("script"); '''
            '''script.src = "https://ajax.googleapis.com/ajax/libs/jquery/1/'''
            '''jquery.min.js"; document.getElementsByTagName("head")[0]'''
            '''.appendChild(script);''')
        for x in range(30):  # range (not Python-2-only xrange) for Py3 compat
            # jQuery needs a small amount of time to activate. (At most 3s)
            try:
                self.driver.execute_script("jQuery('html')")
                return
            except Exception:
                time.sleep(0.1)
        # Since jQuery still isn't activating, give up and raise an exception
        raise Exception("Exception: WebDriver could not activate jQuery!")

    def scroll_to(self, selector):
        """ Scrolls the page so the element is in view (requires jQuery). """
        self.wait_for_element_visible(selector, timeout=settings.SMALL_TIMEOUT)
        scroll_script = "jQuery('%s')[0].scrollIntoView()" % selector
        try:
            self.driver.execute_script(scroll_script)
        except Exception:
            # The likely reason this fails is because: "jQuery is not defined"
            self.activate_jquery()  # It's a good thing we can define it here
            self.driver.execute_script(scroll_script)
        self._demo_mode_pause_if_active(tiny=True)

    def scroll_click(self, selector):
        """ Scrolls to the element, then clicks it. """
        self.scroll_to(selector)
        self.click(selector)

    def jquery_click(self, selector):
        """ Clicks the element via jQuery instead of native WebDriver. """
        self.scroll_to(selector)
        self.driver.execute_script("jQuery('%s').click()" % selector)
        self._demo_mode_pause_if_active()

    def jq_format(self, code):
        """ Escapes a selector/string for safe use inside jQuery code. """
        return page_utils.jq_format(code)

    def set_value(self, selector, value):
        """ Sets an element's value via jQuery (bypasses send_keys). """
        self.scroll_to(selector)
        val = json.dumps(value)
        self.driver.execute_script("jQuery('%s').val(%s)" % (selector, val))
        self._demo_mode_pause_if_active()

    def jquery_update_text_value(self, selector, new_value,
                                 timeout=settings.SMALL_TIMEOUT):
        """ Sets the text value via jQuery; a trailing newline is sent
            through WebDriver so form submission still triggers. """
        element = self.wait_for_element_visible(selector, timeout=timeout)
        self.scroll_to(selector)
        self.driver.execute_script("""jQuery('%s').val('%s')"""
                                   % (selector, self.jq_format(new_value)))
        if new_value.endswith('\n'):
            element.send_keys('\n')
        self._demo_mode_pause_if_active()

    def jquery_update_text(self, selector, new_value,
                           timeout=settings.SMALL_TIMEOUT):
        """ Shorter alias for jquery_update_text_value(). """
        self.jquery_update_text_value(selector, new_value, timeout=timeout)

    def hover_on_element(self, selector):
        """ Scrolls to the element, then hovers the mouse over it. """
        self.wait_for_element_visible(selector, timeout=settings.SMALL_TIMEOUT)
        self.scroll_to(selector)
        time.sleep(0.05)  # Settle down from scrolling before hovering
        return page_actions.hover_on_element(self.driver, selector)

    def hover_and_click(self, hover_selector, click_selector,
                        click_by=By.CSS_SELECTOR,
                        timeout=settings.SMALL_TIMEOUT):
        """ Hovers over one element, then clicks another (e.g. menus). """
        self.wait_for_element_visible(hover_selector, timeout=timeout)
        self.scroll_to(hover_selector)
        # Settle down from the scrolling before hovering
        element = page_actions.hover_and_click(
            self.driver, hover_selector, click_selector, click_by, timeout)
        self._demo_mode_pause_if_active()
        return element

    def wait_for_element_present(self, selector, by=By.CSS_SELECTOR,
                                 timeout=settings.LARGE_TIMEOUT):
        """ Waits until the element exists in the DOM (visible or not). """
        return page_actions.wait_for_element_present(
            self.driver, selector, by, timeout)

    def wait_for_element_visible(self, selector, by=By.CSS_SELECTOR,
                                 timeout=settings.LARGE_TIMEOUT):
        """ Waits until the element is visible; returns the element. """
        return page_actions.wait_for_element_visible(
            self.driver, selector, by, timeout)

    def wait_for_text_visible(self, text, selector, by=By.CSS_SELECTOR,
                              timeout=settings.LARGE_TIMEOUT):
        """ Waits until the given text is visible within the element. """
        return page_actions.wait_for_text_visible(
            self.driver, text, selector, by, timeout)

    def wait_for_link_text_visible(self, link_text,
                                   timeout=settings.LARGE_TIMEOUT):
        """ Waits until a link with the given text is visible. """
        return self.wait_for_element_visible(
            link_text, by=By.LINK_TEXT, timeout=timeout)

    def wait_for_element_absent(self, selector, by=By.CSS_SELECTOR,
                                timeout=settings.LARGE_TIMEOUT):
        """ Waits until the element is removed from the DOM. """
        return page_actions.wait_for_element_absent(
            self.driver, selector, by, timeout)

    def wait_for_element_not_visible(self, selector, by=By.CSS_SELECTOR,
                                     timeout=settings.LARGE_TIMEOUT):
        """ Waits until the element is hidden or removed. """
        return page_actions.wait_for_element_not_visible(
            self.driver, selector, by, timeout)

    def wait_for_ready_state_complete(self, timeout=settings.EXTREME_TIMEOUT):
        """ Waits until document.readyState == "complete". """
        return page_actions.wait_for_ready_state_complete(self.driver, timeout)

    def wait_for_and_accept_alert(self, timeout=settings.LARGE_TIMEOUT):
        """ Waits for a JS alert to appear, then accepts it. """
        return page_actions.wait_for_and_accept_alert(self.driver, timeout)

    def wait_for_and_dismiss_alert(self, timeout=settings.LARGE_TIMEOUT):
        """ Waits for a JS alert to appear, then dismisses it. """
        return page_actions.wait_for_and_dismiss_alert(self.driver, timeout)

    def wait_for_and_switch_to_alert(self, timeout=settings.LARGE_TIMEOUT):
        """ Waits for a JS alert to appear, then switches focus to it. """
        return page_actions.wait_for_and_switch_to_alert(self.driver, timeout)

    def save_screenshot(self, name, folder=None):
        """ Saves a screenshot of the current page to disk. """
        return page_actions.save_screenshot(self.driver, name, folder)

    def _demo_mode_pause_if_active(self, tiny=False):
        """ In demo mode, sleeps between actions so a human can follow
            along; 'tiny' uses a third of the normal wait. """
        if self.demo_mode:
            if self.demo_sleep:
                wait_time = float(self.demo_sleep)
            else:
                wait_time = settings.DEFAULT_DEMO_MODE_TIMEOUT
            if not tiny:
                time.sleep(wait_time)
            else:
                time.sleep(wait_time/3.0)

    def _demo_mode_scroll_if_active(self, selector, by):
        """ In demo mode, scrolls to CSS-selected elements before acting. """
        if self.demo_mode:
            if by == By.CSS_SELECTOR:
                self.scroll_to(selector)

    # PyTest-Specific Code #

    def setUp(self):
        """
        pytest-specific code
        Be careful if a subclass of BaseCase overrides setUp()
        You'll need to add the following line to the subclass setUp() method:
        super(SubClassOfBaseCase, self).setUp()
        """
        self.is_pytest = None
        try:
            # This raises an exception if the test is not coming from pytest
            self.is_pytest = pytest.config.option.is_pytest
        except Exception:
            # Not using pytest (probably nosetests)
            self.is_pytest = False
        if self.is_pytest:
            self.with_selenium = pytest.config.option.with_selenium
            self.headless = pytest.config.option.headless
            self.headless_active = False
            self.with_testing_base = pytest.config.option.with_testing_base
            self.log_path = pytest.config.option.log_path
            self.browser = pytest.config.option.browser
            self.data = pytest.config.option.data
            self.demo_mode = pytest.config.option.demo_mode
            self.demo_sleep = pytest.config.option.demo_sleep
            if self.headless:
                self.display = Display(visible=0, size=(1200, 800))
                self.display.start()
                self.headless_active = True
            if self.with_selenium:
                self.driver = browser_launcher.get_driver(self.browser)

    def tearDown(self):
        """
        pytest-specific code
        Be careful if a subclass of BaseCase overrides setUp()
        You'll need to add the following line to the subclass's tearDown():
        super(SubClassOfBaseCase, self).tearDown()
        """
        if self.is_pytest:
            if self.with_selenium:
                # Save a screenshot if logging is on when an exception occurs
                if self.with_testing_base and (sys.exc_info()[1] is not None):
                    test_id = "%s.%s.%s" % (self.__class__.__module__,
                                            self.__class__.__name__,
                                            self._testMethodName)
                    test_logpath = self.log_path + "/" + test_id
                    if not os.path.exists(test_logpath):
                        os.makedirs(test_logpath)
                    # Handle screenshot logging
                    log_helper.log_screenshot(test_logpath, self.driver)
                    # Handle basic test info logging
                    log_helper.log_test_failure_data(
                        test_logpath, self.driver, self.browser)
                    # Handle page source logging
                    log_helper.log_page_source(test_logpath, self.driver)
                # Finally close the browser
                self.driver.quit()
            if self.headless:
                if self.headless_active:
                    self.display.stop()
コード例 #49
0
class LinkedinPy:
    """Class to be instantiated to use the script"""
    def __init__(self,
                 username=None,
                 userid=None,
                 password=None,
                 nogui=False,
                 selenium_local_session=True,
                 use_firefox=False,
                 browser_profile_path=None,
                 page_delay=25,
                 show_logs=True,
                 headless_browser=False,
                 proxy_address=None,
                 proxy_chrome_extension=None,
                 proxy_port=None,
                 disable_image_load=False,
                 bypass_suspicious_attempt=False,
                 bypass_with_mobile=False,
                 multi_logs=True):
        """Configure a LinkedinPy automation session.

        Command-line arguments (``parse_cli_args``) take precedence over the
        keyword arguments; the ``LINKEDIN_USER`` / ``LINKEDIN_PW`` environment
        variables are the final fallback for the credentials.  Starts a
        virtual display when ``nogui`` is set, initializes activity counters,
        logging and the database, and (by default) opens a local Selenium
        session.

        :raises SocialPyError: when no workspace directory is available.

        NOTE(review): ``userid`` is accepted but never used in this method —
        confirm whether it is read elsewhere or can be dropped.
        """

        # CLI flags override the corresponding constructor arguments.
        cli_args = parse_cli_args()
        username = cli_args.username or username
        password = cli_args.password or password
        use_firefox = cli_args.use_firefox or use_firefox
        page_delay = cli_args.page_delay or page_delay
        headless_browser = cli_args.headless_browser or headless_browser
        proxy_address = cli_args.proxy_address or proxy_address
        proxy_port = cli_args.proxy_port or proxy_port
        disable_image_load = cli_args.disable_image_load or disable_image_load
        bypass_suspicious_attempt = (cli_args.bypass_suspicious_attempt
                                     or bypass_suspicious_attempt)
        bypass_with_mobile = cli_args.bypass_with_mobile or bypass_with_mobile
        if not get_workspace(Settings):
            raise SocialPyError(
                "Oh no! I don't have a workspace to work at :'(")

        # With no GUI available, run the browser inside a virtual display.
        self.nogui = nogui
        if nogui:
            self.display = Display(visible=0, size=(800, 600))
            self.display.start()

        self.browser = None
        self.headless_browser = headless_browser
        self.proxy_address = proxy_address
        self.proxy_port = proxy_port
        self.proxy_chrome_extension = proxy_chrome_extension
        self.selenium_local_session = selenium_local_session
        self.bypass_suspicious_attempt = bypass_suspicious_attempt
        self.bypass_with_mobile = bypass_with_mobile
        self.disable_image_load = disable_image_load

        # Fall back to environment variables for the credentials.
        self.username = username or os.environ.get('LINKEDIN_USER')
        self.password = password or os.environ.get('LINKEDIN_PW')
        Settings.profile["name"] = self.username

        self.page_delay = page_delay
        self.switch_language = True
        self.use_firefox = use_firefox
        Settings.use_firefox = self.use_firefox
        self.browser_profile_path = browser_profile_path
        # Session activity counters, reported/updated as actions happen.
        self.liked_img = 0
        self.already_liked = 0
        self.liked_comments = 0
        self.commented = 0
        self.replied_to_comments = 0
        self.connected = 0
        self.already_connected = 0
        self.unconnected = 0
        self.connected_by = 0
        self.connecting_num = 0
        self.inap_img = 0
        self.not_valid_users = 0
        self.connect_times = 1
        self.start_time = time.time()

        # assign logger
        self.show_logs = show_logs
        Settings.show_logs = show_logs or None
        self.multi_logs = multi_logs
        self.logfolder = get_logfolder(self.username, self.multi_logs,
                                       Settings)
        self.logger = self.get_linkedinpy_logger(self.show_logs)

        get_database(Settings,
                     make=True)  # IMPORTANT: think twice before relocating

        if self.selenium_local_session is True:
            self.set_selenium_local_session(Settings)

    def get_linkedinpy_logger(self, show_logs):
        """
        Handles the creation and retrieval of loggers to avoid
        re-instantiation.
        """
        # Reuse the cached per-username logger when one already exists.
        cached = Settings.loggers.get(self.username)
        if cached is not None:
            return cached

        # First request for this username: build and wire up a new logger.
        base_logger = logging.getLogger(self.username)
        base_logger.setLevel(logging.DEBUG)

        formatter = logging.Formatter(
            '%(levelname)s [%(asctime)s] [%(username)s]  %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S')

        # Always log to a per-session general.log file.
        file_handler = logging.FileHandler('{}general.log'.format(
            self.logfolder))
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        base_logger.addHandler(file_handler)

        # Optionally mirror everything to the console.
        if show_logs is True:
            console_handler = logging.StreamHandler()
            console_handler.setLevel(logging.DEBUG)
            console_handler.setFormatter(formatter)
            base_logger.addHandler(console_handler)

        # The adapter injects the username into every record.
        adapter = logging.LoggerAdapter(base_logger,
                                        {"username": self.username})
        Settings.loggers[self.username] = adapter
        Settings.logger = adapter
        return adapter

    def set_selenium_local_session(self, Settings):
        """Start a local Selenium session and store the browser handle.

        Delegates to the module-level ``set_selenium_local_session`` helper,
        forwarding the proxy/browser options collected in ``__init__``.

        :raises SocialPyError: if the helper reports a non-empty error
            message instead of a working browser.
        """
        self.browser, err_msg = \
            set_selenium_local_session(self.proxy_address,
                                       self.proxy_port,
                                       self.proxy_chrome_extension,
                                       self.headless_browser,
                                       self.use_firefox,
                                       self.browser_profile_path,
                                       # Replaces
                                       # browser User
                                       # Agent from
                                       # "HeadlessChrome".
                                       self.disable_image_load,
                                       self.page_delay,
                                       self.logger,
                                       Settings)
        # Idiomatic truthiness test instead of len(err_msg) > 0.
        if err_msg:
            raise SocialPyError(err_msg)

    def login(self):
        """Used to login the user either with the username and password"""
        logged_in = login_user(self.browser, self.username, None,
                               self.password, self.logger, self.logfolder,
                               self.switch_language,
                               self.bypass_suspicious_attempt,
                               self.bypass_with_mobile)
        if logged_in:
            message = "Logged in successfully!"
            highlight_print(Settings, self.username, message, "login", "info",
                            self.logger)
            # Best-effort: persist account progress; a failure here is
            # logged but never fatal.
            try:
                save_account_progress(self.browser,
                                      "https://www.linkedin.com/",
                                      self.username, self.logger)
            except Exception:
                self.logger.warning(
                    'Unable to save account progress, skipping data update')
        else:
            message = "Wrong login data!"
            highlight_print(Settings, self.username, message, "login",
                            "critical", self.logger)

            # self.aborting = True

        return self

    def withdraw_old_invitations(self, skip_pages=10, sleep_delay=6):
        """Withdraw stale sent connection invitations.

        Walks the "invitation manager / sent" pages starting after
        ``skip_pages``, checks every invitation whose timestamp text contains
        "month", then clicks the page-level Withdraw button if anything was
        checked.  ``sleep_delay`` scales the randomized pauses between
        clicks.  Stops at page 100 or when a page has no invitation cards.
        """
        page_no = skip_pages
        while page_no < 100:
            page_no = page_no + 1
            try:
                url = "https://www.linkedin.com/mynetwork/invitation-manager/sent/?page=" + str(
                    page_no)
                web_address_navigator(Settings, self.browser, url)
                print("Starting page:", page_no)
                # Redirect back to the unpaged URL or an empty card list
                # means we ran off the end of the results.
                if self.browser.current_url == "https://www.linkedin.com/mynetwork/invitation-manager/sent/" or len(
                        self.browser.find_elements_by_css_selector(
                            "li.invitation-card div.pl5")) == 0:
                    print("============Last Page Reached==============")
                    break
                checked_in_page = 0
                # Re-query the card list on every access: clicking can make
                # previously fetched elements stale.
                for i in range(
                        0,
                        len(
                            self.browser.find_elements_by_css_selector(
                                "li.invitation-card div.pl5"))):
                    try:
                        res_item = self.browser.find_elements_by_css_selector(
                            "li.invitation-card div.pl5")[i]
                        try:
                            link = res_item.find_element_by_css_selector(
                                "div > a")
                            profile_link = link.get_attribute("href")
                            user_name = profile_link.split('/')[4]
                            self.logger.info(
                                "user_name : {}".format(user_name))
                        except Exception as e:
                            print("Might be a stale profile", e)
                        # NOTE(review): this local name shadows the `time`
                        # module (used elsewhere in the class); harmless in
                        # this scope but worth renaming.
                        time = res_item.find_element_by_css_selector(
                            "div > time")
                        self.logger.info("time : {}".format(time.text))
                        check_button = res_item.find_element_by_css_selector(
                            "div > div:nth-child(1) > input")
                        check_status = check_button.get_attribute(
                            "data-artdeco-is-focused")
                        self.logger.info(
                            "check_status : {}".format(check_status))

                        # Scroll so the i-th card is in view before clicking
                        # (104 px appears to be the per-card height —
                        # TODO confirm).
                        self.browser.execute_script("window.scrollTo(0, " +
                                                    str((i + 1) * 104) + ");")

                        # Only invitations at least a month old get checked.
                        if "month" in time.text:
                            (ActionChains(self.browser).move_to_element(
                                check_button).click().perform())
                            self.logger.info("check_button clicked")
                            checked_in_page = checked_in_page + 1
                            delay_random = random.randint(
                                ceil(sleep_delay * 0.42),
                                ceil(sleep_delay * 0.57))
                            sleep(delay_random)
                    except Exception as e:
                        self.logger.error(e)
                if checked_in_page > 0:
                    self.logger.info("Widraw to be pressed")
                    try:
                        self.browser.execute_script("window.scrollTo(0, 0);")
                        withdraw_button = self.browser.find_element_by_css_selector(
                            "ul > li.mn-list-toolbar__right-button > button")
                        self.logger.info("withdraw_button : {}".format(
                            withdraw_button.text))
                        if "Withdraw" in withdraw_button.text:
                            (ActionChains(self.browser).move_to_element(
                                withdraw_button).click().perform())
                            self.logger.info("withdraw_button clicked")
                            # Withdrawing shrinks the list, so revisit the
                            # same page number on the next iteration.
                            page_no = page_no - 1
                            delay_random = random.randint(
                                ceil(sleep_delay * 0.85),
                                ceil(sleep_delay * 1.14))
                            sleep(delay_random)
                    except Exception as e:
                        print(
                            "For some reason there is no withdraw_button inspite of checkings",
                            e)
                else:
                    self.logger.info("Nothing checked in this page")
            except Exception as e:
                self.logger.error(e)
            self.logger.info("============Next Page==============")

    def search_1stconnects_and_savetodb(self,
                                        query,
                                        city_code,
                                        school_code=None,
                                        past_company=None,
                                        random_start=True,
                                        max_pages=10,
                                        max_connects=25,
                                        sleep_delay=6):
        """Search 1st-degree connections and record them in the database.

        Builds a faceted people search restricted to 1st-degree contacts
        (``facetNetwork=["F"]``), walks up to 100 result pages, and for each
        result that shows a "Message" button records the username via
        ``connect_restriction("write", ...)``.

        NOTE(review): ``random_start``, ``max_pages``, ``max_connects`` and
        ``sleep_delay`` are accepted but never used in this method — the
        page loop is hard-coded to ``range(1, 101)``.
        """

        self.logger.info(
            "Searching for: query={}, city_code={}, school_code={}".format(
                query, city_code, school_code))
        # facetNetwork=%5B%22F%22%5D is the URL-encoded ["F"] filter
        # (1st-degree connections only).
        search_url = "https://www.linkedin.com/search/results/people/?&facetNetwork=%5B%22F%22%5D"
        if city_code:
            search_url = search_url + "&facetGeoRegion=" + city_code
        if school_code:
            search_url = search_url + "&facetSchool=" + school_code
        if past_company:
            search_url = search_url + "&facetPastCompany=" + past_company

        search_url = search_url + "&keywords=" + query
        search_url = search_url + "&origin=" + "FACETED_SEARCH"

        for page_no in range(1, 101):
            try:
                temp_search_url = search_url + "&page=" + str(page_no)
                web_address_navigator(Settings, self.browser, temp_search_url)
                self.logger.info("Starting page: {}".format(page_no))

                # Scroll down in decreasing fractions of the page height so
                # lazily-loaded results render.
                for jc in range(2, 11):
                    sleep(1)
                    self.browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight/" +
                        str(jc) + ");")

                if len(
                        self.browser.find_elements_by_css_selector(
                            "div.search-result__wrapper")) == 0:
                    self.logger.info(
                        "============Last Page Reached or asking for Premium membership=============="
                    )
                    break
                # Re-query the result list on each access to dodge stale
                # element references.
                for i in range(
                        0,
                        len(
                            self.browser.find_elements_by_css_selector(
                                "div.search-result__wrapper"))):
                    try:
                        res_item = self.browser.find_elements_by_css_selector(
                            "li.search-result div.search-entity div.search-result__wrapper"
                        )[i]
                        link = res_item.find_element_by_css_selector("div > a")
                        profile_link = link.get_attribute("href")
                        # Profile URLs look like .../in/<user_name>/...;
                        # segment 4 is the username — TODO confirm for all
                        # profile URL shapes.
                        user_name = profile_link.split('/')[4]
                        self.logger.info("user_name : {}".format(user_name))
                        msg_button = res_item.find_element_by_xpath(
                            "//div[3]/div/div/button[text()='Message']")
                        print(msg_button.text, "present")
                        # A "Message" button indicates an existing
                        # connection; save it.
                        if msg_button.text == "Message":
                            connect_restriction("write", user_name, None,
                                                self.logger)
                            self.logger.info(
                                "saved {} to db".format(user_name))
                    except Exception as e:
                        self.logger.error(e)
            except Exception as e:
                self.logger.error(e)
            self.logger.info("============Next Page==============")

    def test_page(self, search_url, page_no, css_selector_identifier):
        """Open ``search_url`` and report whether any element matching
        ``css_selector_identifier`` is present on the page."""
        web_address_navigator(Settings, self.browser, search_url)
        self.logger.info("Testing page: {}".format(page_no))
        matches = self.browser.find_elements_by_css_selector(
            css_selector_identifier)
        return len(matches) > 0

    def search_and_connect(self,
                           query,
                           connection_relationship_code,
                           city_code,
                           school_code=None,
                           past_company=None,
                           random_start=True,
                           max_pages=10,
                           max_connects=25,
                           sleep_delay=6):
        """ search linkedin and connect from a given profile

        Builds a faceted people-search URL from the given filters, walks up
        to ``max_pages`` result pages (optionally starting from a random
        page) and sends connection invites, stopping at ``max_connects``.

        Returns the number of invites sent (0 when the connects quota is
        exhausted or the first page has no results).

        Fixes over the previous revision: the final ``return connects`` was
        inside the page loop, so only one page was ever processed; the
        ``max_connects`` early exit returned ``None`` instead of the count.
        """

        if quota_supervisor(Settings, "connects") == "jump":
            return 0

        self.logger.info(
            "Searching for: query={}, connection_relationship_code={}, city_code={}, school_code={}"
            .format(query, connection_relationship_code, city_code,
                    school_code))
        connects = 0
        prev_connects = -1
        search_url = "https://www.linkedin.com/search/results/people/?"
        if connection_relationship_code:
            search_url = search_url + "&facetNetwork=" + connection_relationship_code
        if city_code:
            search_url = search_url + "&facetGeoRegion=" + city_code
        if school_code:
            search_url = search_url + "&facetSchool=" + school_code
        if past_company:
            search_url = search_url + "&facetPastCompany=" + past_company

        search_url = search_url + "&keywords=" + query
        search_url = search_url + "&origin=" + "FACETED_SEARCH"

        # Bail out early when page 1 has no results at all.
        temp_search_url = search_url + "&page=1"
        print(temp_search_url)
        time.sleep(10)
        if not self.test_page(
                search_url=temp_search_url,
                page_no=1,
                css_selector_identifier="div.search-result__wrapper"):
            self.logger.info(
                "============Definitely no Result, Next Query==============")
            return 0

        # Pick a random non-empty start page (up to 5 tries), else start at 1.
        if random_start:
            trial = 0
            st = 5
            while trial < 5 and st > 1:
                st = random.randint(1, st - 1)
                temp_search_url = search_url + "&page=" + str(st)
                if self.test_page(temp_search_url, st,
                                  "div.search-result__wrapper"):
                    break
                trial = trial + 1
        else:
            st = 1

        for page_no in range(st, st + max_pages):

            # No new connects on the previous page: limits exceeded or all
            # invites pending — stop either way.
            if prev_connects == connects:
                self.logger.info(
                    "============Limits might have exceeded or all Invites pending from this page(let's exit either case)=============="
                )
                break
            else:
                prev_connects = connects

            try:
                temp_search_url = search_url + "&page=" + str(page_no)
                if page_no > st and st > 1:
                    web_address_navigator(Settings, self.browser,
                                          temp_search_url)
                self.logger.info("Starting page: {}".format(page_no))

                # Scroll in decreasing fractions so lazy results render.
                for jc in range(2, 11):
                    sleep(1)
                    self.browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight/" +
                        str(jc) + "-100);")

                if len(
                        self.browser.find_elements_by_css_selector(
                            "div.search-result__wrapper")) == 0:
                    self.logger.info(
                        "============Last Page Reached or asking for Premium membership=============="
                    )
                    break
                # Re-query the result list on each access to avoid stale
                # element references after clicks.
                for i in range(
                        0,
                        len(
                            self.browser.find_elements_by_css_selector(
                                "div.search-result__wrapper"))):
                    try:
                        res_item = self.browser.find_elements_by_css_selector(
                            "li.search-result div.search-entity div.search-result__wrapper"
                        )[i]  # div.search-result__actions div button")
                        # pp.pprint(res_item.get_attribute('innerHTML'))
                        link = res_item.find_element_by_css_selector("div > a")
                        profile_link = link.get_attribute("href")
                        self.logger.info("Profile : {}".format(profile_link))
                        user_name = profile_link.split('/')[4]
                        # self.logger.info("user_name : {}".format(user_name))
                        name = res_item.find_element_by_css_selector(
                            "h3 > span > span > span")  #//span/span/span[1]")
                        self.logger.info("Name : {}".format(name.text))

                        # Skip users we already connected with before.
                        if connect_restriction("read", user_name,
                                               self.connect_times,
                                               self.logger):
                            self.logger.info("already connected")
                            continue

                        try:
                            connect_button = res_item.find_element_by_xpath(
                                "//div[3]/div/button[text()='Connect']")
                            self.logger.info(
                                "Connect button found, connecting...")
                            # Synthesize a mouse click via JS; a plain
                            # .click() can miss overlay-covered buttons.
                            self.browser.execute_script(
                                "var evt = document.createEvent('MouseEvents');"
                                +
                                "evt.initMouseEvent('click',true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0,null);"
                                + "arguments[0].dispatchEvent(evt);",
                                res_item.find_element_by_xpath(
                                    '//div[3]/div/button[text()="Connect"]'))
                            self.logger.info("Clicked {}".format(
                                connect_button.text))
                            sleep(2)
                        except Exception:
                            # No Connect button: invite was already sent.
                            invite_sent_button = res_item.find_element_by_xpath(
                                "//div[3]/div/button[text()='Invite Sent']")
                            self.logger.info("Already {}".format(
                                invite_sent_button.text))
                            continue

                        # Handle the confirmation modal that follows Connect.
                        try:
                            modal = self.browser.find_element_by_css_selector(
                                "div.modal-wormhole-content > div")
                            if modal:
                                try:
                                    sendnow_or_done_button = modal.find_element_by_xpath(
                                        "//div[1]/div/section/div/div[2]/button[2]"
                                    )  #text()='Send now']")
                                    self.logger.info(
                                        sendnow_or_done_button.text)
                                    if not (sendnow_or_done_button.text
                                            == 'Done'
                                            or sendnow_or_done_button.text
                                            == 'Send now'):
                                        raise Exception(
                                            "Send Now or Done button not found"
                                        )
                                    if sendnow_or_done_button.is_enabled():
                                        (ActionChains(
                                            self.browser).move_to_element(
                                                sendnow_or_done_button).click(
                                                ).perform())
                                        self.logger.info("Clicked {}".format(
                                            sendnow_or_done_button.text))
                                        connects = connects + 1
                                        connect_restriction(
                                            "write", user_name, None,
                                            self.logger)
                                        try:
                                            # update server calls
                                            update_activity(
                                                Settings, 'connects')
                                        except Exception as e:
                                            self.logger.error(e)
                                        sleep(2)
                                    else:
                                        try:
                                            #TODO: input("find correct close XPATH")
                                            close_button = modal.find_element_by_xpath(
                                                "//div[1]/div/section/div/header/button"
                                            )
                                            (ActionChains(
                                                self.browser).move_to_element(
                                                    close_button).click().
                                             perform())
                                            print(sendnow_or_done_button.text,
                                                  "disabled, clicked close")
                                            sleep(2)
                                        except Exception as e:
                                            print(
                                                "close_button not found, Failed with:",
                                                e)
                                except Exception as e:
                                    print(
                                        "sendnow_or_done_button not found, Failed with:",
                                        e)
                            else:
                                self.logger.info("Popup not found")
                        except Exception as e:
                            print("Popup not found, Failed with:", e)
                            # Fallback: dismiss the alternate "got it" modal.
                            try:
                                new_popup_buttons = self.browser.find_elements_by_css_selector(
                                    "#artdeco-modal-outlet div.artdeco-modal-overlay div.artdeco-modal div.artdeco-modal__actionbar button.artdeco-button"
                                )
                                gotit_button = new_popup_buttons[1]
                                (ActionChains(self.browser).move_to_element(
                                    gotit_button).click().perform())
                                print(gotit_button.text, " clicked")
                                sleep(2)
                            except Exception as e:
                                print("New Popup also not found, Failed with:",
                                      e)

                        self.logger.info(
                            "Connects sent in this iteration: {}".format(
                                connects))
                        delay_random = random.randint(ceil(sleep_delay * 0.85),
                                                      ceil(sleep_delay * 1.14))
                        sleep(delay_random)
                        if connects >= max_connects:
                            self.logger.info(
                                "max_connects({}) for this iteration reached , Returning..."
                                .format(max_connects))
                            # Return the count (was a bare `return` / None).
                            return connects
                    except Exception as e:
                        self.logger.error(e)
            except Exception as e:
                self.logger.error(e)
            self.logger.info("============Next Page==============")
        # Moved out of the page loop: previously this returned after the
        # first page, making max_pages ineffective.
        return connects

    def endorse(self, profile_link, sleep_delay):
        """Endorse the first listed skill on the given profile.

        Opens ``profile_link``, scrolls the page so the deferred sections
        load, and clicks the first skill's endorse button — but only when
        its icon is a ``plus-icon`` (i.e. not yet endorsed).  ``sleep_delay``
        scales the randomized pause after a successful click.  All failures
        are logged, never raised.
        """
        try:
            web_address_navigator(Settings, self.browser, profile_link)

            # Scroll in 10 steps so lazily-loaded sections (including
            # Skills & Endorsements) are rendered.
            for jc in range(1, 10):
                sleep(1)
                self.browser.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight*" +
                    str(jc) + "/10);")

            skills_pane = self.browser.find_element_by_css_selector(
                "div.profile-detail > div.pv-deferred-area > div > section.pv-profile-section.pv-skill-categories-section"
            )
            # The pane's first text line identifies the section.
            if (skills_pane.text.split('\n')[0] == 'Skills & Endorsements'):
                try:
                    first_skill_button_icon = self.browser.find_element_by_css_selector(
                        "div.profile-detail > div.pv-deferred-area > div > section.pv-profile-section.pv-skill-categories-section > ol > li > div > div > div > button > li-icon"
                    )
                    button_type = first_skill_button_icon.get_attribute("type")
                    # plus-icon means "not yet endorsed" — TODO confirm
                    # against current LinkedIn markup.
                    if button_type == 'plus-icon':
                        first_skill_button = self.browser.find_element_by_css_selector(
                            "div.profile-detail > div.pv-deferred-area > div > section.pv-profile-section.pv-skill-categories-section > ol > li > div > div > div > button"
                        )
                        # Synthesize the click via JS; a plain .click() can
                        # miss overlay-covered buttons.
                        self.browser.execute_script(
                            "var evt = document.createEvent('MouseEvents');" +
                            "evt.initMouseEvent('click',true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0,null);"
                            + "arguments[0].dispatchEvent(evt);",
                            first_skill_button)
                        first_skill_title = self.browser.find_element_by_css_selector(
                            "div.profile-detail > div.pv-deferred-area > div > section.pv-profile-section.pv-skill-categories-section > ol > li > div > div > p > a > span"
                        )
                        print(first_skill_title.text, "clicked")
                        delay_random = random.randint(ceil(sleep_delay * 0.85),
                                                      ceil(sleep_delay * 1.14))
                        sleep(delay_random)
                    else:
                        self.logger.info(
                            'button_type already {}'.format(button_type))
                except Exception as e:
                    self.logger.error(e)
            else:
                self.logger.info('Skill & Endorsements pane not found')
        except Exception as e:
            self.logger.error(e)

    def search_and_endorse(self,
                           query,
                           city_code,
                           school_code,
                           random_start=True,
                           max_pages=3,
                           max_endorsements=25,
                           sleep_delay=6):
        """ search linkedin and endose few first connections

        Searches 1st-degree connections matching the facets, collects the
        profile links from one result page, and endorses each profile via
        ``self.endorse`` until ``max_endorsements`` is reached.

        Fixes over the previous revision: the "Testing page:" log message
        was missing its ``{}`` placeholder, so the page number was silently
        dropped.

        NOTE(review): ``max_pages`` is accepted but the loop body only ever
        processes one page (``range(st, st + 1)``) — confirm intent.
        """

        if quota_supervisor(Settings, "connects") == "jump":
            return  #False, "jumped"

        print("Searching for: ", query, city_code, school_code)
        search_url = "https://www.linkedin.com/search/results/people/?"
        if city_code:
            search_url = search_url + "&facetGeoRegion=" + city_code
        if school_code:
            search_url = search_url + "&facetSchool=" + school_code

        # facetNetwork=%5B%22F%22%5D restricts to 1st-degree connections.
        search_url = search_url + "&facetNetwork=%5B%22F%22%5D"
        search_url = search_url + "&keywords=" + query
        search_url = search_url + "&origin=" + "FACETED_SEARCH"

        # Pick a random non-empty start page (up to 3 tries), else page 1.
        if random_start:
            trial = 0
            while trial < 3:
                st = random.randint(1, 3)
                temp_search_url = search_url + "&page=" + str(st)
                web_address_navigator(Settings, self.browser, temp_search_url)
                self.logger.info("Testing page: {}".format(st))
                result_items = self.browser.find_elements_by_css_selector(
                    "div.search-result__wrapper")
                if len(result_items) > 0:
                    break
                trial = trial + 1
        else:
            st = 1

        connects = 0
        for page_no in list(range(st, st + 1)):
            collected_profile_links = []
            try:
                temp_search_url = search_url + "&page=" + str(page_no)
                if page_no > st and st > 1:
                    web_address_navigator(Settings, self.browser,
                                          temp_search_url)
                self.logger.info("Starting page: {}".format(page_no))

                # Scroll in decreasing fractions so lazy results render.
                for jc in range(2, 11):
                    sleep(1)
                    self.browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight/" +
                        str(jc) + "-100);")

                result_items = self.browser.find_elements_by_css_selector(
                    "div.search-result__wrapper")

                # Collect profile links first; endorsing navigates away, so
                # clicking inline would invalidate the result elements.
                for result_item in result_items:
                    try:
                        link = result_item.find_element_by_css_selector(
                            "div > a")
                        self.logger.info("Profile : {}".format(
                            link.get_attribute("href")))
                        collected_profile_links.append(
                            link.get_attribute("href"))
                        name = result_item.find_element_by_css_selector(
                            "h3 > span > span > span")
                        self.logger.info("Name : {}".format(name.text))
                    except Exception as e:
                        self.logger.error(e)
            except Exception as e:
                self.logger.error(e)

            for collected_profile_link in collected_profile_links:
                self.endorse(collected_profile_link, sleep_delay=sleep_delay)
                connects = connects + 1
                if connects >= max_endorsements:
                    self.logger.info(
                        "max_endorsements({}) for this iteration reached , Returning..."
                        .format(max_endorsements))
                    return

            self.logger.info("============Next Page==============")

    def dump_connect_restriction(self, profile_name, logger, logfolder):
        """ Dump connect restriction data to a local human-readable JSON

        Reads all `connectRestriction` rows for the current profile from
        the tracker database and merges them under `profile_name` into
        `<logfolder>connectRestriction.json`.  Errors are logged, never
        raised.
        """

        # FIX: pre-bind conn so the `finally` clause cannot hit a
        # NameError (masking the real error) when get_database() raises
        conn = None
        try:
            # get a DB and start a connection
            db, profile_id = get_database(Settings)
            conn = sqlite3.connect(db)

            with conn:
                conn.row_factory = sqlite3.Row
                cur = conn.cursor()

                cur.execute(
                    "SELECT * FROM connectRestriction WHERE profile_id=:var",
                    {"var": profile_id})
                data = cur.fetchall()

            if data:
                # get the existing data
                filename = "{}connectRestriction.json".format(logfolder)
                if os.path.isfile(filename):
                    with open(filename) as connectResFile:
                        current_data = json.load(connectResFile)
                else:
                    current_data = {}

                # pack the new data
                # (row columns 1 and 2 are presumably name -> count;
                #  verify against the connectRestriction table schema)
                connect_data = {
                    user_data[1]: user_data[2]
                    for user_data in data or []
                }
                current_data[profile_name] = connect_data

                # dump the fresh connect data to a local human readable JSON
                with open(filename, 'w') as connectResFile:
                    json.dump(current_data, connectResFile)

        except Exception as exc:
            logger.error(
                "Pow! Error occurred while dumping connect restriction data to a "
                "local JSON:\n\t{}".format(str(exc).encode("utf-8")))

        finally:
            if conn:
                # close the open connection
                conn.close()

    def end(self):
        """Closes the current session.

        Order matters here: the browser is closed first, then (inside an
        interruption-safe context) the virtual display is stopped, the
        connect-restriction data and the sessional connect counter are
        persisted to the log folder, and finally the live report and a
        closing banner are printed.
        """

        # IS_RUNNING = False
        close_browser(self.browser, False, self.logger)

        # interruption_handler() keeps the teardown from being cut short
        # by e.g. a KeyboardInterrupt
        with interruption_handler():
            # close virtual display
            if self.nogui:
                self.display.stop()

            # write useful information
            self.dump_connect_restriction(self.username, self.logger,
                                          self.logfolder)
            # dump_record_activity(self.username,
            #                      self.logger,
            #                      self.logfolder,
            #                      Settings)

            # persist the session's connect counter for the next run
            with open('{}connected.txt'.format(self.logfolder), 'w') \
                    as connectFile:
                connectFile.write(str(self.connected))

            # output live stats before leaving
            self.live_report()

            message = "Session ended!"
            highlight_print(Settings, self.username, message, "end", "info",
                            self.logger)
            print("\n\n")

    def set_quota_supervisor(self,
                             Settings,
                             enabled=False,
                             sleep_after=None,
                             sleepyhead=False,
                             stochastic_flow=False,
                             notify_me=False,
                             peak_likes=(None, None),
                             peak_comments=(None, None),
                             peak_connects=(None, None),
                             peak_unconnects=(None, None),
                             peak_server_calls=(None, None)):
        """
         Sets aside QS configuration ANY time in a session

        Each `peak_*` argument is a `(hourly, daily)` tuple; None means
        "no limit" for that window.  If any peak value is malformed, QS
        is turned off for the rest of the session and (when `enabled`
        was requested) a warning is logged.
        """
        # FIX: `sleep_after` used a mutable default argument ([]); use a
        # None sentinel instead -- the observable default is still [].
        if sleep_after is None:
            sleep_after = []

        # take a reference of the global configuration
        configuration = Settings.QS_config

        # strong type checking on peaks entered
        peak_values_combined = [
            peak_likes, peak_comments, peak_connects, peak_unconnects,
            peak_server_calls
        ]
        peaks_are_tuple = all(
            type(item) is tuple for item in peak_values_combined)

        if peaks_are_tuple:
            peak_values_merged = [
                i for sub in peak_values_combined for i in sub
            ]
            integers_filtered = [
                value for value in peak_values_merged
                if isinstance(value, int)
            ]

            peaks_are_provided = all(
                len(item) == 2 for item in peak_values_combined)
            peaks_are_valid = all(
                type(item) is int or type(item) is type(None)
                for item in peak_values_merged)
            peaks_are_good = all(item >= 0 for item in integers_filtered)

        # set QS if peak values are eligible
        # (the `and` chain short-circuits, so the three flags above are
        # only read when peaks_are_tuple is True)
        if (peaks_are_tuple and peaks_are_provided and peaks_are_valid
                and peaks_are_good):

            peaks = {
                "likes": {
                    "hourly": peak_likes[0],
                    "daily": peak_likes[1]
                },
                "comments": {
                    "hourly": peak_comments[0],
                    "daily": peak_comments[1]
                },
                "connects": {
                    "hourly": peak_connects[0],
                    "daily": peak_connects[1]
                },
                "unconnects": {
                    "hourly": peak_unconnects[0],
                    "daily": peak_unconnects[1]
                },
                "server_calls": {
                    "hourly": peak_server_calls[0],
                    "daily": peak_server_calls[1]
                }
            }

            # accept a bare value as a one-element list
            if not isinstance(sleep_after, list):
                sleep_after = [sleep_after]

            rt = time.time()
            latesttime = {"hourly": rt, "daily": rt}
            orig_peaks = deepcopy(peaks)  # original peaks always remain static
            stochasticity = {
                "enabled": stochastic_flow,
                "latesttime": latesttime,
                "original_peaks": orig_peaks
            }

            if (platform.startswith("win32") and python_version() < "2.7.15"):
                # UPDATE ME: remove this block once plyer is
                # verified to work on [very] old versions of Python 2
                notify_me = False

            # update QS configuration with the fresh settings
            configuration.update({
                "state": enabled,
                "sleep_after": sleep_after,
                "sleepyhead": sleepyhead,
                "stochasticity": stochasticity,
                "notify": notify_me,
                "peaks": peaks
            })

        else:
            # turn off QS for the rest of the session
            # since peak values are ineligible
            # NOTE(review): "state" is set to the *string* "False" here but
            # to a boolean above; a plain truthiness check downstream would
            # see the string as truthy.  Left unchanged -- verify against
            # quota_supervisor()'s reading of this flag before changing.
            configuration.update(state="False")

            # user should be warned only if has had QS turned on
            if enabled is True:
                self.logger.warning("Quota Supervisor: peak rates are misfit! "
                                    "Please use supported formats."
                                    "\t~disabled QS")

    def live_report(self):
        """ Report live sessional statistics """

        print('')

        session_counters = [
            self.liked_img, self.already_liked, self.commented, self.connected,
            self.already_connected, self.unconnected, self.inap_img,
            self.not_valid_users
        ]

        # relationship snapshot taken at session start, if available
        owner_relationship_info = ''
        if self.connecting_num and self.connected_by:
            owner_relationship_info = (
                "On session start was connectING {} users"
                " & had {} connectERS".format(self.connecting_num,
                                              self.connected_by))

        # render elapsed time in the most readable unit
        elapsed = self.run_time()
        if elapsed < 60:
            run_time_info = "{} seconds".format(elapsed)
        elif elapsed < 3600:
            run_time_info = "{} minutes".format(
                truncate_float(elapsed / 60, 2))
        else:
            run_time_info = "{} hours".format(
                truncate_float(elapsed / 60 / 60, 2))
        run_time_msg = "[Session lasted {}]".format(run_time_info)

        if any(session_counters):
            self.logger.info(
                "Sessional Live Report:\n"
                "\t|> LIKED {} images  |  ALREADY LIKED: {}\n"
                "\t|> COMMENTED on {} images\n"
                "\t|> connected {} users  |  ALREADY connected: {}\n"
                "\t|> UNconnected {} users\n"
                "\t|> LIKED {} comments\n"
                "\t|> REPLIED to {} comments\n"
                "\t|> INAPPROPRIATE images: {}\n"
                "\t|> NOT VALID users: {}\n"
                "\n{}\n{}".format(self.liked_img, self.already_liked,
                                  self.commented, self.connected,
                                  self.already_connected, self.unconnected,
                                  self.liked_comments,
                                  self.replied_to_comments, self.inap_img,
                                  self.not_valid_users,
                                  owner_relationship_info, run_time_msg))
        else:
            self.logger.info("Sessional Live Report:\n"
                             "\t|> No any statistics to show\n"
                             "\n{}\n{}".format(owner_relationship_info,
                                               run_time_msg))

    def run_time(self):
        """ Get the time session lasted in seconds """

        # elapsed wall-clock time since the session began, 2 decimals
        elapsed = time.time() - self.start_time
        return truncate_float(elapsed, 2)

    def search_and_apply(self):
        # NOTE(review): this definition is dead code -- it is shadowed by a
        # second `search_and_apply` defined later in this class (Python keeps
        # only the last binding of a name).  Consider removing or renaming it.
        # Interactive hard-coded demo: fills the LinkedIn jobs search with
        # "Python Developer" / "San Jose, California, United States", clicks
        # search, then blocks on input().
        usualjobslink = "https://www.linkedin.com/jobs"
        web_address_navigator(Settings, self.browser, usualjobslink)

        job_title_XP = '//input[contains(@id,"jobs-search-box-keyword-id")]'
        txt_job_title = self.browser.find_element_by_xpath(job_title_XP)
        print('Entering Job Title')
        (ActionChains(self.browser).move_to_element(
            txt_job_title).click().send_keys("Python Developer").perform())

        job_location_XP = '//input[contains(@id,"jobs-search-box-location-id")]'
        txt_job_location = self.browser.find_element_by_xpath(job_location_XP)
        print('Entering Job Location')
        (ActionChains(
            self.browser).move_to_element(txt_job_location).click().send_keys(
                "San Jose, California, United States").perform())

        # update server calls for both 'click' and 'send_keys' actions
        for i in range(2):
            update_activity(Settings)

        sleep(1)
        print("Clicking Search Button")
        job_search_XP = '//button[contains(@class,"jobs-search-box__submit-button")]'
        btn_job_search = self.browser.find_element_by_xpath(job_search_XP)
        print(btn_job_search)
        (ActionChains(
            self.browser).move_to_element(btn_job_search).click().perform())

        # update server calls
        update_activity(Settings)

        sleep(10)
        # blocks unattended runs -- debug leftover
        input("Press Enter to continue...")

    def search_and_apply(self,
                         job_title,
                         job_location,
                         distance=50,
                         random_start=True,
                         max_pages=20,
                         max_connects=25,
                         sleep_delay=6):
        """ Search LinkedIn job postings and walk the result pages.

        job_title / job_location: search keyword and location strings.
        distance: search radius.
        random_start: begin from a random results page that has results.
        max_pages: maximum number of result pages to walk.
        max_connects / sleep_delay: currently unused here; kept for
            signature compatibility with the connect-oriented searches.

        Returns 0 when the very first results page is empty.
        """

        self.logger.info(
            "Searching for: job_title={}, job_location={}, radius={}".format(
                job_title, job_location, distance))
        connects = 0
        prev_connects = -1
        # https://www.linkedin.com/jobs/search/?keywords=python%20developer&location=San%20Jose%2C%20California%2C%20United%20States&distance=50
        job_search_url = "https://www.linkedin.com/jobs/search/?"
        if job_title:
            job_search_url = job_search_url + "keywords=" + job_title
        if job_location:
            job_search_url = job_search_url + "&location=" + job_location
        if distance:
            job_search_url = job_search_url + "&distance=" + str(distance)

        # probe the first page; bail out early when there is no result
        temp_job_search_url = job_search_url + "&start=0"
        print(temp_job_search_url)
        time.sleep(10)
        if self.test_page(
                search_url=temp_job_search_url,
                page_no=1,
                css_selector_identifier="div.jobs-search-results ") == False:
            self.logger.info(
                "============Definitely no Result, Next Query==============")
            return 0
        if random_start:
            # probe random pages (offsets are page * 25 rows) until one
            # actually has results
            trial = 0
            st = 5
            while trial < 5 and st > 1:
                st = random.randint(1, st - 1)
                temp_job_search_url = job_search_url + "&start=" + str(st * 25)
                if self.test_page(temp_job_search_url, st,
                                  "div.jobs-search-results"):
                    break
                trial = trial + 1
        else:
            st = 1
        for page_no in list(range(st, st + max_pages)):
            try:
                # FIX: the &start offset is in rows, not pages -- multiply
                # by the 25-rows-per-page stride, matching the random-start
                # probe above (was str(page_no), which revisited overlapping
                # result windows)
                temp_job_search_url = job_search_url + "&start=" + str(
                    page_no * 25)
                if page_no > st and st > 1:
                    web_address_navigator(Settings, self.browser,
                                          temp_job_search_url)
                self.logger.info("Starting page: {}".format(page_no))

                # scroll down in steps so lazy-loaded results render
                for jc in range(2, 11):
                    sleep(1)
                    self.browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight/" +
                        str(jc) + "-100);")
                if len(
                        self.browser.find_elements_by_css_selector(
                            "div.jobs-search-results")) == 0:
                    self.logger.info(
                        "============Last Page Reached or asking for Premium membership=============="
                    )
                    break
                for i in range(
                        0,
                        len(
                            self.browser.find_elements_by_css_selector(
                                "div.jobs-search-results"))):
                    print(i)
            except Exception as e:
                self.logger.error(e)
        # NOTE(review): debug leftover -- blocks unattended runs
        input("Press Enter to continue...")
Code example #50
0
    def parse(self, response):
        # Renders self.product_url in a real browser, takes a screenshot,
        # optionally crops it, and yields a ScreenshotItem with the image
        # base64-encoded.  Python 2 code (print statement below).
        socket.setdefaulttimeout(int(self.timeout))

        # temporary file for the output image
        t_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
        t_file.close()
        print('Created temporary image file: %s' % t_file.name)
        self.log('Created temporary image file: %s' % t_file.name)

        # virtual X display only outside debug mode
        if not DEBUG_MODE:
            display = Display(visible=int(bool(DEBUG_MODE)),
                              size=(self.width, self.height))
            display.start()

        # we will use requesocks for checking response code
        r_session = requests.session()
        if self.timeout:
            self.timeout = int(self.timeout)
        r_session.timeout = self.timeout
        # Proxies activated again because of walmart bans
        if self.proxy:
            r_session.proxies = {"http": "{}://{}".format(self.proxy_type, self.proxy), \
                            "https": "{}://{}".format(self.proxy_type, self.proxy)}

        if self.user_agent:
            r_session.headers = {'User-Agent': self.user_agent}

        # check if the page returns code != 200
        if self.code_200_required and str(
                self.code_200_required).lower() not in ('0', 'false', 'off'):
            page_code = r_session.get(self.product_url,
                                      verify=False).status_code
            if page_code != 200:
                self.log(
                    'Page returned code %s at %s' %
                    (page_code, self.product_url), ERROR)
                yield ScreenshotItem()  # return empty item
                if not DEBUG_MODE:
                    display.stop()
                return

        driver = self.init_driver()
        item = ScreenshotItem()

        if self.proxy:
            ip_via_proxy = URL2ScreenshotSpider._get_proxy_ip(driver)
            item['via_proxy'] = ip_via_proxy
            print 'IP via proxy:', ip_via_proxy
            self.log('IP via proxy: %s' % ip_via_proxy)

        # NOTE(review): on the *successful* first attempt the driver is
        # never quit -- only the fallback path below cleans it up.  Likely
        # a browser-process leak; verify and add a quit on the happy path.
        try:
            self.prepare_driver(driver)
            self.make_screenshot(driver, t_file.name)
            self.log('Screenshot was made for file %s' % t_file.name)
        except Exception as e:
            self.log('Exception while getting response using selenium! %s' %
                     str(e))
            # lets try with another driver
            another_driver_name = self._choose_another_driver()
            try:
                if not DEBUG_MODE:
                    driver.quit()  # clean RAM
            except Exception as e:
                pass
            driver = self.init_driver(name=another_driver_name)
            self.prepare_driver(driver)
            self.make_screenshot(driver, t_file.name)
            self.log('Screenshot was made for file %s (2nd attempt)' %
                     t_file.name)
            try:
                if not DEBUG_MODE:
                    driver.quit()
            # NOTE(review): bare except silently swallows everything,
            # including SystemExit/KeyboardInterrupt; prefer `except
            # Exception:` here.
            except:
                pass

        # crop the image if needed
        if self.crop_width and self.crop_height:
            self.crop_width = int(self.crop_width)
            self.crop_height = int(self.crop_height)
            from PIL import Image
            # size is width/height
            img = Image.open(t_file.name)
            # box is (left, top, right, bottom) in pixels
            box = (self.crop_left, self.crop_top,
                   self.crop_left + self.crop_width,
                   self.crop_top + self.crop_height)
            area = img.crop(box)
            area.save(t_file.name, 'png')
            self.log('Screenshot was cropped and saved to %s' % t_file.name)
            if self.image_copy:  # save a copy of the file if needed
                area.save(self.image_copy, 'png')

        with open(t_file.name, 'rb') as fh:
            img_content = fh.read()
            self.log('Screenshot content was read, size: %s bytes' %
                     len(img_content))

        if self.remove_img is True:
            os.unlink(t_file.name)  # remove old output file
            self.log('Screenshot file was removed: %s' % t_file.name)

        # yield the item
        item['url'] = response.url
        item['image'] = base64.b64encode(img_content)
        item['site_settings'] = getattr(self, '_site_settings_activated_for',
                                        None)
        item['creation_datetime'] = datetime.datetime.utcnow().isoformat()

        if not DEBUG_MODE:
            display.stop()

        self.log('Item image key length: %s' % len(item.get('image', '')))

        if img_content:
            yield item
Code example #51
0
class UITestCase(LiveServerTestCase):
    """Selenium-backed live-server test base.

    Starts a local WebDriver (falling back to an Xvfb virtual display
    when no UI is available) and provides small helpers for finding,
    clicking and waiting on page elements.
    """

    def use_xvfb(self):
        # fall back to a virtual X display when no real one is available
        from pyvirtualdisplay import Display
        self.display = Display('xvfb', visible=1, size=(1280, 1024))
        self.display.start()
        self.driver = WebDriver()

    def setUp(self):
        try:
            self.driver = WebDriver()
            ui_is_not_available = False
        except WebDriverException:
            ui_is_not_available = True

        if ui_is_not_available:
            self.use_xvfb()

        self.driver.implicitly_wait(10)

        clear_caches()
        setup_for_ui_test()

        super(UITestCase, self).setUp()

    def tearDown(self):
        self.driver.quit()
        if hasattr(self, 'display'):
            self.display.stop()

        ContentType.objects.clear_cache()

        super(UITestCase, self).tearDown()

    def click(self, selector):
        self.find(selector).click()

    def click_when_visible(self, selector):
        element = self.find(selector)
        self.wait_until_visible(element)
        element.click()

    def find(self, selector):
        return self.driver.find_element_by_css_selector(selector)

    def find_name(self, name):
        return self.driver.find_element_by_name(name)

    def find_id(self, id):
        return self.driver.find_element_by_id(id)

    def process_login_form(self, username, password):
        username_elmt = self.wait_until_present('[name="username"]')
        password_elmt = self.find_name('password')

        username_elmt.send_keys(username)
        password_elmt.send_keys(password)

        self.click('form * button')

    def browse_to_url(self, url):
        self.driver.get(self.live_server_url + url)

    def browse_to_instance_url(self, url, instance=None):
        instance = instance if instance is not None else self.instance
        # FIX: the resolved `instance` above was computed and then ignored;
        # the URL was always built from self.instance, so passing an
        # explicit instance had no effect.
        self.driver.get('%s/%s/%s' %
                        (self.live_server_url, instance.url_name, url))

    def find_anchor_by_url(self, url):
        return self.find("[href='%s']" % url)

    def wait_until_present(self, selector, timeout=10):
        """
        Wait until an element with CSS 'selector' exists on the page.
        Useful for detecting that an operation loads the page you're expecting.
        """
        element = [None]  # use list so it can be set by inner scope

        def is_present(driver):
            element[0] = self.find(selector)
            return element[0] is not None

        WebDriverWait(self.driver, timeout).until(is_present)
        return element[0]

    def wait_until_text_present(self, text, timeout=10):
        """
        Wait until 'text' exists on the page.
        Useful for detecting that an operation loads the page you're expecting.
        """
        WebDriverWait(self.driver,
                      timeout).until(lambda driver: text in driver.page_source)

    def wait_until_enabled(self, element_or_selector, timeout=10):
        """
        Wait until 'element_or_selector' is enabled.
        """
        element = self._get_element(element_or_selector)
        WebDriverWait(self.driver, timeout).until(
            lambda driver: element.get_attribute("disabled") is None)
        return element

    def wait_until_visible(self, element_or_selector, timeout=10):
        """
        Wait until 'element_or_selector' (known to already exist on the page)
        is displayed.
        """
        element = self._get_element(element_or_selector)
        WebDriverWait(self.driver,
                      timeout).until(lambda driver: element.is_displayed())
        return element

    def wait_until_invisible(self, element_or_selector, timeout=10):
        """
        Wait until 'element_or_selector' (known to already exist on the page)
        is not displayed.
        """
        element = self._get_element(element_or_selector)

        def is_invisible(driver):
            try:
                return not element.is_displayed()
            except StaleElementReferenceException:
                return True

        WebDriverWait(self.driver, timeout).until(is_invisible)
        return element

    def _get_element(self, element_or_selector):
        # accept either a ready element or a CSS selector string
        # (basestring: this module targets Python 2)
        if isinstance(element_or_selector, basestring):
            return self.find(element_or_selector)
        else:
            return element_or_selector
Code example #52
0
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "APP_DIRS": True,
        "OPTIONS": {
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
                "django.contrib.auth.context_processors.auth",
                "django.contrib.messages.context_processors.messages",
                "portal.context_processors.process_newsletter_form",
            ]
        },
    }
]

# When SELENIUM_HEADLESS is set, run the test browser inside a virtual
# X display (pyvirtualdisplay / Xvfb) and stop it at interpreter exit.
if os.environ.get('SELENIUM_HEADLESS'):
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(1624, 1024))
    display.start()
    import atexit
    # register the bound method directly; the lambda wrapper was redundant
    atexit.register(display.stop)

INSTALLED_APPS = ["portal"]
PIPELINE_ENABLED = False
ROOT_URLCONF = "example_project.example_project.urls"
STATIC_ROOT = "example_project/example_project/static"
SECRET_KEY = "bad_test_secret"

from django_autoconfig.autoconfig import configure_settings

configure_settings(globals())
def check_lectures():
    """Scrape the Moodle cancelled-lectures notice and post it to the group.

    Logs progress via write_html(); on failure tears down the browser and
    records the error through update_log().
    """
    exec_start_time1 = time.time()
    email_message = ''
    # FIX: use None sentinels (were '' strings) -- the except branch used to
    # call ''.quit() / ''.stop() when a failure happened before the browser
    # was created, raising AttributeError and masking the original error
    driver = None
    display = None
    try:
        write_html('Time: ' + time.strftime("%H:%M") +
                   ' Checking Cancelled Lectures \n')
        if check_website(lectures_url):
            cancelled = ''
            display = Display(visible=0, size=(1920, 1080))
            display.start()
            driver = webdriver.Firefox()
            driver.get(lectures_url)
            # NOTE(review): credentials are hard-coded placeholders; move
            # them to configuration / environment variables
            driver.find_element_by_id('username').send_keys(
                'MOODLE_USERNAME_HERE')
            driver.find_element_by_id('password').send_keys(
                base64.b64decode('MOODLE_Password_HERE').decode("utf-8"))
            driver.find_element_by_id('loginbtn').click()
            abs_lec = driver.find_element_by_xpath(
                '//*[@id="section-1"]/div[3]').text
            abs_lec_split = abs_lec.split('\n')
            today = (datetime.datetime.now()).strftime(
                "%A")  # Define which subjects you have in which particular day
            if today == "Monday":
                todays_lec = ['database', 'project', 'networking security']
            elif today == "Tuesday":
                todays_lec = ['project', 'advanced networking']
            elif today == "Wednesday":
                todays_lec = ['project', 'advanced networking']
            elif today == "Thursday":
                todays_lec = [
                    'database', 'advanced networking', 'networking security'
                ]
            elif today == "Friday":
                todays_lec = ['project', 'networking security', 'database']
            else:
                todays_lec = ['NO SCHOOL TODAY!']
            # keep notice lines mentioning the current class AND one of
            # today's subjects
            for line in abs_lec_split:
                if current_class in line:
                    for lectures in todays_lec:
                        if lectures in line.lower():
                            cancelled = cancelled + line
            driver.quit()
            display.stop()
            write_html('Cancelled Lectures info Received \n')
            snd_message = check_notice(abs_lec.encode('utf-8'))
            if snd_message and cancelled != '':
                email_message = 'Below please find Cancelled lectures info:\n\n' + cancelled
                group_post(str(email_message), 'GROUP ID NO', "LEC")
            else:
                write_html(
                    'Cancelled Lectures still the same, E-mail not sent!')
        else:
            write_html('Website Unreachable!')
    except Exception as err1:
        # tear down the browser only if it was actually created
        if driver is not None:
            driver.quit()
        if display is not None:
            display.stop()
        status1, err_msg1 = update_log(
            str(err1), 'Error Origin: Cancelled Lectures Script')
        write_html(status1)
    time_took1 = time.time() - exec_start_time1
    write_html('\nScript took ' + ("%.2f" % time_took1) +
               ' seconds to complete \n')
Code example #54
0
class WebAssay:
    """
    Base class built on top of a Selenium Firefox driver.

    Inherit from this class to
    1. parse web pages,
    2. calculate the area and position of elements, and
    3. stain the HTML page for parsed elements.

    It can be used as a base class for variants of WebAssay.
    You must implement a `run` function to use the base class.
    """
    def __init__(self,
                 user_agent: str,
                 window_size: tuple,
                 headless=False,
                 parser_functions: List = None,
                 color_palette: Dict = None,
                 warpped_height_px: int = 700,
                 reset_driver_after: int = 50):
        """
        `user_agent` is the User-Agent string the browser reports.
        `window_size` is a (width, height) tuple in pixels.
        `headless` should be set to True if you want a headless web browser.
        `color_palette` is a dictionary that maps from element category to a
          hex color.
        `parser_functions` is a list of parser functions.
          A parser function takes bs4, and returns a list of dictionaries.
          Be sure to make one of those keys contain `category`,
          if you're using a `color_palette` and want to stain images.
        `warpped_height_px` is the minimum y-distance in pixels to consider
          an element warpped (such elements are skipped).
        `reset_driver_after` is how many uses to allow before
          `restart_driver` recycles the browser.

        Raises ValueError when no parser functions are supplied.
        """
        # NOTE: defaults are None instead of the mutable literals [] / {}
        # used previously -- mutable defaults are shared across all calls.
        self.parser_functions = parser_functions if parser_functions is not None else []
        if len(self.parser_functions) == 0:
            raise ValueError("Please assign parser_functions!")

        # browser params
        self.window_size = window_size
        self.width, self.height = window_size
        self.user_agent = user_agent
        self.headless = headless
        self._init_browser()

        # optional params
        self.color_palette = color_palette if color_palette is not None else {}
        self.warpped_height = warpped_height_px  # skip elements whose height exceeds.

        # friends we make along the way
        self.error_files = []  # which files are not parsed correctly?
        self.element_metadata = pd.DataFrame(
        )  # the most recent element metadata.
        self.driver_reset_counter = 0  # driver will reset at `reset_driver_after`.
        self.reset_driver_after = reset_driver_after

    def _init_browser(self):
        """
        Initalizes a selenium browser with proper `user_agent` and window `size`.
        Set `headless` to True to have a headless browser.
        Keep the default as False to help debug.
        """
        # `display` stays False when we are not headless; close_driver uses
        # that to decide whether a virtual display must be stopped.
        self.display = False
        if self.headless:
            # Pad the virtual display slightly beyond the window size.
            self.display = Display(visible=0,
                                   size=(self.width + 10, self.height + 10))
            self.display.start()

        # Set up user agent
        profile = webdriver.FirefoxProfile()
        profile.set_preference("general.useragent.override", self.user_agent)
        firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX
        firefox_capabilities['marionette'] = True

        driver = webdriver.Firefox(profile, capabilities=firefox_capabilities)
        driver.set_window_size(*self.window_size)
        self.driver = driver

    def close_driver(self):
        """Closes the driver and, when one was started, the virtual display."""
        self.driver.quit()
        if not isinstance(self.display, bool):
            self.display.stop()

    def restart_driver(self):
        """Restarts drivers and display"""
        self.close_driver()
        self._init_browser()
        self.driver_reset_counter = 0
        time.sleep(2)  # give the fresh browser a moment to settle

    def save_source(self, fn: str):
        """Saves the source code of the current page to file `fn`."""
        with open(fn, 'w') as f:
            f.write(self.driver.page_source)

    def screenshot_full(self, fn: str):
        """
        Takes a full screenshot. There are other methods that work
        better with a headless browser (such as expanding the window).

        The screenshot is resized to the original dimensions.
        For whatever reason, I get higher res images by the default
        screenshot.

        The standard size allows us to mark up the screenshot with the
        element metadata in `paint_abstract_representation`.
        """
        body = self.driver.find_element_by_tag_name('body')
        body.screenshot(fn)

        # resize image in place, preserving aspect ratio.
        # NOTE(review): Image.ANTIALIAS was removed in Pillow 10
        # (Image.LANCZOS is the equivalent) -- confirm the pinned Pillow
        # version before upgrading.
        img = Image.open(fn)
        img.thumbnail((body.rect['width'], 1e6), Image.ANTIALIAS)
        img.save(fn)

    def identify_elements(
            self, body: Union[element.Tag, element.NavigableString]) -> List:
        """
        Runs every parser in `self.parser_functions` through the web page.
        The results are appended to the `data` output.
        """
        data = []
        for parser in self.parser_functions:
            results = parser(body)
            data.extend(results)
        return data

    def stain_element(self,
                      xpath: str,
                      category: str,
                      color: str = '#ffffff',
                      opacity: float = 0.7) -> bool:
        """
        Alters the HTML of a page.
        Stains elements located in `xpath` with `color` by overwritting
        the style attribute.
        Also sets a new param of markup_category = `category`.

        Returns True when the element was found, displayed, and stained.
        """
        try:
            elm = self.driver.find_element_by_xpath(xpath)
        except Exception:  # couldn't find element (e.g. NoSuchElementException)
            return False
        if not elm.is_displayed():
            return False
        style = elm.get_attribute('style')
        if elm.tag_name == 'img':
            # Images get a multiply blend so the stain tints the picture.
            custom_style = f"background-color: {color} !important; " \
                            "transition: all 0.5s linear;"\
                            "mix-blend-mode: multiply !important;"
            if style:
                style += '; ' + custom_style
            else:
                style = custom_style
            self.driver.execute_script(
                f"arguments[0].setAttribute('markup_category','{category}')",
                elm)
            self.driver.execute_script(
                f"arguments[0].setAttribute('style','{style}')", elm)
            # Also tint the nearest ancestor div so the stain is visible
            # behind the blended image.
            parent = elm.find_element_by_xpath('ancestor::div[1]')
            style_parent = parent.get_attribute('style')
            custom_style = f"background-color: {color} !important; "
            if style_parent:
                style_parent += '; ' + custom_style
            else:
                style_parent = custom_style
            self.driver.execute_script(
                f"arguments[0].setAttribute('style','{style_parent}')", parent)
        else:
            self.driver.execute_script(
                f"arguments[0].setAttribute('markup_category','{category}')",
                elm)
            custom_style = f"background-color: {color} !important; "\
                            "transition: all 0.5s linear;"
            if style:
                style += '; ' + custom_style
            else:
                style = custom_style
            self.driver.execute_script(
                f"arguments[0].setAttribute('style','{style}')", elm)
            # Tint every visible image nested inside the element.
            all_images = elm.find_elements_by_tag_name('img')
            for img in all_images:
                if img.is_displayed():
                    style = img.get_attribute('style')
                    custom_style = f"background-color: {color} !important; " \
                                    "mix-blend-mode: multiply !important; z-index:99 !important;"
                    if style:
                        style += '; ' + custom_style
                    else:
                        style = custom_style
                    self.driver.execute_script(
                        f"arguments[0].setAttribute('style','{style}')", img)
            # Same treatment for nested videos.
            all_videos = elm.find_elements_by_tag_name('video')
            for vid in all_videos:
                if vid.is_displayed():
                    style = vid.get_attribute('style')
                    custom_style = f"background-color: {color} !important; " \
                                    "mix-blend-mode: multiply !important; z-index:99 !important;"
                    if style:
                        style += '; ' + custom_style
                    else:
                        style = custom_style
                    self.driver.execute_script(
                        f"arguments[0].setAttribute('style','{style}')", vid)
            # Anchor elements: stain their visible child divs as well.
            if elm.tag_name == 'a':
                all_children_by_xpath = elm.find_elements_by_tag_name("div")
                for child in all_children_by_xpath:
                    if child.is_displayed():
                        style = elm.get_attribute('style')
                        custom_style = f"background-color: {color} !important; "
                        if style:
                            style += '; ' + custom_style
                        else:
                            style = custom_style
                        self.driver.execute_script(
                            f"arguments[0].setAttribute('style','{style}')",
                            child)
        return True

    def calculate_element_area(self, xpath: str) -> Dict:
        """
        Selenium will try to find an element based on the `xpath`.
        If it is found, calculate the `area` that element occupies
        on first screen (`area`) and whole page (`area_page`).

        If the element is warpped, returns {'is_warpped': True}.
        If the element is missing or not displayed, returns an empty dict.
        """
        # get the element based on the xpath
        try:
            elm = self.driver.find_element_by_xpath(xpath)
        except Exception:  # couldn't find element
            return {}

        # get dimensions of element
        rect = elm.rect
        # skip warped elements
        if rect['height'] >= self.warpped_height:
            return {'is_warpped': True}

        # adjust the dimensions by clipping if necessay. "Area" is the first screen
        if elm.is_displayed():
            area = calc_area(rect,
                             location=rect,
                             width=self.width,
                             height_bottom=self.height)
            area_page = calc_area(rect, location=rect, width=self.width)
            meta = {
                'xpath': xpath,
                'dimensions': elm.size,
                'location': elm.location,
                'area': area,
                'area_page': area_page,
            }

            return meta

        # BUG FIX: previously fell through and implicitly returned None for
        # hidden elements; return the documented empty dict instead.
        return {}

    def open_local_html(self, fn):
        """Opens a local HTML page in the emulator (skips a redundant load)."""
        local_file = 'file://' + os.path.abspath(fn)
        if self.driver.current_url != local_file:
            self.driver.get(local_file)

    def run(self):
        """
        This function must be overwritten in the inherited class.

        Should contain the following steps:
        1. Read either the current page on the driver or a local HTML file
           `fn` into bs4...

        2. Identify elements by sending the contents of the HTML through each
           parser in `parser_functions`.
           Do this by calling `self.identify_elements()` on the page.

        3. For each element, `self.calculate_element_area()`,
           and optionally `self.stain_element()` if self.stain = True.

        4. Assign `self.element_metadata` with the latest element metadata.

        And then anything else is up to you.
        """
        raise NotImplementedError
コード例 #55
0
    def parse_page(self, response):
        """
        Scrape per-SKU stock quantities for the 'Hanover' warehouse.

        Logs into the warehouse portal with a headless Firefox, pages
        through the stock table, and upserts (qty, sku, warehouse) rows
        into `mmc_stocks`. Success/failure is recorded in
        `mmc_spider_status`.
        """
        # Pre-bind so the cleanup at the bottom is safe even when the
        # display/driver never got created (previously a NameError).
        display = None
        driver = None
        try:
            from pyvirtualdisplay import Display
            display = Display(visible=0, size=(800, 800))
            display.start()
            firefox_options = Options()
            firefox_options.add_argument('-headless')
            firefox_options.add_argument('--disable-gpu')
            driver = webdriver.Firefox(firefox_options=firefox_options, executable_path=settings.FIREFOX_PATH)
            driver.get(response.url)
            driver.implicitly_wait(100)
            elem_code = driver.find_elements_by_id('WarehouseCode')
            elem_acode = driver.find_elements_by_id('AccountCode')
            elem_name = driver.find_elements_by_id('UserName')
            elem_pass = driver.find_elements_by_id('Password')
            btn_login = driver.find_elements_by_css_selector('input[name="Login"]')

            # SECURITY: hard-coded portal credentials; consider moving them
            # to settings / environment configuration.
            if elem_code:
                elem_code[0].send_keys('03')
            if elem_acode:
                elem_acode[0].send_keys('001862')
            if elem_name:
                elem_name[0].send_keys('MAXLEAD')
            if elem_pass:
                elem_pass[0].send_keys('1202HXML')
            btn_login[0].click()
            driver.implicitly_wait(100)
            time.sleep(5)
            # Pager text ends with the page count, e.g. "Page 1 of 12".
            total_page = driver.find_elements_by_css_selector('#navigationTR nobr')[0].text
            total_page = int(total_page.split(' ')[-1])

            for i in range(total_page):
                try:
                    res = driver.find_elements_by_css_selector('#ViewManyListTable tr')
                    elem = driver.find_element_by_id('MetaData')
                    elem.click()
                    res.pop(0)  # drop the header row
                    for val in res:
                        td_re = val.find_elements_by_tag_name('td')
                        if td_re:
                            sku = td_re[0].text
                            warehouse = 'Hanover'
                            if td_re[3].text and not td_re[3].text == ' ':
                                qty = td_re[3].text
                                qty = qty.replace(',', '')  # "1,234" -> "1234"
                            else:
                                qty = 0

                            qty_sql = "select id from mmc_stocks where commodity_repertory_sku='%s' and warehouse='%s'" % (
                            sku, warehouse)
                            self.db_cur.execute(qty_sql)
                            # BUG FIX: `fetchone` was referenced without
                            # calling it (a no-op); actually consume the row.
                            self.db_cur.fetchone()
                            qty_re = self.db_cur.rowcount
                            values = (qty, sku, warehouse)
                            if qty_re > 0:
                                sql = "update mmc_stocks set qty=%s where commodity_repertory_sku=%s and warehouse=%s"
                            else:
                                sql = "insert into mmc_stocks (qty, commodity_repertory_sku, warehouse) values (%s, %s, %s)"
                            self.db_cur.execute(sql, values)
                    # BUG FIX: `i < total_page` was always true inside
                    # range(total_page), so "Next" was clicked even on the
                    # last page. Only advance when another page remains.
                    if i < total_page - 1:
                        elem_next_page = driver.find_elements_by_id('Next')
                        if elem_next_page:
                            elem_next_page[0].click()
                            driver.implicitly_wait(100)
                except Exception:
                    # A malformed page should not abort the whole crawl.
                    continue
            self.conn.commit()
            sql = "update mmc_spider_status set status=3, description='' where warehouse='Hanover'"
            self.db_cur.execute(sql)
            self.conn.commit()
        except Exception as e:
            # Record the failure so the status table reflects it.
            values = (str(e),)
            sql = "update mmc_spider_status set status=2, description=%s where warehouse='Hanover'"
            self.db_cur.execute(sql, values)
            self.conn.commit()

        # Best-effort refresh; the page may raise a confirmation alert.
        try:
            driver.refresh()
            driver.switch_to.alert.accept()
            driver.implicitly_wait(100)
        except Exception:
            pass
        if display is not None:
            display.stop()
        if driver is not None:
            driver.quit()
コード例 #56
0
def create_thumbnails(documents,
                      workingdir='.',
                      skip=0,
                      login=None,
                      s3bucket=None):
    # Drive a browser over `documents` and create a thumbnail for each one,
    # restarting the browser on failure and tracking progress on disk.
    #
    # NOTE: this is Python 2 code (print statements, the `file()` builtin).
    #
    # documents  -- iterable of (document_id, document_url) pairs.
    # workingdir -- directory for thumbnails, logs and status files.
    # skip       -- skip the first `skip` documents (resume support).
    # login      -- optional dict with keys 'login_url',
    #               'username-control-id', 'password-control-id',
    #               'username', 'password'; when set, log in first.
    # s3bucket   -- passed through to `create_thumbnail`; presumably an S3
    #               upload target -- TODO confirm against create_thumbnail.

    # Start a virtual display when Xvfb is available; otherwise fall back
    # to a visible browser.
    try:
        display = None
        from pyvirtualdisplay import Display
        display = Display(visible=0, size=(1280, 1024))
        display.start()
    except:
        print 'No Xvfb!'

    workingdir = workingdir.rstrip('/')
    print 'workingdir:', workingdir

    if not os.path.exists(workingdir):
        parser.error("workingdir not exists")

    # Ensure the output folders exist.
    thumbnail_folder = workingdir + "/thumbnails/"
    if not os.path.exists(thumbnail_folder):
        os.mkdir(thumbnail_folder)

    thumbnail_doubts_folder = workingdir + "/thumbnails_doubts/"
    if not os.path.exists(thumbnail_doubts_folder):
        os.mkdir(thumbnail_doubts_folder)

    if skip:
        print 'skip:', skip

    log_file = codecs.open(workingdir + '/error_log.txt', 'a', 'utf-8')

    # Status files let an external watchdog see where the crawl is and
    # which Firefox process to kill if it hangs.
    file(workingdir + '/running_status.txt', 'w').write("crawling started")

    if os.path.exists(workingdir + '/running_firefox_pid.txt'):
        os.remove(workingdir + '/running_firefox_pid.txt')

    driver, browser_pid = crawlutils.open_driver()
    try:
        if browser_pid:
            file(workingdir + '/running_firefox_pid.txt',
                 'w').write("%s" % (browser_pid))

        #crawlutils.login(driver, login_url, 'loginname-id', 'password-id', 'alc_acc', 'n0thinghaschanged')
        if login:
            print 'login_url:', login['login_url']
            crawlutils.login(driver, login['login_url'],
                             login['username-control-id'],
                             login['password-control-id'], login['username'],
                             login['password'])

        print len(documents), 'to be processed'

        count = 0
        for (document_id, document_url) in documents:
            count += 1
            if skip > count: continue
            print 'count:', count
            try:
                # Record progress before the attempt so a crash leaves a trail.
                file(workingdir + '/running_status.txt', 'w').write(
                    "%s\t%s\t%s" % (count, document_id, document_url))
                create_thumbnail(driver, workingdir, document_id, document_url,
                                 s3bucket)
            except:
                # Recovery path: tear the browser down, start a fresh one,
                # re-login if needed, and continue with the next document.
                traceback.print_exc()
                if driver:
                    try:
                        driver.quit()
                    except:
                        pass
                if os.path.exists(workingdir + '/running_firefox_pid.txt'):
                    os.remove(workingdir + '/running_firefox_pid.txt')
                driver, browser_pid = crawlutils.open_driver()
                if browser_pid:
                    print 'browser_pid:', browser_pid
                    file(workingdir + '/running_firefox_pid.txt',
                         'w').write("%s" % (browser_pid))

                if login:
                    print 'login_url:', login['login_url']
                    crawlutils.login(driver, login['login_url'],
                                     login['username-control-id'],
                                     login['password-control-id'],
                                     login['username'], login['password'])
                time.sleep(5)

        print 'completed'
    except:
        traceback.print_exc()
    finally:
        # Remove the watchdog files and shut everything down.
        if os.path.exists(workingdir + '/running_firefox_pid.txt'):
            os.remove(workingdir + '/running_firefox_pid.txt')
        if os.path.exists(workingdir + '/running_status.txt'):
            os.remove(workingdir + '/running_status.txt')
        try:
            driver.quit()
        except:
            pass
        del driver
        if display: display.stop()
        log_file.close()
コード例 #57
0
class GithubLogin(unittest.TestCase):
    """
    Selenium-driven scraper for crash statistics on fabric.io.

    NOTE(review): these methods append into module-level accumulator lists
    (AllUserSessions, URL, Crash, User, Version, IssueNumber, ... and their
    *Title counterparts) and read module-level configuration
    (github_account, github_passwd, PlatformName, Top_build, SelectVersion,
    User_Input, pgk) that are defined elsewhere in this file -- confirm
    they are initialised before these tests run.
    """
    def setUp(self):
        # Virtual framebuffer so Firefox can run on a headless host.
        self.display = Display(visible=0, size=(1920, 1080))
        self.display.start()
        self.driver = webdriver.Firefox(executable_path='./geckodriver')
        self.driver.implicitly_wait(30)
        self.base_url = "https://www.fabric.io"
        self.verificationErrors = []
        self.accept_next_alert = True

    def JSonFile(self, file):
        '''
        Load a JSON file, preserving key order.

        The argument is the path of the JSON file to load.
        Example:
        ADJson = JSonFile('536_default.json')
        '''
        with open(file, 'r', encoding='utf-8')as f:  # requires Python 3.5+
            ADJson = json.load(f, object_pairs_hook=OrderedDict)
        return ADJson


    def Platform(self, PlatformName):
        # Click the platform tab (e.g. iOS / Android) whose label matches
        # PlatformName.
        PlatformCss = self.driver.find_elements_by_css_selector('.js-app-view span')
        PlatformNumber = 0
        print("你選擇的平台是" + PlatformName)
        for i in PlatformCss:
            try:
                if i.text == PlatformName:
                    PlatformCss[PlatformNumber].click()
            except:
                pass
            PlatformNumber += 1
        time.sleep(5)

    def ClickCarshlytics(self):
        # Open the Crashlytics section.

        self.driver.find_element_by_css_selector(".crashlytics i").click()
        time.sleep(5)


    def EnterVserion(self,Version):
        # Type each requested version into the version-filter select box.
        print("你選擇的版本:")
        for i in range(len(Version)):
            print(Version[i])

        for i in range(len(Version)):
            VersionCheck = self.driver.find_elements_by_css_selector(".Select-arrow-zone span")
            VersionCheck[0].click()
            self.driver.find_element_by_class_name('Select-control').send_keys(Version[i] + '\n')
            time.sleep(3)



    def ClearSelectIcon(self):

        # Clear the default selected value in the version filter.
        self.driver.find_element_by_class_name('Select-value-icon').click()
        time.sleep(5)

    def SelectAll(self):
        # Select the "All" issue-state filter.
        # ClickAll = self.driver.find_elements_by_css_selector("#state-group-all")
        self.driver.find_element_by_id("state-group-all").click()
        time.sleep(5)
        print("點擊All")

    def ReadAllUserSessions(self):
        # Collect crash-free session stats (value + name pairs) into the
        # module-level AllUserSessions / AllUserSessionsName lists.

        All = self.driver.find_elements_by_css_selector('.crash-free-percent .stat .value')
        Name = self.driver.find_elements_by_css_selector('.crash-free-percent .stat .name')
        for i in All:
            AllUserSessions.append(i.text)

        for i in Name:
            AllUserSessionsName.append(i.text)

    def MoveWeb(self):

        # Scroll the page down to trigger lazy loading (the commented-out
        # code originally stopped once 150 entries were visible).
        print("正在滑動頁面請稍等...")
        # AllNumber = self.driver.find_elements_by_css_selector(".events-stat span span")
        for i in range(5):
            # if int(len(AllNumber)) == 180:
            #     pass
            # else:
            js = "var q=document.documentElement.scrollTop=10000"
            self.driver.execute_script(js)
            time.sleep(5)

    def ReadUrl(self):

        # Collect the issue detail URLs.
        print("開始-->讀取URL")
        URLNumber  = self.driver.find_elements_by_css_selector(".cell-title a")
        for i in URLNumber:
            URL.append(i.get_attribute("href"))
            URLTitle.append("URL")
        print("結束-->讀取URL")

    def ReadCrashNumber(self):

        # Read crash counts, skipping the unit labels and blanks.
        print("*"*10)
        print("開始-->讀取Crash數量")
        CrashNumber = self.driver.find_elements_by_css_selector(".events-stat span span")
        for i in CrashNumber:
            if i.text == 'CRASHES':
                pass
            elif i.text == 'CRASH':
                pass
            elif i.text == '':
                pass
            else:
                Crash.append(i.text)
                CrashTitle.append(("Crash"))
        print("結束-->讀取Crash數量")
        print("*"*10)

    def ReadUserNumber(self):
        # Read affected-user counts, skipping the unit labels and blanks.
        print("開始-->讀取User數量")
        UserNumber = self.driver.find_elements_by_css_selector(".devices-stat span span")

        for i in UserNumber:
            if i.text =='USERS':
                pass
            elif i.text == '':
                pass
            elif i.text =='USER':
                pass
            else:
                User.append(i.text)
                UserTitle.append("User")
        print("結束-->讀取User數量")
        print("*"*10)

    def ReadVersionNumber(self):
        # Read the affected version labels.
        print("開始-->讀取Version")
        VersionNumber  = self.driver.find_elements_by_css_selector(".more-info")
        for i in VersionNumber:
            Version.append(i.text)
            VersionTitle.append("Version")
        print("結束-->讀取Version")
        print("*"*10)
    def ReadIssueNumber(self):
        # Read the issue numbers.
        print("開始-->讀取Issue編號")
        IssueNumberTest = self.driver.find_elements_by_css_selector(".issue-number")
        for i in IssueNumberTest:
            IssueNumber.append(i.text)
            IssueNumberTitle.append("IssueNumber")
        print("結束-->讀取Issue編號")
        print("*"*10)
    def ReadIssueTitle(self):
        # Read the issue titles.
        print("開始-->讀取Issue開頭")
        IssueTitleTest = self.driver.find_elements_by_css_selector(".issue-title")
        for i in IssueTitleTest:
            IssueTitle.append(i.text)
            IssueTitleTitle.append("IssueTitle")
        print("結束-->讀取Issue開頭")
        print("*"*10)
    def ReadIssueSubtitle(self):
        # Read the issue subtitles.
        print("開始-->讀取Issue大綱")
        IssueSubtitleTest = self.driver.find_elements_by_css_selector(".issue-subtitle")
        for i in IssueSubtitleTest:
            IssueSubtitle.append(i.text)
            IssueSubtitleTitle.append("IssueSubtitle")
        print("結束-->讀取Issue大綱")
        print("*"*10)
    def ReadAllNumber(self):
        # Assign a 1-based rank to every scraped issue.
        IssueAllNumber = len(IssueSubtitle)
        x = 1
        for i in range(IssueAllNumber):
            TestAll.append(x)
            TestAllTitle.append("Rank")
            x += 1

    def Get_RecentActivity(self):
        # Scrape the "Recent Activity" table on an issue detail page.
        # Cells alternate version / occurrences, hence the even/odd split.
        self.DefaultValue()
        RecentActivity = self.driver.find_elements_by_css_selector(".padding-left-15px tbody td")
        # print(len(RecentActivityOccurrences))
        x = 1
        for i in RecentActivity:
            if x % 2 == 0:
                RecentActivityOccurrences.append(i.text)
                RecentActivityOccurrencesTitle.append('Occurrences')
            else:
                RecentActivityVersion.append(i.text)
                RecentActivityVersionTitle.append('Version')
            x += 1
    def ListToJsonFile(self, FileName):
        # Merge the scraped per-issue lists into one record per issue and
        # dump them to FileName as {"data": [...]}.

        print("開始-->將資料轉成Json")


        for i in range(len(IssueNumber)):

            # (translation: first copy the original strings into new lists)
            '''先將原本的字串另存到新的空字串中'''
            IssueNumberA.append(IssueNumber[i])
            IssueTitleA.append(IssueTitle[i])
            IssueSubtitleA.append(IssueSubtitle[i])
            VersionA.append(Version[i])
            CrashA.append(Crash[i])
            UserA.append(User[i])
            URLA.append(URL[i])
            TestAllA.append(TestAll[i])

            # (translation: zip each title/value list pair into a dict)
            '''將兩個字串合併成字典'''
            TestAllDict = OrderedDict(zip(TestAllTitle, TestAllA))
            IssueNumberDict = OrderedDict(zip(IssueNumberTitle, IssueNumberA))
            IssueTitleDict = OrderedDict(zip(IssueTitleTitle, IssueTitleA))
            IssueSubtitleDict = OrderedDict(zip(IssueSubtitleTitle, IssueSubtitleA))
            VersionDict = OrderedDict(zip(VersionTitle, VersionA))
            CrashDict = OrderedDict(zip(CrashTitle, CrashA))
            UserDict = OrderedDict(zip(UserTitle, UserA))
            URLDict = OrderedDict(zip(URLTitle, URLA))

            # (translation: merge the per-field dicts into one record)
            '''每次字典更新新增一筆'''
            TestAllDict.update(IssueNumberDict)
            TestAllDict.update(IssueTitleDict)
            TestAllDict.update(IssueSubtitleDict)
            TestAllDict.update(VersionDict)
            TestAllDict.update(CrashDict)
            TestAllDict.update(UserDict)
            TestAllDict.update(URLDict)
            data.append(TestAllDict)
            dataDict['data'] = data

        # Persist the dict as JSON.
        with open(FileName, 'w') as f:
            json.dump(dataDict, f)
        f.close()
        print("結束-->將資料轉成Json")
        print("*"*10)
        print("請查看" + FileName)

    def ListToJsonFile_Crash(self, FileName):
        # Build the per-version crash-free-session JSON (version ->
        # session stats + user count) and dump it to FileName.
        print("開始-->將資料轉成Json")
        User_Input.Version.append('All Version')
        itmes = 0
        Test = len(AllUserSessions)//2  # stats come in pairs per version
        User = len(GetUserNumberTest)   # NOTE: shadows the module-level User list

        for i in range(Test):
            # Append the crash stats; two values are captured per version.
            # The else branch runs first (i == 0); afterwards the i >= 1
            # branch runs with an advancing offset.
            if i >= 1:
                itmes += 1
                AllUserSessionsA.append(AllUserSessions[i + itmes])
                AllUserSessionsNameA.append(AllUserSessionsName[i + itmes])
                AllUserSessionsA.append(AllUserSessions[i + itmes + 1])
                AllUserSessionsNameA.append(AllUserSessionsName[i + itmes + 1])
            else:
                AllUserSessionsA.append(AllUserSessions[i])
                AllUserSessionsNameA.append(AllUserSessionsName[i])
                AllUserSessionsA.append(AllUserSessions[i + 1])
                AllUserSessionsNameA.append(AllUserSessionsName[i + 1])

            # Add the user count; past the known range it would have been
            # recorded as Null (that branch is currently disabled).
            if i > (User-1):
                # AllUserSessionsA.append('Null')
                # AllUserSessionsNameA.append('User')
                pass
            else:
                AllUserSessionsA.append(GetUserNumberTest[i])
                AllUserSessionsNameA.append('User')

            Sessions = OrderedDict(zip(AllUserSessionsNameA, AllUserSessionsA))
            SessionsA.append(Sessions)

        Get_crash_free_session = OrderedDict(zip(User_Input.Version, SessionsA))

        # (translation: persist the dict as JSON)
        '''將字典存成Json'''
        with open(FileName, 'w') as f:
            json.dump(Get_crash_free_session, f)
        f.close()
        print("結束-->將資料轉成Json")
        print("*"*10)
        print("請查看" + FileName)


    def test_Read_Fabirc(self):
        # End-to-end scrape: log in, open Crashlytics for the top build,
        # collect issue data, then visit each issue URL for its Recent
        # Activity and write everything to Top_build_Fabric.json.
        print('Top build version query raw data')

        driver = self.driver
        driver.get(self.base_url + "/login")

        driver.find_element_by_id("email").clear()
        driver.find_element_by_id("email").send_keys(github_account)
        driver.find_element_by_id("password").clear()
        driver.find_element_by_id("password").send_keys(github_passwd)
        driver.find_element_by_class_name("sign-in").click()
        time.sleep(5)

        # iOS or Android
        self.Platform(PlatformName)  # Sean
        self.ClickCarshlytics()
        self.EnterVserion(Top_build)  # Sean
        self.ClearSelectIcon()
        self.SelectAll()
        self.ReadAllUserSessions()
        self.MoveWeb()
        self.ReadUrl()
        self.ReadCrashNumber()
        self.ReadUserNumber()
        self.ReadVersionNumber()
        self.ReadIssueNumber()
        self.ReadIssueTitle()
        self.ReadIssueSubtitle()
        self.ReadAllNumber()
        self.ListToJsonFile('Top_build_Fabric.json')

        print("Get Recent Activity")
        time.sleep(2)
        ADJson = self.JSonFile('Top_build_Fabric.json')

        # driver.get(ADJson['data'][0]['URL'])
        # self.Get_RecentActivity()

        # Enrich every scraped issue with its Recent Activity table.
        for i in range(len(ADJson['data'])):
            driver.get(ADJson['data'][i]['URL'])
            self.Get_RecentActivity()

            for j in range(len(RecentActivityOccurrences)):
                RecentActivityOccurrencesA.append(RecentActivityOccurrences[j])
                RecentActivityVersionA.append(RecentActivityVersion[j])

                # (translation: zip the two lists into a dict)
                '''將兩個字串合併成字典'''
                RecentActivityOccurrencesDict = OrderedDict(
                    zip(RecentActivityOccurrencesTitle, RecentActivityOccurrencesA))
                RecentActivityVersionDict = OrderedDict(zip(RecentActivityVersionTitle, RecentActivityVersionA))

                # (translation: merge each new entry into the record)
                '''每次字典更新新增一筆'''
                RecentActivityVersionDict.update(RecentActivityOccurrencesDict)
                RecentActivity.append(RecentActivityVersionDict)
                RecentActivityDict['RecentActivity'] = RecentActivity
                ADJson['data'][i].update(RecentActivityDict)


                # RecentActivityDict = {}

        with open('Top_build_Fabric.json', 'w') as f:
            json.dump(ADJson, f)
        f.close()
        print("結束-->將資料轉成Json")
        print("*" * 10)
        print("請查看" + 'Top_build_Fabric.json')


    def test_Carsh_Top(self):
        # Collect only the crash-free-session numbers for each version in
        # SelectVersion (plus the "All Version" total) into Fabric.json.
        print('Get crash-free session only')

        driver = self.driver
        driver.get(self.base_url + "/login")

        driver.find_element_by_id("email").clear()
        driver.find_element_by_id("email").send_keys(github_account)
        driver.find_element_by_id("password").clear()
        driver.find_element_by_id("password").send_keys(github_passwd)
        driver.find_element_by_class_name("sign-in").click()
        driver.save_screenshot('Mark.png')
        time.sleep(5)
        self.Platform(PlatformName)  # Sean
        self.ClickCarshlytics()

        # Query each version one at a time (push, read, pop).
        for i in range(len(SelectVersion)):
            SelectVersionA.append(SelectVersion[i])

            self.EnterVserion(SelectVersionA)  # Sean
            self.ClearSelectIcon()
            self.ReadAllUserSessions()
            # self.GetGoodAdoptionURLfunction()

            SelectVersionA.pop()

            # Read the "All Version" totals.
        print("你選擇的版本:\nAll Version")
        self.ClearSelectIcon()
        self.ReadAllUserSessions()
        # GetUserNumberTest.append("Null")
        self.GetGoodAdoptionUserNumber()
        # self.GetGoodAdoptionURLfunction()
        #
        # for i in range(len(GetGoodAdoptionURLTest)):
        #
        #     if GetGoodAdoptionURLTest[i] is 'Null':
        #         GetUserNumberTest.append('Null')
        #     else:
        #         self.driver.get(GetGoodAdoptionURLTest[i])
        #         time.sleep(15)
        #         self.driver.save_screenshot(str([i]) + 'Mark.png')
        #         self.GetGoodAdoptionUserNumber()

        # Write the per-version crash summary.
        self.ListToJsonFile_Crash('Fabric.json')


    def GetGoodAdoptionUserNumber(self):
        # Visit the launch-status dashboard for each selected build (and
        # "all") and record the first "large" stat as the user count.
        UserURL = "https://www.fabric.io/photogrid/android/apps/" + pgk + "/dashboard/latest_release/launch_status?build="
        UserURLAll = "https://www.fabric.io/photogrid/android/apps/" + pgk + "/dashboard/latest_release/launch_status?build=all"

        print("你選擇的版本:")
        for i in range(len(SelectVersion)):
            print(SelectVersion[i])

        for i in range(len(SelectVersion)):
            self.driver.get(UserURL + SelectVersion[i])
            time.sleep(5)



            GetUserNumber = self.driver.find_elements_by_css_selector(".coverage-section .flex-1 .flex-1 .large")
            print('GetUserNumber : ' + str(GetUserNumber))
            x = 0
            for y in GetUserNumber:
                x += 1
                if x == 1:
                    GetUserNumberTest.append(str(y.text))
                    print("get user")
                    print(str(y.text))

            time.sleep(3)

        self.driver.get(UserURLAll)
        time.sleep(5)
        GetUserNumber = self.driver.find_elements_by_css_selector(".coverage-section .flex-1 .flex-1 .large")
        x = 0
        for y in GetUserNumber:
            x += 1
            if x == 1:
                GetUserNumberTest.append(str(y.text))
                print("get user")
                print(str(y.text))


    def GetGoodAdoptionURLfunction(self):
        # Collect adoption links from the page.
        GetGoodAdoptionURL = self.driver.find_elements_by_css_selector('.flex-1 .answers-link')
        TestList = []
        # If there is no link, record 'Null' instead.
        if GetGoodAdoptionURL == TestList:
            GetGoodAdoptionURLTest.append('Null')
        for i in GetGoodAdoptionURL:
            GetGoodAdoptionURLTest.append(i.get_attribute("href"))
            print("get href")
            print(i.get_attribute("href"))




    def DefaultValue(self):
        # Reset the module-level Recent Activity accumulators before each
        # issue page is scraped.
        global RecentActivityOccurrencesA, RecentActivityVersionA, RecentActivityOccurrences, RecentActivityVersion, \
            RecentActivityDict, RecentActivity
        RecentActivityOccurrencesA = []
        RecentActivityVersionA = []
        RecentActivityOccurrences = []
        RecentActivityVersion = []
        RecentActivityDict = {}
        RecentActivity = []


    def tearDown(self):
        # Shut down the browser and the virtual display.
        self.driver.quit()
        self.display.stop()
def check_assignments():
    """Poll the ATS assignments page and announce assignments not seen before.

    Runs headless Firefox inside a virtual display; the stored profile's
    auto-auth extension answers the login prompt.  Assignments not already
    present in ATS_assignments.txt (the de-duplication store) are appended
    to it and posted via group_post().  Errors are logged through
    update_log(); nothing is raised to the caller.
    """
    write_html('Checking for new Assignments...')
    driver = None
    display = None
    try:
        display = Display(visible=0, size=(1920, 1080))
        display.start()  # Auto logon via firefox plugin
        profile = webdriver.FirefoxProfile(
            profile_directory=
            r"/home/Python_User/LogFiles/ATSNoticesAndCancelledLectures/RequiredFiles/SeleniumProfile/16ykebtq.Seleniumprofile"
        )
        profile.add_extension(
            r"/home/Python_User/LogFiles/ATSNoticesAndCancelledLectures/RequiredFiles/SeleniumProfile/seleniumDriver/autoauth-2.1-fx+fn.xpi"
        )
        driver = webdriver.Firefox(firefox_profile=profile)
        driver.get(ATS_Asgt_url)
        sleep(2)
        # Select the current semester (second entry of the dropdown).
        driver.find_element_by_xpath(
            '//*[@id="cmbSemester"]/option[2]').click()
        sleep(2)
        # "a+" creates the store on first run; seek(0) so it can be read.
        with open(
                r"/home/Python_User/LogFiles/ATSNoticesAndCancelledLectures/RequiredFiles/ATS_assignments.txt",
                "a+") as file:
            file.seek(0)
            current_assignments = [ln.strip('\n') for ln in file]
            try:
                table_id = driver.find_element_by_id('dgMaterialVerification')
                rows = table_id.find_elements_by_tag_name('tr')
                for row in rows:
                    cells = row.find_elements_by_tag_name("td")
                    if len(cells) < 8:
                        # Header/short rows: skip them instead of aborting the
                        # whole scan (the original's IndexError here was
                        # swallowed by a bare except and killed the loop).
                        continue
                    unit_name = cells[3].text
                    ass_title = cells[5].text
                    ass_sit_type = cells[7].text
                    assignment = ass_title + ' ' + ass_sit_type
                    if assignment not in current_assignments:
                        file.write(assignment + '\n')
                        write_html('ATS Assignment Information Collected\n')
                        email_message = 'You have New Assignments on ATS!\n\t' + 'Assignment Title: ' + ass_title + ', ' + ass_sit_type + ' (' + unit_name + ')\n\nATS link: ' + ATS_Asgt_url
                        group_post(str(email_message), 'GROUP ID NO ',
                                   "ATS_Assignments")
            except Exception:
                # Assignments table absent -> nothing to report (best-effort,
                # matching the original's intent but no longer a bare except).
                pass
        driver.quit()
        display.stop()
    except Exception as err2:
        # Best-effort cleanup: driver/display may not have been created yet
        # (the original called .quit() on the '' placeholder and crashed).
        if driver is not None:
            driver.quit()
        if display is not None:
            display.stop()
        write_html('\nError Occurred!!')
        status2, err_msg2 = update_log(str(err2),
                                       'Error Origin: ATS Assignment Script')
        write_html(status2)
コード例 #59
0
def do_selenium(url, user_agent, domain, source):
    """Load *url* in headless Firefox with a spoofed user agent and save a
    timestamped screenshot named from *source* and *domain*.

    Returns True when the page loaded (even if the screenshot failed),
    False on load timeout/error.  The browser and virtual display are now
    always torn down -- the original leaked both on every early return.
    """
    # start up the virtual display
    display = Display(visible=0, size=(1366, 768))
    display.start()

    # start up browser with the overridden user agent
    profile = webdriver.FirefoxProfile()
    profile.set_preference("general.useragent.override", user_agent)
    browser = webdriver.Firefox(firefox_profile=profile)
    browser.set_page_load_timeout(15)

    try:
        try:
            with Timeout(60):
                browser.get(url)
        except Timeout.Timeout:
            print(bcolors.WARNING + "  [-] " + url + " has timed out. :(" + bcolors.ENDC)
            return False
        except Exception:
            e = sys.exc_info()[0]
            print(bcolors.WARNING + "  [-] " + url + " has errored: %s" % e + bcolors.ENDC)
            return False

        # accept a pop up alert if one comes up
        try:
            alert = browser.switch_to.alert
            print("\n[+] Popup alert observed: %s\n" % alert.text)
            if re.search(
                    "(?:requesting your username|zeus|call microsoft|call apple|call support)",
                    alert.text, re.IGNORECASE):
                # leave tech-support-scam credential prompts untouched
                print("\n    [-] This looks like it might be a tech support scam user/password popup, leaving it alone.")
            else:
                alert.accept()
                print("[+] Popup Alert observed, bypassing...")
        except Exception:
            pass

        # check page source to eliminate looking at pages that are parked and stuff we dont care about
        try:
            pagesource = browser.page_source
        except Exception:
            return False

        # do the screencap and sort it into known tp, known fp, or unknown
        try:
            # accessing the title can itself raise; keep it inside the guard
            pagetitle = browser.title.lower()
            shot_name = time.strftime(
                "%Y%m%d-%H%M%S") + '-' + source + '-' + domain + '.png'
            try:
                browser.save_screenshot(shot_name)
                print("  [+] Screencapped %s as %s" % (url, shot_name))
            except Exception:
                print(bcolors.FAIL + "  [-] Unable to screencap " + url + bcolors.ENDC)
        except Exception:
            print(bcolors.FAIL + "  [-] An error occured, unable to screencap " + url + bcolors.ENDC)

        return True
    finally:
        # always release the browser and the virtual display
        browser.quit()
        display.stop()
コード例 #60
0
ファイル: low_level.py プロジェクト: fude1/botTrading
class LowLevelAPI(object):
    """low level api to interface with the service"""
    def __init__(self, brow="firefox"):
        # Name of the splinter browser backend to launch later (see launch()).
        self.brow_name = brow
        # Runtime state: open positions, executed movements, watched stocks.
        self.positions = []
        self.movements = []
        self.stocks = []
        # init globals
        # NOTE(review): Glob() is called for its side effect only (the
        # instance is discarded) -- presumably initializes a module-wide
        # singleton; confirm against the Glob implementation.
        Glob()

    def launch(self):
        """launch browser and virtual display, first of all to be launched"""
        try:
            # init virtual Display
            self.vbro = Display()
            self.vbro.start()
            logger.debug("virtual display launched")
        except Exception:
            # Any failure starting the virtual display is mapped to the
            # domain exception (original cause is not chained).
            raise exceptions.VBroException()
        try:
            # splinter Browser; brow_name chosen at construction time
            self.browser = Browser(self.brow_name)
            logger.debug(f"browser {self.brow_name} launched")
        except Exception:
            raise exceptions.BrowserException(self.brow_name,
                                              "failed to launch")
        return True

    def css(self, css_path, dom=None):
        """Shorthand for a retried find-by-CSS lookup on *dom*
        (defaults to the whole browser)."""
        scope = self.browser if dom is None else dom
        return expect(scope.find_by_css, args=[css_path])

    def css1(self, css_path, dom=None):
        """Return the first element of a retried CSS lookup
        (defaults to the whole browser)."""
        scope = self.browser if dom is None else dom

        def first_match(path, search_scope):
            # inner retried lookup; take element 0 of the result set
            return self.css(path, search_scope)[0]

        return expect(first_match, args=[css_path, scope])

    def search_name(self, name, dom=None):
        """Shorthand for a retried find-by-name lookup on *dom*
        (defaults to the whole browser)."""
        scope = self.browser if dom is None else dom
        return expect(scope.find_by_name, args=[name])

    def xpath(self, xpath, dom=None):
        """Shorthand for a retried find-by-xpath lookup on *dom*
        (defaults to the whole browser)."""
        scope = self.browser if dom is None else dom
        return expect(scope.find_by_xpath, args=[xpath])

    def elCss(self, css_path, dom=None):
        """check if element is present by css"""
        scope = self.browser if dom is None else dom
        return expect(scope.is_element_present_by_css, args=[css_path])

    def elXpath(self, xpath, dom=None):
        """check if element is present by xpath"""
        # (docstring fixed: it was copy-pasted from elCss and said "by css")
        if dom is None:
            dom = self.browser
        return expect(dom.is_element_present_by_xpath, args=[xpath])

    def login(self, username, password, mode="demo"):
        """login function"""
        url = "https://trading212.com/it/login"
        try:
            logger.debug(f"visiting %s" % url)
            self.browser.visit(url)
            logger.debug(f"connected to %s" % url)
        except selenium.common.exceptions.WebDriverException:
            logger.critical("connection timed out")
            raise
        try:
            self.search_name("login[username]").fill(username)
            self.search_name("login[password]").fill(password)
            self.css1(path['log']).click()
            # define a timeout for logging in
            timeout = time.time() + 30
            while not self.elCss(path['logo']):
                if time.time() > timeout:
                    logger.critical("login failed")
                    raise CredentialsException(username)
            time.sleep(1)
            logger.info(f"logged in as {username}")
            # check if it's a weekend
            if mode == "demo" and datetime.now().isoweekday() in range(5, 8):
                timeout = time.time() + 10
                while not self.elCss(path['alert-box']):
                    if time.time() > timeout:
                        logger.warning("weekend trading alert-box not closed")
                        break
                if self.elCss(path['alert-box']):
                    self.css1(path['alert-box']).click()
                    logger.debug("weekend trading alert-box closed")
        except Exception as e:
            logger.critical("login failed")
            raise exceptions.BaseExc(e)
        return True

    def logout(self):
        """logout func (quit browser and stop the virtual display)

        Inverse of launch().  Raises exceptions.BrowserException when the
        browser was never started; returns True on clean shutdown.
        """
        try:
            self.browser.quit()
        except Exception:
            # The `return False` that followed this raise in the original
            # was unreachable dead code and has been removed.
            raise exceptions.BrowserException(self.brow_name, "not started")
        self.vbro.stop()
        logger.info("logged out")
        return True

    def get_bottom_info(self, info):
        """Read one numeric field from the bottom equity bar.

        *info* must be one of 'free_funds', 'account_value',
        'live_result' or 'used_margin'; anything else raises BaseExc.
        """
        labels = {
            'free_funds': 'equity-free',
            'account_value': 'equity-total',
            'live_result': 'equity-ppl',
            'used_margin': 'equity-margin'
        }
        try:
            span_text = self.css1(
                "div#%s span.equity-item-value" % labels[info]).text
            return num(span_text)
        except KeyError as e:
            raise exceptions.BaseExc(e)

    def get_price(self, name):
        # Scrape the sell price of *name* from the product sidebar.
        # Returns: float sell price when the product's quantity widget is in
        # "close" mode; False when the product is found but not in close
        # mode; implicitly None when no product name matches.
        # NOTE(review): callers must handle all three outcomes.
        soup = BeautifulSoup(
            self.css1("div.scrollable-area-content").html, "html.parser")
        for product in soup.select("div.tradebox"):
            fullname = product.select("span.instrument-name")[0].text.lower()
            if name.lower() in fullname:
                # wrappers whose placeholder mentions "close" -- presumably
                # marking an open position that can be closed
                mark_closed_list = [
                    x
                    for x in product.select("div.quantity-list-input-wrapper")
                    if x.select("div.placeholder")[0].text.lower().find(
                        "close") != -1
                ]
                if mark_closed_list:
                    sell_price = product.select("div.tradebox-price-sell")[0]\
                        .text
                    return float(sell_price)
                else:
                    return False

    class MovementWindow(object):
        """add movement window

        Wraps the site's "add movement" dialog: open it for a product,
        choose buy/sell, set quantity and stop limits, read prices and
        confirm the order.  Instances are created via new_mov().
        """
        def __init__(self, api, product):
            # api: owning LowLevelAPI; product: product name to search for
            self.api = api
            self.product = product
            self.state = 'initialized'
            # set True by decode() on an "Insufficient Funds" pop-up
            self.insfu = False

        def open(self, name_counter=None):
            """open the window and select the first search result matching
            self.product (rejecting names that contain *name_counter*)"""
            if self.api.css1(path['add-mov']).visible:
                self.api.css1(path['add-mov']).click()
            else:
                # with an empty movements table the button is a link instead
                self.api.css1('span.dataTable-no-data-action').click()
            logger.debug("opened window")
            self.api.css1(path['search-box']).fill(self.product)
            if self.get_result(0) is None:
                self.api.css1(path['close']).click()
                raise exceptions.ProductNotFound(self.product)
            result, product = self.search_res(self.product, name_counter)
            result.click()
            if self.api.elCss("div.widget_message"):
                self.decode(self.api.css1("div.widget_message"))
            # remember the full product name of the selected result
            self.product = product
            self.state = 'open'

        def _check_open(self):
            """raise WindowException unless the window is currently open"""
            if self.state == 'open':
                return True
            raise exceptions.WindowException()

        def close(self):
            """close a movement"""
            self._check_open()
            self.api.css1(path['close']).click()
            self.state = 'closed'
            logger.debug("closed window")

        def confirm(self):
            """confirm the movement"""
            self._check_open()
            self.get_price()
            self.api.css1(path['confirm-btn']).click()
            widg = self.api.css("div.widget_message")
            if widg:
                self.decode(widg[0])
                raise exceptions.WidgetException(widg)
            # Record the movement only when both quantity and mode were set.
            # (The original tested the truthiness of the attribute *names*,
            # which is always True, and then crashed on the attribute access.)
            if all(hasattr(self, x) for x in ('quantity', 'mode')):
                self.api.movements.append(
                    Movement(self.product, self.quantity, self.mode,
                             self.price))
                logger.debug("%s movement appended to the list" % self.product)
            self.state = 'conclused'
            logger.debug("confirmed movement")

        def search_res(self, res, check_counter=None):
            """search for a res

            Walks the result list until check_name() accepts an entry and
            returns (element, full_name); raises ProductNotFound when the
            list is exhausted.  (The original returned a bare element from
            inside the loop, which broke tuple unpacking in open(), and
            could pair the position-0 element with a later result's name.)
            """
            logger.debug("searching result")
            x = 0
            while True:
                result = self.get_result(x)
                name = self.get_research_name(result)
                if name is None:
                    self.api.css1(path['close']).click()
                    raise exceptions.ProductNotFound(res)
                logger.debug(name)
                if self.check_name(res, name, counter=check_counter):
                    logger.debug("found product at position %d" % (x + 1))
                    return result, name
                x += 1

        def check_name(self, name, string, counter=None):
            """True when *name* occurs in *string* and *counter* does not"""
            name = name.lower()
            string = string.lower()
            if counter is None:
                return name in string
            counter = counter.lower()
            if name in string and counter in string:
                logger.debug("check_name: counter found in string")
                return False
            return name in string and counter not in string

        def get_research_name(self, res):
            """return result name (None when *res* is None)"""
            if res is None:
                return None
            return self.api.css1("span.instrument-name", res).text

        def get_result(self, pos):
            """get pos result, where 0 is first; None when absent"""
            # xpath indices are 1-based, hence pos + 1
            evalxpath = path['res'] + f"[{pos + 1}]"
            try:
                return self.api.xpath(evalxpath)[0]
            except Exception:
                return None

        def set_limit(self, category, mode, value):
            """set limit in movement window

            category: 'gain', 'loss' or 'both'; mode: 'unit' or 'value'.
            Fills the matching input(s) and mirrors the setting in
            self.stop_limit.  Raises ValueError on bad arguments.
            """
            self._check_open()
            if (mode not in ["unit", "value"]
                    or category not in ["gain", "loss", "both"]):
                raise ValueError()
            if not hasattr(self, 'stop_limit'):
                self.stop_limit = {'gain': {}, 'loss': {}}
                logger.debug("initialized stop_limit")
            # 'both' targets the gain and loss inputs with the same value
            targets = ['gain', 'loss'] if category == 'both' else [category]
            for cat in targets:
                self.api.xpath(path['limit-%s-%s' % (cat, mode)])[0].fill(
                    str(value))
                self.stop_limit[cat]['mode'] = mode
                self.stop_limit[cat]['value'] = value
            logger.debug("set limit")

        def decode(self, message):
            """decode text pop-up and react to known titles"""
            title = self.api.css1("div.title", message).text
            text = self.api.css1("div.text", message).text
            if title == "Insufficient Funds":
                # flag only; the caller decides how to recover
                self.insfu = True
            elif title == "Maximum Quantity Limit":
                raise exceptions.MaxQuantLimit(num(text))
            elif title == "Minimum Quantity Limit":
                raise exceptions.MinQuantLimit(num(text))
            logger.debug("decoded message")

        def decode_update(self, message, value, mult=0.1):
            """decode the pop-up and return an updated *value*

            Returns the number embedded in the message when there is one;
            otherwise bumps *value* by *mult* when the message asks for a
            higher amount, or defers to decode() and returns None.
            (Text extraction moved outside the try: the original re-read
            msg_text in the except branch and hit a NameError whenever the
            css lookup itself had failed.)
            """
            msg_text = self.api.css1("div.text", message).text
            try:
                return num(msg_text)
            except Exception:
                if msg_text.lower().find("higher") != -1:
                    value += value * mult
                    return value
                self.decode(message)
                return None

        def get_mov_margin(self):
            """get the margin of the movement"""
            self._check_open()
            return num(self.api.css1("span.cfd-order-info-item-value").text)

        def set_mode(self, mode):
            """set mode (buy or sell)"""
            self._check_open()
            if mode not in ["buy", "sell"]:
                raise ValueError()
            self.api.css1(path[mode + '-btn']).click()
            self.mode = mode
            logger.debug("mode set")

        def get_quantity(self):
            """get current quantity"""
            self._check_open()
            quant = int(num(self.api.css1(path['quantity']).value))
            self.quantity = quant
            return quant

        def set_quantity(self, quant):
            """set quantity"""
            self._check_open()
            self.api.css1(path['quantity']).fill(str(int(quant)))
            self.quantity = quant
            logger.debug("quantity set")

        def get_price(self, mode='buy'):
            """get current price"""
            if mode not in ['buy', 'sell']:
                raise ValueError()
            self._check_open()
            price = num(
                self.api.css1("div.orderdialog div.tradebox-price-%s" %
                              mode).text)
            self.price = price
            return price

        def get_unit_value(self):
            """get unit value of stock based on margin, memoized"""
            # find in the collection first
            try:
                unit_value = Glob().theCollector.collection['unit_value']
                unit_value_res = unit_value[self.product]
                logger.debug("unit_value found in the collection")
                return unit_value_res
            except KeyError:
                logger.debug("unit_value not found in the collection")
            pip = get_pip(mov=self)
            quant = 1 / pip
            if hasattr(self, 'quantity'):
                # remember the current quantity so it can be restored below
                # (the original used '==' here, so old_quant was never bound
                # and the quantity was never restored)
                old_quant = self.quantity
            self.set_quantity(quant)
            # update the site
            time.sleep(0.5)
            margin = self.get_mov_margin()
            logger.debug(f"quant: {quant} - pip: {pip} - margin: {margin}")
            if 'old_quant' in locals():
                self.set_quantity(old_quant)
            unit_val = margin / quant
            self.unit_value = unit_val
            Glob().unit_valueHandler.add_val({self.product: unit_val})
            return unit_val

    def new_mov(self, name):
        """Factory: build a MovementWindow bound to this api for *name*."""
        window = self.MovementWindow(self, name)
        return window

    class Position(PurePosition):
        """position object

        Parsed view of one row of the open-positions table; created via
        new_pos() from either an html string or a soup node."""
        def __init__(self, api, html_div):
            """initialized from div"""
            self.api = api
            # accept raw html text or an already-parsed soup node
            if isinstance(html_div, type('')):
                self.soup_data = BeautifulSoup(html_div, 'html.parser')
            else:
                self.soup_data = html_div
            self.product = self.soup_data.select("td.name")[0].text
            self.quantity = num(self.soup_data.select("td.quantity")[0].text)
            # direction cell's span class distinguishes buy from sell rows
            if ("direction-label-buy"
                    in self.soup_data.select("td.direction")[0].span['class']):
                self.mode = 'buy'
            else:
                self.mode = 'sell'
            self.price = num(self.soup_data.select("td.averagePrice")[0].text)
            self.margin = num(self.soup_data.select("td.margin")[0].text)
            self.id = self.find_id()

        def update(self, soup):
            """update the soup"""
            # NOTE(review): only replaces soup_data; the parsed attributes
            # (price, margin, ...) are NOT re-read -- confirm callers expect that
            self.soup_data = soup
            return soup

        def find_id(self):
            """find pos ID from the row's own id attribute"""
            pos_id = self.soup_data['id']
            self.id = pos_id
            return pos_id

        @property
        def close_tag(self):
            """obtain close tag"""
            return f"#{self.id} div.close-icon"

        def close(self):
            """close position via tag"""
            self.api.css1(self.close_tag).click()
            try:
                # confirm dialog
                self.api.xpath(path['ok_but'])[0].click()
            except selenium.common.exceptions.ElementNotInteractableException:
                # the OK button is not clickable: a widget is in the way
                if (self.api.css1('.widget_message div.title').text ==
                        'Market Closed'):
                    logger.error("market closed, position can't be closed")
                    raise exceptions.MarketClosed()
                raise exceptions.WidgetException(
                    self.api.css1('.widget_message div.text').text)
                # wait until it's been closed
            # set a timeout
            timeout = time.time() + 10
            while self.api.elCss(self.close_tag):
                time.sleep(0.1)
                if time.time() > timeout:
                    raise TimeoutError("failed to close pos %s" % self.id)
            logger.debug("closed pos %s" % self.id)

        def get_gain(self):
            """get current profit"""
            gain = num(self.soup_data.select("td.ppl")[0].text)
            self.gain = gain
            return gain

        def bind_mov(self):
            """bind the corresponding movement

            Matches on product/quantity/mode and then on price within a
            +-1% tolerance; returns the movement or None."""
            logger = logging.getLogger("tradingAPI.low_level.bind_mov")
            mov_list = [
                x for x in self.api.movements if x.product == self.product
                and x.quantity == self.quantity and x.mode == self.mode
            ]
            if not mov_list:
                logger.debug("fail: mov not found")
                return None
            else:
                logger.debug("success: found movement")
            for x in mov_list:
                # find approximate price (+-1% band around this position's price)
                max_roof = self.price + self.price * 0.01
                min_roof = self.price - self.price * 0.01
                if min_roof < x.price < max_roof:
                    logger.debug("success: price corresponding")
                    # bind mov
                    self.mov = x
                    return x
                else:
                    logger.debug("fail: price %f not corresponding to %f" %
                                 (self.price, x.price))
                    continue
            # if nothing, return None
            return None

    def new_pos(self, html_div):
        """Factory: wrap *html_div* in a Position, bind its movement and
        register it in self.positions."""
        position = self.Position(self, html_div)
        position.bind_mov()
        self.positions.append(position)
        return position