Ejemplos de CommonFunctions.randomWait en Python

Lenguaje de programación: Python

Clase / Tipo: CommonFunctions

Método / Función: randomWait

Ejemplos en hotexamples.com: 4

Python CommonFunctions.randomWait - 4 ejemplos encontrados. Estos son los ejemplos de CommonFunctions.randomWait en Python mejor valorados, pertenecientes al paquete mac_apt y extraídos de proyectos de código abierto del mundo real. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Métodos usados con frecuencia

Mostrar Ocultar

ConvertSecondsToDayHourMinSec(30)

ShowErrorMessage(12)

generate_primes_less_than(10)

stripTags(8)

argumentOnly(7)

create_resultname(6)

commandOnly(6)

load_run(5)

PrintMainHeader(4)

InputError(4)

clickElement(4)

cleanField(4)

randomWait(4)

CreatePipeOutput(4)

concatTmp(3)

GetMD5HashBucketID(3)

getUnique(3)

is_pandigital(3)

load_runs(3)

find_divisors(3)

PrintMainFooter(3)

sieve_of_Erastothenes(3)

create_dir_if_not_exist(3)

ConvertSecondsToYearDayHourMinSec(3)

ask(3)

CommonFunctions(2)

daytoday(2)

getDriver(2)

is_palindrome(2)

find_prime_factors(2)

tuple_to_num(2)

GetFileHashesAsDict(2)

WriteArrayinFile(2)

clear_loaded_data(1)

is_prime(1)

generate_triangulars(1)

getLatLong(1)

GetFileCategory(1)

getUserInputNumbers(1)

GetDateSeconds(1)

GetCurrentDisplayDateTime(1)

load_mask(1)

generate_reptend_primes_less_than(1)

GetBufferHashesAsDict(1)

GenerateSearchField(1)

makeSlug(1)

makeUTF8(1)

FiletoArray(1)

FileExists(1)

Epitope_Info(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: MisterWhat.py Proyecto: fi11222/TNScrape

def _scrapeMisterWhatPages(p_driver, p_fOutMain, p_fOutSecondary, p_urlSearch, p_minDelay, p_maxDelay):
    """Walk the result pages starting at p_urlSearch and scrape every listed company.

    :param p_driver: Selenium driver used to load the pages
    :param p_fOutMain: open main CSV file, handed to getOneCompany()
    :param p_fOutSecondary: open secondary CSV file, handed to getOneCompany()
    :param p_urlSearch: URL of the first result page
    :param p_minDelay: lower bound passed to CommonFunctions.randomWait()
    :param p_maxDelay: upper bound passed to CommonFunctions.randomWait()
    :return: number of company links that were passed to getOneCompany()
    """
    l_count = 0
    l_urlSearch = p_urlSearch

    while True:
        # load the current result page
        p_driver.get(l_urlSearch)

        # a page whose footer never shows up within 10 s ends the search
        try:
            WebDriverWait(p_driver, 10).until(EC.presence_of_element_located(
                (By.XPATH, '//footer')))
        except EX.TimeoutException:
            break

        # collect all company links first: the driver navigates away while scraping them
        l_itemList = []
        for l_article in p_driver.find_elements(By.XPATH, '//div[@class="listwrapper"]' +
                                                          '//div[@class="box-company"]/div/a'):
            l_itemLink = l_article.get_attribute('href')
            print('l_itemLink:', l_itemLink)
            l_itemList.append(l_itemLink)

        # link held by the last pagination item (empty string if there is no pagination)
        l_nextLink = ''
        for l_next in p_driver.find_elements(By.XPATH, '//ul[@class="pagination "]/li[last()]/a'):
            l_nextLink = l_next.get_attribute('href')
            print('l_nextLink:', l_nextLink)

        for l_link in l_itemList:
            getOneCompany(p_driver, p_fOutMain, p_fOutSecondary, urllib.parse.urljoin(g_url, l_link), l_count)
            l_count += 1

            CommonFunctions.randomWait(p_minDelay, p_maxDelay)

        if l_nextLink == '':
            break
        l_urlSearch = urllib.parse.urljoin(g_url, l_nextLink)

    print('Number of Items retrieved', l_count)
    return l_count


def doSearch(p_search, p_location, p_csvPathA, p_csvPathB, p_minDelay, p_maxDelay):
    """Run one search on the site at g_url and write the results to two CSV files.

    Both output files and the Firefox driver are released even if scraping raises.

    :param p_search: search keywords (URL-quoted into the `what` query parameter)
    :param p_location: location string (URL-quoted into the `where` query parameter)
    :param p_csvPathA: path of the main CSV output file (overwritten)
    :param p_csvPathB: path of the secondary CSV output file (overwritten)
    :param p_minDelay: lower bound passed to CommonFunctions.randomWait()
    :param p_maxDelay: upper bound passed to CommonFunctions.randomWait()
    :return: number of company links processed
    """
    l_urlSearch = '{0}search?what={1}&where={2}'.format(
        g_url,
        urllib.parse.quote(p_search, safe=''),
        urllib.parse.quote(p_location, safe='')
    )

    # context managers guarantee that both CSV files get closed on every exit path
    with open(p_csvPathA, 'w') as l_fOutMain, open(p_csvPathB, 'w') as l_fOutSecondary:
        # header row of the main file
        l_fOutMain.write('ID;NAME;ADDRESS;CP;CITY;CREATION;SIRET;TYPE;COUNT;OWNER;' +
                         'TEL1;TEL2;TEL3;TEL4;MAIL;WEB1;WEB2;WEB3;WEB4;HOURS;BUSINESS;ADDITIONAL\n')
        # header row of the secondary file
        l_fOutSecondary.write('ID;TYPE;RAW;CLEAN;FLAG\n')

        # Create a new instance of the Firefox driver
        l_driver = webdriver.Firefox()
        try:
            # fixed window geometry: 1500x1500 at screen position (1000, 1000)
            l_driver.set_window_size(1500, 1500)
            l_driver.set_window_position(1000, 1000)

            l_count = _scrapeMisterWhatPages(
                l_driver, l_fOutMain, l_fOutSecondary, l_urlSearch, p_minDelay, p_maxDelay)
        finally:
            # never leave a browser running behind, even after a failure
            l_driver.quit()

    return l_count

Ejemplo n.º 2

Mostrar archivo

Archivo: 118218.py Proyecto: fi11222/TNScrape

def _collect118218Links(p_driver, p_urlSearch, p_minDelay, p_maxDelay):
    """Page through the result set and return the list of company links found.

    NOTE(review): this block still uses the legacy find_element(s)_by_xpath helpers
    because `By` is not visibly in scope here; confirm they exist in the Selenium
    version in use.

    :param p_driver: Selenium driver, already positioned on the first result page
    :param p_urlSearch: URL of the first result page (reloaded after an abuse message)
    :param p_minDelay: lower bound passed to CommonFunctions.randomWait()
    :param p_maxDelay: upper bound passed to CommonFunctions.randomWait()
    :return: list of `href` values of all `//h2/a` links seen on the visited pages
    """
    l_urlSearch = p_urlSearch
    l_linksList = []
    l_currentPage = 1
    # back-off delay (seconds) used when the abuse message is displayed
    l_wait = 60

    while True:
        print('Result page:', l_currentPage)

        # Wait for the footer to appear
        if not waitFoFooter(p_driver):
            break

        try:
            l_messageDisplay = p_driver.find_element_by_xpath(
                '//article/section[@class="staticContent ieWrapperFix"]')
            l_message = l_messageDisplay.text
            if re.match('Nos systèmes ont détecté un trafic important', l_message):
                print('Abuse message:', l_message)

                # retry with a delay growing by 60 s each time, while still within
                # the first 20 pages and the delay has not exceeded 300 s
                if l_currentPage <= 20 and l_wait <= 300:
                    print('Waiting for {0} seconds ...'.format(l_wait))
                    time.sleep(l_wait)
                    l_wait += 60

                    p_driver.get(l_urlSearch)
                    continue

                # otherwise give up paging and keep the links collected so far
                break

        except EX.NoSuchElementException:
            print('Ok apparently ...')

        # no abuse message on this page: reset the back-off delay
        l_wait = 60

        try:
            l_resultCountLocation = p_driver.find_element_by_xpath('//p[@class="resultCount"]')
            l_resultCount = l_resultCountLocation.text
            print('l_resultCount:', l_resultCount)
        except EX.NoSuchElementException:
            print('No Results')
            break

        for l_link in p_driver.find_elements_by_xpath('//h2/a'):
            l_linkUrl = l_link.get_attribute('href')
            l_linksList.append(l_linkUrl)
            print('l_linkUrl:', l_linkUrl)

        try:
            l_found = False
            for l_link in p_driver.find_elements_by_xpath('//a'):
                # find next page link page
                if l_link.get_attribute('data-page') == str(l_currentPage + 1):
                    l_found = True
                    l_currentPage += 1
                    l_urlSearch = l_link.get_attribute('href')
                    print('Link to next page:', l_urlSearch)

                    # scroll to it, to make it visible, and then click it
                    l_actions = ActionChains(p_driver)
                    l_actions.move_to_element(l_link)
                    l_actions.click()
                    l_actions.perform()

                    CommonFunctions.randomWait(p_minDelay, p_maxDelay)
                    break

            if not l_found:
                # if the link was not found --> Finished
                print('No More Results')
                break

        except EX.NoSuchElementException:
            print('No More Results')
            break

    return l_linksList


def doSearch(p_search, p_location, p_pathA, p_pathB, p_minDelay, p_maxDelay, p_distance):
    """Run one search on the site at g_url and write the results to two CSV files.

    All result links are collected first, then each company page is scraped with
    doOneCompany(). Both output files and the driver are released even if scraping raises.

    :param p_search: search keywords (URL-quoted into the `what` query parameter)
    :param p_location: location string (URL-quoted into the `where` query parameter)
    :param p_pathA: path of the main CSV output file (overwritten)
    :param p_pathB: path of the secondary CSV output file (overwritten)
    :param p_minDelay: lower bound passed to CommonFunctions.randomWait()
    :param p_maxDelay: upper bound passed to CommonFunctions.randomWait()
    :param p_distance: value of the `distance` query parameter; left empty unless > 0
    :return: number of companies for which doOneCompany() succeeded
    """
    # http://www.118218.fr/recherche?category_id=&geo_id=&distance=&category=&what=plombier&where=75013
    # the distance parameter is only filled in when a positive distance was requested
    l_distance = p_distance if p_distance > 0 else ''
    l_urlSearch = '{0}recherche?category_id=&geo_id=&distance={3}&category=&what={1}&where={2}'.format(
        g_url,
        urllib.parse.quote(p_search, safe=''),
        urllib.parse.quote(p_location, safe=''),
        l_distance
    )

    # context managers guarantee that both CSV files get closed on every exit path
    with open(p_pathA, 'w') as l_fOutMain, open(p_pathB, 'w') as l_fOutSecondary:
        # header row of the main file
        l_fOutMain.write('ID;NAME;ADDRESS;CP;CITY;CREATION;SIRET;TYPE;COUNT;OWNER;' +
                         'TEL1;TEL2;TEL3;TEL4;MAIL;WEB1;WEB2;WEB3;WEB4;HOURS;BUSINESS;ADDITIONAL\n')
        # header row of the secondary file
        l_fOutSecondary.write('ID;TYPE;RAW;CLEAN;FLAG\n')

        # obtain the driver from the shared helper
        l_driver = CommonFunctions.getDriver()
        l_count = 0
        try:
            # go to the base Url
            l_driver.get(l_urlSearch)

            # get all links in the result set
            l_linksList = _collect118218Links(l_driver, l_urlSearch, p_minDelay, p_maxDelay)

            for l_url in l_linksList:
                # Scrape one company and stops in case of failure
                if not doOneCompany(l_driver, l_url, l_fOutMain, l_fOutSecondary, p_minDelay, p_maxDelay, l_count):
                    break

                l_count += 1
                CommonFunctions.randomWait(p_minDelay, p_maxDelay)
        finally:
            # never leave a browser running behind, even after a failure
            l_driver.quit()

        print('Number of items retrieved:', l_count)

    return l_count

Ejemplo n.º 3

Mostrar archivo

Archivo: MisterWhat.py Proyecto: fi11222/TNScrape

        # NOTE: this is an excerpt; the enclosing function and the `if` matching the
        # `else` below are outside the visible source.
        # running total of the counts returned by doSearch() over all communes
        l_totalCount = 0
        # one tmp file per commune
        for l_communeId, l_communeName in l_communes:
            l_tmpA = os.path.join(g_misterWhatDir, '__tmpA_{0}.csv'.format(l_communeId))
            l_tmpB = os.path.join(g_misterWhatDir, '__tmpB_{0}.csv'.format(l_communeId))

            # only search communes whose tmp file does not exist yet
            # NOTE(review): both operands test l_tmpA; the second one was presumably
            # meant to be l_tmpB -- confirm before changing
            if not os.path.isfile(l_tmpA) and not os.path.isfile(l_tmpA):
                print('Search for "{0}" in "{1}" ...'.format(l_search, l_communeName))

                l_count = doSearch(l_search, l_communeName, l_tmpA, l_tmpB, l_minDelay, l_maxDelay)
                l_totalCount += l_count

                print('Search for "{0}" in "{1}" Complete'.format(l_search, l_communeName))
                # an empty search still gets a random pause before the next commune
                # (presumably because no per-item wait happened inside doSearch -- confirm)
                if l_count == 0:
                    CommonFunctions.randomWait(l_minDelay, l_maxDelay)

                # if l_totalCount > 300:
                #     break


        print('Total number of items retrieved:', l_totalCount)
        # merge the tmp files (the list passed holds the commune ids)
        CommonFunctions.concatTmp(g_misterWhatDir, [i for i, c in l_communes], l_pathA, l_pathB)

        # sort the result
        CommonFunctions.csvSort(l_pathA, p_départements=True)
    else:
        # otherwise, do an ordinary search
        doSearch(l_search, l_location, l_pathA, l_pathB, l_minDelay, l_maxDelay)
        # and sort the results as well
        # (the excerpt ends here; the sorting call itself is not visible)

Ejemplo n.º 4

Mostrar archivo

Archivo: PagesJaunes.py Proyecto: fi11222/TNScrape

def _scrapePagesJaunes(p_driver, p_fOutMain, p_fOutSecondary, p_search, p_location, p_minDelay, p_maxDelay):
    """Submit the search form at g_url and scrape every company of every result page.

    :param p_driver: Selenium driver used for the whole session
    :param p_fOutMain: open main CSV file, handed to doOneCompany()
    :param p_fOutSecondary: open secondary CSV file, handed to doOneCompany()
    :param p_search: text typed into the keyword input box
    :param p_location: text typed into the location input box
    :param p_minDelay: lower bound passed to CommonFunctions.randomWait()
    :param p_maxDelay: upper bound passed to CommonFunctions.randomWait()
    :return: number of companies for which doOneCompany() succeeded, or 0 as soon as
        one of the numbered "[0x] Something is badly wrong" conditions occurs
    """
    # go to the base Url
    p_driver.get(g_url)

    try:
        # locate the keyword search input text box and enter the search string
        l_quoiQui = WebDriverWait(p_driver, 10).until(EC.presence_of_element_located(
            (By.XPATH, '//input[@id="pj_search_quoiqui"]')))
        print('l_quoiQui placeholder:', l_quoiQui.get_attribute('placeholder'))
        l_quoiQui.send_keys(p_search)

        # locate the location input text box and enter the location string
        l_ou = p_driver.find_element(By.ID, 'pj_search_ou')
        print('l_ou placeholder:', l_ou.get_attribute('placeholder'))
        l_ou.send_keys(p_location)

        # submit the form
        p_driver.find_element(By.XPATH, '//button[@class="button primary icon large-button"]').click()
    except EX.NoSuchElementException:
        print('[01] Something is badly wrong (Element not found) ...')
        return 0
    except EX.TimeoutException:
        print('[02] Something is badly wrong (Timeout) ...')
        return 0

    l_count = 0
    while True:
        try:
            # wait until either at least one company title or the "no response" box is present
            WebDriverWait(p_driver, 10).until(
                lambda p_waitDriver:
                    p_waitDriver.find_elements(By.XPATH, '//h2[@class="company-name"]')
                    or p_waitDriver.find_elements(By.XPATH, '//div[@class="no-response"]'))
        except EX.TimeoutException:
            print('[03] Something is badly wrong (Timeout) ...')
            return 0

        # a popup was dismissed: start over with the wait on the same page
        if killPopup(p_driver):
            continue

        try:
            p_driver.find_element(By.XPATH, '//div[@class="no-response"]')
            print('No results')
            break
        except EX.NoSuchElementException:
            print('There should be results')

        try:
            # reformulation
            l_reformulation = p_driver.find_element(
                By.XPATH, '//span[@class="denombrement"]/strong[@id="SEL-nbresultat"]')

            l_resultCount = l_reformulation.text
            print('l_resultCount:', l_resultCount)

        except EX.NoSuchElementException:
            print('No reformulation ?! ...')

        # ids of the elements four levels above each company title on this page
        l_articleList = []
        try:
            for l_company in p_driver.find_elements(By.XPATH, '//h2[@class="company-name"]/../../../..'):
                l_articleId = l_company.get_attribute('id')
                print('l_articleId:', l_articleId)
                l_articleList.append(l_articleId)

        except EX.NoSuchElementException:
            print('[04] Something is badly wrong (Element not found) ...')
            return 0

        try:
            for l_articleId in l_articleList:
                if killPopup(p_driver):
                    print('Popup Killed, waiting for 10 s.')
                    time.sleep(10)

                print('+ l_articleId:', l_articleId)
                l_company = p_driver.find_element(
                    By.XPATH,
                    '//article[@id="{0}"]//h2[@class="company-name"]/a[2]'.format(l_articleId))

                l_name = l_company.text
                print('Fetching:', l_name)

                # bring the link into view, then scroll back up by 300 px
                p_driver.execute_script("return arguments[0].scrollIntoView();", l_company)
                p_driver.execute_script("window.scrollBy(0, -300);")

                # Save the window opener (current window, do not mistaken with tab... not the same)
                l_mainWindow = p_driver.current_window_handle

                # open the link through the context menu (right click, arrow down, enter)
                l_actions = ActionChains(p_driver)
                l_actions.move_to_element(l_company)
                l_actions.context_click()
                l_actions.send_keys(Keys.ARROW_DOWN)
                l_actions.send_keys(Keys.ENTER)
                l_actions.perform()

                # Switch tab to the new tab, which we will assume is the next one on the right
                p_driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.CONTROL + Keys.TAB)

                # Put focus on current window which will, in fact, put focus on the current visible tab
                p_driver.switch_to.window(l_mainWindow)

                if doOneCompany(p_driver, p_fOutMain, p_fOutSecondary, l_count):
                    l_count += 1

                CommonFunctions.randomWait(p_minDelay, p_maxDelay)

                # Close current tab
                p_driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.CONTROL + 'w')

                # Put focus on current window which will be the window opener
                p_driver.switch_to.window(l_mainWindow)

        except EX.NoSuchElementException:
            print('[05] Something is badly wrong (Element not found) ...')
            return 0

        # locate the next button and click it
        try:
            l_next = p_driver.find_element(By.ID, 'pagination-next')

            # scroll to it, to make it visible, and then click it
            l_actions = ActionChains(p_driver)
            l_actions.move_to_element(l_next)
            l_actions.click()
            l_actions.perform()
        except EX.NoSuchElementException:
            print('No more results')
            break

    print('Number of items retrieved:', l_count)
    return l_count


def doSearch(p_search, p_location, p_pathA, p_pathB, p_minDelay, p_maxDelay):
    """Run one search on the site at g_url and write the results to two CSV files.

    Both output files and the driver are released on every exit path, including
    the early `return 0` error paths and any exception raised while scraping.

    :param p_search: text typed into the keyword input box
    :param p_location: text typed into the location input box
    :param p_pathA: path of the main CSV output file (overwritten)
    :param p_pathB: path of the secondary CSV output file (overwritten)
    :param p_minDelay: lower bound passed to CommonFunctions.randomWait()
    :param p_maxDelay: upper bound passed to CommonFunctions.randomWait()
    :return: number of companies scraped, or 0 if the scrape was aborted
    """
    # context managers guarantee that both CSV files get closed on every exit path
    with open(p_pathA, 'w') as l_fOutMain, open(p_pathB, 'w') as l_fOutSecondary:
        # header row of the main file
        l_fOutMain.write('ID;NAME;ADDRESS;CP;CITY;CREATION;SIRET;TYPE;COUNT;OWNER;' +
                         'TEL1;TEL2;TEL3;TEL4;MAIL;WEB1;WEB2;WEB3;WEB4;HOURS;BUSINESS;ADDITIONAL\n')
        # header row of the secondary file
        l_fOutSecondary.write('ID;TYPE;RAW;CLEAN;FLAG\n')

        # obtain the driver from the shared helper
        l_driver = CommonFunctions.getDriver()
        try:
            return _scrapePagesJaunes(
                l_driver, l_fOutMain, l_fOutSecondary, p_search, p_location, p_minDelay, p_maxDelay)
        finally:
            # never leave a browser running behind, even on the early-return paths
            l_driver.quit()