def get_url_of_countries_nz(driver):
    info = {}
    name = ''
    LOGGER.info('Retrieving URL of all countries for New Zealand advisory')
    try:
        # this is the link to the first page
        url = 'https://safetravel.govt.nz/travel-advisories-destination'
        # set up the headless chrome driver
        # note: the passed-in driver is ignored; this function creates and
        # quits its own driver -- to change
        driver = create_driver()
        driver.get(url)
        # Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'lxml')
        # pattern of the link to the country page that the href should match
        reg = regex.compile(r'\w+-*')
        table = soup.find('table')
        table_body = table.find('tbody')
        table_rows = table_body.find_all('tr')
        for tr in table_rows:
            cols = tr.find_all('td')
            cols = [ele.text.strip() for ele in cols]
            name = cols[1]
            a = tr.find('a', attrs={'href': reg})
            info[name] = {"href": a['href']}
            LOGGER.success(f'URL for {name} was successfully retrieved')
        LOGGER.success('Successfully retrieved URL of all countries for the New Zealand advisory')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while retrieving the URLs of {name} for the New Zealand advisory because of the following error: {error_msg}')
    finally:
        quit_driver(driver)
    return info

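# Nearly every parser in this module repeats the same create-driver /
# load-page / hand-off-to-BeautifulSoup sequence, and several carry
# "-- to change" notes about which function owns the driver. A minimal
# sketch of a shared helper under those assumptions; get_page_soup is a
# hypothetical name, not something the codebase currently defines:
def get_page_soup(url, driver=None):
    """Return the BeautifulSoup of url, managing the driver only if we own it."""
    owns_driver = driver is None
    if owns_driver:
        driver = create_driver()
    try:
        driver.get(url)
        return BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # quit only the driver this helper created; leave callers' drivers alive
        if owns_driver:
            quit_driver(driver)
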
def parse_one_country_vaccine(url, country):
    driver = create_driver()
    driver.get(url)
    vaccines = {}
    LOGGER.info(f'Parsing the vaccination information for the following country: {country}')
    # Selenium hands the page source to Beautiful Soup
    soup = BeautifulSoup(driver.page_source, 'lxml')
    count = 0
    for tbody in soup.find_all('tbody'):
        for row in tbody.find_all('tr'):
            name = row.find('td', {"class": "traveler-disease"})
            info = row.find('td', {"class": "traveler-findoutwhy"})
            if name and info:
                name = name.text.strip('/\n')
                # the first table has its line breaks stripped outright;
                # later tables replace them with spaces
                if count == 0:
                    info = info.text.replace('\n', '')
                else:
                    info = info.text.replace('\n', ' ')
                vaccines[name] = info
        count = count + 1
    quit_driver(driver)
    save_one_country(vaccines, country)
    print(vaccines)
    return vaccines

def find_a_post(location, request_id, i=1):
    LOGGER.info(f'Starting the parser for the following location: {location}')
    driver = create_driver()
    location = location.replace(' ', '')
    url = instagram_url + location + "/"
    try:
        LOGGER.info(f'Retrieving the link to the image page for: {location}')
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        garb_all = soup.find_all('a', {'href': regex.compile(r'/p/')})
    except Exception:
        LOGGER.error(f'Could not get the link to the image page for: {location}')
        # bail out: without the links there is nothing to parse
        quit_driver(driver)
        return False
    count = 0
    for g in garb_all:
        count += 1
        if count > i:
            break
        u = "https://www.instagram.com" + g.get('href')
        try:
            image_info = get_image_info(driver, u)
            LOGGER.success(f'Image info for: {location}')
        except Exception:
            LOGGER.error(f'Could not get the info of the image for: {location}')
            count -= 1
            continue
        try:
            save_img_url(image_info['image_link'], 'images_to_filter/check.jpg')
            selfie = check_if_selfie('images_to_filter/check.jpg')
            group_photo = check_if_group_photo('images_to_filter/check.jpg')
            objects_too_big = check_for_objects('images_to_filter/check.jpg')
            too_much_similar_colors = find_nearest_colors('images_to_filter/check.jpg')
            if (not selfie and not group_photo and not objects_too_big
                    and not too_much_similar_colors
                    and not check_if_wrong_geolocation(location, image_info['geolocation'])):
                save_image("images", image_info, location, str(request_id))
                LOGGER.success(f'Saved Image info for: {location}')
                quit_driver(driver)
                return True
            else:
                # keep the rejected image around for manual inspection
                failed_img = Image.open('images_to_filter/check.jpg')
                failed_img.save(f'images_to_filter/discarded/{get_last_discarded()}.jpg')
                LOGGER.error('Cannot save image. It is now in images_to_filter/discarded/')
                count -= 1
        except Exception:
            LOGGER.error(f'Could not save the info of the image for: {location}')
            count -= 1
    quit_driver(driver)
    return False

def save_to_MU():
    LOGGER.info('Saving and parsing Mauritius into the database')
    driver = create_driver()
    LOGGER.info('Begin parsing for Mauritius advisory')
    try:
        wiki_visa_url = wiki_visa_url_MU
        wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
        visas = wiki_visa_ob.visa_parser_table()
        LOGGER.success('Parsing for Mauritius advisory has been successfully completed')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while parsing for Mauritius advisory because of the following error: {error_msg}')
    info = {}
    array_info = []
    # create an sqlite_advisory object
    db = Database("countries.sqlite")
    db.drop_table("MU")
    db.add_table("MU",
                 country_iso="text",
                 name="text",
                 advisory_text="text",
                 visa_info="text")
    LOGGER.info('Saving Mauritius table into the database')
    try:
        for country in visas:
            iso = find_iso_of_country(country)
            if iso != "":
                name = country
                LOGGER.info(f'Saving {name}')
                # dictionary for visa info is country{visa: text}
                visa = visas[country].get('visa')
                advisory = "Not available yet"
                info = {
                    "country_iso": iso,
                    "name": name,
                    "advisory": advisory,
                    "visa_info": visa
                }
                array_info.append(info)
                print(name, " ", visa, " ", advisory)
                db.insert("MU", iso, name, advisory, visa)
                LOGGER.success(f'{name} was successfully saved to the database with the following information: {visa}. {advisory}.')
        LOGGER.success('Mauritius table successfully saved to the database')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while saving Mauritius table to the database because of the following error: {error_msg}')
    db.close_connection()
    quit_driver(driver)
    with open('./advisory-mu.json', 'w') as outfile:
        json.dump(array_info, outfile)

def find_all_ireland():
    LOGGER.info("Begin parsing and saving for Ireland...")
    my_driver = create_driver()
    all_url = find_all_url(my_driver)
    data = find_all_iso(all_url)
    LOGGER.info('Parsing visa requirements for all countries for the Ireland advisory')
    try:
        wiki_visa_ob = wiki_visa_parser(
            "https://en.wikipedia.org/wiki/Visa_requirements_for_Irish_citizens",
            my_driver)
        visas = wiki_visa_ob.visa_parser_table()
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while getting the visa requirements for the Ireland advisory because of the following error: {error_msg}')
    for country in data:
        c = data[country]
        url = c['href']
        my_driver.implicitly_wait(5)
        my_driver.get(url)
        soup = BeautifulSoup(my_driver.page_source, 'lxml')
        c['visa-info'] = get_one_info(url, 'visa/passport', my_driver, soup)
        c['advisory-text'] = get_one_advisory(url, my_driver, soup)
        c['name'] = country
        if c['visa-info'] == '':
            c['visa-info'] = get_one_info(url, 'Entry requirements', my_driver, soup)
        iso = c['country-iso']
        # handling some exceptions, had to do research
        if iso == 'AI':
            c['visa-info'] = 'Visa not required for 3 months'
        elif iso == 'BM':
            c['visa-info'] = 'Visa not required for 21 days (extendable)'
        elif iso == 'MQ':
            iso = 'FR'
        elif iso == 'MS':
            c['visa-info'] = 'Visa not required for 6 months'
        elif iso == 'RE':
            iso = 'FR'
        else:
            try:
                c['visa-info'] = visas[country].get('visa') + "<br>" + c['visa-info']
            except Exception as error_msg:
                print(c, error_msg)
                LOGGER.warning(f'Error message: {error_msg}')
    # dump the data into json, to be deleted later
    quit_driver(my_driver)
    with open('./advisory-ie.json', 'w') as outfile:
        json.dump(data, outfile)
    save_into_db(data)


# find_all_ireland()

def save_to_united_states():
    LOGGER.info("Begin parsing and saving for United States table...")
    driver = create_driver()
    data = {}  # Used to store all the parsed data of each country
    name_to_advisories = {}  # Stores the names and associated advisories
    LOGGER.info('Retrieving visa requirements for all countries for the United States advisory')
    name_advisory = get_name_and_advisory_of_countries()
    wiki_visa_url = "https://en.wikipedia.org/wiki/Visa_requirements_for_United_States_citizens"
    wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
    visas = wiki_visa_ob.visa_parser_table()
    LOGGER.success('Successfully retrieved visa requirements for all countries for the United States advisory')
    # sort the dictionary containing names and advisories
    for name in sorted(name_advisory.keys()):
        name_to_advisories[name] = name_advisory[name]
    counter_country = 0
    # iterate through name_to_advisories to retrieve the advisories
    for country in name_to_advisories:
        driver.implicitly_wait(5)
        name = country
        advisory = name_to_advisories[country]
        visa_text = ""
        # match the country against the list of visas, consuming each entry once
        if country in visas:
            visa_text = visas.pop(country).get('visa')
        country_iso = "na"
        data[name] = {
            'country-iso': country_iso,
            'name': name,
            'advisory-text': advisory,
            'visa-info': visa_text
        }
        # restart the driver every 50 countries to keep memory in check
        if (counter_country % 50) == 0:
            quit_driver(driver)
            driver = create_driver()
        counter_country += 1
    data = find_all_iso(data)  # Sets the iso for each country
    with open('./advisory-us.json', 'w') as outfile:
        json.dump(data, outfile)
    save_into_db(data)

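# save_to_united_states and save_to_new_zealand (below) both restart the
# Chrome driver every 50 countries to keep memory in check. A sketch of that
# pattern factored out, assuming the existing create_driver/quit_driver
# utilities; recycle_driver is a hypothetical helper name:
def recycle_driver(driver, counter, every=50):
    """Quit and recreate the driver once every `every` iterations."""
    if counter % every == 0:
        quit_driver(driver)
        driver = create_driver()
    return driver
# usage inside the loop: driver = recycle_driver(driver, counter_country)
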
def parse_one_country_advisory(url, href):
    # href is currently unused
    driver = create_driver()
    driver.get(url)
    advisory = ""
    # Selenium hands the page source to Beautiful Soup
    soup = BeautifulSoup(driver.page_source, 'lxml')
    advisory_div = soup.find("div", {"class": "gem-c-govspeak govuk-govspeak direction-ltr"})
    # the advisory is the first two paragraphs of the advisory div
    paragraphs = advisory_div.find_all("p")
    advisory = paragraphs[0].text + " " + paragraphs[1].text
    quit_driver(driver)
    return advisory

def parse_one_country_advisory(url):
    driver = create_driver()
    driver.get(url)
    # Selenium hands the page source to Beautiful Soup
    soup = BeautifulSoup(driver.page_source, 'lxml')
    advisory_paragraph1 = ""
    # the pages of certain countries are built differently, so fall back
    # through the known layouts until one of them matches
    accordion_class = ("acc-content ui-accordion-content ui-corner-bottom "
                       "ui-helper-reset ui-widget-content ui-accordion-content-active")
    try:
        advisory_div = soup.find_all("div", {"class": accordion_class})[1]
        advisory_paragraph = advisory_div.find_all("span")[0].text
        advisory_paragraph1 = advisory_paragraph.split('\n')[0]
    except IndexError:
        try:
            advisory_div = soup.find_all("div", {"class": accordion_class})[1]
            advisory_paragraph = advisory_div.find_all("p")[0].text
            advisory_paragraph1 = advisory_paragraph.split('\n')[0]
        except IndexError:
            try:
                advisory_div = soup.find_all("div", {"class": accordion_class})[1]
                advisory_paragraph = advisory_div.text
                advisory_paragraph1 = advisory_paragraph.split('\n')[1]
            except IndexError:
                try:
                    advisory_div = soup.find_all("div", {"class": "alert-section"})[0]
                    advisory_paragraph = advisory_div.find_all("p")[0].text
                    advisory_paragraph1 = advisory_paragraph.split('\n')[0]
                except IndexError:
                    advisory_div = soup.find_all("div", {"class": "space"})[0]
                    advisory_paragraph = advisory_div.find_all("p")[1].text
                    advisory_paragraph1 = advisory_paragraph.split('\n')[0]
    advisory_paragraph1 = advisory_paragraph1.lstrip()
    LOGGER.info(advisory_paragraph1)
    quit_driver(driver)
    return advisory_paragraph1

def get_url_of_countries():
    info = {}
    try:
        # this is the link to the first page
        url = 'https://smartraveller.gov.au/countries/pages/list.aspx'
        LOGGER.info('Retrieving the URLs for all countries for the Australian advisory')
        # create a new chrome session
        driver = create_driver()
        driver.get(url)
        # Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'lxml')
        # pattern of the link to the country page that the href should match
        reg = regex.compile(r'\/destinations\/\w+-*\w*\/\w+-*\w*')
        table = soup.find('table')
        table_body = table.find('tbody')
        table_rows = table_body.find_all('tr')
        for tr in table_rows:
            cols = tr.find_all('td')
            cols = [ele.text.strip() for ele in cols]
            if cols[2] == '':
                cols[2] = 'No advisory from the Australian government'
            name = cols[0]
            advisory_text = cols[2]
            a = tr.find('a', attrs={'href': reg})
            if a is not None:
                href = a['href']
                info[name] = {"href": href, "advisory-text": advisory_text}
                LOGGER.success(f'Retrieved URL for {name}')
        LOGGER.success('Successfully retrieved the URLs for all countries of the Australian advisory')
    except Exception:
        LOGGER.error('An error has occurred while retrieving the URLs for all countries for the Australian advisory')
    finally:
        quit_driver(driver)
    return info

def save_to_new_zealand():
    LOGGER.info("Begin parsing and saving for New Zealand table...")
    driver = create_driver()
    data = {}  # Used to store all the parsed data of each country
    # get_url_of_countries_nz creates its own driver -- to change
    url = get_url_of_countries_nz(driver)
    LOGGER.info('Retrieving visa requirements for New Zealand advisory')
    try:
        wiki_visa_url = "https://en.wikipedia.org/wiki/Visa_requirements_for_New_Zealand_citizens"
        wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
        # Used to acquire the visa info of each country
        visas = wiki_visa_ob.visa_parser_table()
        LOGGER.success('Successfully retrieved visa requirements of all countries for the New Zealand advisory')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while retrieving visa requirements for the New Zealand advisory because of the following error: {error_msg}')
    counter_country = 0
    # iterate through the URLs to retrieve the advisory information
    for country in url:
        driver.implicitly_wait(5)
        name = country
        href = url[country].get("href")
        link = "https://safetravel.govt.nz/{}".format(href)
        advisory = parse_a_country_advisory(link, driver)
        visa_text = ""
        # match the country against the list of visas, consuming each entry once
        if country in visas:
            visa_text = visas.pop(country).get('visa')
        country_iso = "na"
        data[name] = {
            'country-iso': country_iso,
            'name': name,
            'advisory-text': advisory,
            'visa-info': visa_text
        }
        # restart the driver every 50 countries to keep memory in check
        if (counter_country % 50) == 0:
            quit_driver(driver)
            driver = create_driver()
        counter_country += 1
    data = find_all_iso(data)  # Sets the iso for each country
    with open('./advisory-nz.json', 'w') as outfile:
        json.dump(data, outfile)
    save_into_db(data)

def save_to_weather():
    # Antigua and Barbuda
    LOGGER.info('Beginning parsing for average monthly temperature')
    avg_monthly_temperature = ''
    driver = create_driver()
    try:
        wiki_temperature = wiki_weather_parser(wiki_visa_temperature, driver)
        avg_monthly_temperature = wiki_temperature.visa_parser_table()
        LOGGER.success(f'Following data was retrieved: {avg_monthly_temperature}')
        save_into_db('weather', avg_monthly_temperature)
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while parsing for temperature because of the following error: {error_msg}')
    finally:
        # quit the driver even when parsing fails
        quit_driver(driver)

def get_all_links():
    LOGGER.info('Retrieving the URLs for all countries for unsafe areas')
    iso_list = config.iso_list
    data = {}
    # home page link
    home = 'https://travel.gc.ca/travelling/advisories'
    driver = create_driver()
    driver.get(home)
    try:
        soup = BeautifulSoup(driver.page_source, 'lxml')
        table = soup.find('table', attrs={'id': 'reportlist'})
        tbody = table.find('tbody')
        rows = tbody.find_all('tr')
        # parse the table and get the link in the <a> tag
        for row in rows:
            col1 = row.find('a')
            name = col1.text
            href = col1['href']
            # the iso function accepts a dictionary with the name as key,
            # so a few names need to be fixed up first
            if name == "Canary Islands":
                # not matched by the iso lookup, so set the iso manually
                data[name] = {'href': href, 'country-iso': 'CI'}
            else:
                if name == "Saint Vincent & the Grenadines":
                    name = "Saint Vincent and the Grenadines"
                elif name == "Virgin Islands (U.S.)":
                    name = "United States Virgin Islands"
                data[name] = {'href': href}
            LOGGER.success(f'Retrieved the URL for {name}')
        LOGGER.success('Retrieved all the URLs for unsafe areas')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while retrieving the URLs for all countries from the Canadian travel website because of the following error: {error_msg}')
    finally:
        quit_driver(driver)
    data = find_all_iso(data)
    return data

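# get_all_links patches the country names one special case at a time. The same
# fix-ups could live in a lookup table, which keeps the loop body flat as new
# renames turn up; NAME_FIXES is a hypothetical constant, not part of the
# current codebase:
NAME_FIXES = {
    "Saint Vincent & the Grenadines": "Saint Vincent and the Grenadines",
    "Virgin Islands (U.S.)": "United States Virgin Islands",
}
# usage in the row loop: name = NAME_FIXES.get(name, name)
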
def translate(iso_language):
    # parse the languages 10 by 10 to track any error more easily
    count = 0
    driver = create_driver()
    for lg in iso_language:
        for p in PHRASES:
            iso = iso_language[lg]
            p_edit = p.replace(" ", "%20")
            url = 'https://translate.google.com/?sl=en&tl=' + iso + '&text=' + p_edit
            try:
                driver.get(url)
                soup = BeautifulSoup(driver.page_source, 'lxml')
            except Exception:
                LOGGER.error(f'Could not parse {lg}')
                continue
            try:
                translation = soup.find('span', {
                    'class': 'tlid-translation translation'
                }).text
                pronunciation = soup.find_all('div', {
                    'class': 'tlid-transliteration-content transliteration-content full'
                })[1].text
            except Exception:
                LOGGER.info(f"Could not find data for {lg}; it will be replaced by '-'")
                translation = "-"
                pronunciation = "-"
            DB.insert('phrases', iso, lg, p, translation, pronunciation)
        count += 1
        if count == 10:
            quit_driver(driver)
            return
    quit_driver(driver)
    return

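# translate() percent-encodes spaces by hand, which leaves characters such as
# '&' or '?' inside a phrase able to break the query string. A sketch using
# the standard library instead, assuming the same Google Translate URL layout;
# build_translate_url is a hypothetical helper name:
from urllib.parse import quote

def build_translate_url(iso, phrase):
    # quote() percent-encodes every unsafe character, not just spaces
    return 'https://translate.google.com/?sl=en&tl=' + iso + '&text=' + quote(phrase)
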
def save_to_unsafe_areas():
    driver = create_driver()
    all_countries = get_all_links()
    data = {}
    for country in all_countries:
        country_info = all_countries[country]
        href = country_info['href']
        url = "https://travel.gc.ca" + href
        regional_advisory = get_regional_advisories(url, driver)
        data[country] = {'unsafe_areas': regional_advisory}
    # Canada special case
    data['Canada'] = {
        'unsafe_areas': 'There is no regional advisory, take security precautions based on the general advisory for this country.'
    }
    data = find_all_iso(data)
    save_regional_advisories(data)
    quit_driver(driver)

def save_to_SG():
    LOGGER.info('Saving Singapore into the database')
    driver = create_driver()
    LOGGER.info('Parsing visa requirements for all countries into the Singapore table')
    try:
        wiki_visa_url = wiki_visa_url_SG
        wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
        visas = wiki_visa_ob.visa_parser_table()
        LOGGER.success('Visa requirements have been successfully parsed for the Singapore table')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while parsing for visa requirements because of the following error: {error_msg}')
    advisories = parse_all_countries_advisories()
    array_info = []
    # create an sqlite_advisory object
    db = Database("countries.sqlite")
    db.drop_table("SG")
    db.add_table("SG",
                 country_iso="text",
                 name="text",
                 advisory_text="text",
                 visa_info="text")
    array_info = save_info(db, visas, advisories, array_info)
    db.close_connection()
    LOGGER.success('Singapore was successfully saved to the database')
    quit_driver(driver)
    with open('./advisory-sg.json', 'w') as outfile:
        json.dump(array_info, outfile)

def get_additional_advisory_info_url():
    url = 'https://travel.gc.ca/travelling/advisories'
    # set up the headless chrome driver
    driver = create_driver()
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    table = soup.find('table')
    table_body = table.find('tbody')
    table_rows = table_body.find_all('tr', attrs={'class': 'gradeX'})
    additional_advisory = {}
    for row in table_rows:
        cols = row.find_all('td')
        country = cols[0].text
        iso = find_iso_of_country(country)
        advisory = cols[2].text
        additional_advisory[iso] = {
            'country_name': country,
            'advisory_text': advisory
        }
    quit_driver(driver)
    return additional_advisory

save_into_db(data)


def all_unsafe_areas():
    # get_url_of_countries creates its own driver -- to change
    url = get_url_of_countries()
    data = {}
    driver = create_driver()
    LOGGER.info('Retrieving all unsafe areas')
    for country in url:
        href = url[country].get('href')
        link = "https://smartraveller.gov.au{}".format(href)
        unsafe_areas = regional_advice_level(driver, link)
        data[country] = {'unsafe_areas': unsafe_areas}
        LOGGER.info(f'{data[country]}')
    data = find_all_iso(data)
    quit_driver(driver)
    # saving the data in a json file
    with open('unsafe-areas-au.json', 'w') as fp:
        json.dump(data, fp)


# save_to_australia()

# manual check of regional_advice_level on a single destination
driver = create_driver()
data = regional_advice_level(
    driver, "https://www.smartraveller.gov.au/destinations/africa/mali")
quit_driver(driver)
# save_to_australia()