Example no. 1
def save_to_united_states():

    LOGGER.info("Begin parsing and saving for United States table...")
    driver = create_driver()

    data = {}  #Used to store all the parsed data for each country
    name_to_advisories = {}  #Stores the names and associated advisories
    LOGGER.info(
        f'Retrieving visa requirements for all countries for the United States advisory'
    )

    name_advisory = get_name_and_advisory_of_countries()
    wiki_visa_url = "https://en.wikipedia.org/wiki/Visa_requirements_for_United_States_citizens"
    wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
    visas = wiki_visa_ob.visa_parser_table()
    LOGGER.success(
        'Successfully retrieved visa requirements for all countries for the United States advisory'
    )

    for name in sorted(name_advisory.keys()):  #Sort the dictionary containing names and advisories
        name_to_advisories[name] = name_advisory[name]

    counter_country = 0
    for country in name_to_advisories:  #iterates through name_to_advisories to retrieve advisories
        driver.implicitly_wait(5)
        name = country
        advisory = name_to_advisories[country]

        visa_text = ""
        for countryVisa in visas:  # iterates through list of visas to retrieve visas
            if (countryVisa == country):
                visa_text = visas[countryVisa].get('visa')
                del visas[countryVisa]
                break

        country_iso = "na"
        data[name] = {
            'country-iso': country_iso,
            'name': name,
            'advisory-text': advisory,
            'visa-info': visa_text
        }

        if ((counter_country % 50) == 0):
            quit_driver(driver)
            driver = create_driver()
        counter_country += 1

    data = find_all_iso(data)  #Sets iso for each country

    with open('./advisory-us.json', 'w') as outfile:
        json.dump(data, outfile)

    save_into_db(data)
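
All of the examples in this listing lean on a shared create_driver()/quit_driver() helper pair that is never shown. A minimal sketch of what such helpers could look like, assuming Selenium with headless Chrome (the project's real options and setup may differ):

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def create_driver():
    #Run Chrome headless so the scrapers can work without a display (assumed configuration)
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    return webdriver.Chrome(options=options)

def quit_driver(driver):
    #Close every window and end the WebDriver session
    driver.quit()
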
Example no. 2
def get_url_of_countries_nz(driver):
    info = {}
    LOGGER.info('Retrieving URL of all countries for New Zealand advisory')
    try:
        #this is the link to the first page
        url = 'https://safetravel.govt.nz/travel-advisories-destination'

        #set up the headless chrome driver (note: this shadows the driver passed in as a parameter)
        driver = create_driver()
        driver.get(url)
       
        #Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'lxml')

        #pattern of the link to the country page that the href should match
        reg = regex.compile(r'\w+-*')
        table = soup.find('table')
        table_body = table.find('tbody')
        table_rows = table_body.find_all('tr')

        for tr in table_rows:
            cols = tr.find_all('td')
            cols = [ele.text.strip() for ele in cols]

            name = cols[1]
            a = tr.find('a', attrs={'href': reg})
            info[name] = {"href":a['href']}
            LOGGER.success(f'URL for {name} was successfully retrieved')
        LOGGER.success('Successfully retrieved URL of all countries for the New Zealand advisory')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while retrieving the country URLs for the New Zealand advisory because of the following error: {error_msg}')
    finally:
        quit_driver(driver)

    return info
Example no. 3
def get_url_of_for_letter(dictionnary, letter):
    try:
        #this is the link to the first page
        url = 'https://www.mfa.gov.sg/Where-Are-You-Travelling-To?letter={}'.format(letter)

        driver = create_driver()
        driver.get(url)

        #Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'lxml')
        #All countries for a given letter
        countries = soup.findAll("a", {"class": "embassy-dropdown"})

        # #retrieving links for all countries of the alphabet
        for country in countries:
            country_name = country.text.strip()
            country_iso = find_iso_of_country(country_name)
            if country_iso != "":  #Countries that don't have an iso are not official countries
                href = country['href']
                dictionnary[country_name] = {"href": href}
    finally:
        driver.close()
        driver.quit()
    return dictionnary
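
find_iso_of_country() is assumed throughout this listing to map a country's display name to its two-letter ISO code and to return "" when the name is not an official country. A minimal sketch, assuming the pycountry package (the project may well use its own lookup table instead):

import pycountry

def find_iso_of_country(country_name):
    #Return the ISO 3166-1 alpha-2 code, or "" when no match is found (assumed contract)
    try:
        return pycountry.countries.lookup(country_name.strip()).alpha_2
    except LookupError:
        return ""
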
Example no. 4
def get_url_of_countries():
    info = {}
    LOGGER.info('Retrieving URL of all countries for United Kingdom advisory')
    try:
        #this is the link to the first page
        url = 'https://www.gov.uk/foreign-travel-advice'

        driver = create_driver()
        driver.get(url)

        #Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'lxml')

        #the second two-thirds column holds the links to the country pages
        countries_div = soup.findAll("div", {"class": "govuk-grid-column-two-thirds"})[1]
        countries = countries_div.findAll('a')

        #retrieving links for all countries
        for country in countries:
            country_name = country.text
            country_iso = find_iso_of_country(country_name)
            if country_iso != "":  #Countries that don't have an iso are not official countries
                href = country['href']
                info[country_iso] = {"href":href}
                LOGGER.success(f'URL of {country_name} was successfully retrieved')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while retrieving the URL of countries for the United Kingdom advisory because of the following error: {error_msg}')
    finally:
        driver.close()
        driver.quit()

    return info
Example no. 5
def parse_one_country_vaccine(url, country):
    driver = create_driver()
    driver.get(url)
    vaccines = {}
    LOGGER.info(
        f'Parsing the vaccination information for the following country: {country}'
    )
    #Selenium hands the page source to Beautiful Soup
    soup = BeautifulSoup(driver.page_source, 'lxml')
    count = 0
    for tbody in soup.findAll('tbody'):
        for row in tbody.findAll('tr'):
            name = row.find('td', {"class": "traveler-disease"})
            info = row.find('td', {"class": "traveler-findoutwhy"})
            if name and info:
                name = name.text.strip('/\n')
                if count == 0:
                    info = info.text.replace('\n', '')
                else:
                    info = info.text.replace('\n', ' ')
                vaccines[name] = info
                count = count + 1

    quit_driver(driver)
    save_one_country(vaccines, country)
    print(vaccines)
    return vaccines
Example no. 6
def save_to_MU():
    LOGGER.info('Saving and parsing Mauritius into the database')
    driver = create_driver()
    LOGGER.info('Begin parsing for Mauritius advisory')
    try:
        wiki_visa_url = wiki_visa_url_MU
        wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
        visas = wiki_visa_ob.visa_parser_table()
        LOGGER.success(
            'Parsing for Mauritius advisory has been successfully completed')
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while parsing for Mauritius advisory because of the following error: {error_msg}'
        )
        visas = {}  #fall back to an empty dict so the save loop below can still run
    info = {}
    array_info = []

    # create an an sqlite_advisory object
    db = Database("countries.sqlite")
    db.drop_table("MU")
    db.add_table("MU",
                 country_iso="text",
                 name="text",
                 advisory_text="text",
                 visa_info="text")
    LOGGER.info('Saving Mauritius table into the database')
    try:
        for country in visas:
            iso = find_iso_of_country(country)
            if (iso != ""):
                name = country
                LOGGER.info(f'Saving {name}')
                visa = visas[country].get(
                    'visa')  #dictionary for visa info is country{visa:text}
                advisory = "Not available yet"
                info = {
                    "country_iso": iso,
                    "name": name,
                    "advisory": advisory,
                    "visa_info": visa
                }
                array_info.append(info)
                print(name, "     ", visa, "    ", advisory)
                db.insert("MU", iso, name, advisory, visa)
                LOGGER.success(
                    f'{name} was successfully saved to the database with the following information: {visa}. {advisory}.'
                )
        LOGGER.success(
            'Mauritius table successfully saved to the database')
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while saving Mauritius table to the database because of the following error: {error_msg}'
        )
    db.close_connection()

    quit_driver(driver)

    with open('./advisory-mu.json', 'w') as outfile:
        json.dump(array_info, outfile)
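
The Database class is also never shown; from the calls above (drop_table, add_table with column-type keywords, positional insert, close_connection) it behaves like a thin sqlite3 wrapper. A minimal sketch consistent with those call sites (the real class may differ):

import sqlite3

class Database:
    def __init__(self, path):
        self.conn = sqlite3.connect(path)

    def drop_table(self, name):
        self.conn.execute('DROP TABLE IF EXISTS {}'.format(name))

    def add_table(self, name, **columns):
        #columns arrive as column_name="type" keyword pairs, in declaration order
        cols = ', '.join('{} {}'.format(col, ctype) for col, ctype in columns.items())
        self.conn.execute('CREATE TABLE {} ({})'.format(name, cols))
        self.conn.commit()

    def insert(self, table, *values):
        #positional values must follow the column order used in add_table
        placeholders = ', '.join('?' for _ in values)
        self.conn.execute('INSERT INTO {} VALUES ({})'.format(table, placeholders), values)
        self.conn.commit()

    def close_connection(self):
        self.conn.close()
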
Example no. 7
def find_a_post(location, request_id, i=1):

    LOGGER.info(f'Starting the parser for the following location: {location}')
    driver = create_driver()
    location = location.replace(' ', '')

    url = instagram_url + location + "/"
    try:
        LOGGER.info(f'Retrieving the link to the image page for: {location}')
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        garb_all = soup.find_all('a', {'href': regex.compile(r'/p/')})
    except Exception:
        LOGGER.error(
            f'Could not get the link to the image page for: {location}')
        quit_driver(driver)
        return False  #the bare `exit` was a no-op; bail out so garb_all is never used unbound

    count = 0
    for g in garb_all:
        count += 1
        if count > i:
            break

        u = "https://www.instagram.com" + g.get('href')
        try:
            image_info = get_image_info(driver, u)
            LOGGER.success(f'Image info for: {location}')
        except Exception:
            LOGGER.error(
                f'Could not get the info of the image for: {location}')
            count -= 1
            continue  #skip the filtering step, since image_info was never assigned

        try:
            save_img_url(image_info['image_link'],
                         'images_to_filter/check.jpg')
            selfie = check_if_selfie('images_to_filter/check.jpg')
            group_photo = check_if_group_photo('images_to_filter/check.jpg')
            objects_too_big = check_for_objects('images_to_filter/check.jpg')
            too_much_similar_colors = find_nearest_colors(
                'images_to_filter/check.jpg')
            if not selfie and not group_photo and not objects_too_big and not too_much_similar_colors and not check_if_wrong_geolocation(
                    location, image_info['geolocation']):
                save_image("images", image_info, location, str(request_id))
                LOGGER.success(f'Saved Image info for: {location}')
                return True
            else:
                failed_img = Image.open('images_to_filter/check.jpg')
                failed_img.save(
                    f'images_to_filter/discarded/{get_last_discarded()}.jpg')
                LOGGER.error(
                    'Cannot save image. It is now in images_to_filter/discarded/'
                )
                count -= 1
        except Exception:
            LOGGER.error(
                f'Could not save the info of the image for: {location}')
            count -= 1

    quit_driver(driver)
Example no. 8
def find_all_ireland():

    LOGGER.info("Begin parsing and saving for Ireland...")
    my_driver = create_driver()

    all_url = find_all_url(my_driver)
    data = find_all_iso(all_url)
    LOGGER.info(
        'Parsing visa requirements for all countries for the Ireland advisory')
    try:
        wiki_visa_ob = wiki_visa_parser(
            "https://en.wikipedia.org/wiki/Visa_requirements_for_Irish_citizens",
            my_driver)
        visas = wiki_visa_ob.visa_parser_table()
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while getting the visa requirements for the Ireland advisory because of the following error: {error_msg}'
        )

    for country in data:
        c = data[country]
        url = c['href']
        my_driver.implicitly_wait(5)
        my_driver.get(url)
        soup = BeautifulSoup(my_driver.page_source, 'lxml')
        c['visa-info'] = get_one_info(url, 'visa/passport', my_driver, soup)
        c['advisory-text'] = get_one_advisory(url, my_driver, soup)
        c['name'] = country
        if c['visa-info'] == '':
            c['visa-info'] = get_one_info(url, 'Entry requirements', my_driver,
                                          soup)
        iso = c['country-iso']
        #handling some exceptions, had to do research
        if iso == 'AI':
            c['visa-info'] = 'Visa not required for 3 months'
        elif iso == 'BM':
            c['visa-info'] = 'Visa not required for 21 days (extendable)'
        elif iso == 'MQ':
            iso = 'FR'
        elif iso == 'MS':
            c['visa-info'] = 'Visa not required for 6 months'
        elif iso == 'RE':
            iso = 'FR'
        else:
            try:
                c['visa-info'] = visas[country].get(
                    'visa') + "<br>" + c['visa-info']
            except Exception as error_msg:
                print(c, error_msg)
                LOGGER.warning(f'Error message: {error_msg}')
    #dump the data into a json file (to be deleted later)
    quit_driver(my_driver)
    with open('./advisory-ie.json', 'w') as outfile:
        json.dump(data, outfile)

    save_into_db(data)


#find_all_ireland()
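
wiki_visa_parser appears in most of these examples with a single method, visa_parser_table(), that returns a dict keyed by country name with a 'visa' entry. A minimal sketch with that shape, assuming it scrapes the first wikitable on the given Wikipedia page (the real parser is certainly more involved):

from bs4 import BeautifulSoup

class wiki_visa_parser:
    def __init__(self, url, driver):
        self.url = url
        self.driver = driver

    def visa_parser_table(self):
        #Return {country_name: {'visa': requirement_text}} from the wikitable rows
        self.driver.get(self.url)
        soup = BeautifulSoup(self.driver.page_source, 'lxml')
        visas = {}
        table = soup.find('table', {'class': 'wikitable'})
        for row in table.find_all('tr'):
            cells = row.find_all('td')
            if len(cells) >= 2:
                country = cells[0].text.strip()
                visas[country] = {'visa': cells[1].text.strip()}
        return visas
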
Example no. 9
def save_to_new_zealand():
    LOGGER.info("Begin parsing and saving for New Zealand table...")
    driver = create_driver()
    
    data = {}  #Used to store all the parsed data for each country
    url = get_url_of_countries_nz(driver)  #this function creates its own driver -- to change
    LOGGER.info('Retrieving visa requirements for New Zealand advisory')
    try:
        wiki_visa_url = "https://en.wikipedia.org/wiki/Visa_requirements_for_New_Zealand_citizens"
        wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
        visas = wiki_visa_ob.visa_parser_table()  # Used to acquire visa info of each country
        LOGGER.success('Successfully retrieved visa requirements of all countries for New Zealand advisory')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while retrieving visa requirements for the New Zealand advisory because of the following error: {error_msg}')
        visas = {}  #fall back to an empty dict so the loop below can still run
    
    counter_country = 0
    for country in url: #iterates through urls to retrieve advisory information
        driver.implicitly_wait(5)
        name = country
        href = url[country].get("href")

        link = "https://safetravel.govt.nz/{}".format(href,sep='')
        advisory = parse_a_country_advisory(link,driver) 

        visa_text= ""
        for countryVisa in visas: # iterates through list of visas to retrieve visas
            if(countryVisa ==  country):
               visa_text = visas[countryVisa].get('visa')
               del visas[countryVisa]
               break;

        country_iso = "na"
        data[name] = {'country-iso': country_iso, 'name': name, 'advisory-text': advisory, 'visa-info': visa_text}
        

        if ((counter_country % 50) == 0):
            quit_driver(driver)
            driver = create_driver()
        counter_country += 1
      
    data = find_all_iso(data)  #Sets iso for each country

    with open('./advisory-nz.json', 'w') as outfile:
        json.dump(data, outfile)

    save_into_db(data)
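
find_all_iso() is assumed to walk the parsed dict and fill in the 'country-iso' placeholder of each record. A minimal sketch built on find_iso_of_country (an assumption; the real helper may also normalize country names):

def find_all_iso(data):
    for name in data:
        iso = find_iso_of_country(name)
        if iso != "":
            data[name]['country-iso'] = iso
    return data
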
Example no. 10
def translate(iso_language):
    #parse the languages 10 by 10 to track any error more easily
    count = 0
    driver = create_driver()
    for lg in iso_language:
        for p in PHRASES:
            iso = iso_language[lg]
            p_edit = p.replace(" ", "%20")

            url = 'https://translate.google.com/?sl=en&tl=' + iso + '&text=' + p_edit
            try:
                quit_driver(driver)  #quit the previous session before opening a new one so drivers are not leaked
                driver = create_driver()
                driver.get(url)
                soup = BeautifulSoup(driver.page_source, 'lxml')
            except:
                LOGGER.error(f'Could not parse {lg}')
                continue

            try:
                translation = soup.find('span', {
                    'class': 'tlid-translation translation'
                }).text
                pronunciation = soup.find_all(
                    'div', {
                        'class':
                        'tlid-transliteration-content transliteration-content full'
                    })[1].text
            except:
                LOGGER.info(
                    f'Could not find data for {lg}; it will be replaced by "-"')
                translation = "-"
                pronunciation = "-"

            DB.insert('phrases', iso, lg, p, translation, pronunciation)

        count += 1

        if count == 10:
            quit_driver(driver)
            return

    quit_driver(driver)
    return
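
A hypothetical invocation of translate(), assuming PHRASES and DB are module-level globals as used above and that iso_language maps a language name to its Google Translate code:

PHRASES = ['hello', 'thank you']  #hypothetical phrase list
translate({'French': 'fr', 'Japanese': 'ja'})
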
Example no. 11
def save_to_UK():

    LOGGER.info("Begin parsing and saving for United Kingdom table...")
    driver = create_driver()
    LOGGER.info('Parsing the visa requirements of all countries for United Kingdom advisory')
    try:
        wiki_visa_url = "https://en.wikipedia.org/wiki/Visa_requirements_for_British_citizens"
        wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
        visas = wiki_visa_ob.visa_parser_table()
        data = parse_all_countries_advisory()
        LOGGER.success('Successfully parsed the visa requirements of all countries for United Kingdom advisory')
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while retrieving the visa requirements of all countries for the United Kingdom advisory because of the following error: {error_msg}')
    
    info = {}
    array_info = []
    # create an sqlite_advisory object
    db = Database("countries.sqlite")
    db.drop_table("GB")
    db.add_table("GB", country_iso="text", name="text", advisory_text="text", visa_info="text")
    LOGGER.info('Saving country information into the UK table')

    try:
        for country in visas:
            iso = find_iso_of_country(country)
            if iso != "":
                try:
                    name = country
                    advisory = data[iso].get('advisory')  #dictionary for the travel advisory is iso{advisory:text}
                    visa_info = visas[country].get('visa')  #dictionary for visa info is country{visa:text}
                    info = {
                        "country_iso": iso,
                        "name": name,
                        "advisory": advisory,
                        "visa_info": visa_info
                    }
                    array_info.append(info)
                    LOGGER.success(f"Saving {name} into the UK table with the following information: {visa_info}. {advisory}")
                    db.insert("GB", iso, name, advisory, visa_info)
                    LOGGER.success(f'{name} successfully saved to the database.')
                except KeyError:
                    LOGGER.warning(f'This country doesn\'t have advisory info: {country}')
                    print("This country doesn't have advisory info: ", country)
                    LOGGER.info(f'Its ISO is {iso}')
                    print("Its ISO is: ", iso)
        LOGGER.success('All countries have been successfully saved into the UK table')

    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while saving countries into the UK table because of the following error: {error_msg}')
    db.close_connection()

    with open('./advisory-uk.json', 'w') as outfile:
        json.dump(array_info, outfile)
Example no. 12
def parse_one_country_advisory(url, href):
    driver = create_driver()
    driver.get(url)
    advisory=""
    #Selenium hands the page source to Beautiful Soup
    soup=BeautifulSoup(driver.page_source, 'lxml')
    advisory_div = soup.find("div", {"class": "gem-c-govspeak govuk-govspeak direction-ltr"})
    advisory_paragraph1 = advisory_div.findAll("p")[0]
    advisory_paragraph2 = advisory_div.findAll("p")[1]
    advisory = advisory_paragraph1.text +" "+advisory_paragraph2.text
    quit_driver(driver)

    return advisory
Example no. 13
def parse_one_country_advisory(url):
    driver = create_driver()
    driver.get(url)
    #Selenium hands the page source to Beautiful Soup
    soup = BeautifulSoup(driver.page_source, 'lxml')
    advisory_paragraph1 = ""

    try:  #The html are made differently for certain countries pages
        advisory_div = soup.findAll(
            "div", {
                "class":
                "acc-content ui-accordion-content ui-corner-bottom ui-helper-reset ui-widget-content ui-accordion-content-active"
            })[1]
        advisory_paragraph = advisory_div.findAll("span")[0].text
        advisory_paragraph1 = advisory_paragraph.split('\n')[0]
    except IndexError:
        try:
            advisory_div = soup.findAll(
                "div", {
                    "class":
                    "acc-content ui-accordion-content ui-corner-bottom ui-helper-reset ui-widget-content ui-accordion-content-active"
                })[1]
            advisory_paragraph = advisory_div.findAll("p")[0].text
            advisory_paragraph1 = advisory_paragraph.split('\n')[0]
        except IndexError:
            try:
                advisory_div = soup.findAll(
                    "div", {
                        "class":
                        "acc-content ui-accordion-content ui-corner-bottom ui-helper-reset ui-widget-content ui-accordion-content-active"
                    })[1]
                advisory_paragraph = advisory_div.text
                advisory_paragraph1 = advisory_paragraph.split('\n')[1]
            except IndexError:
                try:
                    advisory_div = soup.findAll("div",
                                                {"class": "alert-section"})[0]
                    advisory_paragraph = advisory_div.findAll("p")[0].text
                    advisory_paragraph1 = advisory_paragraph.split('\n')[0]
                except IndexError:
                    advisory_div = soup.findAll("div", {"class": "space"})[0]
                    advisory_paragraph = advisory_div.findAll("p")[1].text
                    advisory_paragraph1 = advisory_paragraph.split('\n')[0]

    advisory_paragraph1 = advisory_paragraph1.lstrip()
    LOGGER.info(advisory_paragraph1)
    quit_driver(driver)

    return advisory_paragraph1
Example no. 14
def parse_all_countries_advisory():
    data = {}
    urls = get_url_of_countries()
    driver = create_driver()

    
    for country in urls:
        href = urls[country].get("href")
        link = "https://www.gov.uk{}".format(href)
        advisory = parse_one_country_advisory(link, href)
        link = "https://www.gov.uk{}/safety-and-security".format(href)
        additional_advisory_info = parse_additional_advisory_info(link, driver)
        data[country] = {"advisory": advisory + additional_advisory_info}

    quit_driver(driver)
    return data
Example no. 15
def save_to_australia():

    LOGGER.info("Begin parsing and saving for Australia table...")
    url = get_url_of_countries()  #this function creates its own driver -- to change
    data = {}
    driver = create_driver()
    try:
        LOGGER.info(
            'Parsing visa requirements for all countries for the Australian advisory'
        )
        wiki_visa_url = 'https://en.wikipedia.org/wiki/Visa_requirements_for_Australian_citizens'
        wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
        wiki_visa = wiki_visa_ob.visa_parser_table()
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while retrieving the visa requirements for all countries for the Australian advisory because of the following error: {error_msg}'
        )

    for country in url:
        driver.implicitly_wait(5)
        name = country
        href = url[country].get('href')
        advisory_text = url[country].get('advisory-text')
        link = "https://smartraveller.gov.au{}".format(href, sep='')
        additional_advisory = get_additional_advisory(link, driver)
        advisory_text = advisory_text + additional_advisory
        LOGGER.info(f"Begin parsing {name} to insert into AU table")
        visa_info = parse_a_country(link, driver, 'Visas')
        LOGGER.success(
            f"The following information was retrieved for {name}: {visa_info}. {advisory_text}"
        )
        if (visa_info == ''):
            try:
                visa_info = wiki_visa[name].get('visa') + "<br>" + visa_info
            except Exception:
                LOGGER.warning(f"No visa info for {name}")
        country_iso = "na"
        data[name] = {
            'country-iso': country_iso,
            'name': name,
            'advisory-text': advisory_text,
            'visa-info': visa_info
        }
    driver.quit()
    data = find_all_iso(data)

    save_into_db(data)
Example no. 16
def get_country_traffic_side():
    array_of_country_info = []
    already_parsed = []
    try:
        # this is the link to the first page
        url = 'https://www.worldstandards.eu/cars/list-of-left-driving-countries/'
        driver = create_driver()
        driver.get(url)
        # Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        table = soup.find('table')
        tbody = table.find('tbody')
        allRows = tbody.findAll('tr')
        for country_row in allRows:
            isHeader = country_row.find('th') is not None  #The header row should be discarded
            if not isHeader:
                country = country_row.findAll('td')[0].text
                traffic_side = country_row.findAll('td')[1].text
                country_name_has_bracket = country.find('(')
                if country_name_has_bracket > -1:  #We want to remove the bracket from the country name
                    country = country[0:country_name_has_bracket]

                country_iso = find_iso_of_country(country)
                if (country_iso != ""):
                    if country_iso not in already_parsed:  # Only parse the main traffic side of a country
                        if "left" in traffic_side:
                            traffic_side = "left"
                        else:
                            traffic_side = "right"
                        info = {
                            "country_iso": country_iso,
                            "country_name": country,
                            "traffic_side": traffic_side
                        }
                        already_parsed.append(country_iso)
                        array_of_country_info.append(info)
                    else:
                        print("The mainland of this country was already parsed:", country)
        return array_of_country_info

    finally:
        driver.close()
        driver.quit()
Example no. 17
def save_to_weather():

    #Antigua and Barbuda
    LOGGER.info(f'Beginning parsing for average monthly temperature')
    avg_monthly_temperature = ''
    try:
        driver = create_driver()
        wiki_temperature = wiki_weather_parser(wiki_visa_temperature, driver)
        avg_monthly_temperature = wiki_temperature.visa_parser_table()
        LOGGER.success(
            f'Following data was retrieved: {avg_monthly_temperature}')
        save_into_db('weather', avg_monthly_temperature)
        quit_driver(driver)
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while parsing for temperature because of the following error: {error_msg}'
        )
Example no. 18
def get_countries_cocainelaw():
    LOGGER.info("Retrieving information for cocaine")
    try:
        # this is the link to the first page
        url = 'https://en.wikipedia.org/wiki/Legal_status_of_cocaine'
        driver = create_driver()
        driver.get(url)
        # Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # locate the legality table on the wiki page
        table = soup.find('table', {'class': "wikitable"})
        tbody = table.find('tbody')
        table_rows = tbody.find_all('tr')

        cocaine_info = {}
        arrayCocaineInfo = {}
        for tablerow in table_rows:
            table_columns = tablerow.find_all('td')
            if len(table_columns) > 0:
                country_name = table_columns[0].text
                cocaine_possession = table_columns[1].text
                cocaine_possession = re.sub(r'\[\d*\]', ' ', cocaine_possession.rstrip())
                cocaine_sale = table_columns[2].text
                cocaine_sale = re.sub(r'\[\d*\]', ' ', cocaine_sale.rstrip())
                cocaine_transport = table_columns[3].text
                cocaine_transport = re.sub(r'\[\d*\]', ' ', cocaine_transport.rstrip())
                cocaine_cultivation = table_columns[4].text
                cocaine_cultivation = re.sub(r'\[\d*\]', ' ', cocaine_cultivation.rstrip())
                country_iso = find_iso_of_country(country_name)
                cocaine_info = {
                    "name": country_name,
                    "iso": country_iso,
                    "cocaine-possession": cocaine_possession,
                    "cocaine-sale": cocaine_sale,
                    "cocaine-transport": cocaine_transport,
                    "cocaine-cultivation": cocaine_cultivation
                }
                arrayCocaineInfo[country_iso] = cocaine_info
        return arrayCocaineInfo
    
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while retrieving information for cocaine because of the following error: {error_msg}')
    
    finally:
        driver.close()
        driver.quit()
Example no. 19
def get_url_of_countries():
    info = {}
    try:
        #this is the link to the first page
        url = 'https://smartraveller.gov.au/countries/pages/list.aspx'
        LOGGER.info(
            'Retrieving the URLs for all countries for the Australian advisory'
        )
        # create a new chrome session
        driver = create_driver()
        driver.get(url)

        #Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'lxml')

        #pattern of the link to the country page that the href should match
        reg = regex.compile(r'\/destinations\/\w+-*\w*\/\w+-*\w*')
        table = soup.find('table')
        table_body = table.find('tbody')
        table_rows = table_body.find_all('tr')

        for tr in table_rows:
            cols = tr.find_all('td')
            cols = [ele.text.strip() for ele in cols]

            if cols[2] == '':
                cols[2] = 'No advisory from the Australian government'

            name = cols[0]
            advisory_text = cols[2]
            a = tr.find('a', attrs={'href': reg})
            if a is not None:
                href = a['href']
                info[name] = {"href": href, "advisory-text": advisory_text}
                LOGGER.success(f'Retrieved URL for {name}')
        LOGGER.success(
            'Successfully retrieved the URLs for all countries of the Australian advisory'
        )
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while retrieving the URLs for all countries for the Australian advisory because of the following error: {error_msg}'
        )
    finally:
        quit_driver(driver)

    return info
Example no. 20
def all_unsafe_areas():
    url = get_url_of_countries()  #this function creates its own driver -- to change
    data = {}
    driver = create_driver()
    LOGGER.info('Retrieving all unsafe areas')
    for country in url:
        href = url[country].get('href')
        link = "https://smartraveller.gov.au{}".format(href, sep='')

        unsafe_areas = regional_advice_level(driver, link)
        data[country] = {'unsafe_areas': unsafe_areas}
        LOGGER.info(f'{data[country]}')

    data = find_all_iso(data)
    driver.quit()
    #saving the data in json file
    with open('unsafe-areas-au.json', 'w') as fp:
        json.dump(data, fp)
Example no. 21
def get_all_links():
    LOGGER.info('Retrieving the URLs for all countries for unsafe areas')

    iso_list = config.iso_list
    data = {}
    #home page link
    home = 'https://travel.gc.ca/travelling/advisories'

    driver = create_driver()
    driver.get(home)

    try:
        soup = BeautifulSoup(driver.page_source, 'lxml')

        table = soup.find('table', attrs={'id': 'reportlist'})
        tbody = table.find('tbody')
        rows = tbody.findAll('tr')

        #parse the table get the link in the <a> tag
        for row in rows:
            col1 = row.find('a')
            name = col1.text
            href = col1['href']
            #the iso function accepts a dictionary with a key as name
            if name == "Canary Islands":
                #set the iso directly and skip the generic assignment below so it is not overwritten
                data[name] = {'href': href, 'country-iso': 'CI'}
                LOGGER.success(f'Retrieved the URL for {name}')
                continue
            elif name == "Saint Vincent & the Grenadines":
                name = "Saint Vincent and the Grenadines"
            elif name == "Virgin Islands (U.S.)":
                name = "United States Virgin Islands"

            data[name] = {'href': href}
            LOGGER.success(f'Retrieved the URL for {name}')
        LOGGER.success('Retrieved all the URLs for unsafe areas')
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while retrieving the URLs for all countries from the Canadian travel website because of the following error: {error_msg}'
        )
    finally:
        quit_driver(driver)

    data = find_all_iso(data)
    return data
Example no. 22
def save_to_unsafe_areas():
    driver = create_driver()
    all_countries = get_all_links()
    data = {}
    for country in all_countries:
        name = all_countries[country]
        href = name['href']
        url = "https://travel.gc.ca" + href
        regional_advisory = get_regional_advisories(url, driver)
        data[country] = {'unsafe_areas': regional_advisory}

    #canada special case
    data['Canada'] = {
        'unsafe_areas':
        'There is no regional advisory, take security precautions based on the general advisory for this country.'
    }

    data = find_all_iso(data)
    save_regional_advisories(data)
    quit_driver(driver)
Example no. 23
def get_name_and_advisory_of_countries():
    try:
        #this is the link to the first page
        url = 'https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories.html/'
        LOGGER.info("Retrieving URL of all countries for United States")
        #set up the headless chrome driver
        driver = create_driver()
        driver.get(url)

        #Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'lxml')

        #pattern of the link to the country page that the href should match
        table = soup.find('table')
        table_body = table.find('tbody')
        table_rows = table_body.find_all('tr')

        counter = 0
        info = {}
        for tr in table_rows:
            if (counter != 0):  #skip the table header row
                cols = tr.find_all('td')
                href = cols[0].find('a').get('href')  # gets the url for each country, needed for additional advisory info
                link = "https://travel.state.gov/{}".format(href, sep='')

                cols = [ele.text.strip() for ele in cols]
                nameLength = len(cols[0]) - 16  #drop the trailing ' Travel Advisory' label (16 characters)
                name = cols[0][0:nameLength]
                if (name != 'W'):
                    advisory = cols[1]
                    advisory += '</br>' + parse_a_country_additional_advisory_info(
                        link, driver)
                info[name] = advisory
            counter += 1
    finally:
        driver.close()
        driver.quit()

    return info
Example no. 24
def get_countries_canabaislaw():
    LOGGER.info("Retrieving information for canabais")
    try:
        # this is the link to the first page
        url = 'https://en.wikipedia.org/wiki/Legality_of_cannabis'
        driver = create_driver()
        driver.get(url)
        # Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # locate the legality table on the wiki page
        table = soup.find('table', {'class': "wikitable"})
        tbody = table.find('tbody')
        table_rows = tbody.find_all('tr')

        canabais_info = {}
        arrayCanabaisInfo = {}
        for tablerow in table_rows:
            table_columns = tablerow.find_all('td')
            if len(table_columns) > 0:
                country_name = table_columns[0].text
                recreational = table_columns[1].text
                recreational = re.sub(r'\[\d*\]', ' ', recreational.rstrip())
                medical = table_columns[2].text
                medical = re.sub(r'\[\d*\]', ' ', medical.rstrip())
                country_iso = find_iso_of_country(country_name)
                canabais_info = {
                    "name": country_name,
                    "iso": country_iso,
                    "canabais-recreational": recreational,
                    "canabais-medical": medical
                }
                arrayCanabaisInfo[country_iso] = canabais_info
        return arrayCanabaisInfo
   
    except Exception as error_msg:
        LOGGER.error(f'An error has occurred while retrieving information for cannabis because of the following error: {error_msg}')

    finally:
        driver.close()
        driver.quit()
Example no. 25
def translateTest():
    #We are parsing the sentences from Google Translate
    url = 'https://translate.google.com/?sl=en&tl=#view=home&op=translate&sl=en&tl=fr&text=thank%20you'
    try:
        driver = create_driver()
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, 'lxml')
    except Exception:
        LOGGER.error("Could not connect to Google Translate.")
        return "-"  #bail out, since soup was never assigned

    try:
        translation = soup.find('span', {
            'class': 'tlid-translation translation'
        }).text

    except Exception:
        LOGGER.info(
            "Data is missing for Thank You, 'en' to 'fr' and will be replaced by '-'"
        )
        translation = "-"

    quit_driver(driver)
    return translation
Example no. 26
def get_url_of_countries():
    info = {}
    LOGGER.info('Retrieving URL of all countries for Vaccines table')
    try:
        #this is the link to the first page
        driver = create_driver()
        driver.get(vaccine_url)

        #Selenium hands the page source to Beautiful Soup
        soup = BeautifulSoup(driver.page_source, 'lxml')

        #each letter of the alphabet has its own bulleted list of countries
        countries_per_letter_array = soup.find_all("ul",
                                                   {"class": "list-bullet"})
        for countries_per_letter in countries_per_letter_array:
            countries_given_letter_array = countries_per_letter.find_all('a')

            #retrieving links for all countries
            for country in countries_given_letter_array:
                country_name = country.text
                country_iso = find_iso_of_country(country_name)
                if country_iso != "":  #Countries that don't have an iso are not official countries
                    href = country['href']
                    info[country_iso] = {"href": href}
                    LOGGER.info(f'Retrieving URL of {country_name}: {href}')

    except Exception as error_msg:
        LOGGER.error(
            f'Could not retrieve URLs of countries because of the following error: {error_msg}'
        )
    finally:
        driver.close()
        driver.quit()
    return info
Example no. 27
def get_additional_advisory_info_url():

    url = 'https://travel.gc.ca/travelling/advisories'
    #set up the headless chrome driver
    driver = create_driver()
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'lxml')

    table = soup.find('table')
    table_body = table.find('tbody')
    table_rows = table_body.find_all('tr', attrs={'class': 'gradeX'})
    additional_advisory = {}
    for row in table_rows:
        cols = row.find_all('td')
        country = cols[0].text
        iso = find_iso_of_country(country)
        advisory = cols[2].text
        additional_advisory[iso] = {
            'country_name': country,
            'advisory_text': advisory
        }

    quit_driver(driver)
    return additional_advisory
Esempio n. 28
0
def save_to_SG():
    LOGGER.info('Saving Singapore into the database')
    driver = create_driver()
    LOGGER.info(
        'Parsing visa requirements for all countries into the Singapore table')
    try:
        wiki_visa_url = wiki_visa_url_SG
        wiki_visa_ob = wiki_visa_parser(wiki_visa_url, driver)
        visas = wiki_visa_ob.visa_parser_table()
        LOGGER.success(
            'Visa requirements have been successfully parsed for the Singapore table'
        )
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while parsing for visa requirements because of the following error: {error_msg}'
        )
    advisories = parse_all_countries_advisories()
    array_info = []

    # create an an sqlite_advisory object
    db = Database("countries.sqlite")
    db.drop_table("SG")
    db.add_table("SG",
                 country_iso="text",
                 name="text",
                 advisory_text="text",
                 visa_info="text")

    array_info = save_info(db, visas, advisories, array_info)

    db.close_connection()
    LOGGER.success('Singapore was successfully saved to the database')
    quit_driver(driver)

    with open('./advisory-sg.json', 'w') as outfile:
        json.dump(array_info, outfile)
Example no. 29
def save_to_central_america():

    LOGGER.info("Begin parsing and saving for Central America...")
    #create driver
    driver = create_driver()

    #Mexico
    data_MX = mexico_all_links(driver)
    LOGGER.info("Saving Mexico to Central America")
    try:
        save_into_db_MX('MX', data_MX)
        LOGGER.success("MX successfully saved into the databse")
    except Exception as error_msg:
        LOGGER.error(
            f'MX was not successfully saved into the database because of the following error: {error_msg}'
        )

    #create obj driver and set belize as first url
    driver = create_driver()
    LOGGER.info(f'Beginning parsing for Belize')
    try:
        wiki_visa = wiki_visa_parser(wiki_visa_url_BZ, driver)
        visa_BZ = wiki_visa.visa_parser_table()
        visa_BZ = replace_key_by_iso(visa_BZ)
        LOGGER.success(f'Following data was retrieved: {visa_BZ}')
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while parsing for Belize because of the following error: {error_msg}'
        )

    #Dominica
    driver.close()
    driver = create_driver()
    LOGGER.info(f'Beginning parsing for Dominica')
    try:
        wiki_visa = wiki_visa_parser(wiki_visa_url_DM, driver)
        visa_DM = wiki_visa.visa_parser_table()
        visa_DM = replace_key_by_iso(visa_DM)
        LOGGER.success(f'Following data was retrieved: {visa_DM}')
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while parsing for Dominica because of the following error: {error_msg}'
        )

    #Dominican Republic
    driver.close()
    driver = create_driver()
    LOGGER.info(f'Beginning parsing for Dominican Republic')
    try:
        wiki_visa = wiki_visa_parser(wiki_visa_url_DO, driver)
        visa_DO = wiki_visa.visa_parser_table()
        visa_DO = replace_key_by_iso(visa_DO)
        LOGGER.success(f'Following data was retrieved: {visa_DO}')
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while parsing for Dominican Republic because of the following error: {error_msg}'
        )

    #Panama
    driver.close()
    driver = create_driver()
    LOGGER.info(f'Beginning parsing for Panama')
    try:
        wiki_visa = wiki_visa_parser(wiki_visa_url_PA, driver)
        visa_PA = wiki_visa.visa_parser_table()
        visa_PA = replace_key_by_iso(visa_PA)
        LOGGER.success(f'Following data was retrieved: {visa_PA}')
    except Exception as error_msg:
        LOGGER.error(
            f'An error has occurred while parsing for Panama because of the following error: {error_msg}'
        )

    driver.quit()

    #save the data into the DB
    save_into_db("BZ", visa_BZ)
    save_into_db("DM", visa_DM)
    save_into_db("DO", visa_DO)
    save_into_db("PA", visa_PA)
Example no. 30
def all_unsafe_areas():
    url = get_url_of_countries()  #this function creates its own driver -- to change
    data = {}
    driver = create_driver()
    LOGGER.info('Retrieving all unsafe areas')
    for country in url:
        href = url[country].get('href')
        link = "https://smartraveller.gov.au{}".format(href, sep='')

        unsafe_areas = regional_advice_level(driver, link)
        data[country] = {'unsafe_areas': unsafe_areas}
        LOGGER.info(f'{data[country]}')

    data = find_all_iso(data)
    driver.quit()
    #saving the data in json file
    with open('unsafe-areas-au.json', 'w') as fp:
        json.dump(data, fp)


driver = create_driver()
data = regional_advice_level(
    driver, "https://www.smartraveller.gov.au/destinations/africa/mali")
quit_driver(driver)

#save_to_australia()