def scrapeNPS():
    ##### CACHE

    FILENAME = "nps_cache.json"
    program_cache = Cache(FILENAME)

    url = "https://www.nps.gov" + "/index.htm"
    data = program_cache.get(url)

    if not data:
        data = requests.get(url).text
        program_cache.set(url, data, expire_in_days=1)

    soup = BeautifulSoup(data, "html.parser")

    ##### Get all state links
    state_lst = []
    for link in soup.find_all('a'):
        href = link.get('href', '')  # some anchors have no href attribute
        if '/state/' in href:
            state_lst.append(href)

    ##### Creating a new CSV called 'park_info'
    new_file = open('park_info.csv', 'w', encoding='utf8')
    new_file.write('name,type,location,description,state')
    new_file.write('\n')
    for states in state_lst:

        ##### Cache by states
        name = states.split("/")
        cache_each_state = "nps_cache_" + name[2] + ".json"
        program_cache = Cache(cache_each_state)
        url = "https://www.nps.gov" + states
        data = program_cache.get(url)

        if not data:
            data = requests.get(url).text
            program_cache.set(url, data, expire_in_days=1)
        soup = BeautifulSoup(data, "html.parser")

        ##### Scrape the state's name and all of its parks
        state = soup.find("h1", "page-title")
        state_name = state.string if state else ""
        park_list = soup.find_all('div', {'class': 'list_left'})

        for park in park_list:
            name = str(park.find('h3').string)
            park_type = str(park.find('h2').string)
            loc = str(park.find('h4').string)
            des = str(park.find('p').string)
            des = des.replace('\n', ' ')
            des = des.replace('"', "'")

            row_string = '"{}","{}","{}","{}","{}"'.format(
                name, park_type, loc, des, state_name)
            new_file.write(row_string)
            new_file.write('\n')

    new_file.close()

    ##### Save each state's info as a CSV
    new_state_file = open('states.csv', 'w', encoding='utf8')
    new_state_file.write('state,abbreviation,url')
    new_state_file.write('\n')

    for states in state_lst:

        ##### Reuse the per-state cache created above
        name = states.split("/")
        abbr = name[2].upper()
        cache_each_state = "nps_cache_" + name[2] + ".json"
        program_cache = Cache(cache_each_state)
        url = "https://www.nps.gov" + states
        data = program_cache.get(url)

        if not data:
            data = requests.get(url).text
            program_cache.set(url, data, expire_in_days=1)

        soup = BeautifulSoup(data, "html.parser")

        ##### Scrape the state's name
        state = soup.find("h1", "page-title")
        state_name = state.string if state else ""

        row_string = '"{}","{}","{}"'.format(state_name, abbr, url)
        new_state_file.write(row_string)
        new_state_file.write('\n')

    new_state_file.close()
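
# The Cache helper imported below from advanced_expiry_caching is only exercised
# here through Cache(filename), .get(url), and .set(url, data, expire_in_days=...).
# A minimal sketch of a compatible cache, assuming a flat JSON file keyed by URL
# (the real module may store its entries differently), could look like this:
import datetime
import json
import os

class SimpleJsonCache:
    """Hypothetical stand-in that mirrors the Cache interface used above."""

    def __init__(self, filename):
        self.filename = filename
        self.cache = {}
        if os.path.isfile(filename):
            with open(filename, 'r', encoding='utf8') as f:
                self.cache = json.load(f)

    def get(self, key):
        entry = self.cache.get(key)
        if not entry:
            return None
        # an expired entry behaves like a cache miss (ISO timestamps sort lexicographically)
        if entry["expires"] and entry["expires"] < datetime.datetime.now().isoformat():
            return None
        return entry["data"]

    def set(self, key, data, expire_in_days=None):
        expires = None
        if expire_in_days is not None:
            expires = (datetime.datetime.now()
                       + datetime.timedelta(days=expire_in_days)).isoformat()
        self.cache[key] = {"data": data, "expires": expires}
        with open(self.filename, 'w', encoding='utf8') as f:
            json.dump(self.cache, f)
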
from advanced_expiry_caching import Cache  # use tool from the other file for caching
import requests, os
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from sqlalchemy.orm import relationship
import time
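
# Usage sketch for scrapeNPS() above: it writes park_info.csv and states.csv to the
# working directory, and because every field is wrapped in double quotes the standard
# csv module reads the rows back directly. preview_park_info is a hypothetical helper,
# not part of the original code.
import csv

def preview_park_info(path='park_info.csv', limit=5):
    """Print the first few park rows produced by scrapeNPS()."""
    with open(path, encoding='utf8') as f:
        for i, row in enumerate(csv.reader(f)):
            print(row)
            if i >= limit:
                break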

########## Scrape data from the website: states, topics, activities
FILENAME = "allinfo_parks.json"  # all-caps name, following the constant convention
program_cache = Cache(FILENAME)  # create a cache stored in a file of this name

url = "https://www.nps.gov/findapark/advanced-search.htm?p=1&v=0"  # when scraping, the URL is usually unique, so it can serve as the cache key (unlike form/query requests)

data = program_cache.get(url)
if not data:
    data = requests.get(url).text
    program_cache.set(url, data, expire_in_days=1)

soup = BeautifulSoup(data, "html.parser")  # the "html.parser" argument tells BeautifulSoup to parse the data as HTML
states = soup.find_all(id="form-park")
activities = soup.find_all(id="form-activity")
topics = soup.find_all(id="form-topic")

states_name = []
for state in states:
    b = state.find_all('option')
    for i in range(len(b)):
        # the loop body was cut off in the source; appending each option's text is
        # the likely intent given states_name above (assumption)
        states_name.append(str(b[i].string))

########## Example #3: cache dog breed pages from petwave.com

FILENAME = "dogs_cache.json"
program_cache = Cache(FILENAME)

url = "https://www.petwave.com/Dogs/Breeds.aspx"
data = requests.get(url).text
soup = BeautifulSoup(data, features="html.parser")
# print(soup.prettify()) # nice for investigation

all_headlines = soup.find_all('div', attrs={'class': 'pw-rid-small-headline'})
for headline in all_headlines:
    links = headline.find_all('a')
    for a in links:
        new_url = "https://www.petwave.com" + a['href']
        #cache all the urls into a json file
        data = program_cache.get(new_url)
        # print(new_url)
        if not data:
            data = requests.get(new_url).text
            program_cache.set(new_url, data)

import json  # needed to read the cache file back in (missing from the imports above)

try:
    cache_file = open(FILENAME, 'r', encoding='utf8')
    cache_contents = cache_file.read()
    cache_diction = json.loads(cache_contents)
    cache_file.close()
except (IOError, ValueError):  # missing or malformed cache file
    cache_diction = {}
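
# cache_diction now maps each cached breed URL to whatever payload the Cache class
# stored for it. The exact value layout depends on advanced_expiry_caching, so this
# sketch only assumes the raw HTML is recoverable as text: it re-parses every cached
# page and leaves the breed-specific selectors to the code that fills names_list and
# description_list below.
breed_soups = {}
for cached_url, cached_value in cache_diction.items():
    breed_soups[cached_url] = BeautifulSoup(str(cached_value), "html.parser")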

names_list = []
description_list = []