Example #1
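# Setup hook for an episode scraper: loads the protobuf database named in options.database,
# computes yesterday's date as a struct_time, and creates an HTTP cache with a browser
# user agent; everything is returned and also pushed into the module's globals.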
def setup(options):
	from episodes_pb2 import All
	from datetime import date, timedelta
	from time import time
	from urlgrab import Cache

	db = All()
	db.ParseFromString(open(options.database,"rb").read())
	yesterday = date.fromtimestamp(time())-timedelta(days=1)
	yesterday = yesterday.timetuple()

	cache = Cache(debug=options.debug)
	cache.user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3"

	items = {"yesterday":yesterday, "cache":cache, "db": db}
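	# Publish each item as a module-level global in addition to returning the dict.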
	for x in items:
		globals()[x] = items[x]
	return items
Example #2
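# Scrapes a story from literotica.com: fetches the page through the cache, dumps the raw
# HTML, pulls the title, detects "Ch."/"Pt." chapter numbering, and starts a table of
# contents with tocStart() from the common module.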
from common import *
from sys import argv
from urlgrab import Cache
from re import compile, DOTALL, MULTILINE

cache = Cache()
url = argv[1]

titlePattern = compile("<h1>([^<]+)</h1>")
contentPattern = compile("<div class=\"b-story-body-x x-r15\">(.+?)</div><div class=\"b-story-stats-block\">" , DOTALL|MULTILINE)
nextPattern = compile("\"([^\"]+)\">Next</a>")

chapterPattern = compile("(.*?) (?:Ch.|Pt.) (\d+)")
memberPattern = compile("<a href=\"(https://www.literotica.com/stories/memberpage.php\?uid=\d+&amp;page=submissions)\">([^<]+)</a>")
chapterLinkPattern = compile("href=\"(https://www.literotica.com/s/[^\"]+)\">([^<]+)</a>")

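# Fetch the story page via the urlgrab cache (the exact meaning of max_age=-1 depends on
# that library) and dump the raw HTML for debugging.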
page = cache.get(url, max_age = -1)
data = page.read()
open("dump", "wb").write(data.encode("utf-8"))

title = titlePattern.findall(data)
title = title[0]

chapter = chapterPattern.match(title)
if chapter is not None:
	title = chapter.groups()[0]
	currentChapter = 1

print title

toc = tocStart(title)
Example #3
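# Fetches the chapter index of an archiveofourown.org work: extracts the work id from the
# URL, loads the /works/<id>/navigate page, scrapes title and author, and builds a
# chapter-number -> (chapter URL, chapter title) map from the index links.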
from sys import argv
from urlgrab import Cache
from codecs import open
import re
from common import *
from urlparse import urljoin

cache = Cache()
url = argv[1]

id = re.search("/works/(\d+)", url)
id = id.groups()[0]

navigate = "http://archiveofourown.org/works/%s/navigate"%id
print navigate

data = cache.get(navigate).read()
data = data.decode("utf-8")
info = re.search("<h2 class=\"heading\">Chapter Index for <a href=\"/works/\d+\">([^<]+)</a> by <a href=\"[^\"]+\" rel=\"author\">([^<]+)</a></h2>", data)
(title, author) = info.groups()

titlePattern = re.compile("<h2 class=\"title heading\">\s+(.*?)\s+</h2>")
summary = re.compile("<div[^>]+?class=\"summary module\"[^>]*?>(.+?)</div>", re.DOTALL|re.MULTILINE)
notes = re.compile("<div.+?class=\"notes module\"[^>]*>(.+?)</div>", re.DOTALL|re.MULTILINE)
mainContent = re.compile("<h3 class=\"landmark heading\" id=\"work\">Chapter Text</h3>(.*?)<!--/main-->", re.DOTALL|re.MULTILINE)
volumePattern = re.compile("<li><a href=\"(/works/\d+/chapters/\d+)\">(\d+). ([^<]+)</a>")

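# Gather the chapter links from the navigation page and key them by chapter number.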
volumes = sorted(volumePattern.findall(data))

print volumes
volumes = dict([(int(x[1]), (x[0],x[2])) for x in volumes])
Example #4
# -*- coding: utf-8 -*-
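# Start of a script that turns a text-format protobuf list of series (series.txt) into
# Kindle books: it sets up a cached HTTP client, compiles tag-stripping regexes, and maps
# typographic characters to replacements the Kindle renders more reliably.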
from urlgrab import Cache
from google.protobuf import text_format
from blog_pb2 import All
from re import compile, DOTALL, MULTILINE
from os.path import exists, join
from codecs import open
from urlparse import urljoin
from optparse import OptionParser
from common import generatePage, tocStart, tocEnd, makeMobi

c = Cache()
c.user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3"

db = All()
text_format.Merge(open("series.txt","rb","utf-8").read(),db)
stripTags = compile("<[^>]+>")
stripAnchorTags = compile("(?:<a[^>]+>)|(?:</a>)")

# Kindle doesn't like various characters, so lets rewrite some of them...
wrong = {
		u"“": u"\"",
		u"’": u"'",
		u"”": u"\"",
		u"‘": u"'",
		u"—": u" - ",
		u"…": u"-",
		u"": u"",
		u'“': u"\"",
		u'”':u"\"",
		u'–':u"-",
Example #5
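# Crawls The Register's BOFH column archive: starting from the index page, it follows the
# "Earlier Stories" link backwards to collect every listing page, and compiles the regexes
# used to pull the title, standfirst and episode body out of each article.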
from common import *
from re import compile, DOTALL, MULTILINE
from urlgrab import Cache
from urlparse import urljoin

linkPattern = compile("<h3><a href=\"(/[^\"]+)\">(.+?)</a></h3>")
earlierPattern = compile("<a href='([^\']+)'>.+?Earlier Stories.+?</a>", DOTALL | MULTILINE)
titlePattern = compile("<h2>(.+?)</h2>")
subtitlePattern = compile("<p class=\"standfirst\">(.+?)</p>")
contentPattern = compile("<strong class=\"trailer\">.+?</p>(.+?)(?:(?:<p>(?:(?:<i>)|(?:<small>)|(?:<font size=\"-2\">)|(?:<br>\n))?BOFH .+? Simon Travaglia)|(?:<ul class=\"noindent\">)|(?:<ul>.+?<li><a href=\"http://www.theregister.co.uk/content/30/index.html\">BOFH: The whole shebang</a></li>)|(?:</form>))", DOTALL| MULTILINE)
adPattern = compile("(<div id=ad-mu1-spot>.+?</div>)", MULTILINE | DOTALL)
episodePattern = compile("<strong class=\"trailer\">Episode \d+")

url = "http://www.theregister.co.uk/data_centre/bofh/"
pages = [url]
cache = Cache()

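# Walk backwards through the archive, remembering each index page, until a page with no
# story links is reached.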
while True:
	print url
	data = cache.get(url).read()
	links = linkPattern.findall(data)

	if links == []:
		break

	if url not in pages:
		pages.insert(0, url)

	earlier = earlierPattern.findall(data)
	url = urljoin(url, earlier[0])

skipTitles = ["Salmon Days is Go!"]
Example #6
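# Prints a text-format "series" stanza for a fanfiction.net story: it fetches the given
# URL, scrapes the title, author and story id, and fills in hardcoded scraping patterns
# for the mobile site (m.fanfiction.net).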
from sys import argv
from urlgrab import Cache
from codecs import open
import re

cache = Cache()
url = argv[1]
data = cache.get(url).read()
open("dump", "wb", "utf-8").write(data)

title = re.search("<title>(.+?) Chapter \d+", data)
title = title.groups()
author = re.search("By:</span> <a[^>]+?href='/u/\d+/[^']+'>([^<]+)</a>", data)
author = author.groups()[0]
id = re.search("/s/(\d+)", url)
id = id.groups()[0]

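# Emit the stanza: name is the title with spaces removed, description is the full title.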
print """series {
	name: "%s"
	description: "%s"
	author: "%s"
	startPage: "http://m.fanfiction.net/s/%s/1"
	titlePattern: "<img src='/[^']+/balloon.png' class='mt icons'>[\d,]+</a></span>(.+?)<br>"
	contentPattern: "id='storycontent' >(.+?)</div></div>.*?<hr size=1"
	nextPattern: "<a href='(/s/\d+/\d+/)'>Next &#187;</a>"
}"""%(title[0].replace(" ",""), title[0], author, id)

Example #7
#!/usr/bin/python
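# Scrapes the Escapist's Zero Punctuation video listing: walks the first ten index pages,
# collects links to individual episode pages, and derives a short identifier from the slug
# portion of each episode URL.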

from urlgrab import Cache
from BeautifulSoup import MinimalSoup as BeautifulSoup
from re import compile
from os.path import exists, getsize, dirname, join
from urllib import urlretrieve, urlencode, quote
from sys import argv
import demjson
import zlib

folder = dirname(argv[0])

cache = Cache(debug=False)

pages = []

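# Fetch up to ten listing pages (cached for two hours) and scan each for episode links.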
for index in range(1,11):
        index = cache.get("http://www.escapistmagazine.com/videos/view/zero-punctuation?page=%d"%index, max_age=60*60*2).read()
        index = index.replace("''>","'>")
        index = BeautifulSoup(index)

        for link in index.findAll("a"):
                if not link.has_key("href"):
                        continue
                if link["href"].find("http://www.escapistmagazine.com/videos/view/zero-punctuation/")!=-1:
                        short_href = link["href"]
                        slash = short_href.rfind("/")
                        if short_href[slash:].find("-")!=-1:
                                short_href = short_href[slash+1:slash+short_href[slash:].find("-")]
                        else: