def set(self, url, post_data, data, headers): if post_data: url_hash = hashlib.sha1(url + '?' + post_data).hexdigest() else: url_hash = hashlib.sha1(url).hexdigest() domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:]) conn = self.connect() c = conn.cursor() # Insert a row of data if not post_data: post_data="" only_headers = 0 if data == -1: only_headers = 1 data = "" created = time.mktime(time.localtime()) content_type = headers.get('content-type', '').split(';')[0].strip() if content_type in COMPRESS_TYPES: compressed = 1 data = zlib.compress(data) else: compressed = 0 data = sqlite3.Binary(data) #fixme: this looks wrong try: _headers = json.dumps(headers) except: for h in headers: headers[h] = headers[h].decode(detect_encoding(headers[h])) _headers = json.dumps(headers) t = (url_hash, domain, url, post_data, _headers, created, data, only_headers, compressed) c.execute(u"""INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?, ?, ?, ?)""", t) # Save (commit) the changes and clean up conn.commit() c.close() conn.close()
result = store.get(url, data, headers, timeout) if not result: try: url_headers, result = net.read_url(url, data, headers, return_headers=True) except urllib2.HTTPError, e: e.headers['Status'] = "%s" % e.code url_headers = dict(e.headers) result = e.read() if url_headers.get('content-encoding', None) == 'gzip': result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() if not valid or valid(result, url_headers): store.set(url, post_data=data, data=result, headers=url_headers) else: raise InvalidResult(result, url_headers) if unicode: encoding = detect_encoding(result) if not encoding: encoding = 'latin-1' result = result.decode(encoding) return result def save_url(url, filename, overwrite=False): if not os.path.exists(filename) or overwrite: dirname = os.path.dirname(filename) if not os.path.exists(dirname): os.makedirs(dirname) data = read_url(url) f = open(filename, 'w') f.write(data) f.close()