Beispiel #1
0
    def set(self, url, post_data, data, headers):
        """Insert or replace the cached response for a request.

        The row is keyed by the SHA-1 hex digest of ``url``, with
        ``'?' + post_data`` appended when ``post_data`` is truthy.

        Parameters:
            url: request URL; hashed for the key and also stored verbatim.
            post_data: request body, or a falsy value for none (stored as "").
            data: response body, or ``-1`` to record a headers-only entry
                (stored with an empty body and ``only_headers`` set to 1).
            headers: dict of response headers, stored as JSON. Its
                ``content-type`` entry decides whether the body is
                zlib-compressed (types listed in ``COMPRESS_TYPES``).

        The caller's ``headers`` dict is never modified. The cursor and the
        connection are closed even when the insert or commit fails.
        """
        if post_data:
            url_hash = hashlib.sha1(url + '?' + post_data).hexdigest()
        else:
            url_hash = hashlib.sha1(url).hexdigest()
            # Normalise any falsy value (None, '') to an empty string column.
            post_data = ""

        # Last two dot-separated labels of the network location.
        domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])

        # data == -1 is the sentinel for "only the headers were fetched".
        only_headers = 0
        if data == -1:
            only_headers = 1
            data = ""

        created = time.mktime(time.localtime())

        # Ignore parameters such as "; charset=utf-8" when matching the type.
        content_type = headers.get('content-type', '').split(';')[0].strip()
        if content_type in COMPRESS_TYPES:
            compressed = 1
            data = zlib.compress(data)
        else:
            compressed = 0
        data = sqlite3.Binary(data)

        try:
            _headers = json.dumps(headers)
        except (TypeError, ValueError):
            # json.dumps rejects byte strings it cannot decode
            # (UnicodeDecodeError is a ValueError). Decode those values into
            # a copy so the caller's dict is left untouched; fall back to
            # latin-1 when no encoding is detected.
            decoded = {}
            for name, value in headers.items():
                if isinstance(value, bytes):
                    value = value.decode(detect_encoding(value) or 'latin-1')
                decoded[name] = value
            _headers = json.dumps(decoded)

        t = (url_hash, domain, url, post_data, _headers, created,
             data, only_headers, compressed)

        # Connect only once the row is fully prepared, and always release the
        # cursor and connection, even if the statement or commit raises.
        conn = self.connect()
        try:
            c = conn.cursor()
            try:
                c.execute(u"""INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?, ?, ?, ?)""", t)
                conn.commit()
            finally:
                c.close()
        finally:
            conn.close()
Beispiel #2
0
    result = store.get(url, data, headers, timeout)
    if not result:
        try:
            url_headers, result = net.read_url(url, data, headers, return_headers=True)
        except urllib2.HTTPError, e:
            e.headers['Status'] = "%s" % e.code
            url_headers = dict(e.headers)
            result = e.read()
            if url_headers.get('content-encoding', None) == 'gzip':
                result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
        if not valid or valid(result, url_headers):
            store.set(url, post_data=data, data=result, headers=url_headers)
        else:
            raise InvalidResult(result, url_headers)
    if unicode:
        encoding = detect_encoding(result)
        if not encoding:
            encoding = 'latin-1'
        result = result.decode(encoding)
    return result

def save_url(url, filename, overwrite=False):
    """Download ``url`` and write it to ``filename``.

    Nothing is done when ``filename`` already exists and ``overwrite`` is
    false. Missing parent directories of ``filename`` are created first.

    Parameters:
        url: address passed to ``read_url``.
        filename: destination path; may be a bare file name.
        overwrite: when true, replace an existing file.
    """
    if os.path.exists(filename) and not overwrite:
        return

    dirname = os.path.dirname(filename)
    # dirname is '' for a bare file name; os.makedirs('') would raise OSError.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)

    # NOTE(review): read_url is called with its defaults and is assumed to
    # return a byte string here - confirm against its signature.
    data = read_url(url)

    # Binary mode keeps the payload byte-exact on every platform, and the
    # with-block closes the file even if the write fails.
    with open(filename, 'wb') as f:
        f.write(data)