Esempio n. 1
0
def get_token():
    """OAuth2 callback: exchange the authorization code for a Dropbox token.

    Validates the CSRF ``state`` parameter against memcache, POSTs the code
    to the Dropbox token endpoint and, on HTTP 200, persists the response as
    the "default" DropboxToken entity. Returns the raw fetch result.
    """
    # Lazy %-args instead of eager formatting in the log call.
    log.info("%s", dict(request.query))
    code = request.query.get('code','')
    # CSRF check: the state value handed out at authorization time must map
    # back to the session marker we stored in memcache.
    if memcache.get("oauth:%s" % request.query.get('state','')) != "session_id":
        abort(500, "Invalid state")
    token_url = "https://api.dropbox.com/1/oauth2/token"

    log.info(request.url)
    result = Struct(fetch(token_url, urllib.urlencode({
        "code"         : code,
        "grant_type"   : "authorization_code",
        "client_id"    : settings.dropbox.app_key,
        "client_secret": settings.dropbox.app_secret,
        "redirect_uri" : "http://localhost:8080/authorize/confirmation"
    })))
    if result.status == 200:
        try:
            r = Struct(json.loads(result.data))
            r.id = "default"
            log.info(r)
            t = DropboxToken(**r)
            t.put()
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # are not swallowed; the failure is still logged, not re-raised.
            log.error("%s", traceback.format_exc())

    return result
Esempio n. 2
0
    def get_feed(self, feed_name):
        """Return the raw fetch result for the configured feed ``feed_name``.

        Raises:
            Exception: if ``feed_name`` is not present in settings.feeds.
        """
        # Idiomatic membership test (was `not feed_name in ...`).
        if feed_name not in settings.feeds:
            raise Exception('Invalid feed name')

        feed_uri = settings.feeds[feed_name]

        return fetch(feed_uri)
Esempio n. 3
0
def html_fetcher(site):
    """Fetch the favicon the hard way"""
    endpoint = "http://%s" % urlparse.urlparse(site).hostname
    try:
        res = fetch(endpoint)
    except Exception, e:
        log.error("Could not fetch %s: %s" % (endpoint, e))
        return None
Esempio n. 4
0
def google_fetcher(site):
    """Fetch the favicon via Google services"""
    endpoint = "http://www.google.com/s2/favicons?domain=%s" % urlparse.urlparse(site).hostname
    try:
        res = fetch(endpoint)
    except Exception, e:
        log.error("could not fetch %s: %s" % (endpoint, e))
        return None
Esempio n. 5
0
def dumb_fetcher(site):
    """Fetch the favicon the dumb way"""
    endpoint = "http://%s/favicon.ico" % urlparse.urlparse(site).hostname
    try:
        res = fetch(endpoint)
    except Exception, e:
        log.error("could not fetch %s: %s" % (endpoint, tb_info()))
        return None
Esempio n. 6
0
    def get_page(self, page):
        """Return a single page from the cloud store, storing it locally"""

        if not self.token:
            log.debug("No token")
            return None

        # get the folder contents
        metadata = self.get_metadata(page)
        if not metadata:
            return None

        # First non-directory entry whose basename is a recognized index
        # file is taken as the page markup.
        markup = None
        for i in metadata['contents']:
            if not i['is_dir']:
                if os.path.basename(i['path']) in self.valid_indexes:
                    markup = i['path']
                    break

        if not markup:
            return None

        get_url = _urls.files % (markup, urllib.urlencode({"access_token": self.token}))
        log.debug(get_url)
        r = fetch(get_url)

        if r['status'] == 200:
            metadata = json.loads(r['x-dropbox-metadata'])
            try:
                headers, body, mime_type = parse_rfc822(r['data'])
            except Exception as e:
                log.error("Could not parse %s: %s" % (page, e))
                return None

            # mtime is taken from cloud store metadata
            mtime = parse_dropbox_date(metadata['modified'])
            # ctime comes from the page's own headers, when present.
            ctime = headers.get('created', None)
            if ctime:
                ctime = datetime.fromtimestamp(parse_date(ctime))

            # Renamed from `id` to avoid shadowing the builtin.
            page_id = page.lower()
            params = {
                "id"       : page_id,
                "path"     : page,
                "ctime"    : ctime,
                "mtime"    : mtime,
                "title"    : headers.get('title', 'Untitled'),
                "tags"     : headers.get('tags', None),
                "body"     : body,
                "mime_type": mime_type,
                "headers"  : headers
            }
            p = Page(**params)
            p.put()
            # Cache the parsed headers for cheap metadata lookups.
            memcache.set(params['id'], params['headers'], namespace=NS_PAGE_METADATA)
            return p
        return None
Esempio n. 7
0
    def scan_subtree(self, path):
        """Search the cloud store subtree at ``path`` for index files.

        Returns a list of transformed (non-directory) search hits, or None
        when no token is set or the search request does not return 200.
        """
        if not self.token:
            log.debug("No token")
            return None

        params = {"access_token": self.token,
                  "query"       : "index"}
        search_url = _urls.search % (os.path.normpath(os.path.join(settings.dropbox.root_path, path)),
                                     urllib.urlencode(params))
        log.warn(search_url)
        r = fetch(search_url)

        if r['status'] == 200:
            # Single comprehension replaces filter(lambda ...) + second pass.
            return [transform_entry(i)
                    for i in json.loads(r["data"]) if not i["is_dir"]]
        return None
Esempio n. 8
0
    def get_attachment(self, page, filename):
        """Return an attachment from the cloud store, storing it locally"""

        if not self.token:
            log.debug("No token")
            return None

        # get the folder contents
        metadata = self.get_metadata(page)
        if not metadata:
            return None

        # Find the (non-directory) entry whose basename matches `filename`.
        target = None
        for i in metadata['contents']:
            if not i['is_dir']:
                if os.path.basename(i['path']) == filename:
                    target = i['path']
                    break

        if not target:
            return None

        get_url = _urls.files % (target, urllib.urlencode({"access_token": self.token}))
        log.debug(get_url)
        r = fetch(get_url)

        if r['status'] == 200:
            metadata = json.loads(r['x-dropbox-metadata'])

            # mtime is taken from cloud store metadata
            mtime = parse_dropbox_date(metadata['modified'])

            # Renamed from `id` to avoid shadowing the builtin.
            attachment_id = os.path.join(page.lower(), filename.lower())
            params = {
                "id"       : attachment_id,
                "path"     : page,
                "filename" : filename,
                "mtime"    : mtime,
                "data"     : r['data'],
                "mime_type": metadata['mime_type'],
            }
            a = Attachment(**params)
            a.put()
            return a
        return None
Esempio n. 9
0
    def get_metadata(self, path):
        """Return folder metadata for ``path``, using memcache as a cache.

        Sends the cached hash so the API can answer 304; a 200 response
        refreshes the cache. On 304/404 the cached value (or None) is
        returned unchanged.
        """
        if not self.token:
            log.debug("No token")
            return None

        metadata = memcache.get(path, namespace=NS_CLOUD_METADATA)
        query = {"access_token": self.token}
        if metadata:
            # Let the server short-circuit with 304 when nothing changed.
            query["hash"] = metadata["hash"]
        full_path = os.path.normpath(os.path.join(settings.dropbox.root_path, path))
        metadata_url = _urls.metadata % (full_path, urllib.urlencode(query))
        log.debug(metadata_url)
        response = fetch(metadata_url)

        if response['status'] == 200:
            metadata = json.loads(response["data"])
            memcache.set(path, metadata, namespace=NS_CLOUD_METADATA)

        return metadata # also valid for 304/404 return values
Esempio n. 10
0
    def fetch_feed(self, feed):
        """Fetch ``feed`` over HTTP unless TTL/interval throttles apply.

        NOTE(review): this snippet appears truncated — the success path
        after the try/except is not visible here.
        """

        if not feed.enabled:
            return

        (schema, netloc, path, params, query,
         fragment) = urlparse.urlparse(feed.url)

        now = time.time()

        if feed.last_checked:
            if feed.ttl:
                # Honor the feed's own TTL (presumably minutes, given the
                # *60 — confirm against the feed model).
                if (now - feed.last_checked) < (feed.ttl * 60):
                    log.info("TTL %s" % netloc)
                    return

            # Global politeness floor between successive checks.
            if (now - feed.last_checked) < settings.fetcher.min_interval:
                log.info("INTERVAL %s" % netloc)
                return

        if feed.last_modified:
            if (now - feed.last_modified) < settings.fetcher.min_interval:
                log.info("LAST_MODIFIED %s" % netloc)
                return
            # Presumably formatted for a conditional GET — verify in fetch().
            modified = http_time(feed.last_modified)
        else:
            modified = None

        try:
            response = fetch(feed.url,
                             etag=feed.etag,
                             last_modified=modified,
                             timeout=settings.fetcher.fetch_timeout)
            log.debug("%s - %d, %d" %
                      (feed.url, response['status'], len(response['data'])))
        except Exception, e:
            log.error("Could not fetch %s: %s" % (feed.url, e))
            # TODO: store reason properly
            # Network connect timeout error (Unknown)
            self.after_fetch(feed, status=599, error=True)
            return
Esempio n. 11
0
    def fetch_feed(self, feed):
        """Fetch ``feed`` over HTTP unless TTL/interval throttles apply.

        NOTE(review): near-duplicate of another fetch_feed definition in
        this file (formatting differs only); this snippet also appears
        truncated after the try/except.
        """

        if not feed.enabled:
            return

        (schema, netloc, path, params, query, fragment) = urlparse.urlparse(feed.url)

        now = time.time()

        if feed.last_checked:
            if feed.ttl:
                # Honor the feed's own TTL (presumably minutes, given the
                # *60 — confirm against the feed model).
                if (now - feed.last_checked) < (feed.ttl * 60):
                    log.info("TTL %s" % netloc)
                    return

            # Global politeness floor between successive checks.
            if (now - feed.last_checked) < settings.fetcher.min_interval:
                log.info("INTERVAL %s" % netloc)
                return

        if feed.last_modified:
            if (now - feed.last_modified) < settings.fetcher.min_interval:
                log.info("LAST_MODIFIED %s" % netloc)
                return
            # Presumably formatted for a conditional GET — verify in fetch().
            modified = http_time(feed.last_modified)
        else:
            modified = None

        try:
            response = fetch(feed.url, etag = feed.etag, last_modified = modified, timeout=settings.fetcher.fetch_timeout)
            log.debug("%s - %d, %d" % (feed.url, response['status'], len(response['data'])))
        except Exception, e:
            log.error("Could not fetch %s: %s" % (feed.url, e))
            # TODO: store reason properly
            # Network connect timeout error (Unknown)
            self.after_fetch(feed, status = 599, error = True)
            return
Esempio n. 12
0
    # NOTE(review): orphan fragment — this `except` continues a `try` whose
    # opening (and the enclosing `def`) lie outside this view; code left
    # untouched, comments only.
    except Exception, e:
        log.error("Could not fetch %s: %s" % (endpoint, e))
        return None

    try:
        # Parse the fetched HTML and look for a favicon <link>.
        soup = BeautifulSoup(res['data'])
    except Exception, e:
        log.error("Could not parse %s: %s" % (endpoint, e))
        return None

    # <link rel="shortcut icon" href="..."> points at the favicon.
    link = soup.find("link", rel="shortcut icon")
    if not link:
        return None
    url = link['href']
    try:
        res = fetch(url)
    except Exception, e:
        # NOTE(review): logs `endpoint`, not the `url` actually fetched —
        # possibly misleading; left as-is in this doc-only pass.
        log.error("could not fetch %s: %s" % (endpoint, e))
        return None
    return data_uri(res['content-type'], res['data'])


def fetch_anyway(site):
    """Try each favicon fetcher in turn and return the first hit.

    Falls back to the module-level default icon when every strategy fails.
    """
    # `global _default` removed: it is only read here, never rebound, so the
    # declaration (and the dead `data = None` initializer) were unnecessary.
    for handler in (google_fetcher, dumb_fetcher, html_fetcher):
        data = handler(site)
        if data:
            return data
    return _default