Beispiel #1
0
def _calculateHash(items):
    """Return ``stringutil.calculateHash`` of the items' URLs.

    The ``'url'`` value of each item is collected in iteration order;
    items whose URL is missing or otherwise falsy are skipped.
    """
    candidates = (entry.get('url') for entry in items)
    urls = [candidate for candidate in candidates if candidate]
    return stringutil.calculateHash(urls)
Beispiel #2
0
def _calculateHash(items):
    """Return ``stringutil.calculateHash`` of the items' URLs and titles.

    For every item, the ``'url'`` value and then the ``'title'`` value are
    collected (in that order, item by item); falsy or missing values are
    skipped.
    """
    parts = []
    for entry in items:
        # Keep the url-before-title order within each item.
        for field in ('url', 'title'):
            value = entry.get(field)
            if value:
                parts.append(value)
    return stringutil.calculateHash(parts)
Beispiel #3
0
def _addTwitterPage(eventPages, word, twitterAccount):
    if not twitterAccount:
        return
    keyword = ' '.join(word['keywords'][:2])
    tpages = twitter.search(keyword, twitterAccount)
    if not tpages:
        return False
    tpage = tpages[0]
    tpage['hash'] = stringutil.calculateHash([tpage['content']])
    existed = False
    for page in eventPages:
        if page.get('hash') == tpage['hash']:
            existed = True
            break
    if not existed:
        eventPages.append(tpage)
        return True
    return False
Beispiel #4
0
 def get(self):
     """Render the analysed page for the encoded ``url`` query parameter.

     The parameter is base64-decoded and the decoded characters are swapped
     pairwise (a trailing odd character is left in place) to obtain the
     target URL. If base64 decoding raises ``TypeError`` the raw parameter
     value is used as the URL instead.

     The analysed page is looked up in memcache under the key returned by
     ``stringutil.calculateHash([url])``. On a miss the URL is fetched and
     analysed, and the result is cached when it has content or images.
     When no usable page is obtained the client is redirected permanently
     to the URL itself. Otherwise image URLs are rewritten to point at
     ``/image/?url=...`` and ``home.html`` is rendered.

     A request without a ``url`` parameter returns without writing a
     response body.
     """
     url = self.request.get('url')
     if not url:
         # Nothing to look up, fetch or redirect to.
         return
     # NOTE(review): ``url`` is caller-supplied and is both fetched
     # server-side and used as a redirect target; consider validating its
     # scheme/host.
     try:
         decoded = base64.b64decode(url)
     except TypeError:
         # Not valid base64: keep the parameter as a plain URL.
         pass
     else:
         # Swap each pair of characters back; an odd last character stays.
         chars = list(decoded)
         for i in range(0, len(chars) - 1, 2):
             chars[i], chars[i + 1] = chars[i + 1], chars[i]
         url = ''.join(chars)
     key = stringutil.calculateHash([url])
     page = memcache.get(key)
     contentGot = bool(page)
     if not contentGot:
         tried = 2  # the max try count is 3
         fetchResult = ContentFetcher(url, tried=tried).fetch()
         content = fetchResult.get('content')
         if content:
             editorFormat = globalconfig.getEditorFormat()
             page = pageanalyst.analyse(url, content, editorFormat=editorFormat)
             if page:
                 page['url'] = url
                 # Only cache (and render) pages that carry something to show.
                 if page.get('content') or page.get('images'):
                     memcache.set(key, page)
                     contentGot = True
     if not contentGot:
         self.redirect(url, permanent=True)
         return
     if 'images' in page:
         # Route images through the local /image/ handler.
         for image in page['images']:
             image['url'] = '/image/?url=' + urllib.quote(image['url'].encode('utf-8'))
     templateValues = {
         'page': page,
     }
     self.render(templateValues, 'home.html')
Beispiel #5
0
 def get(self):
     """Serve the image at the ``url`` query parameter, caching it in memcache.

     The Content-Type header is derived from the extension of the URL path:
     the type reported by ``mimetypes.guess_type`` is used when it is an
     ``image/*`` type, otherwise ``image/<extension>``, and ``image/jpeg``
     when the path has no extension. The image bytes are looked up in
     memcache under the key returned by ``stringutil.calculateHash([url])``
     and fetched with ``urllib2.urlopen`` on a miss; non-empty content is
     stored back in the cache.

     A request without a ``url`` parameter returns without writing anything.
     Errors raised by the fetch propagate to the caller.
     """
     import mimetypes  # local import: only needed by this handler

     url = self.request.get('url')
     if not url:
         return
     # NOTE(review): ``url`` is caller-supplied and fetched server-side;
     # consider restricting the allowed schemes/hosts.
     path = urlparse.urlparse(url).path
     ext = os.path.splitext(path)[1]
     guessed = mimetypes.guess_type(path)[0]
     if guessed and guessed.startswith('image/'):
         # Registered type, e.g. ".jpg" -> "image/jpeg" rather than "image/jpg".
         contentType = guessed
     elif ext:
         # Unknown to mimetypes: fall back to the bare extension.
         contentType = 'image/%s' % (str(ext)[1:], )
     else:
         contentType = 'image/jpeg'
     self.response.headers['Content-Type'] = contentType
     key = stringutil.calculateHash([url])
     content = memcache.get(key)
     if not content:
         res = urllib2.urlopen(url)
         try:
             content = res.read()
         finally:
             # Close the response even if reading fails part-way.
             res.close()
         if content:
             memcache.set(key, content)
     self.response.out.write(content)