def _calculateHash(items):
    """Hash the non-empty 'url' values of items, in iteration order.

    Items whose 'url' is missing or falsy contribute nothing. The collected
    list is handed to stringutil.calculateHash and its result is returned.
    """
    candidates = (entry.get('url') for entry in items)
    return stringutil.calculateHash([link for link in candidates if link])
def _calculateHash(items):
    """Hash the non-empty 'url' and 'title' values of items.

    For each item the url (if truthy) is collected first, then the title
    (if truthy). The collected list is handed to stringutil.calculateHash
    and its result is returned.
    """
    parts = []
    for entry in items:
        fields = (entry.get('url'), entry.get('title'))
        parts.extend(value for value in fields if value)
    return stringutil.calculateHash(parts)
def _addTwitterPage(eventPages, word, twitterAccount):
    """Append the first Twitter search hit for a word to eventPages, once.

    The search keyword is the first two entries of word['keywords'] joined
    by a space. The first page returned by twitter.search gets a 'hash'
    computed from its 'content'; it is appended only when no page already
    in eventPages carries the same hash.

    Args:
        eventPages: list of page dicts; mutated in place when a page is added.
        word: dict with a 'keywords' sequence.
        twitterAccount: passed through to twitter.search; when falsy no
            search is made.

    Returns:
        True if a page was appended, False otherwise.
    """
    if not twitterAccount:
        # Return False (not None) so every "nothing added" path agrees.
        return False

    keyword = ' '.join(word['keywords'][:2])
    tpages = twitter.search(keyword, twitterAccount)
    if not tpages:
        return False

    tpage = tpages[0]
    tpage['hash'] = stringutil.calculateHash([tpage['content']])
    # Skip the page if one with the same content hash is already present.
    if any(page.get('hash') == tpage['hash'] for page in eventPages):
        return False

    eventPages.append(tpage)
    return True
def get(self):
    """Render 'home.html' for the page named by the 'url' request parameter.

    The parameter is first treated as base64 text whose decoded characters
    have been swapped pairwise (an odd trailing character is left in
    place); if base64 decoding raises TypeError the raw value is used
    as-is. The analysed page is looked up in memcache under a hash of the
    URL and, on a miss, fetched and analysed, then cached when it has
    content or images. When no usable page is obtained the client is
    redirected to the URL itself. Without a 'url' parameter the template
    is rendered with page set to None.
    """
    url = self.request.get('url')
    page = None
    if url:
        try:
            decoded = base64.b64decode(url)
            # Undo the pairwise swap: "badc" -> "abcd". Built with slices
            # and join instead of repeated string concatenation.
            swapped = [second + first
                       for first, second in zip(decoded[0::2], decoded[1::2])]
            if len(decoded) % 2 != 0:
                swapped.append(decoded[-1])
            url = ''.join(swapped)
        except TypeError:
            # Not valid base64: fall back to the parameter as given.
            pass

        key = stringutil.calculateHash([url])
        page = memcache.get(key)
        contentGot = bool(page)
        if not page:
            tried = 2 # the max try count is 3
            fetcher = ContentFetcher(url, tried=tried)
            fetchResult = fetcher.fetch()
            content = fetchResult.get('content')
            if content:
                editorFormat = globalconfig.getEditorFormat()
                page = pageanalyst.analyse(url, content, editorFormat=editorFormat)
                if page:
                    page['url'] = url
            # Only cache pages that actually carry something to show.
            if page and (page.get('content') or page.get('images')):
                memcache.set(key, page)
                contentGot = True

        if not contentGot:
            # NOTE(review): url comes from the request, so this redirects
            # to a caller-chosen location - confirm that is acceptable.
            self.redirect(url, permanent=True)
            return

        if 'images' in page:
            # Route image URLs through the local /image/ handler.
            for image in page['images']:
                image['url'] = '/image/?url=' + urllib.quote(image['url'].encode('utf-8'))

    templateValues = {
        'page': page,
    }
    self.render(templateValues, 'home.html')
def get(self):
    """Write the bytes fetched from the 'url' request parameter.

    The Content-Type header is 'image/<ext>' taken from the extension of
    the URL path, or 'image/jpeg' when the path has no extension. The body
    is read from memcache under a hash of the URL; on a miss it is
    downloaded with urllib2 and cached when non-empty. Nothing is written
    when the 'url' parameter is missing or empty.
    """
    url = self.request.get('url')
    if not url:
        return

    path = urlparse.urlparse(url).path
    ext = os.path.splitext(path)[1]
    if ext:
        # ext includes the leading dot; drop it for the subtype.
        contentType = 'image/%s' % (str(ext)[1:], )
    else:
        contentType = 'image/jpeg'
    self.response.headers['Content-Type'] = contentType

    key = stringutil.calculateHash([url])
    content = memcache.get(key)
    if not content:
        # NOTE(review): url is caller-supplied and fetched server-side -
        # confirm that upstream restricts which hosts may be requested.
        res = urllib2.urlopen(url)
        try:
            content = res.read()
        finally:
            # Close the response even when read() raises.
            res.close()
        if content:
            memcache.set(key, content)
    self.response.out.write(content)