def post(self):
    """Enrich a batch of items from their pages and push them to a callback URL.

    The request body is a JSON object with the keys ``items`` (a list of
    dicts), ``origin`` (a dict), ``callbackurl`` and, optionally,
    ``header``. Every item that has a ``url`` is fetched and analysed;
    values the item is missing (``title``, ``published``, ``content``,
    ``img``) are filled in from the analysed page. The items are then
    posted to ``callbackurl`` and the outcome message is written to the
    response as plain text.

    Raises:
        KeyError: if ``items``, ``origin`` or ``callbackurl`` is missing
            from the request body.
    """
    data = json.loads(self.request.body)
    items = data['items']
    origin = data['origin']
    header = data.get('header')
    # Read the required callback target up front so that a malformed
    # request fails before any page is fetched, not after all the work.
    callbackurl = data['callbackurl']

    for item in items:
        url = item.get('url')
        if not url:
            continue

        fetchResult = ContentFetcher(url, header=header, tried=2).fetch()
        content = fetchResult.get('content')
        if not content:
            logging.error('Failed to get content from %s.', url)
            continue

        # From here on the item carries the URL reported by the fetcher.
        usedUrl = fetchResult.get('url')
        item['url'] = usedUrl

        # Analysis is best effort: a failure on one item is logged and the
        # item is still sent back with whatever it already had.
        try:
            editorFormat = globalconfig.getEditorFormat()
            page = pageanalyst.analyse(usedUrl, content,
                                       editorFormat=editorFormat,
                                       monitorTitle=item.get('title'))

            if not item.get('title') and page.get('title'):
                item['title'] = page['title']

            published = page.get('published')
            # A value ending in '0000' has no hour/minute, so it is not
            # precise enough to use.
            if (not item.get('published') and published
                    and not published.endswith('0000')):
                item['published'] = published
                if origin.get('timezone'):
                    item['published'] = dateutil.adjustDate14(
                        item['published'], origin['timezone'])

            if not item.get('content') and page.get('content'):
                item['content'] = page['content']

            if not item.get('img') and page.get('images'):
                item['img'] = page['images'][0]
        except Exception:
            logging.exception('Error happens when analyse %s.', usedUrl)

    responseData = {
        'origin': origin,
        'items': items,
    }
    self.response.headers['Content-Type'] = 'text/plain'
    success = networkutil.postData(callbackurl, responseData,
                                   trycount=_CALLBACK_TRYCOUNT,
                                   timeout=_URL_TIMEOUT)
    if success:
        message = 'Push items back for %s to %s.' % (origin, callbackurl)
        logging.info(message)
    else:
        message = 'Failed to push items back for %s to %s.' % (
            origin, callbackurl)
        # A failed push is an error condition, not routine information.
        logging.error(message)
    self.response.out.write(message)
def get(self):
    """Render the analysed view of the page named by the ``url`` parameter.

    Without a ``url`` parameter, ``home.html`` is rendered with ``page``
    set to ``None``. Otherwise the parameter is decoded (see below), the
    analysed page is looked up in memcache under a hash of the URL and,
    on a miss, fetched and analysed. A page with neither content nor
    images is not cached; the client is redirected to the URL instead.
    """
    url = self.request.get('url')
    page = None
    if url:
        try:
            url = base64.b64decode(url)
            # The decoded value has every adjacent pair of characters
            # swapped; swap them back. A trailing unpaired character
            # (odd length) stays where it is.
            length = len(url)
            pieces = [url[i + 1] + url[i] for i in range(0, length - 1, 2)]
            if length % 2 != 0:
                pieces.append(url[-1])
            url = ''.join(pieces)
        except TypeError:
            # Decoding failed (presumably the parameter is a plain,
            # non-base64 URL): use the value as it is.
            pass

        key = stringutil.calculateHash([url])
        page = memcache.get(key)
        contentGot = bool(page)
        if not page:
            # tried=2 is handed to the fetcher; the original note says
            # the max try count is 3.
            fetchResult = ContentFetcher(url, tried=2).fetch()
            content = fetchResult.get('content')
            if content:
                editorFormat = globalconfig.getEditorFormat()
                page = pageanalyst.analyse(url, content,
                                           editorFormat=editorFormat)
                if page:
                    page['url'] = url
                # Cache only pages that have something to show.
                if page and (page.get('content') or page.get('images')):
                    memcache.set(key, page)
                    contentGot = True

        if not contentGot:
            # NOTE(review): this is a *permanent* redirect even though the
            # failure may be transient - confirm that this is intended.
            self.redirect(url, permanent=True)
            return

        if 'images' in page:
            # Route image URLs through the local /image/ endpoint.
            for image in page['images']:
                image['url'] = '/image/?url=' + urllib.quote(
                    image['url'].encode('utf-8'))

    templateValues = {
        'page': page,
    }
    self.render(templateValues, 'home.html')
def post(self):
    """Fetch and analyse the submitted URL and render the details on test.html.

    Reads the form fields ``url``, ``title``, ``fortest`` and
    ``httpheader`` (a JSON document used as request header for the
    fetch). The fetch result, the analysed page and the per-element
    analysis output are all handed to the ``test.html`` template.
    """
    request = self.request
    url = request.get('url')
    title = request.get('title')
    fortest = bool(request.get('fortest'))
    httpheader = request.get('httpheader')

    # The header arrives as JSON text; an empty field means "no header".
    header = json.loads(httpheader) if httpheader else None

    fetchResult = {}
    if url:
        # tried=2 is handed to the fetcher; the original note says the
        # max try count is 3.
        fetchResult = ContentFetcher(url, header=header, tried=2).fetch()
    content = fetchResult.get('content')

    # Passed to the analyser, then rendered (presumably filled in by it).
    elementResult = {}
    page = None
    if content:
        page = pageanalyst.analyse(
            url, content,
            editorFormat=globalconfig.getEditorFormat(),
            monitorTitle=title,
            fortest=fortest,
            elementResult=elementResult)

    # Show the parsed header back in readable form.
    if header:
        httpheader = jsonutil.getReadableString(header)

    self.render({
        'url': url,
        'title': title,
        'fortest': fortest,
        'httpheader': httpheader,
        'encoding': fetchResult.get('encoding'),
        'encodingSrc': fetchResult.get('encoding.src'),
        'oldContent': fetchResult.get('content.old'),
        'content': fetchResult.get('content'),
        'pagestr': jsonutil.getReadableString(page),
        'page': page,
        'elementResult': elementResult,
    }, 'test.html')