def get(self, message='', key=''):
    """Render the model browser page ('index.html').

    message: text passed through to the template; replaced by a prompt
        when the request carries no 'modelname'.
    key: item key to show; when empty, the request's 'selectedkey'
        parameter is used instead.
    """
    request = self.request

    names = cmapi.getModelNames()
    names.sort()

    selected = key or request.get('selectedkey')
    modelname = request.get('modelname')

    # Start from the "nothing selected" state and fill in what is known.
    keys = []
    shown = ''
    if modelname:
        keys = cmapi.getModelKeys(modelname=modelname)
        keys.sort()
        if selected:
            raw = cmapi.getItemValue(selected, modelname=modelname)
            shown = jsonutil.getReadableString(raw)
    else:
        message = 'Please select a model'

    # On a 'Remove' action the page is rendered without a selected item.
    if request.get('action') == 'Remove':
        selected = ''
        shown = ''

    self.render({
        'modelname': modelname,
        'modelnames': names,
        'modelkeys': keys,
        'key': selected,
        'value': shown,
        'message': message,
    }, 'index.html')
def post(self):
    """Fetch the submitted url, analyse its content and render 'test.html'.

    Reads 'url', 'title', 'fortest' and 'httpheader' (a JSON string) from
    the request. The page is only analysed when the fetch produced content.
    """
    request = self.request
    url = request.get('url')
    title = request.get('title')
    fortest = bool(request.get('fortest'))

    httpheader = request.get('httpheader')
    header = json.loads(httpheader) if httpheader else None

    fetchResult = {}
    content = None
    if url:
        # tried=2 — original note: "the max try count is 3".
        fetchResult = ContentFetcher(url, header=header, tried=2).fetch()
        content = fetchResult.get('content')

    page = None
    elementResult = {}
    if content:
        page = pageanalyst.analyse(
            url,
            content,
            editorFormat=globalconfig.getEditorFormat(),
            monitorTitle=title,
            fortest=fortest,
            elementResult=elementResult,
        )

    # Echo the header back in readable form once it has parsed successfully.
    if header:
        httpheader = jsonutil.getReadableString(header)

    self.render({
        'url': url,
        'title': title,
        'fortest': fortest,
        'httpheader': httpheader,
        'encoding': fetchResult.get('encoding'),
        'encodingSrc': fetchResult.get('encoding.src'),
        'oldContent': fetchResult.get('content.old'),
        'content': fetchResult.get('content'),
        'pagestr': jsonutil.getReadableString(page),
        'page': page,
        'elementResult': elementResult,
    }, 'test.html')
def post(self):
    """Build a news source definition, run it against content, and render.

    With action == 'JSON' the definition is parsed from the 'jsonstr'
    request parameter (falling back to a copy of _DEFAULT_NEWSSOURCE when
    it is empty). Otherwise the definition is assembled from the individual
    form fields. If no content was posted and the definition has a
    'fetchurl', the content is fetched. Content is then parsed with the
    definition's selector, or scanned for links when there is no selector.

    Raises whatever json.loads raises when 'jsonstr', 'pageinfo',
    'httpheader' or 'formatter' contain invalid JSON.
    """
    request = self.request
    action = request.get('action')
    keyword = ''
    pageinfo = None

    # Display-only values. They are given defaults up front so that every
    # path reaches the template with all of them bound; previously
    # oldContent was unbound on the JSON path when nothing was fetched.
    encodingUsed = ''
    urlUsed = ''
    oldContent = ''
    content = ''
    httpheader = ''
    formatter = ''

    if action == 'JSON':
        jsonstr = request.get('jsonstr')
        if jsonstr:
            newssource = json.loads(jsonstr)
        else:
            # Work on a copy: 'active' may be assigned below and must not
            # leak into the shared module-level default.
            newssource = dict(_DEFAULT_NEWSSOURCE)
    else:
        keyword = request.get('keyword').strip()
        pageinfo = request.get('pageinfo').strip()
        if pageinfo:
            pageinfo = json.loads(pageinfo)

        newssource = {}
        newssource['active'] = bool(request.get('active'))
        newssource['slug'] = request.get('slug')
        newssource['name'] = request.get('name')
        newssource['order'] = request.get('order')
        newssource['charts'] = bool(request.get('charts'))

        fetchurl = request.get('fetchurl')
        if fetchurl and not fetchurl.startswith('http'):
            fetchurl = 'http://' + fetchurl
        newssource['fetchurl'] = fetchurl
        # Without an explicit slug, use the host part of the fetch url.
        if not newssource['slug'] and fetchurl:
            newssource['slug'] = urlparse.urlparse(fetchurl).netloc

        httpheader = request.get('httpheader')
        if httpheader:
            newssource['header'] = json.loads(httpheader)
        newssource['encoding'] = request.get('encoding')
        newssource['tags'] = request.get('tags')

        # following fields only for showing parsed result.
        encodingUsed = request.get('encodingUsed')
        urlUsed = request.get('urlUsed')
        oldContent = request.get('oldContent')

        newssource['selector'] = request.get('selector').strip()

        conditions = {}
        for flag in ('returnall', 'emptytitle', 'detectdetail',
                     'scripttext'):
            conditions[flag] = bool(request.get(flag))

        excludeselector = request.get('excludeselector').strip()
        if excludeselector:
            conditions['exclude'] = {'selector': excludeselector}
        includeselector = request.get('includeselector').strip()
        if includeselector:
            conditions['include'] = {'selector': includeselector}

        # (criterion key, request parameter); only non-empty selectors are
        # stored, and 'criterion' is omitted when none is given.
        criterionFields = (
            ('url', 'urlselector'),
            ('title', 'titleselector'),
            ('content', 'contentselector'),
            ('image', 'imageselector'),
            ('link', 'linkselector'),
            ('imagelink', 'imagelinkselector'),
        )
        criterion = {}
        for name, param in criterionFields:
            fieldselector = request.get(param).strip()
            if fieldselector:
                criterion[name] = fieldselector
        if criterion:
            conditions['criterion'] = criterion
        newssource['conditions'] = conditions

        formatter = request.get('formatter')
        if formatter:
            newssource['formatter'] = json.loads(formatter)
        newssource['description'] = request.get('description').strip()
        content = request.get('content')

    # Serialised before the 'active' default is applied, so the shown JSON
    # reflects the definition as it was supplied.
    jsonstr = jsonutil.getReadableString(newssource)
    if 'active' not in newssource:
        newssource['active'] = True

    items = []
    links = []
    selector = newssource.get('selector')
    fetchurl = newssource.get('fetchurl')

    if not content and fetchurl:
        fetcher = ContentFetcher(
            fetchurl,
            header=newssource.get('header'),
            encoding=newssource.get('encoding'),
            tried=2,  # original note: "the max try count is 3"
        )
        fetchResult = fetcher.fetch()
        content = fetchResult.get('content')
        oldContent = fetchResult.get('content.old')
        urlUsed = fetchResult.get('url')
        encodingUsed = '%s-%s' % (fetchResult.get('encoding'),
                                  fetchResult.get('encoding.src'))

    if content:
        content = lxmlutil.removeEncodingDeclaration(content)
        if selector:
            parser = HtmlContentParser()
            items = parser.parse(urlUsed, content, selector,
                                 newssource.get('conditions'),
                                 newssource.get('formatter'))
        else:
            links = linkdetector.detect(content, keyword)

    if items and newssource.get('conditions', {}).get('detectdetail'):
        detaildetector.populateDetailUrls(items)

    # Show header/formatter in readable form when the definition has them.
    if newssource.get('header'):
        httpheader = jsonutil.getReadableString(newssource['header'])
    if newssource.get('formatter'):
        formatter = jsonutil.getReadableString(newssource['formatter'])

    if not pageinfo and fetchurl:
        pageinfo = pmapi.getPage(fetchurl)

    templateValues = {
        'newssource': newssource,
        'httpheader': httpheader,
        'formatter': formatter,
        'content': content,
        'oldContent': oldContent,
        'encodingUsed': encodingUsed,
        'urlUsed': urlUsed,
        'keyword': keyword,
        'links': links,
        'items': items,
        'jsonstr': jsonstr,
        'pageinfo': pageinfo,
        'strpageinfo': json.dumps(pageinfo),
    }
    self._render(templateValues)
def post(self):
    """Build a news source definition, run it against content, and render.

    With action == 'JSON' the definition is parsed from the 'jsonstr'
    request parameter (falling back to a copy of _DEFAULT_NEWSSOURCE when
    it is empty). Otherwise the definition is assembled from the individual
    form fields. If no content was posted and the definition has a
    'fetchurl', the content is fetched. Content is then parsed with the
    definition's selector, or scanned for links when there is no selector.

    Raises whatever json.loads raises when 'jsonstr', 'pageinfo',
    'httpheader' or 'formatter' contain invalid JSON.
    """
    request = self.request
    action = request.get('action')
    keyword = ''
    pageinfo = None

    # Display-only values. They are given defaults up front so that every
    # path reaches the template with all of them bound; previously
    # oldContent was unbound on the JSON path when nothing was fetched.
    encodingUsed = ''
    urlUsed = ''
    oldContent = ''
    content = ''
    httpheader = ''
    formatter = ''

    if action == 'JSON':
        jsonstr = request.get('jsonstr')
        if jsonstr:
            newssource = json.loads(jsonstr)
        else:
            # Work on a copy: 'active' may be assigned below and must not
            # leak into the shared module-level default.
            newssource = dict(_DEFAULT_NEWSSOURCE)
    else:
        keyword = request.get('keyword').strip()
        pageinfo = request.get('pageinfo').strip()
        if pageinfo:
            pageinfo = json.loads(pageinfo)

        newssource = {}
        newssource['active'] = bool(request.get('active'))
        newssource['slug'] = request.get('slug')
        newssource['name'] = request.get('name')
        newssource['order'] = request.get('order')
        newssource['charts'] = bool(request.get('charts'))

        fetchurl = request.get('fetchurl')
        if fetchurl and not fetchurl.startswith('http'):
            fetchurl = 'http://' + fetchurl
        newssource['fetchurl'] = fetchurl
        # Without an explicit slug, use the host part of the fetch url.
        if not newssource['slug'] and fetchurl:
            newssource['slug'] = urlparse.urlparse(fetchurl).netloc

        httpheader = request.get('httpheader')
        if httpheader:
            newssource['header'] = json.loads(httpheader)
        newssource['encoding'] = request.get('encoding')
        newssource['tags'] = request.get('tags')

        # following fields only for showing parsed result.
        encodingUsed = request.get('encodingUsed')
        urlUsed = request.get('urlUsed')
        oldContent = request.get('oldContent')

        newssource['selector'] = request.get('selector').strip()

        conditions = {}
        for flag in ('returnall', 'emptytitle', 'detectdetail',
                     'scripttext'):
            conditions[flag] = bool(request.get(flag))

        excludeselector = request.get('excludeselector').strip()
        if excludeselector:
            conditions['exclude'] = {'selector': excludeselector}
        includeselector = request.get('includeselector').strip()
        if includeselector:
            conditions['include'] = {'selector': includeselector}

        # (criterion key, request parameter); only non-empty selectors are
        # stored, and 'criterion' is omitted when none is given.
        criterionFields = (
            ('url', 'urlselector'),
            ('title', 'titleselector'),
            ('content', 'contentselector'),
            ('image', 'imageselector'),
            ('link', 'linkselector'),
            ('imagelink', 'imagelinkselector'),
        )
        criterion = {}
        for name, param in criterionFields:
            fieldselector = request.get(param).strip()
            if fieldselector:
                criterion[name] = fieldselector
        if criterion:
            conditions['criterion'] = criterion
        newssource['conditions'] = conditions

        formatter = request.get('formatter')
        if formatter:
            newssource['formatter'] = json.loads(formatter)
        newssource['description'] = request.get('description').strip()
        content = request.get('content')

    # Serialised before the 'active' default is applied, so the shown JSON
    # reflects the definition as it was supplied.
    jsonstr = jsonutil.getReadableString(newssource)
    if 'active' not in newssource:
        newssource['active'] = True

    items = []
    links = []
    selector = newssource.get('selector')
    fetchurl = newssource.get('fetchurl')

    if not content and fetchurl:
        fetcher = ContentFetcher(
            fetchurl,
            header=newssource.get('header'),
            encoding=newssource.get('encoding'),
            tried=2,  # original note: "the max try count is 3"
        )
        fetchResult = fetcher.fetch()
        content = fetchResult.get('content')
        oldContent = fetchResult.get('content.old')
        urlUsed = fetchResult.get('url')
        encodingUsed = '%s-%s' % (fetchResult.get('encoding'),
                                  fetchResult.get('encoding.src'))

    if content:
        content = lxmlutil.removeEncodingDeclaration(content)
        if selector:
            parser = HtmlContentParser()
            items = parser.parse(urlUsed, content, selector,
                                 newssource.get('conditions'),
                                 newssource.get('formatter'))
        else:
            links = linkdetector.detect(content, keyword)

    if items and newssource.get('conditions', {}).get('detectdetail'):
        detaildetector.populateDetailUrls(items)

    # Show header/formatter in readable form when the definition has them.
    if newssource.get('header'):
        httpheader = jsonutil.getReadableString(newssource['header'])
    if newssource.get('formatter'):
        formatter = jsonutil.getReadableString(newssource['formatter'])

    if not pageinfo and fetchurl:
        pageinfo = pmapi.getPage(fetchurl)

    templateValues = {
        'newssource': newssource,
        'httpheader': httpheader,
        'formatter': formatter,
        'content': content,
        'oldContent': oldContent,
        'encodingUsed': encodingUsed,
        'urlUsed': urlUsed,
        'keyword': keyword,
        'links': links,
        'items': items,
        'jsonstr': jsonstr,
        'pageinfo': pageinfo,
        'strpageinfo': json.dumps(pageinfo),
    }
    self._render(templateValues)