import random
import time

import pymongo
from flask import abort, render_template, request
from lxml import html
from werkzeug.urls import url_quote

# Project-level names such as Keyword, KeywordLevel, PAGE_COUNT, Pagination,
# set_index and the shared requests session `ss` are assumed to be imported
# from elsewhere in the application.


def index():
    """ Keyword management home page: lists site-level keywords and supports
    simple filtering and pagination. """
    s = request.args.get('status', u'bare,processed,repeated')
    k = request.args.get('keyword', '')
    o = request.args.get('owner', '')
    p = int(request.args.get('page', '1'))
    start = (p - 1) * PAGE_COUNT

    # Build the MongoDB query from the request arguments.
    condition = {'level': KeywordLevel.SITE}
    if k:
        condition['name'] = k.strip()
    if o:
        condition['owner'] = o.strip()
    # Drop empty fragments so an empty ?status= does not filter on ''.
    status = [x for x in s.split(u',') if x]
    if status:
        condition['status'] = {'$in': status}

    count = Keyword.count(condition)
    cursor = Keyword.find(condition, skip=start, limit=PAGE_COUNT,
                          sort=[('baiduIndex', pymongo.DESCENDING)])
    keywords = []
    for c in cursor:
        set_index(c)
        keywords.append(c)

    pagination = Pagination(p, PAGE_COUNT, count)
    return render_template('seo/index.html', keywords=keywords,
                           pagination=pagination)
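# Worked example (hypothetical request, for illustration only):
#   GET /?status=bare&keyword=foo&page=2
# builds condition = {'level': KeywordLevel.SITE, 'name': u'foo',
#                     'status': {'$in': [u'bare']}}
# and the query skips the first PAGE_COUNT documents before returning the next
# PAGE_COUNT, sorted by baiduIndex in descending order.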
def analyze_keyword(k):
    """ Analyze a site-level keyword: fetch its Baidu index and the long-tail
    keywords related to it. """
    keyword = Keyword.find_one({'name': k})
    if not keyword:
        print 'Keyword %s does not exist' % k
        return
    if keyword.baiduIndex > 0 or keyword.baiduResult > 0:
        print 'Keyword %s has already been imported' % k
        return

    print 'Try to analyze keyword %s/%s' % (keyword._id, k)
    # Scrape the keyword page on 5118.com; `ss` is the shared requests session.
    ss.headers['Referer'] = 'http://www.5118.com/'
    t = ss.get('http://www.5118.com/seo/words/%s' % url_quote(k)).text
    tree = html.fromstring(t)
    dls = tree.xpath('//div[@class="Fn-ui-list dig-list"]/dl')
    total = len(dls)
    for dl in dls:
        if dl.get('class', '') == 'dl-word':
            # Skip the header rows of the list.
            continue
        name = unicode(dl.xpath('./dd[1]//a[1]/@title')[0].strip())
        baidu_index = dl.xpath('./dd[2]/text()')[0].strip()
        baidu_result = dl.xpath('./dd[3]/text()')[0].strip()
        if not baidu_index.isdigit():
            baidu_index = 0
        if not baidu_result.isdigit():
            baidu_result = 0
        print 'Found keyword: %s/%s/%s' % (name, baidu_index, baidu_result)
        if name == k:
            # This row is the keyword itself: record its own Baidu data.
            keyword.baiduIndex = int(baidu_index)
            keyword.baiduResult = int(baidu_result)
            if total > 2:
                keyword.total = total - 2
            keyword.save()
        else:
            if Keyword.count({'name': name}) > 0:
                print 'This keyword already exists'
                continue
            # Save a new long-tail keyword under the current site-level keyword.
            long_tail = Keyword()
            long_tail.name = name
            long_tail.level = KeywordLevel.LONG_TAIL
            long_tail.parentId = keyword._id
            long_tail.baiduIndex = int(baidu_index)
            long_tail.baiduResult = int(baidu_result)
            long_tail.save()
    # Sleep a random interval so repeated calls do not hammer 5118.com.
    time.sleep(random.randint(5, 15))
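# The helper below is a sketch, not part of the original module: it shows one
# way analyze_keyword() could be driven in batch over all site-level keywords.
# analyze_keyword() itself skips keywords that were already imported, and the
# random sleep at its end spaces out the requests to 5118.com.
def analyze_all_site_keywords():
    """ Hypothetical batch driver: analyze every site-level keyword in turn. """
    for kw in Keyword.find({'level': KeywordLevel.SITE}):
        analyze_keyword(kw.name)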
def longtail(keyword_id):
    """ List the long-tail keywords under the given site-level keyword. """
    keyword = Keyword.find_one({'_id': keyword_id})
    if not keyword:
        abort(404)

    s = request.args.get('status', u'bare,processed,repeated')
    p = int(request.args.get('page', '1'))
    start = (p - 1) * PAGE_COUNT

    condition = {'level': KeywordLevel.LONG_TAIL, 'parentId': keyword_id}
    # Drop empty fragments so an empty ?status= does not filter on ''.
    status = [x for x in s.split(u',') if x]
    if status:
        condition['status'] = {'$in': status}

    count = Keyword.count(condition)
    cursor = Keyword.find(condition, skip=start, limit=PAGE_COUNT,
                          sort=[('baiduIndex', pymongo.DESCENDING)])
    keywords = list(cursor)

    pagination = Pagination(p, PAGE_COUNT, count)
    return render_template('seo/longtail.html', keyword=keyword,
                           keywords=keywords, pagination=pagination)