def get(self):
    try:
        objs = Article.get_post_for_homepage()
    except:
        # No articles yet (e.g. tables missing): send the visitor to the installer.
        self.redirect('/install')
        return
    if objs:
        fromid = objs[0].id
        endid = objs[-1].id
    else:
        fromid = endid = ''
    allpost = Article.count_all_post()
    allpage = allpost / EACH_PAGE_POST_NUM
    if allpost % EACH_PAGE_POST_NUM:
        allpage += 1
    output = self.render('index.html', {
        'title': "%s - %s" % (SITE_TITLE, SITE_SUB_TITLE),
        'keywords': KEYWORDS,
        'description': SITE_DECR,
        'objs': objs,
        'cats': Category.get_all_cat_name(),
        'tags': Tag.get_hot_tag_name(),
        'page': 1,
        'allpage': allpage,
        'listtype': 'index',
        'fromid': fromid,
        'endid': endid,
        'comments': Comment.get_recent_comments(),
        'links': Link.get_all_links(),
    }, layout='_layout.html')
    self.write(output)
    return output
def get(self, direction='next', page='2', base_id='1'):
    if page == '1':
        self.redirect(BASE_URL)
        return
    objs = Article.get_page_posts(direction, page, base_id)
    if objs:
        if direction == 'prev':
            objs.reverse()
        fromid = objs[0].id
        endid = objs[-1].id
    else:
        fromid = endid = ''
    allpost = Article.count_all_post()
    allpage = allpost / EACH_PAGE_POST_NUM
    if allpost % EACH_PAGE_POST_NUM:
        allpage += 1
    output = self.render('index.html', {
        'title': "%s - %s | Part %s" % (SITE_TITLE, SITE_SUB_TITLE, page),
        'keywords': KEYWORDS,
        'description': SITE_DECR,
        'objs': objs,
        'cats': Category.get_all_cat_name(),
        'tags': Tag.get_hot_tag_name(),
        'page': int(page),
        'allpage': allpage,
        'listtype': 'index',
        'fromid': fromid,
        'endid': endid,
        'comments': Comment.get_recent_comments(),
        'links': Link.get_all_links(),
    }, layout='_layout.html')
    self.write(output)
    return output
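# The page-count arithmetic in the two handlers above is a floor division plus
# a remainder bump, i.e. a ceiling division. A minimal, self-contained sketch
# of the same computation (assumes positive integer inputs, as there):
def page_count(total_posts, per_page):
    # -(-a // b) is ceil(a / b) for positive ints, with no float round-off
    return -(-total_posts // per_page)

assert page_count(21, 10) == 3  # 2 full pages plus a partial third
assert page_count(20, 10) == 2  # exact multiple: no extra page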
def run_decode():
    logging.debug('decode your input by our pretrained model')
    try:
        # The POST body from the frontend carries the source text as a string.
        source = request.get_json()['source']
        logging.debug('input: {}'.format(source))
        try:
            logging.debug('using the pretrained model.')
            sentNums, summary = summarizationModel.decode.run_(source)
        except Exception as e:
            logging.error(e)
        else:
            logging.debug('The number of sentences is {}'.format(sentNums))
            logging.debug('The abstract is that {}'.format(summary))
            results = {'sent_no': sentNums, 'final': summary}
        try:
            article = Content(text=source)
            abstract = Summary(text=summary)
            pair = Article(article=article.id, abstract=abstract.id)
            article.save()
            abstract.save()
            pair.save()
        except Exception as e:
            logging.error(e)
        return json.dumps(results)
    except:
        message = {'message': 'Fail to catch the data from client.'}
        return json.dumps(message)
def test_create_article():
    _author_id = random.randint(1, 20)
    _new_article = Article()
    _new_article.title = 'Test_Article_%s' % random.randint(100001, 999999)
    _new_article.author_id = _author_id
    _new_article.published_datetime = _new_article.last_modified_datetime = datetime.datetime.now()
    _random_seed = str(random.random())
    # A repeated hash digest is enough filler text for digest/content fields.
    _new_article.digest = 'digest - %s' % (random.randint(2, 5) * md5(_random_seed).hexdigest())
    _content = ArticleContent(content='content - %s' % (random.randint(10, 50) * sha224(_random_seed).hexdigest()))
    _new_article.content = _content
    db_session.add(_new_article)  # @UndefinedVariable
    db_session.flush()  # @UndefinedVariable
    _catalogs = [
        random.randint(1, 20),
        random.randint(1, 20),
        random.randint(1, 20),
    ]
    for _cid in _catalogs:
        db_session.execute(association_table_catalog_article.insert().values({  # @UndefinedVariable
            'catalog_id': _cid,
            'article_id': _new_article.id,
        }))
    db_session.commit()  # @UndefinedVariable
def run(input_filename, output_filename):
    articles = defaultdict(set)
    without_identifiers = set()
    reader = csv.reader(open(input_filename, 'r'))
    try:
        biggest = 0
        for i, article in enumerate(reader):
            article = Article(*article)
            identifiers = [(k, v) for k, v in article._asdict().items()
                           if k in IDENTIFIERS and v]
            data = None  # dict(identifiers)
            if not identifiers:
                without_identifiers.add(article.id)
                continue
            articles[identifiers[0]].add(article.id)
            # Alias every identifier of this article to one shared id set, so
            # records that share any identifier end up grouped together.
            for identifier in identifiers[1:]:
                if articles[identifiers[0]] is not articles[identifier]:
                    articles[identifiers[0]] |= articles[identifier]
                    articles[identifier] = articles[identifiers[0]]
                if len(articles[identifier]) > biggest:
                    biggest = len(articles[identifier])
            if i % 10000 == 0:
                print "%7d" % i, resource.getrusage(resource.RUSAGE_SELF)[2], biggest
                if resource.getrusage(resource.RUSAGE_SELF)[2] > 1e7:
                    print "Using too much memory"
                    raise Exception
    except Exception, e:
        print e
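# A minimal, self-contained sketch of the identifier-merging step above, with
# two made-up rows: records that share a DOI end up in the same (shared) set,
# and every identifier key aliases that one set object.
from collections import defaultdict

groups = defaultdict(set)
rows = [('a1', [('doi', '10.1/x')]),
        ('a2', [('doi', '10.1/x'), ('pmid', '123')])]
for art_id, identifiers in rows:
    groups[identifiers[0]].add(art_id)
    for ident in identifiers[1:]:
        if groups[identifiers[0]] is not groups[ident]:
            groups[identifiers[0]] |= groups[ident]
            groups[ident] = groups[identifiers[0]]

print(groups[('doi', '10.1/x')])  # {'a1', 'a2'}
print(groups[('pmid', '123')])    # same set object: {'a1', 'a2'}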
def post(self, secret="", id=""): article = (self.get_argument("article", ""),) if id and article and secret: if secret == getAttr("MOVE_SECRET"): Article.set_article(id, encode_special_txt(article[0])) return self.write("1") return self.write("Fail")
def post(self, secret=''):
    articles = (self.get_argument("articles", ''),)
    if articles and secret:
        if secret == getAttr('MOVE_SECRET'):
            Article.set_articles(articles[0])
            return self.write('1')
    return self.write('Fail')
def get(self, id=''):
    if id:
        oldobj = Article.get_article_by_id_edit(id)
        print 'DelPost()', oldobj
        if not oldobj:
            return
        if MYSQL_TO_KVDB_SUPPORT:
            oldobj_category = oldobj['category']
            oldobj_archive = oldobj['archive']
            oldobj_tags = oldobj['tags']
        else:
            oldobj_category = oldobj.category
            oldobj_archive = oldobj.archive
            oldobj_tags = oldobj.tags
        Category.remove_postid_from_cat(oldobj_category, str(id))
        Archive.remove_postid_from_archive(oldobj_archive, str(id))
        Tag.remove_postid_from_tags(set(oldobj_tags.split(',')), str(id))
        Article.del_post_by_id(id)
        increment('Totalblog', NUM_SHARDS, -1)
        cache_key_list = [
            '/',
            'post:%s' % id,
            'cat:%s' % quoted_string(oldobj_category),
        ]
        clear_cache_by_pathlist(cache_key_list)
        clear_cache_by_pathlist(['post:%s' % id])
        self.redirect('%s/admin/edit_post/' % (BASE_URL))
def getArticle():
    # The original instantiated an unused Article() here; the loop variable
    # below shadowed it immediately, so the dead statement is dropped.
    articlelist = Article.query.all()
    articles = []
    for article in articlelist:
        articles.append(article.to_json())
    return jsonify(rows=articles)
def get(self, id=""): # try: if id: oldobj = Article.get_article_by_id_edit(id) print "DelPost()", oldobj if not oldobj: return if MYSQL_TO_KVDB_SUPPORT: oldobj_category = oldobj["category"] oldobj_archive = oldobj["archive"] oldobj_tags = oldobj["tags"] else: oldobj_category = oldobj.category oldobj_archive = oldobj.archive oldobj_tags = oldobj.tags Category.remove_postid_from_cat(oldobj_category, str(id)) Archive.remove_postid_from_archive(oldobj_archive, str(id)) Tag.remove_postid_from_tags(set(oldobj_tags.split(",")), str(id)) Article.del_post_by_id(id) increment("Totalblog", NUM_SHARDS, -1) cache_key_list = ["/", "post:%s" % id, "cat:%s" % quoted_string(oldobj_category)] clear_cache_by_pathlist(cache_key_list) clear_cache_by_pathlist(["post:%s" % id]) self.redirect("%s/admin/edit_post/" % (BASE_URL))
def post(self, secret=""): articles = (self.get_argument("articles", ""),) if articles and secret: if secret == getAttr("MOVE_SECRET"): Article.set_articles(articles[0]) return self.write("1") return self.write("Fail")
def get_article_link(begin, publisher, fakeid):
    url = article_list_url(begin, fakeid)
    # Fetch the JSON response.
    r = requests.get(url, headers=headers.get_cookie())
    # Parse the JSON into a dict.
    d = json.loads(r.content)
    log('d', type(d), d)
    app_msg_list = d['app_msg_list']
    # log('app-msg-list', app_msg_list)
    articles = []
    # For each entry: insert it if it is not in the database yet, update it otherwise.
    for item in app_msg_list:
        # Escape characters that are illegal in filenames.
        item['title'] = validate_title(item['title'])
        # Filename is date + title (+ .html later).
        time_prefix = formatted_time(item['create_time']).split(' ', 1)[0].replace('/', '')
        item['filename'] = time_prefix + '-' + item['title']
        item['publisher'] = publisher
        a = Article.one(aid=item['aid'])
        if a is None:
            a = Article.new(item)
        else:
            # Already stored: refresh it and stop paging through older items.
            a.update(a.id, **item)
            break
        articles.append(a)
    # Return the list of Article objects.
    return articles
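# A small sketch of the filename prefix built above. formatted_time() is a
# project helper not shown here; this assumes it returns a string shaped like
# '2020/01/02 10:30:00', so the date part collapses to '20200102'.
ts = '2020/01/02 10:30:00'  # assumed formatted_time() output
time_prefix = ts.split(' ', 1)[0].replace('/', '')
assert time_prefix == '20200102'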
def get(self, direction='next', page='2', base_id='1'):
    if page == '1':
        self.redirect(BASE_URL)
        return
    objs = Article.get_page_posts(direction, page, base_id)
    if objs:
        if direction == 'prev':
            objs.reverse()
        if MYSQL_TO_KVDB_SUPPORT:
            fromid = objs[0]['id']
            endid = objs[-1]['id']
        else:
            fromid = objs[0].id
            endid = objs[-1].id
    else:
        fromid = endid = ''
    allpost = Article.count_all_post()
    allpage = allpost / EACH_PAGE_POST_NUM
    if allpost % EACH_PAGE_POST_NUM:
        allpage += 1
    output = self.render('index.html', {
        'title': "%s - %s | Part %s" % (getAttr('SITE_TITLE'),
                                        getAttr('SITE_SUB_TITLE'), page),
        'keywords': getAttr('KEYWORDS'),
        'description': getAttr('SITE_DECR'),
        'objs': objs,
        'cats': Category.get_all_cat_name(),
        'tags': Tag.get_hot_tag_name(),
        'archives': Archive.get_all_archive_name(),
        'page': int(page),
        'allpage': allpage,
        'listtype': 'index',
        'fromid': fromid,
        'endid': endid,
        'comments': Comment.get_recent_comments(),
        'links': Link.get_all_links(),
        'isauthor': self.isAuthor(),
        'Totalblog': get_count('Totalblog', NUM_SHARDS, 0),
    }, layout='_layout.html')
    self.write(output)
    return output
def editor(request):
    content = request.POST['content']
    title = request.POST['title']
    article = Article(title=title, content=content, group='1')
    article.save()
    result = "文章保存成功!"  # "Article saved successfully!"
    print result
    return HttpResponse(result, mimetype='application/javascript')
def get(self):
    posts = Article.get_post_for_homepage()
    output = self.render('index.xml', {
        'posts': posts,
        'site_updated': Article.get_last_post_add_time(),
    })
    self.set_header('Content-Type', 'application/atom+xml')
    self.write(output)
def _create_structure():
    category = Category('test category', 'category test', 'test_category')
    category.meta = {'id': 1, 'webtranslateit_ids': {'content': 1}}
    section = Section(category, 'test section', 'section test', 'test_section')
    section.meta = {'id': 2, 'webtranslateit_ids': {'content': 2}}
    category.sections.append(section)
    article = Article(section, 'test article', 'article body', 'test_article')
    article.meta = {'id': 3, 'webtranslateit_ids': {'content': 3, 'body': 4}}
    section.articles.append(article)
    return category, section, article
def get(self): logging.info("Seeding Datastore...") for user in users: user_ent = User(name=user[0], age=user[1]) user_ent.put() for article in articles: article_ent = Article(title=article[0]) article_ent.put()
def _createArticle(self, row):
    article = Article()
    article.title = self._getFieldValue(row, 'article', 'title')
    article.pagination = self._getFieldValue(row, 'article', 'pagination')
    for i in ['author', 'author2']:
        author_fullname = self._getFieldValue(row, 'article', i)
        if author_fullname:
            author = self.authorFctry.fromFullName(author_fullname)
            article.authors.append(author)
    article.periodique = self.periodique
    return article
def update_article():
    title = request.args.get('Title')
    author = request.args.get('Author')
    email = request.args.get('Email')
    date = request.args.get('Date')
    url = request.args.get('URL')
    content = request.args.get('Content')
    status = request.args.get('Status')
    article = Article()
    article.create(title, author, email, date, url, content, status)
    return redirect('/blog', code=302)
def source_fetch(source):
    debug("SF: Doing fetch for source: {0}".format(source.url))
    result = _source_fetch(source)
    debug("SF: Done with source fetch for {0}; result type: {1}".format(
        source.url, (result.method if result else None)))
    added_any = False
    now = datetime.datetime.now()
    to_put = []
    tasks_to_enqueue = []
    if result:
        if result.feed_title:
            source.title = result.feed_title
        if result.brand:
            source.brand = result.brand
        titles = [entry['title'] for entry in result.entries if entry['title']]
        source.shared_title_suffix = shared_suffix(titles)
        entries = result.entries[:min(25, len(result.entries))]
        entry_ids = [Article.id_for_article(entry['url'], source.url) for entry in entries]
        print "ENTRY IDs:", entry_ids
        print "Entry id lens:", str(map(len, entry_ids))
        article_futures = [Article.get_or_insert_async(id) for id in entry_ids]
        articles = [future.get_result() for future in article_futures]
        print "ARTICLE_OBJECTS:", articles
        for i, (entry, article) in enumerate(zip(entries, articles)):
            if not article.url:
                added_any = True
                article.added_date = now
                article.added_order = i
                article.source = source.key
                article.url = canonical_url(entry.get('url'))
                article.submission_url = canonical_url(entry.get('submission_url'))
                if entry['published']:
                    article.published = entry['published']
                else:
                    article.published = datetime.datetime.now()
                if not article.title:
                    article.title = entry['title']
                to_put.append(article)
                delay = (i + 1) * 4  # wait 4 seconds between tasks
                tasks_to_enqueue.append(article.create_fetch_task(delay=delay))
    debug("SF: About to put {0} items".format(len(to_put)))
    if len(to_put):
        ndb.put_multi(to_put)
    debug("SF: About to enqueue")
    if len(tasks_to_enqueue):
        taskqueue.Queue('articles').add_async(tasks_to_enqueue)
    debug("SF: done enqueuing")
    if added_any:
        source.most_recent_article_added_date = now
    source_search.add_source_to_index(source)
    source.last_fetched = now
    source.put()
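# shared_suffix() is a project helper that is not shown in this snippet. A
# plausible reading, sketched here under that assumption, is "longest suffix
# common to every title" (e.g. a site name appended to each headline):
def shared_suffix_sketch(titles):
    # Hypothetical stand-in, not the project's actual implementation.
    if not titles:
        return ''
    rev = [t[::-1] for t in titles]
    prefix = rev[0]
    for r in rev[1:]:
        # Shrink until it prefixes every reversed title.
        while not r.startswith(prefix):
            prefix = prefix[:-1]
    return prefix[::-1]

assert shared_suffix_sketch(['A - Site', 'B - Site']) == ' - Site'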
def publish():
    if request.method != 'POST':
        return render_template('publish.html')
    title, content = request.form.get('title', ''), request.form.get('content', '')
    a = Article()
    a.title = title
    a.content = content
    a.uid = session['uid']
    db.session.add(a)  # insert
    db.session.commit()
    return '发布成功'  # "Published successfully"
def add_article():
    """Adds new clothing article and redirects to the previous category page."""
    category_id = request.form.get('category')
    description = request.form.get('article-description')
    file = request.files['article-image-upload']
    tag_ids = request.form.getlist('article-tags')
    new_tag_string = request.form.get('new-tags')
    purchase_price = request.form.get('purchase-price')
    category = Category.query.get(category_id)

    if not allowed_file(file.filename):
        flash(f'File extension .{file.filename.rsplit(".", 1)[1]} not allowed')

    if file and allowed_file(file.filename):
        # Sanitizes user input
        filename = secure_filename(file.filename)

        # Cloudinary upload function: 1) folders by user and category name,
        # 2) unique filename is true,
        # 3) use cloudinary's AI to remove background
        #    (commented out b/c paid service)
        upload_file = upload(
            file,
            folder=f"user/{session['user_email']}/{category.name}",
            unique_filename=1,
            # background_removal="cloudinary_ai",
        )

        # For purchase_price, an empty string is not ok, but passing None is
        new_article = Article(user_id=session['user_id'],
                              category_id=category_id,
                              image=upload_file['secure_url'],
                              description=description,
                              purchase_price=purchase_price or None)

        all_tags = []
        for tag_id in tag_ids:
            all_tags.append(Tag.query.filter_by(tag_id=tag_id).one())

        # Any newly created tags should be added to this as well
        all_tags += Tag.parse_str_to_tag(new_tag_string, session['user_id'])

        # Then create all the tag relationships
        for tag in all_tags:
            new_article.add_tag(tag)

        db.session.add(new_article)
        db.session.commit()

        flash(f"Created new item in {category.name}")

    return redirect(f'/categories/{category_id}')
def get(self, id=''):
    try:
        if id:
            oldobj = Article.get_article_by_id_edit(id)
            Category.remove_postid_from_cat(oldobj.category, str(id))
            Archive.remove_postid_from_archive(oldobj.archive, str(id))
            Tag.remove_postid_from_tags(set(oldobj.tags.split(',')), str(id))
            Article.del_post_by_id(id)
            increment('Totalblog', NUM_SHARDS, -1)
            cache_key_list = ['/', 'post:%s' % id,
                              'cat:%s' % quoted_string(oldobj.category)]
            clear_cache_by_pathlist(cache_key_list)
            clear_cache_by_pathlist(['post:%s' % id])
            self.redirect('%s/admin/edit_post/' % (BASE_URL))
    except:
        pass
def wx_get_latest_articles(self):
    k = 'wx_latest'
    v = getMc(k)
    if v:
        return v
    posts = Article.get_articles_by_latest()
    articles_msg = {'articles': []}
    for post in posts:
        #slug = slugfy(post['title'])  # yobin 20160718
        slug = post['title']
        desc = HTML_REG.sub('', post['content'].decode('utf-8')[:DESCRIPTION_CUT_WORDS].encode('utf-8'))
        shorten_url = '%s/t/%s' % (BASE_URL, post['id'])
        article = {
            'title': slug,
            'description': desc,
            'picUrl': WX_DEFAULT_PIC,
            'url': shorten_url,
        }
        # Append the article.
        articles_msg['articles'].append(article)
        article = {}
    setMc(k, articles_msg)
    return articles_msg
def wx_search_article(self, k):
    article = Article.get_article_by_keyword(k)
    if article:
        if MYSQL_TO_KVDB_SUPPORT:
            title = article['slug']
            description = article['description']
            url = article['absolute_url']
        else:
            title = article.slug
            description = article.description
            url = article.absolute_url
        picUrl = WX_DEFAULT_PIC
        count = 1
        articles_msg = {'articles': []}
        for i in range(0, count):
            article = {
                'title': title,
                'description': description,
                'picUrl': picUrl,
                'url': url
            }
            articles_msg['articles'].append(article)
            article = {}
        return articles_msg
    return ''
def get_articles(userid):
    user = User.objects(id=userid).first()
    page = int(request.args.get('page'))
    per_page = int(request.args.get('per_page'))
    kws = {'user': user}
    if request.args.get("status") is not None:
        kws['status'] = int(request.args.get("status"))
    if request.args.get("channel_id") is not None:
        channel = Channel.objects(id=request.args.get("channel_id")).first()
        kws['channel'] = channel
    if request.args.get("begin_pubdate") is not None:
        kws['created__gte'] = request.args.get("begin_pubdate")
    if request.args.get("end_pubdate") is not None:
        kws['created__lte'] = request.args.get("end_pubdate")
    articles = Article.objects(**kws)
    paginated_articles = articles.skip((page - 1) * per_page).limit(per_page)
    return jsonify({
        "message": 'OK',
        "data": {
            "total_count": articles.count(),
            "page": page,
            "per_page": per_page,
            # Return the paginated slice; the original serialized the full
            # query set and left paginated_articles unused.
            "results": paginated_articles.to_public_json()
        }
    })
def wx_get_article_by_id(self, post_id):
    k = 'wx_post_%s' % (str(post_id))
    v = getMc(k)
    if v:
        return v
    article = Article.get_article_by_id_detail(post_id)
    if article:
        if MYSQL_TO_KVDB_SUPPORT:
            title = article['slug']
            description = article['description']
            url = article['absolute_url']
        else:
            title = article.slug
            description = article.description
            url = article.absolute_url
        picUrl = WX_DEFAULT_PIC
        count = 1
        articles_msg = {'articles': []}
        for i in range(0, count):
            article = {
                'title': title,
                'description': description,
                'picUrl': picUrl,
                'url': url
            }
            articles_msg['articles'].append(article)
            article = {}
        setMc(k, articles_msg)
        return articles_msg
    return ''
def _get_full_article(self, short_article):
    url = short_article.url
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    try:
        text = self.get_formatted_article(
            text=soup.find('div', class_='itemFullText'),
            lead=soup.find('h2', class_='itemSubtitle'))
        author = soup.find('span', class_='itemAuthor')
        if author is None:
            author = ""
        else:
            author = author.text.split('Piše:')[-1].strip()
        facebook_id = soup.find('meta', {'property': 'fb:app_id'})
        if facebook_id is not None:
            facebook_id = facebook_id['content']
            domain = self._generic_url.split('https://')[1].split('/')[0]
            comments = self._get_facebook_comments(url=url, facebook_id=facebook_id, domain=domain)
        else:
            comments = []
        if len(comments) > 0:
            logging.info('Total comments: %d' % len(comments))
        full_article = Article(short_article, text, author, comments)
        return full_article
    except AttributeError:
        logging.error("Invalid URL: %s" % url)
        return None
def watcher():
    try:
        # Check for email every 10 seconds.
        print(">> Listening for new emails")
        messages = inbox.Items
        message = messages.GetLast()
        if "[article]" in str.lower(message.subject):
            # Article is found.
            # Check if the article is pre-existing.
            db_search = session.query(Article).filter(
                Article.name == str(message.Sender),
                Article.urlslug == generate_slug(message.subject)
            ).first()
            if db_search is not None:
                print(" - Found existing article")
            else:
                sub = str.lower(message.subject)
                new_article = Article(
                    title=sub.split("[article]")[1].strip(),
                    name=str(message.Sender),
                    body=message.HTMLBody,
                    alias=get_alias(message.Sender.GetExchangeUser().PrimarySmtpAddress),
                    time=dateutil.parser.parse(str(message.SentOn)),
                    urlslug=generate_slug(message.subject)
                )
                session.add(new_article)
                session.commit()
        time.sleep(10)
    except Exception as e:
        print(e)
        time.sleep(10)
def get_response_article(self, keyword):
    global PIC_URL
    keyword = str(keyword)
    # Look up matching articles in the database (test data for now).
    article = Article.get_article_by_keyword(keyword)
    if article:
        title = article.slug
        description = article.description
        picUrl = PIC_URL
        url = article.absolute_url
        count = 1  # there may be several matches
        # Build the rich-media (news) message from the database content.
        articles_msg = {'articles': []}
        for i in range(0, count):
            article = {
                'title': title,
                'description': description,
                'picUrl': picUrl,
                'url': url
            }
            # Append the article.
            articles_msg['articles'].append(article)
            article = {}
        # Return the message.
        return articles_msg
    else:
        return
def get_response_article_by_id(self, post_id):
    global PIC_URL
    # Look the article up by its id.
    article = Article.get_article_by_id_detail(post_id)  # post_id is the article id
    if article:
        title = article.slug
        description = article.description
        picUrl = PIC_URL
        url = article.absolute_url
        count = 1
        # Build the rich-media (news) message from the database content.
        articles_msg = {'articles': []}
        for i in range(0, count):
            article = {
                'title': title,
                'description': description,
                'picUrl': picUrl,
                'url': url
            }
            # Append the article.
            articles_msg['articles'].append(article)
            article = {}
        # Return the message.
        return articles_msg
    else:
        return
def edit_view(self):
    article_id = request.args.get('id')
    article = Article.objects(id=article_id).first()
    classifications = Classification.objects.order_by('+name')
    return self.render('write.html', classifications=classifications, article=article)
def _get_full_article(self, short_article):
    url = short_article.url
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    try:
        text = self.get_formatted_article(
            text=soup.find('div', class_='article-content mt3 mb3'),
            lead=soup.find('div', class_='h4 mt0 mb2 regular roboto-slab'))
        author_tag = soup.find('meta', attrs={'name': 'author'})
        author = author_tag['content'] if 'content' in author_tag.attrs else ""
        foreign_id_tag = soup.find('form', class_='clearfix mxn1 comment-form')
        if foreign_id_tag is not None:
            foreign_id = foreign_id_tag['data-foreign-key']
            comments = self._get_comments(foreign_id)
        else:
            comments = []
            logging.warning("Foreign ID is None.")
        total_comments = int(soup.find('a', class_='px1 light-blue').text)
        if total_comments != len(comments) and len(comments) > 0:
            logging.warning("Scraped wrong number of comments")
        full_article = Article(short_article, text, author, comments)
        return full_article
    except AttributeError:
        with open(r'log/politika_errors.txt', 'a') as f:
            f.write("%s\n" % url)
        logging.error("Invalid URL: %s" % url)
        return None
def _get_full_article(self, short_article):
    url = short_article.url
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    if "article-lock" in response.content.decode('utf-8'):
        logging.error("Pay-wall: %s" % url)
        return None
    try:
        article_text = soup.find('div', class_='article-body article-wrap')
        if article_text.find('article') is not None:
            article_text = article_text.find('article')
        text = self.get_formatted_article(text=article_text,
                                          lead=soup.find('p', class_='lead'))
        author = soup.find('div', class_='article-source')
        if author is None:
            author = ""
        else:
            author = author.text
        comments = self._get_comments()
        full_article = Article(short_article, text, author, comments)
        return full_article
    except AttributeError:
        logging.error("Invalid URL: %s" % url)
        return None
def _get_full_article(self, short_article: ShortArticle):
    url = short_article.url
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    try:
        text = self.get_formatted_article(
            text=soup.find('div', class_='article-text article-video-scroll clearfix'),
            lead=soup.find('p', {'itemprop': 'description'}))
        author = soup.find('span', class_='inline-flex items-center')
        if author is None:
            author = ""
        else:
            author = author.text.strip()
        facebook_id = soup.find('meta', {'property': 'fb:app_id'})['content']
        domain = self._generic_url.split('https://')[1].split('/')[0]
        comments = self._get_facebook_comments(url=url, facebook_id=facebook_id, domain=domain)
        full_article = Article(short_article, text, author, comments)
        return full_article
    except AttributeError:
        logging.error("Invalid URL: %s" % url)
        return None
def get(self, id=''):
    obj = Article.get_article_by_id_simple(id)
    if obj:
        self.redirect('%s/topic/%d/%s' % (BASE_URL, obj.id, obj.title), 301)
        return
    else:
        self.redirect(BASE_URL)
def put_article():
    '''Add new article for a user.'''
    username = request.headers.get('x-koala-username')
    apikey = request.headers.get('x-koala-key')
    user = locate_user(username, apikey)
    reqjson = request.get_json()
    result = validators.url(reqjson['url'])
    if not result:
        # Try again, but with an http:// prefix.
        result = validators.url('http://' + reqjson['url'])
        if not result:
            logging.info("Bad URL: %s" % reqjson['url'])
            abort(400)
        else:
            reqjson['url'] = 'http://' + reqjson['url']
    title = reqjson.get('title', reqjson['url'])
    url = reqjson['url']
    date = str(datetime.now())
    read = False
    favorite = False
    owner = user.id
    article = Article.create(title=title, url=url, date=date, read=read,
                             favorite=favorite, owner=owner)
    return jsonify({'id': article.id}), 201
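# The http:// fallback above, isolated into a small sketch. This assumes the
# `validators` package behaves as the handler relies on: a falsy result for
# an invalid URL, truthy for a valid one. normalize_url is a hypothetical
# helper name, not part of the handler.
import validators

def normalize_url(raw):
    if validators.url(raw):
        return raw
    candidate = 'http://' + raw
    if validators.url(candidate):
        return candidate
    return None  # caller rejects (the handler above aborts with 400)

assert normalize_url('example.com') == 'http://example.com'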
def post(self):
    class_id = self.get_body_argument('class_id', None)
    title = self.get_body_argument('title', None)
    image_url = self.get_body_argument('image_url', None)
    note = self.get_body_argument('note', None)
    content = self.get_body_argument('content', None)
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    data = {
        'class_id': class_id,
        'title': title,
        'image_url': image_url,
        'note': note,
        'content': content,
        'author': 'LiJiaF',
        'create_date': now,
        'write_date': now
    }
    log.info('添加文章:' + json.dumps(data))  # "Adding article: ..."
    try:
        session = DBSession()
        new_article = Article(**data)
        session.add(new_article)
        session.commit()
        session.close()
    except Exception as e:
        log.error(e)
        return self.finish(json.dumps({'code': -1, 'msg': '添加失败'}))  # "add failed"
    return self.finish(json.dumps({'code': 0, 'msg': '添加成功'}))  # "add succeeded"
def combine_articles(group_id, articles):
    if len(articles) == 1:
        return articles[0]
    # The original referenced new_article before assigning it; initialize it here.
    new_article = {}
    keywords = set()
    for article in articles:
        keywords |= set(kw.strip() for kw in article.keywords.split(','))
    new_article['keywords'] = ','.join(keywords)
    titles = defaultdict(int)
    for article in articles:
        titles[article.title] += 1
    # Prefer long titles, weighted by how often each one occurs.
    _, title = max(
        (count * len(title) ** 2, title) for title, count in titles.items())
    new_article['title'] = title
    for name in 'pmc pmid doi'.split():
        identifiers = defaultdict(int)
        for article in articles:
            if getattr(article, name):
                identifiers[getattr(article, name)] += 1
        if identifiers:
            # Keep the most frequent value for this identifier type.
            _, identifier = max(map(swap, identifiers.items()))
            new_article[name] = identifier
    return Article(**new_article)
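# Worked example of the title-picking heuristic above: each candidate title is
# scored count * len(title)**2, so a longer title wins unless a shorter one is
# far more frequent. The titles here are made up for illustration.
titles = {'Deep learning': 2, 'Deep learning for genomics': 1}
score, title = max((count * len(t) ** 2, t) for t, count in titles.items())
# 'Deep learning' scores 2 * 13**2 = 338; the longer title scores 1 * 26**2 = 676.
assert title == 'Deep learning for genomics'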
def load_articles():
    """Load articles from seed-article.txt into database."""
    print("Articles")

    # Delete all rows in table, so if we need to run this a second time,
    # we won't be trying to add duplicate users
    Article.query.delete()

    # Read seed category file and insert data
    for row in open("seed/seed-article-2.txt"):
        row = row.rstrip()

        # Works for original seed data
        # user_id, category_id, description = row.split("|")

        # Lines starting with '--' are metadata lines in the file
        if not row.startswith('--'):
            article_id, description, image, purchase_price, times_worn, sell_price, user_id, category_id = row.split("|")

            # Prevent passing an empty string into field expecting float
            if not purchase_price:
                purchase_price = None

            article = Article(
                article_id=int(article_id),
                description=description,
                image=image,
                purchase_price=purchase_price,
                times_worn=times_worn,
                user_id=int(user_id),
                category_id=int(category_id),
            )
            db.session.add(article)

    db.session.commit()
def _get_full_article(self, short_article):
    url = short_article.url
    response = requests.get(url)
    # Back off and retry while the server rate-limits us (HTTP 429).
    while response.status_code == 429:
        time.sleep(5)
        print('Retry')
        response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    try:
        text = self.get_formatted_article(
            text=soup.find('div', class_='itemFullText'),
            lead=soup.find('h2', class_='itemSubTitle'))
        author = soup.find('div', class_='col-authorname')
        if author is None:
            author = ""
        else:
            author = author.text
        facebook_id = soup.find('meta', {'property': 'fb:app_id'})['content']
        domain = self._generic_url.split('https://')[1].split('/')[0]
        comments = self._get_facebook_comments(url=url, facebook_id=facebook_id, domain=domain)
        if len(comments) > 0:
            logging.info('Total comments: %d' % len(comments))
        full_article = Article(short_article, text, author, comments)
        return full_article
    except AttributeError:
        logging.error("Invalid URL: %s" % url)
        return None
def get(self, id):
    article = Article.get(self.db, id)
    if article is None:
        error = '404: Page Not Found'
        self.render('error.html', error=error, home_title=options.home_title)
    else:
        labels = ' '.join(map(lambda item: '[' + item['detail'] + ']', article['labels']))
        self.render('editArticle.html', article=article, labels=labels)
def get(self, pageId):
    p = Paginator(Article.all(self.db), 5)
    page = p.page(int(pageId))
    isAdmin = self.isAdmin()
    label_list = Label.group(self.db)
    self.render('index.html', articles=page.object_list, label_list=label_list,
                isAdmin=isAdmin, page=page, home_title=options.home_title,
                user=options.user, photo=options.photo)
def get_articles(count=10, force=False):
    max_time = (datetime.datetime.now() if force
                else datetime.datetime.now() - datetime.timedelta(hours=1))
    # ndb query syntax requires `!= None` here rather than `is not None`.
    q = Article.query(Article.ml_service_time <= max_time,
                      Article.ml_service_time != None).order(-Article.ml_service_time)
    articles = q.fetch(count)
    for a in articles:
        a.ml_service_time = datetime.datetime.now()
    ndb.put_multi(articles)
    return articles
def post(self):
    self.set_header('Content-Type', 'application/json')
    rspd = {'status': 201, 'msg': 'ok'}
    try:
        tf = {'true': 1, 'false': 0}
        timestamp = int(time())
        post_dic = {
            'category': self.get_argument("cat"),
            'title': self.get_argument("tit"),
            'content': self.get_argument("con"),
            'tags': self.get_argument("tag", '').replace(u',', ','),
            'closecomment': self.get_argument("clo", '0'),
            'password': self.get_argument("password", ''),
            'add_time': timestamp,
            'edit_time': timestamp,
            'archive': genArchive(),
        }
        if post_dic['tags']:
            tagslist = set([x.strip() for x in post_dic['tags'].split(',')])
            try:
                tagslist.remove('')
            except:
                pass
            if tagslist:
                post_dic['tags'] = ','.join(tagslist)
        post_dic['closecomment'] = tf[post_dic['closecomment'].lower()]
    except:
        rspd['status'] = 500
        rspd['msg'] = '错误: 注意必填的三项'  # "Error: check the three required fields"
        self.write(json.dumps(rspd))
        return
    postid = Article.add_new_article(post_dic)
    if postid:
        keyname = 'pv_%s' % (str(postid))
        set_count(keyname, 0, 0)
        Category.add_postid_to_cat(post_dic['category'], str(postid))
        Archive.add_postid_to_archive(genArchive(), str(postid))
        increment('Totalblog')
        if post_dic['tags']:
            Tag.add_postid_to_tags(post_dic['tags'].split(','), str(postid))
        rspd['status'] = 200
        rspd['msg'] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)  # "Done: article added; view it"
        clear_cache_by_pathlist(['/', 'cat:%s' % quoted_string(post_dic['category'])])
        if not debug:
            add_task('default', '/task/pingrpctask')
        self.write(json.dumps(rspd))
        return
    else:
        rspd['status'] = 500
        rspd['msg'] = '错误: 未知错误,请尝试重新提交'  # "Error: unknown error, please resubmit"
        self.write(json.dumps(rspd))
        return
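# The tag cleanup above, in isolation: split on commas, strip whitespace,
# dedupe via a set, and drop empties. This sketch uses set.discard() instead
# of the try/remove in the handler; the result is the same.
raw = 'python, web,, python '
tagslist = set(x.strip() for x in raw.split(','))
tagslist.discard('')
assert tagslist == {'python', 'web'}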
def get(self, id=""): obj = None if id: obj = Article.get_article_by_id_edit(id) self.echo( "admin_editpost.html", {"title": "编辑文章", "cats": Category.get_all_cat_name(), "tags": Tag.get_all_tag_name(), "obj": obj}, layout="_layout_admin.html", )
def post(self, id):
    title = self.get_argument('title')
    content_md = self.get_argument('content')
    # Labels arrive as bracketed tokens, e.g. "[python] [web]".
    pattern = r'\[[^\[\]]+\]'
    labels = re.findall(pattern, self.get_argument('labels'))
    content_html = markdown.markdown(content_md, ['codehilite'])
    try:
        Article.update(self.db, id, title, content_md, content_html)
        Label.deleteAll(self.db, id)
        for label in labels:
            detail = label[1:-1].strip()
            Label.create(self.db, id, detail)
        self.redirect('/article/' + id, permanent=True)
    except:
        error = "The post data is invalid"
        self.render('error.html', error=error, home_title=options.home_title)
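# Quick check of the label pattern used above: it grabs bracketed tokens,
# and the slice label[1:-1] strips the brackets afterwards.
import re

labels = re.findall(r'\[[^\[\]]+\]', '[python] [web dev] plain text')
assert labels == ['[python]', '[web dev]']
assert [l[1:-1].strip() for l in labels] == ['python', 'web dev']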
def get(self):
    try:
        objs = Article.get_post_for_homepage()
    except:
        self.redirect('/install')
        return
    # Hoisted out of the branches below: the original only set totalblog when
    # objs was non-empty, leaving it unbound for an empty blog.
    #totalblog = Article.get_totalnum_arti()
    totalblog = get_count('Totalblog', NUM_SHARDS, 0)
    if objs:
        if MYSQL_TO_KVDB_SUPPORT:
            fromid = objs[0]['id']
            endid = objs[-1]['id']
        else:
            fromid = objs[0].id
            endid = objs[-1].id
    else:
        fromid = endid = ''
    allpost = Article.count_all_post()
    allpage = allpost / EACH_PAGE_POST_NUM
    if allpost % EACH_PAGE_POST_NUM:
        allpage += 1
    output = self.render('index.html', {
        'title': "%s - %s" % (getAttr('SITE_TITLE'), getAttr('SITE_SUB_TITLE')),
        'keywords': getAttr('KEYWORDS'),
        'description': getAttr('SITE_DECR'),
        'objs': objs,
        'cats': Category.get_all_cat_name(),
        'tags': Tag.get_hot_tag_name(),
        'archives': Archive.get_all_archive_name(),
        'page': 1,
        'allpage': allpage,
        'listtype': 'index',
        'fromid': fromid,
        'endid': endid,
        'comments': Comment.get_recent_comments(),
        'links': Link.get_all_links(),
        'isauthor': self.isAuthor(),
        'Totalblog': totalblog,
    }, layout='_layout.html')
    self.write(output)
    return output
def get(self, id='', title=''):
    tmpl = ''
    obj = Article.get_article_by_id_detail(id)
    if not obj:
        self.redirect(BASE_URL)
        return
    # Redirect to the canonical title.
    try:
        title = unquote(title).decode('utf-8')
    except:
        pass
    if title != obj.slug:
        self.redirect(obj.absolute_url, 301)
        return
    # Password-protected posts fall back to the password template.
    if obj.password and THEME == 'default':
        rp = self.get_cookie("rp%s" % id, '')
        if rp != obj.password:
            tmpl = '_pw'
    elif obj.password and BLOG_PSW_SUPPORT:
        rp = self.get_cookie("rp%s" % id, '')
        print 'rp===%s' % (str(rp))
        if rp != obj.password:
            tmpl = '_pw'
    keyname = 'pv_%s' % (str(id))
    increment(keyname)  # yobin 20120701
    self.set_cookie(keyname, '1', path="/", expires_days=1)
    self.set_header("Last-Modified", obj.last_modified)
    output = self.render('page%s.html' % tmpl, {
        'title': "%s - %s" % (obj.title, getAttr('SITE_TITLE')),
        'keywords': obj.keywords,
        'description': obj.description,
        'obj': obj,
        'cobjs': obj.coms,
        'postdetail': 'postdetail',
        'cats': Category.get_all_cat_name(),
        'tags': Tag.get_hot_tag_name(),
        'archives': Archive.get_all_archive_name(),
        'page': 1,
        'allpage': 10,
        'comments': Comment.get_recent_comments(),
        'links': Link.get_all_links(),
        'isauthor': self.isAuthor(),
        'hits': get_count(keyname),
        'Totalblog': get_count('Totalblog', NUM_SHARDS, 0),
        'listtype': '',
    }, layout='_layout.html')
    self.write(output)
    if obj.password and BLOG_PSW_SUPPORT:
        return output
    elif obj.password and THEME == 'default':
        return
    else:
        return output
def get(self, id=''):
    obj = Article.get_article_by_id_simple(id)
    if obj:
        if MYSQL_TO_KVDB_SUPPORT:
            self.redirect('%s/topic/%s/%s' % (BASE_URL, obj['id'], obj['title']), 301)
        else:
            self.redirect('%s/topic/%d/%s' % (BASE_URL, obj.id, obj.title), 301)
        return
    else:
        self.redirect(BASE_URL)