def save_news(self, url, title, content, published_on):
    # Reuse an existing row with the same title instead of inserting a duplicate.
    news = News.query.filter(News.title == title).first()
    if not isinstance(news, News):
        news = News()
        news.url = url
        news.title = title
        news.content = content
        news.published_on = published_on
        db.session.add(news)
        db.session.commit()
        db.session.refresh(news)
    return news
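# A minimal usage sketch for save_news above, assuming a Flask-SQLAlchemy app
# with the `db` object and a `News` model that has an `id` primary key (the
# `repo` object and the `id` column are assumptions, not part of the original).
from datetime import datetime

def demo_save(repo):
    first = repo.save_news("https://example.com/a", "Sample title",
                           "Sample body", datetime.utcnow())
    # Calling again with the same title returns the stored row rather than
    # inserting a duplicate, because save_news filters on News.title.
    again = repo.save_news("https://example.com/a", "Sample title",
                           "Sample body", datetime.utcnow())
    assert first.id == again.id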
def get_news_content(url):
    html = download(url)
    soup = BeautifulSoup(html, "html.parser")
    # print(soup.prettify("utf-8"))

    title = soup.find(attrs={"class": "pg-headline"})
    print(title)
    author = soup.find(attrs={"class": "metadata__byline__author"})
    print(author)
    update_time = soup.find(attrs={"class": "update-time"})
    print(update_time)

    contents = soup.find_all(attrs={"class": "zn-body__paragraph"})
    content = ""
    for i in contents:
        print(i.text)
        content += i.text

    # Store the article in MongoDB
    session = Session.connect('runoob')
    # session.clear_collection(News)
    news = News(title=str(title), author=str(author),
                update_time=str(update_time), content=str(content))
    print(news.title)
    session.save(news)

    print('Query results:')
    for news in session.query(News).skip(3).limit(2):
        print(news.title, news.update_time)
def east_money(url=URL):
    data = get_method(url)
    data = BeautifulSoup(data, 'lxml')
    li = data.find_all("div", attrs={"id": 'artitileList1'})
    cd = li[0].contents[1].find_all('div')
    data = []
    for section in cd:
        title = section.find("p", attrs={'class': "title"})
        if title:
            title = title.text
            href = section.find("a").get("href")
            info = section.find("p", attrs={'class': "info"}).get('title')
            info_check = section.find("p", attrs={'class': "info"}).text
            if len(info) <= len(info_check):
                info = info_check
            time = section.find("p", attrs={'class': "time"}).text
            time = time_format(time)
            news = News(title=title, abstract=info, url=href,
                        source="东方财富", savedate=time)
            data.append(news)
    return data
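# east_money() above depends on get_method() and time_format(), which are not
# shown. The sketches below are assumptions about their roles: a plain HTTP GET
# and a parser for listing times such as "04月12日 09:30" (the exact time
# format is a guess, not taken from the original).
import datetime
import requests

def get_method(url):
    # Fetch the page and return its body as text.
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    return resp.text

def time_format(raw):
    # Parse "MM月DD日 HH:MM" into a datetime in the current year.
    parsed = datetime.datetime.strptime(raw.strip(), "%m月%d日 %H:%M")
    return parsed.replace(year=datetime.datetime.now().year)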
def list(self, all_parameters, subject=None, body=None, visible=True,
         published=None, deleted_before=None, deleted_after=None,
         published_before=None, published_after=None, page=1, sort=None,
         limit=conf.api.pagination.limit):
    """
    Returns news list

    :param subject: News' subject
    :param body: News' body
    :param page: Page number
    :param visible: Whether deleted and unpublished news are visible to the user
    :param published: Filter by publishing date
    :param limit: Number of news per page
    :param str or list sort: Field name or list of field names used for sorting.
        Ascending order is the default; prefix a name with "-" for descending order.
    :return list news_list: List of news

    **Example**::

        {
            "news_list": {
                "per_page": 100,
                "total": 1,
                "limit": 200,
                "offset": 0,
                "items": [
                    {
                        "news_id": 1,
                        "subject": "test subject",
                        "body": "test body",
                        "deleted": None,
                        "published": None
                    }
                ]
            }
        }
    """
    # noinspection PyUnresolvedReferences
    all_parameters.setdefault("limit", limit)
    # noinspection PyUnresolvedReferences
    all_parameters.setdefault("page", page)

    exact = None
    if request_api_type() == API_CABINET:
        all_parameters['deleted'] = None
        exact = ['deleted']
        all_parameters['published'] = ''
    else:
        if visible:
            all_parameters['deleted'] = None
            exact = ['deleted']
        if published:
            all_parameters['published'] = ''
    all_parameters.pop('visible', None)

    query = News.api_filter(all_parameters, exact=exact)
    return {"news_list": self.paginated_list(query)}
def post(self):
    args = create_parse.parse_args()
    news = News(
        category_id=args.get('cid'),
        title=args.get('title'),
        body=args.get('body'),
        front_image=args.get('front_image'),
    )
    try:
        db.session.add(news)
        db.session.commit()
        return Resp(data=news.to_json())
    except Exception as ex:
        db.session.rollback()
        return Resp(code=400, msg='create news error')
def save_news(self, news_id, title, content):
    session = Session()
    if not self.get_news_by_id(news_id):
        print(news_id)
        news = News(link=news_id, title=title, content=content,
                    crawl_time=datetime.datetime.now())
        session.add(news)
        session.commit()
    session.close()
def commit(data, user, password):
    session = News.connector(user, password)()
    for d in data:
        try:
            session.add(d)
            session.commit()
        except IntegrityError:
            session.rollback()
        except Exception as e:
            print(type(e))
    session.close()
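# commit() above expects News.connector(user, password) to return a session
# factory. A rough sketch of such a classmethod built on SQLAlchemy's
# sessionmaker; the connection string is an assumption, not the original's.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

class ConnectorSketch:
    @staticmethod
    def connector(user, password):
        engine = create_engine(
            "mysql+pymysql://{0}:{1}@localhost/news".format(user, password))
        return sessionmaker(bind=engine)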
def create_news(user, category, title, summary, article_text, external_link,
                picture_link, date_post):
    news = News(user=user, category=category, title=title, summary=summary,
                article_text=article_text, external_link=external_link,
                picture_link=picture_link, date_post=date_post)
    db.session.add(news)
    db.session.commit()
    return news
def save_news(self, news_id, title, content, written_clock):
    saved = False
    session = Session()
    if not self.get_news_by_id(news_id):
        print(news_id)
        news = News(link=news_id, title=title, contents=content,
                    written_time=written_clock, crawl_time=dt.datetime.now())
        session.add(news)
        session.commit()
        saved = True
    session.close()
    return saved
def add_news():
    pid = int(request.args.get('pid'))
    if pid == 1:
        tsActive = "manage_news"
    elif pid == 2:
        tsActive = "help"
    elif pid == 3:
        tsActive = "manage_company"
    this = 'add'
    form = AddNewsForm()
    if form.validate_on_submit():
        userid = int(request.form.get('userid'))
        title = request.form.get('title')
        getcontent = html.escape(request.form.get('editor'))
        display = int(request.form.get('display'))
        news = News(pid=pid, title=title, content=getcontent, display=display,
                    userid=userid, teamid=current_user.teamid,
                    addtime=datetime.datetime.now())
        news_check = db_session.query(News).filter(News.title == title).first()
        if news_check:
            if pid == 1:
                flash('This news item already exists')
            elif pid == 2:
                flash('This help entry already exists')
            return redirect('%s%s' % ('/manage/add_news?pid=', pid))
        if len(title) and len(getcontent):
            try:
                db_session.add(news)
                db_session.commit()
                db_session.close()
            except Exception:
                flash("Database error!")
                return redirect('%s%s' % ('/manage/add_news?pid=', pid))
            flash("Added successfully. Redirecting to the management page in "
                  "<span id='time'>3</span> seconds.")
            return redirect('%s%s' % ('/manage/add_news?pid=', pid))
    return render_template("edit_news.html", pagename=tsActive, this=this,
                           pid=pid, form=form)
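# add_news() above validates an AddNewsForm but reads its values straight from
# request.form. A plausible shape for that form, assuming Flask-WTF and the
# field names used in the view; this is a sketch, not the project's actual form.
from flask_wtf import FlaskForm
from wtforms import IntegerField, StringField, TextAreaField
from wtforms.validators import DataRequired

class AddNewsFormSketch(FlaskForm):
    userid = IntegerField('userid', validators=[DataRequired()])
    title = StringField('title', validators=[DataRequired()])
    editor = TextAreaField('editor', validators=[DataRequired()])
    display = IntegerField('display', default=1)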
def prepare(pages):
    data = news_format(pages)
    data_set = []
    for d in data:
        if d['title'] == d['abstract']:
            try:
                abst = abstract(d)
                d['abstract'] = abst
            except IndexError:
                pass
        url = "https://www.toutiao.com/a" + d['url']
        t = News(title=d['title'], abstract=d['abstract'], url=url,
                 source=d['source'], savedate=datetime.now())
        data_set.append(t)
    return data_set
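# prepare() builds News rows from the Toutiao feed and the commit() helper
# defined earlier persists them, skipping duplicates via IntegrityError. A
# sketch of how the two might be wired together; the credentials here are
# placeholders, not values from the original.
def crawl_toutiao(pages, user="reader", password="secret"):
    rows = prepare(pages)
    commit(rows, user, password)
    return len(rows)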
def save_news(self, link, title, content, written_time):
    saved = False
    session = Session()
    if not self.get_news_by_id(link):
        # print(link)
        news = News(link=link, title=title, content=content,
                    written_time=written_time,
                    crawl_time=datetime.datetime.now())
        session.add(news)
        session.commit()
        saved = True
    session.close()
    return saved
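# The save_news variants above all guard on self.get_news_by_id(...). A minimal
# sketch of that lookup with SQLAlchemy, assuming News.link is the unique key
# (the column name comes from the constructors above; the rest is assumed).
def get_news_by_id(self, link):
    session = Session()
    try:
        return session.query(News).filter(News.link == link).first()
    finally:
        session.close()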
def create_news(self, subject, body):
    """
    Creates news

    :param subject: News' subject
    :param body: News' body
    :return dict news_info: News' info

    **Example**::

        {
            "news_info": {
                "news_id": 1,
                "subject": "test subject",
                "body": "test body",
                "deleted": None,
                "published": None
            }
        }
    """
    news = News.create_news(subject, body)
    return {"news_info": display(news)}
def test_news_create(self):
    News.create_news('news subject', 'news body')
    db.session.flush()
def get_index(id):
    return dict(story=News.one(id=id))
try:
    news_list = get(
        "https://www3.nhk.or.jp/news/easy/news-list.json?_={0}".format(ts)
    )
except requests.exceptions.RequestException as err:
    raise err

# Step 2: load the JSON and store it in MongoDB
news_list_str = news_list.text
if news_list_str.startswith(u'\ufeff'):
    # Strip the UTF-8 BOM before parsing
    news_list_str = news_list_str.encode('utf8')[3:]
news_list_json = json.loads(news_list_str)[0]
my_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

for date in news_list_json:
    news_today = news_list_json[date]
    for news in news_today:
        temp_news = News()
        # Populate the News object's attributes directly from the JSON dict
        temp_news.__dict__ = news
        news_count = news_collect.find({"news_id": temp_news.news_id}).count()
        if news_count == 0:
            news_url = "https://www3.nhk.or.jp/news/easy/{0}/{0}.html".format(
                temp_news.news_id)
            temp_html = get(news_url)
            temp_html.encoding = "utf-8"
            soup = BeautifulSoup(temp_html.text, "html.parser")
            article_html = soup.select_one("#js-article-body")
            article_text = article_html.text
            news["news_web_url"] = news_url
            news["article_html"] = str(article_html)
            news["article_text"] = str(article_text).replace("\n", "")
            # Skip articles containing any of the filtered keywords
            if any(keyword in news["article_text"]
                   for keyword in ("近平", "毛沢東", "台湾", "北朝鮮", "ファーウェイ")):
                continue