예제 #1
0
파일: crawler.py 프로젝트: adirkuhn/creu
    def save_news(self, url, title, content, published_on):
        """Return the News row stored under ``title``, creating it if absent.

        The first News whose title equals ``title`` is looked up. When that
        lookup does not yield a News instance, a new one is populated from
        the arguments, added to ``db.session``, committed and refreshed.

        :param url: value assigned to the new row's ``url``
        :param title: title used for the lookup and for the new row
        :param content: value assigned to the new row's ``content``
        :param published_on: value assigned to the new row's ``published_on``
        :return: the existing or the newly created News instance
        """
        found = News.query.filter(News.title == title).first()
        if isinstance(found, News):
            # A row with this title already exists: nothing is written.
            return found

        created = News()
        created.url = url
        created.title = title
        created.content = content
        created.published_on = published_on

        db.session.add(created)
        db.session.commit()
        # Reload the instance from the session after the commit.
        db.session.refresh(created)

        return created
def get_news_content(url):
    """Scrape one news page, save it as a News document and print a sample query.

    Downloads ``url`` and extracts the first elements with the classes
    ``pg-headline``, ``metadata__byline__author`` and ``update-time`` plus
    every ``zn-body__paragraph`` element, echoing each to stdout. A News
    document is then saved through ``Session.connect('runoob')`` and the
    title and update time of the documents returned by
    ``skip(3).limit(2)`` are printed.

    The print calls use the single-argument form so the function parses on
    Python 3 and prints the same text on Python 2.

    :param url: address of the page to download
    """
    html = download(url)
    soup = BeautifulSoup(html, "html.parser")

    # Each element is searched once and the result is echoed, instead of
    # running the same search a second time just for the debug print.
    title = soup.find(attrs={"class": "pg-headline"})
    print(title)
    author = soup.find(attrs={"class": "metadata__byline__author"})
    print(author)
    update_time = soup.find(attrs={"class": "update-time"})
    print(update_time)

    # Collect the paragraph texts and join them once at the end.
    paragraphs = []
    for paragraph in soup.find_all(attrs={"class": "zn-body__paragraph"}):
        print(paragraph.text)
        paragraphs.append(paragraph.text)
    content = "".join(paragraphs)

    # Store into MongoDB.
    session = Session.connect('runoob')

    # NOTE(review): str() is applied to the found elements themselves, not
    # to their .text, so the stored values are presumably serialized markup
    # (or "None" when nothing matched) - confirm this is intended.
    news = News(title=str(title),
                author=str(author),
                update_time=str(update_time),
                content=str(content))
    print(news.title)
    session.save(news)
    print('查询结果')

    # The query is built once; the previous unused `result` copy is gone.
    for stored in session.query(News).skip(3).limit(2):
        print("%s %s" % (stored.title, stored.update_time))
예제 #3
0
def east_money(url=URL):
    """Scrape the article list found at ``url`` into News objects.

    The page is fetched with ``get_method`` and parsed with the ``lxml``
    parser. The ``div`` elements under the second child of the first
    ``div#artitileList1`` container are scanned; every one that contains a
    ``p.title`` paragraph yields one News object.

    :param url: page to fetch; defaults to the module-level ``URL``
    :return: list of News objects, in page order
    :raises IndexError: when no ``artitileList1`` container is found
    """
    soup = BeautifulSoup(get_method(url), 'lxml')
    containers = soup.find_all("div", attrs={"id": 'artitileList1'})
    sections = containers[0].contents[1].find_all('div')

    news_items = []
    for section in sections:
        title_tag = section.find("p", attrs={'class': "title"})
        if not title_tag:
            # Not an article entry (no title paragraph): skip it.
            continue

        title = title_tag.text
        href = section.find("a").get("href")

        # Look the "info" paragraph up once and read both variants from it.
        info_tag = section.find("p", attrs={'class': "info"})
        info = info_tag.get('title')
        info_text = info_tag.text
        # Keep the longer of the two; when the title attribute is missing
        # (None) fall back to the paragraph text instead of failing on len().
        if info is None or len(info) <= len(info_text):
            info = info_text

        saved_at = time_format(section.find("p", attrs={'class': "time"}).text)
        news_items.append(News(title=title,
                               abstract=info,
                               url=href,
                               source="东方财富",
                               savedate=saved_at))

    return news_items
예제 #4
0
파일: news.py 프로젝트: deti/boss
    def list(self, all_parameters, subject=None, body=None, visible=True, published=None,
             deleted_before=None, deleted_after=None,
             published_before=None, published_after=None,
             page=1, sort=None, limit=conf.api.pagination.limit):
        """
        Returns one page of the news list.

        Only ``visible``, ``published``, ``page`` and ``limit`` are read
        directly here; the actual filtering is delegated to
        ``News.api_filter`` which receives ``all_parameters``.

        :param all_parameters: dict of request parameters; updated in place
        :param subject: news' subject
        :param body: news' body
        :param page: page number
        :param visible: are deleted and not published news visible for user
        :param published: filter by publishing date
        :param limit: number of news per page
        :param str or List sort: Field name or list of field names which is used for sorting.
                                 Ascending ordering is default.
                                 For descending ordering use "-" before.
        :return list news_list: list of news

        **Example**::

            {
                "news_list": {
                    "per_page": 100,
                    "total": 1,
                    "limit": 200,
                    "offset": 0,
                    "items": [
                    {
                        "news_id": 1,
                        "subject": "test subject",
                        "body": "test body",
                        "deleted": None,
                        "published": None
                    }]
                }
            }
        """
        # noinspection PyUnresolvedReferences
        all_parameters.setdefault("limit", limit)
        # noinspection PyUnresolvedReferences
        all_parameters.setdefault("page", page)

        # Cabinet requests always get both overrides; other API types only
        # get the ones selected through `visible` / `published`.
        from_cabinet = request_api_type() == API_CABINET

        exact = None
        if from_cabinet or visible:
            all_parameters['deleted'] = None
            exact = ['deleted']
        if from_cabinet or published:
            all_parameters['published'] = ''

        # 'visible' is consumed here and must not reach the filter.
        all_parameters.pop('visible', None)
        filtered = News.api_filter(all_parameters, exact=exact)
        return {"news_list": self.paginated_list(filtered)}
예제 #5
0
파일: news.py 프로젝트: MOE-LYON/KLNews
    def post(self):
        """Create a news record from the parsed request arguments.

        Reads ``cid``, ``title``, ``body`` and ``front_image`` from
        ``create_parse.parse_args()``, stores a News built from them and
        answers with its JSON form.

        :return: ``Resp`` carrying ``news.to_json()`` on success, or a
                 ``Resp`` with code 400 when saving or serializing raises
        """
        import logging

        args = create_parse.parse_args()

        news = News(
            category_id=args.get('cid'),
            title=args.get('title'),
            body=args.get('body'),
            front_image=args.get('front_image'),
        )

        try:
            db.session.add(news)
            db.session.commit()

            return Resp(data=news.to_json())
        except Exception:
            db.session.rollback()
            # Keep the traceback: the generic 400 answer below would
            # otherwise be the only trace of what went wrong.
            logging.getLogger(__name__).exception('create news error')
            return Resp(code=400, msg='create news error')
예제 #6
0
 def save_news(self, news_id, title, content):
     """Store a News row for ``news_id`` unless one is already known.

     Nothing is written when ``self.get_news_by_id(news_id)`` returns a
     truthy value. Otherwise ``news_id`` is printed and a News with the
     current time as ``crawl_time`` is added and committed.

     The session is always closed, and rolled back first when anything
     fails, so an error can no longer leak an open session.

     :param news_id: identifier, stored in the row's ``link`` field
     :param title: title of the news item
     :param content: body of the news item
     """
     session = Session()
     try:
         if not self.get_news_by_id(news_id):
             print(news_id)
             news = News(link=news_id,
                         title=title,
                         content=content,
                         crawl_time=datetime.datetime.now())
             session.add(news)
             session.commit()
     except Exception:
         # Undo the partial transaction, then let the caller see the error.
         session.rollback()
         raise
     finally:
         session.close()
예제 #7
0
def commit(data, user, password):
    """Persist every item of ``data``, each in its own commit.

    A session is obtained from ``News.connector(user, password)``. Items
    that raise ``IntegrityError`` are skipped silently; for any other
    exception the exception type is printed and the item is skipped.
    In both cases the session is rolled back so the remaining items can
    still be committed. The session is closed even if iteration fails.

    :param data: iterable of objects to add to the session
    :param user: user name passed to ``News.connector``
    :param password: password passed to ``News.connector``
    """
    session = News.connector(user, password)()
    try:
        for item in data:
            try:
                session.add(item)
                session.commit()
            except IntegrityError:
                session.rollback()
            except Exception as exc:
                # Without a rollback the session stays in its failed state
                # and every following commit would fail as well.
                session.rollback()
                print(type(exc))
    finally:
        session.close()
예제 #8
0
def create_news(user, category, title, summary, article_text, external_link,
                picture_link, date_post):
    """Build a News from the given fields, commit it and return it.

    Every argument is passed to the News constructor under the keyword
    of the same name; the instance is added to ``db.session`` and
    committed.

    :return: the committed News instance
    """
    fields = {
        "user": user,
        "category": category,
        "title": title,
        "summary": summary,
        "article_text": article_text,
        "external_link": external_link,
        "picture_link": picture_link,
        "date_post": date_post,
    }
    entry = News(**fields)

    db.session.add(entry)
    db.session.commit()
    return entry
예제 #9
0
    def save_news(self, news_id, title, content, written_clock):
        """Store a News row for ``news_id`` unless one is already known.

        Nothing is written when ``self.get_news_by_id(news_id)`` returns a
        truthy value. Otherwise ``news_id`` is printed and a News with the
        current time as ``crawl_time`` is added and committed.

        The session is always closed, and rolled back first when anything
        fails, so an error can no longer leak an open session.

        :param news_id: identifier, stored in the row's ``link`` field
        :param title: title of the news item
        :param content: body, stored in the row's ``contents`` field
        :param written_clock: value stored in ``written_time``
        :return: True when a new row was committed, False otherwise
        """
        saved = False
        session = Session()
        try:
            if not self.get_news_by_id(news_id):
                print(news_id)
                news = News(link=news_id, title=title, contents=content,
                            written_time=written_clock, crawl_time=dt.datetime.now())
                session.add(news)
                session.commit()
                saved = True
        except Exception:
            # Undo the partial transaction, then let the caller see the error.
            session.rollback()
            raise
        finally:
            session.close()

        return saved
예제 #10
0
def add_news():
    """Render the add-news form and store the entry on a valid submit.

    The ``pid`` query argument selects the section: 1 -> "manage_news",
    2 -> "help", 3 -> "manage_company". On a valid form submit the entry
    is rejected if a News with the same title exists; otherwise, when both
    title and content are non-empty, it is committed. Every submit outcome
    redirects back to the add page for the same ``pid``.

    :return: a redirect after a handled submit, otherwise the rendered
             ``edit_news.html`` template
    """
    pid = int(request.args.get('pid'))

    # An unknown pid used to leave tsActive unassigned, which failed with
    # UnboundLocalError at render time; it now falls back to None.
    # NOTE(review): confirm None is an acceptable pagename for the template.
    tsActive = {1: "manage_news", 2: "help", 3: "manage_company"}.get(pid)
    this = 'add'
    back_url = '%s%s' % ('/manage/add_news?pid=', pid)

    form = AddNewsForm()
    if form.validate_on_submit():
        userid = int(request.form.get('userid'))
        title = request.form.get('title')
        getcontent = html.escape(request.form.get('editor'))
        display = int(request.form.get('display'))

        news_check = db_session.query(News).filter(News.title == title).first()
        if news_check:
            if pid == 1:
                flash('资讯已存在')
            elif pid == 2:
                flash('帮助已存在')
            return redirect(back_url)

        if title and getcontent:
            # Built only once the duplicate check has passed.
            news = News(pid=pid,
                        title=title,
                        content=getcontent,
                        display=display,
                        userid=userid,
                        teamid=current_user.teamid,
                        addtime=datetime.datetime.now())
            try:
                db_session.add(news)
                db_session.commit()
                db_session.close()
            except Exception:
                # Leave the session usable for the next request.
                db_session.rollback()
                flash("数据库错误!")
                return redirect(back_url)

            flash("添加成功,<span id='time'>3</span>秒后自动跳转管理页。")
            return redirect(back_url)
    return render_template("edit_news.html",
                           pagename=tsActive,
                           this=this,
                           pid=pid,
                           form=form)
예제 #11
0
def prepare(pages):
    """Turn ``pages`` into a list of News objects.

    ``news_format(pages)`` supplies the entries. When an entry's title and
    abstract are equal, ``abstract(entry)`` is asked for a replacement; an
    IndexError from that step leaves the entry unchanged. Each News gets
    the toutiao article prefix prepended to its url and the current time
    as ``savedate``.

    :param pages: raw input handed to ``news_format``
    :return: list of News objects, one per entry
    """
    prepared = []

    for entry in news_format(pages):
        if entry['title'] == entry['abstract']:
            try:
                entry['abstract'] = abstract(entry)
            except IndexError:
                # Best effort: keep the original abstract.
                pass
        article_url = "https://www.toutiao.com/a" + entry['url']
        prepared.append(News(title=entry['title'],
                             abstract=entry['abstract'],
                             url=article_url,
                             source=entry['source'],
                             savedate=datetime.now()))
    return prepared
예제 #12
0
    def save_news(self, link, title, content, written_time):
        """Store a News row for ``link`` unless one is already known.

        Nothing is written when ``self.get_news_by_id(link)`` returns a
        truthy value. Otherwise a News with the current time as
        ``crawl_time`` is added and committed.

        The session is always closed, and rolled back first when anything
        fails, so an error can no longer leak an open session.

        :param link: identifier of the news item, stored in ``link``
        :param title: title of the news item
        :param content: body of the news item
        :param written_time: value stored in ``written_time``
        :return: True when a new row was committed, False otherwise
        """
        saved = False
        session = Session()
        try:
            if not self.get_news_by_id(link):
                news = News(link=link,
                            title=title,
                            content=content,
                            written_time=written_time,
                            crawl_time=datetime.datetime.now())
                session.add(news)
                session.commit()
                saved = True
        except Exception:
            # Undo the partial transaction, then let the caller see the error.
            session.rollback()
            raise
        finally:
            session.close()

        return saved
예제 #13
0
파일: crawler.py 프로젝트: adirkuhn/creu
    def save_news(self, url, title, content, published_on):
        """Fetch the News with the given title, inserting it when missing.

        If the title lookup does not produce a News instance, a fresh one
        is filled from the arguments, added to ``db.session``, committed
        and refreshed before being returned.

        :param url: stored in the new row's ``url``
        :param title: lookup key and the new row's ``title``
        :param content: stored in the new row's ``content``
        :param published_on: stored in the new row's ``published_on``
        :return: the matching or newly inserted News instance
        """
        match = News.query.filter(News.title == title).first()
        if isinstance(match, News):
            return match

        fresh = News()
        # Same assignment order as the field list below.
        for field, value in (("url", url),
                             ("title", title),
                             ("content", content),
                             ("published_on", published_on)):
            setattr(fresh, field, value)

        db.session.add(fresh)
        db.session.commit()
        db.session.refresh(fresh)

        return fresh
예제 #14
0
파일: news.py 프로젝트: omarabdalhamid/boss
    def create_news(self, subject, body):
        """
        Creates a news entry through ``News.create_news`` and returns it
        wrapped for the response.

        :param subject: News' subject
        :param body: News' body
        :return dict news_info: News' info

        **Example**::

            {"news_info":
                {"news_id": 1,
                 "subject": "test subject",
                 "body": "test body",
                 "deleted": None,
                 "published": None
                }
            }

        """
        return {"news_info": display(News.create_news(subject, body))}
예제 #15
0
파일: news.py 프로젝트: deti/boss
    def create_news(self, subject, body):
        """
        Creates news and returns the result of ``display`` for it.

        :param subject: News' subject
        :param body: News' body
        :return dict news_info: News' info

        **Example**::

            {"news_info":
                {"news_id": 1,
                 "subject": "test subject",
                 "body": "test body",
                 "deleted": None,
                 "published": None
                }
            }

        """
        created = News.create_news(subject, body)
        news_info = display(created)
        return {"news_info": news_info}
예제 #16
0
 def test_news_create(self):
     """Create a news entry and flush the session; passes if nothing raises."""
     subject, body = 'news subject', 'news body'
     News.create_news(subject, body)
     db.session.flush()
예제 #17
0
파일: news.py 프로젝트: omarabdalhamid/boss
    def list(self,
             all_parameters,
             subject=None,
             body=None,
             visible=True,
             published=None,
             deleted_before=None,
             deleted_after=None,
             published_before=None,
             published_after=None,
             page=1,
             sort=None,
             limit=conf.api.pagination.limit):
        """
        Returns a paginated news list.

        Filtering itself happens in ``News.api_filter``, which is given
        ``all_parameters``; this method only fills in paging defaults and
        the ``deleted`` / ``published`` overrides.

        :param all_parameters: dict of request parameters; updated in place
        :param subject: news' subject
        :param body: news' body
        :param page: page number
        :param visible: are deleted and not published news visible for user
        :param published: filter by publishing date
        :param limit: number of news per page
        :param str or List sort: Field name or list of field names which is used for sorting.
                                 Ascending ordering is default.
                                 For descending ordering use "-" before.
        :return list news_list: list of news

        **Example**::

            {
                "news_list": {
                    "per_page": 100,
                    "total": 1,
                    "limit": 200,
                    "offset": 0,
                    "items": [
                    {
                        "news_id": 1,
                        "subject": "test subject",
                        "body": "test body",
                        "deleted": None,
                        "published": None
                    }]
                }
            }
        """
        # Paging defaults apply only when the request did not set them.
        # noinspection PyUnresolvedReferences
        for key, fallback in (("limit", limit), ("page", page)):
            all_parameters.setdefault(key, fallback)

        is_cabinet = request_api_type() == API_CABINET

        # Cabinet requests get both overrides unconditionally.
        exact = None
        if is_cabinet or visible:
            all_parameters['deleted'] = None
            exact = ['deleted']
        if is_cabinet or published:
            all_parameters['published'] = ''

        all_parameters.pop('visible', None)
        return {"news_list": self.paginated_list(
            News.api_filter(all_parameters, exact=exact))}
예제 #18
0
파일: test_news.py 프로젝트: deti/boss
 def test_news_create(self):
     """Smoke test: news creation followed by a session flush must not raise."""
     news_args = ('news subject', 'news body')
     News.create_news(*news_args)
     db.session.flush()
예제 #19
0
파일: news.py 프로젝트: Longsight/QCWeb
def get_index(id):
	"""Return a dict whose ``story`` key holds ``News.one(id=id)``.

	:param id: identifier forwarded to ``News.one``
	"""
	story = News.one(id=id)
	return {"story": story}
예제 #20
0
def get_index(id):
    """Look up one News by ``id`` and wrap it under the ``story`` key.

    :param id: identifier forwarded to ``News.one``
    :return: dict with the single key ``story``
    """
    return {"story": News.one(id=id)}
예제 #21
0
    news_list = get(
        "https://www3.nhk.or.jp/news/easy/news-list.json?_={0}".format(ts)
    )
except requests.exceptions.RequestException as err:
    raise err

# step2 Load json and stored it in mongodb
news_list_str = news_list.text
if news_list_str.startswith(u'\ufeff'):
    news_list_str = news_list_str.encode('utf8')[3:]
news_list_json = json.loads(news_list_str)[0]
my_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
for date in news_list_json:
    news_today = news_list_json[date]
    for news in news_today:
        temp_news = News()
        temp_news.__dict__ = news
        news_count = news_collect.find({"news_id": temp_news.news_id}).count()
        if news_count == 0:
            news_url = "https://www3.nhk.or.jp/news/easy/{0}/{0}.html".format(
                temp_news.news_id)
            temp_html = get(news_url)
            temp_html.encoding = "utf-8"
            soup = BeautifulSoup(temp_html.text)
            article_html = soup.select_one("#js-article-body")
            article_text = article_html.text
            news["news_web_url"] = news_url
            news["article_html"] = str(article_html)
            news["article_text"] = str(article_text).replace("\n", "")
            if news["article_text"].find("近平") > -1 or news["article_text"].find("毛沢東") > -1 or news["article_text"].find("台湾") > -1 or news["article_text"].find("北朝鮮") > -1 or news["article_text"].find("ファーウェイ") > -1:
                continue