Python Article Examples

Programming Language: Python

Namespace/Package Name: entities

Class/Type: Article

Examples at hotexamples.com: 5

Python Article - 5 examples found. These are the top rated real world Python examples of entities.Article extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Article(3)

rebuild(2)

html(1)

score(1)

summary(1)

title(1)

Example #1

Show file

File: publish.py Project: cash2one/yugong

def publish_toutiao():
    """Drain the ``fetched_article`` Redis list and publish unseen articles.

    Each popped entry is decoded (when it arrives as ``bytes``), parsed as
    JSON and rebuilt into an ``Article``.  The ``repr`` of the article title
    is used as the member of the ``published_articles`` Redis set: articles
    whose title is not in that set yet are handed to the ``Toutiao``
    publisher and then recorded in the set.

    Any exception raised while handling one entry is logged and the loop
    moves on to the next entry; the function returns once the list is empty.
    """
    queue_key = 'fetched_article'
    published_key = 'published_articles'

    redis_client = db.get_redis_client(config.get('app.redis'))
    toutiao = Toutiao()
    logger.info('Start toutiao publish-processing...')

    raw_entry = redis_client.lpop(queue_key)
    while raw_entry:
        logger.info('Fetched article str from redis')
        try:
            # Redis may hand back bytes depending on client configuration.
            payload = raw_entry.decode() if isinstance(raw_entry, bytes) else raw_entry
            fields = json.loads(payload)

            article = Article()
            article.rebuild(fields)

            title = repr(article.title)
            logger.info('Pre-publish article [%s]' % title)

            skip = (not article) or redis_client.sismember(published_key, title)
            if skip:
                logger.error(
                    'Pre-publish article [%s] error, due to published before' %
                    title)
            else:
                toutiao.publish(article)
                redis_client.sadd(published_key, title)
        except Exception as err:
            logger.error('Pickle loads article error')
            logger.error(err)
        finally:
            # Always advance to the next queued entry, even after a failure.
            raw_entry = redis_client.lpop(queue_key)

Example #2

Show file

File: main.py Project: IamBusy/yugong

def publish_toutiao(event, context):
    """Drain the ``fetched_article`` Redis list and publish unseen articles.

    Each popped entry is decoded (when it arrives as ``bytes``), parsed as
    JSON and rebuilt into an ``Article``.  Deduplication is keyed on the
    upper-case MD5 hex digest of ``repr(article.title)``, stored in the
    ``published_articles`` Redis set.

    Any exception raised while handling one entry is logged and the loop
    moves on to the next entry; the function returns once the list is empty.

    :param event: not used by this function.
    :param context: not used by this function.
    """
    import json
    from publisher.toutiao.publisher import ToutiaoPublisher
    from core import logger, config, db
    from entities import Article
    import hashlib

    client = db.get_redis_client(config.get('app.redis'))
    published_key = 'published_articles'
    publisher = ToutiaoPublisher()
    logger.info('Start toutiao publish-processing...')
    article_str = client.lpop('fetched_article')
    while article_str and len(article_str) > 0:
        logger.info('Fetched article str from redis')
        try:
            if isinstance(article_str, bytes):
                article_str = bytes.decode(article_str)
            article_json = json.loads(article_str)
            article = Article()
            article.rebuild(article_json)
            title = repr(article.title)
            hashed_title = hashlib.md5(title.encode('utf8')).hexdigest().upper()
            logger.info('Pre-publish article [%s] hash value [%s]' % (title, hashed_title))
            # The membership test must use the same value that sadd() stores
            # below.  The built-in hash() is not that value, and for str it
            # is randomised per interpreter process, so it can never serve as
            # a persistent dedup key.
            if article and (not client.sismember(published_key, hashed_title)):
                publisher.publish(article)
                client.sadd(published_key, hashed_title)
            else:
                logger.error('Pre-publish article [%s] error, due to published before' % title)
        except Exception as e:
            # Message text kept as-is for log compatibility; the payload is
            # parsed with json.loads, and this handler also covers failures
            # raised by rebuild/publish/Redis calls above.
            logger.error('Pickle loads article error')
            logger.error(e)
        finally:
            # Always advance to the next queued entry, even after a failure.
            article_str = client.lpop('fetched_article')

Example #3

Show file

File: analytizer.py Project: cash2one/yugong

 def estimate(self, article: Article):
     """Fill in the summary, abstract string and score of ``article``.

     ``article.summary`` is set to the result of ``summarize(article)``,
     the string form of every summary item is appended to
     ``article.abstract_str``, and ``article.score`` is set to the result
     of ``baidu_repetition_rate(article)``.

     Failures are logged rather than raised, so the article may be left
     partially updated when one of the steps fails.

     :param article: the article to update in place.
     """
     try:
         article.summary = summarize(article)
         for sen in article.summary:
             article.abstract_str += str(sen)
         article.score = baidu_repetition_rate(article)
     except Exception as e:
         logger.error('Estimate article error: [%s]' % article.title)
         # Record the cause as well; previously the exception was dropped.
         logger.error(e)

Example #4

Show file

    def transformer(self, article: Article):
        '''
        Clean up an article's HTML and title in place.

        Steps, in order:

        1. every ``<img>`` with a ``src`` gets that ``src`` replaced by the
           value returned from ``uploader.upload``;
        2. the ``href`` attribute is removed from every ``<a>``;
        3. the title is truncated once its weighted length reaches the upper
           bound, or prefixed when it is below the lower bound;
        4. when ``article.abstract_str`` is non-empty, a summary block is
           prepended to the HTML.

        Errors on an individual ``<img>`` or ``<a>`` tag are logged and that
        tag is skipped.

        :param article: the article whose ``html`` and ``title`` are updated.
        :return: None
        '''
        soup = BeautifulSoup(article.html)

        # purify image
        for img in soup.find_all('img'):
            try:
                if 'src' in img.attrs:
                    img['src'] = uploader.upload(img['src'])
            except Exception as e:
                logger.error(e)

        # remove link
        for a in soup.find_all('a'):
            try:
                if 'href' in a.attrs:
                    del a['href']
            except Exception as e:
                logger.error(e)

        # control length of title 5-30
        # One CJK character (or punctuation mark) counts as one unit of
        # length; two other characters (letters) count as one unit.
        alpha_num = 1  # initial value kept from the original implementation
        word_num = 0
        for x, char in enumerate(article.title):
            if alpha_num / 2 + word_num >= 28:
                article.title = article.title[:x]
                # Stop here: continuing would index past the end of the
                # freshly truncated title.
                break
            # '\u4e00'..'\u9fa5' is the CJK ideograph range; it must be
            # written with backslash escapes to compare against one character.
            if char in punctuation or '\u4e00' <= char <= '\u9fa5':
                word_num += 1
            else:
                alpha_num += 1
        if alpha_num / 2 + word_num < 5:
            article.title = '技术专栏-' + article.title

        # append summarize
        summ = ''
        if article.abstract_str:
            summ = '<h1>内容导读</h1><blockquote><p>%s</p></blockquote>' % str(
                article.abstract_str)
        article.html = summ + str(soup)

Example #5

Show file

File: jianshu.py Project: cash2one/yugong

 def fetch_article_from_url(self, url):
     """Download ``url`` and build an ``Article`` from the page.

     The page is fetched with the ``headers`` in scope, and the
     ``div.article`` element is located.  Its ``<h1>`` string becomes the
     title and its ``div.show-content`` element becomes the body.  Every
     ``<img>`` in the body that carries a ``data-original-src`` attribute
     gets ``src`` set to that value prefixed with ``http:``.

     :param url: address of the page to fetch.
     :return: ``Article(title, body text, body HTML)``.
     """
     page = BeautifulSoup(requests.get(url, headers=headers).text)
     container = page.find('div', class_='article')
     heading = container.h1.string
     body = container.find('div', class_='show-content')

     for image in body.find_all('img'):
         if 'data-original-src' not in image.attrs:
             continue
         image['src'] = 'http:' + image['data-original-src']

     plain_text = body.get_text()
     markup = str(body)
     return Article(heading, plain_text, markup)