예제 #1
0
파일: main.py 프로젝트: IamBusy/yugong
def publish_toutiao(event, context):
    """Publish every queued article that has not been published before.

    Entries are popped one at a time from the ``fetched_article`` Redis list
    until the list yields nothing. Each entry is parsed as JSON, rebuilt into
    an ``Article`` and published through ``ToutiaoPublisher`` unless the
    upper-case MD5 hex digest of ``repr(article.title)`` is already a member
    of the ``published_articles`` Redis set. After a successful publish the
    digest is added to that set.

    Any ``Exception`` raised while handling a single entry is logged and the
    loop continues with the next entry, so one bad entry does not stop the
    run.

    Args:
        event: Accepted but not used by this function.
        context: Accepted but not used by this function.
    """
    import hashlib
    import json

    from publisher.toutiao.publisher import ToutiaoPublisher
    from core import logger, config, db
    from entities import Article

    client = db.get_redis_client(config.get('app.redis'))
    published_key = 'published_articles'
    publisher = ToutiaoPublisher()
    logger.info('Start toutiao publish-processing...')
    article_str = client.lpop('fetched_article')
    while article_str:
        logger.info('Fetched article str from redis')
        try:
            if isinstance(article_str, bytes):
                article_str = article_str.decode()
            article_json = json.loads(article_str)
            article = Article()
            article.rebuild(article_json)
            title = repr(article.title)
            hashed_title = hashlib.md5(title.encode('utf8')).hexdigest().upper()
            logger.info('Pre-publish article [%s] hash value [%s]' % (title, hashed_title))
            # The membership test must use the same value that is stored by
            # sadd() below. The builtin hash() yields an unrelated integer
            # (and is salted per process for strings), so checking it would
            # never match and every article would be published again.
            if article and (not client.sismember(published_key, hashed_title)):
                publisher.publish(article)
                client.sadd(published_key, hashed_title)
            else:
                logger.error('Pre-publish article [%s] error, due to published before' % title)
        except Exception as e:
            # NOTE(review): the message mentions pickle, but this handler
            # covers JSON parsing, rebuilding, publishing and Redis errors.
            logger.error('Pickle loads article error')
            logger.error(e)
        finally:
            # Always advance to the next entry, even after a failure.
            article_str = client.lpop('fetched_article')
예제 #2
0
파일: publish.py 프로젝트: cash2one/yugong
def publish_toutiao():
    """Publish every queued article whose title has not been published yet.

    Pops entries from the ``fetched_article`` Redis list until it yields
    nothing. Each entry is decoded if it is ``bytes``, parsed as JSON and
    rebuilt into an ``Article``. The article is published through ``Toutiao``
    unless ``repr(article.title)`` is already in the ``published_articles``
    Redis set; after publishing, that title is added to the set.

    Any ``Exception`` raised while handling one entry is logged, and the loop
    then moves on to the next entry.
    """
    redis_client = db.get_redis_client(config.get('app.redis'))
    seen_key = 'published_articles'
    toutiao = Toutiao()
    logger.info('Start toutiao publish-processing...')

    raw = redis_client.lpop('fetched_article')
    while raw and len(raw):
        logger.info('Fetched article str from redis')
        try:
            payload = raw.decode() if isinstance(raw, bytes) else raw
            fields = json.loads(payload)
            article = Article()
            article.rebuild(fields)
            title = repr(article.title)
            logger.info('Pre-publish article [%s]' % title)
            # Skip when there is no usable article or the title was seen.
            if not article or redis_client.sismember(seen_key, title):
                logger.error(
                    'Pre-publish article [%s] error, due to published before' % title)
            else:
                toutiao.publish(article)
                redis_client.sadd(seen_key, title)
        except Exception as err:
            logger.error('Pickle loads article error')
            logger.error(err)
        finally:
            # Fetch the next entry regardless of how this one went.
            raw = redis_client.lpop('fetched_article')
예제 #3
0
파일: jianshu.py 프로젝트: cash2one/yugong
 def fetch_article_from_url(self, url, timeout=10):
     """Download an article page and convert it into an ``Article``.

     The page is expected to contain a ``<div class="article">`` holding an
     ``<h1>`` title and a ``<div class="show-content">`` body. Every ``<img>``
     in the body that carries a ``data-original-src`` attribute gets its
     ``src`` rewritten from that attribute.

     Args:
         url: Address of the article page to fetch.
         timeout: Seconds passed to ``requests.get`` as its timeout, so a
             stalled server cannot block the caller forever.

     Returns:
         ``Article(title, text, html)`` where ``text`` is the body's plain
         text and ``html`` is the body's markup as a string.

     Raises:
         AttributeError: If the expected article or body element is missing
             (the lookup yields ``None``).
         requests.exceptions.RequestException: If the HTTP request fails or
             times out.
     """
     resp = requests.get(url, headers=headers, timeout=timeout)
     # NOTE(review): no parser is named, so BeautifulSoup picks whichever one
     # is installed; pinning one would make parsing reproducible. Confirm
     # which parser the project expects before changing this.
     soup = BeautifulSoup(resp.text)
     article = soup.find('div', class_='article')
     title = article.h1.string
     content = article.find('div', class_='show-content')
     for img in content.find_all('img'):
         if 'data-original-src' in img.attrs:
             src = img['data-original-src']
             # Only add the scheme when it is missing; prefixing an already
             # absolute URL would produce "http:http://...".
             if not src.startswith(('http://', 'https://')):
                 src = 'http:' + src
             img['src'] = src
     return Article(title, content.get_text(), str(content))