Ejemplo n.º 1
0
 def save(self, url, author, title):
     """Store a topic unless one with the same URL is already stored.

     An existing ``Topic`` is looked up by ``url``. When none is found, a
     new ``Topic`` is built from the arguments, saved and logged. When one
     is found, a warning is logged and ``self.existed_cnt`` is incremented.
     """
     existing = session.query(Topic).filter_by(url=url).first()
     if not existing:
         new_topic = Topic(url=url, author=author, title=title)
         new_topic.save()
         logger.info(new_topic)
         return

     # Duplicate URL: only record that it was seen again.
     logger.warning('topic已存在,id:%s' % existing.id)
     self.existed_cnt += 1
Ejemplo n.º 2
0
    def parse_topic_page(self, url, html):
        """Parse a topic detail page and store it if it is not stored yet.

        Args:
            url: URL of the topic page; used as the lookup key for
                already-stored topics.
            html: Markup of the topic page.

        Pages on which the author link or the ``<title>`` element cannot be
        located are skipped with a warning instead of raising
        ``AttributeError``. A page without a ``topic_content`` block is
        stored with an empty description.
        """
        soup = BeautifulSoup(html, 'lxml')

        # find() and tag attribute access return None when nothing matches,
        # so every lookup is checked before ``.text`` is read.
        author_box = soup.find('small', class_='gray')
        author_link = author_box.a if author_box is not None else None
        title_tag = soup.title
        if author_link is None or title_tag is None:
            logger.warning('unable to parse topic page: %s', url)
            return

        author = author_link.text
        title = title_tag.text.split(' - V2EX')[0]
        content_box = soup.find('div', class_='topic_content')
        description = content_box.text if content_box is not None else ''

        entity = session.query(Topic).filter_by(url=url).first()
        if not entity:
            t = Topic(url=url,
                      author=author,
                      title=title,
                      description=description)
            t.save()
            logger.info(t)
Ejemplo n.º 3
0
    def _persist_detail_info(self, html, group_name, url):
        """Extract the details of a topic page and store them as a ``Topic``.

        Args:
            html: Page body; must be bytes-like, because it is searched for
                an encoded marker string.
            group_name: Name of the group; only used to build the cache key
                under which blocked URLs are recorded.
            url: URL of the page. The first run of digits in it is stored
                as ``topic_id``.

        Returns:
            The dict of fields passed to ``Topic.create``, or ``None`` when
            the page contains the marker string or no title is extracted.

        Raises:
            IndexError: If ``url`` contains no digits.
        """
        # The marker is the word "robot"; the page is recorded under the
        # group's 403 key and skipped (presumably an anti-crawler page).
        if "机器人".encode() in html:
            logging.warning("{} 403.html".format(url))
            self.cache.r_sadd("group:{}:403".format(group_name), url)
            return None

        rules = self.__rules

        # Try the "lg" title rule first and fall back to the "sm" one.
        title = (self.parser(rules["detail_title_lg"], html, True)
                 or self.parser(rules["detail_title_sm"], html, True))
        if title is None:
            return None

        topic = {
            "title": title.strip(),
            "url": url,
            "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S"),
            "create_time": self.parser(rules["create_time"], html, True),
        }
        author = self.parser(rules["detail_author"], html, True)
        topic["author"] = filter_emoji(author)

        # Fall back to the "content_text" rule when the primary rule yields
        # nothing. Compared by value: identity checks against a string
        # literal are implementation-dependent.
        content = "\n".join(self.parser(rules["content"], html))
        if not content:
            content = "\n".join(self.parser(rules["content_text"], html))
        topic["content"] = filter_emoji(content)

        # Joining an empty result already yields "", so no branch is needed.
        topic["images"] = ",".join(self.parser(rules["images"], html))
        topic["topic_id"] = re.findall(r"(\d+)", url)[0]

        # NOTE: extraction of phone number, SNS handle, floor area and room
        # layout from ``content`` was sketched here earlier but is disabled.
        with mysql_db.atomic():
            Topic.create(**topic)
        return topic
Ejemplo n.º 4
0
    def get(self, topic_id):
        """Return the topic with the given ``topic_id`` as a JSON response.

        The stored comma-separated ``images`` value is expanded into a list;
        an empty or missing value becomes an empty list.

        Aborts with 404 when no topic with that id exists. Any other failure
        (for example a database error) propagates instead of being reported
        as "not found".
        """
        # Only the lookup sits inside the try, so that unrelated errors are
        # not mistaken for a missing row.
        try:
            topic = Topic.get(Topic.topic_id == topic_id)
        except Topic.DoesNotExist:
            abort(404, message="Topic id {} is not found".format(topic_id))

        topic_dict = model_to_dict(topic)
        images = topic_dict["images"]
        # "".split(",") would produce [""], i.e. one bogus empty image entry.
        topic_dict["images"] = images.split(",") if images else []
        return jsonify(topic_dict)
Ejemplo n.º 5
0
def test_Topic(db_handle):
    """
    Exercise creating, querying and deleting Topic rows.

    The model under test is:
    class Topic(db.Model):
        id = db.Column(db.Integer, primary_key=True)
        name = db.Column(db.String(256), nullable=True)

        questions = db.relationship("Question", back_populates="topic")
    """
    # --- creation: a single committed row is counted
    first_topic = Topic(name='nametest')
    db_handle.session.add(first_topic)
    db_handle.session.commit()
    assert Topic.query.count() == 1

    # --- lookup by name returns the row that was just stored
    fetched = Topic.query.filter_by(name='nametest').first()
    assert fetched.name == 'nametest'

    # --- deletion: add a second row so there is something to remove
    second_topic = Topic(name='addtest')
    db_handle.session.add(second_topic)
    db_handle.session.commit()
    assert Topic.query.count() == 2

    # Fetch the second row again by name and delete it.
    doomed = Topic.query.filter_by(name='addtest').first()
    db_handle.session.delete(doomed)
    db_handle.session.commit()
    assert Topic.query.count() == 1

    # The row created first must be the one left over.
    survivor = Topic.query.first()
    assert survivor.name == 'nametest'
Ejemplo n.º 6
0
def scheduler(url=None):
    """Create or refresh Topic rows for a set of pages and commit them.

    When ``url`` is truthy, exactly one page is processed: its address is
    'http://' followed by ``request.full_path`` without its first 10
    characters, stamped with the current time. Otherwise the
    (url, updated) pairs come from ``parse_list()``.

    A page whose stored ``updated`` value equals the incoming one is
    skipped. Every other page is parsed; an existing row gets its title,
    body and updated stamp overwritten, a missing one is created.

    Returns the string 'ok'.
    """
    if url:
        jobs = [('http://' + request.full_path[10:], datetime.now())]
    else:
        jobs = parse_list()

    for page_url, stamp in jobs:
        query = db.session.query(Topic).filter(Topic.url == page_url)
        existing = query.first()
        if existing and existing.updated == stamp:
            continue

        title, published, body = parse_page(page_url)
        if existing:
            existing.title = title
            existing.body = body
            existing.updated = stamp
            record = existing
        else:
            record = Topic(page_url, title, published, stamp, body)
        db.session.add(record)

    db.session.commit()
    return 'ok'
Ejemplo n.º 7
0
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from flask import url_for

from db import Base, Topic, Category, Article

# Engine for the SQLite file compscicatalog.db; check_same_thread=False is
# passed along in the URL query string.
engine = create_engine('sqlite:///compscicatalog.db?check_same_thread=False')
Base.metadata.bind = engine

# Session factory bound to the engine, and the session used by the inserts
# below.
DBSession = sessionmaker(bind=engine)
session = DBSession()

# Seed one Topic row and commit it before the Category below is inserted.
AI = Topic(name='Artificial Intelligence',
           url='Artificial-Intelligence',
           image='AI.jpeg')
session.add(AI)
session.commit()

# Seed one Category row.
# NOTE(review): topic_id=1 presumably refers to the Topic added above, which
# only holds if that row received id 1 (e.g. an empty table) - confirm.
supervised_learning = Category(name='Supervised Learning',
                               url='Supervised-Learning',
                               topic_id=1,
                               image='supervised.png')
session.add(supervised_learning)
session.commit()

neural_networks = Article(name='Neural Networks',
                          category_id=1,
                          content='Artificial neural networks (ANN) or ' +
                          'connectionist systems are computing ' +
                          'systems vaguely inspired by the biological ' +
                          'neural networks that constitute ' +
Ejemplo n.º 8
0
from db import db, Topic, Question, Answer, Comment, User, Quiz
from datetime import datetime

# Set up the database through db.create_all() and report progress on stdout.
db.create_all()
print('Database created!')
print('Populating...')

## Add one Topic row and commit it
topic1 = Topic(name='Test_topic_name')
db.session.add(topic1)
db.session.commit()
print('Added Topic')

## Add one User row and commit it
user1 = User(username='******',
             email='test_email',
             pw_hash='test_pw_hash')
db.session.add(user1)
db.session.commit()
print('Added User')

## Add one Question row and commit it
# NOTE(review): topic_id=1 presumably refers to topic1 committed above, which
# only holds if that row received id 1 - confirm.
question1 = Question(
    topic_id=1,  # a Topic with this id has to exist
    question_text='test_topic_id',
    image_src='test_img_link')
db.session.add(question1)
db.session.commit()
print('Added Question')

## create Answer resource to db