Exemple #1
0
def main(config):
    """Seed the database named by the schema config with sample records.

    Resolves ``config`` through ``get_config``, calls ``teardown`` with
    ``drop_db=False``, then inserts a fixed set of users, posts, comments
    and tags followed by the ``UserPost``, ``UserTag`` and ``PostComment``
    rows that link them. Every batch is added inside its own
    ``subtransactions(session)`` block.

    Args:
        config: Value handed to ``get_config``; the result is opened as a
            JSON file whose first element must provide an ``index`` key
            and may provide a ``database`` key.
    """
    config = get_config(config)
    teardown(drop_db=False, config=config)
    # Read the schema inside a context manager so the file handle is closed
    # right away instead of being left open until garbage collection.
    with open(config) as schema_file:
        document = json.load(schema_file)
    # The database name falls back to the index name of the first document.
    engine = pg_engine(
        database=document[0].get('database', document[0]['index']))
    Session = sessionmaker(bind=engine, autoflush=True)
    session = Session()

    # Bootstrap
    users = {
        'Carla Ferreira Cardoso':
        User(name='Carla Ferreira Cardoso', age=19, gender='female'),
        'Uwe Fuerst':
        User(name='Uwe Fuerst', age=58, gender='male'),
        'Otitodilinna Chigolum':
        User(name='Otitodilinna Chigolum', age=36, gender='male'),
    }
    with subtransactions(session):
        session.add_all(users.values())

    posts = {
        'Post1': Post(slug='post_1', title='This is the first post'),
        'Post2': Post(slug='post_2', title='This is the second post'),
        'Post3': Post(slug='post_3', title='This is the third post'),
    }
    with subtransactions(session):
        session.add_all(posts.values())

    # Comment1 .. Comment6, created in ascending order.
    comments = {
        'Comment%d' % number: Comment(
            title='Comment %d' % number,
            content='This is a sample comment for comment %d' % number,
        )
        for number in range(1, 7)
    }
    with subtransactions(session):
        session.add_all(comments.values())

    tag_names = (
        'Economics',
        'Career',
        'Political',
        'Fitness',
        'Entertainment',
        'Education',
        'Technology',
        'Health',
        'Fashion',
        'Design',
        'Photography',
        'Lifestyle',
    )
    tags = {tag_name: Tag(name=tag_name) for tag_name in tag_names}
    with subtransactions(session):
        session.add_all(tags.values())

    # Link rows are built from (owner key, target key) pairs so that the
    # relationships can be read at a glance.
    user_posts = [
        UserPost(user=users[user_name], post=posts[post_key])
        for user_name, post_key in (
            ('Carla Ferreira Cardoso', 'Post1'),
            ('Uwe Fuerst', 'Post2'),
            ('Otitodilinna Chigolum', 'Post3'),
        )
    ]
    with subtransactions(session):
        session.add_all(user_posts)

    user_tags = [
        UserTag(user=users[user_name], tag=tags[tag_name])
        for user_name, tag_name in (
            ('Carla Ferreira Cardoso', 'Economics'),
            ('Carla Ferreira Cardoso', 'Career'),
            ('Carla Ferreira Cardoso', 'Political'),
            ('Carla Ferreira Cardoso', 'Lifestyle'),
            ('Carla Ferreira Cardoso', 'Health'),
            ('Uwe Fuerst', 'Education'),
            ('Uwe Fuerst', 'Lifestyle'),
            ('Otitodilinna Chigolum', 'Fashion'),
        )
    ]
    with subtransactions(session):
        session.add_all(user_tags)

    post_comments = [
        PostComment(post=posts[post_key], comment=comments[comment_key])
        for post_key, comment_key in (
            ('Post1', 'Comment1'),
            ('Post1', 'Comment2'),
            ('Post2', 'Comment3'),
            ('Post2', 'Comment4'),
            ('Post3', 'Comment5'),
            ('Post3', 'Comment6'),
        )
    ]
    with subtransactions(session):
        session.add_all(post_comments)
Exemple #2
0
def parse_one(id):
    """Download one feed and store its new or changed entries.

    Loads the ``Feed`` row with primary key ``id``, fetches ``feed.url``
    and parses the response with ``feedparser``. Entries that carry an
    ``id`` are matched against stored ``Post`` rows of the same feed via
    ``entry_id``: unknown entries are appended to ``feed.posts``, known
    entries are overwritten only when their ``updated`` timestamp differs.
    Finally the feed-level timestamp is written to ``feed.updated`` and the
    session is committed.

    Nothing is written when the request times out or when the feed-level
    timestamp equals the stored ``feed.updated``. The session is closed on
    every exit path.

    Args:
        id: Primary key of the ``Feed`` to refresh.

    Returns:
        None.
    """
    Session = session()
    try:
        logger.debug('Query feed %s', id)
        feed = Session.query(Feed).get(id)
        logger.debug('Parse feed %s: %s', id, feed.url)
        try:
            # NOTE(review): verify=False turns off TLS certificate checks.
            # Kept as in the original; confirm this is intentional.
            resp = requests.get(feed.url, timeout=10, verify=False)
        except requests.exceptions.Timeout:
            return
        parser = feedparser.parse(resp.content)
        logger.debug('Got %s posts', len(parser.entries))

        # Feed-level timestamp. It has its own name so that the per-entry
        # timestamps computed in the loop below cannot overwrite it.
        feed_updated = (parser.feed.get('updated_parsed', None)
                        or parser.feed.get('date_parsed', None))
        if feed_updated:
            feed_updated = dt.fromtimestamp(mktime(feed_updated))

        if feed_updated and feed.updated == feed_updated:
            logger.debug('Feed %s already up-to-date. Exit', feed.title)
            return  # already up to date

        logger.debug('Query already fetched posts')
        fetched_ids = tuple(
            entry.id for entry in parser.entries if hasattr(entry, 'id'))
        query = Session.query(Post).filter(Post.feed_id == feed.id)
        stored = query.filter(Post.entry_id.in_(fetched_ids)).all()
        logger.debug('%s posts are already in the database', len(stored))
        known_posts = {row.entry_id: row for row in stored}

        for entry in parser.entries:
            if not hasattr(entry, 'id'):
                logger.debug('Post has no attribute id %s', entry)
                continue

            published_raw = (entry.get('published_parsed', None)
                             or entry.get('date_parsed', None))
            # Entries without a publication date are stamped with "now".
            if published_raw:
                pubdate = dt.fromtimestamp(mktime(published_raw))
            else:
                pubdate = dt.utcnow()

            updated_raw = entry.get('updated_parsed', None)
            entry_updated = (
                dt.fromtimestamp(mktime(updated_raw)) if updated_raw else None
            )

            p = known_posts.get(entry.id)
            if p is None:
                logger.debug('Creating new post %s', entry.title)
                p = Post()
                feed.posts.append(p)
            else:
                # A stored post is rewritten only when the entry reports a
                # different update time.
                if not entry_updated or p.updated == entry_updated:
                    continue
                logger.debug('Updating post %s', entry.title)

            p.read = False
            p.entry_id = entry.id
            p.title = entry.title[:400]
            p.link = entry.link
            p.author = parser.feed.get('author', 'None')[:80]
            p.published = pubdate
            p.updated = entry_updated
            p.summary = entry.get('summary', '')

        # Store the feed-level timestamp (not the last entry's one) so the
        # up-to-date check above works on the next run.
        feed.updated = feed_updated
        Session.commit()
    finally:
        # Release the session on early returns and on errors as well.
        Session.close()
Exemple #3
0
def parse_post(content):
    """Extract a post, its comments and all involved users from page HTML.

    The markup is parsed with lxml and looked up through fixed XPath
    expressions; the first match of every expression is used, so missing
    elements raise ``IndexError``.

    Args:
        content: HTML of a single post page, passed to ``lxml.etree.HTML``.

    Returns:
        A ``(post, users, comments)`` tuple: the populated ``Post``, a list
        of ``User`` objects (the post author first, then one per comment,
        duplicates included) and a list of ``Comment`` objects in document
        order.
    """
    def first(node, path):
        # First element matching ``path`` relative to ``node``.
        return node.xpath(path)[0]

    parsed_post = Post()
    found_users = []
    found_comments = []

    # Note: this code needs patched lxml with support for huge_tree in HTMLParser
    html_parser = lxml.etree.HTMLParser(recover=True, huge_tree=True)
    document = lxml.etree.HTML(content, parser=html_parser)

    # post
    entry = first(document, '//li[@class="hentry"]')
    thread = first(entry, './/div[@class="entry-comments"]')
    byline = first(entry, './/p[@class="author"]')

    # The numeric post id is the last path segment of the title link.
    title_href = first(entry, './/a[@class="entry-title"]').get('href')
    parsed_post.post_id = int(re.search(r'/(\d+)$', title_href).group(1))

    list_dom_id = first(thread, 'ul').get("id")
    list_match = re.match(r'comments_(\d+)$', list_dom_id)
    parsed_post.comment_list_id = int(list_match.group(1))

    parsed_post.language = first(entry, './/a[@rel="chapter"]').text

    parsed_post.code = first(entry, 'div[@class="entry-content"]/pre/code').text
    parsed_post.text = inner_html(first(entry, 'p[@class="description"]'))

    parsed_post.posted = parse_date(first(byline, 'abbr').get('title'))

    plus, minus, score = parse_rating(first(entry, 'p[@class="vote"]/strong'))
    parsed_post.vote_plus = plus
    parsed_post.vote_minus = minus
    parsed_post.rating = score

    # author info
    author = User()

    profile_href = first(byline, 'a[1]').get('href')
    author.user_id = int(re.search(r'/user/(\d+)$', profile_href).group(1))
    author.name = first(byline, 'a[2]').text
    author.avatar_hash = parse_avatar(first(byline, 'a[1]/img').get('src'))

    parsed_post.user_id = author.user_id
    found_users.append(author)

    # comments
    for wrapper in thread.xpath('.//div[@class="entry-comment-wrapper"]'):
        reply = Comment()

        own_match = re.match(r'comment-(\d+)$', wrapper.get('id'))
        reply.comment_id = int(own_match.group(1))
        reply.post_id = parsed_post.post_id

        # Three levels up is an <li> when this comment answers another one.
        ancestor = wrapper.getparent().getparent().getparent()
        if ancestor.tag != 'li':
            reply.parent_id = None
        else:
            parent_wrapper = first(ancestor, 'div[@class="entry-comment-wrapper"]')
            parent_match = re.match(r'comment-(\d+)$', parent_wrapper.get('id'))
            reply.parent_id = int(parent_match.group(1))

        reply.text = inner_html(first(wrapper, './/span[@class="comment-text"]'))

        info = first(wrapper, 'p[@class="entry-info"]')

        reply.posted = parse_date(first(info, 'abbr[@class="published"]').get('title'))
        plus, minus, score = parse_rating(
            first(info, 'span[@class="comment-vote"]/strong'))
        reply.vote_plus = plus
        reply.vote_minus = minus
        reply.rating = score

        author_link = first(info, 'strong[@class="entry-author"]/a')

        commenter = User()
        commenter_match = re.search(r'/user/(\d+)$', author_link.get('href'))
        commenter.user_id = int(commenter_match.group(1))
        commenter.name = author_link.text
        commenter.avatar_hash = parse_avatar(
            first(info, 'img[@class="avatar"]').get('src'))

        reply.user_id = commenter.user_id
        found_users.append(commenter)
        found_comments.append(reply)

    return parsed_post, found_users, found_comments
Exemple #4
0
def main(config):
    """Seed the database named by the schema config with sample records.

    Resolves ``config`` through ``get_config``, calls ``teardown`` with
    ``drop_db=False``, then inserts a fixed set of users, posts, comments
    and tags followed by the ``UserPost``, ``UserTag`` and ``PostComment``
    rows that link them. Every batch is added inside its own
    ``subtransactions(session)`` block.

    Args:
        config: Value handed to ``get_config``; the result is opened as a
            JSON file whose first element must provide an ``index`` key
            and may provide a ``database`` key.
    """
    config = get_config(config)
    teardown(drop_db=False, config=config)
    # Read the schema inside a context manager so the file handle is closed
    # right away instead of being left open until garbage collection.
    with open(config) as schema_file:
        documents = json.load(schema_file)
    # The database name falls back to the index name of the first document.
    engine = pg_engine(
        database=documents[0].get("database", documents[0]["index"]))
    Session = sessionmaker(bind=engine, autoflush=True)
    session = Session()

    # Bootstrap
    users = {
        "Carla Ferreira Cardoso":
        User(name="Carla Ferreira Cardoso", age=19, gender="female"),
        "Uwe Fuerst":
        User(name="Uwe Fuerst", age=58, gender="male"),
        "Otitodilinna Chigolum":
        User(name="Otitodilinna Chigolum", age=36, gender="male"),
    }
    with subtransactions(session):
        session.add_all(users.values())

    posts = {
        "Post 1": Post(slug="post_1", title="This is the first post"),
        "Post 2": Post(slug="post_2", title="This is the second post"),
        "Post 3": Post(slug="post_3", title="This is the third post"),
    }
    with subtransactions(session):
        session.add_all(posts.values())

    # "Comment 1" .. "Comment 6", created in ascending order.
    comments = {
        "Comment %d" % number: Comment(
            title="Comment %d" % number,
            content="This is a sample comment for comment %d" % number,
        )
        for number in range(1, 7)
    }
    with subtransactions(session):
        session.add_all(comments.values())

    tag_names = (
        "Economics",
        "Career",
        "Political",
        "Fitness",
        "Entertainment",
        "Education",
        "Technology",
        "Health",
        "Fashion",
        "Design",
        "Photography",
        "Lifestyle",
    )
    tags = {tag_name: Tag(name=tag_name) for tag_name in tag_names}
    with subtransactions(session):
        session.add_all(tags.values())

    # Link rows are built from (owner key, target key) pairs so that the
    # relationships can be read at a glance.
    user_posts = [
        UserPost(user=users[user_name], post=posts[post_key])
        for user_name, post_key in (
            ("Carla Ferreira Cardoso", "Post 1"),
            ("Uwe Fuerst", "Post 2"),
            ("Otitodilinna Chigolum", "Post 3"),
        )
    ]
    with subtransactions(session):
        session.add_all(user_posts)

    user_tags = [
        UserTag(user=users[user_name], tag=tags[tag_name])
        for user_name, tag_name in (
            ("Carla Ferreira Cardoso", "Economics"),
            ("Carla Ferreira Cardoso", "Career"),
            ("Carla Ferreira Cardoso", "Political"),
            ("Carla Ferreira Cardoso", "Lifestyle"),
            ("Carla Ferreira Cardoso", "Health"),
            ("Uwe Fuerst", "Education"),
            ("Uwe Fuerst", "Lifestyle"),
            ("Otitodilinna Chigolum", "Fashion"),
        )
    ]
    with subtransactions(session):
        session.add_all(user_tags)

    post_comments = [
        PostComment(post=posts[post_key], comment=comments[comment_key])
        for post_key, comment_key in (
            ("Post 1", "Comment 1"),
            ("Post 1", "Comment 2"),
            ("Post 2", "Comment 3"),
            ("Post 2", "Comment 4"),
            ("Post 3", "Comment 5"),
            ("Post 3", "Comment 6"),
        )
    ]
    with subtransactions(session):
        session.add_all(post_comments)
Exemple #5
0
def post(post_payload: Post):
    """Store a post in the module-level ``posts`` list and echo it back.

    A payload whose ``id`` is falsy receives one: ``len(posts) + 1``, moved
    forward past any id already used by a stored post so that a post saved
    earlier with an explicit id cannot be duplicated.

    Args:
        post_payload: The post to store; its ``id`` may be set in place.

    Returns:
        A dict with the stored payload under the ``"data"`` key.
    """
    if not post_payload.id:
        # Stored items without an ``id`` attribute are ignored here.
        taken_ids = {getattr(existing, "id", None) for existing in posts}
        candidate = len(posts) + 1
        while candidate in taken_ids:
            candidate += 1
        post_payload.id = candidate
    posts.append(post_payload)
    return {"data": post_payload}