コード例 #1
0
ファイル: pipeline.py プロジェクト: smg7d/canary
def _addAncestorClosures(session, comment):
    """Ensure closure-table rows link ``comment`` to each of its ancestors.

    Walks up the stored parent chain, starting at the comment's direct
    parent, and appends a ``CommentsClosure`` row for every ancestor that
    is not already linked. The walk stops when the parent id equals the
    post id (the top of the thread) or when an ancestor row is missing
    from the database.

    Args:
        session: SQLAlchemy session used for the lookups.
        comment: Reddit comment object exposing ``id`` and ``parent_id``.

    Raises:
        sqlalchemy NoResultFound / MultipleResultsFound: if ``comment``
            itself is not stored exactly once.
    """
    existingComment = session.query(Comments).filter(
        Comments.commentId == comment.id).one()

    parentId = comment.parent_id[3:]  # trim off prefix of t1_ or t3_
    while parentId != existingComment.postId:
        alreadyLinked = any(
            closure.parentId == parentId
            and closure.childId == existingComment.commentId
            for closure in existingComment.commentsClosures)
        if not alreadyLinked:
            existingComment.commentsClosures.append(
                CommentsClosure(parentId=parentId,
                                childId=existingComment.commentId,
                                postId=existingComment.postId))

        # one_or_none() returns None for a missing ancestor, whereas one()
        # would raise and make the check below unreachable.
        parentComment = session.query(Comments).filter(
            Comments.commentId == parentId).one_or_none()
        if parentComment is None:
            break

        parentId = parentComment.parentId

    session.add(existingComment)


def addNewComments(subredditName, session):
    """Store new comments for the recent posts of a subreddit.

    For every stored ``Post`` of ``subredditName`` created less than 24
    hours ago, the full comment tree is fetched from Reddit. Comments not
    yet stored are appended to the post, and closure-table rows are
    ensured for every fetched comment (new or already stored). All changes
    are committed once at the end, and the number of added comments is
    logged.

    Args:
        subredditName: Value matched against ``Post.subreddit``.
        session: SQLAlchemy session used for queries and the final commit.
    """
    maxPostAgeSeconds = 60 * 60 * 24
    now = int(datetime.now(tz=timezone.utc).timestamp())
    commentsAdded = 0

    existingPosts = list(
        session.query(Post).filter(Post.subreddit == subredditName))
    for existingPost in existingPosts:
        if (now - existingPost.created) >= maxPostAgeSeconds:
            continue

        # Seed the level map with the comments already stored, so a new
        # reply to a previously stored comment gets parent level + 1
        # instead of being treated as a top-level comment.
        levelMap = {
            com.commentId: (com.level or 0)
            for com in session.query(Comments).filter(
                Comments.postId == existingPost.postId)
        }
        # Snapshot of ids stored before this run; a set gives O(1) lookups.
        existingCommentIds = set(levelMap)

        post = reddit.submission(id=existingPost.postId)
        post.comments.replace_more(limit=None)

        # NOTE(review): level computation assumes parents are listed
        # before their replies in comments.list() - confirm against PRAW.
        for comment in post.comments.list():
            if comment.id not in existingCommentIds:
                parentId = comment.parent_id[3:]  # trim off t1_ / t3_
                levelMap[comment.id] = levelMap.get(parentId, 0) + 1

                # Best-effort author lookup: deleted accounts have no
                # author; any lookup failure falls back to "".
                try:
                    author = ("" if comment.author is None
                              else comment.author.name)
                except Exception:
                    author = ""

                newComment = Comments(
                    commentId=comment.id,
                    parentId=parentId,
                    level=levelMap[comment.id],
                    commentText=comment.body,
                    author=author,
                    # Store the id string, not the Submission object; the
                    # closure walk compares postId against string ids.
                    postId=existingPost.postId,
                    created=int(comment.created_utc),
                    edited=bool(comment.edited))

                existingPost.comments.append(newComment)
                commentsAdded += 1

            _addAncestorClosures(session, comment)

        session.add(existingPost)

    session.commit()
    logging.info(f"{subredditName}: {commentsAdded} comments added")