コード例 #1
0
ファイル: tasks.py プロジェクト: VikParuchuri/comment-matcher
def pull_down_comments():
    """Fetch one comment per reply subreddit and record a matched reply.

    After a random delay of 0-300 seconds, a KNN matcher is trained on the
    data read from "raw_data_cache.p".  For every subreddit in
    REPLY_SUBREDDIT_LIST a single comment is fetched and a reply is
    produced for its body.  The comment/reply pair is appended to the
    entries read from "items_done.p" unless the comment text was already
    recorded, or the (non-None) reply was already used.  The updated
    entries are written back to "items_done.p" at the end.

    Errors are logged through ``log.exception`` instead of being
    propagated: a failure for one subreddit skips only that subreddit,
    any other failure aborts the whole run without writing the cache.
    """
    try:
        # Random start-up delay (in seconds) before doing any work.
        sleep_time = random.randint(0, 300)
        time.sleep(sleep_time)
        raw_data = read_raw_data_from_cache("raw_data_cache.p")
        items_done = read_raw_data_from_cache("items_done.p")
        # Previously recorded texts/replies, used for duplicate detection.
        comments = [c['comment'] for c in items_done]
        replies = [c['reply'] for c in items_done]
        knn_matcher = train_knn_matcher(raw_data)
        for subreddit in REPLY_SUBREDDIT_LIST:
            try:
                comment = get_single_comment(subreddit)
                # Parenthesised form prints the same output on Python 2
                # and also parses on Python 3.
                print(comment)
                if comment is None:
                    log.info("Could not get a comment")
                    continue
                text = comment.body
                cid = comment.id
                reply = test_knn_matcher(knn_matcher, text)
                # Skip comments seen before and replies already used;
                # a None reply is never treated as a duplicate.
                if text in comments or (reply is not None
                                        and reply in replies):
                    continue
                data = {'comment': text, 'reply': reply, 'comment_id': cid}
                items_done.append(data)
                replies.append(reply)
                comments.append(text)
                log.info("Subreddit: {0}".format(subreddit))
                log.info("Comment: {0} {1}".format(cid, text))
                log.info("Reply: {0}".format(reply))
                log.info("-------------------")
            except Exception:
                # Not a bare ``except:`` so that KeyboardInterrupt and
                # SystemExit can still stop the task.
                log.exception("Cannot get reply for {0}".format(subreddit))
                continue
        # NOTE(review): "comment_id" is presumably the key the cache
        # writer uses to identify entries -- confirm against its definition.
        write_data_to_cache(items_done, "items_done.p", "comment_id")
    except Exception:
        log.exception("Could not pull down comment.")
コード例 #2
0
ファイル: tasks.py プロジェクト: VikParuchuri/comment-matcher
def get_reddit_posts():
    """Gather message replies from all comment subreddits and cache them.

    Calls ``get_message_replies`` once per entry of COMMENT_SUBREDDIT_LIST,
    converts every collected item with its ``get_raw_data()`` method and
    writes the resulting list to "raw_data_cache.p".  Any ``Exception`` is
    logged through ``log.exception`` rather than propagated.
    """
    try:
        collected = []
        for subreddit in COMMENT_SUBREDDIT_LIST:
            collected.extend(get_message_replies(subreddit=subreddit,
                                                 max_replies=500,
                                                 submission_count=300,
                                                 min_reply_score=20))
        # All subreddits are fetched before any item is converted.
        raw_data = [item.get_raw_data() for item in collected]
        write_data_to_cache(raw_data, "raw_data_cache.p")
    except Exception:
        log.exception("Could not save posts.")
コード例 #3
0
ファイル: tasks.py プロジェクト: VikParuchuri/comment-matcher
def get_reddit_posts():
    """Download message replies for each comment subreddit and cache them.

    For every subreddit in COMMENT_SUBREDDIT_LIST the replies returned by
    ``get_message_replies`` are accumulated; afterwards each one is turned
    into raw data via ``get_raw_data()`` and the list is written to
    "raw_data_cache.p".  Failures are logged with ``log.exception`` and
    not re-raised.
    """
    try:
        fetch_options = {
            'max_replies': 500,
            'submission_count': 300,
            'min_reply_score': 20,
        }
        gathered = []
        for subreddit in COMMENT_SUBREDDIT_LIST:
            gathered.extend(
                get_message_replies(subreddit=subreddit, **fetch_options))
        # Conversion happens only after every subreddit has been fetched.
        raw_data = [reply.get_raw_data() for reply in gathered]
        write_data_to_cache(raw_data, "raw_data_cache.p")
    except Exception:
        log.exception("Could not save posts.")
コード例 #4
0
ファイル: tasks.py プロジェクト: VikParuchuri/comment-matcher
def pull_down_comments():
    """Pull one comment from each reply subreddit and store a matched reply.

    The function first sleeps for a random 0-300 seconds, then trains a
    KNN matcher on the contents of "raw_data_cache.p".  For each subreddit
    in REPLY_SUBREDDIT_LIST it fetches a single comment, asks the matcher
    for a reply to the comment body, and appends the pair to the entries
    loaded from "items_done.p" -- unless that comment text is already
    recorded or the (non-None) reply has been used before.  Finally the
    entries are written back to "items_done.p".

    No ``Exception`` is propagated: per-subreddit failures are logged and
    skipped, and any other failure is logged and ends the run before the
    cache is written.
    """
    try:
        # Random delay (seconds) before the run starts.
        sleep_time = random.randint(0, 300)
        time.sleep(sleep_time)
        raw_data = read_raw_data_from_cache("raw_data_cache.p")
        items_done = read_raw_data_from_cache("items_done.p")
        # Texts and replies already recorded, for duplicate checks below.
        comments = [c['comment'] for c in items_done]
        replies = [c['reply'] for c in items_done]
        knn_matcher = train_knn_matcher(raw_data)
        for subreddit in REPLY_SUBREDDIT_LIST:
            try:
                comment = get_single_comment(subreddit)
                # Single-argument parenthesised print behaves identically
                # on Python 2 and remains valid syntax on Python 3.
                print(comment)
                if comment is None:
                    log.info("Could not get a comment")
                    continue
                text = comment.body
                cid = comment.id
                reply = test_knn_matcher(knn_matcher, text)
                # A None reply never counts as a duplicate reply.
                if text in comments or (reply is not None
                                        and reply in replies):
                    continue
                data = {'comment': text, 'reply': reply, 'comment_id': cid}
                items_done.append(data)
                replies.append(reply)
                comments.append(text)
                log.info("Subreddit: {0}".format(subreddit))
                log.info("Comment: {0} {1}".format(cid, text))
                log.info("Reply: {0}".format(reply))
                log.info("-------------------")
            except Exception:
                # Catch Exception only, so KeyboardInterrupt/SystemExit
                # are not swallowed by the per-subreddit handler.
                log.exception("Cannot get reply for {0}".format(subreddit))
                continue
        # NOTE(review): the third argument looks like the field used to
        # key cache entries -- verify against write_data_to_cache.
        write_data_to_cache(items_done, "items_done.p", "comment_id")
    except Exception:
        log.exception("Could not pull down comment.")