Esempio n. 1
0
 def score_response(self, comment, response):
     """
     Give an internal score for posting ``response`` as a reply to ``comment``.

     If the score is negative, we won't post. This is useful when there is
     more than one possible response in our database.

     comment  -- the comment we are considering replying to (reads ``.body``)
     response -- the candidate reply (reads ``.body`` and ``.score``)

     Returns -1 for responses that are rejected outright, otherwise a float
     built from the response's score, how closely its original parent
     matches ``comment``, and the length of the simplified comment body,
     multiplied by a small random factor.
     """
     # Discard the obviously bad responses
     if response.body.strip() == "[deleted]":
         return -1
     simple_body = rewriter.simplify_body(comment.body)
     # log10 is undefined for values <= 0, so a non-positive score is
     # rejected even when the configured threshold would let it through.
     if response.score < config.good_comment_threshold or response.score <= 0:
         return -1
     # Derive our base score. We use a logarithm, because reddit scores are
     # roughly logarithmic <http://amix.dk/blog/post/19588>
     base_score = math.log10(response.score)
     # A raw penalty to subtract for the response having been written in a
     # different context (i.e. under a different parent) than this comment.
     max_penalty = math.log10(10000)
     response_parent = self.__get_parent(response)
     if response_parent is not None:
         similarity = self._get_parent_similarity_ratio(comment, response_parent)
         # The tenth power keeps the penalty small unless the similarity
         # ratio is low.
         difference_penalty = max_penalty * (1 - similarity) ** 10
     else:
         # No parent to compare against: apply the full penalty.
         difference_penalty = max_penalty
     # Give it some points for length. An empty simplified body earns no
     # reward instead of raising a math domain error on log10(0).
     length_reward = math.log10(max(len(simple_body), 1))
     # throw in some randomness for good luck
     fuzz_multiplier = random.gauss(mu=1, sigma=0.05)
     # put it all together
     final_score = (base_score - difference_penalty + length_reward) * fuzz_multiplier
     return final_score
Esempio n. 2
0
 def insert_comments(self, *comments, fast=False):
     """
     Insert the given comments into ``self.comments`` as one batch.

     comments -- comment objects to store; each must provide ``name``,
                 ``parent_id``, ``is_root``, ``score`` and ``reddit_session``
     fast     -- when true, skip parent lookups entirely and store the
                 parent body fields as None

     Does nothing when called without comments. For non-root comments the
     parent is looked up first among the comments passed in, then through
     the comment's reddit session.
     """
     if not comments:
         return
     comments_by_name = {c.name: c for c in comments}
     documents = []
     for c in comments:
         document = self.__comment_to_json(c)
         parent_body = None
         parent_simple_body = None
         if not (fast or c.is_root):
             try:
                 parent = comments_by_name[c.parent_id]
             except KeyError:
                 parent = c.reddit_session.get_info(thing_id=c.parent_id)
             # NOTE(review): assumes the API lookup can come back empty
             # (e.g. parent no longer retrievable) -- confirm against praw.
             # In that case store None rather than aborting the whole batch.
             if parent is not None:
                 parent_body = parent.body
                 parent_simple_body = rewriter.simplify_body(parent_body)
         # We insert all of our database-specific stuff in "metadata", so
         # that we can easily remove it before constructing comment objects
         document["metadata"] = {
             "parent_simple_body": parent_simple_body,
             "parent_body": parent_body,
             "score": c.score,  # ups minus downs
             "insert_time": time.time(),
             "database_format": DATABASE_FORMAT,
         }
         documents.append(document)
     self.comments.insert(documents)
Esempio n. 3
0
 def generate_comment_metadata(self, comment_json, comments_by_id=None,
                               reddit_session=None):
     """
     Build the database-specific "metadata" dict for one comment.

     comment_json   -- the comment as a JSON-style dict; must contain
                       "parent_id", "ups" and "downs"
     comments_by_id -- optional mapping from parent id to either a comment
                       dict or a praw Comment; consulted before the database
     reddit_session -- optional session used as a last resort to fetch the
                       parent through the API

     Returns a dict with the parent's body (raw and simplified, both None
     when the parent is a submission or cannot be found), the score
     (ups minus downs), the insert time and the database format marker.
     """
     if comments_by_id is None:
         comments_by_id = {}
     parent_body = parent_simple_body = None
     # find the parent body
     # Comments are of type t1, submissions are t3
     if comment_json["parent_id"][:2] != "t3":
         parent_id = comment_json["parent_id"]
         try:
             # lookup in local table
             parent = comments_by_id[parent_id]
         except KeyError:
             # lookup in database
             parent = self.comments.find_one({"name": parent_id},
                                             {"body": True})
             if parent is None and reddit_session is not None:
                 # fall back to looking it up with the API, using the
                 # session and parent id that are actually in scope here
                 fetched = reddit_session.get_info(thing_id=parent_id)
                 if fetched is not None:
                     parent = fetched.json_dict
         else:
             if isinstance(parent, praw.objects.Comment):
                 parent = parent.json_dict
         # Pull out what we want for later
         if parent is not None:
             parent_body = parent["body"]
             parent_simple_body = rewriter.simplify_body(parent_body)
     # We shouldn't overwrite insert time if we can avoid it
     try:
         insert_time = comment_json["metadata"]["insert_time"]
     except KeyError:
         insert_time = time.time()
     # Put it all together
     return {
         "parent_simple_body": parent_simple_body,
         "parent_body": parent_body,
         "score": comment_json["ups"] - comment_json["downs"],
         "insert_time": insert_time,
         "database_format": DATABASE_FORMAT,
     }
Esempio n. 4
0
def score_response(comment, response):
    """
    Rate ``response`` as a candidate reply to ``comment``.

    A negative rating means the response must not be posted. Deleted
    responses and responses scoring below 5 are rejected with -1; anything
    else is rated from its score and the length of the simplified comment
    body, multiplied by a random factor drawn around 1.
    """
    response_text = response.body.strip()
    if response_text == "[deleted]":
        return -1
    simplified = rewriter.simplify_body(comment.body)
    if response.score < 5:
        return -1
    raw_rating = response.score - 40 + len(simplified)
    jitter = random.gauss(1, .1)
    return raw_rating * jitter
Esempio n. 5
0
def get_best_response(comment):
    """
    Pick the highest-scoring stored response for ``comment``.

    Returns None when the simplified comment body is an ignored phrase, is
    shorter than 10 characters, has fewer than two spaces, has no stored
    responses, or when even the best response scores below zero.
    """
    simplified = rewriter.simplify_body(comment.body)
    if simplified in config.ignore_phrases:
        return None
    if len(simplified) < 10 or simplified.count(" ") < 2:
        return None

    # Look up stored comments whose parent had the same simplified body.
    candidates = db.get_comments(r, {
        "$query": {"metadata.parent_simple_body": simplified},
        "$orderby": {"metadata.score": -1},
    })
    if not candidates:
        return None

    # Score every candidate once, keeping each score next to its response.
    scored = [(score_response(comment, candidate), candidate)
              for candidate in candidates]
    top_score, top_candidate = max(scored, key=lambda pair: pair[0])
    if top_score < 0:
        return None
    return top_candidate
Esempio n. 6
0
 def get_best_response(self, comment):
     """
     Pick the highest-scoring stored response for ``comment``.

     Returns None when the simplified comment body is an ignored phrase, is
     shorter than 10 characters, has fewer than two spaces, has no stored
     responses, or when even the best response scores below zero.
     """
     simplified = rewriter.simplify_body(comment.body)
     if simplified in config.ignore_phrases:
         return None
     if len(simplified) < 10 or simplified.count(" ") < 2:
         return None

     # Look up stored comments whose parent had the same simplified body.
     candidates = self.database.get_comments(
         self.reddit_session,
         {"metadata.parent_simple_body": simplified},
         good_only=True,
         limit=100,
     )
     if not candidates:
         return None

     # Score every candidate once, keeping each score next to its response.
     scored = [(self.score_response(comment, candidate), candidate)
               for candidate in candidates]
     top_score, top_candidate = max(scored, key=lambda pair: pair[0])
     if top_score < 0:
         return None
     return top_candidate