Example #1
3
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                return

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
Example #2
2
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer("comment_tree.add.1")
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate("scores")

        CommentTree.add_comments(link, link_comments)
        timer.intermediate("update")

        write_comment_orders(link)
        timer.intermediate("write_order")

        timer.stop()
Example #3
1
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links_by_id = Link._byID(link_ids)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        write_comment_scores(link, link_comments)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
Example #4
0
def update_score(obj, up_change, down_change, new_valid_thing, old_valid_thing):
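    """Apply vote count changes to a thing and propagate descendant karma."""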
    obj._incr('_ups', up_change)
    obj._incr('_downs', down_change)
    if isinstance(obj, Comment):
        if hasattr(obj, 'parent_id'):
            parent = Comment._byID(obj.parent_id)
            parent.incr_descendant_karma([], up_change - down_change)
        Link._byID(obj.link_id)._incr(
            '_descendant_karma', up_change - down_change)
Example #5
0
def calc_rising():
    # As far as I can tell this can only ever return a series of 0's, as
    # that is what is hard-coded in... in which case nothing should ever
    # be rising unless I explicitly make it so.
    sr_count = count.get_link_counts()
    link_count = dict((k, v[0]) for k,v in sr_count.iteritems())
    link_names = Link._by_fullname(sr_count.keys(), data=True)

    #max is half the average of the top 10 counts
    counts = link_count.values()
    counts.sort(reverse=True)
    maxcount = sum(counts[:10]) / (2. * min(10, len(counts)))
    
    #prune the list
    rising = [(n, link_names[n].sr_id)
              for n in link_names.keys() if link_count[n] < maxcount]

    cur_time = datetime.now(g.tz)

    def score(pair):
        name = pair[0]
        link = link_names[name]
        age = cur_time - link._date
        # .seconds alone ignores whole days, so include them explicitly
        hours = age.days * 24 + age.seconds / 3600 + 1
        return float(link._ups) / (max(link_count[name], 1) * hours)

    def r(x):
        return 1 if x > 0 else -1 if x < 0 else 0

    rising.sort(lambda x, y: r(score(y) - score(x)))
    return rising
Example #6
0
def calc_rising():
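    """Return (fullname, sr_id) pairs for links that are rising.

    Candidates are links whose count is below half the average of the
    top ten counts; they are ordered by upvotes normalized by count
    and age in hours.
    """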
    sr_count = count.get_link_counts()
    link_count = dict((k, v[0]) for k,v in sr_count.iteritems())
    link_names = Link._by_fullname(sr_count.keys(), data=True)

    #max is half the average of the top 10 counts
    counts = link_count.values()
    counts.sort(reverse=True)
    maxcount = sum(counts[:10]) / 20

    #prune the list
    rising = [(n, link_names[n].sr_id)
              for n in link_names.keys() if link_count[n] < maxcount]

    cur_time = datetime.now(g.tz)

    def score(pair):
        name = pair[0]
        link = link_names[name]
        age = cur_time - link._date
        # .seconds alone ignores whole days, so include them explicitly
        hours = age.days * 24 + age.seconds / 3600 + 1
        return float(link._ups) / (max(link_count[name], 1) * hours)

    def r(x):
        return 1 if x > 0 else -1 if x < 0 else 0

    rising.sort(lambda x, y: r(score(y) - score(x)))
    return rising
Example #7
0
def add_comments(comments):
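    """Add new comments to their links' cached comment trees.

    If updating a tree fails, the tree is recomputed from scratch.
    """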
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        timer = g.stats.get_timer('comment_tree.add.%s' %
                                  link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                cache.add_comments(coms)
                timer.intermediate('update')
        except Exception:
            g.log.exception(
                'add_comments_nolock failed for link %s, recomputing tree',
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
Example #8
0
def calc_rising():
    sr_count = count.get_link_counts()
    link_count = dict((k, v[0]) for k,v in sr_count.iteritems())
    link_names = Link._by_fullname(sr_count.keys(), data=True)

    #max is half the average of the top 10 counts
    counts = link_count.values()
    counts.sort(reverse=True)
    maxcount = sum(counts[:10]) / 20

    #prune the list
    rising = [(n, link_names[n].sr_id)
              for n in link_names.keys() if link_count[n] < maxcount]

    cur_time = datetime.now(g.tz)

    def score(pair):
        name = pair[0]
        link = link_names[name]
        age = cur_time - link._date
        # .seconds alone ignores whole days, so include them explicitly
        hours = age.days * 24 + age.seconds / 3600 + 1
        return float(link._ups) / (max(link_count[name], 1) * hours)

    def r(x):
        return 1 if x > 0 else -1 if x < 0 else 0

    rising.sort(lambda x, y: r(score(y) - score(x)))
    return rising
Example #9
0
def link_comments(link_id, _update=False):
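    """Return the cached comment tree data for a link.

    On a cache miss, or when _update is set, the tree is rebuilt under
    a lock and the link's comment count is refreshed.
    """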
    key = comments_key(link_id)

    r = g.permacache.get(key)

    if r and not _update:
        return r
    else:
        # This operation can take longer than most (note the inner
        # locks); better to time out the request temporarily than to
        # deal with an inconsistent tree.
        with g.make_lock(lock_key(link_id), timeout=180):
            r = _load_link_comments(link_id)
            # rebuild parent dict
            cids, cid_tree, depth, num_children, num_comments = r
            r = r[:-1]  # Remove num_comments from r; we don't need to cache it.
            g.permacache.set(parent_comments_key(link_id),
                             _parent_dict_from_tree(cid_tree))

            g.permacache.set(key, r)

            # update the link's comment count and schedule it for search
            # reindexing
            link = Link._byID(link_id, data=True)
            link.num_comments = num_comments
            link._commit()
            from r2.lib.db.queries import changed
            changed(link)

        return r
Example #10
0
def add_comments(comments):
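    """Apply new and deleted comments to their links' comment trees.

    An inconsistent tree is rebuilt from scratch.
    """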
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)
        timer = g.stats.get_timer('comment_tree.add.%s' %
                                  link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                if add_comments:
                    cache.add_comments(add_comments)
                for comment in delete_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate('update')
        except InconsistentCommentTreeError:
            comment_ids = [comment._id for comment in coms]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, comment_ids)
            rebuild_comment_tree(link, timer=timer)
            g.stats.simple_event('comment_tree_inconsistent')

        timer.stop()
        update_comment_votes(coms)
Example #11
0
    def GET_document(self):
        try:
            #no cookies on errors
            c.cookies.clear()

            code = request.GET.get('code', '')
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', "")
            if srname:
                c.site = Subreddit._by_name(srname)
            if c.render_style not in self.allowed_render_styles:
                return str(code)
            elif takedown and code == '404':
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == '403':
                return self.send403()
            elif code == '500':
                return redditbroke % rand_strings.sadmessages
            elif code == '503':
                c.response.status_code = 503
                c.response.headers['Retry-After'] = 1
                c.response.content = toofast
                return c.response
            elif code == '304':
                if request.GET.has_key('x-sup-id'):
                    x_sup_id = request.GET.get('x-sup-id')
                    # guard against header injection via CRLF
                    if '\r\n' not in x_sup_id:
                        c.response.headers['x-sup-id'] = x_sup_id
                return c.response
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except:
            return handle_awful_failure("something really bad just happened.")
Example #12
0
def update_comment_votes(comments, write_consistency_level=None):
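    """Recompute and write cached comment sort values for each affected link."""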
    from r2.models import CommentSortsCache

    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)
    all_links = Link._byID(link_map.keys(), data=True)

    comment_trees = {}
    for link in all_links.values():
        comment_trees[link._id] = get_comment_tree(link)

    for link_id, coms in link_map.iteritems():
        link = all_links[link_id]
        for sort in ("_controversy", "_hot", "_confidence", "_score", "_date",
                     "_qa"):
            cid_tree = comment_trees[link_id].tree
            sorter = _comment_sorter_from_cids(coms, sort, link, cid_tree,
                                               by_36=True)

            # Cassandra always uses the id36 instead of the integer
            # ID, so we'll map that first before sending it
            c_key = sort_comments_key(link_id, sort)
            CommentSortsCache._set_values(
                c_key, sorter,
                write_consistency_level=write_consistency_level)
Example #13
0
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        timer = g.stats.get_timer('comment_tree.add.%s'
                                  % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                cache.add_comments(coms)
                timer.intermediate('update')
        except Exception:
            g.log.exception(
                'add_comments_nolock failed for link %s, recomputing tree',
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
Example #14
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links_by_id = Link._byID(link_ids)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        write_comment_scores(link, link_comments)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
Example #15
0
def update_comment_votes(comments, write_consistency_level=None):
    from r2.models import CommentSortsCache, CommentScoresByLink

    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)
    all_links = Link._byID(link_map.keys(), data=True)

    comment_trees = {}
    for link in all_links.values():
        comment_trees[link._id] = get_comment_tree(link)

    for link_id, coms in link_map.iteritems():
        link = all_links[link_id]
        for sort in ("_controversy", "_hot", "_confidence", "_score", "_date",
                     "_qa"):
            cid_tree = comment_trees[link_id].tree
            scores_by_comment = _comment_sorter_from_cids(coms,
                                                          sort,
                                                          link,
                                                          cid_tree,
                                                          by_36=True)

            # Cassandra always uses the id36 instead of the integer
            # ID, so we'll map that first before sending it
            c_key = sort_comments_key(link_id, sort)
            CommentSortsCache._set_values(
                c_key,
                scores_by_comment,
                write_consistency_level=write_consistency_level)
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)
Example #16
0
def process_new_links(period=media_period, force=False):
    """Fetches links from the last period and sets their media
    properities. If force is True, it will fetch properities for links
    even if the properties already exist"""
    links = Link._query(Link.c._date > timeago(period),
                        sort=desc('_date'),
                        data=True)
    results = {}
    jobs = []
    for link in fetch_things2(links):
        if link.is_self or link.promoted:
            continue
        elif not force and (link.has_thumbnail or link.media_object):
            continue

        jobs.append(make_link_info_job(results, link, g.useragent))

    #send links to a queue
    wq = WorkQueue(jobs, num_workers=20, timeout=30)
    wq.start()
    wq.jobs.join()

    #when the queue is finished, do the db writes in this thread
    for link, info in results.items():
        update_link(link, info[0], info[1])
Example #17
0
def update_comment_votes(comments):
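    """Recompute comment sort scores for each affected link and store them."""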
    from r2.models import CommentScoresByLink

    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)
    all_links = Link._byID(link_map.keys(), data=True)

    comment_trees = {}
    for link in all_links.values():
        comment_trees[link._id] = get_comment_tree(link)

    for link_id, coms in link_map.iteritems():
        link = all_links[link_id]
        for sort in ("_controversy", "_hot", "_confidence", "_score", "_date",
                     "_qa"):
            cid_tree = comment_trees[link_id].tree
            scores_by_comment = _comment_sorter_from_cids(coms,
                                                          sort,
                                                          link,
                                                          cid_tree,
                                                          by_36=True)
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)
Example #18
0
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)
        timer = g.stats.get_timer('comment_tree.add.%s'
                                  % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                if add_comments:
                    cache.add_comments(add_comments)
                for comment in delete_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate('update')
        except InconsistentCommentTreeError:
            comment_ids = [comment._id for comment in coms]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, comment_ids)
            rebuild_comment_tree(link, timer=timer)

        timer.stop()
        update_comment_votes(coms)
Example #19
0
def link_comments(link_id, _update=False):
    key = comments_key(link_id)

    r = g.permacache.get(key)

    if r and not _update:
        return r
    else:
        # This operation can take longer than most (note the inner
        # locks); better to time out the request temporarily than to
        # deal with an inconsistent tree.
        with g.make_lock(lock_key(link_id), timeout=180):
            r = _load_link_comments(link_id)
            # rebuild parent dict
            cids, cid_tree, depth, num_children, num_comments = r
            r = r[:-1]  # Remove num_comments from r; we don't need to cache it.
            g.permacache.set(parent_comments_key(link_id),
                             _parent_dict_from_tree(cid_tree))

            g.permacache.set(key, r)

            # update the link's comment count and schedule it for search
            # reindexing
            link = Link._byID(link_id, data=True)
            link.num_comments = num_comments
            link._commit()
            from r2.lib.db.queries import changed
            changed(link)

        return r
Example #20
0
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)
        timer = g.stats.get_timer("comment_tree.add.%s" % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate("lock")
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate("get")
                if add_comments:
                    cache.add_comments(add_comments)
                for comment in delete_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate("update")
        except Exception:
            g.log.exception(
                "add_comments_nolock failed for link %s, recomputing tree",
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
Example #21
0
    def GET_document(self):
        try:
            c.errors = c.errors or ErrorSet()
            # clear cookies the old fashioned way
            c.cookies = Cookies()

            code = request.GET.get('code', '')
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', "")

            # StatusBasedRedirect will override this anyway, but we need this
            # here for pagecache to see.
            response.status_int = code

            if srname:
                c.site = Subreddit._by_name(srname)

            if request.GET.has_key('allow_framing'):
                c.allow_framing = bool(request.GET['allow_framing'] == '1')

            if code in (204, 304):
                # NEVER return a content body on 204/304 or downstream
                # caches may become very confused.
                if request.GET.has_key('x-sup-id'):
                    x_sup_id = request.GET.get('x-sup-id')
                    if '\r\n' not in x_sup_id:
                        response.headers['x-sup-id'] = x_sup_id
                return ""
            elif c.render_style not in self.allowed_render_styles:
                return str(code)
            elif c.render_style in extensions.API_TYPES:
                data = request.environ.get('extra_error_data', {'error': code})
                if request.environ.get("WANT_RAW_JSON"):
                    return scriptsafe_dumps(data)
                return websafe_json(json.dumps(data))
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 429:
                return self.send429()
            elif code == 500:
                randmin = {'admin': random.choice(self.admins)}
                failien_url = make_failien_url()
                sad_message = safemarkdown(rand_strings.sadmessages % randmin)
                return redditbroke % (failien_url, sad_message)
            elif code == 503:
                return self.send503()
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except Exception as e:
            return handle_awful_failure("ErrorController.GET_document: %r" % e)
Example #22
0
    def GET_document(self):
        try:
            c.errors = c.errors or ErrorSet()
            # clear cookies the old fashioned way
            c.cookies = Cookies()

            code = request.GET.get('code', '')
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', "")

            # StatusBasedRedirect will override this anyway, but we need this
            # here for pagecache to see.
            response.status_int = code

            if srname:
                c.site = Subreddit._by_name(srname)

            if request.GET.has_key('allow_framing'):
                c.allow_framing = bool(request.GET['allow_framing'] == '1')

            if code in (204, 304):
                # NEVER return a content body on 204/304 or downstream
                # caches may become very confused.
                if request.GET.has_key('x-sup-id'):
                    x_sup_id = request.GET.get('x-sup-id')
                    if '\r\n' not in x_sup_id:
                        response.headers['x-sup-id'] = x_sup_id
                return ""
            elif c.render_style not in self.allowed_render_styles:
                return str(code)
            elif c.render_style in extensions.API_TYPES:
                data = request.environ.get('extra_error_data', {'error': code})
                if request.environ.get("WANT_RAW_JSON"):
                    return scriptsafe_dumps(data)
                return websafe_json(json.dumps(data))
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 429:
                return self.send429()
            elif code == 500:
                randmin = {'admin': random.choice(self.admins)}
                failien_url = make_failien_url()
                sad_message = safemarkdown(rand_strings.sadmessages % randmin)
                return redditbroke % (failien_url, sad_message)
            elif code == 503:
                return self.send503()
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except Exception as e:
            return handle_awful_failure("ErrorController.GET_document: %r" % e)
Example #23
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer('comment_tree.add.%s' %
                                  link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error("comment_tree_inconsistent: %s %s" %
                            (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                return

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
Example #24
0
    def GET_document(self):
        try:
            c.errors = c.errors or ErrorSet()
            # clear cookies the old fashioned way
            c.cookies = Cookies()

            code = request.GET.get("code", "")
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get("srname", "")
            takedown = request.GET.get("takedown", "")

            # StatusBasedRedirect will override this anyway, but we need this
            # here for pagecache to see.
            response.status_int = code

            if srname:
                c.site = Subreddit._by_name(srname)

            if code in (204, 304):
                # NEVER return a content body on 204/304 or downstream
                # caches may become very confused.
                if request.GET.has_key("x-sup-id"):
                    x_sup_id = request.GET.get("x-sup-id")
                    if "\r\n" not in x_sup_id:
                        response.headers["x-sup-id"] = x_sup_id
                return ""
            elif c.render_style not in self.allowed_render_styles:
                return str(code)
            elif c.render_style in extensions.API_TYPES:
                data = request.environ.get("extra_error_data", {"error": code})
                return websafe_json(json.dumps(data))
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 429:
                return self.send429()
            elif code == 500:
                randmin = {"admin": random.choice(self.admins)}
                failien_url = make_failien_url()
                return redditbroke % (failien_url, rand_strings.sadmessages % randmin)
            elif code == 503:
                return self.send503()
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except:
            return handle_awful_failure("something really bad just happened.")
Example #25
0
    def GET_document(self):
        try:
            # clear cookies the old fashioned way
            c.cookies = Cookies()

            code = request.GET.get('code', '')
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', "")

            if srname:
                c.site = Subreddit._by_name(srname)
            if c.render_style not in self.allowed_render_styles:
                if code not in (204, 304):
                    c.response.content = str(code)
                c.response.status_code = code
                return c.response
            elif c.render_style == "api":
                data = request.environ.get('extra_error_data', {'error': code})
                c.response.content = json.dumps(data)
                return c.response
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 429:
                return self.send429()
            elif code == 500:
                randmin = {'admin': rand.choice(self.admins)}
                failien_name = 'youbrokeit%d.png' % rand.randint(
                    1, NUM_FAILIENS)
                failien_url = static(failien_name)
                return redditbroke % (failien_url,
                                      rand_strings.sadmessages % randmin)
            elif code == 503:
                return self.send503()
            elif code == 304:
                if request.GET.has_key('x-sup-id'):
                    x_sup_id = request.GET.get('x-sup-id')
                    if '\r\n' not in x_sup_id:
                        c.response.headers['x-sup-id'] = x_sup_id
                return c.response
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except:
            return handle_awful_failure("something really bad just happened.")
Example #26
0
    def process_link(msg):
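        """Fetch media for the link named by the queue message body,
        bounded by a 30 second timeout."""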
        fname = msg.body
        link = Link._by_fullname(msg.body, data=True)

        try:
            TimeoutFunction(_set_media, 30)(link)
        except TimeoutFunctionException:
            print "Timed out on %s" % fname
        except KeyboardInterrupt:
            raise
        except:
            print "Error fetching %s" % fname
            print traceback.format_exc()
Example #27
0
    def process_link(msg):
        fname = msg.body
        link = Link._by_fullname(msg.body, data=True)

        try:
            TimeoutFunction(_set_media, 30)(embedly_services, link)
        except TimeoutFunctionException:
            print "Timed out on %s" % fname
        except KeyboardInterrupt:
            raise
        except:
            print "Error fetching %s" % fname
            print traceback.format_exc()
Example #28
0
    def GET_document(self):
        try:
            c.errors = c.errors or ErrorSet()
            # clear cookies the old fashioned way
            c.cookies = Cookies()

            code = request.GET.get('code', '')
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', "")
            
            if srname:
                c.site = Subreddit._by_name(srname)
            if c.render_style not in self.allowed_render_styles:
                if code not in (204, 304):
                    c.response.content = str(code)
                c.response.status_code = code
                return c.response
            elif c.render_style in extensions.API_TYPES:
                data = request.environ.get('extra_error_data', {'error': code})
                c.response.content = websafe_json(json.dumps(data))
                return c.response
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 429:
                return self.send429()
            elif code == 500:
                randmin = {'admin': rand.choice(self.admins)}
                failien_name = 'youbrokeit%d.png' % rand.randint(1, NUM_FAILIENS)
                failien_url = static(failien_name)
                return redditbroke % (failien_url, rand_strings.sadmessages % randmin)
            elif code == 503:
                return self.send503()
            elif code == 304:
                if request.GET.has_key('x-sup-id'):
                    x_sup_id = request.GET.get('x-sup-id')
                    if '\r\n' not in x_sup_id:
                        c.response.headers['x-sup-id'] = x_sup_id
                return c.response
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except:
            return handle_awful_failure("something really bad just happened.")
Example #29
0
    def GET_document(self):
        try:
            c.errors = c.errors or ErrorSet()
            # clear cookies the old fashioned way
            c.cookies = Cookies()

            code = request.GET.get('code', '')
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', "")
            
            if srname:
                c.site = Subreddit._by_name(srname)

            if code in (204, 304):
                # NEVER return a content body on 204/304 or downstream
                # caches may become very confused.
                if request.GET.has_key('x-sup-id'):
                    x_sup_id = request.GET.get('x-sup-id')
                    if '\r\n' not in x_sup_id:
                        response.headers['x-sup-id'] = x_sup_id
                return ""
            elif c.render_style not in self.allowed_render_styles:
                return str(code)
            elif c.render_style in extensions.API_TYPES:
                data = request.environ.get('extra_error_data', {'error': code})
                return websafe_json(json.dumps(data))
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 429:
                return self.send429()
            elif code == 500:
                randmin = {'admin': random.choice(self.admins)}
                failien_url = make_failien_url()
                return redditbroke % (failien_url, rand_strings.sadmessages % randmin)
            elif code == 503:
                return self.send503()
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except:
            return handle_awful_failure("something really bad just happened.")
Example #30
0
def delete_comment(comment):
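    """Remove a comment from its link's comment tree and mark the link
    as changed for search reindexing."""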
    link = Link._byID(comment.link_id, data=True)
    timer = g.stats.get_timer('comment_tree.delete.%s' %
                              link.comment_tree_version)
    timer.start()
    with CommentTree.mutation_context(link):
        timer.intermediate('lock')
        cache = get_comment_tree(link)
        timer.intermediate('get')
        cache.delete_comment(comment, link)
        timer.intermediate('update')
        from r2.lib.db.queries import changed
        changed([link])
        timer.intermediate('changed')
    timer.stop()
Example #31
0
def delete_comment(comment):
    link = Link._byID(comment.link_id, data=True)
    timer = g.stats.get_timer('comment_tree.delete.%s'
                              % link.comment_tree_version)
    timer.start()
    with CommentTree.mutation_context(link):
        timer.intermediate('lock')
        cache = get_comment_tree(link)
        timer.intermediate('get')
        cache.delete_comment(comment, link)
        timer.intermediate('update')
        from r2.lib.db.queries import changed
        changed([link])
        timer.intermediate('changed')
    timer.stop()
Example #32
0
def find_tz():
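    """Find the time zone whose epoch makes the recomputed hot score
    match the value stored in the database, and print its name."""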
    q = Link._query(sort=desc('_hot'), limit=1)
    link = list(q)[0]
    t = tdb_sql.get_thing_table(Link._type_id)[0]

    s = sa.select([sa.func.hot(t.c.ups, t.c.downs, t.c.date), t.c.thing_id],
                  t.c.thing_id == link._id)
    db_hot = s.execute().fetchall()[0].hot.__float__()

    db_hot = round(db_hot, 7)

    for tz_name in pytz.common_timezones:
        tz = pytz.timezone(tz_name)
        sorts.epoch = datetime(1970, 1, 1, tzinfo=tz)

        if db_hot == link._hot:
            print tz_name
Example #33
0
def calc_rising():
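    """Return (fullname, score, sr_id) tuples sorted by rising score.

    Links with one upvote or fewer are excluded; score is upvotes
    divided by the link's recorded count.
    """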
    link_counts = count.get_link_counts()

    links = Link._by_fullname(link_counts.keys(), data=True)

    def score(link):
        count = link_counts[link._fullname][0]
        return float(link._ups) / max(count, 1)

    # build the rising list, excluding items with one upvote or fewer
    rising = []
    for link in links.values():
        if link._ups > 1:
            rising.append((link._fullname, score(link), link.sr_id))

    # return rising sorted by score
    return sorted(rising, key=lambda x: x[1], reverse=True)
Example #34
0
    def GET_document(self):
        try:
            # clear cookies the old fashioned way
            c.cookies = Cookies()

            code = request.GET.get('code', '')
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', "")
            
            if srname:
                c.site = Subsciteit._by_name(srname)
            if c.render_style not in self.allowed_render_styles:
                if code not in (204, 304):
                    c.response.content = str(code)
                return c.response
            elif c.render_style == "api":
                c.response.content = "{error: %s}" % code
                return c.response
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 500:
                randmin = {'admin': rand.choice(self.admins)}
                failien_name = 'youbrokeit%d.png' % rand.randint(1, NUM_FAILIENS)
                failien_url = static(failien_name)
                return sciteitbroke % (failien_url, rand_strings.sadmessages % randmin)
            elif code == 503:
                return self.send503()
            elif code == 304:
                if request.GET.has_key('x-sup-id'):
                    x_sup_id = request.GET.get('x-sup-id')
                    if '\r\n' not in x_sup_id:
                        c.response.headers['x-sup-id'] = x_sup_id
                return c.response
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except:
            return handle_awful_failure("something really bad just happened.")
Example #35
0
def calc_rising():
    link_counts = count.get_link_counts()

    links = Link._by_fullname(link_counts.keys(), data=True)

    def score(link):
        count = link_counts[link._fullname][0]
        return float(link._ups) / max(count, 1)

    # build the rising list, excluding items with one upvote or fewer
    rising = []
    for link in links.values():
        if link._ups > 1:
            rising.append((link._fullname, score(link), link.sr_id))

    # return rising sorted by score
    return sorted(rising, key=lambda x: x[1], reverse=True)
Example #36
0
def find_tz():
    q = Link._query(sort=desc('_hot'), limit=1)
    link = list(q)[0]
    t = tdb_sql.get_thing_table(Link._type_id)[0]

    s = sa.select([sa.func.hot(t.c.ups, t.c.downs, t.c.date),
                   t.c.thing_id],
                  t.c.thing_id == link._id)
    db_hot = s.execute().fetchall()[0].hot.__float__()

    db_hot = round(db_hot, 7)

    for tz_name in pytz.common_timezones:
        tz = pytz.timezone(tz_name)
        sorts.epoch = datetime(1970, 1, 1, tzinfo=tz)

        if db_hot == link._hot:
            print tz_name
Example #37
0
    def GET_document(self):
        try:
            # clear cookies the old fashioned way
            c.cookies = Cookies()

            code = request.GET.get("code", "")
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get("srname", "")
            takedown = request.GET.get("takedown", "")
            if srname:
                c.site = Subreddit._by_name(srname)
            if c.render_style not in self.allowed_render_styles:
                if code not in (204, 304):
                    c.response.content = str(code)
                return c.response
            elif c.render_style == "api":
                c.response.content = "{error: %s}" % code
                return c.response
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 500:
                return redditbroke % (rand.randint(1, NUM_FAILIENS),
                                      rand_strings.sadmessages)
            elif code == 503:
                return self.send503()
            elif code == 304:
                if request.GET.has_key("x-sup-id"):
                    x_sup_id = request.GET.get("x-sup-id")
                    if "\r\n" not in x_sup_id:
                        c.response.headers["x-sup-id"] = x_sup_id
                return c.response
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except:
            return handle_awful_failure("something really bad just happened.")
Example #38
0
    def GET_document(self):
        try:
            # clear cookies the old fashioned way
            c.cookies = Cookies()

            code = request.GET.get('code', '')
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', "")
            if srname:
                c.site = Subreddit._by_name(srname)
            if c.render_style not in self.allowed_render_styles:
                if code not in (204, 304):
                    c.response.content = str(code)
                return c.response
            elif c.render_style == "api":
                c.response.content = "{error: %s}" % code
                return c.response
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 500:
                return redditbroke % (rand.randint(
                    1, NUM_FAILIENS), rand_strings.sadmessages)
            elif code == 503:
                return self.send503()
            elif code == 304:
                if request.GET.has_key('x-sup-id'):
                    x_sup_id = request.GET.get('x-sup-id')
                    # guard against header injection via CRLF
                    if '\r\n' not in x_sup_id:
                        c.response.headers['x-sup-id'] = x_sup_id
                return c.response
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except:
            return handle_awful_failure("something really bad just happened.")
Example #39
0
def update_comment_votes(comments):
    from r2.models import CommentScoresByLink

    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)
    all_links = Link._byID(link_map.keys(), data=True)

    comment_trees = {}
    for link in all_links.values():
        comment_trees[link._id] = get_comment_tree(link)

    for link_id, coms in link_map.iteritems():
        link = all_links[link_id]
        for sort in ("_controversy", "_hot", "_confidence", "_score", "_date", "_qa"):
            cid_tree = comment_trees[link_id].tree
            scores_by_comment = _comment_sorter_from_cids(coms, sort, link, cid_tree, by_36=True)
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)
Example #40
0
def delete_comment(comment):
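    """Remove a comment from the cached comment tree data (only if it
    has no children) and decrement the link's comment count."""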
    with g.make_lock(lock_key(comment.link_id)):
        cids, comment_tree, depth, num_children = link_comments(comment.link_id)

        # only completely remove comments with no children
        if comment._id not in comment_tree:
            if comment._id in cids:
                cids.remove(comment._id)
            if comment._id in depth:
                del depth[comment._id]
            if comment._id in num_children:
                del num_children[comment._id]
            g.permacache.set(comments_key(comment.link_id),
                             (cids, comment_tree, depth, num_children))

        # update the link's comment count and schedule it for search reindexing
        link = Link._byID(comment.link_id, data=True)
        link._incr('num_comments', -1)
        from r2.lib.db.queries import changed
        changed(link)
Example #41
0
def update_comment_votes(comments):
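    """Write each comment's score for every sort order to CommentScoresByLink."""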
    from r2.models import CommentScoresByLink

    comments = tup(comments)

    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)
    links_by_id = Link._byID(comments_by_link_id.keys(), data=True)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
Example #42
0
def delete_comment(comment):
    with g.make_lock(lock_key(comment.link_id)):
        cids, comment_tree, depth, num_children = link_comments(
            comment.link_id)

        # only completely remove comments with no children
        if comment._id not in comment_tree:
            if comment._id in cids:
                cids.remove(comment._id)
            if comment._id in depth:
                del depth[comment._id]
            if comment._id in num_children:
                del num_children[comment._id]
            g.permacache.set(comments_key(comment.link_id),
                             (cids, comment_tree, depth, num_children))

        # update the link's comment count and schedule it for search reindexing
        link = Link._byID(comment.link_id, data=True)
        link._incr('num_comments', -1)
        from r2.lib.db.queries import changed
        changed(link)
Example #43
0
def update_comment_votes(comments):
    from r2.models import CommentScoresByLink

    comments = tup(comments)

    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)
    links_by_id = Link._byID(comments_by_link_id.keys(), data=True)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
Example #44
0
    def GET_document(self):
        try:
            # clear cookies the old fashioned way 
            c.cookies = Cookies()

            code = request.GET.get('code', '')
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', "")
            if srname:
                c.site = Subreddit._by_name(srname)
            if c.render_style not in self.allowed_render_styles:
                c.response.content = str(code)
                return c.response
            elif c.render_style == "api":
                c.response.content = "{error: %s}" % code
                return c.response
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 403:
                return self.send403()
            elif code == 500:
                return redditbroke % (rand.randint(1, NUM_FAILIENS),
                                      rand_strings.sadmessages)
            elif code == 503:
                return self.send503()
            elif code == 304:
                if request.GET.has_key('x-sup-id'):
                    x_sup_id = request.GET.get('x-sup-id')
                    # guard against header injection via CRLF
                    if '\r\n' not in x_sup_id:
                        c.response.headers['x-sup-id'] = x_sup_id
                return c.response
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except:
            return handle_awful_failure("something really bad just happened.")
Example #45
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
Example #46
0
def process_new_links(period=media_period, force=False):
    """Fetches links from the last period and sets their media
    properities. If force is True, it will fetch properities for links
    even if the properties already exist"""
    links = Link._query(Link.c._date > timeago(period), sort=desc('_date'),
                        data=True)
    results = {}
    jobs = []
    for link in fetch_things2(links):
        if link.is_self or link.promoted:
            continue
        elif not force and (link.has_thumbnail or link.media_object):
            continue

        jobs.append(make_link_info_job(results, link, g.useragent))

    #send links to a queue
    wq = WorkQueue(jobs, num_workers=20, timeout=30)
    wq.start()
    wq.jobs.join()

    #when the queue is finished, do the db writes in this thread
    for link, info in results.items():
        update_link(link, info[0], info[1])
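WorkQueue fans the fetch jobs out to worker threads and joins on completion. A minimal stdlib sketch of that pattern, under the assumption that each job is a zero-argument callable (the real WorkQueue is r2's own and also supports a per-job timeout):

import threading
from Queue import Queue  # Python 2 stdlib, matching this codebase

def run_jobs(jobs, num_workers=20):
    q = Queue()
    for job in jobs:
        q.put(job)

    def worker():
        while True:
            job = q.get()
            try:
                job()
            finally:
                q.task_done()

    for _ in xrange(num_workers):
        t = threading.Thread(target=worker)
        t.daemon = True  # workers die with the process once q is drained
        t.start()

    q.join()  # block until every queued job has been processed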
Example #47
0
File: error.py Project: z0r0/saidit
    def GET_document(self):
        try:
            c.errors = c.errors or ErrorSet()
            # clear cookies the old-fashioned way
            c.cookies = Cookies()

            code = request.GET.get('code', '')
            try:
                code = int(code)
            except ValueError:
                code = 404
            srname = request.GET.get('srname', '')
            takedown = request.GET.get('takedown', '')
            error_name = request.GET.get('error_name', '')

            if isinstance(c.user, basestring):
                # somehow requests are getting here with c.user unset
                c.user_is_loggedin = False
                c.user = UnloggedUser(browser_langs=None)

            if srname:
                c.site = Subreddit._by_name(srname)

            if request.GET.has_key('allow_framing'):
                c.allow_framing = bool(request.GET['allow_framing'] == '1')

            if (error_name == 'IN_TIMEOUT' and
                    'usable_error_content' not in request.environ):
                timeout_days_remaining = c.user.days_remaining_in_timeout

                errpage = pages.InterstitialPage(
                    _("suspended"),
                    content=pages.InTimeoutInterstitial(
                        timeout_days_remaining=timeout_days_remaining,
                    ),
                )
                request.environ['usable_error_content'] = errpage.render()

            if code in (204, 304):
                # NEVER return a content body on 204/304 or downstream
                # caches may become very confused.
                return ""
            elif c.render_style not in self.allowed_render_styles:
                return str(code)
            elif c.render_style in extensions.API_TYPES:
                data = request.environ.get('extra_error_data', {'error': code})
                message = request.GET.get('message', '')
                if message:
                    data['message'] = message
                if request.environ.get("WANT_RAW_JSON"):
                    return scriptsafe_dumps(data)
                return websafe_json(json.dumps(data))
            elif takedown and code == 404:
                link = Link._by_fullname(takedown)
                return pages.TakedownPage(link).render()
            elif code == 400:
                return self.send400()
            elif code == 403:
                return self.send403()
            elif code == 429:
                return self.send429()
            elif code == 500:
                failien_url = make_failien_url()
                sad_message = get_funny_translated_string("500_page")
                sad_message %= {'admin': random.choice(self.admins)}
                sad_message = safemarkdown(sad_message)
                return redditbroke % (failien_url, sad_message)
            elif code == 503:
                return self.send503()
            elif c.site:
                return self.send404()
            else:
                return "page not found"
        except Exception as e:
            return handle_awful_failure("ErrorController.GET_document: %r" % e)
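For API render styles the handler returns a small JSON body shaped like {'error': <code>} plus an optional message. A sketch of just the payload construction; r2's websafe_json/scriptsafe_dumps escaping helpers are not reproduced here:

import json

def build_error_body(code, message=None, extra_error_data=None):
    # mirror the shape above: start from extra data if the request
    # environment supplied any, otherwise from {'error': code}
    data = extra_error_data or {'error': code}
    if message:
        data['message'] = message
    return json.dumps(data)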
Example #48
0
        print '  aborting - bad preview object: %s' % preview_object
        return False
    if not preview_object['url']:
        print '  aborting - bad preview url: %s' % preview_object['url']
        return False
    return True


s3 = boto.connect_s3(g.S3KEY_ID or None, g.S3SECRET_KEY or None)

for uid, columns in LinksByImage._cf.get_range():
    # When resuming, use:
    #for uid, columns in LinksByImage._cf.get_range(start='<uid>'):
    print 'Looking at image %s' % uid
    link_ids = columns.keys()
    links = Link._byID36(link_ids, return_dict=False, data=True)
    if not links:
        continue

    # Pull information about the image from the first link (they *should* all
    # be the same).
    link = links[0]
    preview_object = link.preview_object
    if not good_preview_object(preview_object):
        continue

    u = UrlParser(preview_object['url'])
    if preview_object['url'].startswith(g.media_fs_base_url_http):
        # Uploaded to the local filesystem instead of s3.  Should only be in
        # dev.
        print '  non-s3 image'
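The resume trick in the comment above works because pycassa's ColumnFamily.get_range accepts a start key and yields rows from that point on, with the start row itself included. A sketch of a checkpointed scan; the checkpoint handling is hypothetical:

def scan_rows(cf, start=''):
    # cf is a pycassa ColumnFamily; get_range yields (row_key, columns)
    for uid, columns in cf.get_range(start=start):
        yield uid, columns
        # persist uid durably here; restarting with start=uid will
        # re-process this row, so processing should be idempotent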
Example #49
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        new_comments = [
            comment for comment in link_comments if not comment._deleted
        ]
        deleted_comments = [
            comment for comment in link_comments if comment._deleted
        ]
        timer = g.stats.get_timer('comment_tree.add.%s' %
                                  link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if new_comments:
                    comment_tree.add_comments(new_comments)

                for comment in deleted_comments:
                    comment_tree.delete_comment(comment, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # this exception occurs when we add a comment to the tree but
                # its parent isn't in the tree yet; we need to rebuild the
                # tree from scratch

                comment_ids = [comment._id for comment in link_comments]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, comment_ids)

                comment_tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')
                # the tree rebuild updated the link's comment count, so schedule
                # it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
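mutation_context presumably scopes an exclusive lock around the read-modify-write on the tree, which is why write_comment_orders runs inside it. A sketch of the shape; this in-process stand-in is an assumption, since the real r2 code would need a distributed lock with a timeout:

import threading
from contextlib import contextmanager

_tree_locks = {}
_tree_locks_guard = threading.Lock()

@contextmanager
def mutation_context(link, timeout=None):
    # one lock per link's comment tree
    with _tree_locks_guard:
        lock = _tree_locks.setdefault(link._id, threading.Lock())
    with lock:  # the timeout argument is ignored in this sketch
        yield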
Example #50
0
def _process_link(fname):
    link = Link._by_fullname(fname, data=True, return_dict=False)
    set_media(link)
Example #51
0
def _process_link(fname):
    link = Link._by_fullname(fname, data=True)
    set_media(link)
Example #52
0
def process_message(msg):
    fname = msg.body
    link = Link._by_fullname(fname, data=True)
    extract_keywords(link)
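Handlers like process_message are typically registered as queue consumers and receive messages whose body is a Link fullname. A sketch of the wiring, where the queue name is a hypothetical stand-in and consume_items follows r2's amqp helper pattern:

from r2.lib import amqp

# 'extract_keywords_q' is a placeholder queue name, not from the source
amqp.consume_items('extract_keywords_q', process_message)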
Example #53
0
def _process_link(fname):
    link = Link._by_fullname(fname, data=True)
    set_media(link)
Example #54
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        new_comments = [
            comment for comment in link_comments if not comment._deleted]
        deleted_comments = [
            comment for comment in link_comments if comment._deleted]
        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if new_comments:
                    comment_tree.add_comments(new_comments)

                for comment in deleted_comments:
                    comment_tree.delete_comment(comment, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # this exception occurs when we add a comment to the tree but
                # its parent isn't in the tree yet; we need to rebuild the
                # tree from scratch

                comment_ids = [comment._id for comment in link_comments]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, comment_ids)

                comment_tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')
                # the tree rebuild updated the link's comment count, so schedule
                # it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link, timer)
            timer.intermediate('write_order')

        timer.stop()
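Since the input is normalized with tup, callers can pass one Comment or a batch spanning several links; the variable names here are illustrative:

add_comments(new_comment)           # a single comment
add_comments([reply_a, reply_b])    # a batch; grouped per link internally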
Example #55
0
def _process_link(fname):
    link = Link._by_fullname(fname, data=True, return_dict=False)
    set_media(link)