Example #1
def process_votes(drain=False, limit=100):
    def _handle_votes(msgs, chan):
        to_do = []
        uids = set()
        tids = set()
        for x in msgs:
            r = pickle.loads(x.body)
            uid, tid, dir, ip, organic, cheater = r

            print(uid, tid, dir, ip, organic, cheater)

            uids.add(uid)
            tids.add(tid)
            to_do.append((uid, tid, dir, ip, organic, cheater))

        users = Account._byID(uids, data=True, return_dict=True)
        things = Thing._by_fullname(tids, data=True, return_dict=True)

        for uid, tid, dir, ip, organic, cheater in to_do:
            handle_vote(users[uid],
                        things[tid],
                        dir,
                        ip,
                        organic,
                        cheater=cheater)

    amqp.handle_items('register_vote_q',
                      _handle_votes,
                      limit=limit,
                      drain=drain)
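
All of the examples on this page follow the same shape: a nested callback receives a batch of AMQP messages, decodes each msg.body, bulk-fetches whatever the payloads reference, does its work, and is registered with amqp.handle_items(queue_name, callback, ...). The sketch below only distills that shape; the queue name, JSON payload, and do_work helper are illustrative placeholders rather than anything from the reddit codebase, and the handle_items keyword arguments are assumed to behave the way these examples suggest (limit caps the batch size, drain=True exits once the queue is empty).

import json

from r2.lib import amqp


def do_work(item):
    # stand-in for real per-item processing
    pass


def process_example_queue(limit=100, drain=False):
    def _handle_batch(msgs, chan):
        # handle_items hands the callback a batch of raw AMQP messages;
        # each message body carries the serialized payload
        items = [json.loads(msg.body) for msg in msgs]
        for item in items:
            do_work(item)

    # register the callback; batches of up to `limit` messages are
    # consumed from the named queue until interrupted (or, with
    # drain=True, until the queue is empty)
    amqp.handle_items('example_q', _handle_batch, limit=limit, drain=drain)
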
Example #2
def run_changed(drain=False):
    """
        Run by `cron` (through `paster run`) on a schedule to update
        all Things that have been created or have changed since the
        last run. Note: unlike many queue-using functions, this one is
        run from cron and totally drains the queue before terminating
    """
    @g.stats.amqp_processor('solrsearch_changes')
    def _run_changed(msgs, chan):
        print "changed: Processing %d items" % len(msgs)
        msgs = [strordict_fullname(msg.body)
                for msg in msgs]
        fullnames = set(msg['fullname'] for msg in msgs if not msg.get('boost_only'))

        things = Thing._by_fullname(fullnames, data=True, return_dict=False)
        things = [x for x in things if isinstance(x, indexed_types)]

        update_things = [x for x in things if not x._spam and not x._deleted]
        delete_things = [x for x in things if x._spam or x._deleted]

        with SolrConnection() as s:
            if update_things:
                tokenized = tokenize_things(update_things)
                s.add(tokenized)
            if delete_things:
                for i in delete_things:
                    s.delete(id=i._fullname)

    amqp.handle_items('solrsearch_changes', _run_changed, limit=1000,
                      drain=drain)
Example #3
def consume_subreddit_query_queue(qname="subreddit_query_q", limit=1000):
    @g.stats.amqp_processor(qname)
    def process_message(msgs, chan):
        """Update get_links(), the Links by Subreddit precomputed query.

        get_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by subreddit allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links,)

        links_by_sr_id = defaultdict(list)
        for link in links:
            links_by_sr_id[link.sr_id].append(link)

        srs_by_id = Subreddit._byID(links_by_sr_id.keys(), stale=True)

        for sr_id, links in links_by_sr_id.iteritems():
            with g.stats.get_timer("link_vote_processor.subreddit_queries"):
                sr = srs_by_id[sr_id]
                add_queries(
                    queries=[get_links(sr, sort, "all") for sort in SORTS],
                    insert_items=links,
                )

    amqp.handle_items(qname, process_message, limit=limit)
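
The docstring in this example motivates sharding by subreddit so that, with at most one consumer per shard, no two consumers contend for the same get_links() lock. The producer side of that arrangement is not shown anywhere on this page, so the snippet below is a purely hypothetical sketch of how links might be routed to per-shard queues, reusing the amqp.add_item call that appears in the event-collector examples further down; the shard count and the queue naming scheme are invented.

from r2.lib import amqp

NUM_SHARDS = 4  # hypothetical; one consumer would run per shard


def queue_link_for_subreddit_queries(link):
    # key the shard on the link's subreddit id so every update touching a
    # given subreddit's get_links() CachedResult lands on the same consumer
    shard = link.sr_id % NUM_SHARDS
    amqp.add_item("subreddit_query_%d_q" % shard, link._fullname)
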
Example #4
def run_changed(drain=False, limit=100, sleep_time=10, verbose=True):
    """reddit-consumer-update_promos: amqp consumer of update_promos_q
    
    Handles asynch accepting/rejecting of ads that are scheduled to be live
    right now
    
    """
    @g.stats.amqp_processor(UPDATE_QUEUE)
    def _run(msgs, chan):
        items = [json.loads(msg.body) for msg in msgs]
        if QUEUE_ALL in items:
            # QUEUE_ALL is just an indicator to run make_daily_promotions.
            # There's no promotion log to update in this case.
            print "Received %s QUEUE_ALL message(s)" % items.count(QUEUE_ALL)
            items = [i for i in items if i != QUEUE_ALL]
        make_daily_promotions()
        links = Link._by_fullname([i["link"] for i in items])
        for item in items:
            PromotionLog.add(
                links[item['link']],
                "Finished remaking current promotions (this link "
                "was: %(message)s" % item)

    amqp.handle_items(UPDATE_QUEUE,
                      _run,
                      limit=limit,
                      drain=drain,
                      sleep_time=sleep_time,
                      verbose=verbose)
Example #5
def run_changed(drain=False, limit=100, sleep_time=10, verbose=False):
    """reddit-consumer-update_promos: amqp consumer of update_promos_q
    
    Handles asynch accepting/rejecting of ads that are scheduled to be live
    right now
    
    """

    @g.stats.amqp_processor(UPDATE_QUEUE)
    def _run(msgs, chan):
        items = [json.loads(msg.body) for msg in msgs]
        if QUEUE_ALL in items:
            # QUEUE_ALL is just an indicator to run make_daily_promotions.
            # There's no promotion log to update in this case.
            items.remove(QUEUE_ALL)
        make_daily_promotions()
        links = Link._by_fullname([i["link"] for i in items])
        for item in items:
            PromotionLog.add(
                links[item['link']],
                "Finished remaking current promotions (this link "
                "was: %(message)s" % item,
                commit=True,
            )

    amqp.handle_items(UPDATE_QUEUE, _run, limit=limit, drain=drain, sleep_time=sleep_time, verbose=verbose)
Example #6
def run_changed(drain=False):
    """
        Run by `cron` (through `paster run`) on a schedule to update
        all Things that have been created or have changed since the
        last run. Note: unlike many queue-using functions, this one is
        run from cron and totally drains the queue before terminating
    """
    def _run_changed(msgs, chan):
        print "changed: Processing %d items" % len(msgs)
        msgs = [strordict_fullname(msg.body) for msg in msgs]
        fullnames = set(msg['fullname'] for msg in msgs)

        things = Thing._by_fullname(fullnames, data=True, return_dict=False)
        things = [x for x in things if isinstance(x, indexed_types)]

        update_things = [x for x in things if not x._spam and not x._deleted]
        delete_things = [x for x in things if x._spam or x._deleted]

        with SolrConnection() as s:
            if update_things:
                tokenized = tokenize_things(update_things)
                s.add(tokenized)
            if delete_things:
                for i in delete_things:
                    s.delete(id=i._fullname)

    amqp.handle_items('solrsearch_changes',
                      _run_changed,
                      limit=1000,
                      drain=drain)
Example #7
def consume_subreddit_query_queue(qname="subreddit_query_q", limit=1000):
    @g.stats.amqp_processor(qname)
    def process_message(msgs, chan):
        """Update get_links(), the Links by Subreddit precomputed query.

        get_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by subreddit allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links, )

        links_by_sr_id = defaultdict(list)
        for link in links:
            links_by_sr_id[link.sr_id].append(link)

        srs_by_id = Subreddit._byID(links_by_sr_id.keys(), stale=True)

        for sr_id, links in links_by_sr_id.iteritems():
            with g.stats.get_timer("link_vote_processor.subreddit_queries"):
                sr = srs_by_id[sr_id]
                add_queries(
                    queries=[get_links(sr, sort, "all") for sort in SORTS],
                    insert_items=links,
                )

    amqp.handle_items(qname, process_message, limit=limit)
Example #8
def process_comment_sorts(limit=500):
    def _handle_sort(msgs, chan):
        cids = list(set(int(msg.body) for msg in msgs))
        comments = Comment._byID(cids, data = True, return_dict = False)
        print comments
        update_comment_votes(comments)

    amqp.handle_items('commentsort_q', _handle_sort, limit = limit)
Example #9
def process_comment_sorts(limit=500):
    def _handle_sort(msgs, chan):
        cids = list(set(int(msg.body) for msg in msgs))
        comments = Comment._byID(cids, data=True, return_dict=False)
        print comments
        update_comment_votes(comments)

    amqp.handle_items('commentsort_q', _handle_sort, limit=limit)
Example #10
def run_changed(drain=False, limit=1000):
    """
        Run by `cron` (through `paster run`) on a schedule to send Things to
        IndexTank
    """
    def _run_changed(msgs, chan):
        start = datetime.now(g.tz)

        changed = map(lambda x: strordict_fullname(x.body), msgs)

        boost = set()
        add = set()

        # an item can request that only its boost fields be updated,
        # so we need to separate those out

        for item in changed:
            fname = item['fullname']
            boost_only = item.get('boost_only', False)

            if fname in add:
                # we're already going to do all of the work
                continue

            #boo if boost_only:
            if False:
                boost.add(fname)
            else:
                if fname in boost:
                    # we've previously seen an instance of this fname
                    # that requested that only its boosts be updated,
                    # but now we have to update the whole thing
                    boost.remove(fname)

                add.add(fname)

        things = Thing._by_fullname(boost | add, data=True, return_dict=True)

        boost_time = add_time = 0.0
        if boost:
            boost_time = inject([things[fname] for fname in boost], boost_only=True)
        if add:
            add_time = inject([things[fname] for fname in add])

        totaltime = epoch_seconds(datetime.now(g.tz)) - epoch_seconds(start)

        print ("%s: %d messages: %d docs (%.2fs), %d boosts (%.2fs) in %.2fs (%d duplicates, %s remaining)"
               % (start,
                  len(changed),
                  len(add), add_time,
                  len(boost), boost_time,
                  totaltime,
                  len(changed) - len(things),
                  msgs[-1].delivery_info.get('message_count', 'unknown'),
                  ))

    amqp.handle_items('indextank_changes', _run_changed, limit=limit,
                      drain=drain, verbose=False)
Example #11
def run_changed(drain=False, limit=1000):
    """
        Run by `cron` (through `paster run`) on a schedule to send Things to
        IndexTank
    """
    def _run_changed(msgs, chan):
        start = datetime.now(g.tz)

        changed = map(lambda x: strordict_fullname(x.body), msgs)

        boost = set()
        add = set()

        # an item can request that only its boost fields be updated,
        # so we need to separate those out

        for item in changed:
            fname = item['fullname']
            boost_only = item.get('boost_only', False)

            if fname in add:
                # we're already going to do all of the work
                continue

            if boost_only:
                boost.add(fname)
            else:
                if fname in boost:
                    # we've previously seen an instance of this fname
                    # that requested that only its boosts be updated,
                    # but now we have to update the whole thing
                    boost.remove(fname)

                add.add(fname)

        things = Thing._by_fullname(boost | add, data=True, return_dict=True)

        boost_time = add_time = 0.0
        if boost:
            boost_time = inject([things[fname] for fname in boost], boost_only=True)
        if add:
            add_time = inject([things[fname] for fname in add])

        totaltime = epoch_seconds(datetime.now(g.tz)) - epoch_seconds(start)

        print ("%s: %d messages: %d docs (%.2fs), %d boosts (%.2fs) in %.2fs (%d duplicates, %s remaining)"
               % (start,
                  len(changed),
                  len(add), add_time,
                  len(boost), boost_time,
                  totaltime,
                  len(changed) - len(things),
                  msgs[-1].delivery_info.get('message_count', 'unknown'),
                  ))

    amqp.handle_items('indextank_changes', _run_changed, limit=limit,
                      drain=drain, verbose=False)
Example #12
def run_changed(self, drain=False, min_size=int(getattr(g, 'SOLR_MIN_BATCH', 500)), limit=1000, sleep_time=10,
        use_safe_get=False, verbose=False):
    '''Run by `cron` (through `paster run`) on a schedule to send Things to Cloud
    '''
    if use_safe_get:
        CloudSearchUploader.use_safe_get = True
    amqp.handle_items('cloudsearch_changes', _run_changed, min_size=min_size,
                      limit=limit, drain=drain, sleep_time=sleep_time,
                      verbose=verbose)
Example #13
def process_events(g, timeout=5.0, **kw):
    publisher = EventPublisher(
        g.events_collector_url,
        g.secrets["events_collector_key"],
        g.secrets["events_collector_secret"],
        g.useragent,
        g.stats,
        timeout=timeout,
    )
    test_publisher = EventPublisher(
        g.events_collector_test_url,
        g.secrets["events_collector_key"],
        g.secrets["events_collector_secret"],
        g.useragent,
        g.stats,
        timeout=timeout,
    )

    @g.stats.amqp_processor("event_collector")
    def processor(msgs, chan):
        events = []
        test_events = []

        for msg in msgs:
            headers = msg.properties.get("application_headers", {})
            truncatable_field = headers.get("truncatable_field")

            event = PublishableEvent(msg.body, truncatable_field)
            if msg.delivery_info["routing_key"] == "event_collector_test":
                test_events.append(event)
            else:
                events.append(event)

        to_publish = itertools.chain(
            publisher.publish(events),
            test_publisher.publish(test_events),
        )
        for response, sent in to_publish:
            if response.ok:
                g.log.info("Published %s events", len(sent))
            else:
                g.log.warning(
                    "Event send failed %s - %s",
                    response.status_code,
                    _get_reason(response),
                )
                g.log.warning("Response headers: %r", response.headers)

                # if the events were too large, move them into a separate
                # queue to get them out of here, since they'll always fail
                if response.status_code == 413:
                    for event in sent:
                        amqp.add_item("event_collector_failed", event)
                else:
                    response.raise_for_status()

    amqp.handle_items("event_collector", processor, **kw)
Example #14
def process_events(g, timeout=5.0, **kw):
    publisher = EventPublisher(
        g.events_collector_url,
        g.secrets["events_collector_key"],
        g.secrets["events_collector_secret"],
        g.useragent,
        g.stats,
        timeout=timeout,
    )
    test_publisher = EventPublisher(
        g.events_collector_test_url,
        g.secrets["events_collector_key"],
        g.secrets["events_collector_secret"],
        g.useragent,
        g.stats,
        timeout=timeout,
    )

    @g.stats.amqp_processor("event_collector")
    def processor(msgs, chan):
        events = []
        test_events = []

        for msg in msgs:
            headers = msg.properties.get("application_headers", {})
            truncatable_field = headers.get("truncatable_field")

            event = PublishableEvent(msg.body, truncatable_field)
            if msg.delivery_info["routing_key"] == "event_collector_test":
                test_events.append(event)
            else:
                events.append(event)

        to_publish = itertools.chain(
            publisher.publish(events),
            test_publisher.publish(test_events),
        )
        for response, sent in to_publish:
            if response.ok:
                g.log.info("Published %s events", len(sent))
            else:
                g.log.warning(
                    "Event send failed %s - %s",
                    response.status_code,
                    _get_reason(response),
                )
                g.log.warning("Response headers: %r", response.headers)

                # if the events were too large, move them into a separate
                # queue to get them out of here, since they'll always fail
                if response.status_code == 413:
                    for event in sent:
                        amqp.add_item("event_collector_failed", event)
                else:
                    response.raise_for_status()

    amqp.handle_items("event_collector", processor, **kw)
Example #15
def run_changed(self, drain=False, min_size=int(getattr(g, 'solr_min_batch', 500)), limit=1000, sleep_time=10,
        use_safe_get=False, verbose=False):
    '''Run by `cron` (through `paster run`) on a schedule to send Things to Solr
    '''
    if use_safe_get:
        SolrSearchUploader.use_safe_get = True
    amqp.handle_items('cloudsearch_changes', _run_changed, min_size=min_size,
                      limit=limit, drain=drain, sleep_time=sleep_time,
                      verbose=verbose)
Example #16
def run(limit=1000, verbose=False):
    def myfunc(msgs, chan):
        if verbose:
            print "processing a batch"

        incrs = {}

        for msg in msgs:
            try:
                d = check_dict(msg.body)
            except TypeError:
                log_text("usage_q error", "wtf is %r" % msg.body, "error")
                continue

            hund_sec = hund_from_start_and_end(d["start_time"], d["end_time"])

            action = d["action"].replace("-", "_")

            fudged_count = int(1 / d["sampling_rate"])
            fudged_elapsed = int(hund_sec / d["sampling_rate"])

            for exp_time, bucket in buckets(d["end_time"]):
                k = "%s-%s" % (bucket, action)
                incrs.setdefault(k, [0, 0, exp_time])
                incrs[k][0] += fudged_count
                incrs[k][1] += fudged_elapsed

        for k, (count, elapsed, exp_time) in incrs.iteritems():
            c_key = "profile_count-" + k
            e_key = "profile_elapsed-" + k

            if verbose:
                c_old = g.hardcache.get(c_key)
                e_old = g.hardcache.get(e_key)

            g.hardcache.accrue(c_key, delta=count, time=exp_time)
            g.hardcache.accrue(e_key, delta=elapsed, time=exp_time)

            if verbose:
                c_new = g.hardcache.get(c_key)
                e_new = g.hardcache.get(e_key)

                print "%s: %s -> %s" % (c_key, c_old, c_new)
                print "%s: %s -> %s" % (e_key, e_old, e_new)

        if len(msgs) < limit / 2:
            if verbose:
                print "Sleeping..."
            sleep(10)

    amqp.handle_items(q,
                      myfunc,
                      limit=limit,
                      drain=False,
                      verbose=verbose,
                      sleep_time=30)
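
The fudged_count / fudged_elapsed arithmetic above scales each sampled observation back up to an estimate of the true totals: an event recorded at a 10% sampling rate stands in for roughly ten real requests, and its elapsed time for roughly ten times as many hundredths of a second. A tiny standalone illustration with made-up numbers:

# one sampled request, recorded at a 10% sampling rate,
# that took 42 hundredths of a second
sampling_rate = 0.1
hund_sec = 42

fudged_count = int(1 / sampling_rate)           # counts as ~10 requests
fudged_elapsed = int(hund_sec / sampling_rate)  # ~420 hundredths in total

assert fudged_count == 10
assert fudged_elapsed == 420
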
Example #17
def run_changed(drain=False, min_size=500, limit=1000, sleep_time=10,
                use_safe_get=False, verbose=False):
    '''Run by `cron` (through `paster run`) on a schedule to send Things to
        Amazon CloudSearch
    
    '''
    if use_safe_get:
        CloudSearchUploader.use_safe_get = True
    amqp.handle_items('cloudsearch_changes', _run_changed, min_size=min_size,
                      limit=limit, drain=drain, sleep_time=sleep_time,
                      verbose=verbose)
Example #18
def run_commentstree(limit=100):
    """Add new incoming comments to their respective comments trees"""
    def _run_commentstree(msgs, chan):
        comments = Comment._by_fullname([msg.body for msg in msgs],
                                        data=True,
                                        return_dict=False)
        print 'Processing %r' % (comments, )

        add_comment_tree(comments)

    amqp.handle_items('commentstree_q', _run_commentstree, limit=limit)
Example #19
def run_commentstree(limit=100):
    """Add new incoming comments to their respective comments trees"""

    @g.stats.amqp_processor("commentstree_q")
    def _run_commentstree(msgs, chan):
        comments = Comment._by_fullname([msg.body for msg in msgs], data=True, return_dict=False)
        print "Processing %r" % (comments,)

        add_comment_tree(comments)

    amqp.handle_items("commentstree_q", _run_commentstree, limit=limit)
Example #20
def run_changed(drain=False, min_size=500, limit=1000, sleep_time=10,
                use_safe_get=False, verbose=False):
    '''Run by `cron` (through `paster run`) on a schedule to send Things to
        Amazon CloudSearch
    
    '''
    if use_safe_get:
        CloudSearchUploader.use_safe_get = True
    amqp.handle_items('cloudsearch_changes', _run_changed, min_size=min_size,
                      limit=limit, drain=drain, sleep_time=sleep_time,
                      verbose=verbose)
Example #21
def run_new_comments():
    """Add new incoming comments to the /comments page"""
    # this is done as a queue because otherwise the contention for the
    # lock on the query would be very high

    def _run_new_comments(msgs, chan):
        fnames = [msg.body for msg in msgs]
        comments = Comment._by_fullname(fnames, data=True, return_dict=False)

        add_queries([get_all_comments()], insert_items=comments)

    amqp.handle_items("newcomments_q", _run_new_comments, limit=100)
Example #22
def run_changed(drain=False):
    """
        Run by `cron` (through `paster run`) on a schedule to send Things to
        IndexTank
    """
    def _run_changed(msgs, chan):
        fullnames = set([x.body for x in msgs])
        things = Thing._by_fullname(fullnames, data=True, return_dict=False)
        inject(things)

    amqp.handle_items('indextank_changes', _run_changed, limit=1000,
                      drain=drain)
Example #23
def run_new_comments():
    """Add new incoming comments to the /comments page"""

    # this is done as a queue because otherwise the contention for the
    # lock on the query would be very high

    def _run_new_comments(msgs, chan):
        fnames = [msg.body for msg in msgs]
        comments = Comment._by_fullname(fnames, data=True, return_dict=False)

        add_queries([get_all_comments()], insert_items=comments)

    amqp.handle_items('newcomments_q', _run_new_comments, limit=100)
Example #24
def run(limit=1000, verbose=False):
    def myfunc(msgs, chan):
        if verbose:
            print "processing a batch"

        incrs = {}

        for msg in msgs:
            try:
                d = check_dict(msg.body)
            except TypeError:
                log_text("usage_q error", "wtf is %r" % msg.body, "error")
                continue

            hund_sec = hund_from_start_and_end(d["start_time"], d["end_time"])

            action = d["action"].replace("-", "_")

            fudged_count   = int(       1 / d["sampling_rate"])
            fudged_elapsed = int(hund_sec / d["sampling_rate"])

            for exp_time, bucket in buckets(d["end_time"]):
                k = "%s-%s" % (bucket, action)
                incrs.setdefault(k, [0, 0, exp_time])
                incrs[k][0] += fudged_count
                incrs[k][1] += fudged_elapsed

        for k, (count, elapsed, exp_time) in incrs.iteritems():
            c_key = "profile_count-" + k
            e_key = "profile_elapsed-" + k

            if verbose:
                c_old = g.hardcache.get(c_key)
                e_old = g.hardcache.get(e_key)

            g.hardcache.accrue(c_key, delta=count,   time=exp_time)
            g.hardcache.accrue(e_key, delta=elapsed, time=exp_time)

            if verbose:
                c_new = g.hardcache.get(c_key)
                e_new = g.hardcache.get(e_key)

                print "%s: %s -> %s" % (c_key, c_old, c_new)
                print "%s: %s -> %s" % (e_key, e_old, e_new)

        if len(msgs) < limit / 2:
            if verbose:
                print "Sleeping..."
            sleep (10)
    amqp.handle_items(q, myfunc, limit=limit, drain=False, verbose=verbose,
                      sleep_time = 30)
Example #25
def run_new_comments(limit=1000):
    """Add new incoming comments to the /comments page"""
    # this is done as a queue because otherwise the contention for the
    # lock on the query would be very high

    @g.stats.amqp_processor('newcomments_q')
    def _run_new_comments(msgs, chan):
        fnames = [msg.body for msg in msgs]

        comments = Comment._by_fullname(fnames, data=True, return_dict=False)
        add_queries([get_all_comments()], insert_items=comments)

        bysrid = _by_srid(comments, False)
        for srid, sr_comments in bysrid.iteritems():
            add_queries([_get_sr_comments(srid)], insert_items=sr_comments)

    amqp.handle_items('newcomments_q', _run_new_comments, limit=limit)
Example #26
def run_new_comments(limit=1000):
    """Add new incoming comments to the /comments page"""
    # this is done as a queue because otherwise the contention for the
    # lock on the query would be very high

    @g.stats.amqp_processor("newcomments_q")
    def _run_new_comments(msgs, chan):
        fnames = [msg.body for msg in msgs]

        comments = Comment._by_fullname(fnames, data=True, return_dict=False)
        add_queries([get_all_comments()], insert_items=comments)

        bysrid = _by_srid(comments, False)
        for srid, sr_comments in bysrid.iteritems():
            add_queries([_get_sr_comments(srid)], insert_items=sr_comments)

    amqp.handle_items("newcomments_q", _run_new_comments, limit=limit)
Example #27
def run():
    def callback(msgs, chan):
        for msg in msgs:  # will be len==1
            # cr is a r2.lib.db.queries.CachedResults
            cr = pickle.loads(msg.body)
            iden = cr.query._iden()

            working_key = working_prefix + iden
            key = prefix + iden

            last_time = g.memcache.get(key)
            # check to see if we've computed this job since it was
            # added to the queue
            if last_time and last_time > msg.timestamp:
                print 'skipping, already computed ', key
                return

            if not cr.preflight_check():
                print 'skipping, preflight check failed', key
                return

            # check if someone else is working on this
            elif not g.memcache.add(working_key, 1, TIMEOUT):
                print 'skipping, someone else is working', working_key
                return

            print 'working: ', iden, cr.query._rules, cr.query._sort
            start = datetime.now()
            try:
                cr.update()
                g.memcache.set(key, datetime.now())

                cr.postflight()

            finally:
                g.memcache.delete(working_key)

            done = datetime.now()
            q_time_s = (done - msg.timestamp).seconds
            proc_time_s = (done - start).seconds + (
                (done - start).microseconds / 1000000.0)
            print('processed %s in %.6f seconds after %d seconds in queue' %
                  (iden, proc_time_s, q_time_s))

    amqp.handle_items('prec_links', callback, limit=1)
Example #28
def process_votes(limit=None):
    # limit is taken but ignored for backwards compatibility

    def _handle_vote(msgs, chan):
        assert(len(msgs) == 1)
        msg = msgs[0]

        r = pickle.loads(msg.body)

        uid, tid, dir, ip, organic, cheater = r
        voter = Account._byID(uid, data=True)
        votee = Thing._by_fullname(tid, data = True)

        print (voter, votee, dir, ip, organic, cheater)
        handle_vote(voter, votee, dir, ip, organic,
                    cheater = cheater)

    amqp.handle_items('register_vote_q', _handle_vote)
Example #29
def run():
    def callback(msgs, chan):
        for msg in msgs: # will be len==1
            # cr is a r2.lib.db.queries.CachedResults
            cr = pickle.loads(msg.body)
            iden = cr.query._iden()

            working_key = working_prefix + iden
            key = prefix + iden

            last_time = g.memcache.get(key)
            # check to see if we've computed this job since it was
            # added to the queue
            if last_time and last_time > msg.timestamp:
                print 'skipping, already computed ', key
                return

            if not cr.preflight_check():
                print 'skipping, preflight check failed', key
                return

            # check if someone else is working on this
            elif not g.memcache.add(working_key, 1, TIMEOUT):
                print 'skipping, someone else is working', working_key
                return

            print 'working: ', iden, cr.query._rules, cr.query._sort
            start = datetime.now()
            try:
                cr.update()
                g.memcache.set(key, datetime.now())

                cr.postflight()

            finally:
                g.memcache.delete(working_key)

            done = datetime.now()
            q_time_s = (done - msg.timestamp).seconds
            proc_time_s = (done - start).seconds + ((done - start).microseconds/1000000.0)
            print ('processed %s in %.6f seconds after %d seconds in queue'
                   % (iden, proc_time_s, q_time_s))

    amqp.handle_items('prec_links', callback, limit = 1)
Example #30
def run_changed(drain=False, min_size=1, limit=1000, sleep_time=10,
                use_safe_get=False, verbose=False):
    '''Run by `cron` (through `paster run`) on a schedule to send Things to
        Amazon CloudSearch
    
    '''

    @g.stats.amqp_processor('cloudsearch_changes_q')
    def _run_changed(msgs, chan):
        '''Consume the cloudsearch_changes_q queue, and print reporting information
        on how long it took and how many remain

        '''
        start = datetime.now(g.tz)

        changed = [pickle.loads(msg.body) for msg in msgs]

        fullnames = set()
        fullnames.update(LinkUploader.desired_fullnames(changed))
        fullnames.update(SubredditUploader.desired_fullnames(changed))

        things = Thing._by_fullname(fullnames, data=True, return_dict=False)

        link_uploader = LinkUploader(g.CLOUDSEARCH_DOC_API, things=things)
        subreddit_uploader = SubredditUploader(g.CLOUDSEARCH_SUBREDDIT_DOC_API,
                                               things=things)

        link_time = link_uploader.inject()
        subreddit_time = subreddit_uploader.inject()
        cloudsearch_time = link_time + subreddit_time

        totaltime = (datetime.now(g.tz) - start).total_seconds()

        print ("%s: %d messages in %.2fs seconds (%.2fs secs waiting on "
               "cloudsearch); %d duplicates, %s remaining)" %
               (start, len(changed), totaltime, cloudsearch_time,
                len(changed) - len(things),
                msgs[-1].delivery_info.get('message_count', 'unknown')))

    if use_safe_get:
        CloudSearchUploader.use_safe_get = True
    amqp.handle_items('cloudsearch_changes_q', _run_changed, min_size=min_size,
                      limit=limit, drain=drain, sleep_time=sleep_time,
                      verbose=verbose)
Example #31
def run_changed(drain=False, limit=100, sleep_time=10, verbose=True):
    """reddit-consumer-update_promos: amqp consumer of update_promos_q
    
    Handles asynch accepting/rejecting of ads that are scheduled to be live
    right now
    
    """
    @g.stats.amqp_processor(UPDATE_QUEUE)
    def _run(msgs, chan):
        items = [json.loads(msg.body) for msg in msgs]
        if QUEUE_ALL in items:
            # QUEUE_ALL is just an indicator to run make_daily_promotions.
            # There's no promotion log to update in this case.
            print "Received %s QUEUE_ALL message(s)" % items.count(QUEUE_ALL)
            items = [i for i in items if i != QUEUE_ALL]
            make_daily_promotions()

    amqp.handle_items(UPDATE_QUEUE, _run, limit=limit, drain=drain,
                      sleep_time=sleep_time, verbose=verbose)
Example #32
def run():
    def process_msgs(msgs, chan):
        def _process_link(fname):
            link = Link._by_fullname(fname, data=True, return_dict=False)
            set_media(link)

        for msg in msgs:
            fname = msg.body
            try:
                TimeoutFunction(_process_link, 30)(fname)
            except TimeoutFunctionException:
                print "Timed out on %s" % fname
            except KeyboardInterrupt:
                raise
            except:
                print "Error fetching %s" % fname
                print traceback.format_exc()

    amqp.handle_items('scraper_q', process_msgs, limit=1)
Example #33
def run():
    def process_msgs(msgs, chan):
        def _process_link(fname):
            link = Link._by_fullname(fname, data=True, return_dict=False)
            set_media(link)

        for msg in msgs:
            fname = msg.body
            try:
                TimeoutFunction(_process_link, 30)(fname)
            except TimeoutFunctionException:
                print "Timed out on %s" % fname
            except KeyboardInterrupt:
                raise
            except:
                print "Error fetching %s" % fname
                print traceback.format_exc()

    amqp.handle_items('scraper_q', process_msgs, limit=1)
Example #34
def process_votes(limit=1000):
    # limit is taken but ignored for backwards compatibility

    def _handle_vote(msgs, chan):
        #assert(len(msgs) == 1)
        comments = []
        for msg in msgs:
            r = pickle.loads(msg.body)

            uid, tid, dir, ip, organic, cheater = r
            voter = Account._byID(uid, data=True)
            votee = Thing._by_fullname(tid, data=True)
            if isinstance(votee, Comment):
                comments.append(votee)

            print(voter, votee, dir, ip, organic, cheater)
            handle_vote(voter, votee, dir, ip, organic, cheater=cheater)

        update_comment_votes(comments)

    amqp.handle_items('register_vote_q', _handle_vote, limit=limit)
Example #35
def process_votes(limit=1000):
    # limit is taken but ignored for backwards compatibility

    def _handle_vote(msgs, chan):
        # assert(len(msgs) == 1)
        comments = []
        for msg in msgs:
            r = pickle.loads(msg.body)

            uid, tid, dir, ip, organic, cheater = r
            voter = Account._byID(uid, data=True)
            votee = Thing._by_fullname(tid, data=True)
            if isinstance(votee, Comment):
                comments.append(votee)

            print (voter, votee, dir, ip, organic, cheater)
            handle_vote(voter, votee, dir, ip, organic, cheater=cheater)

        update_comment_votes(comments)

    amqp.handle_items("register_vote_q", _handle_vote, limit=limit)
Example #36
def process_votes(drain=False, limit=100):
    def _handle_votes(msgs, chan):
        to_do = []
        uids = set()
        tids = set()
        for x in msgs:
            r = pickle.loads(x.body)
            uid, tid, dir, ip, organic, cheater = r

            print (uid, tid, dir, ip, organic, cheater)

            uids.add(uid)
            tids.add(tid)
            to_do.append((uid, tid, dir, ip, organic, cheater))

        users = Account._byID(uids, data=True, return_dict=True)
        things = Thing._by_fullname(tids, data=True, return_dict=True)

        for uid, tid, dir, ip, organic, cheater in to_do:
            handle_vote(users[uid], things[tid], dir, ip, organic, cheater=cheater)

    amqp.handle_items("register_vote_q", _handle_votes, limit=limit, drain=drain)
Example #37
def run_commentstree():
    """Add new incoming comments to their respective comments trees"""

    def _run_commentstree(msgs, chan):
        fnames = [msg.body for msg in msgs]
        comments = Comment._by_fullname(fnames, data=True, return_dict=False)

        links = Link._byID(set(cm.link_id for cm in comments), data=True, return_dict=True)

        # add the comment to the comments-tree
        for comment in comments:
            l = links[comment.link_id]
            try:
                add_comment_tree(comment, l)
            except KeyError:
                # Hackity hack. Try to recover from a corrupted
                # comment tree
                print "Trying to fix broken comments-tree."
                link_comments(l._id, _update=True)
                add_comment_tree(comment, l)

    amqp.handle_items("commentstree_q", _run_commentstree, limit=1)
Example #38
def run_commentstree():
    """Add new incoming comments to their respective comments trees"""
    def _run_commentstree(msgs, chan):
        fnames = [msg.body for msg in msgs]
        comments = Comment._by_fullname(fnames, data=True, return_dict=False)

        links = Link._byID(set(cm.link_id for cm in comments),
                           data=True,
                           return_dict=True)

        # add the comment to the comments-tree
        for comment in comments:
            l = links[comment.link_id]
            try:
                add_comment_tree(comment, l)
            except KeyError:
                # Hackity hack. Try to recover from a corrupted
                # comment tree
                print "Trying to fix broken comments-tree."
                link_comments(l._id, _update=True)
                add_comment_tree(comment, l)

    amqp.handle_items('commentstree_q', _run_commentstree, limit=1)
Example #39
def consume_domain_query_queue(qname="domain_query_q", limit=1000):
    @g.stats.amqp_processor(qname)
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_domain_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links, )

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        for d, links in links_by_domain.iteritems():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(d, sort, "all") for sort in SORTS
                    ],
                    insert_items=links,
                )

    amqp.handle_items(qname, process_message, limit=limit)
Example #40
def consume_domain_query_queue(qname="domain_query_q", limit=1000):
    @g.stats.amqp_processor(qname)
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_domain_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links,)

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        for d, links in links_by_domain.iteritems():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(d, sort, "all") for sort in SORTS],
                    insert_items=links,
                )

    amqp.handle_items(qname, process_message, limit=limit)
Example #41
            uid, tid, dir, ip, organic, cheater = r
            voter = Account._byID(uid, data=True)
            votee = Thing._by_fullname(tid, data = True)
            if isinstance(votee, Comment):
                comments.append(votee)

            if not isinstance(votee, (Link, Comment)):
                # I don't know how, but somebody is sneaking in votes
                # for subreddits
                continue

            print (voter, votee, dir, ip, organic, cheater)
            try:
                handle_vote(voter, votee, dir, ip, organic,
                            cheater=cheater, foreground=False)
            except Exception, e:
                print 'Rejecting %r:%r because of %r' % (msg.delivery_tag, r,e)
                chan.basic_reject(msg.delivery_tag, requeue=True)

        update_comment_votes(comments)

    amqp.handle_items(qname, _handle_vote, limit = limit)

process_votes = process_votes_single

try:
    from r2admin.lib.admin_queries import *
except ImportError:
    pass
Example #42
            uid, tid, dir, ip, organic, cheater = r
            voter = Account._byID(uid, data=True)
            votee = Thing._by_fullname(tid, data = True)
            if isinstance(votee, Comment):
                comments.append(votee)

            if not isinstance(votee, (Link, Comment)):
                # I don't know how, but somebody is sneaking in votes
                # for subreddits
                continue

            print (voter, votee, dir, ip, organic, cheater)
            try:
                handle_vote(voter, votee, dir, ip, organic,
                            cheater=cheater, foreground=False)
            except Exception, e:
                print 'Rejecting %r:%r because of %r' % (msg.delivery_tag, r,e)
                chan.basic_reject(msg.delivery_tag, requeue=True)

        update_comment_votes(comments)

    amqp.handle_items(qname, _handle_vote, limit = limit)

process_votes = process_votes_single

try:
    from r2admin.lib.admin_queries import *
except ImportError:
    pass
Example #43
            if not isinstance(votee, (Link, Comment)):
                # I don't know how, but somebody is sneaking in votes
                # for subreddits
                continue

            print (voter, votee, dir, ip, organic, cheater)
            try:
                handle_vote(voter, votee, dir, ip, organic,
                            cheater=cheater, foreground=False)
            except Exception, e:
                print 'Rejecting %r:%r because of %r' % (msg.delivery_tag, r,e)
                chan.basic_reject(msg.delivery_tag, requeue=True)

        update_comment_votes(comments)

    amqp.handle_items('register_vote_q', _handle_vote, limit = limit)

process_votes = process_votes_single

def process_comment_sorts(limit=500):
    def _handle_sort(msgs, chan):
        cids = list(set(int(msg.body) for msg in msgs))
        comments = Comment._byID(cids, data = True, return_dict = False)
        print comments
        update_comment_votes(comments)

    amqp.handle_items('commentsort_q', _handle_sort, limit = limit)

try:
    from r2admin.lib.admin_queries import *
except ImportError:
    pass
Example #44
def run(limit=100, streamfile=None, verbose=False):
    if streamfile:
        stream_fp = open(streamfile, "a")
    else:
        stream_fp = None

    def streamlog(msg, important=False):
        if stream_fp:
            stream_fp.write(msg + "\n")
            stream_fp.flush()
        if important:
            print msg

    def add_timestamps (d):
        d['hms'] = d['time'].strftime("%H:%M:%S")

        d['occ'] = "<%s:%s, pid=%-5s, %s>" % (d['host'], d['port'], d['pid'],
                                      d['time'].strftime("%Y-%m-%d %H:%M:%S"))

    def limited_append(l, item):
        if len(l) >= 25:
            l.pop(12)
        l.append(item)

    def log_exception(d, daystring):
        exc_desc = d['exception_desc']
        exc_type = d['exception_type']

        exc_str = "%s: %s" % (exc_type, exc_desc)

        add_timestamps(d)

        tb = []

        key_material = exc_type
        pretty_lines = []

        make_lock_seen = False
        flaky_db_seen = False

        for tpl in d['traceback']:
            tb.append(tpl)
            filename, lineno, funcname, text = tpl
            if text is None:
                pass
            elif (text.startswith("with g.make_lock(") or
                  text.startswith("with make_lock(")):
                make_lock_seen = True
            elif (text.startswith("(ProgrammingError) server closed the connection")):
                flaky_db_seen = True
            key_material += "%s %s " % (filename, funcname)
            pretty_lines.append ("%s:%s: %s()" % (filename, lineno, funcname))
            pretty_lines.append ("    %s" % text)

        if exc_desc.startswith("QueuePool limit of size"):
            fingerprint = "QueuePool_overflow"
        elif exc_desc.startswith("error 2 from memcached_get: HOSTNAME "):
            fingerprint = "memcache_suckitude"
        elif exc_type == "TimeoutExpired" and make_lock_seen:
            fingerprint = "make_lock_timeout"
        elif exc_desc.startswith("(OperationalError) FATAL: the database " +
                                 "system is in recovery mode"):
            fingerprint = "recovering_db"
        elif exc_desc.startswith("(OperationalError) could not connect " +
                                 "to server"):
            fingerprint = "unconnectable_db"
        elif exc_desc.startswith("(OperationalError) server closed the " +
                                 "connection unexpectedly"):
            fingerprint = "flaky_db_op"
        elif exc_type == "ProgrammingError" and flaky_db_seen:
            fingerprint = "flaky_db_prog"
            # SQLAlchemy includes the entire query in the exception
            # description which can sometimes be gigantic, in the case of
            # SELECTs. Get rid of it.
            select_pos = exc_str.find("SELECT")
            if select_pos > 0:
                exc_str = exc_str[:select_pos]
        elif exc_type == "NoServerAvailable":
            fingerprint = "cassandra_suckitude"
        else:
            fingerprint = md5(key_material).hexdigest()

        nickname_key = "error_nickname-" + fingerprint
        status_key = "error_status-" + fingerprint

        nickname = g.hardcache.get(nickname_key)

        if nickname is None:
            nickname = '"%s" Exception' % randword().capitalize()
            news = ("A new kind of thing just happened! " +
                    "I'm going to call it a %s\n\n" % nickname)

            news += "Where and when: %s\n\n" % d['occ']
            news += "Traceback:\n"
            news += "\n".join(pretty_lines)
            news += exc_str
            news += "\n"

            emailer.nerds_email(news, "Exception Watcher")

            g.hardcache.set(nickname_key, nickname, 86400 * 365)
            g.hardcache.set(status_key, "new", 86400)

        if g.hardcache.get(status_key) == "fixed":
            g.hardcache.set(status_key, "new", 86400)
            news = "This was marked as fixed: %s\n" % nickname
            news += "But it just occurred, so I'm marking it new again."
            emailer.nerds_email(news, "Exception Watcher")

        err_key = "-".join(["error", daystring, fingerprint])

        existing = g.hardcache.get(err_key)

        if not existing:
            existing = dict(exception=exc_str, traceback=tb, occurrences=[])

        limited_append(existing['occurrences'], d['occ'])

        g.hardcache.set(err_key, existing, 7 * 86400)

        streamlog ("%s [X] %-70s" % (d['hms'], nickname), verbose)

    def log_text(d, daystring):
        add_timestamps(d)
        char = d['level'][0].upper()
        streamlog ("%s [%s] %r" % (d['hms'], char, d['text']), verbose)
        logclass_key = "logclass-" + d['classification']

        if not g.hardcache.get(logclass_key):
            g.hardcache.set(logclass_key, True, 86400 * 90)

            if d['level'] != 'debug':
                news = "The code just generated a [%s] message.\n" % \
                       d['classification']
                news += "I don't remember ever seeing one of those before.\n"
                news += "\n"
                news += "It happened on: %s\n" % d['occ']
                news += "The log level was: %s\n" % d['level']
                news += "The complete text was:\n"
                news += repr(d['text'])
                emailer.nerds_email (news, "reddit secretary")

        occ_key = "-".join(["logtext", daystring,
                            d['level'], d['classification']])

        occurrences = g.hardcache.get(occ_key)

        if occurrences is None:
            occurrences = []

        d2 = {}

        d2['occ'] = d['occ']
        d2['text'] = repr(d['text'])

        limited_append(occurrences, d2)
        g.hardcache.set(occ_key, occurrences, 86400 * 7)

    def myfunc(msgs, chan):
        daystring = datetime.now(g.display_tz).strftime("%Y/%m/%d")

        for msg in msgs:
            try:
                d = pickle.loads(msg.body)
            except TypeError:
                streamlog ("wtf is %r" % msg.body, True)
                continue

            if not 'type' in d:
                streamlog ("wtf is %r" % d, True)
            elif d['type'] == 'exception':
                try:
                    log_exception(d, daystring)
                except Exception as e:
                    print "Error in log_exception(): %r" % e
            elif d['type'] == 'text':
                try:
                    log_text(d, daystring)
                except Exception as e:
                    print "Error in log_text(): %r" % e
            else:
                streamlog ("wtf is %r" % d['type'], True)

    amqp.handle_items(q, myfunc, limit=limit, drain=False, verbose=verbose)
Example #45
            try:
                handle_vote(voter,
                            votee,
                            dir,
                            ip,
                            organic,
                            cheater=cheater,
                            foreground=False)
            except Exception, e:
                print 'Rejecting %r:%r because of %r' % (msg.delivery_tag, r,
                                                         e)
                chan.basic_reject(msg.delivery_tag, requeue=True)

        update_comment_votes(comments)

    amqp.handle_items('register_vote_q', _handle_vote, limit=limit)


process_votes = process_votes_single


def process_comment_sorts(limit=500):
    def _handle_sort(msgs, chan):
        cids = list(set(int(msg.body) for msg in msgs))
        comments = Comment._byID(cids, data=True, return_dict=False)
        print comments
        update_comment_votes(comments)

    amqp.handle_items('commentsort_q', _handle_sort, limit=limit)

Example #46
def run_realtime_email_queue(limit=1, debug=False):
    # Email new posts, comments or messages to whoever's set to get them
    # Called from reddit_consumer-realtime_email_q long running job
    
    from r2.lib import amqp
    from r2.models import Comment, Subreddit, Link, Thing, SaveHide
    from r2.lib.db.operators import asc, desc
    from r2.lib.utils import fetch_things2
    import time

    run_realtime_email_queue.accounts = None
    run_realtime_email_queue.last_got_accounts = 0
    
    @g.stats.amqp_processor('realtime_email_q')
    def _run_realtime_email_queue(msgs, chan):

        if time.time() - run_realtime_email_queue.last_got_accounts > 600:
            #-- Pick up a fresh list of accounts, if we haven't done so recently, in case settings change
            if g.email_debug:
                g.log.info('Getting accounts')
            run_realtime_email_queue.accounts = Account._query(Account.c.email != None, sort = asc('_date'), data=True)
            run_realtime_email_queue.last_got_accounts = time.time()
        
        for msg in msgs:
            # msg.body contains the unique name of the post, comment or message, e.g. 't1_2n'(comment #95) or 't6_q'(post #26)
            fullname = str(msg.body)
            fullname_type = fullname[0:2]
            id36 = fullname[3:]
            if g.email_debug:
                g.log.info('msg: %r', fullname)
            howold = (datetime.datetime.now() - msg.timestamp).total_seconds() 
            if howold < 110:
                # Wait until this item is 2 minutes old, to allow time for corrections
                if g.email_debug:
                    g.log.info('waiting for a moment')
                time.sleep(120 - howold)

            is_com = is_post = False
            thing = link = comment = None
            if fullname_type == 't1':
                # a comment
                is_com = True
                comment = Comment._byID36(id36, data=True)
                if g.email_debug:
                    g.log.info('comment: %r', comment.body)
                thing = comment
                author = Account._byID(comment.author_id, True)
                kind = Email.Kind.REALTIME_COMMENT
                template = 'email_realtime_comment.html'
                link = Link._byID(comment.link_id, data=True)  
                subject = 'Re: %s' % link.title
                sr_id = comment.sr_id
                
            elif fullname_type == 't6':
                # a post/link
                is_post = True
                link = Link._byID36(id36, data=True)
                if g.email_debug:
                    g.log.info('post: %r', link.title)
                thing = link
                author = Account._byID(link.author_id, True)
                kind = Email.Kind.REALTIME_POST
                template = 'email_realtime_post.html'
                subject = link.title
                sr_id = link.sr_id
                
            else:
                return
            
            sr = Subreddit._byID(sr_id, data=True)
            
            subject = "[%s] %s" % (sr.name, subject)
            
            for account in run_realtime_email_queue.accounts:
                
                sub = sr.get_subscriber(account)
                
                if is_com: 
                    if hasattr(sub,'email_comments') and sub.email_comments:
                        if g.email_debug:
                            g.log.info('  account %r: we should send this comment, because of the space setting', account.name)
                        whysend = 'space'
                    else:
                        email_thread = Link._somethinged(SaveHide, account, link, 'email')[account,link,'email']
                        if email_thread:
                            if g.email_debug:
                                g.log.info('  account %r: we should send this comment, because of the thread setting', account.name)
                            whysend = 'thread'
                        else:    
                            continue
                    
                elif is_post:
                    if hasattr(sub,'email_posts') and sub.email_posts:
                        if g.email_debug:
                            g.log.info('  account %r: we should send this post', account.name)
                        whysend = 'space'
                    else:
                        continue

                if 'session' not in locals():
                    # Open the SMTP session
                    if g.email_debug:
                        g.log.info('Opening SMTP session')
                    session = open_smtp_session()

                # Render the template
                html_email_template = g.mako_lookup.get_template(template)
                html_body = html_email_template.render(link=link, comment=comment, thing=thing, account=account, sub=sub, whysend=whysend)
            
                from_email = '"%s" <%s>' % (g.realtime_email_from_name, g.share_reply,)
                send_html_email(account.email, g.share_reply, subject, html_body, from_full=from_email, session=session)
                if g.email_debug:
                    g.log.info('    sent to %r at %r', account.name, account.email)

        if g.email_debug:
            g.log.info('Done running queue')

        if 'session' in locals():
            # Close the session.
            session.quit()

    amqp.handle_items('realtime_email_q', _run_realtime_email_queue, limit=limit)
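
For context, the consumer above only reads fullnames off 'realtime_email_q'; the excerpt does not show who enqueues them. A minimal sketch of the producer side is given below, assuming amqp.add_item (the publish counterpart of amqp.handle_items used elsewhere in this codebase) and that new comments and posts are queued by fullname at creation time; the helper name queue_realtime_email is hypothetical.

# Hypothetical producer sketch -- not part of the excerpt above.
from r2.lib import amqp

def queue_realtime_email(thing):
    # `thing` is a newly created Comment ('t1_*') or Link ('t6_*');
    # the consumer resolves the fullname back to the object itself.
    amqp.add_item('realtime_email_q', thing._fullname)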
    
    
    
Exemple #47
0
def run(limit=100, streamfile=None, verbose=False):
    if streamfile:
        stream_fp = open(streamfile, "a")
    else:
        stream_fp = None

    def streamlog(msg, important=False):
        if stream_fp:
            stream_fp.write(msg + "\n")
            stream_fp.flush()
        if important:
            print msg

    def add_timestamps(d):
        d['hms'] = d['time'].strftime("%H:%M:%S")

        d['occ'] = "<%s:%s, pid=%-5s, %s>" % (
            d['host'], d['port'], d['pid'],
            d['time'].strftime("%Y-%m-%d %H:%M:%S"))

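    # Cap a list at 25 entries; evict from the middle so both the earliest
    # and the most recent occurrences are kept.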
    def limited_append(l, item):
        if len(l) >= 25:
            l.pop(12)
        l.append(item)

    def log_exception(d, daystring):
        exc_desc = d['exception_desc']
        exc_type = d['exception_type']

        exc_str = "%s: %s" % (exc_type, exc_desc)

        add_timestamps(d)

        tb = []

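        # Fingerprint the exception by its type plus the file/function names in
        # the traceback, so repeat occurrences collapse into one hardcache bucket.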
        key_material = exc_type
        pretty_lines = []

        make_lock_seen = False
        flaky_db_seen = False

        for tpl in d['traceback']:
            tb.append(tpl)
            filename, lineno, funcname, text = tpl
            if text is None:
                pass
            elif (text.startswith("with g.make_lock(")
                  or text.startswith("with make_lock(")):
                make_lock_seen = True
            elif (text.startswith(
                    "(ProgrammingError) server closed the connection")):
                flaky_db_seen = True
            key_material += "%s %s " % (filename, funcname)
            pretty_lines.append("%s:%s: %s()" % (filename, lineno, funcname))
            pretty_lines.append("    %s" % text)

        if exc_desc.startswith("QueuePool limit of size"):
            fingerprint = "QueuePool_overflow"
        elif exc_desc.startswith("error 2 from memcached_get: HOSTNAME "):
            fingerprint = "memcache_suckitude"
        elif exc_type == "TimeoutExpired" and make_lock_seen:
            fingerprint = "make_lock_timeout"
        elif exc_desc.startswith("(OperationalError) FATAL: the database " +
                                 "system is in recovery mode"):
            fingerprint = "recovering_db"
        elif exc_desc.startswith("(OperationalError) could not connect " +
                                 "to server"):
            fingerprint = "unconnectable_db"
        elif exc_desc.startswith("(OperationalError) server closed the " +
                                 "connection unexpectedly"):
            fingerprint = "flaky_db_op"
        elif exc_type == "ProgrammingError" and flaky_db_seen:
            fingerprint = "flaky_db_prog"
            # SQLAlchemy includes the entire query in the exception
            # description which can sometimes be gigantic, in the case of
            # SELECTs. Get rid of it.
            select_pos = exc_str.find("SELECT")
            if select_pos > 0:
                exc_str = exc_str[:select_pos]
        elif exc_type == "NoServerAvailable":
            fingerprint = "cassandra_suckitude"
        else:
            fingerprint = md5(key_material).hexdigest()

        nickname_key = "error_nickname-" + fingerprint
        status_key = "error_status-" + fingerprint

        nickname = g.hardcache.get(nickname_key)

        if nickname is None:
            nickname = '"%s" Exception' % randword().capitalize()
            news = ("A new kind of thing just happened! " +
                    "I'm going to call it a %s\n\n" % nickname)

            news += "Where and when: %s\n\n" % d['occ']
            news += "Traceback:\n"
            news += "\n".join(pretty_lines)
            news += exc_str
            news += "\n"

            emailer.nerds_email(news, "Exception Watcher")

            g.hardcache.set(nickname_key, nickname, 86400 * 365)
            g.hardcache.set(status_key, "new", 86400)

        if g.hardcache.get(status_key) == "fixed":
            g.hardcache.set(status_key, "new", 86400)
            news = "This was marked as fixed: %s\n" % nickname
            news += "But it just occurred, so I'm marking it new again."
            emailer.nerds_email(news, "Exception Watcher")

        err_key = "-".join(["error", daystring, fingerprint])

        existing = g.hardcache.get(err_key)

        if not existing:
            existing = dict(exception=exc_str, traceback=tb, occurrences=[])

        limited_append(existing['occurrences'], d['occ'])

        g.hardcache.set(err_key, existing, 7 * 86400)

        streamlog("%s [X] %-70s" % (d['hms'], nickname), verbose)

    def log_text(d, daystring):
        add_timestamps(d)
        char = d['level'][0].upper()
        streamlog("%s [%s] %r" % (d['hms'], char, d['text']), verbose)
        logclass_key = "logclass-" + d['classification']

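        # Email the nerds only the first time a log classification shows up
        # (remembered in hardcache for 90 days), and never for debug-level noise.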
        if not g.hardcache.get(logclass_key):
            g.hardcache.set(logclass_key, True, 86400 * 90)

            if d['level'] != 'debug':
                news = "The code just generated a [%s] message.\n" % \
                       d['classification']
                news += "I don't remember ever seeing one of those before.\n"
                news += "\n"
                news += "It happened on: %s\n" % d['occ']
                news += "The log level was: %s\n" % d['level']
                news += "The complete text was:\n"
                news += repr(d['text'])
                emailer.nerds_email(news, "reddit secretary")

        occ_key = "-".join(
            ["logtext", daystring, d['level'], d['classification']])

        occurrences = g.hardcache.get(occ_key)

        if occurrences is None:
            occurrences = []

        d2 = {}

        d2['occ'] = d['occ']
        d2['text'] = repr(d['text'])

        limited_append(occurrences, d2)
        g.hardcache.set(occ_key, occurrences, 86400 * 7)

    def myfunc(msgs, chan):
        daystring = datetime.now(g.display_tz).strftime("%Y/%m/%d")

        for msg in msgs:
            try:
                d = pickle.loads(msg.body)
            except TypeError:
                streamlog("wtf is %r" % msg.body, True)
                continue

            if 'type' not in d:
                streamlog("wtf is %r" % d, True)
            elif d['type'] == 'exception':
                try:
                    log_exception(d, daystring)
                except Exception as e:
                    print "Error in log_exception(): %r" % e
            elif d['type'] == 'text':
                try:
                    log_text(d, daystring)
                except Exception as e:
                    print "Error in log_text(): %r" % e
            else:
                streamlog("wtf is %r" % d['type'], True)

    amqp.handle_items(q, myfunc, limit=limit, drain=False, verbose=verbose)
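
The consumer above expects pickled dicts whose keys are read by myfunc, add_timestamps, log_exception and log_text. The following is a rough sketch of what the producer side would have to publish; the queue name 'log_q' (standing in for the module-level `q` passed to handle_items, which is defined outside this excerpt) and the helper queue_exception are assumptions, not part of the original code.

# Hypothetical producer sketch -- field names are taken from what the consumer
# above reads; the queue name and helper are assumed.
import os
import pickle
import socket
import sys
import traceback
from datetime import datetime

from r2.lib import amqp

def queue_exception(port='n/a'):
    # Must be called from inside an `except` block so sys.exc_info() is populated.
    exc_type, exc_value, exc_tb = sys.exc_info()
    d = {
        'type': 'exception',
        'exception_type': exc_type.__name__,
        'exception_desc': str(exc_value),
        # (filename, lineno, funcname, text) tuples, as unpacked in log_exception()
        'traceback': traceback.extract_tb(exc_tb),
        # the consumer only strftime()s this, so a naive timestamp suffices here
        'time': datetime.now(),
        'host': socket.gethostname(),
        'port': port,
        'pid': os.getpid(),
    }
    amqp.add_item('log_q', pickle.dumps(d))  # 'log_q' is a guess at the module-level `q`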