Example #1
0
def get_activity(verbose=False):
    lists = util.list_mailman_lists(verbose)
    for l in lists:
        if verbose:
            print "Processing activity for %s..." % l["name"]
        latest = (
            Session.query(ActivityInMailman)
            .filter(ActivityInMailman.list_name == l["name"])
            .order_by(ActivityInMailman.message_id.desc())
            .first()
        )
        # Walk through message history from the web front-end
        archive_url = l["link"].replace("mailman/listinfo", "pipermail")
        limit = 1000
        latest_id = latest.message_id if latest else -1
        for msg in _yield_messages(archive_url, latest_id, verbose=verbose):
            if verbose:
                print '  -> got msg #%d (%s: "%s")' % (msg["id"], msg["email"], msg["subject"])
            Session.add(
                ActivityInMailman(
                    list_name=l["name"],
                    message_id=msg["id"],
                    subject=msg["subject"],
                    author=msg["author"],
                    email=msg["email"],
                    link=msg["link"],
                    timestamp=msg["date"],
                )
            )
            limit -= 1
            # if limit==0:
            # if verbose: print '  -> Reached activity limit (100)'
            # break;
        Session.commit()
Example #2
0
def snapshot_mailman(verbose=False):
    lists = util.list_mailman_lists(verbose)
    today = datetime.now().date()
    for l in lists:
        if verbose: print 'Processing snapshots for %s...' % l['name']
        latest = Session.query(SnapshotOfMailman)\
                .filter(SnapshotOfMailman.list_name==l['name'])\
                .order_by(SnapshotOfMailman.timestamp.desc())\
                .first()
        # By default, gather 30 days of snapshots
        since = today - timedelta(days=180)
        if latest:
            if latest.timestamp>=today:
                if verbose: print ' -> most recent snapshots have already been processed.'
                continue
            since = latest.timestamp + timedelta(days=1)
        # Download subscriber list
        roster_url = l['link'].replace('listinfo','roster')
        num_subscribers = len(_scrape_subscribers(roster_url, verbose=verbose))
        # Create a snapshot of each day
        while since<today:
            posts_today = Session.query(ActivityInMailman)\
                            .filter(ActivityInMailman.list_name==l['name'])\
                            .filter(ActivityInMailman.timestamp.between(since,since+timedelta(days=1)))\
                            .count()
            sn = SnapshotOfMailman(\
                    list_name=l['name'],\
                    timestamp=since,\
                    subscribers=num_subscribers,
                    posts_today=posts_today)
            Session.add(sn)
            if verbose: print '  -> ',sn.toJson()
            since += timedelta(days=1)
        # Walk through message history, counting messages per day
        Session.commit()