Example #1
    @inlineCallbacks  # implied by the yield/returnValue pattern below (Twisted)
    def dump_data(self):
        if not self.public_url:
            returnValue(False)
        stats = yield find_stats({'user': self.user}, filter=sortasc('timestamp'), timeout=120)
        dates = [s['timestamp'] for s in stats]
        tweets = [s['tweets'] for s in stats]
        tweets_diff = [a - b for a, b in zip(tweets[1:], tweets[:-1])]
        followers = [s['followers'] for s in stats]
        followers_diff = [a - b for a, b in zip(followers[1:], followers[:-1])]
        rts_diff = [s['rts_last_hour'] for s in stats]
        # rebuild cumulative RT totals from the hourly counts
        rts = []
        n = 0
        for a in rts_diff:
            n += a
            rts.append(n)

        jsondata = {}
        imax = len(dates) - 1
        for i, date in enumerate(dates):
            ts = int(time.mktime(date.timetuple()))
            jsondata[ts] = { 'tweets': tweets[i], 'followers': followers[i], 'rts': rts[i] }
            if i < imax:
                jsondata[ts].update({ 'tweets_diff': tweets_diff[i], 'followers_diff': followers_diff[i], 'rts_diff': rts_diff[i+1] })

        try:
            jsondir = os.path.join('web', 'data')
            if not os.path.exists(jsondir):
                os.makedirs(jsondir)
                os.chmod(jsondir, 0o755)
            with open(os.path.join(jsondir, 'stats_%s.json' % self.user), 'w') as outfile:
                write_json(jsondata, outfile)
        except IOError as e:
            loggerr("Could not write web/data/stats_%s.json : %s" % (self.user, e), action="stats")

        try:
            from plots import CumulativeCurve, DailyHistogram, WeekPunchCard
            imgdir = os.path.join('web', 'img')
            if not os.path.exists(imgdir):
                os.makedirs(imgdir)
                os.chmod(imgdir, 0o755)
            CumulativeCurve(dates, tweets, 'Total tweets', imgdir, 'tweets_%s' % self.user)
            CumulativeCurve(dates, followers, 'Total followers', imgdir, 'followers_%s' % self.user)
            CumulativeCurve(dates, rts, 'Total RTs since %s' % dates[0], imgdir, 'rts_%s' % self.user)
            DailyHistogram(dates[:-1], tweets_diff, 'New tweets', imgdir, 'new_tweets_%s' % self.user)
            DailyHistogram(dates[:-1], followers_diff, 'New followers', imgdir, 'new_followers_%s' % self.user)
            DailyHistogram(dates[:-1], rts_diff[1:], 'New RTs', imgdir, 'new_rts_%s' % self.user)
            WeekPunchCard(dates[:-1], tweets_diff, 'Tweets punchcard', imgdir, 'tweets_card_%s' % self.user)
            WeekPunchCard(dates[:-1], followers_diff, 'Followers punchcard', imgdir, 'followers_card_%s' % self.user)
            WeekPunchCard(dates[:-1], rts_diff[1:], 'RTs punchcard', imgdir, 'rts_card_%s' % self.user)
        except Exception as e:
            loggerr("Could not write images in web/img for %s : %s" % (self.user, e), action="stats")

        data = {'user': self.user, 'url': self.public_url}
        self.render_template("static_stats.html", self.user, data)
        returnValue(True)
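
A minimal standalone sketch of the two transforms above, on hypothetical counter values (no database or Twisted required): the pairwise diff turns cumulative totals into per-interval deltas, and the running sum turns hourly RT counts back into a cumulative curve.

tweets = [100, 103, 110, 110]
rts_last_hour = [0, 2, 1, 4]

# per-interval deltas, as in tweets_diff/followers_diff above
tweets_diff = [a - b for a, b in zip(tweets[1:], tweets[:-1])]  # [3, 7, 0]

# running total of the hourly RT counts, as in the rts loop above
rts, n = [], 0
for a in rts_last_hour:
    n += a
    rts.append(n)  # ends as [0, 2, 3, 7]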
Example #2
@defer.inlineCallbacks  # implied by the yield/defer.returnValue pattern (Twisted)
def getFeeds(db,
             channel,
             database,
             url_format=True,
             add_url=None,
             randorder=None):
    urls = []
    queries = yield db['feeds'].find(
        {
            'database': database,
            'channel': channel.lower()
        },
        fields=['name', 'query'],
        filter=sortasc('timestamp'))
    if database == "tweets":
        # create combined queries on Icerocket/Topsy or the Twitter API from search words retrieved in db
        query = ""
        try:
            queries = [queries[i] for i in randorder]
        except (TypeError, IndexError):  # randorder is None or out of range
            pass
        for feed in queries:
            # queries starting with @ should return only tweets from corresponding user
            arg = str(feed['query'].encode('utf-8')).replace('@', 'from:')
            rawrg = arg
            space = " OR "
            if url_format:
                if not arg.startswith('from:') and not arg.startswith('#'):
                    arg = "(%s)" % urllib.quote(arg, '')
                if add_url:
                    space = "+OR+"
                arg = "%s%s" % (arg, space)
            else:
                arg = " «%s»  | " % arg
            if " OR " in rawrg or " -" in rawrg:
                urls.append(formatQuery(arg, add_url))
            elif query.count(space) < 3:
                query += arg
            else:
                urls.append(formatQuery(query, add_url))
                query = arg
        if query != "":
            urls.append(formatQuery(query, add_url))
    else:
        if not url_format:
            urls = assembleResults([feed['name'] for feed in queries])
        elif database == "pages":
            urls = [(str(feed['query']), feed['name']) for feed in queries]
        else:
            urls = [str(feed['query']) for feed in queries]
    defer.returnValue(urls)
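
The batching above packs at most three plain search terms into one combined query before flushing it to a URL; terms containing an explicit OR or a negation bypass the batch. A standalone sketch of that grouping with hypothetical terms and a plain join in place of formatQuery:

terms = ["#python", "from:twistedmatrix", "deferred", "reactor", "trial"]
space = " OR "
batches, query = [], ""
for arg in (t + space for t in terms):
    if query.count(space) < 3:
        query += arg
    else:
        batches.append(query[:-len(space)])
        query = arg
if query:
    batches.append(query[:-len(space)])
print(batches)
# ['#python OR from:twistedmatrix OR deferred', 'reactor OR trial']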
Example #3
@defer.inlineCallbacks  # implied by the yield/defer.returnValue pattern (Twisted)
def getFeeds(db, channel, database, url_format=True, add_url=None, randorder=None):
    urls = []
    queries = yield db["feeds"].find(
        {"database": database, "channel": re.compile("^%s$" % channel, re.I)},
        fields=["name", "query"],
        filter=sortasc("timestamp"),
    )
    if database == "tweets":
        # create combined queries on Icerocket/Topsy or the Twitter API from search words retrieved in db
        query = ""
        try:
            queries = [queries[i] for i in randorder]
        except (TypeError, IndexError):  # randorder is None or out of range
            pass
        for feed in queries:
            # queries starting with @ should return only tweets from corresponding user
            arg = str(feed["query"].encode("utf-8")).replace("@", "from:")
            rawrg = arg
            space = " OR "
            if url_format:
                if not arg.startswith("from:") and not arg.startswith("#"):
                    arg = "(%s)" % urllib.quote(arg, "")
                if add_url:
                    space = "+OR+"
                arg = "%s%s" % (arg, space)
            else:
                arg = " «%s»  | " % arg
            if " OR " in rawrg or " -" in rawrg:
                urls.append(formatQuery(arg, add_url))
            elif query.count(space) < 3:
                query += arg
            else:
                urls.append(formatQuery(query, add_url))
                query = arg
        if query != "":
            urls.append(formatQuery(query, add_url))
    else:
        if not url_format:
            urls = assembleResults([feed["name"] for feed in queries])
        else:
            urls = [str(feed["query"]) for feed in queries]
    defer.returnValue(urls)
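
This variant matches the channel with an anchored, case-insensitive regex rather than lower-casing both sides. A hypothetical illustration of that match, with a re.escape guard against metacharacters in the channel name:

import re

channel = "#MyChannel"
pattern = re.compile("^%s$" % re.escape(channel), re.I)
print(bool(pattern.match("#mychannel")))   # True: re.I ignores case
print(bool(pattern.match("#mychannels")))  # False: the anchors reject extra characters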
Example #4
    @inlineCallbacks  # implied by the yield/returnValue pattern below (Twisted)
    def digest(self, hours, channel):
        now = datetime.today()
        since = now - timedelta(hours=hours)
        re_chan = re.compile(r'^#*%s$' % re.escape(channel.lower()), re.I)  # escape any regex metacharacters
        query = {'channel': re_chan, 'timestamp': {'$gte': since}}
        data = {
            "channel": channel,
            "t0": clean_date(since),
            "t1": clean_date(now),
            "news": [],
            "links": [],
            "imgs": [],
            "tweets": []
        }

        news = yield SingleMongo('news', 'find', query, fields=['sourcename', 'source', 'link', 'message'], filter=sortasc('sourcename')+sortasc('timestamp'))
        lastsource = ""
        for n in news:
            source = n["sourcename"]
            if source != lastsource:
                lastsource = source
                data["news"].append({
                    "name": source,
                    "link": n["link"],
                    "elements": []
                })
            data["news"][-1]["elements"].append({
                "text": n["message"],
                "link": n["link"]
            })
        del news

        tweets = yield SingleMongo('tweets', 'find', query, fields=['screenname', 'message', 'link'], filter=sortasc('id'))
        links = {}
        imgs = {}
        filters = yield SingleMongo('filters', 'find', {'channel': re_chan}, fields=['keyword'])
        filters = [keyword['keyword'].lower() for keyword in filters]
        for t in tweets:
            skip = False
            tuser_low = t['screenname'].lower()
            if "@%s" % tuser_low in filters:
                continue
            msg_low = t["message"].lower()
            if not ((self.user and self.user in msg_low) or self.user == tuser_low):
                for k in filters:
                    if k in msg_low:
                        skip = True
                        break
            if skip:
                continue
            for link in URL_REGEX.findall(t["message"]):
                link, _ = clean_url(link[2])
                if not link.startswith("http"):
                    continue
                tid = re_twitmedia.search(link)
                if tid:
                    tid = tid.group(1)
                    if tid not in imgs:
                        imgs[tid] = 1
                        data["imgs"].append({"id": tid})
                    continue
                if re_tweet.match(link):
                    continue
                if link not in links:
                    links[link] = {
                        "link": link,
                        "first": ("%s: %s" % (t["screenname"], t["message"].replace(link, ""))),
                        "firstlink": t["link"],
                        "count": 0
                    }
                links[link]["count"] += 1

        del tweets
        data["tweets"] = sorted(links.values(), key=lambda x: "%06d-%s" % (10**6-x['count'], x['link']))
        del links

        query["user"] = {"$ne": config.BOTNAME.lower()}
        query["message"] = re.compile(r'https?://')
        links = yield SingleMongo('logs', 'find', query, fields=['screenname', 'message'], filter=sortasc('timestamp'))
        for entry in links:
            for link in re_links.findall(entry["message"]):
                data["links"].append({
                    "user": entry["screenname"],
                    "msg": entry["message"],
                    "link": link
                })
        del links

        filename = "%s_%s_%s" % (channel.lstrip("#"), data["t0"].replace(" ", "+"), data["t1"].replace(" ", "+"))
        if not self.render_template("digest.html", filename, data):
            returnValue("Wooops could not generate html for %s..." % filename)
        returnValue("Digest for the last %s hours available at %sdigest_%s.html" % (hours, self.public_url, filename))
Example #5
@defer.inlineCallbacks  # implied by the yield/defer.returnValue pattern (Twisted)
def getFeeds(db, channel, database, url_format=True, add_url=None, randorder=None):
    urls = []
    queries = yield db['feeds'].find({'database': database, 'channel': channel.lower()}, fields=['name', 'query'], filter=sortasc('timestamp'))
    if database == "tweets":
        # create combined queries on Icerocket/Topsy or the Twitter API from search words retrieved in db
        query = ""
        try:
            queries = [queries[i] for i in randorder]
        except (TypeError, IndexError):  # randorder is None or out of range
            pass
        for feed in queries:
            # queries starting with @ should return only tweets from corresponding user
            arg = str(feed['query'].encode('utf-8')).replace('@', 'from:')
            rawrg = arg
            space = " OR "
            if url_format:
                if not arg.startswith('from:') and not arg.startswith('#'):
                   arg = "(%s)" % urllib.quote(arg, '')
                if add_url:
                    space = "+OR+"
                arg = "%s%s" % (arg, space)
            else:
                arg = " «%s»  | " % arg
            if " OR " in rawrg or " -" in rawrg:
                urls.append(formatQuery(arg, add_url))
            elif query.count(space) < 3:
                query += arg
            else:
                urls.append(formatQuery(query, add_url))
                query = arg
        if query != "":
            urls.append(formatQuery(query, add_url))
    else:
        if not url_format:
            urls = assembleResults([feed['name'] for feed in queries])
        elif database == "pages":
            urls = [(str(feed['query']), feed['name']) for feed in queries]
        else:
            urls = [str(feed['query']) for feed in queries]
    defer.returnValue(urls)
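
All three getFeeds variants rely on Twisted's inlineCallbacks style: yield suspends the function until a Deferred fires, and defer.returnValue delivers the final result. A minimal sketch of that pattern; the db['feeds'].find call and its keyword arguments are assumptions modeled on the snippets above, not a documented driver API.

from twisted.internet import defer

@defer.inlineCallbacks
def get_feed_names(db, channel):
    # hypothetical driver call, modeled on the find(...) usage above
    docs = yield db['feeds'].find({'channel': channel.lower()}, fields=['name'])
    defer.returnValue([d['name'] for d in docs])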