Example #1
 def save_screenshot(self, version, retries=3):
     name = self.get_file(version, "png")
     try:
         img = yield client.getPage(manet_url(self.url, retries > 1))
         with open(name, "wb") as f:
             f.write(img)
         os.chmod(name, 0o644)
     except Exception as e:
         if retries:
             yield deferredSleep(3)
             yield self.save_screenshot(version, retries=retries - 1)
         else:
             loggerr("%s: %s %s %s" % (self.name, type(e), e, self.url),
                     self.channel, "WebMonitor-shot")
     else:
         try:
             w = 200.0
             thumbname = "%s-small.png" % name[:-4]
             with Image(filename=name) as i:
                 iW, iH = i.size
                 h = round(iH * w / iW)
                 with i.clone() as i2:
                     i2.resize(int(w), int(h))
                     i2.save(filename=thumbname)
             os.chmod(thumbname, 0o644)
             del img, i, i2
         except Exception as e:
             loggerr("%s: %s %s" % (self.name, type(e), e), self.channel,
                     "WebMonitor-resize")
Example #2
 def __init__(self, channel='private'):
     filename = BOTNAME
     if channel:
         filename += '_' + channel
     if not os.path.isdir('log'):
         os.mkdir('log')
     self.loggers = {}
     for name, suffix in [("normal", ""), ("filtered", "_filtered")]:
         f = str(os.path.join('log', "%s%s.log" % (filename, suffix)))
         self.loggers[name] = getLogger("%s%s" % (channel, suffix))
         if not self.loggers[name].handlers:
             self.loggers[name].addHandler(
                 RotatingFileHandler(f, backupCount=1000, encoding="utf-8"))
         self.loggers[name].handlers[0].setFormatter(
             Formatter('%(asctime)s %(message)s', "%Y-%m-%d %H:%M:%S"))
         # use a local so the next loop iteration's getLogger name is not clobbered
         log_channel = channel if channel != "private" else None
         if os.path.isfile(f) and os.path.getsize(f) > 1024 * 1024:
             logg("Rolling log file %s" % f,
                  color="yellow",
                  action="LOGS",
                  channel=log_channel)
             try:
                 self.loggers[name].handlers[0].doRollover()
             except Exception as e:
                 loggerr("Rolling file %s crashed: %s\n%s" %
                         (f, self.loggers[name].handlers, e),
                         action="LOGS",
                         channel=log_channel)
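
Note on the rollover logic above: RotatingFileHandler is created without maxBytes, so it never rotates on its own; instead the constructor checks the file size once and triggers doRollover() by hand. If automatic rotation were acceptable, the stdlib can do the size check on every write. A minimal sketch (logger name and path are illustrative):

from logging import getLogger, Formatter
from logging.handlers import RotatingFileHandler

# with maxBytes set, the handler rolls the file over automatically
# whenever a record would push it past the limit
handler = RotatingFileHandler("log/bot.log", maxBytes=1024 * 1024,
                              backupCount=1000, encoding="utf-8")
handler.setFormatter(Formatter('%(asctime)s %(message)s',
                               "%Y-%m-%d %H:%M:%S"))
logger = getLogger("private")
logger.addHandler(handler)
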
Example #3
 def save_screenshot(self, version, retries=3):
     name = self.get_file(version, "png")
     try:
         img = yield client.getPage(manet_url(self.url, retries > 1))
         with open(name, "wb") as f:
             f.write(img)
         os.chmod(name, 0o644)
     except Exception as e:
         if retries:
             yield deferredSleep(3)
             yield self.save_screenshot(version, retries=retries-1)
         else:
             loggerr("%s: %s %s %s" % (self.name, type(e), e, self.url), self.channel, "WebMonitor-shot")
     else:
         try:
             w = 200.0
             thumbname = "%s-small.png" % name[:-4]
             with Image(filename=name) as i:
                 iW, iH = i.size
                 h = round(iH * w / iW)
                 with i.clone() as i2:
                     i2.resize(int(w), int(h))
                     i2.save(filename=thumbname)
             os.chmod(thumbname, 0o644)
             del img, i, i2
         except Exception as e:
             loggerr("%s: %s %s" % (self.name, type(e), e), self.channel, "WebMonitor-resize")
Example #4
def _clean_redir_urls(text, urls={}, first=True, pool=None):
    for res in URL_REGEX.findall(text):
        url00 = res[2].encode('utf-8')
        url0 = url00
        if not url00.startswith('http'):
            if "@" in url00 or url00.startswith('#'):
                continue
            url0 = "http://%s" % url00
        if url0 in urls:
            url1 = urls[url0]
            if url1 == url0:
                continue
        else:
            try:
                url1 = yield deferToThreadPool(reactor, pool, get_url, url0, timeout=8)
                url1 = clean_url(url1)
                urls[url0] = url1
                urls[url1] = url1
            except Exception as e:
                if config.DEBUG and not first:
                    loggerr("trying to resolve %s : %s" % (url0, e))
                if "403" in str(e) or "Error 30" in str(e):
                    urls[url0] = url00
                url1 = url00
        # mask already-resolved links so later passes skip them
        if first and url1 != url00:
            url1 = url1.replace('http', '##HTTP##')
        try:
            url1 = url1.decode('utf-8')
            text = text.replace(res[0], '%s%s%s' % (res[1], url1, res[4]))
        except Exception:
            if config.DEBUG:
                loggerr("encoding %s" % url1)
    if not first:
        text = text.replace('##HTTP##', 'http')
    defer.returnValue((text, urls))
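
The urls={} default above is the mutable-default-argument pitfall used on purpose: the dict is created once at definition time and shared by every call, so it acts as a cross-call resolver cache (later revisions of this function pass an explicit cache_urls instead). A minimal sketch of the behaviour, with a hypothetical resolve helper:

def resolve(url, cache={}):
    # the default dict persists across calls, acting as a cache
    if url not in cache:
        cache[url] = url.lower()  # stand-in for real redirect resolution
    return cache[url]

resolve("http://EXAMPLE.com/a")  # first call populates the shared dict
resolve("http://EXAMPLE.com/a")  # second call is a cache hit
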
Example #5
 def render_template(self, path, filename):
     data = {'user': self.user, 'url': self.url}
     outfile = filename.replace('.html', '_%s.html' % self.user)
     try:
         import pystache
         from contextlib import nested
         with nested(open(os.path.join(path, filename), "r"), open(os.path.join("web", outfile), "w")) as (template, generated):
             generated.write(pystache.render(template.read(), data))
     except IOError as e:
         loggerr("Could not write web/%s from %s/%s : %s" % (outfile, path, filename, e), action="stats")
Example #6
    def dump_data(self):
        if not self.public_url:
            returnValue(False)
        stats = yield find_stats({'user': self.user}, filter=sortasc('timestamp'), timeout=120)
        dates = [s['timestamp'] for s in stats]
        tweets = [s['tweets'] for s in stats]
        tweets_diff = [a - b for a, b in zip(tweets[1:], tweets[:-1])]
        followers = [s['followers'] for s in stats]
        followers_diff = [a - b for a, b in zip(followers[1:], followers[:-1])]
        rts_diff = [s['rts_last_hour'] for s in stats]
        rts = []
        n = 0
        for a in rts_diff:
            n += a
            rts.append(n)

        jsondata = {}
        imax = len(dates) - 1
        for i, date in enumerate(dates):
            ts = int(time.mktime(date.timetuple()))
            jsondata[ts] = { 'tweets': tweets[i], 'followers': followers[i], 'rts': rts[i] }
            if i < imax:
                jsondata[ts].update({ 'tweets_diff': tweets_diff[i], 'followers_diff': followers_diff[i], 'rts_diff': rts_diff[i+1] })

        try:
            jsondir = os.path.join('web', 'data')
            if not os.path.exists(jsondir):
                os.makedirs(jsondir)
                os.chmod(jsondir, 0o755)
            with open(os.path.join(jsondir, 'stats_%s.json' % self.user), 'w') as outfile:
                write_json(jsondata, outfile)
        except IOError as e:
            loggerr("Could not write web/data/stats_%s.json : %s" % (self.user, e), action="stats")

        try:
            from plots import CumulativeCurve, DailyHistogram, WeekPunchCard
            imgdir = os.path.join('web', 'img')
            if not os.path.exists(imgdir):
                os.makedirs(imgdir)
                os.chmod(imgdir, 0o755)
            CumulativeCurve(dates, tweets, 'Total tweets', imgdir, 'tweets_%s' % self.user)
            CumulativeCurve(dates, followers, 'Total followers', imgdir, 'followers_%s' % self.user)
            CumulativeCurve(dates, rts, 'Total RTs since %s' % dates[0], imgdir, 'rts_%s' % self.user)
            DailyHistogram(dates[:-1], tweets_diff, 'New tweets', imgdir, 'new_tweets_%s' % self.user)
            DailyHistogram(dates[:-1], followers_diff, 'New followers', imgdir, 'new_followers_%s' % self.user)
            DailyHistogram(dates[:-1], rts_diff[1:], 'New RTs', imgdir, 'new_rts_%s' % self.user)
            WeekPunchCard(dates[:-1], tweets_diff, 'Tweets punchcard', imgdir, 'tweets_card_%s' % self.user)
            WeekPunchCard(dates[:-1], followers_diff, 'Followers punchcard', imgdir, 'followers_card_%s' % self.user)
            WeekPunchCard(dates[:-1], rts_diff[1:], 'RTs punchcard', imgdir, 'rts_card_%s' % self.user)
        except Exception as e:
            loggerr("Could not write images in web/img for %s : %s" % (self.user, e), action="stats")

        data = {'user': self.user, 'url': self.public_url}
        self.render_template("static_stats.html", self.user, data)
        returnValue(True)
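
Two small idioms above are worth calling out: zip(xs[1:], xs[:-1]) turns a cumulative series into per-interval deltas, and the running-total loop over rts_last_hour performs the inverse. Both directions on toy data:

tweets = [10, 12, 15, 15]
# deltas between consecutive samples
tweets_diff = [a - b for a, b in zip(tweets[1:], tweets[:-1])]
assert tweets_diff == [2, 3, 0]

rts_diff = [0, 2, 5, 1]
rts, n = [], 0
for a in rts_diff:          # running total rebuilds the cumulative series
    n += a
    rts.append(n)
assert rts == [0, 2, 7, 8]
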
Example #7
 def render_template(self, template, name, data):
     outfile = template.replace('.html', '_%s.html' % name)
     try:
         ofile = os.path.join("web", outfile)
         with nested(open(os.path.join(self.templates, template), "r"), codecs.open(ofile, "w", encoding="utf-8")) as (temp, generated):
             generated.write(Renderer(string_encoding='utf8').render(temp.read(), data))
         os.chmod(ofile, 0o644)
         return True
     except IOError as e:
         loggerr("Could not write web/%s from %s/%s : %s" % (outfile, self.templates, template, e), action="stats")
         return False
Example #8
    def dump_data(self):
        if not self.url:
            return

        self.db.authenticate(config.MONGODB['USER'], config.MONGODB['PSWD'])
        stats = list(self.db['stats'].find({'user': self.user}, sort=[('timestamp', pymongo.ASCENDING)]))
        dates = [s['timestamp'] for s in stats]
        tweets = [s['tweets'] for s in stats]
        tweets_diff = [a - b for a, b in zip(tweets[1:], tweets[:-1])]
        followers = [s['followers'] for s in stats]
        followers_diff = [a - b for a, b in zip(followers[1:], followers[:-1])]
        rts_diff = [s['rts_last_hour'] for s in stats]
        rts = []
        n = 0
        for a in rts_diff:
            n += a
            rts.append(n)

        jsondata = {}
        imax = len(dates) - 1
        for i, date in enumerate(dates):
            ts = int(time.mktime(date.timetuple()))
            jsondata[ts] = { 'tweets': tweets[i], 'followers': followers[i], 'rts': rts[i] }
            if i < imax:
                jsondata[ts].update({ 'tweets_diff': tweets_diff[i], 'followers_diff': followers_diff[i], 'rts_diff': rts_diff[i+1] })

        try:
            jsondir = os.path.join('web', 'data')
            if not os.path.exists(jsondir):
                os.makedirs(jsondir)
            with open(os.path.join(jsondir, 'stats_%s.json' % self.user), 'w') as outfile:
                write_json(jsondata, outfile)
        except IOError as e:
            loggerr("Could not write web/data/stats_%s.json : %s" % (self.user, e), action="stats")

        try:
            from plots import CumulativeCurve, DailyHistogram, WeekPunchCard
            imgdir = os.path.join('web', 'img')
            if not os.path.exists(imgdir):
                os.makedirs(imgdir)
            CumulativeCurve(dates, tweets, 'Total tweets', imgdir, 'tweets_%s' % self.user)
            CumulativeCurve(dates, followers, 'Total followers', imgdir, 'followers_%s' % self.user)
            CumulativeCurve(dates, rts, 'Total RTs since %s' % dates[0], imgdir, 'rts_%s' % self.user)
            DailyHistogram(dates[:-1], tweets_diff, 'New tweets', imgdir, 'new_tweets_%s' % self.user)
            DailyHistogram(dates[:-1], followers_diff, 'New followers', imgdir, 'new_followers_%s' % self.user)
            DailyHistogram(dates[:-1], rts_diff[1:], 'New RTs', imgdir, 'new_rts_%s' % self.user)
            WeekPunchCard(dates[:-1], tweets_diff, 'Tweets punchcard', imgdir, 'tweets_card_%s' % self.user)
            WeekPunchCard(dates[:-1], followers_diff, 'Followers punchcard', imgdir, 'followers_card_%s' % self.user)
            WeekPunchCard(dates[:-1], rts_diff[1:], 'RTs punchcard', imgdir, 'rts_card_%s' % self.user)
        except Exception as e:
            loggerr("Could not write images in web/img for %s : %s" % (self.user, e), action="stats")

        self.render_template(os.path.join("web", "templates"), "static_stats.html")
Example #9
 def add_version(self, data):
     version = time.strftime("%y%m%d-%H%M")
     for ftype in data:
         name = self.get_file(version, ftype)
         try:
             with open(name, "w") as f:
                 f.write(data[ftype])
             os.chmod(name, 0o644)
         except Exception as e:
             loggerr("%s: %s %s" % (self.name, type(e), e), self.channel, "WebMonitor")
     if URL_MANET:
         yield self.save_screenshot(version)
     self.versions.append(version)
Example #10
 def add_version(self, data):
     version = time.strftime("%y%m%d-%H%M")
     for ftype in data:
         name = self.get_file(version, ftype)
         try:
             with open(name, "w") as f:
                 f.write(data[ftype])
             os.chmod(name, 0o644)
         except Exception as e:
             loggerr("%s: %s %s" % (self.name, type(e), e), self.channel,
                     "WebMonitor")
     if URL_MANET:
         yield self.save_screenshot(version)
     self.versions.append(version)
Example #11
def _clean_redir_urls(text, cache_urls, last=False):
    for res in URL_REGEX.findall(text):
        url00 = res[2].encode('utf-8')
        url0 = url00
        if re_tweet_url.search(url0):
            continue
        if not url00.startswith('http'):
            if "@" in url00 or url00.startswith('#'):
                continue
            url0 = "http://%s" % url00
        if url0.startswith('http://t.co/') and url0[-1] in ".,:\"'":
            url0 = url0[:-1]
        if url0 in cache_urls:
            url1 = cache_urls[url0]
            if url1 == url0:
                continue
        else:
            try:
                agent = ResolverAgent(url0)
                yield agent.resolve()
                url1, cache_urls = clean_url(agent.lastURI, url0, cache_urls)
            except DNSLookupError:
                if url00.startswith('http'):
                    url1, cache_urls = clean_url(agent.lastURI, url0,
                                                 cache_urls)
                else:
                    url1 = url00
                    cache_urls[url0] = url00
            except Exception as e:
                if config.DEBUG and last and url00.startswith('http'):
                    loggerr("%s trying to resolve %s : %s" %
                            (type(e), url0, e),
                            action="utils")
                if "403" in str(e) or "Error 30" in str(e):
                    cache_urls[url0] = url00
                url1 = url00
        if not last and url1 != url00 and not re_shorteners.search(url1):
            url1 = url1.replace('http', '##HTTP##')
        try:
            url1 = url1.decode('utf-8')
            text = text.replace(res[0], '%s%s%s' % (res[1], url1, res[4]))
        except Exception:
            if config.DEBUG:
                loggerr("encoding %s" % url1, action="utils")
    if last:
        text = text.replace('##HTTP##', 'http')
    defer.returnValue((text, cache_urls))
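
The ##HTTP## placeholder above protects links that are already resolved: intermediate passes rewrite http to ##HTTP## so URL_REGEX no longer matches them, and the final pass (last=True) restores the scheme. A minimal sketch of the masking idea, with an illustrative regex in place of the original URL_REGEX:

import re

URL_RE = re.compile(r'https?://\S+')

def mask(text):
    # hide resolved links from later regex passes
    return URL_RE.sub(lambda m: m.group(0).replace('http', '##HTTP##'), text)

def unmask(text):
    # final pass restores the scheme
    return text.replace('##HTTP##', 'http')

masked = mask("see http://example.com/x")
assert URL_RE.search(masked) is None
assert unmask(masked) == "see http://example.com/x"
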
Example #12
def _clean_redir_urls(text, cache_urls, last=False):
    for res in URL_REGEX.findall(text):
        url00 = res[2].encode('utf-8')
        url0 = url00
        if re_tweet_url.search(url0):
            continue
        if not url00.startswith('http'):
            if "@" in url00 or url00.startswith('#'):
                continue
            url0 = "http://%s" % url00
        if url0.startswith('http://t.co/') and url0[-1] in ".,:\"'":
            url0 = url0[:-1]
        if url0 in cache_urls:
            url1 = cache_urls[url0]
            if url1 == url0:
                continue
        else:
            try:
                agent = ResolverAgent(url0)
                yield agent.resolve()
                url1, cache_urls = clean_url(agent.lastURI, url0, cache_urls)
            except DNSLookupError:
                if url00.startswith('http'):
                    url1, cache_urls = clean_url(agent.lastURI, url0, cache_urls)
                else:
                    url1 = url00
                    cache_urls[url0] = url00
            except Exception as e:
                if config.DEBUG and last and url00.startswith('http'):
                    loggerr("%s trying to resolve %s : %s" % (type(e), url0, e), action="utils")
                if "403" in str(e) or "Error 30" in str(e):
                    cache_urls[url0] = url00
                url1 = url00
        if not last and url1 != url00 and not re_shorteners.search(url1):
            url1 = url1.replace('http', '##HTTP##')
        try:
            url1 = url1.decode('utf-8')
            text = text.replace(res[0], '%s%s%s' % (res[1], url1, res[4]))
        except Exception:
            if config.DEBUG:
                loggerr("encoding %s" % url1, action="utils")
    if last:
        text = text.replace('##HTTP##', 'http')
    defer.returnValue((text, cache_urls))
Example #13
 def __init__(self, channel='private'):
     filename = BOTNAME
     if channel:
         filename += '_' + channel
     if not os.path.isdir('log'):
         os.mkdir('log')
     self.loggers = {}
     for name, suffix in [("normal", ""), ("filtered", "_filtered")]:
         f = str(os.path.join('log', "%s%s.log" % (filename, suffix)))
         self.loggers[name] = getLogger("%s%s" % (channel, suffix))
         if not self.loggers[name].handlers:
             self.loggers[name].addHandler(RotatingFileHandler(f, backupCount=1000, encoding="utf-8"))
         self.loggers[name].handlers[0].setFormatter(Formatter('%(asctime)s %(message)s', "%Y-%m-%d %H:%M:%S"))
         # use a local so the next loop iteration's getLogger name is not clobbered
         log_channel = channel if channel != "private" else None
         if os.path.isfile(f) and os.path.getsize(f) > 1024*1024:
             logg("Rolling log file %s" % f, color="yellow", action="LOGS", channel=log_channel)
             try:
                 self.loggers[name].handlers[0].doRollover()
             except Exception as e:
                 loggerr("Rolling file %s crashed: %s\n%s" % (f, self.loggers[name].handlers, e), action="LOGS", channel=log_channel)
Example #14
 def logerr(self, action, message=""):
     if self.coll:
         action += " %s" % self.coll
     if self.method:
         action += " %s" % self.method
     loggerr("%s. %s" % (action, message), action="mongodb")