def save_screenshot(self, version, retries=3):
    """Fetch a screenshot of self.url through the manet service, store it
    as the versioned PNG, then derive a 200px-wide thumbnail beside it.

    On fetch failure, waits 3 seconds and retries until `retries` is
    exhausted, then logs the error to the WebMonitor-shot channel.
    """
    filepath = self.get_file(version, "png")
    try:
        payload = yield client.getPage(manet_url(self.url, retries > 1))
        with open(filepath, "wb") as out:
            out.write(payload)
        os.chmod(filepath, 0o644)
    except Exception as exc:
        if not retries:
            loggerr("%s: %s %s %s" % (self.name, type(exc), exc, self.url), self.channel, "WebMonitor-shot")
        else:
            yield deferredSleep(3)
            yield self.save_screenshot(version, retries=retries - 1)
    else:
        # Screenshot saved fine: build the scaled-down preview image.
        try:
            target_width = 200.0
            thumbpath = "%s-small.png" % filepath[:-4]
            with Image(filename=filepath) as source:
                src_w, src_h = source.size
                # Preserve aspect ratio at the fixed 200px width.
                target_height = round(src_h * target_width / src_w)
                with source.clone() as thumb:
                    thumb.resize(int(target_width), int(target_height))
                    thumb.save(filename=thumbpath)
            os.chmod(thumbpath, 0o644)
            # Drop references to the raw bytes and image handles promptly.
            del payload, source, thumb
        except Exception as exc:
            loggerr("%s: %s %s" % (self.name, type(exc), exc), self.channel, "WebMonitor-resize")
def __init__(self, channel='private'):
    """Set up the two rotating log files ("normal" and "filtered") for a
    channel, creating the log/ directory on first use and rolling any
    file already larger than 1 MiB.

    Args:
        channel: IRC channel name; 'private' is the catch-all for
            private messages (reported as channel=None in console logs).
    """
    filename = BOTNAME
    if channel:
        filename += '_' + channel
    if not os.path.isdir('log'):
        os.mkdir('log')
    # Display-only channel value for logg/loggerr: private chats report as
    # None. Kept in a separate variable instead of rebinding `channel`
    # mid-loop — the original mutated `channel`, so the second iteration
    # registered its logger as "None_filtered" instead of
    # "<channel>_filtered".
    log_channel = channel if channel != "private" else None
    self.loggers = {}
    for name, suffix in [("normal", ""), ("filtered", "_filtered")]:
        f = str(os.path.join('log', "%s%s.log" % (filename, suffix)))
        self.loggers[name] = getLogger("%s%s" % (channel, suffix))
        if not len(self.loggers[name].handlers):
            self.loggers[name].addHandler(
                RotatingFileHandler(f, backupCount=1000, encoding="utf-8"))
            self.loggers[name].handlers[0].setFormatter(
                Formatter('%(asctime)s %(message)s', "%Y-%m-%d %H:%M:%S"))
        # Manual rollover: RotatingFileHandler is configured without
        # maxBytes, so size is checked here at construction time.
        if os.path.isfile(f) and os.path.getsize(f) > 1024 * 1024:
            logg("Rolling log file %s" % f, color="yellow", action="LOGS", channel=log_channel)
            try:
                self.loggers[name].handlers[0].doRollover()
            except Exception as e:
                loggerr("Rolling file %s crashed: %s\n%s" % (f, self.loggers[name].handlers, e), action="LOGS", channel=log_channel)
def save_screenshot(self, version, retries=3):
    """Download a manet screenshot of self.url into the versioned PNG
    file, then produce a 200px-wide "-small" thumbnail from it.

    Failed downloads are retried (3s apart) while `retries` remains,
    otherwise the error is logged under WebMonitor-shot.
    """
    dest = self.get_file(version, "png")
    try:
        raw = yield client.getPage(manet_url(self.url, retries > 1))
        with open(dest, "wb") as fd:
            fd.write(raw)
        os.chmod(dest, 0o644)
    except Exception as err:
        if retries:
            yield deferredSleep(3)
            yield self.save_screenshot(version, retries=retries - 1)
        else:
            loggerr("%s: %s %s %s" % (self.name, type(err), err, self.url), self.channel, "WebMonitor-shot")
    else:
        # Download succeeded: render the thumbnail next to the full image.
        try:
            width = 200.0
            thumb_file = "%s-small.png" % dest[:-4]
            with Image(filename=dest) as img:
                full_w, full_h = img.size
                # Scale height to keep the original aspect ratio.
                height = round(full_h * width / full_w)
                with img.clone() as small:
                    small.resize(int(width), int(height))
                    small.save(filename=thumb_file)
            os.chmod(thumb_file, 0o644)
            # Release the image buffers explicitly.
            del raw, img, small
        except Exception as err:
            loggerr("%s: %s %s" % (self.name, type(err), err), self.channel, "WebMonitor-resize")
def _clean_redir_urls(text, urls={}, first=True, pool=None):
    """Resolve shortened/redirected URLs appearing in `text` and replace
    them in place; returns (new_text, urls) via defer.returnValue.

    NOTE(review): the mutable default `urls={}` appears to be an
    intentional cross-call cache of already-resolved URLs shared between
    invocations — confirm against callers before "fixing" it.

    On a first pass (`first=True`) freshly rewritten URLs are masked as
    '##HTTP##...' so later passes do not re-resolve them; the final pass
    (`first=False`) unmasks them.
    """
    for res in URL_REGEX.findall(text):
        url00 = res[2].encode('utf-8')
        url0 = url00
        if not url00.startswith('http'):
            # Skip mentions/hashtags; prefix bare domains with http://.
            if "@" in url00 or url00.startswith('#'):
                continue
            url0 = "http://%s" % url00
        if url0 in urls:
            url1 = urls[url0]
            if url1 == url0:
                continue
        else:
            try:
                # Resolve redirects in a worker thread (8s timeout).
                url1 = yield deferToThreadPool(reactor, pool, get_url, url0, timeout=8)
                url1 = clean_url(url1)
                urls[url0] = url1
                urls[url1] = url1
            except Exception as e:
                if config.DEBUG and not first:
                    loggerr("trying to resolve %s : %s" % (url0, e))
                # 403/30x failures are permanent enough to cache as-is.
                if "403" in str(e) or "Error 30" in str(e):
                    urls[url0] = url00
                url1 = url00
        if first and not url1 == url00:
            # Mask so subsequent passes leave this URL alone.
            url1 = url1.replace('http', '##HTTP##')
        try:
            url1 = url1.decode('utf-8')
            text = text.replace(res[0], '%s%s%s' % (res[1], url1, res[4]))
        except:
            if config.DEBUG:
                # Fixed: original called undefined `logerr` (NameError);
                # the module-level logger is `loggerr`.
                loggerr("encoding %s" % url1)
    if not first:
        text = text.replace('##HTTP##', 'http')
    defer.returnValue((text, urls))
def render_template(self, path, filename):
    """Render the mustache template `path/filename` with the user/url
    context and write it to web/<filename>_<user>.html.

    Logs (rather than raises) on IOError so a missing template or an
    unwritable web/ directory does not abort the caller.
    """
    data = {'user': self.user, 'url': self.url}
    outfile = filename.replace('.html', '_%s.html' % self.user)
    try:
        import pystache
        # contextlib.nested is deprecated (and removed in Python 3); the
        # multi-context `with` statement is the equivalent on 2.7+.
        with open(os.path.join(path, filename), "r") as template, \
             open(os.path.join("web", outfile), "w") as generated:
            generated.write(pystache.render(template.read(), data))
    except IOError as e:
        loggerr("Could not write web/%s from %s/%s : %s" % (outfile, path, filename, e), action="stats")
def dump_data(self):
    """Export this user's Twitter stats as JSON + PNG charts under web/,
    then regenerate the static stats HTML page.

    Returns True via returnValue on completion, False when no public_url
    is configured. Individual write failures are logged, not raised.
    """
    if not self.public_url:
        returnValue(False)
    # All stat snapshots for this user, oldest first.
    stats = yield find_stats({'user': self.user}, filter=sortasc('timestamp'), timeout=120)
    dates = [s['timestamp'] for s in stats]
    tweets = [s['tweets'] for s in stats]
    # Per-interval deltas between consecutive snapshots (len = len-1).
    tweets_diff = [a - b for a, b in zip(tweets[1:], tweets[:-1])]
    followers = [s['followers'] for s in stats]
    followers_diff = [a - b for a, b in zip(followers[1:], followers[:-1])]
    rts_diff = [s['rts_last_hour'] for s in stats]
    # Running cumulative total of RTs since the first snapshot.
    rts = []
    n = 0
    for a in rts_diff:
        n += a
        rts.append(n)
    # Build {unix_ts: {...}} payload; diffs only exist up to the
    # second-to-last snapshot.
    jsondata = {}
    imax = len(dates) - 1
    for i, date in enumerate(dates):
        ts = int(time.mktime(date.timetuple()))
        jsondata[ts] = {'tweets': tweets[i], 'followers': followers[i], 'rts': rts[i]}
        if i < imax:
            jsondata[ts].update({'tweets_diff': tweets_diff[i], 'followers_diff': followers_diff[i], 'rts_diff': rts_diff[i + 1]})
    try:
        jsondir = os.path.join('web', 'data')
        if not os.path.exists(jsondir):
            os.makedirs(jsondir)
            os.chmod(jsondir, 0o755)
        with open(os.path.join(jsondir, 'stats_%s.json' % self.user), 'w') as outfile:
            write_json(jsondata, outfile)
    except IOError as e:
        loggerr("Could not write web/data/stats_%s.json : %s" % (self.user, e), action="stats")
    # Render the chart images; any plotting failure is logged as a unit.
    try:
        from plots import CumulativeCurve, DailyHistogram, WeekPunchCard
        imgdir = os.path.join('web', 'img')
        if not os.path.exists(imgdir):
            os.makedirs(imgdir)
            os.chmod(imgdir, 0o755)
        CumulativeCurve(dates, tweets, 'Total tweets', imgdir, 'tweets_%s' % self.user)
        CumulativeCurve(dates, followers, 'Total followers', imgdir, 'followers_%s' % self.user)
        CumulativeCurve(dates, rts, 'Total RTs since %s' % dates[0], imgdir, 'rts_%s' % self.user)
        # Diff series are one element shorter, hence the dates[:-1] axes.
        DailyHistogram(dates[:-1], tweets_diff, 'New tweets', imgdir, 'new_tweets_%s' % self.user)
        DailyHistogram(dates[:-1], followers_diff, 'New followers', imgdir, 'new_followers_%s' % self.user)
        DailyHistogram(dates[:-1], rts_diff[1:], 'New RTs', imgdir, 'new_rts_%s' % self.user)
        WeekPunchCard(dates[:-1], tweets_diff, 'Tweets punchcard', imgdir, 'tweets_card_%s' % self.user)
        WeekPunchCard(dates[:-1], followers_diff, 'Followers punchcard', imgdir, 'followers_card_%s' % self.user)
        WeekPunchCard(dates[:-1], rts_diff[1:], 'RTs punchcard', imgdir, 'rts_card_%s' % self.user)
    except Exception as e:
        loggerr("Could not write images in web/img for %s : %s" % (self.user, e), action="stats")
    data = {'user': self.user, 'url': self.public_url}
    self.render_template("static_stats.html", self.user, data)
    returnValue(True)
def render_template(self, template, name, data):
    """Render `self.templates/template` with pystache `data` into the
    UTF-8 file web/<template>_<name>.html (mode 0644).

    Returns:
        True on success, False (after logging) on IOError.
    """
    outfile = template.replace('.html', '_%s.html' % name)
    try:
        ofile = os.path.join("web", outfile)
        # contextlib.nested is deprecated (removed in Python 3); use the
        # equivalent multi-context `with` statement (2.7+).
        with open(os.path.join(self.templates, template), "r") as temp, \
             codecs.open(ofile, "w", encoding="utf-8") as generated:
            generated.write(Renderer(string_encoding='utf8').render(temp.read(), data))
        os.chmod(ofile, 0o644)
        return True
    except IOError as e:
        loggerr("Could not write web/%s from %s/%s : %s" % (outfile, self.templates, template, e), action="stats")
        return False
def dump_data(self):
    """Export this user's Twitter stats (read synchronously from MongoDB)
    as JSON + PNG charts under web/, then regenerate the static stats
    HTML page. No-op when self.url is unset; write failures are logged.
    """
    if not self.url:
        return
    self.db.authenticate(config.MONGODB['USER'], config.MONGODB['PSWD'])
    # All stat snapshots for this user, oldest first.
    stats = list(self.db['stats'].find({'user': self.user}, sort=[('timestamp', pymongo.ASCENDING)]))
    dates = [s['timestamp'] for s in stats]
    tweets = [s['tweets'] for s in stats]
    # Per-interval deltas between consecutive snapshots (len = len-1).
    tweets_diff = [a - b for a, b in zip(tweets[1:], tweets[:-1])]
    followers = [s['followers'] for s in stats]
    followers_diff = [a - b for a, b in zip(followers[1:], followers[:-1])]
    rts_diff = [s['rts_last_hour'] for s in stats]
    # Running cumulative total of RTs since the first snapshot.
    rts = []
    n = 0
    for a in rts_diff:
        n += a
        rts.append(n)
    # Build {unix_ts: {...}} payload; diffs only exist up to the
    # second-to-last snapshot.
    jsondata = {}
    imax = len(dates) - 1
    for i, date in enumerate(dates):
        ts = int(time.mktime(date.timetuple()))
        jsondata[ts] = {'tweets': tweets[i], 'followers': followers[i], 'rts': rts[i]}
        if i < imax:
            jsondata[ts].update({'tweets_diff': tweets_diff[i], 'followers_diff': followers_diff[i], 'rts_diff': rts_diff[i + 1]})
    try:
        jsondir = os.path.join('web', 'data')
        if not os.path.exists(jsondir):
            os.makedirs(jsondir)
        with open(os.path.join(jsondir, 'stats_%s.json' % self.user), 'w') as outfile:
            write_json(jsondata, outfile)
    except IOError as e:
        loggerr("Could not write web/data/stats_%s.json : %s" % (self.user, e), action="stats")
    # Render the chart images; any plotting failure is logged as a unit.
    try:
        from plots import CumulativeCurve, DailyHistogram, WeekPunchCard
        imgdir = os.path.join('web', 'img')
        if not os.path.exists(imgdir):
            os.makedirs(imgdir)
        CumulativeCurve(dates, tweets, 'Total tweets', imgdir, 'tweets_%s' % self.user)
        CumulativeCurve(dates, followers, 'Total followers', imgdir, 'followers_%s' % self.user)
        CumulativeCurve(dates, rts, 'Total RTs since %s' % dates[0], imgdir, 'rts_%s' % self.user)
        # Diff series are one element shorter, hence the dates[:-1] axes.
        DailyHistogram(dates[:-1], tweets_diff, 'New tweets', imgdir, 'new_tweets_%s' % self.user)
        DailyHistogram(dates[:-1], followers_diff, 'New followers', imgdir, 'new_followers_%s' % self.user)
        DailyHistogram(dates[:-1], rts_diff[1:], 'New RTs', imgdir, 'new_rts_%s' % self.user)
        WeekPunchCard(dates[:-1], tweets_diff, 'Tweets punchcard', imgdir, 'tweets_card_%s' % self.user)
        WeekPunchCard(dates[:-1], followers_diff, 'Followers punchcard', imgdir, 'followers_card_%s' % self.user)
        WeekPunchCard(dates[:-1], rts_diff[1:], 'RTs punchcard', imgdir, 'rts_card_%s' % self.user)
    except Exception as e:
        loggerr("Could not write images in web/img for %s : %s" % (self.user, e), action="stats")
    self.render_template(os.path.join("web", "templates"), "static_stats.html")
def add_version(self, data):
    """Persist a new timestamped snapshot: write each file type in `data`
    to disk (mode 0644), optionally trigger a screenshot when URL_MANET
    is configured, and record the version string.
    """
    version = time.strftime("%y%m%d-%H%M")
    for ftype, content in data.items():
        path = self.get_file(version, ftype)
        try:
            with open(path, "w") as fh:
                fh.write(content)
            os.chmod(path, 0o644)
        except Exception as err:
            loggerr("%s: %s %s" % (self.name, type(err), err), self.channel, "WebMonitor")
    if URL_MANET:
        yield self.save_screenshot(version)
    self.versions.append(version)
def _clean_redir_urls(text, cache_urls, last=False):
    """Resolve shortened/redirected URLs found in `text`, replacing them
    in place; returns (new_text, cache_urls) via defer.returnValue.

    `cache_urls` maps original -> resolved URLs and is both read and
    updated. Freshly rewritten URLs are masked as '##HTTP##...' on
    non-final passes so they are not re-resolved; the final pass
    (`last=True`) unmasks them.
    """
    for res in URL_REGEX.findall(text):
        url00 = res[2].encode('utf-8')
        url0 = url00
        # Tweet-permalink URLs are left untouched.
        if re_tweet_url.search(url0):
            continue
        if not url00.startswith('http'):
            # Skip mentions/hashtags; prefix bare domains with http://.
            if "@" in url00 or url00.startswith('#'):
                continue
            url0 = "http://%s" % url00
        # t.co links sometimes swallow trailing punctuation; strip it.
        if url0.startswith('http://t.co/') and url0[-1] in ".,:\"'":
            url0 = url0[:-1]
        if url0 in cache_urls:
            url1 = cache_urls[url0]
            if url1 == url0:
                continue
        else:
            try:
                agent = ResolverAgent(url0)
                yield agent.resolve()
                url1, cache_urls = clean_url(agent.lastURI, url0, cache_urls)
            except DNSLookupError:
                # DNS failed mid-redirect-chain: keep whatever the agent
                # last reached for real http URLs, else cache the
                # original text unresolved.
                if url00.startswith('http'):
                    url1, cache_urls = clean_url(agent.lastURI, url0, cache_urls)
                else:
                    url1 = url00
                    cache_urls[url0] = url00
            except Exception as e:
                if config.DEBUG and last and url00.startswith('http'):
                    loggerr("%s trying to resolve %s : %s" % (type(e), url0, e), action="utils")
                # 403/30x failures are permanent enough to cache as-is.
                if "403" in str(e) or "Error 30" in str(e):
                    cache_urls[url0] = url00
                url1 = url00
        if not last and url1 != url00 and not re_shorteners.search(url1):
            # Mask so subsequent passes leave this URL alone.
            url1 = url1.replace('http', '##HTTP##')
        try:
            url1 = url1.decode('utf-8')
            text = text.replace(res[0], '%s%s%s' % (res[1], url1, res[4]))
        except:
            if config.DEBUG:
                loggerr("encoding %s" % url1, action="utils")
    if last:
        text = text.replace('##HTTP##', 'http')
    defer.returnValue((text, cache_urls))
def __init__(self, channel='private'):
    """Create the "normal" and "filtered" rotating log files for a
    channel, creating log/ on demand and rolling files over 1 MiB.

    Args:
        channel: IRC channel name; 'private' covers private messages and
            is reported as channel=None in console log lines.
    """
    filename = BOTNAME
    if channel:
        filename += '_' + channel
    if not os.path.isdir('log'):
        os.mkdir('log')
    # Display value for logg/loggerr: private chats report as None. Held
    # in its own variable rather than rebinding `channel` inside the loop
    # — the original mutated `channel`, making the second iteration
    # register its logger under "None_filtered" instead of
    # "<channel>_filtered".
    log_channel = channel if channel != "private" else None
    self.loggers = {}
    for name, suffix in [("normal", ""), ("filtered", "_filtered")]:
        f = str(os.path.join('log', "%s%s.log" % (filename, suffix)))
        self.loggers[name] = getLogger("%s%s" % (channel, suffix))
        if not len(self.loggers[name].handlers):
            self.loggers[name].addHandler(RotatingFileHandler(f, backupCount=1000, encoding="utf-8"))
            self.loggers[name].handlers[0].setFormatter(Formatter('%(asctime)s %(message)s', "%Y-%m-%d %H:%M:%S"))
        # Manual rollover: the handler has no maxBytes, so size is
        # checked here at construction time.
        if os.path.isfile(f) and os.path.getsize(f) > 1024 * 1024:
            logg("Rolling log file %s" % f, color="yellow", action="LOGS", channel=log_channel)
            try:
                self.loggers[name].handlers[0].doRollover()
            except Exception as e:
                loggerr("Rolling file %s crashed: %s\n%s" % (f, self.loggers[name].handlers, e), action="LOGS", channel=log_channel)
def logerr(self, action, message=""):
    """Forward a MongoDB error to the main error logger, tagging the
    action with the current collection and method when known.
    """
    # Build the tag in a fresh local instead of mutating the parameter.
    tag = action
    if self.coll:
        tag += " %s" % self.coll
    if self.method:
        tag += " %s" % self.method
    loggerr("%s. %s" % (tag, message), action="mongodb")