class AutoshunBot(bot.PollingBot):
    COLUMNS = ["ip", "time", "info"]

    # Default UTC offset in hours, used until (or unless) the feed header
    # provides a valid one.
    time_offset = 5

    feed_url = bot.Param(default=AUTOSHUN_CSV_URL)
    use_cymru_whois = bot.BoolParam()

    def poll(self):
        """Fetch the Autoshun CSV feed and emit normalized events."""
        pipe = self._poll(url=self.feed_url)
        if self.use_cymru_whois:
            pipe = pipe | cymruwhois.augment("ip")
        return pipe | self._normalize()

    @idiokit.stream
    def _poll(self, url):
        self.log.info("Downloading %s" % url)
        try:
            info, fileobj = yield utils.fetch_url(url)
        except utils.FetchUrlFailed as fuf:
            # "as" form replaces the deprecated "except X, e" syntax,
            # consistent with the other bots in this file.
            self.log.error("Download failed: %r", fuf)
            idiokit.stop()
        self.log.info("Downloaded")

        # Grab time offset from first line of the CSV.
        # Source file header row may sometimes be empty.
        # rstrip() removes the trailing newline: without it header[-5:]
        # would be e.g. "0500\n" and silently drop the sign character.
        header = fileobj.readline().rstrip()
        if header.startswith("Shunlist as of"):
            offset = -1 * int(header[-5:]) / 100  # ex: -0500 to 5
            self.time_offset = offset if -12 <= offset <= 12 else 5

        yield utils.csv_to_events(fileobj,
                                  columns=self.COLUMNS,
                                  charset=info.get_param("charset"))
class DataplaneBot(bot.PollingBot):
    url = bot.Param()
    use_cymru_whois = bot.BoolParam()

    # The first two column values (ASN and AS name) are ignored.
    COLUMNS = [None, None, "ip", "time", "category"]

    def poll(self):
        """Fetch the Dataplane feed, optionally augmented via Cymru whois."""
        if self.use_cymru_whois:
            return self._poll() | cymruwhois.augment("ip")
        return self._poll()

    @idiokit.stream
    def _poll(self):
        self.log.info("Downloading %s" % self.url)
        try:
            info, fileobj = yield utils.fetch_url(self.url)
        except utils.FetchUrlFailed as fuf:
            # "as" form replaces the deprecated "except X, e" syntax.
            self.log.error("Download failed: %r", fuf)
            return
        self.log.info("Downloaded")

        charset = info.get_param("charset")
        # Skip blank lines and "#" comment lines before CSV parsing.
        filtered = (x for x in fileobj if x.strip() and not x.startswith("#"))
        yield utils.csv_to_events(filtered,
                                  delimiter="|",
                                  columns=self.COLUMNS,
                                  charset=charset)
class BruteForceBlockerBot(bot.PollingBot):
    # Ignore the last column ("id").
    COLUMNS = ["ip", "time", "count", None]

    use_cymru_whois = bot.BoolParam()

    def poll(self):
        """Fetch the BruteForceBlocker list, optionally augmented via Cymru whois."""
        if self.use_cymru_whois:
            return self._poll() | cymruwhois.augment("ip")
        return self._poll()

    @idiokit.stream
    def _poll(
            self,
            url="http://danger.rulez.sk/projects/bruteforceblocker/blist.php"):
        self.log.info("Downloading %s" % url)
        try:
            info, fileobj = yield utils.fetch_url(url)
        except utils.FetchUrlFailed as fuf:
            # "as" form replaces the deprecated "except X, e" syntax,
            # consistent with the other bots in this file.
            self.log.error("Download failed: %r", fuf)
            idiokit.stop(False)
        self.log.info("Downloaded")

        # Drop blank/comment lines, then collapse runs of tabs so the data
        # parses as single-tab-delimited CSV.
        filtered = (x for x in fileobj if x.strip() and not x.startswith("#"))
        lines = (re.sub("\t+", "\t", x) for x in filtered)

        yield (utils.csv_to_events(lines,
                                   delimiter="\t",
                                   columns=self.COLUMNS,
                                   charset=info.get_param("charset"))
               | idiokit.map(self._normalize, url))
def parse_csv(inner, self, filename, fileobj):
    # Parse a CSV file-like object into events, tagging each event with the
    # groups captured from the filename by self.filename_rex
    # (threado-style stream: results are reported through inner.finish).
    match = re.match(self.filename_rex, filename)
    if match is None:
        self.log.error("Filename %r did not match", filename)
        # NOTE(review): presumably inner.finish(False) terminates this
        # generator; otherwise match.groupdict() below would fail on
        # None -- confirm threado semantics.
        inner.finish(False)
    yield inner.sub(utils.csv_to_events(fileobj)
                    | self.normalize(match.groupdict()))
    inner.finish(True)
def poll(self):
    """Download self.url and pipe its CSV rows through normalization.

    A failed download skips this poll round via bot.PollSkipped instead
    of crashing the bot.
    """
    self.log.info("Downloading updates from {0!r}".format(self.url))
    try:
        info, fileobj = yield utils.fetch_url(self.url)
    except utils.FetchUrlFailed as error:
        message = "Downloading {0!r} failed ({1})".format(self.url, error)
        raise bot.PollSkipped(message)
    self.log.info("Updates downloaded from {0!r}".format(self.url))

    parser = utils.csv_to_events(fileobj, columns=self._columns)
    normalizer = idiokit.map(self._normalize)
    yield idiokit.pipe(parser, normalizer)
class CleanMXBot(bot.PollingBot):
    def feed_keys(self, csv_url, csv_name=None, **keys):
        """Yield a (url, name) feed key; name defaults to the URL path's basename."""
        if csv_name is None:
            csv_name = urlparse.urlparse(csv_url)[2].split("/")[-1]
        yield (csv_url, csv_name)

    @idiokit.stream
    def poll(self, url, name):
        try:
            self.log.info("Downloading page from: %r", url)
            info, fileobj = yield utils.fetch_url(url)
        except utils.FetchUrlFailed as e:
            # "as" form replaces the deprecated "except X, e" syntax,
            # consistent with the other bots in this file.
            self.log.error("Failed to download page %r: %r", url, e)
            return

        charset = info.get_param("charset", None)
        # Drop empty lines before CSV parsing.
        lines = (line.strip() for line in fileobj if line.strip())
        yield utils.csv_to_events(lines, charset=charset) | self.normalize(name)
def poll(self):
    """Download the feed, drop blank/comment lines and emit parsed events."""
    self.log.info("Downloading {0}".format(self.feed_url))
    try:
        info, fileobj = yield utils.fetch_url(self.feed_url)
    except utils.FetchUrlFailed as fuf:
        raise bot.PollSkipped("Download failed: {0}".format(fuf))

    # Keep only non-empty lines that are not "#" comments.
    stripped = (raw.strip() for raw in fileobj)
    rows = tuple(line for line in stripped
                 if line and not line.startswith("#"))

    yield idiokit.pipe(
        utils.csv_to_events(rows,
                            columns=COLUMNS,
                            charset=info.get_param("charset", None)),
        _parse())
def poll(self):
    """Fetch the feed URL and pipe its non-comment rows through the parser."""
    self.log.info("Downloading {0}".format(self.feed_url))
    try:
        info, fileobj = yield utils.fetch_url(self.feed_url)
    except utils.FetchUrlFailed as fuf:
        raise bot.PollSkipped("Download failed: {0}".format(fuf))

    kept = []
    for raw in fileobj:
        candidate = raw.strip()
        # Blank lines and "#" comments carry no event data.
        if not candidate or candidate.startswith("#"):
            continue
        kept.append(candidate)

    yield idiokit.pipe(
        utils.csv_to_events(tuple(kept),
                            columns=COLUMNS,
                            charset=info.get_param("charset", None)),
        _parse()
    )
def main(inner, self):
    # Establish the XMPP connection using the command line credentials.
    connection = yield self.xmpp_connect()

    # Enter the configured room under this bot's name.
    room = yield connection.muc.join(self.xmpp_room, self.bot_name)
    self.log.info("Joined room %r", self.xmpp_room)

    # Fetch the URL info and payload as a file-like object. The info
    # object carries e.g. the HTTP(S) headers, unused for now.
    info, fileobj = yield utils.fetch_url(self.csv_url)
    self.log.info("Opened URL %r", self.csv_url)

    # csv_to_events yields abusehelper.core.events.Event objects, which
    # must be converted to XML elements before being sent to the room.
    csv_feed = utils.csv_to_events(fileobj,
                                   delimiter=self.csv_delimiter,
                                   columns=self.csv_columns)
    yield csv_feed | events.events_to_elements() | room | threado.dev_null()