def handle_search(bot, event):
    """Reply with the urls of spidered pages that match the search words.

    The search words are taken from ``event.args``; when none are given the
    usage hint is reported through ``event.missing`` and nothing else happens.

    Urls are gathered from two places: stored objects whose filename matches
    the words, and the urls produced by ``stats_response`` from the stats that
    ``makestats`` builds over the objects whose ``txt`` matches ``event.rest``
    (or over every object in the collection when that search finds nothing).

    At most three urls are shown unless fewer than four were found or the
    ``all`` option is set on the event.

    ``bot`` is not used by this handler.
    """
    if not event.options:
        event.makeoptions()
    # Named show_all so the builtin all() is not shadowed.
    show_all = event.options.all
    target = event.args
    if not target:
        event.missing("<search words separated by space>")
        return

    coll = PersistCollection(os.sep.join((getdatadir(), "spider", "data")))

    res = []
    # filenames() may hand back something falsy; treat that as "no matches".
    for fname in coll.filenames(target) or ():
        try:
            res.append(Persist(fname).data.url)
        except AttributeError:
            # Stored object without usable data/url - nothing to report.
            continue

    # Fall back to the whole collection when the text search finds nothing.
    objs = coll.search("txt", event.rest) or list(coll.objects().values())
    stats = makestats(objs, target, res)
    res.extend(stats_response(stats, target))

    if not res:
        event.reply("no urls found")
        return

    if len(res) < 4 or show_all:
        event.reply("found %s urls: " % len(res), res, dot=" -or- ")
    else:
        event.reply(
            "found %s urls, use --all for more: " % len(res),
            res[:3],
            dot=" -or- ",
        )
def markovlearnspider(target):
    """Feed the text of spidered pages matching ``target`` to the markov learner.

    A leading ``spider://`` on ``target`` is stripped. The spider collection is
    searched on its ``url`` field, and for every object whose url contains the
    target the stored text is split into lines; each line is passed through
    ``striphtml`` and then to ``markovtalk_learn``. Lines containing more than
    one ";" are skipped.

    An error raised while learning from one object is handed to
    ``handle_exception`` and the loop moves on to the next object.
    """
    logging.warning("starting spider learn on %s", target)
    coll = PersistCollection(getdatadir() + os.sep + "spider" + os.sep + "data")

    prefix = "spider://"
    if target.startswith(prefix):
        target = target[len(prefix):]

    for obj in coll.search("url", target):
        data = obj.data
        # Skip objects without data or without a url. The previous test,
        # `not obj.data and obj.data.url`, bound as `(not obj.data) and ...`,
        # so it dereferenced missing data and let url-less objects through to
        # the substring check below.
        url = getattr(data, "url", None) if data else None
        if not url:
            continue
        # NOTE(review): tiny pause per object, presumably to yield to other
        # threads while walking a large collection - confirm.
        time.sleep(0.001)
        if target not in url:
            continue
        logging.warning("url is %s", url)
        try:
            if data.txt:
                for line in data.txt.split("\n"):
                    if line.count(";") > 1:
                        continue
                    markovtalk_learn(striphtml(line))
        except Exception:
            # Best effort: report the failure and continue with the next
            # object, without trapping KeyboardInterrupt/SystemExit.
            handle_exception()
def __init__(self, *args, **kwargs):
    """Initialise the collection on the spider data directory.

    ``self.path`` is set to ``<datadir>/spider/data/`` (joined with
    ``os.sep``, trailing separator included) and is passed, followed by any
    extra positional and keyword arguments, to ``PersistCollection.__init__``.
    """
    # The empty last segment produces the trailing separator.
    segments = (getdatadir(), "spider", "data", "")
    self.path = os.sep.join(segments)
    PersistCollection.__init__(self, self.path, *args, **kwargs)