def cmd_reduceurl(msg):
    """Shorten one or several URLs with the configured (or requested) provider."""
    targets = []

    if not len(msg.args):
        # No explicit URL: fall back on the last URL seen on this channel.
        global LAST_URLS
        if msg.channel in LAST_URLS and len(LAST_URLS[msg.channel]) > 0:
            targets.append(LAST_URLS[msg.channel].pop())
        else:
            raise IMException("I have no more URL to reduce.")

    if len(msg.args) > 4:
        raise IMException("I cannot reduce that many URLs at once.")
    else:
        targets += msg.args

    # Honor an explicitly requested provider only when it is a known one.
    if 'provider' in msg.kwargs and msg.kwargs['provider'] in PROVIDERS:
        provider = msg.kwargs['provider']
    else:
        provider = DEFAULT_PROVIDER

    responses = []
    for url in targets:
        parsed = urlparse(web.getNormalizedURL(url), "http")
        short_url = reduce(url, provider)
        # Label the answer with the netloc when present, else the scheme.
        label = parsed.scheme if parsed.netloc == "" else parsed.netloc
        responses.append(gen_response(short_url, msg, label))
    return responses
def add_site(url, nick, channel, server, diffType="diff"):
    """Add a site to watching list

    Argument:
    url -- URL to watch
    """
    parsed = urlparse(getNormalizedURL(url), "http")
    if parsed.netloc == "":
        raise IMException("sorry, I can't watch this URL :(")

    # Describe who must be notified, and where, when the page changes.
    alert = ModuleState("alert")
    alert["nick"] = nick
    alert["server"] = server
    alert["channel"] = channel
    alert["message"] = "{url} just changed!"

    if url in DATAS.index:
        # Site already watched: just attach one more alert to it.
        DATAS.index[url].addChild(alert)
    else:
        watch = ModuleState("watch")
        watch["type"] = diffType
        watch["url"] = url
        watch["time"] = 123
        DATAS.addChild(watch)
        watch.addChild(alert)
        start_watching(watch)

    save()
    return Response(channel=channel, nick=nick,
                    message="this site is now under my supervision.")
def validator(url):
    """Run the w3c validator on the given URL

    Argument:
    url -- the URL to validate
    """
    parsed = urllib.parse.urlparse(getNormalizedURL(url), "http")
    if parsed.netloc == "":
        raise IMException("Indicate a valid URL!")

    try:
        request = urllib.request.Request(
            "https://validator.w3.org/check?uri=%s&output=json"
            % (urllib.parse.quote(parsed.geturl())),
            headers={'User-Agent': "Nemubot v%s" % __version__})
        raw = urllib.request.urlopen(request, timeout=10)
    except urllib.error.HTTPError as e:
        raise IMException("HTTP error occurs: %s %s" % (e.code, e.reason))

    # Flatten the response headers into a plain dict for easy lookups.
    headers = {hname: hval for hname, hval in raw.getheaders()}

    # Anything other than an explicit Valid/Invalid verdict is a server error.
    status = headers.get("X-W3C-Validator-Status")
    if status != "Valid" and status != "Invalid":
        raise IMException("Unexpected error on W3C servers"
                          + (" (" + headers["X-W3C-Validator-Status"] + ")"
                             if "X-W3C-Validator-Status" in headers else ""))

    return headers, json.loads(raw.read().decode())
def headers(url):
    """Retrieve HTTP header for the given URL

    Argument:
    url -- the page URL to get header
    """
    parsed = urllib.parse.urlparse(web.getNormalizedURL(url), "http")
    if parsed.netloc == "":
        raise IMException("invalid URL")

    # Pick the connection class matching the URL scheme.
    conn_class = (http.client.HTTPConnection if parsed.scheme == "http"
                  else http.client.HTTPSConnection)
    conn = conn_class(parsed.hostname, port=parsed.port, timeout=5)

    try:
        conn.request("HEAD", parsed.path, None,
                     {"User-agent": "Nemubot v%s" % __version__})
    except ConnectionError as e:
        raise IMException(e.strerror)
    except socket.timeout:
        raise IMException("request timeout")
    except socket.gaierror:
        print("<tools.web> Unable to receive page %s from %s on %d."
              % (parsed.path, parsed.hostname,
                 parsed.port if parsed.port is not None else 0))
        raise IMException("an unexpected error occurs")

    try:
        res = conn.getresponse()
    except http.client.BadStatusLine:
        raise IMException("An error occurs")
    finally:
        # Always release the connection, even when getresponse() fails.
        conn.close()

    return (res.version, res.status, res.reason, res.getheaders())
def isup(url):
    """Determine if the given URL is up or not

    Argument:
    url -- the URL to check
    """
    parsed = urllib.parse.urlparse(getNormalizedURL(url), "http")
    if parsed.netloc == "":
        return None

    report = getJSON("https://isitup.org/%s.json" % parsed.netloc)
    # isitup.org answers status_code == 1 when the site responded.
    if report is not None and "status_code" in report and report["status_code"] == 1:
        return report["response_time"]
    return None
def del_site(url, nick, channel, frm_owner):
    """Remove a site from watching list

    Argument:
    url -- URL to unwatch

    Raises:
    IMException -- if the URL is not watched on this channel, or if the
                   requester is not allowed to remove the watch
    """
    o = urlparse(getNormalizedURL(url), "http")
    if o.scheme != "" and url in DATAS.index:
        site = DATAS.index[url]
        for a in site.getNodes("alert"):
            if a["channel"] == channel:
                # Authorization check restored: it was commented out, which
                # left the `frm_owner` parameter dead and let anyone remove
                # someone else's watch. Only the user who registered the
                # alert, or the bot owner, may delete it.
                if not (nick == a["nick"] or frm_owner):
                    raise IMException("you cannot unwatch this URL.")
                site.delChild(a)
                # Drop the whole watch (and its scheduled event) once no
                # alert remains attached to it.
                if not site.hasNode("alert"):
                    del_event(site["_evt_id"])
                    DATAS.delChild(site)
                save()
                return Response("I don't watch this URL anymore.",
                                channel=channel, nick=nick)
    raise IMException("I didn't watch this URL!")
IMException("An error occured while shortening %s." % data) # MODULE VARIABLES #################################################### PROVIDERS = { "tinyurl": (default_reducer, "https://tinyurl.com/api-create.php?url="), "ycc": (ycc_reducer, "https://ycc.fr/redirection/create/"), "framalink": (lstu_reducer, "https://frama.link/a?format=json"), "huitre": (lstu_reducer, "https://huit.re/a?format=json"), "lstu": (lstu_reducer, "https://lstu.fr/a?format=json"), } DEFAULT_PROVIDER = "framalink" PROVIDERS_NETLOC = [ urlparse(web.getNormalizedURL(url), "http").netloc for f, url in PROVIDERS.values() ] # LOADING ############################################################# def load(context): global DEFAULT_PROVIDER if "provider" in context.config: if context.config["provider"] == "custom": PROVIDERS["custom"] = context.config["provider_url"] DEFAULT_PROVIDER = context.config["provider"]
def find_rss_links(url):
    """Yield the absolute URL of every Atom/RSS feed advertised by a page."""
    url = web.getNormalizedURL(url)
    page = BeautifulSoup(web.getURLContent(url))
    feed_type = re.compile("^application/(atom|rss)")
    for link in page.find_all('link', attrs={"type": feed_type}):
        # hrefs may be relative; resolve them against the page URL.
        yield urljoin(url, link["href"])