def get_elysee_speech_from_elysees(
        title, url="http://www.elysee.fr/chronologie/article/"):
    """
    Retrieves the text of a speech from the Elysée website.

    @param      title       title of the document, or a complete url
                            (any value starting with ``http``), which is
                            then fetched unchanged
    @param      url         base url of the website, it must end with ``/``
    @return                 the outcome of ``xmlParsingLongestDiv`` applied
                            to the downloaded page, ``None`` if the page
                            could not be downloaded

    When *title* is not a url, the function builds the address as::

        url + slug + "/"

    where ``slug`` is the lower-cased title without accents, spaces and
    single quotes replaced by ``-`` and double quotes removed.

    @raise      Exception   if *url* does not end with ``/``
                            (only checked when *title* is not a url)
    """
    if title.startswith("http"):
        full = title
    else:
        if not url.endswith("/"):
            raise Exception("url should end with /: " + url)
        link = remove_accent(title.lower()).replace(
            " ", "-").replace("'", "-").replace('"', "")
        # url is guaranteed to end with "/" at this point: adding another
        # separator would produce ".../article//slug/".
        full = url + link + "/"

    try:
        text = get_url_content(full)
    except Exception:
        # Best effort: a page which cannot be fetched is reported as None
        # so that callers can simply skip it.
        return None
    return xmlParsingLongestDiv(text)
def get_elysee_speech_from_elysees(
        title, url="http://www.elysee.fr/chronologie/article/"):
    """
    Retrieves the text of a speech from the Elysée website.

    @param      title       title of the document, or a complete url
                            (any value starting with ``http``), which is
                            then fetched unchanged
    @param      url         base url of the website, it must end with ``/``
    @return                 the outcome of ``xmlParsingLongestDiv`` applied
                            to the downloaded page, ``None`` if the page
                            could not be downloaded

    When *title* is not a url, the function builds the address as::

        url + slug + "/"

    where ``slug`` is the lower-cased title without accents, spaces and
    single quotes replaced by ``-`` and double quotes removed.

    @raise      Exception   if *url* does not end with ``/``
                            (only checked when *title* is not a url)
    """
    if title.startswith("http"):
        full = title
    else:
        if not url.endswith("/"):
            raise Exception("url should end with /: " + url)
        link = remove_accent(title.lower()).replace(
            " ", "-").replace("'", "-").replace('"', "")
        # url is guaranteed to end with "/" at this point: adding another
        # separator would produce ".../article//slug/".
        full = url + link + "/"

    try:
        text = get_url_content(full)
    except Exception:
        # Best effort: a page which cannot be fetched is reported as None
        # so that callers can simply skip it.
        return None
    return xmlParsingLongestDiv(text)
def test_flask(self):
    """
    Starts the Flask application in a thread on ``localhost:8025``,
    queries the main page, ``help/exception`` and ``help/ask/for/help``,
    then posts to ``shutdown/`` and waits up to five seconds for the
    thread to stop. The test is skipped when the interpreter path
    contains ``travis``.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    if "travis" in sys.executable:
        # skip travis and Flask
        return

    th = FlaskInThread(app, host="localhost", port=8025)
    th.start()

    site = "http://localhost:8025/"

    # main page
    c = get_url_content(site)
    # The content must be checked: asserting the bare string literal
    # is always true and verifies nothing.
    assert "Simple Flask Site" in c

    # exception
    c = get_url_content(site + "help/exception")
    assert "STACK:" in c

    # help for
    c = get_url_content(site + "help/ask/for/help")
    fLOG(c)
    assert "help for command: ask/for/help" in c

    # shutdown
    c = requests.post(site + "shutdown/")
    fLOG(c.text)
    assert "Server shutting down..." in c.text

    # leaves the server thread up to five seconds to terminate
    nb = 0
    while th.is_alive() and nb < 5:
        fLOG("waiting...", nb)
        time.sleep(1)
        nb += 1

    if th.is_alive():
        fLOG("thread is still alive (1)?", th.is_alive())
        assert False
def enumerate_speeches_from_elysees(skip=0, use_json=False):
    """
    Enumerates speeches published on the Elysée website.

    @param      skip        number of leading entries to skip (events in the
                            JSON export, links in the XML export)
    @param      use_json    use the JSON export instead of the XML one
                            (the JSON format is incomplete)
    @return                 enumerates dictionaries: keys ``text``, ``title``,
                            ``date``, ``description`` with the JSON export,
                            keys ``link``, ``text`` with the XML export

    Entries whose page cannot be retrieved
    (``get_elysee_speech_from_elysees`` returns ``None``) are silently
    left out.

    @example(Retrieve speeches of the President of the French Republic)
    @code
    for i,disc in enumerate(enumerate_speeches_from_elysees()):
        print(disc)
    @endcode
    @endexample
    """
    if use_json:
        url = "http://www.elysee.fr/chronologie/download/json"
        js = retrieve_speeches_json(url)
        for i, event in enumerate(js):
            if i < skip:
                continue
            items = event.get("items", None)
            title = event.get("title", None)
            if items is None or title is None or len(title) == 0:
                continue

            # An event is downloaded only if one of its items is a
            # dictionary whose title mentions "discours".
            has_speech = any(
                isinstance(it, dict) and "discours" in (it.get("title") or "")
                for it in items)
            if not has_speech:
                continue

            content = get_elysee_speech_from_elysees(title)
            if content is not None:
                yield dict(text=content, title=title,
                           date=event.get("date", None),
                           description=event.get("description", None))
    else:
        url = "http://www.elysee.fr/chronologie/download/xml"
        xml = get_url_content(url)
        reg = re.compile("(http://.*?/article/.*?/)")
        links = reg.findall(xml)
        for i, link in enumerate(links):
            # same convention as the JSON branch: the first *skip*
            # entries are ignored (and not downloaded)
            if i < skip:
                continue
            content = get_elysee_speech_from_elysees(link)
            if content is not None:
                yield dict(link=link, text=content)
def retrieve_speeches_json(
        url="http://www.elysee.fr/chronologie/download/json"):
    """
    Downloads the JSON export of the speeches from the Elysée website
    and decodes it.

    @param      url     url of the JSON export
    @return             decoded JSON content (list of documents)
    """
    with io.StringIO(get_url_content(url)) as buffer:
        return json.load(buffer)
def test_server_start_run(self):
    """
    Starts the documentation server on ``localhost:8094`` with the
    ``data`` folder next to this file mapped to ``pyquickhelper``,
    checks the main page and the search page, then shuts the server
    down and checks the thread has stopped.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    path = os.path.abspath(os.path.split(__file__)[0])
    data = os.path.join(path, "data")

    server = 'localhost'
    thread = run_doc_server(
        server, {"pyquickhelper": data}, True, port=8094)

    # The server is shut down even when a check fails, otherwise the
    # thread keeps running (and keeps the port busy) after the test.
    try:
        url = "http://localhost:8094/pyquickhelper/"
        cont = get_url_content(url)
        assert len(cont) > 0
        assert "GitHub/pyquickhelper</a>" in cont
        fLOG("-------")

        url = "http://localhost:8094/pyquickhelper/search.html?q=flog&check_keywords=yes&area=default"
        cont = get_url_content(url)
        assert len(cont) > 0
        assert "Please activate JavaScript to enable the search" in cont
        assert "http://sphinx.pocoo.org/" in cont
    finally:
        thread.shutdown()

    assert not thread.is_alive()
def serve_content(self, cpath, method="GET"):
    """
    Tells what to do based on the path.

    An empty path or ``/`` displays the main page. Otherwise the first
    segment of the path is a project name looked up in
    ``DocumentationHandler.mappings`` and the rest of the path is the
    requested resource:

    * no mapping: the request is logged and the method returns without
      writing a response,
    * mapping ``shut://``: the server is shut down,
    * mapping ``http://``: the remaining path is fetched with
      ``get_url_content`` and its content is sent back,
    * any other mapping is used as a local folder: if you type
      ``http://localhost:8080/root/file``, assuming ``root`` is mapped
      to a local folder, it will display this file
      (``index.html`` when no file is given).

    NOTE(review): the previous documentation mentioned an intercepted
    path ``/localfile/`` and a call to ``serve_content_web``; neither
    appears in this body.

    @param      cpath       ParseResult
    @param      method      GET or POST (not used by this method)
    """
    if cpath.path == "" or cpath.path == "/":
        # NOTE(review): params is computed but not used in this branch
        params = parse_qs(cpath.query)
        self.serve_main_page()
    else:
        params = parse_qs(cpath.query)
        params["__path__"] = cpath
        # fullurl = cpath.geturl()
        fullfile = cpath.path
        # "__url__" receives the same object as "__path__"
        params["__url__"] = cpath
        spl = fullfile.strip("/").split("/")

        # first segment = project, remaining segments = resource
        project = spl[0]
        link = "/".join(spl[1:])
        value = DocumentationHandler.mappings.get(project, None)

        if value is None:
            # unknown project: logged only, no status or body is written
            # by this method
            self.LOG("can't serve", cpath)
            self.LOG("with params", params)
            return
            # disabled alternative: raise instead of returning silently
            #raise KeyError("unable to find a mapping associated to: " + project + "\nURL:\n" + url + "\nPARAMS:\n" + str(params))

        if value == "shut://":
            self.LOG("call shutdown")
            self.shutdown()

        elif value == "http://":
            self.send_response(200)
            self.send_headers("debug.html")
            # str.replace removes every occurrence of "/<project>/",
            # not only the leading one
            url = cpath.path.replace("/%s/" % project, "")
            try:
                content = get_url_content(url)
            except Exception as e:
                # the failure is reported to the client as an html page
                content = "<html><body>ERROR (2): %s</body></html>" % e
            self.feed(content, False, params={})

        else:
            if ".." in link:
                # this case is rejected to prevent users from reaching
                # paths outside the mapped folders, in case the browser
                # did not remove the ".." itself
                self.send_error(404)
                self.feed("Requested resource %s unavailable" % link)
            else:
                localpath = link
                if localpath in [None, "/", ""]:
                    localpath = "index.html"
                fullpath = os.path.join(value, localpath)
                self.LOG("localpath ", fullpath, os.path.isfile(fullpath))

                # NOTE(review): status 200 is sent before the file is
                # looked up; the error paths below call send_error(404)
                # afterwards -- confirm this is the intended sequence
                self.send_response(200)
                _, ftype = self.get_ftype(localpath)

                # SECURITY NOTE(review): eval is applied to the raw value
                # of the query parameter "execute", which comes from the
                # request; any Python expression sent by a client gets
                # evaluated. A strict boolean parsing should replace it.
                execute = eval(params.get("execute", ["True"])[0])
                spath = params.get("path", [None])[0]
                # keep = eval(params.get("keep", ["False"])[0])

                if ftype != 'execute' or not execute:
                    # the file content is sent back, not executed
                    content = self.get_file_content(fullpath, ftype, spath)
                    if content is None:
                        self.LOG("** w,unable to get file for key:", spath)
                        self.send_error(404)
                        self.feed(
                            "Requested resource %s unavailable" % localpath)
                    else:
                        ext = os.path.splitext(localpath)[-1].lower()
                        if ext in [
                                ".py", ".c", ".cpp", ".hpp", ".h", ".r", ".sql", ".java"]:
                            # source files go through html_code_renderer
                            # and are sent with the headers of ".html"
                            self.send_headers(".html")
                            self.feed(
                                DocumentationHandler.html_code_renderer(localpath, content))
                        elif ext in [".html"]:
                            # html pages go through process_html_path
                            # with the project name before being sent
                            content = DocumentationHandler.process_html_path(
                                project, content)
                            self.send_headers(localpath)
                            self.feed(content)
                        else:
                            self.send_headers(localpath)
                            self.feed(content)
                else:
                    self.LOG("execute file ", localpath)
                    # NOTE(review): execute receives localpath (relative
                    # to the mapping), not fullpath -- confirm
                    out, err = DocumentationHandler.execute(localpath)
                    if len(err) > 0:
                        self.send_error(404)
                        self.feed(
                            "Requested resource %s unavailable" % localpath)
                    else:
                        self.send_headers(localpath)
                        self.feed(out)