Python get_url_content Exemples, pyquickhelper.get_url_content Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : discours_politique.py Projet : vincentCGI/ensae_teaching_cs

def get_elysee_speech_from_elysees(
        title, url="http://www.elysee.fr/chronologie/article/"):
    """
    Retrieves the text of a document from the Elysée website.

    @param      title       title of the document, or a full address
                            (anything starting with ``http``) which is
                            then fetched as is
    @param      url         website prefix, it must end with ``/``
    @return                 result of ``xmlParsingLongestDiv`` applied to
                            the downloaded page, or None if the page
                            could not be downloaded

    The function tries something like::

        url + title.replace(" ","-") + "/"

    An exception is raised if *url* does not end with ``/``.
    """
    if title.startswith("http"):
        full = title
    else:
        if not url.endswith("/"):
            raise Exception("url should end with /: " + url)
        link = remove_accent(title.lower()).replace(
            " ", "-").replace("'", "-").replace('"', "")
        # url already ends with "/": adding another separator would
        # produce ".../article//link/"
        full = url + link + "/"
    try:
        text = get_url_content(full)
    except Exception:
        # best effort: a page which cannot be fetched is reported as None
        return None
    return xmlParsingLongestDiv(text)

Exemple #2

0

Afficher le fichier

Fichier : discours_politique.py Projet : xugonglei/ensae_teaching_cs

def get_elysee_speech_from_elysees(
        title, url="http://www.elysee.fr/chronologie/article/"):
    """
    Retrieves the text of a document from the Elysée website.

    @param      title       title of the document, or a full address
                            (anything starting with ``http``) which is
                            then fetched as is
    @param      url         website prefix, it must end with ``/``
    @return                 result of ``xmlParsingLongestDiv`` applied to
                            the downloaded page, or None if the page
                            could not be downloaded

    The function tries something like::

        url + title.replace(" ","-") + "/"

    An exception is raised if *url* does not end with ``/``.
    """
    if title.startswith("http"):
        full = title
    else:
        if not url.endswith("/"):
            raise Exception("url should end with /: " + url)
        link = remove_accent(title.lower()).replace(" ", "-").replace(
            "'", "-").replace('"', "")
        # url is guaranteed to end with "/" here, so no extra separator
        # is inserted (it used to produce ".../article//link/")
        full = url + link + "/"
    try:
        text = get_url_content(full)
    except Exception:
        # best effort: a page which cannot be fetched is reported as None
        return None
    return xmlParsingLongestDiv(text)

Exemple #3

0

Afficher le fichier

    def test_flask(self):
        """
        Starts the Flask application in a thread, fetches a few pages,
        posts a shutdown request and checks the thread stops.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        if "travis" in sys.executable:
            # skip travis and Flask
            return

        th = FlaskInThread(app, host="localhost", port=8025)
        th.start()

        site = "http://localhost:8025/"

        # main page: the expected text must be searched in the content,
        # asserting a non-empty string alone is always true
        c = get_url_content(site)
        assert "Simple Flask Site" in c

        # exception
        c = get_url_content(site + "help/exception")
        assert "STACK:" in c

        # help for
        c = get_url_content(site + "help/ask/for/help")
        fLOG(c)
        assert "help for command: ask/for/help" in c

        # shutdown
        c = requests.post(site + "shutdown/")
        fLOG(c.text)
        assert "Server shutting down..." in c.text

        # gives the server up to five seconds to stop
        nb = 0
        while th.is_alive() and nb < 5:
            fLOG("waiting...", nb)
            time.sleep(1)
            nb += 1

        if th.is_alive():
            fLOG("thread is still alive (1)?", th.is_alive())
            # explicit raise: ``assert False`` disappears with python -O
            raise AssertionError("thread is still alive")

Exemple #4

0

Afficher le fichier

Fichier : discours_politique.py Projet : xugonglei/ensae_teaching_cs

def enumerate_speeches_from_elysees(skip=0, use_json=False):
    """
    Enumerates the speeches listed by the Elysée website.

    @param      skip        number of leading entries to skip (events of
                            the JSON feed or links of the XML feed)
    @param      use_json    use the JSON feed instead of the XML one
                            (json format is incomplete)
    @return                 iterator on dictionaries, keys are
                            ``text``, ``title``, ``date``, ``description``
                            for the JSON feed, ``link``, ``text`` for the
                            XML feed

    Entries whose content cannot be retrieved
    (``get_elysee_speech_from_elysees`` returns None) are not yielded.

    @example(Retrieve the speeches of the French president)

    @code
    for i,disc in enumerate(enumerate_speeches_from_elysees()):
        print(disc)
    @endcode

    @endexample

    """
    if use_json:
        url = "http://www.elysee.fr/chronologie/download/json"
        js = retrieve_speeches_json(url)
        for i, event in enumerate(js):
            if i < skip:
                continue
            items = event.get("items", None)
            title = event.get("title", None)
            if items is None or not title:
                continue
            # an event is kept if one of its items is a dictionary
            # whose title mentions "discours"
            is_speech = any(
                isinstance(it, dict) and
                it.get("title") is not None and
                "discours" in it["title"]
                for it in items)
            if not is_speech:
                continue
            content = get_elysee_speech_from_elysees(title)
            if content is not None:
                yield dict(text=content,
                           title=title,
                           date=event.get("date", None),
                           description=event.get("description", None))
    else:
        url = "http://www.elysee.fr/chronologie/download/xml"
        xml = get_url_content(url)
        reg = re.compile(r"(http://.*?/article/.*?/)")
        links = reg.findall(xml)
        for i, link in enumerate(links):
            # skip applies to this feed as well, it used to be ignored
            if i < skip:
                continue
            content = get_elysee_speech_from_elysees(link)
            if content is not None:
                yield dict(link=link, text=content)

Exemple #5

0

Afficher le fichier

Fichier : discours_politique.py Projet : xugonglei/ensae_teaching_cs

def retrieve_speeches_json(
        url="http://www.elysee.fr/chronologie/download/json"):
    """
    Downloads the JSON list of speeches published by the Elysée
    and parses it.

    @param      url     address of the JSON feed
    @return             parsed JSON content (list of documents)
    """
    # the downloaded text is wrapped in an in-memory stream for json.load
    with io.StringIO(get_url_content(url)) as buffer:
        return json.load(buffer)

Exemple #6

0

Afficher le fichier

Fichier : test_documentation_server.py Projet : ped4747/pyquickhelper

 def test_server_start_run(self):
     """
     Starts the documentation server on port 8094, fetches the main page
     and the search page, then checks the server thread stops.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     path = os.path.abspath(os.path.dirname(__file__))
     data = os.path.join(path, "data")

     server = 'localhost'
     thread = run_doc_server(server, {"pyquickhelper": data}, True, port=8094)

     try:
         url = "http://localhost:8094/pyquickhelper/"
         cont = get_url_content(url)
         assert len(cont) > 0
         assert "GitHub/pyquickhelper</a>" in cont
         fLOG("-------")
         url = "http://localhost:8094/pyquickhelper/search.html?q=flog&check_keywords=yes&area=default"
         cont = get_url_content(url)
         assert len(cont) > 0
         assert "Please activate JavaScript to enable the search" in cont
         assert "http://sphinx.pocoo.org/" in cont
     finally:
         # the server is stopped even if a check above fails, otherwise
         # the thread keeps running and the port stays in use
         thread.shutdown()
     assert not thread.is_alive()

Exemple #7

0

Afficher le fichier

Fichier : discours_politique.py Projet : vincentCGI/ensae_teaching_cs

def enumerate_speeches_from_elysees(skip=0, use_json=False):
    """
    Enumerates the speeches listed by the Elysée website.

    @param      skip        number of leading entries to skip (events of
                            the JSON feed or links of the XML feed)
    @param      use_json    use the JSON feed instead of the XML one
                            (json format is incomplete)
    @return                 iterator on dictionaries, keys are
                            ``text``, ``title``, ``date``, ``description``
                            for the JSON feed, ``link``, ``text`` for the
                            XML feed

    Entries whose content cannot be retrieved
    (``get_elysee_speech_from_elysees`` returns None) are not yielded.

    @example(Retrieve the speeches of the French president)

    @code
    for i,disc in enumerate(enumerate_speeches_from_elysees()):
        print(disc)
    @endcode

    @endexample

    """
    if use_json:
        url = "http://www.elysee.fr/chronologie/download/json"
        js = retrieve_speeches_json(url)
        for i, event in enumerate(js):
            if i < skip:
                continue
            items = event.get("items", None)
            title = event.get("title", None)
            if items is None or not title:
                continue
            # an event is kept if one of its items is a dictionary
            # whose title mentions "discours"
            is_speech = any(
                isinstance(it, dict) and
                it.get("title") is not None and
                "discours" in it["title"]
                for it in items)
            if not is_speech:
                continue
            content = get_elysee_speech_from_elysees(title)
            if content is not None:
                yield dict(text=content,
                           title=title,
                           date=event.get("date", None),
                           description=event.get("description", None))
    else:
        url = "http://www.elysee.fr/chronologie/download/xml"
        xml = get_url_content(url)
        reg = re.compile(r"(http://.*?/article/.*?/)")
        links = reg.findall(xml)
        for i, link in enumerate(links):
            # skip applies to this feed as well, it used to be ignored
            if i < skip:
                continue
            content = get_elysee_speech_from_elysees(link)
            if content is not None:
                yield dict(link=link, text=content)

Exemple #8

0

Afficher le fichier

Fichier : discours_politique.py Projet : vincentCGI/ensae_teaching_cs

def retrieve_speeches_json(
        url="http://www.elysee.fr/chronologie/download/json"):
    """
    Downloads the JSON list of speeches published by the Elysée
    and parses it.

    @param      url     address of the JSON feed
    @return             parsed JSON content (list of documents)
    """
    # the downloaded text is wrapped in an in-memory stream for json.load
    with io.StringIO(get_url_content(url)) as buffer:
        return json.load(buffer)

Exemple #9

0

Afficher le fichier

Fichier : documentation_server.py Projet : pjc42/pyquickhelper

    def serve_content(self, cpath, method="GET"):
        """
        Tells what to do based on the path.

        An empty path or ``/`` displays the main page. Otherwise the first
        segment of the path is a project name looked up in
        ``DocumentationHandler.mappings`` and the remaining segments form
        the link inside that project:

        * no mapping: the request is logged and nothing is sent,
        * mapping ``shut://``: the server is shut down,
        * mapping ``http://``: the rest of the path is fetched with
          ``get_url_content`` and the result is sent back,
        * any other mapping is used as a local folder: the file
          (``index.html`` if the link is empty) is sent back, rendered
          as HTML for source code extensions, or executed when its type
          is ``execute`` and the query parameter ``execute`` allows it.

        If you type ``http://localhost:8080/root/file``,
        assuming ``root`` is mapped to a local folder.
        It will display this file.

        @param      cpath        ParseResult (``path`` and ``query`` are used)
        @param      method      GET or POST (not used by this method)
        """
        if cpath.path == "" or cpath.path == "/":
            # NOTE(review): params is parsed but not used in this branch
            params = parse_qs(cpath.query)
            self.serve_main_page()
        else:
            params = parse_qs(cpath.query)
            params["__path__"] = cpath

            # fullurl = cpath.geturl()
            fullfile = cpath.path
            params["__url__"] = cpath
            spl = fullfile.strip("/").split("/")

            # first segment selects the mapping, the others the resource
            project = spl[0]
            link = "/".join(spl[1:])
            value = DocumentationHandler.mappings.get(project, None)

            if value is None:
                # unknown project: logged only, no response is sent
                self.LOG("can't serve", cpath)
                self.LOG("with params", params)
                return
                #raise KeyError("unable to find a mapping associated to: " + project + "\nURL:\n" + url + "\nPARAMS:\n" + str(params))

            if value == "shut://":
                self.LOG("call shutdown")
                self.shutdown()

            elif value == "http://":
                self.send_response(200)
                self.send_headers("debug.html")
                # what remains after removing "/<project>/" is the url to fetch
                url = cpath.path.replace("/%s/" % project, "")
                try:
                    content = get_url_content(url)
                except Exception as e:
                    # the error is shown in the page instead of being raised
                    content = "<html><body>ERROR (2): %s</body></html>" % e
                self.feed(content, False, params={})

            else:
                if ".." in link:
                    # we avoid that case to prevent users from digging others paths
                    # than the mapped ones, just in that the browser does not
                    # remove them
                    self.send_error(404)
                    self.feed("Requested resource %s unavailable" % link)
                else:
                    localpath = link
                    if localpath in [None, "/", ""]:
                        localpath = "index.html"
                    fullpath = os.path.join(value, localpath)
                    self.LOG("localpath ", fullpath, os.path.isfile(fullpath))

                    # NOTE(review): status 200 is sent before knowing whether
                    # the file exists, send_error(404) may follow below
                    # - confirm this is intended
                    self.send_response(200)
                    _, ftype = self.get_ftype(localpath)

                    # NOTE(review): eval is applied to a value taken from the
                    # query string, any expression placed in the url gets
                    # evaluated - consider a strict boolean parsing instead
                    execute = eval(params.get("execute", ["True"])[0])
                    spath = params.get("path", [None])[0]
                    # keep = eval(params.get("keep", ["False"])[0])

                    if ftype != 'execute' or not execute:
                        # the file content is sent, not its execution
                        content = self.get_file_content(fullpath, ftype, spath)
                        if content is None:
                            self.LOG("** w,unable to get file for key:", spath)
                            self.send_error(404)
                            self.feed(
                                "Requested resource %s unavailable" % localpath)
                        else:
                            ext = os.path.splitext(localpath)[-1].lower()
                            if ext in [
                                    ".py", ".c", ".cpp", ".hpp", ".h", ".r", ".sql", ".java"]:
                                # source code goes through the HTML code renderer
                                self.send_headers(".html")
                                self.feed(
                                    DocumentationHandler.html_code_renderer(localpath, content))
                            elif ext in [".html"]:
                                # html pages are processed by process_html_path
                                # before being sent
                                content = DocumentationHandler.process_html_path(
                                    project, content)
                                self.send_headers(localpath)
                                self.feed(content)
                            else:
                                self.send_headers(localpath)
                                self.feed(content)
                    else:
                        # the file is executed, its output is sent unless the
                        # error output is not empty
                        self.LOG("execute file ", localpath)
                        out, err = DocumentationHandler.execute(localpath)
                        if len(err) > 0:
                            self.send_error(404)
                            self.feed(
                                "Requested resource %s unavailable" % localpath)
                        else:
                            self.send_headers(localpath)
                            self.feed(out)