Example #1
def get_last_news(url):
    from xml.parsers.expat import ExpatError
    try:
        feed = Feed(web.getURLContent(url))
        return feed.entries
    except ExpatError:
        return []
Example #2
def get_last_news(url):
    from xml.parsers.expat import ExpatError
    try:
        feed = Feed(web.getURLContent(url))
        return feed.entries
    except ExpatError:
        return []
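For reference, a minimal standalone sketch of the same idea without nemubot's Feed wrapper and web helpers, using only the standard library; it assumes the usual RSS <channel><item> layout and, like get_last_news() above, returns an empty list on malformed XML:

import xml.etree.ElementTree as ET

def parse_entries(xml_text):
    # Stand-in for Feed(...): collect <item> titles from an RSS document.
    try:
        root = ET.fromstring(xml_text)
    except ET.ParseError:
        # Malformed feed: behave like the ExpatError branch above.
        return []
    return [item.findtext("title") for item in root.iter("item")]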
Example #3
def get_laposte_info(laposte_id):
    data = urllib.parse.urlencode({'id': laposte_id})
    laposte_baseurl = "http://www.part.csuivi.courrier.laposte.fr/suivi/index"

    laposte_data = getURLContent(laposte_baseurl, data.encode('utf-8'))
    soup = BeautifulSoup(laposte_data)
    table = soup.find(class_='resultat_rech_simple_table')
    if (table and table.thead and table.thead.tr
            and table.tbody and table.tbody.tr
            and len(table.tbody.tr.find_all('td')) > 3):
        search_res = table.tbody.tr
        field = search_res.find('td')
        poste_id = field.get_text()

        field = field.find_next('td')
        poste_type = field.get_text()

        field = field.find_next('td')
        poste_date = field.get_text()

        field = field.find_next('td')
        poste_location = field.get_text()

        field = field.find_next('td')
        poste_status = field.get_text()

        return (poste_type.lower(), poste_id.strip(), poste_status.lower(),
                poste_location, poste_date)
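Example #3 walks the result row cell by cell with find() and find_next('td'). A minimal, self-contained sketch of that pattern against an inline HTML snippet (the markup and values are invented for illustration):

from bs4 import BeautifulSoup

html = ("<table class='resultat_rech_simple_table'><tbody><tr>"
        "<td> 1A2B3C </td><td>Lettre suivie</td><td>01/01/2024</td>"
        "<td>Paris</td><td>Distribué</td></tr></tbody></table>")

row = BeautifulSoup(html, "html.parser").find(class_='resultat_rech_simple_table').tbody.tr
field = row.find('td')            # first cell of the row
parcel_id = field.get_text().strip()
field = field.find_next('td')     # move to the next cell in document order
parcel_type = field.get_text()
print(parcel_id, parcel_type)     # -> 1A2B3C Lettre suivie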
Example #4
def lstu_reducer(url, data):
    json_data = json.loads(web.getURLContent(url, "lsturl=" + quote(data),
        header={"Content-Type": "application/x-www-form-urlencoded"}))
    if 'short' in json_data:
        return json_data['short']
    elif 'msg' in json_data:
        raise IMException("Error: %s" % json_data['msg'])
    else:
        IMException("An error occured while shortening %s." % data)
Example #5
def get_conjug(verb, stringTens):
    url = ("https://leconjugueur.lefigaro.fr/conjugaison/verbe/%s.html" %
           quote(verb.encode("ISO-8859-1")))
    page = web.getURLContent(url)

    if page is not None:
        for line in page.split("\n"):
            if re.search('<div class="modeBloc">', line) is not None:
                return compute_line(line, stringTens)
    return list()
Example #6
def get_info_yt(msg):
    soup = BeautifulSoup(getURLContent(URL))

    res = Response(channel=msg.channel, nomore="No more upcoming CTF")

    for line in soup.body.find_all('tr'):
        n = line.find_all('td')
        if len(n) == 7:
            res.append_message("\x02%s:\x0F from %s type %s at %s. Weight: %s. %s%s" %
                               tuple([striphtml(x.text).strip() for x in n]))

    return res
Example #7
def get_colissimo_info(colissimo_id):
    colissimo_data = getURLContent("http://www.colissimo.fr/portail_colissimo/"
                                   "suivre.do?colispart=%s" % colissimo_id)
    soup = BeautifulSoup(colissimo_data)

    dataArray = soup.find(class_='dataArray')
    if dataArray and dataArray.tbody and dataArray.tbody.tr:
        date = dataArray.tbody.tr.find(headers="Date").get_text()
        libelle = re.sub(r'[\n\t\r]', '',
                         dataArray.tbody.tr.find(headers="Libelle").get_text())
        site = dataArray.tbody.tr.find(headers="site").get_text().strip()
        return (date, libelle, site.strip())
Example #8
def get_colisprive_info(track_id):
    data = urllib.parse.urlencode({'numColis': track_id})
    track_baseurl = "https://www.colisprive.com/moncolis/pages/detailColis.aspx"
    track_data = getURLContent(track_baseurl, data.encode('utf-8'))
    soup = BeautifulSoup(track_data)

    dataArray = soup.find(class_='BandeauInfoColis')
    if (dataArray and dataArray.find(class_='divStatut')
            and dataArray.find(class_='divStatut').find(class_='tdText')):
        status = dataArray.find(class_='divStatut') \
                          .find(class_='tdText').get_text()
        return status
Example #9
def lstu_reducer(url, data):
    json_data = json.loads(
        web.getURLContent(
            url,
            "lsturl=" + quote(data),
            header={"Content-Type": "application/x-www-form-urlencoded"}))
    if 'short' in json_data:
        return json_data['short']
    elif 'msg' in json_data:
        raise IMException("Error: %s" % json_data['msg'])
    else:
        IMException("An error occured while shortening %s." % data)
Example #10
def get_colissimo_info(colissimo_id):
    colissimo_data = getURLContent("https://www.laposte.fr/particulier/outils/suivre-vos-envois?code=%s" % colissimo_id)
    soup = BeautifulSoup(colissimo_data)

    dataArray = soup.find(class_='results-suivi')
    if dataArray and dataArray.table and dataArray.table.tbody and dataArray.table.tbody.tr:
        td = dataArray.table.tbody.tr.find_all('td')
        if len(td) > 2:
            date = td[0].get_text()
            libelle = re.sub(r'[\n\t\r]', '', td[1].get_text())
            site = td[2].get_text().strip()
            return (date, libelle, site.strip())
Example #11
def get_tnt_info(track_id):
    values = []
    data = getURLContent('https://www.tnt.fr/public/suivi_colis/recherche/visubontransport.do?bonTransport=%s' % track_id)
    soup = BeautifulSoup(data)
    status_list = soup.find('div', class_='result__content')
    if not status_list:
        return None
    last_status = status_list.find('div', class_='roster')
    if last_status:
        for info in last_status.find_all('div', class_='roster__item'):
            values.append(info.get_text().strip())
    if len(values) == 3:
        return (values[0], values[1], values[2])
Example #12
def get_cve(cve_id):
    search_url = BASEURL_NIST + quote(cve_id.upper())

    soup = BeautifulSoup(getURLContent(search_url))
    vuln = soup.body.find(class_="vuln-detail")
    cvss = vuln.findAll('div')[4]

    return [
        "Base score: " + cvss.findAll('div')[0].findAll('a')[0].text.strip(),
        vuln.findAll('p')[0].text, # description
        striphtml(vuln.findAll('div')[0].text).strip(), # publication date
        striphtml(vuln.findAll('div')[1].text).strip(), # last revised
    ]
Example #13
File: cve.py Project: nbr23/nemubot
def get_cve(cve_id):
    search_url = BASEURL_NIST + quote(cve_id.upper())

    soup = BeautifulSoup(getURLContent(search_url))

    vuln = {}

    for vd in VULN_DATAS:
        r = soup.body.find(attrs={"data-testid": VULN_DATAS[vd]})
        if r:
            vuln[vd] = r.text.strip()

    return vuln
Example #14
def get_cve(cve_id):
    search_url = BASEURL_NIST + quote(cve_id.upper())

    soup = BeautifulSoup(getURLContent(search_url))

    vuln = {}

    for vd in VULN_DATAS:
        r = soup.body.find(attrs={"data-testid": VULN_DATAS[vd]})
        if r:
            vuln[vd] = r.text.strip()

    return vuln
Example #15
def get_chronopost_info(track_id):
    data = urllib.parse.urlencode({'listeNumeros': track_id})
    track_baseurl = "https://www.chronopost.fr/expedier/inputLTNumbersNoJahia.do?lang=fr_FR"
    track_data = getURLContent(track_baseurl, data.encode('utf-8'))
    soup = BeautifulSoup(track_data)

    infoClass = soup.find(class_='numeroColi2')
    if infoClass and infoClass.get_text():
        info = infoClass.get_text().split("\n")
        if len(info) > 1:
            info = info[1].strip().split("\"")
            if len(info) > 2:
                date = info[2]
                libelle = info[1]
                return (date, libelle)
Example #16
def get_usps_info(usps_id):
    usps_parcelurl = "https://tools.usps.com/go/TrackConfirmAction_input?" + urllib.parse.urlencode({'qtc_tLabels1': usps_id})

    usps_data = getURLContent(usps_parcelurl)
    soup = BeautifulSoup(usps_data)
    if (soup.find(class_="tracking_history")
            and soup.find(class_="tracking_history").find(class_="row_notification")
            and soup.find(class_="tracking_history").find(class_="row_top").find_all("td")):
        notification = soup.find(class_="tracking_history").find(class_="row_notification").text.strip()
        date = re.sub(r"\s+", " ", soup.find(class_="tracking_history").find(class_="row_top").find_all("td")[0].text.strip())
        status = soup.find(class_="tracking_history").find(class_="row_top").find_all("td")[1].text.strip()
        last_location = soup.find(class_="tracking_history").find(class_="row_top").find_all("td")[2].text.strip()

        print(notification)

        return (notification, date, status, last_location)
Example #17
def get_json_info(msg):
    if not len(msg.args):
      raise IMException("Please specify a url and a list of JSON keys.")

    request_data = web.getURLContent(msg.args[0].replace(' ', "%20"))
    if not request_data:
      raise IMException("Please specify a valid url.")
    json_data = json.loads(request_data)

    if len(msg.args) == 1:
      raise IMException("Please specify the keys to return (%s)" % ", ".join(getJsonKeys(json_data)))

    tags = ','.join(msg.args[1:]).split(',')
    response = getRequestedTags(tags, json_data)

    return Response(response, channel=msg.channel, nomore="No more content", count=" (%d more lines)")
Example #18
def get_land_tarif(country, forfait="pkgFREE"):
    url = "http://mobile.international.free.fr/?" + urllib.parse.urlencode({'pays': country})
    page = web.getURLContent(url)
    soup = BeautifulSoup(page)

    fact = soup.find(class_=forfait)

    if fact is None:
        raise IMException("Country or forfait not found.")

    res = {}
    for s in ACT.keys():
        try:
            res[s] = fact.find(attrs={"data-bind": "text: " + s}).text + " " + fact.find(attrs={"data-bind": "html: " + s + "Unit"}).text
        except AttributeError:
            res[s] = "inclus"

    return res
Example #19
def get_postnl_info(postnl_id):
    data = urllib.parse.urlencode({'barcodes': postnl_id})
    postnl_baseurl = "http://www.postnl.post/details/"

    postnl_data = getURLContent(postnl_baseurl, data.encode('utf-8'))
    soup = BeautifulSoup(postnl_data)
    if (soup.find(id='datatables') and soup.find(id='datatables').tbody
            and soup.find(id='datatables').tbody.tr):
        search_res = soup.find(id='datatables').tbody.tr
        if len(search_res.find_all('td')) >= 3:
            field = search_res.find('td')
            post_date = field.get_text()

            field = field.find_next('td')
            post_status = field.get_text()

            field = field.find_next('td')
            post_destination = field.get_text()

            return (post_status.lower(), post_destination, post_date)
Example #20
def get_json_info(msg):
    if not len(msg.args):
        raise IMException("Please specify a url and a list of JSON keys.")

    request_data = web.getURLContent(msg.args[0].replace(' ', "%20"))
    if not request_data:
        raise IMException("Please specify a valid url.")
    json_data = json.loads(request_data)

    if len(msg.args) == 1:
        raise IMException("Please specify the keys to return (%s)" %
                          ", ".join(getJsonKeys(json_data)))

    tags = ','.join(msg.args[1:]).split(',')
    response = getRequestedTags(tags, json_data)

    return Response(response,
                    channel=msg.channel,
                    nomore="No more content",
                    count=" (%d more lines)")
Example #21
def get_movie_by_id(imdbid):
    """Returns the information about the matching movie"""

    url = "http://www.imdb.com/title/" + urllib.parse.quote(imdbid)
    soup = BeautifulSoup(web.getURLContent(url))

    return {
        "imdbID": imdbid,
        "Title": soup.body.find('h1').contents[0].strip(),
        "Year": soup.body.find(id="titleYear").find("a").text.strip() if soup.body.find(id="titleYear") else ", ".join([y.text.strip() for y in soup.body.find(attrs={"class": "seasons-and-year-nav"}).find_all("a")[1:]]),
        "Duration": soup.body.find(attrs={"class": "title_wrapper"}).find("time").text.strip() if soup.body.find(attrs={"class": "title_wrapper"}).find("time") else None,
        "imdbRating": soup.body.find(attrs={"class": "ratingValue"}).find("strong").text.strip(),
        "imdbVotes": soup.body.find(attrs={"class": "imdbRating"}).find("a").text.strip(),
        "Plot": re.sub(r"\s+", " ", soup.body.find(attrs={"class": "summary_text"}).text).strip(),

        "Type": "TV Series" if soup.find(id="title-episode-widget") else "Movie",
        "Genre": ", ".join([x.text.strip() for x in soup.body.find(id="titleStoryLine").find_all("a") if x.get("href") is not None and x.get("href")[:21] == "/search/title?genres="]),
        "Country": ", ".join([x.text.strip() for x in soup.body.find(id="titleDetails").find_all("a") if x.get("href") is not None and x.get("href")[:32] == "/search/title?country_of_origin="]),
        "Credits": " ; ".join([x.find("h4").text.strip() + " " + (", ".join([y.text.strip() for y in x.find_all("a") if y.get("href") is not None and y.get("href")[:6] == "/name/"])) for x in soup.body.find_all(attrs={"class": "credit_summary_item"})]),
    }
Example #22
def cmd_tcode(msg):
    if not len(msg.args):
        raise IMException("indicate a transaction code or "
                          "a keyword to search!")

    url = ("https://www.tcodesearch.com/tcodes/search?q=%s" %
           urllib.parse.quote(msg.args[0]))

    page = web.getURLContent(url)
    soup = BeautifulSoup(page)

    res = Response(channel=msg.channel,
                   nomore="No more transaction code",
                   count=" (%d more tcodes)")

    search_res = soup.find("", {'id': 'searchresults'})
    for item in search_res.find_all('dd'):
        res.append_message(item.get_text().split('\n')[1].strip())

    return res
Example #23
def cmd_tcode(msg):
    if not len(msg.args):
        raise IMException("indicate a transaction code or "
                           "a keyword to search!")

    url = ("http://www.tcodesearch.com/tcodes/search?q=%s" %
           urllib.parse.quote(msg.args[0]))

    page = web.getURLContent(url)
    soup = BeautifulSoup(page)

    res = Response(channel=msg.channel,
                   nomore="No more transaction code",
                   count=" (%d more tcodes)")


    search_res = soup.find("", {'id':'searchresults'})
    for item in search_res.find_all('dd'):
        res.append_message(item.get_text().split('\n')[1].strip())

    return res
Example #24
def get_postnl_info(postnl_id):
    data = urllib.parse.urlencode({'barcodes': postnl_id})
    postnl_baseurl = "http://www.postnl.post/details/"

    postnl_data = getURLContent(postnl_baseurl, data.encode('utf-8'))
    soup = BeautifulSoup(postnl_data)
    if (soup.find(id='datatables')
            and soup.find(id='datatables').tbody
            and soup.find(id='datatables').tbody.tr):
        search_res = soup.find(id='datatables').tbody.tr
        if len(search_res.find_all('td')) >= 3:
            field = search_res.find('td')
            post_date = field.get_text()

            field = field.find_next('td')
            post_status = field.get_text()

            field = field.find_next('td')
            post_destination = field.get_text()

            return (post_status.lower(), post_destination, post_date)
Example #25
def fetch(url, onNone=_onNoneDefault):
    """Retrieve the content of the given URL

    Argument:
    url -- the URL to fetch
    """

    try:
        req = web.getURLContent(url)
        if req is not None:
            return req
        else:
            if callable(onNone):
                return onNone()
            else:
                return None
    except ConnectionError as e:
        raise IMException(e.strerror)
    except socket.timeout:
        raise IMException("The request timeout when trying to access the page")
    except socket.error as e:
        raise IMException(e.strerror)
Example #26
def fetch(url, onNone=_onNoneDefault):
    """Retrieve the content of the given URL

    Argument:
    url -- the URL to fetch
    """

    try:
        req = web.getURLContent(url)
        if req is not None:
            return req
        else:
            if callable(onNone):
                return onNone()
            else:
                return None
    except ConnectionError as e:
        raise IMException(e.strerror)
    except socket.timeout:
        raise IMException("The request timeout when trying to access the page")
    except socket.error as e:
        raise IMException(e.strerror)
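A hypothetical caller of the fetch() helper above, falling back to an empty string instead of None when the page has no content (the URL is just an example):

page = fetch("http://example.com/feed.xml", onNone=lambda: "")
if page:
    print("fetched %d bytes" % len(page))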
Example #27
def get_french_synos(word):
    url = "http://www.crisco.unicaen.fr/des/synonymes/" + quote(word.encode("ISO-8859-1"))
    page = web.getURLContent(url)

    best = list(); synos = list(); anton = list()

    if page is not None:
        for line in page.split("\n"):

            if line.find("!-- Fin liste des antonymes --") > 0:
                for elt in re.finditer(">([^<>]+)</a>", line):
                    anton.append(elt.group(1))

            elif line.find("!--Fin liste des synonymes--") > 0:
                for elt in re.finditer(">([^<>]+)</a>", line):
                    synos.append(elt.group(1))

            elif re.match("[ \t]*<tr[^>]*>.*</tr>[ \t]*</table>.*", line) is not None:
                for elt in re.finditer(">&[^;]+;([^&]*)&[^;]+;<", line):
                    best.append(elt.group(1))

    return (best, synos, anton)
Example #28
def get_french_synos(word):
    url = "http://www.crisco.unicaen.fr/des/synonymes/" + quote(word)
    page = web.getURLContent(url)

    best = list(); synos = list(); anton = list()

    if page is not None:
        for line in page.split("\n"):

            if line.find("!-- Fin liste des antonymes --") > 0:
                for elt in re.finditer(">([^<>]+)</a>", line):
                    anton.append(elt.group(1))

            elif line.find("!--Fin liste des synonymes--") > 0:
                for elt in re.finditer(">([^<>]+)</a>", line):
                    synos.append(elt.group(1))

            elif re.match("[ \t]*<tr[^>]*>.*</tr>[ \t]*</table>.*", line) is not None:
                for elt in re.finditer(">&[^;]+;([^&]*)&[^;]+;<", line):
                    best.append(elt.group(1))

    return (best, synos, anton)
Example #29
def default_reducer(url, data):
    snd_url = url + quote(data, "/:%@&=?")
    return web.getURLContent(snd_url)
Example #30
def user_keys(username):
    keys = web.getURLContent("https://github.com/%s.keys" % quote(username))
    return keys.split('\n')
Example #31
def find_rss_links(url):
    url = web.getNormalizedURL(url)
    soup = BeautifulSoup(web.getURLContent(url))
    for rss in soup.find_all('link', attrs={"type": re.compile("^application/(atom|rss)")}):
        yield urljoin(url, rss["href"])
Example #32
def user_keys(username):
    keys = web.getURLContent("https://github.com/%s.keys" % quote(username))
    return keys.split("\n")
Example #33
def default_reducer(url, data):
    snd_url = url + quote(data, "/:%@&=?")
    return web.getURLContent(snd_url)
Example #34
def get_movie_by_id(imdbid):
    """Returns the information about the matching movie"""

    url = "http://www.imdb.com/title/" + urllib.parse.quote(imdbid)
    soup = BeautifulSoup(web.getURLContent(url))

    return {
        "imdbID":
        imdbid,
        "Title":
        soup.body.find('h1').contents[0].strip(),
        "Year":
        soup.body.find(id="titleYear").find("a").text.strip()
        if soup.body.find(id="titleYear") else ", ".join([
            y.text.strip()
            for y in soup.body.find(attrs={
                "class": "seasons-and-year-nav"
            }).find_all("a")[1:]
        ]),
        "Duration":
        soup.body.find(attrs={
            "class": "title_wrapper"
        }).find("time").text.strip() if soup.body.find(attrs={
            "class": "title_wrapper"
        }).find("time") else None,
        "imdbRating":
        soup.body.find(attrs={
            "class": "ratingValue"
        }).find("strong").text.strip() if soup.body.find(
            attrs={"class": "ratingValue"}) else None,
        "imdbVotes":
        soup.body.find(attrs={
            "class": "imdbRating"
        }).find("a").text.strip() if soup.body.find(
            attrs={"class": "imdbRating"}) else None,
        "Plot":
        re.sub(r"\s+", " ",
               soup.body.find(attrs={
                   "class": "summary_text"
               }).text).strip(),
        "Type":
        "TV Series" if soup.find(id="title-episode-widget") else "Movie",
        "Genre":
        ", ".join([
            x.text.strip()
            for x in soup.body.find(id="titleStoryLine").find_all("a")
            if x.get("href") is not None
            and x.get("href")[:21] == "/search/title?genres="
        ]),
        "Country":
        ", ".join([
            x.text.strip()
            for x in soup.body.find(id="titleDetails").find_all("a")
            if x.get("href") is not None
            and x.get("href")[:32] == "/search/title?country_of_origin="
        ]),
        "Credits":
        " ; ".join([
            x.find("h4").text.strip() + " " + (", ".join([
                y.text.strip() for y in x.find_all("a")
                if y.get("href") is not None and y.get("href")[:6] == "/name/"
            ]))
            for x in soup.body.find_all(attrs={"class": "credit_summary_item"})
        ]),
    }
Example #35
def find_rss_links(url):
    url = web.getNormalizedURL(url)
    soup = BeautifulSoup(web.getURLContent(url))
    for rss in soup.find_all(
            'link', attrs={"type": re.compile("^application/(atom|rss)")}):
        yield urljoin(url, rss["href"])