def get_last_news(url):
    from xml.parsers.expat import ExpatError
    try:
        feed = Feed(web.getURLContent(url))
        return feed.entries
    except ExpatError:
        return []

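# A minimal usage sketch (hypothetical feed URL; assumes the `Feed` wrapper
# and the `web` helper used above are in scope):
#
#   for entry in get_last_news("https://example.com/feed.atom"):
#       print(entry.title)
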
def get_laposte_info(laposte_id):
    data = urllib.parse.urlencode({'id': laposte_id})
    laposte_baseurl = "http://www.part.csuivi.courrier.laposte.fr/suivi/index"

    laposte_data = getURLContent(laposte_baseurl, data.encode('utf-8'))
    soup = BeautifulSoup(laposte_data)
    search_res = soup.find(class_='resultat_rech_simple_table').tbody.tr
    if (soup.find(class_='resultat_rech_simple_table').thead
            and soup.find(class_='resultat_rech_simple_table').thead.tr
            and len(search_res.find_all('td')) > 3):
        field = search_res.find('td')
        poste_id = field.get_text()

        field = field.find_next('td')
        poste_type = field.get_text()

        field = field.find_next('td')
        poste_date = field.get_text()

        field = field.find_next('td')
        poste_location = field.get_text()

        field = field.find_next('td')
        poste_status = field.get_text()

        return (poste_type.lower(), poste_id.strip(), poste_status.lower(),
                poste_location, poste_date)

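# Hedged usage sketch (hypothetical tracking number; the page markup may
# have changed since this scraper was written):
#
#   res = get_laposte_info("1A0000000000001")
#   if res is not None:
#       poste_type, poste_id, status, location, date = res
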
def lstu_reducer(url, data):
    json_data = json.loads(web.getURLContent(
        url,
        "lsturl=" + quote(data),
        header={"Content-Type": "application/x-www-form-urlencoded"}))

    if 'short' in json_data:
        return json_data['short']
    elif 'msg' in json_data:
        raise IMException("Error: %s" % json_data['msg'])
    else:
        raise IMException("An error occurred while shortening %s." % data)

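# Usage sketch, assuming `url` is the API endpoint of an lstu instance
# (hypothetical values):
#
#   short = lstu_reducer("https://lstu.example.org/a",
#                        "https://example.com/a/very/long/path")
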
def get_conjug(verb, stringTens):
    url = ("https://leconjugueur.lefigaro.fr/conjugaison/verbe/%s.html"
           % quote(verb.encode("ISO-8859-1")))
    page = web.getURLContent(url)

    if page is not None:
        for line in page.split("\n"):
            if re.search('<div class="modeBloc">', line) is not None:
                return compute_line(line, stringTens)

    return list()

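# Usage sketch (hypothetical tense label; compute_line() is expected to
# extract the forms for stringTens from the matched block):
#
#   for form in get_conjug("manger", "Présent"):
#       print(form)
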
def get_info_yt(msg):
    soup = BeautifulSoup(getURLContent(URL))

    res = Response(channel=msg.channel, nomore="No more upcoming CTF")

    for line in soup.body.find_all('tr'):
        n = line.find_all('td')
        if len(n) == 7:
            res.append_message(
                "\x02%s:\x0F from %s type %s at %s. Weight: %s. %s%s"
                % tuple([striphtml(x.text).strip() for x in n]))

    return res

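# Despite its name, this walks an upcoming-CTF table: each event is a <tr>
# holding exactly seven <td> cells, and rows with any other cell count are
# skipped. Hedged usage sketch (msg is a bot message with a `channel`
# attribute):
#
#   res = get_info_yt(msg)   # one appended message per upcoming CTF
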
def get_colissimo_info(colissimo_id):
    colissimo_data = getURLContent("http://www.colissimo.fr/portail_colissimo/"
                                   "suivre.do?colispart=%s" % colissimo_id)
    soup = BeautifulSoup(colissimo_data)
    dataArray = soup.find(class_='dataArray')
    if dataArray and dataArray.tbody and dataArray.tbody.tr:
        date = dataArray.tbody.tr.find(headers="Date").get_text()
        libelle = re.sub(r'[\n\t\r]', '',
                         dataArray.tbody.tr.find(headers="Libelle").get_text())
        site = dataArray.tbody.tr.find(headers="site").get_text().strip()
        return (date, libelle, site)

def get_colisprive_info(track_id):
    data = urllib.parse.urlencode({'numColis': track_id})
    track_baseurl = "https://www.colisprive.com/moncolis/pages/detailColis.aspx"

    track_data = getURLContent(track_baseurl, data.encode('utf-8'))
    soup = BeautifulSoup(track_data)

    dataArray = soup.find(class_='BandeauInfoColis')
    if (dataArray and dataArray.find(class_='divStatut')
            and dataArray.find(class_='divStatut').find(class_='tdText')):
        status = (dataArray.find(class_='divStatut')
                  .find(class_='tdText').get_text())
        return status

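# Usage sketch (hypothetical parcel number; the function implicitly
# returns None when the expected markup is absent):
#
#   status = get_colisprive_info("COLISPRIVE123")
#   print(status or "no status found")
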
def get_colissimo_info(colissimo_id):
    colissimo_data = getURLContent(
        "https://www.laposte.fr/particulier/outils/suivre-vos-envois?code=%s"
        % colissimo_id)
    soup = BeautifulSoup(colissimo_data)
    dataArray = soup.find(class_='results-suivi')
    if (dataArray and dataArray.table and dataArray.table.tbody
            and dataArray.table.tbody.tr):
        td = dataArray.table.tbody.tr.find_all('td')
        if len(td) > 2:
            date = td[0].get_text()
            libelle = re.sub(r'[\n\t\r]', '', td[1].get_text())
            site = td[2].get_text().strip()
            return (date, libelle, site)

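# Usage sketch for the laposte.fr variant above (hypothetical parcel code):
#
#   res = get_colissimo_info("6A00000000001")
#   if res:
#       date, libelle, site = res
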
def get_tnt_info(track_id):
    values = []
    data = getURLContent(
        'https://www.tnt.fr/public/suivi_colis/recherche/'
        'visubontransport.do?bonTransport=%s' % track_id)
    soup = BeautifulSoup(data)
    status_list = soup.find('div', class_='result__content')
    if not status_list:
        return None
    last_status = status_list.find('div', class_='roster')
    if last_status:
        for info in last_status.find_all('div', class_='roster__item'):
            values.append(info.get_text().strip())
    if len(values) == 3:
        return (values[0], values[1], values[2])

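# Usage sketch (hypothetical consignment number): a triple of the three
# roster items is returned only when exactly three are scraped; any other
# count yields None implicitly.
#
#   info = get_tnt_info("1234567890")
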
def get_cve(cve_id):
    search_url = BASEURL_NIST + quote(cve_id.upper())

    soup = BeautifulSoup(getURLContent(search_url))
    vuln = soup.body.find(class_="vuln-detail")
    cvss = vuln.findAll('div')[4]

    return [
        "Base score: " + cvss.findAll('div')[0].findAll('a')[0].text.strip(),
        vuln.findAll('p')[0].text,  # description
        striphtml(vuln.findAll('div')[0].text).strip(),  # publication date
        striphtml(vuln.findAll('div')[1].text).strip(),  # last revised
    ]

def get_cve(cve_id):
    search_url = BASEURL_NIST + quote(cve_id.upper())

    soup = BeautifulSoup(getURLContent(search_url))
    vuln = {}
    for vd in VULN_DATAS:
        r = soup.body.find(attrs={"data-testid": VULN_DATAS[vd]})
        if r:
            vuln[vd] = r.text.strip()
    return vuln

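# Usage sketch: VULN_DATAS is expected to map output keys to data-testid
# attribute values on the NVD detail page (hypothetical mapping):
#
#   VULN_DATAS = {"description": "vuln-description",
#                 "published": "vuln-published-on"}
#   print(get_cve("CVE-2014-0160"))
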
def get_chronopost_info(track_id):
    data = urllib.parse.urlencode({'listeNumeros': track_id})
    track_baseurl = "https://www.chronopost.fr/expedier/inputLTNumbersNoJahia.do?lang=fr_FR"

    track_data = getURLContent(track_baseurl, data.encode('utf-8'))
    soup = BeautifulSoup(track_data)
    infoClass = soup.find(class_='numeroColi2')
    if infoClass and infoClass.get_text():
        info = infoClass.get_text().split("\n")
        if len(info) >= 2:  # info[1] is accessed below
            info = info[1].strip().split("\"")
            if len(info) >= 3:  # info[1] and info[2] are accessed below
                date = info[2]
                libelle = info[1]
                return (date, libelle)

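# Usage sketch (hypothetical tracking number; parsing depends on the
# quoted date/label fragments inside the 'numeroColi2' block):
#
#   res = get_chronopost_info("XX123456789FR")
#   if res:
#       date, libelle = res
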
def get_usps_info(usps_id):
    usps_parcelurl = ("https://tools.usps.com/go/TrackConfirmAction_input?"
                      + urllib.parse.urlencode({'qtc_tLabels1': usps_id}))
    usps_data = getURLContent(usps_parcelurl)
    soup = BeautifulSoup(usps_data)

    history = soup.find(class_="tracking_history")
    if (history and history.find(class_="row_notification")
            and history.find(class_="row_top").find_all("td")):
        notification = history.find(class_="row_notification").text.strip()
        cells = history.find(class_="row_top").find_all("td")
        date = re.sub(r"\s+", " ", cells[0].text.strip())
        status = cells[1].text.strip()
        last_location = cells[2].text.strip()
        return (notification, date, status, last_location)

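# Usage sketch (hypothetical label number; a 4-tuple comes back only when
# the tracking-history table is present):
#
#   res = get_usps_info("9400100000000000000000")
#   if res:
#       notification, date, status, last_location = res
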
def get_json_info(msg):
    if not len(msg.args):
        raise IMException("Please specify a URL and a list of JSON keys.")

    request_data = web.getURLContent(msg.args[0].replace(' ', "%20"))
    if not request_data:
        raise IMException("Please specify a valid URL.")

    json_data = json.loads(request_data)

    if len(msg.args) == 1:
        raise IMException("Please specify the keys to return (%s)"
                          % ", ".join(getJsonKeys(json_data)))

    tags = ','.join(msg.args[1:]).split(',')
    response = getRequestedTags(tags, json_data)

    return Response(response, channel=msg.channel,
                    nomore="No more content", count=" (%d more lines)")

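# Usage sketch: for a bot command such as `!json <url> key1,key2`,
# msg.args would be ["<url>", "key1,key2"]; getJsonKeys() and
# getRequestedTags() are the module helpers referenced above.
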
def get_land_tarif(country, forfait="pkgFREE"):
    url = ("http://mobile.international.free.fr/?"
           + urllib.parse.urlencode({'pays': country}))
    page = web.getURLContent(url)
    soup = BeautifulSoup(page)
    fact = soup.find(class_=forfait)
    if fact is None:
        raise IMException("Country or forfait not found.")
    res = {}
    for s in ACT.keys():
        try:
            res[s] = (fact.find(attrs={"data-bind": "text: " + s}).text
                      + " "
                      + fact.find(attrs={"data-bind": "html: " + s + "Unit"}).text)
        except AttributeError:
            res[s] = "inclus"
    return res

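# Usage sketch (hypothetical country name; ACT is the module-level mapping
# of data-bind labels iterated above):
#
#   tarifs = get_land_tarif("Allemagne", forfait="pkgFREE")
#   for act, price in tarifs.items():
#       print("%s: %s" % (act, price))
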
def get_postnl_info(postnl_id):
    data = urllib.parse.urlencode({'barcodes': postnl_id})
    postnl_baseurl = "http://www.postnl.post/details/"

    postnl_data = getURLContent(postnl_baseurl, data.encode('utf-8'))
    soup = BeautifulSoup(postnl_data)
    if (soup.find(id='datatables') and soup.find(id='datatables').tbody
            and soup.find(id='datatables').tbody.tr):
        search_res = soup.find(id='datatables').tbody.tr
        if len(search_res.find_all('td')) >= 3:
            field = search_res.find('td')  # first cell: shipment date
            post_date = field.get_text()

            field = field.find_next('td')
            post_status = field.get_text()

            field = field.find_next('td')
            post_destination = field.get_text()

            return (post_status.lower(), post_destination, post_date)

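# Usage sketch (hypothetical barcode):
#
#   res = get_postnl_info("3STEST0000000")
#   if res:
#       status, destination, date = res
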
def cmd_tcode(msg):
    if not len(msg.args):
        raise IMException("indicate a transaction code or "
                          "a keyword to search!")

    url = ("https://www.tcodesearch.com/tcodes/search?q=%s"
           % urllib.parse.quote(msg.args[0]))
    page = web.getURLContent(url)
    soup = BeautifulSoup(page)

    res = Response(channel=msg.channel,
                   nomore="No more transaction code",
                   count=" (%d more tcodes)")

    search_res = soup.find("", {'id': 'searchresults'})
    for item in search_res.find_all('dd'):
        res.append_message(item.get_text().split('\n')[1].strip())

    return res

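# Usage sketch: invoked as a bot command, e.g. `!tcode SE38`; each <dd>
# entry in the search results becomes one message of the Response.
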
def fetch(url, onNone=_onNoneDefault):
    """Retrieve the content of the given URL

    Argument:
    url -- the URL to fetch
    """
    try:
        req = web.getURLContent(url)
        if req is not None:
            return req
        elif callable(onNone):
            return onNone()
        else:
            return None
    except ConnectionError as e:
        raise IMException(e.strerror)
    except socket.timeout:
        raise IMException("The request timed out while trying to access the page")
    except socket.error as e:
        raise IMException(e.strerror)

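# Hedged usage sketch for the onNone hook (hypothetical fallback):
#
#   body = fetch("https://example.com/", onNone=lambda: "")
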
def get_french_synos(word):
    url = ("http://www.crisco.unicaen.fr/des/synonymes/"
           + quote(word.encode("ISO-8859-1")))
    page = web.getURLContent(url)

    best = list()
    synos = list()
    anton = list()

    if page is not None:
        for line in page.split("\n"):
            if line.find("!-- Fin liste des antonymes --") > 0:
                for elt in re.finditer(">([^<>]+)</a>", line):
                    anton.append(elt.group(1))
            elif line.find("!--Fin liste des synonymes--") > 0:
                for elt in re.finditer(">([^<>]+)</a>", line):
                    synos.append(elt.group(1))
            elif re.match("[ \t]*<tr[^>]*>.*</tr>[ \t]*</table>.*",
                          line) is not None:
                for elt in re.finditer(">&[^;]+;([^&]*)&[^;]+;<", line):
                    best.append(elt.group(1))

    return (best, synos, anton)

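# Usage sketch: the lists come back as (best, synonyms, antonyms), e.g.
#
#   best, synos, anton = get_french_synos("maison")
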
def default_reducer(url, data):
    snd_url = url + quote(data, "/:%@&=?")
    return web.getURLContent(snd_url)

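# Usage sketch: quote() keeps the URL-significant characters "/:%@&=?"
# intact, so an already-assembled target URL survives unescaped:
#
#   default_reducer("https://short.example/?u=", "https://host/p?a=1&b=2")
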
def user_keys(username):
    keys = web.getURLContent("https://github.com/%s.keys" % quote(username))
    return keys.split('\n')

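# Usage sketch: GitHub's .keys endpoint returns one authorized public key
# per line as plain text.
#
#   for key in user_keys("octocat"):
#       print(key)
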
def find_rss_links(url):
    url = web.getNormalizedURL(url)
    soup = BeautifulSoup(web.getURLContent(url))
    for rss in soup.find_all(
            'link', attrs={"type": re.compile("^application/(atom|rss)")}):
        yield urljoin(url, rss["href"])

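# Usage sketch: yields absolute feed URLs advertised via <link> tags:
#
#   for feed_url in find_rss_links("https://example.com/blog"):
#       print(feed_url)
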
def get_movie_by_id(imdbid):
    """Returns the information about the matching movie"""
    url = "http://www.imdb.com/title/" + urllib.parse.quote(imdbid)
    soup = BeautifulSoup(web.getURLContent(url))
    return {
        "imdbID": imdbid,
        "Title": soup.body.find('h1').contents[0].strip(),
        "Year": (soup.body.find(id="titleYear").find("a").text.strip()
                 if soup.body.find(id="titleYear")
                 else ", ".join([
                     y.text.strip()
                     for y in soup.body.find(
                         attrs={"class": "seasons-and-year-nav"}
                     ).find_all("a")[1:]
                 ])),
        "Duration": (soup.body.find(attrs={"class": "title_wrapper"})
                     .find("time").text.strip()
                     if soup.body.find(attrs={"class": "title_wrapper"})
                     .find("time") else None),
        "imdbRating": (soup.body.find(attrs={"class": "ratingValue"})
                       .find("strong").text.strip()
                       if soup.body.find(attrs={"class": "ratingValue"})
                       else None),
        "imdbVotes": (soup.body.find(attrs={"class": "imdbRating"})
                      .find("a").text.strip()
                      if soup.body.find(attrs={"class": "imdbRating"})
                      else None),
        "Plot": re.sub(r"\s+", " ",
                       soup.body.find(attrs={"class": "summary_text"}).text
                       ).strip(),
        "Type": ("TV Series" if soup.find(id="title-episode-widget")
                 else "Movie"),
        "Genre": ", ".join([
            x.text.strip()
            for x in soup.body.find(id="titleStoryLine").find_all("a")
            if (x.get("href") is not None
                and x.get("href")[:21] == "/search/title?genres=")
        ]),
        "Country": ", ".join([
            x.text.strip()
            for x in soup.body.find(id="titleDetails").find_all("a")
            if (x.get("href") is not None
                and x.get("href")[:32] == "/search/title?country_of_origin=")
        ]),
        "Credits": " ; ".join([
            x.find("h4").text.strip() + " " + ", ".join([
                y.text.strip() for y in x.find_all("a")
                if y.get("href") is not None
                and y.get("href")[:6] == "/name/"
            ])
            for x in soup.body.find_all(attrs={"class": "credit_summary_item"})
        ]),
    }

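# Usage sketch (tt0133093 is a real IMDb id of this form; the scraped
# field set depends on the page layout current when this was written):
#
#   movie = get_movie_by_id("tt0133093")
#   print(movie["Title"], movie["Year"])
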