def google_news(self, e):
    """Fetch the newest Google News item and report it on IRC.

    With an empty e.input the US top-headlines RSS feed is used;
    otherwise e.input becomes a news search query.  Sets e.output to
    "<title> - <description> [ <short link> ]" and returns e.
    """
    term = urllib.quote(e.input)
    if not term:
        feed_url = "http://news.google.com/news?ned=us&topic=h&output=rss"
    else:
        feed_url = "http://news.google.com/news?q=%s&output=rss" % term
    dom = xml.dom.minidom.parse(urllib2.urlopen(feed_url))
    item = dom.getElementsByTagName('item')[0]
    headline = item.getElementsByTagName('title')[0].childNodes[0].data
    desc_soup = BeautifulSoup(item.getElementsByTagName('description')[0].childNodes[0].data)
    # Strip anchors and the grey (#6f6f6f) source/date spans from the snippet.
    for node in desc_soup.findAll('a'):
        node.extract()
    for node in desc_soup.findAll(color='#6f6f6f'):
        node.extract()
    desc = str(desc_soup).strip().decode("utf-8", 'ignore')
    desc = tools.remove_html_tags(desc)
    desc = tools.decode_htmlentities(desc)
    # Trailing 9 characters are dropped -- presumably feed boilerplate; TODO confirm.
    desc = desc[0:len(desc) - 9]
    if desc.rfind(".") != -1:
        desc = desc[0:desc.rfind(".") + 1]
    short_link = tools.shorten_url(item.getElementsByTagName('link')[0].childNodes[0].data)
    e.output = "%s - %s [ %s ]" % (headline.encode("utf-8", 'ignore'),
                                   desc.encode("utf-8", 'ignore'),
                                   short_link.encode("utf-8", 'ignore'))
    return e
def google_sun(self, location, sun, nick):
    """Scrape a Google result page for a sunrise/sunset time.

    location -- place to look up; when empty, falls back to the stored
                location for *nick* from the global ``user`` object (if set).
    sun      -- search keyword, presumably "sunrise" or "sunset" -- TODO confirm.
    nick     -- IRC nick used for the location fallback.

    Returns a string like "<sun> in <city>: <time> <day> (<descr>)" with
    HTML stripped, or None when the expected markup is not in the page.
    """
    if location == "" and user:
        location = user.get_location(nick)
    location = urllib.parse.quote(location)
    url = ("http://www.google.com/search?hl=en&client=opera&hs=6At&rls=en"
           "&q=%s+%s&aq=f&aqi=g1&aql=&oq=&gs_rfai=") % (sun, location)
    request = urllib.request.Request(url, None, {})
    # Spoof a desktop Opera UA; Range header caps the download at 40 KiB.
    request.add_header('User-Agent', "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.15 Version/10.10")
    request.add_header('Range', "bytes=0-40960")
    response = urllib.request.urlopen(request).read().decode('utf-8')
    # Capture groups: 2 = time, 4 = day, 6 = qualifier (unused),
    # 8 = city, 10 = trailing description cell.
    m = re.search(r'(-40.gif.*?\<b\>)(.*?)(\<\/b\> )(.*?)( -\s*\<b\>)(.*?)'
                  r'(\<\/b\> in\s*)(.*?)(\s*?\<tr\>.*?top\"\>)(.*?)(\<\/table\>)',
                  response)
    if m is None:
        # Page layout changed or Google returned no result box.
        return None
    settime = m.group(2)
    setday = re.sub(r"\s+", " ", m.group(4))
    setcity = m.group(8)
    settimeword = m.group(10)
    result = "%s in %s: %s %s (%s)" % (sun, setcity, settime, setday, settimeword)
    return tools.remove_html_tags(result)
def google_sun(term, sun, nick):
    """Scrape a Google result page for a sunrise/sunset time (Python-2 variant).

    term -- location to search; when empty, falls back to the stored
            location for *nick* from the global ``user`` object (if set).
    sun  -- search keyword, presumably "sunrise" or "sunset" -- TODO confirm.
    nick -- IRC nick used for the location fallback.

    Returns the formatted time string with HTML stripped, or None when
    the expected markup is not found in the response.
    """
    if term == "" and user:
        term = user.get_location(nick)
    term = urllib.quote(term)
    url = "http://www.google.com/search?hl=en&client=opera&hs=6At&rls=en&q=%s+%s&aq=f&aqi=g1&aql=&oq=&gs_rfai=" % (sun, term)
    request = urllib2.Request(url, None, {})
    # Spoof a desktop Opera UA; Range header caps the download at 40 KiB.
    request.add_header('User-Agent', "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.15 Version/10.10")
    request.add_header('Range', "bytes=0-40960")
    response = urllib2.urlopen(request).read()
    # Capture groups: 2 = time, 4 = day, 6 = qualifier (unused),
    # 8 = city, 10 = trailing description cell.
    m = re.search('(-40.gif.*?\<b\>)(.*?)(\<\/b\> )(.*?)( -\s*\<b\>)(.*?)(\<\/b\> in\s*)(.*?)(\s*?\<tr\>.*?top\"\>)(.*?)(\<\/table\>)', response)
    try:
        settime = m.group(2)
        setday = m.group(4)
        setday = re.sub("\s+"," ",setday)
        setword = m.group(6)
        setcity = m.group(8)
        settimeword = m.group(10)
        result = "%s in %s: %s %s (%s)" % (sun, setcity,settime,setday,settimeword)
        #print result
    except:
        # m is None when the regex did not match; bail out returning None.
        pass
        return
    return tools.remove_html_tags(result)
def google_news(self, e):
    """Return the most recent Google News story.

    Sets e.output to "<title> - <description> [ <short link> ]" and
    returns e.  With an empty e.input the US top-headlines feed is used;
    otherwise the input is submitted as a news search query.
    """
    q = urllib.quote(e.input)
    if q:
        feed = "http://news.google.com/news?q=%s&output=rss" % q
    else:
        feed = "http://news.google.com/news?ned=us&topic=h&output=rss"
    rss = xml.dom.minidom.parse(urllib2.urlopen(feed))
    first_item = rss.getElementsByTagName('item')[0]

    def _text(tag):
        # First text node of the first <tag> element under the item.
        return first_item.getElementsByTagName(tag)[0].childNodes[0].data

    headline = _text('title')
    soup = BeautifulSoup(_text('description'))
    # Drop anchors and the grey (#6f6f6f) source/date spans.
    for anchor in soup.findAll('a'):
        anchor.extract()
    for grey in soup.findAll(color='#6f6f6f'):
        grey.extract()
    blurb = str(soup).strip().decode("utf-8", 'ignore')
    blurb = tools.remove_html_tags(blurb)
    blurb = tools.decode_htmlentities(blurb)
    # Trailing 9 characters are dropped -- presumably feed boilerplate; TODO confirm.
    blurb = blurb[0:len(blurb) - 9]
    if blurb.rfind(".") != -1:
        blurb = blurb[0:blurb.rfind(".") + 1]
    short = tools.shorten_url(_text('link'))
    e.output = "%s - %s [ %s ]" % (headline.encode("utf-8", 'ignore'),
                                   blurb.encode("utf-8", 'ignore'),
                                   short.encode("utf-8", 'ignore'))
    return e
def gwiki(bot, e):
    """Search Google's AJAX web-search API restricted to wikipedia.org.

    When the top hit is a wikipedia article URL, puts
    "<snippet> [ <shortened url> ]" into e.output.  Always returns e.
    """
    query_url = (
        'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=site:wikipedia.org+'
        + urllib.parse.quote(e.input))
    req = urllib.request.Request(query_url, None, {'Referer': 'http://irc.00id.net'})
    payload = json.loads(urllib.request.urlopen(req).read().decode('utf-8'))
    hits = payload['responseData']['results']
    top = hits[0]
    if re.search("wikipedia.org/wiki/", top['url']):
        # Un-double-encode, then shorten the article link.
        short = tools.shorten_url(top['url'].replace('%25', '%'))
        snippet = tools.decode_htmlentities(tools.remove_html_tags(top['content']))
        snippet = re.sub('\s+', ' ', snippet)
        snippet = snippet.replace("...", "")
        e.output = "%s [ %s ]" % (snippet, short)
    return e
def get_title(url):
    """Fetch *url* and return its <title> tag as "Title: <text>"
    (truncated to 180 characters), or "" if anything goes wrong."""
    title = ""
    try:
        opener = urllib2.build_opener()
        readlength = 10240
        if url.find("amazon.") != -1:
            readlength = 100096  # because amazon is coded like shit
        # Spoofed UA; Range header asks the server to cap the transfer.
        opener.addheaders = [("User-Agent", "Opera/9.10 (YourMom 8.0)"), ("Range", "bytes=0-" + str(readlength))]
        pagetmp = opener.open(url)
        page = pagetmp.read(readlength)
        opener.close()
        titletmp = tools.remove_html_tags(re.search("(?is)\<title\>.*?<\/title\>", page).group(0))
        title = "Title: " + titletmp.strip()[0:180]
    except:
        # Best-effort: any fetch/parse failure yields the empty string.
        pass
    return title
def get_title(url):
    """Fetch *url* and return its <title> as "Title: <text>" (first 180
    chars), or "" on any failure."""
    result = ""
    try:
        # Amazon pages bury <title> deep, so read far more of them.
        fetch_limit = 100096 if url.find("amazon.") != -1 else 10240
        opener = urllib2.build_opener()
        opener.addheaders = [('User-Agent', "Opera/9.10 (YourMom 8.0)"),
                             ('Range', "bytes=0-" + str(fetch_limit))]
        handle = opener.open(url)
        body = handle.read(fetch_limit)
        opener.close()
        match = re.search('(?is)\<title\>.*?<\/title\>', body)
        result = "Title: " + tools.remove_html_tags(match.group(0)).strip()[0:180]
    except:
        # Best-effort scraper: swallow every failure and return "".
        pass
    return result
def gwiki(bot, e):
    """Look up e.input on Google (AJAX API, site:wikipedia.org) and, when
    the top hit is an article URL, set e.output to
    "<snippet> [ <short url> ]".  Always returns e."""
    api = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=site:wikipedia.org+'
    request = urllib.request.Request(api + urllib.parse.quote(e.input), None,
                                     {'Referer': 'http://irc.00id.net'})
    raw = urllib.request.urlopen(request).read().decode('utf-8')
    first = json.loads(raw)['responseData']['results'][0]
    match = re.search("wikipedia.org/wiki/", first['url'])
    if match:
        # Un-double-encode percent signs, then shorten the link.
        page_url = tools.shorten_url(first['url'].replace('%25', '%'))
        text = tools.decode_htmlentities(tools.remove_html_tags(first['content']))
        text = re.sub('\s+', ' ', text)
        text = text.replace("...", "")
        e.output = "%s [ %s ]" % (text, page_url)
    return e
def get_imdb(self, e, urlposted=False):
    """Look up a movie on IMDb and report title, rating and synopsis.

    e.input holds either a search term or, when urlposted=True, an IMDb
    URL pasted directly.  On success sets e.output to
    "Title: ... - Rating: ... - <summary>[ <url> ]" and returns e;
    returns None when no title page was found or parsing failed.
    """
    searchterm = e.input
    if urlposted:
        url = searchterm
    else:
        # Google for the IMDb title page matching the search term.
        url = tools.google_url("site:imdb.com/title " + searchterm, "imdb.com/title/tt\\d{7}/")
    title = ""
    if not url:
        pass
    elif url.find("imdb.com/title/tt") != -1:
        try:
            movietitle = ""
            rating = ""
            summary = ""
            # Normalize to the canonical title URL built from the tt<digits> id.
            imdbid = re.search("tt\\d{7}", url)
            imdburl = ('http://www.imdb.com/title/' + imdbid.group(0) + '/')
            opener = urllib2.build_opener()
            # Spoofed UA; Range header caps the download at 40 KiB.
            opener.addheaders = [('User-Agent', "Opera/9.10 (YourMom 8.0)"), ('Range', "bytes=0-40960")]
            pagetmp = opener.open(imdburl)
            page = BeautifulSoup(pagetmp.read(40960))
            opener.close()
            movietitle = tools.decode_htmlentities(tools.remove_html_tags(str(page.find('title'))).replace(" - IMDb", ""))
            movietitle = movietitle.replace("IMDb - ", "")
            movietitle = "Title: " + movietitle
            # The overview box holds the star rating and the short synopsis.
            if page.find(id="overview-top") != None:
                page = page.find(id="overview-top").extract()
            if page.find("div", "star-box-giga-star") != None:
                rating = tools.remove_html_tags(str(page.find("div", "star-box-giga-star").text))
                rating = " - Rating: " + rating
            # Exactly two <p> tags appears to mean the second is the synopsis
            # -- layout assumption from the 2010s IMDb page; TODO confirm.
            if len(page.findAll('p')) == 2:
                summary = str(page.findAll('p')[1])
                removelink = re.compile(r'\<a.*\/a\>')
                summary = removelink.sub('', summary)
                summary = tools.remove_html_tags(summary)
                summary = summary.replace('»', "")
                summary = tools.decode_htmlentities(summary.decode("utf-8", 'ignore'))
                summary = re.sub("\&.*?\;", " ", summary)
                summary = summary.replace("\n", " ")
                summary = " - " + summary
            title = movietitle + rating + summary
            if not urlposted:
                title = title + " [ %s ]" % url
            e.output = title.encode('utf-8', 'ignore')
            return e
        except Exception as inst:
            # Log the failure to stdout (Python-2 print statement).
            print "!imdb " + searchterm + ": " + str(inst)
    return None
def get_wiki(self, e, urlposted=False):
    """Read the first paragraph of a Wikipedia article into e.output.

    e.input is a search term, a pasted article URL (urlposted=True), or
    empty (a random article is fetched).  File: pages are delegated to
    get_wiki_file_description().  Returns e; e.output is "" when no
    article URL could be resolved.
    """
    searchterm = e.input
    if urlposted:
        url = searchterm
    else:
        if searchterm == "":
            url = "http://en.wikipedia.org/wiki/Special:Random"
        else:
            url = tools.google_url("site:wikipedia.org " + searchterm,"wikipedia.org/wiki")
    title = ""
    if url and url.find("wikipedia.org/wiki/File:") != -1:
        file_title=get_wiki_file_description(url)
        if file_title:
            e.output = file_title
            return e
    if url and url.find("wikipedia.org/wiki/") != -1:
        try:
            opener = urllib2.build_opener()
            opener.addheaders = [('User-Agent',"Opera/9.10 (YourMom 8.0)")]
            pagetmp = opener.open(url)
            page = pagetmp.read()
            url = pagetmp.geturl()
            opener.close()
            # For in-page anchors, skip everything before the anchor target.
            if url.find('#') != -1:
                anchor = url.split('#')[1]
                page = page[page.find('id="' + anchor):]
            page = BeautifulSoup(page)
            # Infoboxes/navboxes live in tables; drop them before picking <p>.
            tables = page.findAll('table')
            for table in tables:
                table.extract()
            page = page.findAll('p')
            # A leading <p><span ...> is page furniture -- use the next <p>.
            if str(page[0])[0:9] == '<p><span ':
                page = unicode(page[1].extract())
            else:
                page = unicode(page[0].extract())
            title = tools.remove_html_tags(re.search('(?s)\<p\>(.*?)\<\/p\>',page).group(1))
            title = title.encode("utf-8", 'ignore')
            title = title.replace("<","");
            # Strip footnote markers like [1] and [citation needed].
            rembracket = re.compile(r'\[.*?\]')
            title = rembracket.sub('',title)
            #title = re.sub("\&.*?\;", " ", title)
            title = title.replace("\n", " ")
            title = tools.decode_htmlentities(title.decode("utf-8", 'ignore')).encode("utf-8", 'ignore')
            # Cap at 420 chars, then cut back to the last full sentence.
            title = title[0:420]
            if title.rfind(".")!=-1:
                title = title[0:title.rfind(".")+1]
            if not urlposted:
                url = tools.shorten_url(url)
                title = (title.decode('utf-8', 'ignore') + " [ %s ]" % url).encode('utf-8', 'ignore')
        except Exception as inst:
            # Log (Python-2 print) and fall back to the first sentence found
            # in whatever `page` currently holds.
            print "!wiki " + searchterm + " : " + str(inst)
            title = tools.remove_html_tags(re.search('\<p\>(.*?\.) ',str(page)).group(1))
    e.output = title
    return e
def get_wiki(self, e, urlposted=False):
    # read the first paragraph of a wikipedia article
    """Read the first paragraph of a Wikipedia article into e.output.

    e.input is a search term, a pasted article URL (urlposted=True), or
    empty (a random article is fetched).  File: pages are delegated to
    get_wiki_file_description().  Returns e; e.output is "" when no
    article URL could be resolved.
    """
    searchterm = e.input
    if urlposted:
        url = searchterm
    else:
        if searchterm == "":
            url = "http://en.wikipedia.org/wiki/Special:Random"
        else:
            url = tools.google_url("site:wikipedia.org " + searchterm, "wikipedia.org/wiki")
    title = ""
    if url and url.find("wikipedia.org/wiki/File:") != -1:
        file_title = get_wiki_file_description(url)
        if file_title:
            e.output = file_title
            return e
    if url and url.find("wikipedia.org/wiki/") != -1:
        try:
            opener = urllib2.build_opener()
            opener.addheaders = [("User-Agent", "Opera/9.10 (YourMom 8.0)")]
            pagetmp = opener.open(url)
            page = pagetmp.read()
            url = pagetmp.geturl()
            opener.close()
            # For in-page anchors, skip everything before the anchor target.
            if url.find("#") != -1:
                anchor = url.split("#")[1]
                page = page[page.find('id="' + anchor) :]
            page = BeautifulSoup(page)
            # Infoboxes/navboxes live in tables; drop them before picking <p>.
            tables = page.findAll("table")
            for table in tables:
                table.extract()
            page = page.findAll("p")
            # A leading <p><span ...> is page furniture -- use the next <p>.
            if str(page[0])[0:9] == "<p><span ":
                page = unicode(page[1].extract())
            else:
                page = unicode(page[0].extract())
            title = tools.remove_html_tags(re.search("(?s)\<p\>(.*?)\<\/p\>", page).group(1))
            title = title.encode("utf-8", "ignore")
            title = title.replace("<", "")
            # Strip footnote markers like [1] and [citation needed].
            rembracket = re.compile(r"\[.*?\]")
            title = rembracket.sub("", title)
            # title = re.sub("\&.*?\;", " ", title)
            title = title.replace("\n", " ")
            title = tools.decode_htmlentities(title.decode("utf-8", "ignore")).encode("utf-8", "ignore")
            # Cap at 420 chars, then cut back to the last full sentence.
            title = title[0:420]
            if title.rfind(".") != -1:
                title = title[0 : title.rfind(".") + 1]
            if not urlposted:
                url = tools.shorten_url(url)
                title = (title.decode("utf-8", "ignore") + " [ %s ]" % url).encode("utf-8", "ignore")
        except Exception as inst:
            # Log (Python-2 print) and fall back to the first sentence found
            # in whatever `page` currently holds.
            print "!wiki " + searchterm + " : " + str(inst)
            title = tools.remove_html_tags(re.search("\<p\>(.*?\.) ", str(page)).group(1))
    e.output = title
    return e