def searchByTitle(title):
    """Search for IMDb title pages matching ``title`` via Google web search.

    The query is restricted to ``site:imdb.com/title``. A hit is kept only
    when its URL ends in an IMDb title id (``.../title/tt<digits>/``) and its
    title does not match ``TV [a-zA-Z]``.

    Args:
        title: Free-text title to search for.

    Returns:
        A list of dicts with the keys ``"name"`` (hit title with markup and
        the " - IMDb" suffix removed), ``"desc"`` (hit snippet with markup
        removed) and ``"id"`` (``"IMDB="`` followed by the title id). The
        list is empty when the service returns no usable results.
    """
    # Single-argument parenthesised form prints the same text whether
    # ``print`` is a statement or a function.
    print("IMDB SEARCH HAPPENED")
    # uses google instead of IMDB search because it's better and easier to parse
    url = ('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=' +
           urllib2.quote("site:imdb.com/title " + title))
    request = urllib2.Request(url, None, {'Referer': 'http://irc.00id.net'})
    response = urllib2.urlopen(request)
    try:
        results_json = json.load(response)
    finally:
        # Release the connection even when the payload is not valid JSON.
        response.close()

    # NOTE(review): the service appears to answer with "responseData": null
    # when it rejects a query; treat that (or a missing "results" key) as
    # "no hits" instead of failing on None['results'].
    response_data = results_json.get('responseData') or {}
    results = response_data.get('results') or []

    markup = re.compile(r'<.*?>')
    tv_marker = re.compile(r'TV [a-zA-Z]')
    # One pattern both validates the URL and captures the id. ``{7,}`` also
    # accepts ids longer than seven digits.
    title_url = re.compile(r'imdb\.com/title/(tt\d{7,})/$')

    movieResults = []
    for result in results:
        if tv_marker.search(result['titleNoFormatting']):
            continue
        url_match = title_url.search(result['url'])
        if not url_match:
            continue
        # Separate local names so the ``title`` argument is not overwritten.
        name = tools.decode_htmlentities(
            markup.sub('', result['titleNoFormatting'].replace(" - IMDb", "")))
        content = tools.decode_htmlentities(markup.sub('', result['content']))
        movieResults.append({
            "name": name,
            "desc": content,
            "id": "IMDB=" + url_match.group(1)
        })
    return movieResults
def Persons(self):
    """Collect the directors and cast members found on ``self.imdbpage``.

    Directors are the links inside the grandparent element of the text node
    matching ``Directors?:``; links whose text contains "more credit" are
    skipped. Actors come from the rows of the table with class
    ``cast_list``; rows without a ``name`` cell are skipped.

    Returns:
        A list of dicts with the keys ``"Name"``, ``"Type"`` (``"Director"``
        or ``"Actor"``) and ``"Role"`` (empty for directors and for actors
        without a ``character`` cell). Directors are listed first.
    """
    def cell_text(cell):
        # Strip markup, decode entities and collapse every whitespace run
        # (newlines included) into a single space.
        text = tools.decode_htmlentities(
            tools.remove_html_tags(str(cell)).strip())
        return re.sub(r'\s+', ' ', text)

    personlist = []

    # Look each section up once instead of once to test and once to use.
    director_label = self.imdbpage.find(text=re.compile("Directors?:"))
    if director_label:
        for link in director_label.parent.parent.findAll("a"):
            if 'more credit' not in link.text:
                personlist.append({
                    "Name": link.text,
                    "Type": "Director",
                    "Role": ""
                })

    cast_table = self.imdbpage.find("table", "cast_list")
    if cast_table:
        for row in cast_table.findAll('tr'):
            name_cell = row.find('td', 'name')
            # Test the lookup result itself rather than comparing the
            # stringified cell to "None", so a cell whose text really is
            # "None" is not mistaken for a missing one.
            if name_cell is None:
                continue
            role_cell = row.find('td', 'character')
            personlist.append({
                "Name": cell_text(name_cell),
                "Type": "Actor",
                "Role": "" if role_cell is None else cell_text(role_cell)
            })
    return personlist
def Persons(self):
    """Collect the directors and cast members found on ``self.imdbpage``.

    Directors are the links inside the grandparent element of the text node
    matching ``Directors?:``; links whose text contains "more credit" are
    skipped. Actors come from the rows of the table with class
    ``cast_list``; rows without a ``name`` cell are skipped.

    Returns:
        A list of dicts with the keys ``"Name"``, ``"Type"`` (``"Director"``
        or ``"Actor"``) and ``"Role"`` (empty for directors and for actors
        without a ``character`` cell). Directors are listed first.
    """
    def cell_text(cell):
        # Strip markup, decode entities and collapse every whitespace run
        # (newlines included) into a single space.
        text = tools.decode_htmlentities(tools.remove_html_tags(str(cell)).strip())
        return re.sub(r"\s+", " ", text)

    personlist = []

    # Look each section up once instead of once to test and once to use.
    director_label = self.imdbpage.find(text=re.compile("Directors?:"))
    if director_label:
        for link in director_label.parent.parent.findAll("a"):
            if "more credit" not in link.text:
                personlist.append({"Name": link.text, "Type": "Director", "Role": ""})

    cast_table = self.imdbpage.find("table", "cast_list")
    if cast_table:
        for row in cast_table.findAll("tr"):
            name_cell = row.find("td", "name")
            # Test the lookup result itself rather than comparing the
            # stringified cell to "None", so a cell whose text really is
            # "None" is not mistaken for a missing one.
            if name_cell is None:
                continue
            role_cell = row.find("td", "character")
            role = "" if role_cell is None else cell_text(role_cell)
            personlist.append({"Name": cell_text(name_cell), "Type": "Actor", "Role": role})
    return personlist
def searchByTitle(title):
    """Search for IMDb title pages matching ``title`` via Google web search.

    The query is restricted to ``site:imdb.com/title``. A hit is kept only
    when its URL ends in an IMDb title id (``.../title/tt<digits>/``) and its
    title does not match ``TV [a-zA-Z]``.

    Args:
        title: Free-text title to search for.

    Returns:
        A list of dicts with the keys ``"name"`` (hit title with markup and
        the " - IMDb" suffix removed), ``"desc"`` (hit snippet with markup
        removed) and ``"id"`` (``"IMDB="`` followed by the title id). The
        list is empty when the service returns no usable results.
    """
    # Single-argument parenthesised form prints the same text whether
    # ``print`` is a statement or a function.
    print("IMDB SEARCH HAPPENED")
    # uses google instead of IMDB search because it's better and easier to parse
    url = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=" + urllib2.quote(
        "site:imdb.com/title " + title
    )
    request = urllib2.Request(url, None, {"Referer": "http://irc.00id.net"})
    response = urllib2.urlopen(request)
    try:
        results_json = json.load(response)
    finally:
        # Release the connection even when the payload is not valid JSON.
        response.close()

    # NOTE(review): the service appears to answer with "responseData": null
    # when it rejects a query; treat that (or a missing "results" key) as
    # "no hits" instead of failing on None["results"].
    response_data = results_json.get("responseData") or {}
    results = response_data.get("results") or []

    markup = re.compile(r"<.*?>")
    tv_marker = re.compile(r"TV [a-zA-Z]")
    # One pattern both validates the URL and captures the id. ``{7,}`` also
    # accepts ids longer than seven digits.
    title_url = re.compile(r"imdb\.com/title/(tt\d{7,})/$")

    movieResults = []
    for result in results:
        if tv_marker.search(result["titleNoFormatting"]):
            continue
        url_match = title_url.search(result["url"])
        if not url_match:
            continue
        # Separate local names so the ``title`` argument is not overwritten.
        name = tools.decode_htmlentities(markup.sub("", result["titleNoFormatting"].replace(" - IMDb", "")))
        content = tools.decode_htmlentities(markup.sub("", result["content"]))
        movieResults.append({"name": name, "desc": content, "id": "IMDB=" + url_match.group(1)})
    return movieResults
def Description(self):
    """Return the plot summary text from the page's ``overview-top`` element.

    The summary is taken from the second ``<p>`` inside the element with id
    ``overview-top``, and only when that element holds exactly two
    paragraphs. Links, remaining markup and "»" characters are removed, HTML
    entities are decoded and newlines become spaces.

    Returns:
        The cleaned summary, or an empty string when the ``overview-top``
        element is missing or does not contain exactly two paragraphs.
    """
    page = self.imdbpage.find(id="overview-top")
    if page is None:
        # No overview block on this page: nothing to summarise.
        return ""

    paragraphs = page.findAll("p")
    if len(paragraphs) != 2:
        # Previously this path fell through to ``return summary`` with the
        # name never assigned.
        return ""

    summary = str(paragraphs[1])
    # NOTE(review): the pattern is greedy, so on a line with several links
    # everything from the first "<a" to the last "/a>" is removed.
    removelink = re.compile(r"\<a.*\/a\>")
    summary = removelink.sub("", summary)
    summary = tools.remove_html_tags(summary)
    summary = summary.replace("»", "")
    # Invalid UTF-8 sequences are dropped rather than raising.
    summary = tools.decode_htmlentities(summary.decode("utf-8", "ignore"))
    return summary.replace("\n", " ")
def Description(self):
    """Return the plot summary text from the page's ``overview-top`` element.

    The summary is taken from the second ``<p>`` inside the element with id
    ``overview-top``, and only when that element holds exactly two
    paragraphs. Links, remaining markup and "»" characters are removed, HTML
    entities are decoded and newlines become spaces.

    Returns:
        The cleaned summary, or an empty string when the ``overview-top``
        element is missing or does not contain exactly two paragraphs.
    """
    page = self.imdbpage.find(id="overview-top")
    if page is None:
        # No overview block on this page: nothing to summarise.
        return ""

    paragraphs = page.findAll('p')
    if len(paragraphs) != 2:
        # Previously this path fell through to ``return summary`` with the
        # name never assigned.
        return ""

    summary = str(paragraphs[1])
    # NOTE(review): the pattern is greedy, so on a line with several links
    # everything from the first "<a" to the last "/a>" is removed.
    removelink = re.compile(r'\<a.*\/a\>')
    summary = removelink.sub('', summary)
    summary = tools.remove_html_tags(summary)
    summary = summary.replace('»', "")
    # Invalid UTF-8 sequences are dropped rather than raising.
    summary = tools.decode_htmlentities(
        summary.decode("utf-8", 'ignore'))
    return summary.replace("\n", " ")
def ProductionYear(self):
    """Return the four-digit year found in the page's ``<title>`` text.

    The title text (markup stripped, " - IMDb" removed, entities decoded) is
    scanned for parenthesised groups. The groups are checked from last to
    first and the first run of four digits starting with 1 or 2 is returned.

    Returns:
        The year as a string, or an empty string when no parenthesised group
        contains such a number.
    """
    movietitle = tools.decode_htmlentities(
        tools.remove_html_tags(str(self.imdbpage.find('title'))).replace(
            " - IMDb", ""))
    # Start from the last group so digits in an earlier parenthesis that is
    # part of the name itself are not mistaken for the year.
    for group in reversed(re.findall(r'\([^()]*\)', movietitle)):
        year = re.search(r'[1-2][0-9]{3}', group)
        if year:
            return year.group(0)
    # No year in the title: report "unknown" instead of failing on a None
    # match object.
    return ""
def LocalTitle(self):
    """Return the page's ``<title>`` text without its trailing "(...)" part.

    The title text has its markup stripped, " - IMDb" removed and entities
    decoded. Then every parenthesised group at the end of the text is
    dropped, and surrounding whitespace is trimmed.

    Returns:
        The bare title string.
    """
    movietitle = tools.decode_htmlentities(
        tools.remove_html_tags(str(self.imdbpage.find('title'))).replace(
            " - IMDb", ""))
    # Only trailing groups are removed. A greedy "\(.*\)" would wipe
    # everything between the first "(" and the last ")", turning
    # "(500) Days of Summer (2009)" into "". The lookbehind keeps at least
    # one character of the title in front of the removed suffix.
    return re.sub(r'(?<=\S)(\s*\([^()]*\))+\s*$', '', movietitle).strip()
def ProductionYear(self):
    """Return the four-digit year found in the page's ``<title>`` text.

    The title text (markup stripped, " - IMDb" removed, entities decoded) is
    scanned for parenthesised groups. The groups are checked from last to
    first and the first run of four digits starting with 1 or 2 is returned.

    Returns:
        The year as a string, or an empty string when no parenthesised group
        contains such a number.
    """
    movietitle = tools.decode_htmlentities(
        tools.remove_html_tags(str(self.imdbpage.find("title"))).replace(" - IMDb", "")
    )
    # Start from the last group so digits in an earlier parenthesis that is
    # part of the name itself are not mistaken for the year.
    for group in reversed(re.findall(r"\([^()]*\)", movietitle)):
        year = re.search(r"[1-2][0-9]{3}", group)
        if year:
            return year.group(0)
    # No year in the title: report "unknown" instead of failing on a None
    # match object.
    return ""
def LocalTitle(self):
    """Return the page's ``<title>`` text without its trailing "(...)" part.

    The title text has its markup stripped, " - IMDb" removed and entities
    decoded. Then every parenthesised group at the end of the text is
    dropped, and surrounding whitespace is trimmed.

    Returns:
        The bare title string.
    """
    movietitle = tools.decode_htmlentities(
        tools.remove_html_tags(str(self.imdbpage.find("title"))).replace(" - IMDb", "")
    )
    # Only trailing groups are removed. A greedy "\(.*\)" would wipe
    # everything between the first "(" and the last ")", turning
    # "(500) Days of Summer (2009)" into "". The lookbehind keeps at least
    # one character of the title in front of the removed suffix.
    return re.sub(r"(?<=\S)(\s*\([^()]*\))+\s*$", "", movietitle).strip()