def get_country(self):
    """Set ``self.country`` from the table cell following the ``PAÍS`` label.

    The cell's ``alt="..."`` value is preferred; when that is empty the
    ``title="..."`` value is used instead.
    """
    cell = gutils.trim(self.page, u'<b>PAÍS</b></td>', '</td>')
    from_alt = gutils.trim(cell, 'alt="', '"')
    if from_alt == '':
        self.country = gutils.trim(cell, 'title="', '"')
    else:
        self.country = from_alt
def get_genre(self):
    """Set ``self.genre`` from the ``Genres:`` cell (or ``Genre:`` as fallback).

    Tabs, line feeds and carriage returns are removed from the result.
    """
    raw = gutils.trim(self.page, 'Genres:</td>', '</td>')
    if raw == '':
        raw = gutils.trim(self.page, 'Genre:</td>', '</td>')
    for control_char in ('\t', '\n', '\r'):
        raw = raw.replace(control_char, '')
    self.genre = raw
def get_director(self):
    """Set ``self.director`` from the ``Regie`` entry of the credits page.

    When ``self.url`` does not already carry ``typ=credits`` the credits
    URL is built from ``self.url_to_use`` / ``self.movie_id`` and loaded
    through ``self.open_page`` first.
    """
    on_credits_page = gutils.trim(self.url, "typ=", "&") == "credits"
    if not on_credits_page:
        self.url = self.url_to_use + "typ=credits&nr=" + str(self.movie_id)
        self.open_page(self.parent_window)
    entry = gutils.trim(self.page, "Regie", "</a>")
    link_tail = gutils.after(entry, "mitwirk.php4")
    self.director = gutils.after(link_tail, ">")
def get_studio(self):
    """Set ``self.studio`` from the ``alt="Produktion"`` section of the detail page.

    The short form (up to the first delimiter) is tried first; when it is
    empty the text up to ``</TABLE>`` is used.  Line feeds become ``', '``
    and a leading or trailing ``', '`` is removed.
    """
    marker = 'alt="Produktion"'
    studio = gutils.strip_tags(gutils.trim(self.detail_page, marker, ' '))
    if studio == '':
        studio = gutils.trim(self.detail_page, marker, '</TABLE>')
    studio = gutils.after(studio, '>').replace('\n', ', ')
    self.studio = re.sub('((^, )|(, $))', '', studio)
def get_searches(self):
    """Append one id/title pair per ``<td><b>`` chunk of the result page.

    Nothing is collected when the text before the first ``<td><b>`` is empty.
    """
    chunks = self.page.split('<td><b>')
    if chunks[0] == '':
        return
    for chunk in chunks:
        self.ids.append(gutils.trim(chunk, 'ver.php?art=', "'"))
        raw_title = gutils.trim(chunk, "target='_top'>", '</a>')
        self.titles.append(gutils.strip_tags(gutils.convert_entities(raw_title)))
def get_notes(self):
    """Build ``self.notes`` from the Features, Video and Audio sections of the page.

    ``<br>`` / ``<br />`` become line feeds and remaining tags are stripped;
    a section is only added when it is not empty.
    """
    notes = ''

    features = gutils.trim(self.page, '<h3>Features</h3>', '</p>')
    features = features.replace('<br>', '\n').replace('<br />', '\n')
    features = gutils.strip_tags(features)
    if features != '':
        notes = notes + 'Features:\n' + features + '\n\n'

    video = gutils.trim(self.page, 'Video</strong>', '<strong>')
    video = re.sub('[ \t]+', ' ', video.replace('\r\n', ''))
    video = video.replace('<br>', '\n').replace('<br />', '\n')
    video = gutils.strip_tags(video)
    if video != '':
        notes = notes + 'Video:' + video

    audio = gutils.trim(self.page, 'Audio</strong>', '</div>')
    audio = re.sub('[ \t]+', ' ', audio.replace('\r\n', ''))
    for marker in ('<br>', '<br />', '(more info)'):
        audio = audio.replace(marker, '\n')
    audio = gutils.strip_tags(audio)
    if audio != '':
        notes = notes + 'Audio:' + audio

    self.notes = notes
def get_plot(self):
    """Set ``self.plot`` to the tag-stripped text following ``Obsah:``.

    When the page links to a ``?text=<digits>`` sub page, that page is
    fetched with ``self.open_page(url=...)`` and used as the source;
    otherwise ``self.page`` itself is used.
    """
    link = re.search(r"\?text=([\d]*)", self.page)
    if link:
        source = self.open_page(url=self.url + "?text=" + link.group(1))
    else:
        source = self.page
    self.plot = gutils.strip_tags(gutils.trim(source, "Obsah:", " <b><i>("))
def search(self,parent_window):
    """Run the search and reduce ``self.page`` to the result section.

    The section starts at ``Matchning`` and ends at ``Hittade``; when that
    yields nothing, ``Visa fler`` is used as the end marker.  Returns the
    reduced page.
    """
    self.open_search(parent_window)
    section = ''
    for end_marker in ('Hittade', 'Visa fler'):
        section = gutils.trim(self.page, 'Matchning', end_marker)
        if section != '':
            break
    self.page = section
    return self.page
def get_studio(self):
    """Set ``self.studio`` from the ``alt="Produktion"`` block of ``self.detail_page``.

    Falls back to the text up to ``</TABLE>`` when the short lookup is
    empty, then turns line feeds into ``", "`` and trims a leading or
    trailing ``", "``.
    """
    start = 'alt="Produktion"'
    text = gutils.trim(self.detail_page, start, " ")
    text = gutils.strip_tags(text)
    if text == "":
        text = gutils.trim(self.detail_page, start, "</TABLE>")
    text = gutils.after(text, ">")
    text = text.replace("\n", ", ")
    self.studio = re.sub("((^, )|(, $))", "", text)
def get_searches(self):
    """Append one id/title pair per ``<li>`` chunk of the result page.

    Nothing is collected when the text before the first ``<li>`` is empty.
    """
    parts = self.page.split('<li>')
    if parts[0] != '':
        for part in parts:
            movie_id = gutils.trim(part, '/title/tt', '/?fr=')
            label = gutils.convert_entities(gutils.trim(part, ';fm=1">', '</li>'))
            self.ids.append(movie_id)
            self.titles.append(gutils.strip_tags(label))
def get_notes(self):
    """Build ``self.notes`` from the Features, Video, Audio and Subtitles sections.

    Each section runs from its label to the next ``<b>``.  For every
    section except Subtitles, ``<br>`` / ``<br />`` become line feeds before
    tags are stripped.  Runs of CR/LF collapse to one line feed, and empty
    sections are skipped.
    """
    # (start marker, convert <br> to line feeds, text before, text after)
    sections = (
        ("Features:", True, "\nFeatures:", "\n"),
        ("Video:", True, "\nVideo:", "\n"),
        ("Audio:", True, "\nAudio:", ""),
        ("Subtitles:", False, "\nSubtitles:\n", "\n"),
    )
    notes = ""
    for marker, convert_breaks, prefix, suffix in sections:
        text = gutils.trim(self.page, marker, "<b>")
        if convert_breaks:
            text = text.replace("<br>", "\n").replace("<br />", "\n")
        text = gutils.strip_tags(text)
        if text != "":
            notes = notes + prefix + re.sub("[\r\n]+", "\n", text) + suffix
    self.notes = notes
def get_notes(self):
    """Build ``self.notes`` from the picture format, sound format and subtitle rows.

    Each row runs from its label to ``</TR>``; whitespace is normalised,
    ``<br>`` becomes CR/LF and tags are stripped.  Empty rows are skipped.
    """
    def row_text(label):
        # Same normalisation steps for every row, innermost step first.
        row = gutils.trim(self.page, label, '</TR>')
        row = re.sub('[ \t][ \t\r\n]+', ' ', row)
        row = re.sub('[\r\n]+', '', row)
        row = re.sub('(<br>|<br />)', '\r\n', row)
        return re.sub('^[ \t]+', '', gutils.strip_tags(row))

    self.notes = ""
    picture = row_text('Bildformat(e)')
    if picture != "":
        self.notes = self.notes + "Bildformat(e):\n" + picture + "\n"
    sound = row_text('Tonformat(e)')
    if sound != "":
        self.notes = self.notes + "Tonformat(e):\n" + sound + "\n\n"
    subtitles = row_text('Untertitel')
    if subtitles != "":
        self.notes = self.notes + "Untertitel:" + subtitles + "\n\n"
def get_country(self):
    """Set ``self.country`` from the ``País:`` block of the page.

    The block up to ``</div>`` is tried first, then up to ``</a>``.  The
    text is passed through ``self.__before_more``, line feeds are removed
    and runs of spaces collapse to one.
    """
    country = gutils.trim(self.page, u'<h5>País:</h5>', '</div>')
    if country == '':
        country = gutils.trim(self.page, '<h5>País:</h5>', '</a>')
    country = self.__before_more(country)
    country = re.sub('[\n]+', '', country)
    self.country = re.sub('[ ]+', ' ', country)
def get_notes(self):
    """Build ``self.notes`` from the language, sound, colour and release date blocks.

    Only non-empty values are added, one ``label: value`` line each, in
    the order language, audio, colour, release date.
    """
    def squeeze(text):
        # Remove line feeds, then collapse runs of spaces.
        return re.sub('[ ]+', ' ', re.sub('[\n]+', '', text))

    def block(label):
        text = gutils.strip_tags(gutils.trim(self.page, label, '</div>'))
        return squeeze(text).rstrip()

    language = block('<h5>Lingua:</h5>')
    color = block('<h5>Colore:</h5>')
    sound = block('<h5>Sonoro:</h5>')
    date = gutils.trim(self.page, '<h5>Data di uscita:</h5>', '<a class="tn15more inline"')
    date = gutils.clean(squeeze(date))

    self.notes = ''
    if len(language) > 0:
        self.notes += "%s: %s\n" % (_('Language'), language)
    if len(sound) > 0:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if len(color) > 0:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if len(date) > 0:
        self.notes += "%s: %s\n" % (_('Data di uscita'), date)
def search(self,parent_window):
    """Run the search and merge the result lists of the available result pages.

    The result list of the first page is cut out of ``self.page``.  The
    highest ``&page=<n>`` link in the search container gives the page
    count.  When fewer than four pages exist, pages 2..n are fetched
    through ``self.open_search`` and their result lists appended.

    Side effects: ``self.url`` is replaced for every additional page that
    is fetched; ``self.page`` ends up holding the merged result lists.

    Returns the merged result text.
    """
    list_start = '<select name="sort"'
    list_end = 'Click Here to make a Suggestion</a>'

    self.open_search(parent_window)
    # shorten the content
    merged = gutils.trim(self.page, list_start, list_end)

    # try to get all result pages (not so nice, but it works)
    pager = gutils.trim(self.page, '<div id="Search_Container" name="Search_Container">', '</table>')
    last_page = 1
    for fragment in pager.split("&page="):
        try:
            number = int(gutils.before(fragment, '\''))
        except (TypeError, ValueError):
            # fragment does not start with a page number
            number = 0
        last_page = max(last_page, number)

    # NOTE(review): with four or more result pages nothing beyond the first
    # page is loaded; this cut-off is kept exactly as it was - confirm
    # whether "at most three pages" was the intent.
    if last_page < 4:
        for page_number in range(2, last_page + 1):
            self.url = "http://www.dvdempire.com/Exec/v1_search_all.asp?&site_media_id=0&pp=&search_refined=32&used=0&page=" + str(page_number) + "&string="
            self.open_search(parent_window)
            merged = merged + gutils.trim(self.page, list_start, list_end)

    self.page = merged
    return self.page
def get_image(self):
    """Set ``self.image_url`` from the ``moviePosterTable`` cell of the page.

    A source ending in ``no_picture.png`` yields an empty URL; any other
    source is prefixed with ``http://fdb.pl``.
    """
    poster_cell = gutils.trim(self.page, 'class="moviePosterTable"', '</td>')
    source = gutils.trim(poster_cell, ' src="', "\"\n")
    if source.endswith('no_picture.png'):
        self.image_url = ''
    else:
        self.image_url = "http://fdb.pl%s" % source
def get_o_title(self):
    """Set ``self.o_title`` from the parenthesised text in ``self.tmp_page``.

    When no parenthesised title exists, the link text of the ``headline2``
    link is used; the link path depends on ``self.url_type`` (``"V"`` for
    ``/videofilm``, anything else for ``/kinofilm``).
    """
    title = gutils.trim(self.tmp_page, 'span class="standardsmall"><br />(', ")<")
    if title == "":
        if self.url_type == "V":
            headline = '"headline2"><a href="/videofilm'
        else:
            headline = '"headline2"><a href="/kinofilm'
        title = gutils.after(gutils.trim(self.tmp_page, headline, "</a>"), ">")
    self.o_title = title
def get_searches(self):
    """Collect ids and titles of the cinema and video results on the page.

    Cinema results (links to ``/kinofilm/``) are appended first with ids
    prefixed ``K_``, then video results (``/videofilm/``) with ids prefixed
    ``V_``.  The id is the link path without its query string.  The title
    is the link text followed by the ``standardsmall`` details in
    parentheses, with ``<br />`` shown as `` - ``.
    """
    for section, id_prefix in (('kinofilm', 'K_'), ('videofilm', 'V_')):
        # The optional host is a NON-capturing group: with a capturing
        # group re.split() also returns the group's match, so every link
        # carrying the absolute "http://www.kino.de" prefix produced an
        # extra bogus id/title pair.
        pattern = 'headline3"><a href="(?:http://www.kino.de)*/' + section + '/'
        # The first piece is the text before the first link, not a result.
        for element in re.split(pattern, self.page)[1:]:
            self.ids.append(id_prefix + re.sub('[?].*', '', gutils.before(element, '"')))
            details = gutils.trim(element, '<span class="standardsmall">', '</span>')
            details = details.replace('<br />', ' - ')
            title = gutils.strip_tags(gutils.trim(element, '>', '</a>') + ' (' + details + ')')
            # tidy up the doubled parentheses produced by the concatenation
            title = title.replace('( - (', '(').replace('))', ')')
            self.titles.append(title)
def search(self, parent_window):
    """Run the search and keep only the result section in ``self.page``.

    The section between ``Matchning`` and ``Hittade`` is preferred; when it
    is empty the section between ``Matchning`` and ``Visa fler`` is used.
    Returns the new ``self.page``.
    """
    self.open_search(parent_window)
    full_page = self.page
    result_section = gutils.trim(full_page, "Matchning", "Hittade")
    if result_section == "":
        result_section = gutils.trim(full_page, "Matchning", "Visa fler")
    self.page = result_section
    return self.page
def get_notes(self):
    """Build ``self.notes`` from the Features, Video, Audio and Subtitles sections.

    Each section runs from its label to the next ``<b>``.  The listed
    markers become line feeds before tags are stripped, runs of CR/LF
    collapse to one line feed, and empty sections are skipped.
    """
    breaks = ('<br>', '<br />')
    # (start marker, markers turned into line feeds, text before, text after)
    layout = (
        ('Features:', breaks, '\nFeatures:', '\n'),
        ('Video:', breaks, '\nVideo:', '\n'),
        ('Audio:', breaks + ('(more info)',), '\nAudio:', ''),
        ('Subtitles:', (), '\nSubtitles:\n', '\n'),
    )
    self.notes = ''
    for start, line_markers, before_text, after_text in layout:
        section = gutils.trim(self.page, start, '<b>')
        for marker in line_markers:
            section = section.replace(marker, '\n')
        section = gutils.strip_tags(section)
        if section == '':
            continue
        section = re.sub('[\r\n]+', '\n', section)
        self.notes = self.notes + before_text + section + after_text
def get_searches(self):
    """Collect ids and titles from the search state held in ``self.page``.

    * ``self.page is None``: a movie page was hit directly, so ``self.url``
      and ``self.title`` form the only result.
    * ``self.page is False``: nothing was found.
    * otherwise: every non-empty ``<li `` chunk yields a URL id and a title
      of the form ``title (year) - country``.
    """
    if self.page is None:
        # movie page
        self.ids.append(self.url)
        self.titles.append(gutils.convert_entities(self.title))
        return
    if self.page is False:
        # no movie found
        self.number_results = 0
        return

    # multiple matches
    chunks = self.page.split('<li ')
    self.number_results = chunks[-1]
    for chunk in chunks:
        if chunk == '':
            continue
        link = gutils.after(chunk, 'href="')
        self.ids.append('http://' + plugin_url_other + gutils.before(link, '"'))

        name = gutils.trim(link, '">', '</a>').replace('\t', '')
        details = gutils.after(link, 'class=searchResultDetails')
        year = gutils.trim(details, '>', '|').replace(" ", '')
        year = gutils.strip_tags(year)
        country = ''
        country_pos = details.find('countryIds')
        if country_pos != -1:
            country = gutils.trim(details[country_pos:], '">', '</a>')

        label = name.strip()
        if year:
            label += ' (' + year.strip() + ')'
        if country:
            label += ' - ' + country.strip()
        label = gutils.strip_tags(gutils.convert_entities(label))
        self.titles.append(label)
def picture(self):
    """Reduce ``self.page`` to the main table and set ``self.picture_url``.

    The URL is empty when the page shows the ``no.gif`` placeholder;
    otherwise it is the preview path found in the page.
    """
    # should go to sub_page function!
    self.page = gutils.trim(self.page, '<table width="100%" cellpadding="0" cellspacing="0" border="0">', "<script ")
    preview_base = 'http://film.wp.pl/f/prev/'
    if "http://film.wp.pl/f/no.gif" in self.page:
        self.picture_url = ""
    else:
        self.picture_url = preview_base + gutils.trim(self.page, "http://film.wp.pl/f/prev/", '" width=')
def get_searches(self):
    """Append one id/title pair per ``<br>``-separated chunk of the page.

    Nothing is collected when the text before the first ``<br>`` is empty.
    """
    rows = self.page.split("<br>")
    if rows[0] == '':
        return
    for row in rows:
        self.ids.append(gutils.trim(row, "<a href='view.php?page=film&fid=", "'>"))
        self.titles.append(gutils.trim(row, ">", "</a>"))
def get_notes(self):
    """Build ``self.notes`` from the language, sound mix, colour and tagline blocks.

    Only non-empty values are added, one ``label: value`` line each, in
    the order language, audio, colour, tagline.
    """
    def tidy(html):
        # strip tags, drop line feeds, collapse spaces, trim the right edge
        plain = gutils.strip_tags(html)
        plain = re.sub('[\n]+', '', plain)
        plain = re.sub('[ ]+', ' ', plain)
        return plain.rstrip()

    language = tidy(gutils.trim(self.page, '<h5>Language:</h5>', '</div>'))
    color = tidy(gutils.trim(self.page, '<h5>Color:</h5>', '</div>'))
    sound = tidy(gutils.trim(self.page, '<h5>Sound Mix:</h5>', '</div>'))
    tagline = gutils.trim(self.page, '<h5>Tagline:</h5>', '</div>')
    tagline = tidy(self.__before_more(tagline))

    notes = ''
    if len(language) > 0:
        notes = "%s: %s\n" % (_('Language'), language)
    if len(sound) > 0:
        notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if len(color) > 0:
        notes += "%s: %s\n" % (_('Color'), color)
    if len(tagline) > 0:
        notes += "%s: %s\n" % ('Tagline', tagline)
    self.notes = notes
def get_plot(self):
    """Fill ``self.plot`` from the localized pages, falling back to the original ones.

    Sources, in order: the ``<h5>Trama:</h5>`` block of ``self.page``, the
    ``swiki.2.1`` block of ``self.plot_page`` (replaces the former when
    present), and every ``<p class="plotpar">`` paragraph of
    ``self.plot_page`` (appended).  When all of that leaves the plot empty,
    the ``<h5>Plot:</h5>`` block of ``self.imdb_page`` and the paragraphs of
    ``self.imdb_plot_page`` are used instead.
    """
    # NOTE(review): the value produced by the next two statements is
    # overwritten by the '<h5>Trama:</h5>' lookup before it is ever read.
    self.plot = gutils.trim(self.page, '<b class="ch">Resumen', '<a href="/rg/title-tease/plot')
    self.plot = gutils.after(self.plot, ':</b> ')
    self.plot = gutils.trim(self.page, '<h5>Trama:</h5>', '</div>')
    self.plot = self.__before_more(self.plot)
    # a dedicated summary block on the plot page takes precedence
    tmp = gutils.trim(self.plot_page, '<div id="swiki.2.1">', '</div>')
    if tmp:
        self.plot = tmp
    elements = string.split(self.plot_page, '<p class="plotpar">')
    if len(elements) > 1:
        self.plot = self.plot + '\n\n'
        # the first piece is the text before the first paragraph; blank it
        # so the loop below skips it
        elements[0] = ''
        for element in elements:
            if element != '':
                self.plot = self.plot + gutils.strip_tags(gutils.before(element, '</a>')) + '\n'
    if not self.plot:
        # nothing in spanish found, try original
        self.plot = gutils.regextrim(self.imdb_page, '<h5>Plot:</h5>', '(</div>|<a href.*)')
        self.plot = self.__before_more(self.plot)
        elements = string.split(self.imdb_plot_page, '<p class="plotpar">')
        if len(elements) > 1:
            self.plot = self.plot + '\n\n'
            # skip the text before the first paragraph, as above
            elements[0] = ''
            for element in elements:
                if element <> '':
                    self.plot = self.plot + gutils.strip_tags(gutils.before(element, '</a>')) + '\n\n'
def get_notes(self):
    """Load the features page and build ``self.notes`` from four of its rows.

    ``self.url`` is pointed at the ``typ=features`` page, which is loaded
    via ``self.open_page``; its print-content section is stored in
    ``self.tmp_page``.  The rows Sprache, Untertitel, Mehrkanalton and EAN
    are added (tags stripped, spaces removed) when they are not empty.
    """
    self.notes = ""
    self.url = self.url_to_use + "typ=features&nr=" + str(self.movie_id)
    self.open_page(self.parent_window)
    self.tmp_page = gutils.trim(self.page, "<!-- PRINT-CONTENT-START-->", "<!-- PRINT-CONTENT-ENDE-->")

    rows = (
        ("<b>Sprache</b>", "Sprachen:\n"),
        ("<b>Untertitel</b>", "Untertitel:\n"),
        ("<b>Mehrkanalton</b>", "Mehrkanalton:\n"),
        ("<b>EAN</b>", "EAN:\n"),
    )
    for row_marker, heading in rows:
        value = gutils.strip_tags(gutils.trim(self.tmp_page, row_marker, "</TD></TR>"))
        value = value.replace(" ", "")
        if value != "":
            self.notes = self.notes + heading + value + "\n\n"
def get_rating(self):
    """Set ``self.rating`` from the ``v:average`` span inside the rates block.

    A non-empty value has spaces removed and a decimal comma turned into a
    dot, and is stored as the string form of the resulting float.  An empty
    value is stored unchanged.
    """
    rates_block = gutils.trim(self.page, '<div class=rates>', '</div>')
    average = gutils.trim(rates_block, '<span property="v:average">', '</span>')
    if average != '':
        average = average.replace(' ', '').replace(',', '.')
        average = str(float(average.strip()))
    self.rating = average
def get_image(self):
    """Set ``self.image_url`` from the poster page linked on the movie page.

    The movie page links to ``plakat.php?<query>``.  That poster page is
    fetched and searched for the ``bilder.filmdb.de`` image path.
    ``self.image_url`` is left untouched when either lookup fails.
    """
    poster_query = gutils.trim(self.page, 'plakat.php?', '\'')
    if not poster_query:
        return
    # The image path lives on the fetched poster page, so that page must be
    # searched, not the movie page.  If open_page() hands nothing back, fall
    # back to self.page (the page that was searched before this fix).
    poster_page = self.open_page(url='http://www.filmdb.de/plakat.php?' + poster_query) or self.page
    image_path = gutils.trim(poster_page, 'bilder.filmdb.de', '\'')
    if image_path:
        self.image_url = 'http://bilder.filmdb.de' + image_path
def get_searches(self):
    """Collect cinema (``K_``) and video (``V_``) results from the page.

    Titles are ``Kino: <title>`` or ``Video: <title><media type>``.  The
    title is the link text plus the text between ``<p>`` and ``<br />`` in
    parentheses.  Entries whose title would be a single blank are skipped.
    """
    # cinema results; the first piece is the text before the first link
    for element in re.split('href="/kinofilm/', self.page)[1:]:
        title = gutils.clean(gutils.trim(element, '>', '</a>'))
        title += (' (' + gutils.clean(gutils.trim(element, '<p>', "<br />")) + ')').replace('()', '')
        if title == ' ':
            continue
        self.ids.append("K_" + re.sub('[?].*', '', gutils.before(element, '"')))
        self.titles.append('Kino: ' + title)

    # checked in this order; a later match replaces an earlier one
    media_labels = (
        ('blu-ray-disc-kauf', ' (Bluray-Kauf)'),
        ('blu-ray-disc-leih', ' (Bluray-Verleih)'),
        ('dvd-leih', ' (DVD-Verleih)'),
        ('dvd-kauf', ' (DVD-Kauf)'),
    )
    # video results
    for element in re.split('href="http://www.video.de/videofilm/', self.page)[1:]:
        title = gutils.clean(gutils.trim(element, '>', '</a>'))
        title += (' (' + gutils.clean(gutils.trim(gutils.before(element, '</li>'), '<p>', "<br />")) + ')').replace('()', '')
        if title == ' ':
            continue
        video_id = re.sub('[?].*', '', gutils.before(element, '"'))
        self.ids.append("V_" + video_id)
        media = ''
        for needle, label in media_labels:
            if needle in video_id:
                media = label
        self.titles.append('Video: ' + title + media)
def get_o_title(self):
    """Rewrite ``self.url`` and set ``self.o_title`` from the page ``<title>``.

    ``plugin_server`` is replaced by ``plugin_url`` in ``self.url``.  When
    the title contains a ``/`` the part between it and the next ``(`` is
    used.  Anything from a remaining ``(`` onwards is cut off.
    """
    self.url = self.url.replace(plugin_server, plugin_url)
    title = gutils.trim(self.page, '<title>', '</title>')
    if '/' in title:
        title = gutils.trim(title, '/', '(')
    if '(' in title:
        title = gutils.before(title, '(')
    self.o_title = title
def search(self, parent_window):
    """Run the search and reduce ``self.page`` to the body of the result table.

    Returns ``None`` when ``self.open_search`` fails, an empty string when
    the page holds no result table (only one match), and the table body
    otherwise.
    """
    if not self.open_search(parent_window):
        return None
    table_marker = '<table class="animelist strippedlist"'
    if table_marker in self.page:
        # multiple matches
        table = gutils.trim(self.page, table_marker, "</tbody>")
        self.page = gutils.after(table, "<tbody>")
    else:
        # only one match!
        self.page = ''
    return self.page
def get_searches(self):
    """Collect ids and titles from the ``<h3 style="margin:0px;">`` result headers.

    ``self.number_results`` is the number of headers found.  Each header's
    first link supplies the id (``recensione.asp?id=``) and the title (its
    ``title`` attribute, tags stripped and entities converted).
    """
    chunks = self.page.split('<h3 style="margin:0px;">')
    self.number_results = len(chunks) - 1
    # the first chunk is the text before the first header
    for chunk in chunks[1:]:
        link = gutils.trim(chunk, "<a", "</a>")
        self.ids.append(gutils.trim(link, "recensione.asp?id=", '"'))
        label = gutils.strip_tags(gutils.trim(link, '" title="', '"'))
        self.titles.append(gutils.convert_entities(label))
def get_searches(self):
    """Collect ids (URLs) and titles from the ``card-link`` anchors of the page.

    Each chunk after a ``class="card-link`` marker supplies the ``href``
    value as id and the text after the first ``>`` of the anchor as title.
    URLs that start with ``/`` get ``https:`` prepended.
    """
    # the first piece is the text before the first card link, not a result
    for element in re.split('class="card-link', self.page)[1:]:
        anchor = gutils.trim(element, 'href="', '</a>')
        url = gutils.before(anchor, '"')
        # startswith() instead of url[0]: an anchor without a usable href
        # gives an empty url, and indexing it raised IndexError
        if url.startswith('/'):
            url = "https:" + url
        title = gutils.after(anchor, '>')
        self.ids.append(url)
        self.titles.append(title)
def get_cast(self):
    """Build ``self.cast`` with one ``actor as role`` line per cast entry.

    Only the part of ``self.castpage`` before ``Filmmakers</div>`` is used,
    and the first three ``linkblu`` pieces are skipped.
    """
    cast_section = gutils.before(self.castpage, 'Filmmakers</div>')
    blocks = cast_section.split('<div class="linkblu"')
    self.cast = ''
    for block in blocks[3:]:
        entry = gutils.after(gutils.trim(block, '<div', '</div>'), '>')
        entry = entry.replace('</a>', _(' as '))
        entry = gutils.clean(entry)
        entry = re.sub('(\n|\r)', '', entry)
        entry = re.sub('[ \t]+', ' ', entry)
        self.cast = self.cast + entry + '\n'
def get_studio(self):
    """Set ``self.studio`` to the comma separated production company names.

    The names are the link texts inside the ``Sociétés de Production``
    list of ``self.comp_page``; empty texts are ignored.
    """
    # OK v0.1
    section = gutils.regextrim(self.comp_page, 'Sociétés de Production<[^>]+', '</ul>')
    names = []
    for piece in section.split('href="'):
        name = gutils.trim(piece, '>', '<')
        if name:
            names.append(name)
    self.studio = ', '.join(names)
def get_rating(self):
    """Set ``self.rating`` to the rounded user rating, or 0 when none is found.

    The text between ``<h5>Nutzer-Bewertung:</h5>`` and ``/10`` is cleaned
    and its first run of digits, dots and commas is read as a number
    (decimal comma accepted).  The rating is 0 when the block is missing,
    holds no such run, or the run is not a valid number.
    """
    block = gutils.trim(self.page, '<h5>Nutzer-Bewertung:</h5>', '/10')
    # Default first: previously a block without any numeric token left the
    # raw text fragment in self.rating instead of a number.
    self.rating = 0
    if not block:
        return
    numbers = re.findall('[0-9.,]+', gutils.clean(block))
    if not numbers:
        return
    try:
        self.rating = round(float(numbers[0].replace(',', '.')))
    except ValueError:
        # the token was made of separators only, e.g. "." or ",,"
        self.rating = 0
def get_year(self):
    """Set ``self.year`` to the first four digit year found, or ``''``.

    Lookup order: the ``description`` block of ``self.page``, the
    ``standardsmall`` headline of ``self.page``, and the ``description``
    block of ``self.videopage`` when that page is available.
    """
    def description_year(html):
        # year directly followed by "<br" inside the description block
        description = gutils.trim(html, '<div class="description">', '</div>')
        if description:
            found = re.search('([0-9]{4})<br', description)
            if found:
                return found.group(1)
        return ''

    year = description_year(self.page)
    if not year:
        headline = gutils.trim(self.page, '<span class="standardsmall"><strong>', '<br')
        if headline:
            headline = gutils.trim(headline, '<strong>', '</strong>')
            if headline:
                found = re.search('([0-9]{4})', headline)
                if found:
                    year = found.group(1)
    if not year and self.videopage:
        year = description_year(self.videopage)
    self.year = year
def get_rating(self):
    """Set ``self.rating`` to the user score divided by ten, as a string.

    The score is the text between ``Unsere User haben diesen Film mit ``
    and `` bewertet.`` with the percent sign and tags removed.  Only the
    part before the first dot is used.  ``'0'`` is stored when that part
    is not an integer.
    """
    score = gutils.trim(self.page, 'Unsere User haben diesen Film mit ', ' bewertet.')
    score = gutils.strip_tags(score.replace('%', ''))
    whole_part = score.split('.')[0]
    try:
        percent = int(whole_part)
    except ValueError:
        # Only the conversion can fail; a bare "except:" would also have
        # swallowed KeyboardInterrupt and SystemExit.
        self.rating = '0'
    else:
        self.rating = str(percent / 10)
def get_studio(self):
    """Set ``self.studio`` to the comma separated production company names.

    The names are the link texts of the ``name="production"`` list in
    ``self.comp_page``, with line feeds removed and surrounding whitespace
    stripped; empty names are ignored.
    """
    section = gutils.regextrim(self.comp_page, 'name="production"', '</ul>')
    names = []
    # the first piece is the text before the first link
    for piece in section.split('href="')[1:]:
        name = gutils.trim(piece, '>', '<').replace('\n', '').strip()
        if name:
            names.append(name)
    self.studio = ', '.join(names)
def search(self,parent_window):
    """Run the search and append the result lists of all paginated sub pages.

    ``self.url`` is set to ``self.real_url_search`` for the first request
    and to each pagination link afterwards.  ``self.title`` and
    ``self.o_title`` are reset to ``''``.  Returns the merged page, which
    is also stored in ``self.page``.
    """
    self.url = self.real_url_search  # compatibility pre 0.13.1
    self.open_search(parent_window)
    print(self.url)
    merged = self.page

    # Sub Pages
    pagination = gutils.trim(merged, 'class="pagination-list"', '</ol>')
    links = re.split('href="', pagination)
    self.title = ''
    self.o_title = ''
    # the first piece is the text before the first link
    for link in links[1:]:
        self.url = gutils.before(link, '"')
        self.open_search(parent_window)
        if self.page:
            result_list = gutils.trim(self.page, 'class="search-result-list figure"', '</ol>')
            merged = merged + gutils.after(result_list, '>')
    self.page = merged
    return self.page
def get_plot(self):
    """Set ``self.plot`` from the plot summaries, or from the page description.

    Every ``<li`` entry of the ``plot-summaries-content`` list in
    ``self.plot_page`` contributes its ``<p>`` text followed by its author
    line.  When no summary is available, the ``itemprop="description"``
    text of ``self.page`` is used and the ``plotpar`` paragraphs (or
    ``odd``/``even`` list items) of ``self.plot_page`` are appended.
    """
    plotlist = string.split(gutils.trim(self.plot_page, 'id="plot-summaries-content">', '</ul>'), '<li')
    plotcompilation = ''
    for listelement in plotlist:
        # skip empty pieces and the "no summaries yet" placeholder entry
        if listelement <> '' and not 'It looks like we don\'t have any Plot Summaries for this title yet.' in listelement:
            plotcompilation = plotcompilation + gutils.trim(listelement, '<p>', '</p>') + '\n'
            # author line: tags removed, line feeds dropped, left-stripped
            plotcompilation = plotcompilation + re.sub('<[^<]+?>', '', gutils.trim(listelement, '<div class="author-container">', '</div>').replace('\n','').lstrip()) + '\n\n'
    if plotcompilation <> '':
        self.plot = plotcompilation
    else:
        # NOTE(review): everything below is assumed to belong to this
        # fallback branch - confirm against the original layout.
        self.plot = gutils.regextrim(self.page, 'itemprop="description"', '<')
        self.plot = gutils.after(self.plot, '>')
        elements = string.split(self.plot_page, '<p class="plotpar">')
        if len(elements) < 2:
            # no "plotpar" paragraphs found; try the odd/even list items
            elements = re.split('<li class="(?:odd|even)">', self.plot_page)
        if len(elements) > 1:
            self.plot = self.plot + '\n\n'
            elements[0] = ''
            # the loop starts at index 1, so the text before the first
            # paragraph is never added
            for element in elements[1:]:
                if element <> '':
                    self.plot = self.plot + gutils.strip_tags(gutils.before(element, '</a>')) + '\n\n'
def get_notes(self):
    """Build ``self.notes`` from the language, sound, colour and tagline blocks.

    Several sound formats (separated by `` | ``) are sorted first.  Only
    non-empty values are added, one ``label: value`` line each, in the
    order language, audio, colour, tagline.
    """
    def plain(html):
        # strip tags, drop line feeds, collapse spaces, trim the right edge
        text = gutils.strip_tags(html)
        text = re.sub('[ ]+', ' ', re.sub('[\n]+', '', text))
        return text.rstrip()

    language = plain(gutils.trim(self.page, '<h5>Sprache:</h5>', '</div>'))
    color = plain(gutils.trim(self.page, '<h5>Farbe:</h5>', '</div>'))
    sound = plain(gutils.trim(self.page, '<h5>Tonverfahren:</h5>', '</div>'))
    sound_formats = sound.split(' | ')
    if len(sound_formats) > 1:
        sound = ' | '.join(sorted(sound_formats))
    tagline = plain(self.__before_more(gutils.trim(self.page, '<h5>Werbezeile:</h5>', '</div>')))

    self.notes = ''
    if len(language) > 0:
        self.notes = "%s: %s\n" % (_('Language').encode('utf8'), language)
    if len(sound) > 0:
        audio_label = gutils.strip_tags(_('<b>Audio</b>').encode('utf8'))
        self.notes += "%s: %s\n" % (audio_label, sound)
    if len(color) > 0:
        self.notes += "%s: %s\n" % (_('Color').encode('utf8'), color)
    if len(tagline) > 0:
        self.notes += "%s: %s\n" % ('Tagline', tagline)
def get_searches(self):
    """Collect (and print) the id and title of every result cell of the page.

    The page is split on ``<td width=100% height=18``.  When the text
    before the first cell is empty nothing is collected and
    ``self.number_results`` is set to 0.
    """
    cells = self.page.split("<td width=100% height=18")
    self.number_results = cells[-1]
    if cells[0] == '':
        self.number_results = 0
        return
    for cell in cells:
        movie_id = gutils.trim(cell, 'align=center valign=top><a href="filme_', "_")
        print(movie_id)
        self.ids.append(movie_id)

        name = gutils.trim(cell, "<font color=#FFFFFF face=Verdana size=2>", "</font></a></b></td>")
        year = gutils.trim(cell, "Ano de Lançamento:", "<br>")
        title = gutils.strip_tags(name + ' - ' + year)
        print(title)
        self.titles.append(title)
def get_notes(self):
    """Build ``self.notes`` from the running time, four credit rows and the summary.

    The running time line is always written.  The Autor, Projekt,
    Scenariusz and Muzyka rows are added when their header is present in
    the page.  The ``dwazdania`` paragraph is appended last.
    """
    notes = "Czas trwania: " + gutils.trim(
        self.page, '<div class="sitem">Czas trwania: <b>\n\t\t', "\n</b>") + '\n'

    # (header searched in the page, line template)
    credit_rows = (
        ('<tr><th scope="row">Autor:</th>', "Autor: %s\n"),
        ('<th scope="row">Projekt:</th>', "Projekt: %s\n"),
        ('<tr><th scope="row">Scenariusz:</th>', "Scenariusz: %s\n"),
        ('<th scope="row">Muzyka:</th>', "Muzyka: %s\n"),
    )
    for header, template in credit_rows:
        position = self.page.find(header)
        if position != -1:
            # the value is the first cell after the header
            notes += template % gutils.trim(self.page[position:], "<td>\n", "\t</td>")

    notes += "\n%s" % gutils.trim(self.page, '<p class="dwazdania">\n\t\t', "\n</p>")
    self.notes = notes
def get_notes(self):
    """Build ``self.notes`` from the languages, subtitles, sound, picture and EAN entries.

    Each entry runs from its ``<strong>`` label to ``</p>`` and is cleaned
    with ``gutils.clean``; empty entries are skipped.
    """
    entries = (
        ("<strong>Sprachen:</strong>", "Sprachen:\n"),
        ("<strong>Untertitel:</strong>", "Untertitel:\n"),
        ("<strong>Tonformat:</strong>", "Tonformat:\n"),
        ("<strong>Bildformat:</strong>", "Bildformat:\n"),
        ("<strong>EAN</strong>", "EAN:\n"),
    )
    notes = ""
    for label, heading in entries:
        value = gutils.clean(gutils.trim(self.page, label, "</p>"))
        if value != "":
            notes = notes + heading + value + "\n\n"
    self.notes = notes
def get_searches(self):
    """Collect movie ids and titles from the page.

    Without a ``>Films<`` marker the page is treated as a single hit: its
    movie id is stored with an empty title.  Otherwise every movie link
    containing ``display_set=eng`` and a non-empty id is collected.
    """
    if self.page.find('>Films<') < 0:
        self.ids.append(gutils.regextrim(self.page, '="/db/movies/view[.]mhtml[?]id=', '([&"])'))
        self.titles.append('')
        return
    # the first piece is the text before the first movie link
    for link in self.page.split('<a href="/db/movies/view.mhtml?id=')[1:]:
        if link == '' or link.find('display_set=eng') == -1:
            continue
        movie_id = gutils.before(gutils.before(link, '"'), '&')
        if movie_id != '':
            self.ids.append(movie_id)
            self.titles.append(gutils.trim(link, '>', '</a>'))
def get_cameraman(self):
    """Set ``self.cameraman`` to the comma separated cinematographer names.

    The names are the link texts of the ``Cinematography by`` table in
    ``self.cast_page``, with line feeds removed and surrounding whitespace
    stripped; empty names are ignored.
    """
    table = gutils.regextrim(self.cast_page, '>Cinematography by', '</table>')
    links = table.split('href="')
    collected = []
    # index 0 is the text before the first link
    for link in links[1:]:
        name = gutils.trim(link, '>', '<')
        name = name.replace('\n', '').strip()
        if name:
            collected.append(name)
    self.cameraman = ', '.join(collected)
def _find_actor(self, type, delimiter):
    """Return the names of all cast page entries whose text contains *type*.

    type      -- text searched for inside each ``linkblu`` entry
    delimiter -- string placed between the names in the result

    The name is the part of an entry before ``</a>``.  Runs of whitespace
    in the result collapse to a single space.  Returns ``''`` when nothing
    matches.
    """
    names = []
    # the first piece is the text before the first entry
    for block in self.castpage.split('<div class="linkblu"')[1:]:
        actorandrole = gutils.after(gutils.trim(block, '<div', '</div>'), '>')
        # NOTE(review): "> 0" ignores a match at the very start of the
        # entry; kept as is - confirm whether ">= 0" was intended.
        if actorandrole.find(type) > 0:
            names.append(gutils.before(actorandrole, '</a>'))
    # join() replaces "append delimiter, then cut the last one off": with an
    # empty delimiter the old slice result[:-0] threw the whole result away.
    return re.sub('[\n\r\t ]+', ' ', delimiter.join(names))
def get_notes(self):
    """Build ``self.notes`` from the picture format, sound format and subtitle rows.

    Every row is taken from its label up to ``</TR>``, whitespace is
    normalised, ``<br>`` becomes CR/LF and tags are stripped.  Rows that
    end up empty are left out.
    """
    # (row label in the page, text before the value, text after the value)
    rows = (
        ('Bildformat(e)', "Bildformat(e):\n", "\n"),
        ('Tonformat(e)', "Tonformat(e):\n", "\n\n"),
        ('Untertitel', "Untertitel:", "\n\n"),
    )
    notes = ""
    for label, heading, ending in rows:
        html = gutils.trim(self.page, label, '</TR>')
        html = re.sub('[ \t][ \t\r\n]+', ' ', html)
        html = re.sub('[\r\n]+', '', html)
        html = re.sub('(<br>|<br />)', '\r\n', html)
        value = re.sub('^[ \t]+', '', gutils.strip_tags(html))
        if value != "":
            notes = notes + heading + value + ending
    self.notes = notes
def search(self, parent_window):
    """Run the search and reduce ``self.page`` to the result list.

    The ``Titres Populaires`` table is preferred.  Without it, and without
    an ``(Affichant <n> Résultats)`` marker, the deeper ``find?more=tt``
    search URL is loaded, and the list after ``(Affichant`` is used.  The
    page is decoded from ISO-8859-1 and entities are converted.

    Returns the resulting page, or ``None`` when a request fails.
    """
    if not self.open_search(parent_window):
        return None
    popular = gutils.trim(self.page, 'Titres Populaires', '</table>')
    if popular:
        self.page = popular
    else:
        if not re.search('[(]Affichant [1-9][0-9]* Résultats[)]', self.page):
            # nothing or one result found, try another url which looks deeper in the imdb database
            # example: Adventures of Falcon -> one result, jumps directly to the movie page
            # which isn't supported by this plugin
            self.url = 'http://www.imdb.fr/find?more=tt&q='
            if not self.open_search(parent_window):
                return None
        self.page = gutils.trim(self.page, '(Affichant', '>Suggestions pour améliorer')
    decoded = self.page.decode('iso-8859-1')
    # correction of all &#xxx entities
    self.page = gutils.convert_entities(decoded)
    return self.page
def get_searches(self):
    """Collect ids and titles from the ``filmanzeige.php?filmid=`` links of the page.

    The id is the text up to the first ``>`` or ``"`` of a link chunk.  The
    title is the link text followed by two table cell values in
    parentheses, with line feeds shown as `` - ``.
    """
    chunks = re.split('(?:href=["]*filmanzeige[.]php[?]filmid=)', self.page)
    # the first chunk is the text before the first link
    for chunk in chunks[1:]:
        if chunk == '':
            continue
        id_end = re.search('([>]|["])', chunk)
        if not id_end:
            continue
        self.ids.append(chunk[:id_end.end() - 1])
        # <wbr> markers break the result list; they are removed in two
        # passes, exactly as before
        for _pass in range(2):
            for marker in ('<wbr>', '<wbr />'):
                chunk = chunk.replace(marker, '')
        name = gutils.trim(chunk, '>', '<')
        first_detail = gutils.trim(gutils.after(chunk, '<td>'), '<td>', '</td>')
        second_detail = gutils.trim(gutils.after(chunk, '</a>'), '<td>', '</td>')
        title = name + ' (' + first_detail + ', ' + second_detail + ')'
        self.titles.append(title.replace('\n', ' - '))
def get_searches(self):
    """Collect the id and title of every ``<li>`` search result.

    Results without a ``?codice=`` id are skipped.  The title is the bold
    text passed through ``self.capwords``, followed by the ``[yyyy]`` year
    when one is present.  When the text before the first ``<li>`` is empty
    nothing is collected and ``self.number_results`` is set to 0.
    """
    items = self.page.split("<li>")
    self.number_results = items[-1]
    if items[0] == '':
        self.number_results = 0
        return
    for item in items:
        movie_id = gutils.trim(item, "?codice=", '">')
        if movie_id == '':
            continue
        self.ids.append(movie_id)
        title = self.capwords(gutils.convert_entities(gutils.trim(item, "<b>", "</b>")))
        year_match = re.search('([[][0-9]{4}[]])', item)
        if year_match and year_match.group(0):
            self.titles.append(title + ' ' + year_match.group(0))
        else:
            self.titles.append(title)
def get_screenplay(self):
    """Extract the screenplay credits from self.page into self.screenplay.

    The text between the 'Sceneggiatura' and 'Fotografia' table cells is
    taken, every link is prefixed with a comma, the result is passed
    through gutils.clean() and the commas are normalised to ', '.
    """
    section = gutils.trim(
        self.page, '>Sceneggiatura</font></td>',
        '<td class="bd_scheda_td"><font class="fontViolaB">Fotografia</font></td>')
    # mark the start of each linked name so the names can be told apart
    # once gutils.clean() has processed the markup
    marked = section.replace('<a href', ',<a href')
    # drop the first character (the comma put in front of the first link)
    text = gutils.clean(marked)[1:]
    # beautification: exactly one blank after each comma, no runs of blanks
    text = re.sub('[ ]*,', ', ', text)
    self.screenplay = re.sub('[ ]+', ' ', text)
def get_cameraman(self):
    """Store the camera crew from self.page in self.cameraman.

    Looks inside the 'person-collection' section for the part between the
    'Kamera' heading and the next '<h3', takes the link text of every
    link found there and joins the names with ', '.  self.cameraman is
    '' when no link is found.
    """
    people = gutils.trim(self.page, 'id="person-collection"', '</section>')
    camera_part = gutils.regextrim(people, 'Kamera[^<]*[<][/]h3[>]', '<h3')
    names = []
    # The first chunk is the text before the first link, so it is skipped.
    for chunk in re.split('href="', camera_part)[1:]:
        # skip the rest of the href value and the tag, keep the link text
        name = gutils.before(
            gutils.after(gutils.after(chunk, '"'), '>'), '<')
        name = re.sub('<small[^>]*>[^<]*</small>', '', name)
        name = gutils.strip_tags(name)
        name = name.replace('\n', '')
        name = re.sub('[ \t]+', ' ', name)
        names.append(name)
    # join() puts the separator between the names; the previous
    # "name + delimiter" concatenation glued the first two names together
    # and left a trailing ', '.
    self.cameraman = ', '.join(names)
def get_o_title(self):
    """Determine the original title and store it in self.o_title.

    First choice is the text that precedes the '(Originaltitel)' marker in
    the 'text_ergebniss_faz_3' element; only the part after its last comma
    is kept and it is passed through string.capwords().  If that yields an
    empty string, the text of the first link in that element is used
    instead.
    """
    marked = gutils.regextrim(
        self.page, 'class="text_ergebniss_faz_3"',
        '[ \t]+[(]Originaltitel[)]')
    title = gutils.clean(gutils.after(marked, '</a>'))
    last_comma = title.rfind(',')
    # a comma at position 0 is deliberately left alone (same as not found)
    if last_comma > 0:
        title = title[last_comma + 1:]
    title = string.capwords(title)
    if title == '':
        # no original title given, fall back to the link text
        title = gutils.after(
            gutils.trim(self.page, 'class="text_ergebniss_faz_3"', '</a>'),
            '>')
    self.o_title = title
def get_searches(self):
    """Fill self.ids and self.titles with cinema and video search hits.

    Cinema hits are the links starting with '/kinofilm/'; their ids get a
    'K_' prefix and their titles a 'Kino: ' prefix.  Video hits are the
    links starting with 'http://www.video.de/videofilm/'; their ids get a
    'V_' prefix and their titles a 'Video: ' prefix plus a media label
    derived from the id.  Query strings ('?...') are removed from the ids.
    """
    # --- cinema entries; the chunk before the first link is skipped
    for chunk in re.split('href="/kinofilm/', self.page)[1:]:
        name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
        details = gutils.clean(gutils.trim(chunk, '<p>', "<br />"))
        # an empty detail text would give ' ()', which is reduced to ' '
        title = name + (' (' + details + ')').replace('()', '')
        if title == ' ':
            continue
        self.ids.append(
            "K_" + re.sub('[?].*', '', gutils.before(chunk, '"')))
        self.titles.append('Kino: ' + title)

    # --- video entries
    media_labels = (
        ('blu-ray-disc-kauf', ' (Bluray-Kauf)'),
        ('blu-ray-disc-leih', ' (Bluray-Verleih)'),
        ('dvd-leih', ' (DVD-Verleih)'),
        ('dvd-kauf', ' (DVD-Kauf)'),
    )
    video_chunks = re.split(
        'href="http://www.video.de/videofilm/', self.page)
    for chunk in video_chunks[1:]:
        name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
        details = gutils.clean(
            gutils.trim(gutils.before(chunk, '</li>'), '<p>', "<br />"))
        title = name + (' (' + details + ')').replace('()', '')
        if title == ' ':
            continue
        video_id = re.sub('[?].*', '', gutils.before(chunk, '"'))
        self.ids.append("V_" + video_id)
        media = ''
        for marker, label in media_labels:
            # no break: when several markers occur the last one wins
            if marker in video_id:
                media = label
        self.titles.append('Video: ' + title + media)
def search(self, parent_window):
    """Load the search page and reduce self.page to the result listing.

    If the page has a section starting with 'Here are the', that section
    is used.  Otherwise the page is checked for a '(Displaying N Results)'
    marker; when it is missing the deeper 'find?more=tt' url is loaded
    instead, and in both cases the part after '(Displaying' is used.

    Returns the decoded, entity-converted listing (also stored in
    self.page), or None as soon as self.open_search() reports failure.
    """
    if not self.open_search(parent_window):
        return None
    tmp_page = gutils.trim(self.page, 'Here are the', '</TABLE>')
    if tmp_page:
        self.page = tmp_page
    else:
        # re.search instead of re.match: the marker is somewhere inside the
        # page, not at its very first character.  The count may contain
        # any digit, so [0-9] rather than [0-7].
        has_results = re.search(
            '[(]Displaying [1-9][0-9]* Result[s]*[)]', self.page)
        if not has_results:
            # nothing or one result found, try another url which looks
            # deeper in the imdb database
            # example: Adventures of Falcon -> one result, jumps directly
            # to the movie page which isn't supported by this plugin
            self.url = 'http://www.imdb.com/find?more=tt;q='
            if not self.open_search(parent_window):
                return None
        self.page = gutils.trim(
            self.page, '(Displaying',
            '>Suggestions For Improving Your Results<')
    self.page = self.page.decode('iso-8859-1')
    # correction of all &#xxx entities
    self.page = gutils.convert_entities(self.page)
    return self.page
def search(self, parent_window):
    """Load the search page and reduce self.page to the result listing.

    If the page has a section starting with 'Titoli popolari', that
    section is used.  Otherwise the page is checked for a
    '(Visualizza N risultati)' marker; when it is missing the deeper
    'find?more=tt' url is loaded instead, and in both cases the part after
    '(Visualizza' is used.

    Returns the decoded, entity-converted listing (also stored in
    self.page), or None as soon as self.open_search() reports failure.
    """
    if not self.open_search(parent_window):
        return None
    tmp_page = gutils.trim(self.page, 'Titoli popolari', '</table>')
    if tmp_page:
        self.page = tmp_page
    else:
        # re.search instead of re.match: the marker is somewhere inside the
        # page, not at its very first character.  The count may contain
        # any digit, so [0-9] rather than [0-7].
        has_results = re.search(
            '[(]Visualizza [1-9][0-9]* risultat[io]*[)]', self.page)
        if not has_results:
            # nothing or one result found, try another url which looks
            # deeper in the imdb database
            # example: Adventures of Falcon -> one result, jumps directly
            # to the movie page which isn't supported by this plugin
            self.url = 'http://italian.imdb.com/find?more=tt;q='
            if not self.open_search(parent_window):
                return None
        self.page = gutils.trim(
            self.page, '(Visualizza',
            '>Suggerimenti per migliorare i tuoi risultati<')
    self.page = self.page.decode('iso-8859-1')
    # correction of all &#xxx entities
    self.page = gutils.convert_entities(self.page)
    return self.page
def get_cast(self):
    """Extract the actors from self.page into self.cast, one per line.

    The text between the 'Attori' and 'Soggetto' table cells is taken,
    every link is prefixed with '---', the result is passed through
    gutils.clean() and each '---' marker is then turned into a newline.
    """
    # OK v1.5
    section = gutils.trim(
        self.page, '>Attori</font></td>',
        '<td class="bd_scheda_td"><font class="fontViolaB">Soggetto</font></td>')
    marked = section.replace('<a href', '---<a href')
    # drop the first three characters (the marker before the first link)
    text = gutils.clean(marked)[3:]
    # beautification: markers become line breaks, runs of blanks collapse
    text = re.sub('[ ]*---', '\n', text)
    text = re.sub('[ ]+', ' ', text)
    # no blanks directly inside parentheses
    self.cast = text.replace('( ', '(').replace(' )', ')')