def get_searches(self):
    """Collect DVD / Blu-ray hits from self.page into self.ids / self.titles.

    The split pattern has one capturing group, so re.split yields
    [prefix, path, text, path, text, ...]; the list is walked in
    (path, text) pairs.
    """
    link_pattern = ' <a title="[^"]+" href="(/datenbank/medien/dvd/|/datenbank/medien/blu-ray/)'
    parts = re.split(link_pattern, self.page)
    parts[0] = None
    for pos in range(1, len(parts), 2):
        tail = parts[pos + 1]
        if tail is None:
            continue
        if parts[pos] == '/datenbank/medien/blu-ray/':
            medium = 'Blu-Ray'
            id_prefix = 'blu-ray/'
        else:
            medium = 'DVD'
            id_prefix = 'dvd/'
        self.ids.append(id_prefix + gutils.before(tail, '"'))
        name = gutils.trim(tail, '>', '</a>')
        details = gutils.regextrim(tail, '<div [^>]*>', '</div>')
        details = details.replace('<br>', ' - ').replace(' ', '')
        # Collapse whitespace runs left over from the markup.
        details = re.sub('[ \t\n]+', ' ', details)
        self.titles.append(
            name + gutils.clean('(' + medium + ' - ' + details + ')'))
def get_runtime(self):
    """Set self.runtime from the 'Durée : ' field of self.page.

    A value containing 'H' (not at index 0) is converted to total minutes
    as a string; otherwise the text in front of ' mn' is kept.
    """
    raw = gutils.clean(gutils.trim(self.page, u'Durée : ', '</span>'))
    self.runtime = raw
    if not raw:
        return
    if raw.find('H') > 0:
        hours = int(gutils.before(raw, 'H'))
        minutes = int(gutils.after(raw, 'H'))
        self.runtime = str(hours * 60 + minutes)
    else:
        self.runtime = gutils.before(raw, ' mn')
def get_searches(self):
    """Append cinema ("K_") and video ("V_") hits to self.ids / self.titles."""
    # Cinema results; the chunk in front of the first link is not a hit.
    for chunk in re.split('href="/kinofilm/', self.page)[1:]:
        name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
        info = gutils.clean(gutils.trim(chunk, '<p>', "<br />"))
        title = name + (' (' + info + ')').replace('()', '')
        if title == ' ':
            continue
        self.ids.append("K_" + re.sub('[?].*', '', gutils.before(chunk, '"')))
        self.titles.append('Kino: ' + title)

    # Later entries win when several markers match, as with sequential ifs.
    media_labels = (
        ('blu-ray-disc-kauf', ' (Bluray-Kauf)'),
        ('blu-ray-disc-leih', ' (Bluray-Verleih)'),
        ('dvd-leih', ' (DVD-Verleih)'),
        ('dvd-kauf', ' (DVD-Kauf)'),
    )
    for chunk in re.split('href="http://www.video.de/videofilm/', self.page)[1:]:
        name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
        info = gutils.clean(
            gutils.trim(gutils.before(chunk, '</li>'), '<p>', "<br />"))
        title = name + (' (' + info + ')').replace('()', '')
        if title == ' ':
            continue
        video_id = re.sub('[?].*', '', gutils.before(chunk, '"'))
        self.ids.append("V_" + video_id)
        media = ''
        for marker, label in media_labels:
            if marker in video_id:
                media = label
        self.titles.append('Video: ' + title + media)
def get_title(self):
    """Set self.title from the page <title>, cut at the first '(' and '/'.

    Also rewrites self.url, replacing plugin_server with plugin_url.
    """
    self.url = self.url.replace(plugin_server, plugin_url)
    self.title = gutils.trim(self.page, '<title>', '</title>')
    for marker in ('(', '/'):
        if marker in self.title:
            self.title = gutils.before(self.title, marker)
def get_image(self):
    """Set self.image_url, trying several page layouts in turn.

    Order: the '"picture":' value of self.page, then the cover-area div
    (video page) or the images.kino.de links (kino page), and finally the
    cover-area div of self.videopage.
    """
    cover_pattern = '(http[:][/][/][^/]+[/]flbilder[/][^"\']+)'
    kino_prefix = 'http://images.kino.de/s/'

    picture = gutils.trim(self.page, '"picture":', ',')
    self.image_url = picture.replace('"', '').replace('\\', '')

    if not self.image_url:
        cover_area = gutils.regextrim(
            self.page, '<div class="cover-area">', '</div>')
        if cover_area:
            # video page
            hit = re.search(cover_pattern, cover_area)
            if hit:
                self.image_url = hit.group(1)
        else:
            # kino page
            head = gutils.before(
                self.page, '<span style="line-height: 15px;">')
            if head:
                pieces = re.split(kino_prefix, head)
                # The second image link is preferred over the first.
                chosen = None
                if len(pieces) > 2:
                    chosen = pieces[2]
                elif len(pieces) > 1:
                    chosen = pieces[1]
                if chosen is not None:
                    self.image_url = kino_prefix + gutils.before(chosen, '"')

    if not self.image_url and self.videopage:
        cover_area = gutils.regextrim(
            self.videopage, '<div class="cover-area">', '</div>')
        if cover_area:
            # video page
            hit = re.search(cover_pattern, cover_area)
            if hit:
                self.image_url = hit.group(1)
def get_image(self):
    """Set self.image_url from self.page, falling back to self.videopage."""

    def cover_from(area):
        # Return the flbilder URL found in a cover-area fragment, or None.
        found = re.search('(http[:][/][/][^/]+[/]flbilder[/][^"\']+)', area)
        return found.group(1) if found else None

    json_value = gutils.trim(self.page, '"picture":', ',')
    self.image_url = json_value.replace('"', '').replace('\\', '')

    if not self.image_url:
        area = gutils.regextrim(self.page, '<div class="cover-area">', '</div>')
        if area:
            # video page
            url = cover_from(area)
            if url is not None:
                self.image_url = url
        else:
            # kino page
            before_span = gutils.before(
                self.page, '<span style="line-height: 15px;">')
            if before_span:
                segments = re.split('http://images.kino.de/s/', before_span)
                count = len(segments)
                if count > 2:
                    self.image_url = ('http://images.kino.de/s/'
                                      + gutils.before(segments[2], '"'))
                elif count > 1:
                    self.image_url = ('http://images.kino.de/s/'
                                      + gutils.before(segments[1], '"'))

    if not self.image_url and self.videopage:
        area = gutils.regextrim(
            self.videopage, '<div class="cover-area">', '</div>')
        if area:
            # video page
            url = cover_from(area)
            if url is not None:
                self.image_url = url
def get_plot(self):
    """Build self.plot from the localized pages, else from the IMDb pages.

    The localized plot comes from the '<h5>Trama:</h5>' block of self.page,
    is replaced by the 'swiki.2.1' block of self.plot_page when that is
    non-empty, and is extended with every '<p class="plotpar">' paragraph.
    If nothing was found, the same is attempted on self.imdb_page and
    self.imdb_plot_page.

    The former leading 'Resumen' lookup was removed: its result was
    overwritten unconditionally by the 'Trama' lookup and never used.
    NOTE(review): this assumes gutils.trim / gutils.after have no side
    effects - confirm.
    """
    self.plot = gutils.trim(self.page, '<h5>Trama:</h5>', '</div>')
    self.plot = self.__before_more(self.plot)
    wiki_plot = gutils.trim(self.plot_page, '<div id="swiki.2.1">', '</div>')
    if wiki_plot:
        self.plot = wiki_plot
    paragraphs = self.plot_page.split('<p class="plotpar">')
    if len(paragraphs) > 1:
        self.plot = self.plot + '\n\n'
        # paragraphs[0] is the markup in front of the first paragraph.
        for paragraph in paragraphs[1:]:
            if paragraph != '':
                self.plot = (self.plot
                             + gutils.strip_tags(gutils.before(paragraph, '</a>'))
                             + '\n')
    if not self.plot:
        # nothing in spanish found, try original
        self.plot = gutils.regextrim(
            self.imdb_page, '<h5>Plot:</h5>', '(</div>|<a href.*)')
        self.plot = self.__before_more(self.plot)
        paragraphs = self.imdb_plot_page.split('<p class="plotpar">')
        if len(paragraphs) > 1:
            self.plot = self.plot + '\n\n'
            for paragraph in paragraphs[1:]:
                if paragraph != '':
                    self.plot = (self.plot
                                 + gutils.strip_tags(gutils.before(paragraph, '</a>'))
                                 + '\n\n')
def get_searches(self):
    """Append cinema ("K_") and video ("V_") hits to self.ids / self.titles.

    The optional host part of the link is matched with a NON-capturing
    group. With a capturing group re.split inserts the group text into the
    result list, so an absolute link produced a bogus extra hit made from
    the literal 'http://www.kino.de'.
    """

    def describe(chunk):
        # Title plus the "standardsmall" details in parentheses, tags removed.
        details = gutils.trim(chunk, '<span class="standardsmall">', '</span>')
        text = (gutils.trim(chunk, '>', '</a>')
                + ' (' + details.replace('<br />', ' - ') + ')')
        text = gutils.strip_tags(text)
        return text.replace('( - (', '(').replace('))', ')')

    for id_prefix, section in (('K_', 'kinofilm'), ('V_', 'videofilm')):
        pattern = ('headline3"><a href="(?:http://www.kino.de)*/'
                   + section + '/')
        # The first piece is the page text in front of the first hit.
        for chunk in re.split(pattern, self.page)[1:]:
            self.ids.append(
                id_prefix + re.sub('[?].*', '', gutils.before(chunk, '"')))
            self.titles.append(describe(chunk))
def get_searches(self):
    """Append cinema ("K_") hits, or else video ("V_") hits, from self.page.

    Video results are only looked at when the page does not mention
    "kinofilm.php4" at all.
    NOTE(review): this nesting of the cinema / video branches is assumed -
    confirm against the original layout.
    """

    def harvest(split_key, id_prefix):
        chunks = self.page.split(split_key)
        if chunks[0] == '':
            return
        # chunks[0] is the text in front of the first hit.
        for chunk in chunks[1:]:
            if chunk == '':
                continue
            self.ids.append(id_prefix + gutils.before(chunk, "&"))
            people = gutils.trim(
                chunk, '<span class="standardsmall"><b>', "</span>")
            self.titles.append(gutils.strip_tags(
                gutils.trim(chunk, ">", "</A>")
                + " "
                + gutils.trim(chunk, '<span CLASS="standardsmall"><br>', "</SPAN>")
                + " (" + people.replace("<b>", ", ") + ")"))

    if "kinofilm.php4" in self.page:
        harvest('headline3"><A HREF="/kinofilm.php4?nr=', "K_")
    elif "videofilm.php4" in self.page:
        harvest('headline3"><A HREF="/videofilm.php4?nr=', "V_")
def get_plot(self):
    """Assemble self.plot from the German pages, else from the IMDb pages.

    A non-empty compilation built from the 'plot-summaries-content' list of
    self.plot_page replaces whatever was gathered before.
    NOTE(review): the compilation step is assumed to run unconditionally,
    not only inside the fallback branch - confirm.
    """

    def description_of(html):
        meta = gutils.trim(html, 'name="description"', '/>')
        return gutils.before(gutils.after(meta, 'content="'), '"')

    def summaries_of(html):
        # '' when the page has no plotSummary sections at all.
        pieces = html.split('class="plotSummary"')
        if len(pieces) <= 1:
            return ''
        text = '\n\n'
        for piece in pieces[1:]:
            if piece != '':
                text = (text
                        + gutils.strip_tags(
                            gutils.before(gutils.after(piece, '>'), '</a>'))
                        + '\n\n')
        return text

    self.plot = description_of(self.page)
    self.plot = self.plot + summaries_of(self.plot_page)
    if self.plot == '':
        # nothing in german found, try original
        self.plot = description_of(self.imdb_page)
        self.plot = self.plot + summaries_of(self.imdb_plot_page)

    summary_list = gutils.trim(
        self.plot_page, 'id="plot-summaries-content">', '</ul>')
    compiled = []
    for item in summary_list.split('<li'):
        if item == '':
            continue
        compiled.append(gutils.trim(item, '<p>', '</p>') + '\n')
        author = gutils.trim(item, '<div class="author-container">', '</div>')
        compiled.append(
            re.sub('<[^<]+?>', '', author.replace('\n', '').lstrip()) + '\n\n')
    compilation = ''.join(compiled)
    if compilation != '':
        self.plot = compilation
def get_searches(self):
    """Append cinema ("K_") and video ("V_") hits to self.ids / self.titles."""

    def title_of(chunk):
        # Link text plus the optional " (details)" suffix.
        link_text = gutils.clean(gutils.trim(chunk, '>', '</a>'))
        details = gutils.clean(gutils.trim(chunk, '<p>', "<br />"))
        return link_text + (' (' + details + ')').replace('()', '')

    def link_id(chunk):
        # Link target with any query string removed.
        return re.sub('[?].*', '', gutils.before(chunk, '"'))

    kino_parts = re.split('href="/kinofilm/', self.page)
    for part in kino_parts[1:]:
        title = title_of(part)
        if title != ' ':
            self.ids.append("K_" + link_id(part))
            self.titles.append('Kino: ' + title)

    video_parts = re.split('href="http://www.video.de/videofilm/', self.page)
    for part in video_parts[1:]:
        title = title_of(part)
        if title != ' ':
            video_id = link_id(part)
            self.ids.append("V_" + video_id)
            medium = ''
            # Checked in this order; a later match overrides an earlier one.
            if 'blu-ray-disc-kauf' in video_id:
                medium = ' (Bluray-Kauf)'
            if 'blu-ray-disc-leih' in video_id:
                medium = ' (Bluray-Verleih)'
            if 'dvd-leih' in video_id:
                medium = ' (DVD-Verleih)'
            if 'dvd-kauf' in video_id:
                medium = ' (DVD-Kauf)'
            self.titles.append('Video: ' + title + medium)
def initialize(self):
    """Cache the main, credits and features pages, each cut at 'kinode_navi1'.

    self.url and self.page are overwritten while the two sub-pages are
    opened through self.open_page.
    """
    end_marker = 'kinode_navi1'
    self.tmp_page = gutils.before(self.page, end_marker)
    sub_pages = (
        ('/credits/', 'tmp_creditspage'),
        ("/features/", 'tmp_dvdfeaturespage'),
    )
    for section, attribute in sub_pages:
        self.url = self.url_to_use + str(self.movie_id).replace('/', section)
        self.open_page(self.parent_window)
        setattr(self, attribute, gutils.before(self.page, end_marker))
def get_director(self):
    """Set self.director from the director field of self.page.

    The name is taken from after '-->' up to '<!--' when the field contains
    '-->', otherwise from between '<B>' and '</B>'.
    """
    field = gutils.trim(self.page, '<BR>Re\xbfyseria: ', '<BR>')
    self.director = field
    if field.find('-->') != -1:
        opening, closing = '-->', "<!--"
    else:
        opening, closing = '<B>', '</B>'
    self.director = gutils.after(self.director, opening)
    self.director = gutils.before(self.director, closing)
def get_image(self):
    """Set self.image_url to the first '.jpg' image of the poster div.

    self.image_url is left untouched when no such image exists.
    """
    poster = gutils.trim(self.page, '<div class="poster">', '</div>')
    candidates = re.split('<img[ \t]+src=[\'"]', poster)
    for candidate in candidates[1:]:
        # The src value may be closed by either quote character.
        candidate = gutils.before(gutils.before(candidate, '"'), '\'')
        if '.jpg' in candidate:
            self.image_url = candidate
            break
def initialize(self):
    """Cache the main, credits and features pages up to the print-end marker.

    self.url and self.page are overwritten while the two sub-pages are
    opened through self.open_page.
    """
    end_marker = "<!-- PRINT-CONTENT-ENDE-->"

    def load(section):
        # Open the sub-page whose URL has `section` in place of each '/'.
        self.url = self.url_to_use + str(self.movie_id).replace("/", section)
        self.open_page(self.parent_window)
        return gutils.before(self.page, end_marker)

    self.tmp_page = gutils.before(self.page, end_marker)
    self.tmp_creditspage = load("/credits/")
    self.tmp_dvdfeaturespage = load("/features/")
def director(self):
    """Set self.director from the director field of self.page.

    The name is taken from after "-->" up to "<!--" when the field contains
    "-->", otherwise from between "<B>" and "</B>".
    """
    self.director = gutils.trim(self.page, "<BR>Re¿yseria: ", "<BR>")
    has_comment = self.director.find("-->") != -1
    start_mark = "-->" if has_comment else "<B>"
    end_mark = "<!--" if has_comment else "</B>"
    self.director = gutils.after(self.director, start_mark)
    self.director = gutils.before(self.director, end_mark)
def get_o_title(self):
    """Set self.o_title from the origTitle heading, else from <title>.

    Also rewrites self.url, replacing plugin_server with plugin_url.
    NOTE(review): the '(' / '/' trimming is assumed to apply only to the
    <title> fallback - confirm against the original layout.
    """
    self.url = self.url.replace(plugin_server, plugin_url)
    heading = gutils.trim(self.page, '<h2 class=origTitle>', '</h2>')
    self.o_title = gutils.after(heading, '</span>')
    if self.o_title != '':
        return
    self.o_title = gutils.trim(self.page, '<title>', '</title>')
    for marker in ('(', '/'):
        if marker in self.o_title:
            self.o_title = gutils.before(self.o_title, marker)
def get_image(self):
    """Set self.image_url to a flbilder '.jpg' link of the content area.

    The second link is preferred over the first; self.image_url stays ''
    when none is found.
    """
    image_base = 'http://images.kino.de/flbilder'
    self.image_url = ''
    content = self.regextrim(
        self.tmp_page,
        '(PRINT[-]CONTENT[-]START|<td class="content">)',
        '(Dieser Film wurde |>FOTOSHOW<|>KRITIK<)')
    pieces = re.split('src="http://images.kino.de/flbilder', content)
    if len(pieces) > 2:
        chosen = pieces[2]
    elif len(pieces) > 1:
        chosen = pieces[1]
    else:
        return
    path = gutils.before(chosen, '.jpg')
    if path != '':
        self.image_url = image_base + path + '.jpg'
def get_searches(self):
    """Append the '/filmes/' hits of self.page to self.ids / self.titles.

    self.number_results becomes the last split chunk, or 0 when the page
    starts with the link marker (or is empty).
    """
    chunks = self.page.split('<a href="/filmes/')
    self.number_results = chunks[-1]
    if not chunks[0]:
        self.number_results = 0
        return
    for chunk in chunks:
        movie_id = gutils.digits_only(gutils.before(chunk, '"'))
        label = gutils.before(gutils.after(chunk, '>'), '</small>')
        label = label.replace('<small>', ' / ')
        label = gutils.clean(re.sub('</div>.*', '', label))
        if movie_id and label and label[0] != '<':
            self.ids.append(movie_id)
            self.titles.append(gutils.convert_entities(label))
def get_searches(self):
    """Append movie hits from self.page to self.ids / self.titles.

    A page without '>Films<' contributes a single id with an empty title.
    Otherwise only links containing 'display_set=eng' are taken.
    """
    if '>Films<' not in self.page:
        self.ids.append(gutils.regextrim(
            self.page, '="/db/movies/view[.]mhtml[?]id=', '([&"])'))
        self.titles.append('')
        return
    links = self.page.split('<a href="/db/movies/view.mhtml?id=')
    # links[0] is the text in front of the first movie link.
    for link in links[1:]:
        if link == '' or 'display_set=eng' not in link:
            continue
        movie_id = gutils.before(gutils.before(link, '"'), '&')
        if movie_id != '':
            self.ids.append(movie_id)
            self.titles.append(gutils.trim(link, '>', '</a>'))
def get_searches(self):
    """Append cinema ("K_") and video ("V_") hits to self.ids / self.titles.

    Titles are prefixed with "Kino: " or "Video: ".

    The optional host part of the link is matched with a NON-capturing
    group. With a capturing group re.split inserts the group text into the
    result list, so an absolute link produced a bogus extra hit made from
    the literal 'http://www.kino.de'.
    """
    link_head = 'headline3"[^>]*>[ \t\r\n]*<a href="(?:http://www.kino.de)*/'

    def describe(chunk):
        # Title plus the "standardsmall" details in parentheses, tags removed.
        details = gutils.trim(chunk, '<span class="standardsmall">', "</span>")
        text = gutils.strip_tags(
            gutils.trim(chunk, ">", "</a>")
            + " (" + details.replace("<br />", " - ") + ")")
        return text.replace("( - (", "(").replace("))", ")")

    sections = (
        ("kinofilm/", "K_", "Kino: "),
        ("videofilm/", "V_", "Video: "),
    )
    for section, id_prefix, title_prefix in sections:
        # The first piece is the page text in front of the first hit.
        for chunk in re.split(link_head + section, self.page)[1:]:
            self.ids.append(
                id_prefix + re.sub("[?].*", "", gutils.before(chunk, '"')))
            self.titles.append(title_prefix + describe(chunk))
def get_searches(self):
    """Append search hits from self.page to self.ids / self.titles.

    A page shorter than 20 characters is treated as an immediate redirect
    to a movie page: it is stored as the only id, with self.url as title.
    """
    if not self.page:
        return
    if len(self.page) < 20:
        # immediate redirection to the movie page
        self.number_results = 1
        self.ids.append(self.page)
        self.titles.append(self.url)
        return
    # multiple matches
    chunks = self.page.split('</a></b>')
    if chunks[0] == '':
        return
    # Each hit is paired with the chunk that follows it.
    for current, following in zip(chunks[:-1], chunks[1:]):
        movie_id = gutils.trim(current, '<b><a href="/es/film', '.html')
        if not movie_id:
            continue
        self.ids.append(movie_id)
        link = gutils.clean(gutils.after(current, '<b><a href="/es/film'))
        link = link.replace("\n", "")
        name = gutils.strip_tags(
            gutils.convert_entities(gutils.after(link, '>')))
        self.titles.append(name + ' ' + gutils.before(following, '<').strip())
def get_nextpage_url(self):
    """Return the absolute URL of the 'siguientes' link, or None.

    The link target is the last '<a href="' in front of the matched text.
    """
    hit = re.search('(siguientes >|siguientes >)', self.page)
    if not hit:
        return None
    link_start = self.page.rfind('<a href="', 0, hit.start())
    if link_start < 0:
        return None
    # 9 == len('<a href="')
    target = gutils.before(self.page[link_start + 9:hit.start()], '"')
    return 'http://www.filmaffinity.com/es/' + target
def search(self, parent_window):
    """Run the search and merge the product lists of all result sub-pages.

    Returns None when the first request fails, otherwise the concatenated
    result sections, which are also stored in self.page.
    """
    if not self.open_search(parent_window):
        return None
    # used for looking for subpages
    pager = gutils.trim(self.page, 'class="pages"', '</div>')
    page_links = pager.split('href="')
    # first results
    merged = gutils.trim(
        self.page, 'class="category-products"', 'class="toolbar-bottom"')
    # look for subpages
    for link in page_links[1:]:
        if link.find('title="Nächste"') >= 0:
            continue
        target = gutils.before(link, '"')
        if not target:
            continue
        self.title = ''
        self.o_title = ''
        self.url = target
        if self.open_search(parent_window):
            merged = merged + gutils.trim(
                self.page, 'class="category-products"', 'class="toolbar-bottom"')
    self.page = merged
    return self.page
def get_plot(self):
    """Set self.plot from the plot-summary list, else from the description.

    When the summary list yields nothing, the 'itemprop="description"' text
    of self.page is used and extended with the paragraphs of self.plot_page.
    """
    placeholder = 'It looks like we don\'t have any Plot Summaries for this title yet.'
    summary_list = gutils.trim(
        self.plot_page, 'id="plot-summaries-content">', '</ul>')
    compiled = ''
    for item in summary_list.split('<li'):
        if item == '' or placeholder in item:
            continue
        compiled = compiled + gutils.trim(item, '<p>', '</p>') + '\n'
        author = gutils.trim(item, '<div class="author-container">', '</div>')
        compiled = compiled + re.sub(
            '<[^<]+?>', '', author.replace('\n', '').lstrip()) + '\n\n'
    if compiled != '':
        self.plot = compiled
        return

    self.plot = gutils.regextrim(self.page, 'itemprop="description"', '<')
    self.plot = gutils.after(self.plot, '>')
    paragraphs = self.plot_page.split('<p class="plotpar">')
    if len(paragraphs) < 2:
        # Alternative layout: summaries as odd/even list items.
        paragraphs = re.split('<li class="(?:odd|even)">', self.plot_page)
    if len(paragraphs) > 1:
        self.plot = self.plot + '\n\n'
        for paragraph in paragraphs[1:]:
            if paragraph != '':
                self.plot = self.plot + gutils.strip_tags(
                    gutils.before(paragraph, '</a>')) + '\n\n'
def search(self, parent_window):
    """Run the search and merge the hit lists of all result sub-pages.

    Returns None when the first request fails, otherwise the concatenated
    result sections, which are also stored in self.page.

    Page numbers are read from the 'cp=' links; a link without a usable
    number counts as page 1, which is already loaded.
    """
    if not self.open_search(parent_window):
        return None
    # used for looking for subpages
    pager = gutils.trim(self.page, '<span class="trefferliste">', '</span>')
    page_links = pager.split('cp=')
    # first results
    merged = gutils.after(
        gutils.trim(self.page, 'Alle Treffer aus der Kategorie',
                    '<span class="trefferliste">'),
        "Titel:")
    # look for subpages
    for link in page_links:
        try:
            page_number = int(gutils.before(link, '"'))
        except (ValueError, TypeError):
            # Only a failed number conversion is tolerated; a bare except
            # here would also swallow KeyboardInterrupt / SystemExit.
            page_number = 1
        if page_number == 1:
            continue
        self.url = ('http://www.zweitausendeins.de/filmlexikon/?sucheNach=Titel&cp='
                    + str(page_number) + "&wert=")
        if self.open_search(parent_window):
            merged = merged + gutils.trim(
                self.page, 'Alle Treffer aus der Kategorie',
                '<span class="trefferliste">')
    self.page = merged
    return self.page
def get_notes(self):
    """Build self.notes from language, sound mix, color and taglines.

    Each non-empty item contributes one "label: value" line, in that order.
    """
    self.notes = ''

    def page_field(marker):
        # Field text with tags and newlines removed and spaces collapsed.
        text = gutils.strip_tags(gutils.regextrim(self.page, marker, '</div>'))
        text = re.sub('[\n]+', '', text)
        return re.sub('[ ]+', ' ', text).strip()

    language = page_field('Language:<[^>]+>')
    color = page_field('Color:<[^>]+>')
    sound = page_field('Sound Mix:<[^>]+>')

    tagline_html = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
    tagline = ''
    for entry in re.split('<div[^>]+class="soda[^>]*>', tagline_html)[1:]:
        entry = gutils.clean(gutils.before(entry, '</div>'))
        if entry:
            tagline = tagline + entry + '\n'

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if tagline:
        self.notes += "%s: %s\n" % ('Tagline', tagline)
def get_notes(self):
    """Build self.notes from language, sound mix, color and taglines.

    Each non-empty item contributes one "label: value" line, in that order.
    """
    self.notes = ''

    # Extract the three page fields in the original order.
    markers = (
        ('language', 'Language:<[^>]+>'),
        ('color', 'Color:<[^>]+>'),
        ('sound', 'Sound Mix:<[^>]+>'),
    )
    values = {}
    for key, marker in markers:
        raw = gutils.regextrim(self.page, marker, '</div>')
        raw = gutils.strip_tags(raw)
        raw = re.sub('[\n]+', '', raw)
        raw = re.sub('[ ]+', ' ', raw)
        values[key] = raw.strip()

    section = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
    pieces = re.split('<div[^>]+class="soda[^>]*>', section)
    tagline = ''
    if len(pieces) > 1:
        for piece in pieces[1:]:
            cleaned = gutils.clean(gutils.before(piece, '</div>'))
            if cleaned:
                tagline = tagline + cleaned + '\n'

    if values['language']:
        self.notes = "%s: %s\n" % (_('Language'), values['language'])
    if values['sound']:
        self.notes += "%s: %s\n" % (
            gutils.strip_tags(_('<b>Audio</b>')), values['sound'])
    if values['color']:
        self.notes += "%s: %s\n" % (_('Color'), values['color'])
    if tagline:
        self.notes += "%s: %s\n" % ('Tagline', tagline)
def get_trailer(self):
    """Set self.trailer to the target of the link preceding '(trailers)'.

    self.trailer stays '' when the marker or a preceding link is missing.
    """
    self.trailer = ''
    marker_at = self.page.find(u'(trailers)')
    if marker_at < 0:
        return
    link_at = self.page[:marker_at].rfind(u'<a href="')
    if link_at < 0:
        return
    # 9 == len('<a href="')
    self.trailer = gutils.before(self.page[link_at + 9:], '"')
def get_o_site(self):
    """Set self.o_site to the target of the link preceding '(site oficial)'.

    self.o_site stays '' when the marker or a preceding link is missing.
    """
    self.o_site = ''
    label_pos = self.page.find(u'(site oficial)')
    if label_pos >= 0:
        href_pos = self.page[:label_pos].rfind(u'<a href="')
        if href_pos >= 0:
            # 9 == len('<a href="')
            rest = self.page[href_pos + 9:]
            self.o_site = gutils.before(rest, '"')
def search(self, parent_window):
    """Run the search and merge the film lists of all result sub-pages.

    Returns None when the first request fails, otherwise the concatenated
    'film-liste' sections, which are also stored in self.page.

    Page numbers are read from the 'cp=' links; a link without a usable
    number counts as page 1, which is already loaded.
    """
    if not self.open_search(parent_window):
        return None
    # used for looking for subpages
    pager = gutils.trim(self.page, '<li>Treffer', '</div>')
    page_links = pager.split('cp=')
    # first results
    merged = gutils.trim(self.page, '<ul class=\'film-liste\'>', '</ul>')
    # look for subpages
    for link in page_links:
        try:
            page_number = int(gutils.before(link, '\''))
        except (ValueError, TypeError):
            # Only a failed number conversion is tolerated; a bare except
            # here would also swallow KeyboardInterrupt / SystemExit.
            page_number = 1
        if page_number == 1:
            continue
        self.url = ('http://www.zweitausendeins.de/filmlexikon/?sucheNach=filmtitel&cp='
                    + str(page_number) + "&wert=")
        if self.open_search(parent_window):
            merged = merged + gutils.trim(
                self.page, '<ul class=\'film-liste\'>', '</ul>')
    self.page = merged
    return self.page
def get_o_site(self):
    """Set self.o_site to the href of the 'Official Site' link, or ''."""
    self.o_site = ''
    label_at = self.page.find('>Official Site</a>')
    if label_at < 0:
        return
    href_at = self.page[:label_at].rfind('href="')
    if href_at < 0:
        return
    # 6 == len('href="')
    self.o_site = gutils.before(self.page[href_at + 6:], '"')
def get_image(self):
    """Set self.image_url to the first pics*.filmaffinity.com link, or ''."""
    hit = re.search('pics[0-9]*.filmaffinity.com/', self.page)
    if hit:
        self.image_url = 'http://' + gutils.before(self.page[hit.start():], '"')
    else:
        self.image_url = ''
def get_cameraman(self):
    """Set self.cameraman from the 'Cinematographer' crew entry, or ''.

    When several entries match, the last one wins.
    """
    self.cameraman = ''
    crew = gutils.trim(self.page_cast, '<h2>crew</h2>', '</dl>')
    for entry in crew.split('<dt>'):
        # A match at index 0 is deliberately not accepted (find() > 0).
        if entry.find('Cinematographer') > 0:
            self.cameraman = gutils.clean(gutils.before(entry, '</a>'))
def search(self, parent_window):
    """Run the search and merge up to three result pages into self.page.

    Returns None when the first request fails, otherwise the concatenated
    result sections, which are also stored in self.page.

    Each extra page is now requested exactly once. Previously
    self.open_search was called twice in a row for every page, the first
    call unchecked and the second one checked.
    """
    if not self.open_search(parent_window):
        return None
    # short the content
    merged = gutils.trim(self.page, '<select name="sort"',
                         'Click Here to make a Suggestion</a>')
    #
    # try to get all result pages (not so nice, but it works)
    #
    pager = gutils.trim(
        self.page, '<div id="Search_Container" name="Search_Container">',
        '</table>')
    last_page = 1
    for link in pager.split("&page="):
        try:
            number = int(gutils.before(link, '\''))
        except (ValueError, TypeError):
            # Not a page link; a bare except would also swallow
            # KeyboardInterrupt / SystemExit.
            number = 0
        if number > last_page:
            last_page = number
    current = 1
    # Extra pages are only fetched when there are fewer than four in total.
    while last_page > current and last_page < 4:
        current = current + 1
        self.url = ("http://www.dvdempire.com/Exec/v1_search_all.asp?&site_media_id=0&pp=&search_refined=32&used=0&page="
                    + str(current) + "&string=")
        if self.open_search(parent_window):
            merged = merged + gutils.trim(
                self.page, '<select name="sort"',
                'Click Here to make a Suggestion</a>')
    self.page = merged
    return self.page
def search(self, parent_window):
    """Run the search and merge up to three result pages into self.page.

    Returns the concatenated result sections, which are also stored in
    self.page.
    NOTE(review): the results of self.open_search are not checked here;
    its return contract is not visible from this method - confirm whether
    failures should abort.
    """
    self.open_search(parent_window)
    # short the content
    merged = gutils.trim(self.page, '<select name="sort"',
                         'Click Here to make a Suggestion</a>')
    #
    # try to get all result pages (not so nice, but it works)
    #
    pager = gutils.trim(
        self.page, '<div id="Search_Container" name="Search_Container">',
        '</table>')
    last_page = 1
    for link in pager.split("&page="):
        try:
            number = int(gutils.before(link, '\''))
        except (ValueError, TypeError):
            # Not a page link; a bare except would also swallow
            # KeyboardInterrupt / SystemExit.
            number = 0
        if number > last_page:
            last_page = number
    current = 1
    # Extra pages are only fetched when there are fewer than four in total.
    while last_page > current and last_page < 4:
        current = current + 1
        self.url = ("http://www.dvdempire.com/Exec/v1_search_all.asp?&site_media_id=0&pp=&search_refined=32&used=0&page="
                    + str(current) + "&string=")
        self.open_search(parent_window)
        merged = merged + gutils.trim(
            self.page, '<select name="sort"',
            'Click Here to make a Suggestion</a>')
    self.page = merged
    return self.page
def get_notes(self):
    """Build self.notes from language, sound mix, color and taglines.

    Each non-empty item contributes one "label: value" line, in that order.
    """
    self.notes = ""

    def squeeze(html):
        # Drop tags and newlines, collapse runs of spaces, trim the ends.
        text = gutils.strip_tags(html)
        text = re.sub("[\n]+", "", text)
        text = re.sub("[ ]+", " ", text)
        return text.strip()

    language = squeeze(gutils.regextrim(self.page, "Language:<[^>]+>", "</div>"))
    color = squeeze(gutils.regextrim(self.page, "Color:<[^>]+>", "</div>"))
    sound = squeeze(gutils.regextrim(self.page, "Sound Mix:<[^>]+>", "</div>"))

    tagline_section = gutils.regextrim(self.tagl_page, ">Taglines", ">See also")
    entries = []
    for block in re.split('<div[^>]+class="soda[^>]*>', tagline_section)[1:]:
        entry = gutils.clean(gutils.before(block, "</div>"))
        if entry:
            entries.append(entry + "\n")
    tagline = "".join(entries)

    lines = []
    if len(language) > 0:
        lines.append("%s: %s\n" % (_("Language"), language))
    if len(sound) > 0:
        lines.append("%s: %s\n" % (gutils.strip_tags(_("<b>Audio</b>")), sound))
    if len(color) > 0:
        lines.append("%s: %s\n" % (_("Color"), color))
    if len(tagline) > 0:
        lines.append("%s: %s\n" % ("Tagline", tagline))
    self.notes = "".join(lines)
def get_searches(self):
    """Fill self.ids / self.titles from the search result in self.page.

    self.page is None  -> the request landed on a movie page; self.url and
                          self.title are stored as the only hit.
    self.page is False -> nothing found; self.number_results becomes 0.
    otherwise          -> every non-empty '<li ' chunk becomes one hit,
                          titled "title (year) - country".

    The former `if elements != '':` guard compared a list with a string,
    so it was always true and its else branch could never run; both were
    removed without changing behavior.
    """
    if self.page is None:
        # movie page
        self.ids.append(self.url)
        self.titles.append(gutils.convert_entities(self.title))
        return
    if self.page is False:
        # no movie found
        self.number_results = 0
        return
    # multiple matches
    chunks = self.page.split('<li ')
    self.number_results = chunks[-1]
    for chunk in chunks:
        if chunk == '':
            continue
        link = gutils.after(chunk, 'href="')
        self.ids.append('http://' + plugin_url_other + gutils.before(link, '"'))
        name = gutils.trim(link, '">', '</a>').replace('\t', '')
        details = gutils.after(link, 'class=searchResultDetails')
        year = gutils.trim(details, '>', '|').replace(" ", '')
        year = gutils.strip_tags(year)
        country = ''
        country_pos = details.find('countryIds')
        if country_pos != -1:
            country = gutils.trim(details[country_pos:], '">', '</a>')
        label = name.strip()
        if year:
            label += ' (' + year.strip() + ')'
        if country:
            label += ' - ' + country.strip()
        label = gutils.strip_tags(gutils.convert_entities(label))
        self.titles.append(label)
def get_title(self):
    """Set self.title from the Polish-title field, else from the heading.

    NOTE(review): cutting at the first '(' is assumed to apply only to the
    '</h1>' fallback - confirm against the original layout.
    """
    self.title = gutils.trim(
        self.page, '<b>Tytu\xB3 polski:</b>', "\t\t</div><div")
    if self.title != '':
        return
    self.title = gutils.before(self.page, '</h1>')
    paren_at = self.title.find('(')
    if paren_at > -1:
        self.title = self.title[:paren_at]
def get_screenplay(self):
    """Set self.screenplay from the 'Screenwriter' crew entry, or ''.

    When several entries match, the last one wins.
    """
    self.screenplay = ''
    crew_section = gutils.trim(self.page_cast, '<h2>crew</h2>', '</dl>')
    matching = [row for row in crew_section.split('<dt>')
                if row.find('Screenwriter') > 0]
    for row in matching:
        self.screenplay = gutils.clean(gutils.before(row, '</a>'))
def running_time(self):
    """Set self.running_time from the 'Czas trwania' field of self.page.

    A value containing "?" is stored as ''; otherwise the text after "×"
    and in front of " min" is kept.
    """
    duration = gutils.trim(
        self.page, "<div class=\"sitem\">Czas trwania: <b>\n\t\t", "\n</b>")
    self.running_time = duration
    if "?" in duration:
        self.running_time = ''
        return
    self.running_time = gutils.after(self.running_time, "×")
    self.running_time = gutils.before(self.running_time, " min")
def get_image(self):
    """Set self.image_url to the first '.jpg' <img> source in self.page.

    self.image_url is left untouched when no such image exists.
    """
    sources = re.split('<img[ \t]+src=[\'"]', self.page)
    # sources[0] is the text in front of the first image tag.
    for source in sources[1:]:
        candidate = gutils.before(source, '"')
        if '.jpg' in candidate:
            self.image_url = candidate
            break
def get_searches(self):
    """Append the film hits of self.page to self.ids / self.titles.

    Nothing is collected when the page starts with the link marker.
    """
    chunks = self.page.split('<h4><a href="/film/fichefilm_gen_cfilm=')
    if chunks[0] == '':
        return
    for chunk in chunks[1:]:
        self.ids.append(gutils.before(chunk, '.'))
        link_text = gutils.trim(chunk, '>', '</a>')
        self.titles.append(
            gutils.strip_tags(gutils.convert_entities(link_text)))
def get_title(self):
    """Set self.title from the Polish-title field, else from the heading.

    NOTE(review): cutting at the first '(' is assumed to apply only to the
    '</h1>' fallback - confirm against the original layout.
    """
    polish = gutils.trim(self.page, u'<b>Tytuł polski:</b>', "\t\t</div><div")
    self.title = polish
    if polish == '':
        heading = gutils.before(self.page, '</h1>')
        self.title = heading
        cut = heading.find('(')
        if cut > -1:
            self.title = heading[:cut]
def get_nextpage_url(self):
    """Return the absolute URL of the 'siguientes' link, or None.

    The link target is the last '<a href="' in front of the matched text.
    """
    found = re.search('(siguientes >|siguientes >)', self.page)
    next_url = None
    if found:
        label_at = found.start()
        href_at = self.page.rfind('<a href="', 0, label_at)
        if href_at >= 0:
            # 9 == len('<a href="')
            fragment = self.page[href_at + 9:label_at]
            next_url = ('http://www.filmaffinity.com/es/'
                        + gutils.before(fragment, '"'))
    return next_url
def get_searches(self):
    """Append the '/filmes/' hits of self.page to self.ids / self.titles.

    self.number_results becomes the last split chunk, or 0 when the page
    starts with the link marker (or is empty).
    """
    pieces = self.page.split('<a href="/filmes/')
    self.number_results = pieces[-1]
    if len(pieces[0]) == 0:
        self.number_results = 0
    else:
        for piece in pieces:
            number = gutils.digits_only(gutils.before(piece, '"'))
            caption = gutils.before(gutils.after(piece, '>'), '</small>')
            caption = caption.replace('<small>', ' / ')
            caption = re.sub('</div>.*', '', caption)
            caption = gutils.clean(caption)
            # Captions that still start with markup are not real hits.
            if number and caption and caption[0] != '<':
                self.ids.append(number)
                self.titles.append(gutils.convert_entities(caption))
def get_director(self):
    """Set self.director to the 'Directed by' names, joined with ', '.

    The names are the link texts inside the '>Directed by' ... '</table>'
    section of self.cast_page; self.director is '' when there are none.

    Fixes the separator handling: previously the delimiter was appended
    after each name and only from the second name on, so two directors
    came out as "AB, " instead of "A, B".
    """
    section = gutils.trim(self.cast_page, '>Directed by', '</table>')
    links = re.split('href="', section)
    # links[0] is the markup in front of the first link.
    names = [gutils.before(gutils.after(link, '>'), '<') for link in links[1:]]
    self.director = ', '.join(names)
def get_title(self):
    """Set self.title from <h1>, preferring the first '[de]' alternative.

    The alternatives come from the 'Alternativ' / 'Auch bekannt als'
    section and are only consulted when it contains a 'transl' entry.
    """
    self.title = gutils.trim(self.page, '<h1>', '<span')
    aka_section = gutils.regextrim(
        self.page, '<h5>(Alternativ|Auch bekannt als):', '</div>')
    variants = aka_section.split('<i class="transl"')
    if len(variants) <= 1:
        return
    for variant in variants:
        german = gutils.before(gutils.trim(variant, '>', '[de]'), '(')
        if german != '':
            self.title = german
            break
def search(self, parent_window):
    """Run the search and reduce self.page to the result list.

    Returns None when the request fails. self.page becomes '' when the
    results heading is missing (already a movie page); otherwise it is the
    text from that heading up to '>Mapa strony</h3>'.
    """
    if not self.open_search(parent_window):
        return None
    heading_at = self.page.find('<h1>Wyniki wyszukiwania dla')
    if heading_at != -1:
        # multiple matches
        self.page = gutils.before(self.page[heading_at:], '>Mapa strony</h3>')
    else:
        # already a movie page
        self.page = ''
    return self.page
def get_runtime(self):
    """Set self.runtime from the 'Czas trwania' field of self.page.

    A value containing '?' is stored as ''; otherwise the text after '×'
    and in front of ' min' is kept.
    """
    raw = gutils.trim(
        self.page, "<div class=\"sitem\">Czas trwania: <b>\n\t\t", '\n</b>')
    self.runtime = raw
    if raw.find('?') == -1:
        self.runtime = gutils.before(gutils.after(raw, '×'), ' min')
    else:
        self.runtime = ''
def get_runtime(self):
    """Set self.runtime to the length in minutes (int), or '' if unusable.

    The 'Length' field is expected to look like "1 hrs. 59 mins.".
    A field that cannot be converted leaves self.runtime as ''.
    """
    self.runtime = ''
    length = gutils.strip_tags(gutils.trim(self.page, 'Length', '<br'))
    # 1 hrs. 59 mins.
    try:
        hours = int(gutils.before(length, 'hrs'))
        minutes = int(gutils.trim(length, '.', 'mins'))
    except (ValueError, TypeError):
        # Only failed number conversions are tolerated; a bare except
        # would also swallow KeyboardInterrupt / SystemExit.
        self.runtime = ''
    else:
        self.runtime = hours * 60 + minutes
def get_searches(self):
    """Append the 'ficha_filme_if' hits of self.page to self.ids / self.titles.

    A hit is only stored when both its id and its title are non-empty.
    """
    link_marker = "href='ficha_filme_if.cdv?numero_filme="
    chunks = self.page.split(link_marker)
    # chunks[0] is the text in front of the first link.
    for chunk in chunks[1:]:
        movie_id = gutils.before(chunk, "'")
        link_text = gutils.trim(chunk, '>', '</a>')
        if not (movie_id and link_text):
            continue
        self.ids.append(movie_id)
        self.titles.append(link_text)