def get_notes(self):
    """Fill ``self.notes`` with the Features, Video, Audio and Subtitles sections.

    Each section is the part of ``self.page`` that ``gutils.trim`` yields
    between the section label and the next ``<b>``.  Sections that end up
    empty after tag stripping are left out.
    """
    def section(label, breaks=('<br>', '<br />')):
        # Turn the listed markers into newlines, drop the remaining markup and
        # squeeze runs of line breaks into a single newline.
        chunk = gutils.trim(self.page, label, '<b>')
        for marker in breaks:
            chunk = chunk.replace(marker, '\n')
        chunk = gutils.strip_tags(chunk)
        if chunk != '':
            chunk = re.sub('[\r\n]+', '\n', chunk)
        return chunk

    self.notes = ''

    features = section('Features:')
    if features != '':
        self.notes += '\nFeatures:' + features + '\n'

    video = section('Video:')
    if video != '':
        self.notes += '\nVideo:' + video + '\n'

    # The audio list also carries "(more info)" links that act as separators.
    audio = section('Audio:', ('<br>', '<br />', '(more info)'))
    if audio != '':
        self.notes += '\nAudio:' + audio

    # Subtitles get no <br> handling at all.
    subtitles = section('Subtitles:', ())
    if subtitles != '':
        self.notes += '\nSubtitles:\n' + subtitles + '\n'
def get_notes(self):
    """Compose ``self.notes`` from language, sound mix, colour and taglines.

    Language, colour and sound come from ``self.page``; taglines come from
    ``self.tagl_page``.  Only non-empty values are written, one
    ``"label: value"`` line each.
    """
    def detail(pattern):
        # Markup removed, newlines dropped, space runs collapsed, ends trimmed.
        text = gutils.strip_tags(gutils.regextrim(self.page, pattern, '</div>'))
        text = re.sub('[\n]+', '', text)
        return re.sub('[ ]+', ' ', text).strip()

    self.notes = ''
    language = detail('Language:<[^>]+>')
    color = detail('Color:<[^>]+>')
    sound = detail('Sound Mix:<[^>]+>')

    tagline_html = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
    tagline = ''
    # The first piece precedes the first "soda" div, so it is skipped.
    for piece in re.split('<div[^>]+class="soda[^>]*>', tagline_html)[1:]:
        cleaned = gutils.clean(gutils.before(piece, '</div>'))
        if cleaned:
            tagline += cleaned + '\n'

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if tagline:
        self.notes += "%s: %s\n" % ('Tagline', tagline)
def get_notes(self):
    """Compose ``self.notes`` from the Lingua, Sonoro, Colore and release-date fields.

    All values are read from ``self.page``; empty values are skipped.
    """
    def detail(heading):
        # Markup removed, newlines dropped, space runs collapsed, right-trimmed.
        text = gutils.strip_tags(gutils.trim(self.page, heading, '</div>'))
        text = re.sub('[\n]+', '', text)
        return re.sub('[ ]+', ' ', text).rstrip()

    self.notes = ''
    language = detail('<h5>Lingua:</h5>')
    color = detail('<h5>Colore:</h5>')
    sound = detail('<h5>Sonoro:</h5>')

    # The release date ends at the "more" link and is tidied by gutils.clean.
    date = gutils.trim(self.page, '<h5>Data di uscita:</h5>', '<a class="tn15more inline"')
    date = re.sub('[\n]+', '', date)
    date = gutils.clean(re.sub('[ ]+', ' ', date))

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if date:
        self.notes += "%s: %s\n" % (_('Data di uscita'), date)
def get_notes(self):
    """Write language, audio, colour and tagline lines into ``self.notes``.

    The first three values are cut out of ``self.page``; the taglines are
    collected from ``self.tagl_page``.  Empty values produce no line.
    """
    def normalise(fragment):
        # Strip tags, delete newlines, collapse spaces and trim both ends.
        fragment = gutils.strip_tags(fragment)
        fragment = re.sub('[\n]+', '', fragment)
        fragment = re.sub('[ ]+', ' ', fragment)
        return fragment.strip()

    self.notes = ''
    language = normalise(gutils.regextrim(self.page, 'Language:<[^>]+>', '</div>'))
    color = normalise(gutils.regextrim(self.page, 'Color:<[^>]+>', '</div>'))
    sound = normalise(gutils.regextrim(self.page, 'Sound Mix:<[^>]+>', '</div>'))

    raw_taglines = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
    entries = re.split('<div[^>]+class="soda[^>]*>', raw_taglines)
    kept = []
    # entries[0] is whatever precedes the first tagline div.
    for entry in entries[1:]:
        entry = gutils.clean(gutils.before(entry, '</div>'))
        if entry:
            kept.append(entry + '\n')
    tagline = ''.join(kept)

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if tagline:
        self.notes += "%s: %s\n" % ('Tagline', tagline)
def get_searches(self):
    """Fill ``self.ids`` / ``self.titles`` from the search response.

    ``self.page`` is ``None`` when the search landed on a movie page,
    ``False`` when nothing was found, and otherwise the HTML of a result
    list that is split on ``'<li '``.
    """
    if self.page is None:
        # movie page
        self.ids.append(self.url)
        self.titles.append(gutils.convert_entities(self.title))
        return
    if self.page is False:
        # no movie found
        self.number_results = 0
        return

    # multiple matches
    chunks = self.page.split('<li ')
    # NOTE(review): this stores the last fragment, not a count.
    self.number_results = chunks[-1]
    if chunks[0] == '':
        self.number_results = 0
        return

    for chunk in chunks:
        chunk = gutils.after(chunk, '<a href="')
        self.ids.append('http://' + plugin_server + gutils.before(chunk, '"'))

        name = gutils.trim(chunk, 'class="searchResultTitle"', '</a>')
        name = gutils.after(name, '">').replace("\t", '')

        details = gutils.after(chunk, 'class="searchResultDetails"')
        year = gutils.strip_tags(gutils.trim(details, '>', '|').replace(" ", ''))
        country = gutils.trim(details, '">', '</a>')

        label = name.strip()
        if year != '':
            label = label + ' (' + year.strip() + ')'
        if country != '':
            label = label + ' - ' + country.strip()
        label = gutils.strip_tags(gutils.convert_entities(label))
        self.titles.append(label)
def get_searches(self):
    """Collect ids and display titles from the search result table.

    ``self.page`` is split on the opening of each result cell.  Every
    fragment contributes one id (taken from its ``filme_<id>_`` link) and one
    title of the form ``"<name> - <release year>"``.

    The previous version printed each id and title to stdout before storing
    it, evaluating every expression twice; that debug output is removed.
    """
    fragments = self.page.split("<td width=100% height=18")
    # NOTE(review): this stores the last fragment, not a count.
    self.number_results = fragments[-1]
    if fragments[0] == '':
        self.number_results = 0
        return

    for fragment in fragments:
        movie_id = gutils.trim(fragment, "align=center valign=top><a href=\"filme_", "_")
        name = gutils.trim(fragment,
                           "<font color=#FFFFFF face=Verdana size=2>",
                           "</font></a></b></td>")
        year = gutils.trim(fragment, "Ano de Lançamento:", "<br>")
        self.ids.append(movie_id)
        self.titles.append(gutils.strip_tags(name + ' - ' + year))
def get_notes(self):
    """Compose ``self.notes`` from the Lingua, Sonoro, Colore and Trama fields.

    The plot summary is used as the tagline unless it equals the site's
    "add or translate a summary" placeholder text.
    """
    def tidy(fragment):
        # Markup removed, newlines dropped, space runs collapsed, right-trimmed.
        fragment = gutils.strip_tags(fragment)
        fragment = re.sub('[\n]+', '', fragment)
        return re.sub('[ ]+', ' ', fragment).rstrip()

    self.notes = ''
    language = tidy(gutils.trim(self.page, '<h5>Lingua:</h5>', '</div>'))
    color = tidy(gutils.trim(self.page, '<h5>Colore:</h5>', '</div>'))
    sound = tidy(gutils.trim(self.page, '<h5>Sonoro:</h5>', '</div>'))

    plot_html = gutils.trim(self.page, '<h5>Trama:</h5>', '</div>')
    tagline = tidy(self.__before_more(plot_html))

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if tagline and tagline != "Aggiungi o traduci un riassunto della trama":
        self.notes += "%s: %s\n" % ('Tagline', tagline)
def get_plot(self):
    """Set ``self.plot`` to the tag-stripped text that follows ``Obsah:``.

    When ``self.page`` contains a ``?text=<digits>`` link, that page is
    fetched with ``self.open_page`` and used instead of ``self.page``.
    """
    link = re.search(r"\?text=([\d]*)", self.page)
    if link:
        source = self.open_page(url=self.url + "?text=" + link.group(1))
    else:
        source = self.page
    self.plot = gutils.strip_tags(gutils.trim(source, "Obsah:", " <b><i>("))
def get_notes(self):
    """Build ``self.notes`` out of language, audio, colour and tagline lines.

    Values are taken from ``self.page`` (details) and ``self.tagl_page``
    (taglines); a line is only written when its value is non-empty.
    """
    def page_value(pattern):
        # Cut the value out of the page and normalise its whitespace.
        value = gutils.regextrim(self.page, pattern, "</div>")
        value = gutils.strip_tags(value)
        value = re.sub("[\n]+", "", value)
        value = re.sub("[ ]+", " ", value)
        return value.strip()

    self.notes = ""
    language = page_value("Language:<[^>]+>")
    color = page_value("Color:<[^>]+>")
    sound = page_value("Sound Mix:<[^>]+>")

    tagline = ""
    tagline_section = gutils.regextrim(self.tagl_page, ">Taglines", ">See also")
    # Everything before the first "soda" div is not a tagline.
    for chunk in re.split('<div[^>]+class="soda[^>]*>', tagline_section)[1:]:
        text = gutils.clean(gutils.before(chunk, "</div>"))
        if text:
            tagline = tagline + text + "\n"

    if language:
        self.notes = "%s: %s\n" % (_("Language"), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_("<b>Audio</b>")), sound)
    if color:
        self.notes += "%s: %s\n" % (_("Color"), color)
    if tagline:
        self.notes += "%s: %s\n" % ("Tagline", tagline)
def get_notes(self):
    """Fill ``self.notes`` with the Features, Video, Audio and Subtitles sections.

    Every section runs from its label to the next ``<b>`` in ``self.page``.
    Sections that are empty after tag stripping are omitted.
    """
    def section(label, convert_breaks):
        # Optionally turn <br> variants into newlines, strip the markup and
        # collapse consecutive line breaks.
        text = gutils.trim(self.page, label, "<b>")
        if convert_breaks:
            text = text.replace("<br>", "\n").replace("<br />", "\n")
        text = gutils.strip_tags(text)
        if text != "":
            text = re.sub("[\r\n]+", "\n", text)
        return text

    self.notes = ""
    # (label, convert <br>, text before the value, text after the value)
    layout = (
        ("Features:", True, "\nFeatures:", "\n"),
        ("Video:", True, "\nVideo:", "\n"),
        ("Audio:", True, "\nAudio:", ""),
        ("Subtitles:", False, "\nSubtitles:\n", "\n"),
    )
    for label, convert_breaks, head, tail in layout:
        body = section(label, convert_breaks)
        if body != "":
            self.notes = self.notes + head + body + tail
def get_notes(self):
    """Fill ``self.notes`` with picture format, sound format and subtitle rows.

    Each row runs from its label to the closing ``</TR>`` in ``self.page``.
    """
    def row_text(label):
        # Same steps as the original nested expression, innermost first:
        # collapse whitespace runs, drop raw line breaks, turn <br> into CRLF,
        # strip the tags and finally remove leading blanks.
        row = gutils.trim(self.page, label, '</TR>')
        row = re.sub('[ \t][ \t\r\n]+', ' ', row)
        row = re.sub('[\r\n]+', '', row)
        row = re.sub('(<br>|<br />)', '\r\n', row)
        return re.sub('^[ \t]+', '', gutils.strip_tags(row))

    self.notes = ""
    # (label in the page, heading written to the notes, trailing text)
    layout = (
        ('Bildformat(e)', "Bildformat(e):\n", "\n"),
        ('Tonformat(e)', "Tonformat(e):\n", "\n\n"),
        ('Untertitel', "Untertitel:", "\n\n"),
    )
    for label, heading, tail in layout:
        text = row_text(label)
        if text != "":
            self.notes = self.notes + heading + text + tail
def get_searches(self):
    """Fill ``self.ids`` / ``self.titles`` from the search response.

    ``self.page`` is ``None`` when the search landed on a movie page,
    ``False`` when nothing was found, and otherwise the HTML of a result
    list that is split on ``'<li '``.

    The previous version guarded the loop with ``elements != ''``.  That
    compares a list with a string, so it was always true and its ``else``
    branch could never run; the dead test is removed and behaviour is
    unchanged.
    """
    if self.page is None:
        # movie page
        self.ids.append(self.url)
        self.titles.append(gutils.convert_entities(self.title))
        return
    if self.page is False:
        # no movie found
        self.number_results = 0
        return

    # multiple matches
    fragments = self.page.split('<li ')
    # NOTE(review): this stores the last fragment, not a count.
    self.number_results = fragments[-1]
    for fragment in fragments:
        if fragment == '':
            continue
        fragment = gutils.after(fragment, 'href="')
        self.ids.append('http://' + plugin_url_other + gutils.before(fragment, '"'))

        name = gutils.trim(fragment, '">', '</a>').replace('\t', '')

        details = gutils.after(fragment, 'class=searchResultDetails')
        year = gutils.strip_tags(gutils.trim(details, '>', '|').replace(" ", ''))

        # The country is only present when a countryIds link follows.
        country = ''
        country_at = details.find('countryIds')
        if country_at != -1:
            country = gutils.trim(details[country_at:], '">', '</a>')

        label = name.strip()
        if year:
            label += ' (' + year.strip() + ')'
        if country:
            label += ' - ' + country.strip()
        self.titles.append(gutils.strip_tags(gutils.convert_entities(label)))
def get_notes(self):
    """Fill ``self.notes`` with the Features, Video and Audio sections of ``self.page``."""
    def breaks_to_newlines(text):
        return text.replace('<br>', '\n').replace('<br />', '\n')

    def flatten(text):
        # Remove CRLF pairs and collapse runs of blanks and tabs.
        return re.sub('[ \t]+', ' ', text.replace('\r\n', ''))

    self.notes = ''

    features = gutils.trim(self.page, '<h3>Features</h3>', '</p>')
    features = gutils.strip_tags(breaks_to_newlines(features))
    if features != '':
        self.notes += 'Features:\n' + features + '\n\n'

    video = flatten(gutils.trim(self.page, 'Video</strong>', '<strong>'))
    video = gutils.strip_tags(breaks_to_newlines(video))
    if video != '':
        self.notes += 'Video:' + video

    audio = flatten(gutils.trim(self.page, 'Audio</strong>', '</div>'))
    # "(more info)" links separate the individual audio tracks.
    audio = breaks_to_newlines(audio).replace('(more info)', '\n')
    audio = gutils.strip_tags(audio)
    if audio != '':
        self.notes += 'Audio:' + audio
def get_searches(self):
    """Collect cinema (``K_``) or, failing that, video (``V_``) search hits.

    Video results are only looked at when ``self.page`` contains no
    ``kinofilm.php4`` link at all.  Nothing is collected when the page
    starts directly with the result marker (first split fragment empty).
    """
    if "kinofilm.php4" in self.page:
        marker, prefix = "headline3\"><A HREF=\"/kinofilm.php4?nr=", "K_"
    elif "videofilm.php4" in self.page:
        marker, prefix = "headline3\"><A HREF=\"/videofilm.php4?nr=", "V_"
    else:
        return

    hits = self.page.split(marker)
    if hits[0] == '':
        return
    # hits[0] is the page header before the first result.
    for hit in hits[1:]:
        if hit == '':
            continue
        self.ids.append(prefix + gutils.before(hit, "&"))
        name = gutils.trim(hit, ">", "</A>")
        subtitle = gutils.trim(hit, "<span CLASS=\"standardsmall\"><br>", "</SPAN>")
        extra = gutils.trim(hit, "<span class=\"standardsmall\"><b>", "</span>")
        self.titles.append(gutils.strip_tags(
            name + " " + subtitle + " (" + extra.replace("<b>", ", ") + ")"))
def get_plot(self):
    """Set ``self.plot`` from the localized pages, falling back to the original ones.

    The short summary comes from the ``Trama`` block of ``self.page`` and is
    replaced by the ``swiki.2.1`` block of ``self.plot_page`` when that is
    non-empty.  Every ``plotpar`` paragraph of ``self.plot_page`` is then
    appended.  If the result is still empty, the same is done with
    ``self.imdb_page`` / ``self.imdb_plot_page``.

    The previous version first extracted a ``Resumen`` block into
    ``self.plot`` and immediately overwrote it without reading it; those
    dead stores are removed.
    """
    self.plot = gutils.trim(self.page, '<h5>Trama:</h5>', '</div>')
    self.plot = self.__before_more(self.plot)

    wiki_plot = gutils.trim(self.plot_page, '<div id="swiki.2.1">', '</div>')
    if wiki_plot:
        self.plot = wiki_plot

    paragraphs = self.plot_page.split('<p class="plotpar">')
    if len(paragraphs) > 1:
        self.plot = self.plot + '\n\n'
        # paragraphs[0] precedes the first plot paragraph.
        for paragraph in paragraphs[1:]:
            if paragraph != '':
                self.plot = self.plot + gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n'

    if not self.plot:
        # nothing found on the localized pages, try the original ones
        self.plot = gutils.regextrim(self.imdb_page, '<h5>Plot:</h5>', '(</div>|<a href.*)')
        self.plot = self.__before_more(self.plot)
        paragraphs = self.imdb_plot_page.split('<p class="plotpar">')
        if len(paragraphs) > 1:
            self.plot = self.plot + '\n\n'
            for paragraph in paragraphs[1:]:
                if paragraph != '':
                    self.plot = self.plot + gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n\n'
def get_searches(self):
    """Collect cinema (``K_``) and video (``V_``) hits from ``self.page``.

    The page is split on the link that opens each hit.  The id is the link
    target up to the closing quote without its query string; the title is
    ``"<name> (<details>)"`` with doubled brackets tidied up.

    The previous version split on a pattern with a *capturing* host group.
    ``re.split`` returns captured groups as list items, so the code relied on
    skipping ``None`` items, and an absolute ``http://www.kino.de/...`` link
    added the host string as a bogus extra hit.  The group is now
    non-capturing, so every item after the first is a real hit.
    """
    sections = (
        ("K_", 'headline3"><a href="(?:http://www.kino.de)*/kinofilm/'),
        ("V_", 'headline3"><a href="(?:http://www.kino.de)*/videofilm/'),
    )
    for prefix, pattern in sections:
        # Item 0 is the page content before the first hit.
        for hit in re.split(pattern, self.page)[1:]:
            self.ids.append(prefix + re.sub('[?].*', '', gutils.before(hit, '"')))
            name = gutils.trim(hit, '>', '</a>')
            details = gutils.trim(hit, '<span class="standardsmall">', '</span>')
            title = gutils.strip_tags(
                name + ' (' + details.replace('<br />', ' - ') + ')')
            title = title.replace('( - (', '(').replace('))', ')')
            self.titles.append(title)
def get_notes(self):
    """Write language, audio, colour and release-date lines into ``self.notes``.

    Values are cut out of ``self.page``; empty ones are skipped.
    """
    def compact(text):
        # Delete newlines and collapse runs of spaces.
        return re.sub('[ ]+', ' ', re.sub('[\n]+', '', text))

    def info(heading):
        raw = gutils.trim(self.page, heading, '</div>')
        return compact(gutils.strip_tags(raw)).rstrip()

    self.notes = ''
    language = info('<h5>Lingua:</h5>')
    color = info('<h5>Colore:</h5>')
    sound = info('<h5>Sonoro:</h5>')
    # The date keeps its markup until gutils.clean processes it.
    date = gutils.clean(compact(
        gutils.trim(self.page, '<h5>Data di uscita:</h5>', '<a class="tn15more inline"')))

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if date:
        self.notes += "%s: %s\n" % (_('Data di uscita'), date)
def get_notes(self):
    """Compose ``self.notes`` from the Language, Sound Mix, Color and Tagline fields.

    All values are read from ``self.page``; a line is written only for
    non-empty values.
    """
    def tidy(fragment):
        # Markup removed, newlines dropped, space runs collapsed, right-trimmed.
        fragment = gutils.strip_tags(fragment)
        fragment = re.sub('[\n]+', '', fragment)
        return re.sub('[ ]+', ' ', fragment).rstrip()

    self.notes = ''
    language = tidy(gutils.trim(self.page, '<h5>Language:</h5>', '</div>'))
    color = tidy(gutils.trim(self.page, '<h5>Color:</h5>', '</div>'))
    sound = tidy(gutils.trim(self.page, '<h5>Sound Mix:</h5>', '</div>'))

    tagline_html = gutils.trim(self.page, '<h5>Tagline:</h5>', '</div>')
    tagline = tidy(self.__before_more(tagline_html))

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if tagline:
        self.notes += "%s: %s\n" % ('Tagline', tagline)
def get_plot(self):
    """Set ``self.plot`` from the localized pages, the original pages or the summary list.

    1. Use the ``description`` meta tag of ``self.page`` plus every
       ``plotSummary`` block of ``self.plot_page``.
    2. If that is empty, repeat with ``self.imdb_page`` / ``self.imdb_plot_page``.
    3. If ``self.plot_page`` has a ``plot-summaries-content`` list with
       entries, that compilation replaces whatever was found before.
    """
    def meta_description(page):
        tag = gutils.trim(page, 'name="description"', '/>')
        return gutils.before(gutils.after(tag, 'content="'), '"')

    def append_summaries(plot_page):
        # Adds each "plotSummary" block, cut at its first closing </a>.
        blocks = plot_page.split('class="plotSummary"')
        if len(blocks) > 1:
            self.plot = self.plot + '\n\n'
            for block in blocks[1:]:
                if block != '':
                    summary = gutils.before(gutils.after(block, '>'), '</a>')
                    self.plot = self.plot + gutils.strip_tags(summary) + '\n\n'

    self.plot = meta_description(self.page)
    append_summaries(self.plot_page)

    if self.plot == '':
        # nothing found on the localized pages, try the original ones
        self.plot = meta_description(self.imdb_page)
        append_summaries(self.imdb_plot_page)

    listing = gutils.trim(self.plot_page, 'id="plot-summaries-content">', '</ul>')
    compilation = ''
    for item in listing.split('<li'):
        if item == '':
            continue
        compilation += gutils.trim(item, '<p>', '</p>') + '\n'
        author = gutils.trim(item, '<div class="author-container">', '</div>')
        compilation += re.sub('<[^<]+?>', '', author.replace('\n', '').lstrip()) + '\n\n'
    if compilation != '':
        self.plot = compilation
def get_notes(self):
    """Load the features page and copy selected rows into ``self.notes``.

    Side effects: ``self.url`` is pointed at the ``typ=features`` page for
    ``self.movie_id``, ``self.open_page`` is called, and ``self.tmp_page`` is
    set to the printable part of ``self.page``.
    """
    self.notes = ""
    self.url = self.url_to_use + "typ=features&nr=" + str(self.movie_id)
    self.open_page(self.parent_window)
    self.tmp_page = gutils.trim(self.page, "<!-- PRINT-CONTENT-START-->", "<!-- PRINT-CONTENT-ENDE-->")

    # (row label in the page, heading written to the notes)
    rows = (
        ("<b>Sprache</b>", "Sprachen:\n"),
        ("<b>Untertitel</b>", "Untertitel:\n"),
        ("<b>Mehrkanalton</b>", "Mehrkanalton:\n"),
        ("<b>EAN</b>", "EAN:\n"),
    )
    for label, heading in rows:
        value = gutils.strip_tags(gutils.trim(self.tmp_page, label, "</TD></TR>"))
        value = value.replace(" ", "")
        if value != "":
            self.notes = self.notes + heading + value + "\n\n"
def get_notes(self):
    """Fill ``self.notes`` with picture format, subtitle and sound format rows.

    Each row runs from its label to the closing ``</TR>`` in ``self.page``.
    """
    self.notes = ""
    # (row label, <br> spelling turned into newlines, heading, trailing text)
    rows = (
        ('Bildformat(e)', '<br />', "Bildformat(e):\n", "\n"),
        ('Untertitel', '<br>', "Untertitel:", "\n\n"),
        ('Tonformat(e)', '<br />', "Tonformat(e):\n", "\n\n"),
    )
    for label, line_break, heading, tail in rows:
        row = gutils.trim(self.page, label, '</TR>').replace(line_break, '\n')
        text = gutils.strip_tags(row)
        if text != "":
            self.notes = self.notes + heading + text + tail
def get_cast(self):
    """Build ``self.cast`` with one ``name`` or ``name as role`` line per entry.

    ``self.page_cast`` is split at every ``<div class="name">``.  Reading
    stops at the first entry that contains a Director, Producer or Writer
    heading.
    """
    crew_headings = ('<h3>Director</h3>', '<h3>Producer</h3>', '<h3>Writer</h3>')

    self.cast = ''
    entries = re.split('(?:[<]div[ \t]+class="name"[>])', self.page_cast)
    # entries[0] is the page content before the first name div.
    for entry in entries[1:]:
        if any(heading in entry for heading in crew_headings):
            break
        name = gutils.strip_tags(gutils.before(entry, '</div>')).strip()
        role = gutils.strip_tags(gutils.trim(entry, '<div class="role">', '</div>')).strip()
        if role:
            self.cast += name + _(' as ') + role + '\n'
        else:
            self.cast += name + '\n'
def get_searches(self):
    """Collect ids and display titles from the search result table.

    Each fragment of ``self.page`` (split on the opening of a result cell)
    yields an id from its ``filme_<id>_`` link and a title built as
    ``"<name> - <release year>"``.

    The debug ``print`` calls of the previous version, which echoed every id
    and title to stdout and evaluated each expression twice, are removed.
    """
    cells = self.page.split("<td width=100% height=18")
    # NOTE(review): this stores the last fragment, not a count.
    self.number_results = cells[-1]
    if cells[0] != '':
        for cell in cells:
            self.ids.append(
                gutils.trim(cell, "align=center valign=top><a href=\"filme_", "_"))
            title = gutils.trim(cell, "<font color=#FFFFFF face=Verdana size=2>", "</font></a></b></td>")
            title = title + ' - ' + gutils.trim(cell, "Ano de Lançamento:", "<br>")
            self.titles.append(gutils.strip_tags(title))
    else:
        self.number_results = 0
def get_notes(self):
    """Copy language, subtitle, multichannel and EAN rows into ``self.notes``.

    Rows are read from ``self.tmp_dvdfeaturespage``; each runs from its bold
    label to the closing ``</td></tr>``.  Empty rows are skipped.
    """
    self.notes = ""
    # (row label in the page, heading written to the notes)
    rows = (
        ("<b>Sprache</b>", "Sprachen:\n"),
        ("<b>Untertitel</b>", "Untertitel:\n"),
        ("<b>Mehrkanalton</b>", "Mehrkanalton:\n"),
        ("<b>EAN</b>", "EAN:\n"),
    )
    for label, heading in rows:
        cell = gutils.trim(self.tmp_dvdfeaturespage, label, "</td></tr>")
        value = gutils.strip_tags(cell).replace(" ", "")
        if value != "":
            self.notes = self.notes + heading + value + "\n\n"
def get_searches(self):
    """Collect cinema (``K_``) and video (``V_``) hits from ``self.page``.

    Titles are prefixed with ``"Kino: "`` or ``"Video: "`` and have the form
    ``"<name> (<details>)"`` with doubled brackets tidied up.  Ids are the
    link target up to the closing quote, without any query string.

    The previous version split on a pattern with a *capturing* host group.
    ``re.split`` returns captured groups as list items, so the code relied on
    skipping ``None`` items, and an absolute ``http://www.kino.de/...`` link
    added the host string as a bogus extra hit.  The group is now
    non-capturing, so every item after the first is a real hit.
    """
    link_start = 'headline3"[^>]*>[ \t\r\n]*<a href="(?:http://www.kino.de)*'
    sections = (
        ("K_", "Kino: ", link_start + '/kinofilm/'),
        ("V_", "Video: ", link_start + '/videofilm/'),
    )
    for id_prefix, title_prefix, pattern in sections:
        # Item 0 is the page content before the first hit.
        for hit in re.split(pattern, self.page)[1:]:
            self.ids.append(id_prefix + re.sub("[?].*", "", gutils.before(hit, '"')))
            name = gutils.trim(hit, ">", "</a>")
            details = gutils.trim(hit, '<span class="standardsmall">', "</span>")
            title = gutils.strip_tags(
                name + " (" + details.replace("<br />", " - ") + ")")
            title = title.replace("( - (", "(").replace("))", ")")
            self.titles.append(title_prefix + title)
def get_cast(self):
    """Set ``self.cast`` to the cast list, one entry per line.

    The list runs from the ``/ob.gif"`` image to the "zobacz więcej" link in
    ``self.page``.  Colons become the translated " as " separator and each
    closing ``</span>`` ends a line.
    """
    # Re-open the tag that the start marker cut in half so it gets stripped too.
    raw = "<%s" % gutils.trim(self.page, '/ob.gif"', "zobacz więcej")
    raw = raw.replace("\n", '').replace("\t", '')
    raw = raw.replace(":", _(" as "))
    raw = raw.replace('</span>', "\n")
    self.cast = gutils.strip_tags(raw)
def get_rating(self):
    """Set ``self.rating`` to the page rating rounded to a whole number.

    The value is read from the ``starbar-meta`` block of ``self.page`` up to
    ``/10``; a decimal comma is accepted.  When no number can be parsed
    (block missing or malformed) the rating is set to 0 instead of letting
    ``ValueError`` escape, as the previous version did.
    """
    raw = gutils.trim(self.page, u'<div class="starbar-meta">', '/10')
    raw = gutils.strip_tags(raw).replace(",", ".")
    try:
        self.rating = round(float(raw))
    except ValueError:
        self.rating = 0
def get_cast(self):
    """Set ``self.cast`` to the REPARTO cell of ``self.page``, one name per line."""
    cell = gutils.trim(self.page, '<th>REPARTO</th>', '</td>')
    # Names are links separated by commas; each separator becomes a line break.
    cell = re.sub('</a>,[ ]*', '\n', cell)
    names = gutils.strip_tags(cell).strip()
    names = re.sub('[ ]+', ' ', names)
    self.cast = re.sub('\n[ ]+', '\n', names)
def get_searches(self):
    """Fill ``self.ids`` / ``self.titles`` from the search response.

    An empty page yields nothing.  A page shorter than 20 characters is
    stored as the single id with ``self.url`` as its title.  Anything else
    is split on ``'</a></b>'`` as a result list.
    """
    if not self.page:
        return

    if len(self.page) < 20:
        # immediate redirection to the movie page
        self.number_results = 1
        self.ids.append(self.page)
        self.titles.append(self.url)
        return

    # multiple matches
    pieces = self.page.split('</a></b>')
    if pieces[0] == '':
        return
    # Each piece is paired with its successor, which holds the text after the link.
    for piece, following in zip(pieces, pieces[1:]):
        film_id = gutils.trim(piece, '<b><a href="/es/film', '.html')
        if not film_id:
            continue
        self.ids.append(film_id)
        link = gutils.clean(gutils.after(piece, '<b><a href="/es/film')).replace("\n", "")
        name = gutils.strip_tags(gutils.convert_entities(gutils.after(link, '>')))
        self.titles.append(name + ' ' + gutils.before(following, '<').strip())
def get_director(self):
    """Set ``self.director`` to the tag-stripped text under the REALIZADOR heading."""
    heading = "<b>REALIZADOR</b></font>\n<br />\n<font face=arial size=-1>"
    block = gutils.trim(self.page, heading, "\n<br /><br />")
    self.director = gutils.strip_tags(block)
def get_plot(self):
    """Set ``self.plot`` from the plot-summary list or, failing that, the description.

    Entries of the ``plot-summaries-content`` list in ``self.plot_page`` are
    joined as summary text followed by the author line.  When the list gives
    nothing, the ``description`` of ``self.page`` is used and extended with
    the ``plotpar`` paragraphs (or ``odd``/``even`` list items) of
    ``self.plot_page``.
    """
    no_summaries = 'It looks like we don\'t have any Plot Summaries for this title yet.'

    listing = gutils.trim(self.plot_page, 'id="plot-summaries-content">', '</ul>')
    compilation = ''
    for item in listing.split('<li'):
        if item == '' or no_summaries in item:
            continue
        compilation += gutils.trim(item, '<p>', '</p>') + '\n'
        author = gutils.trim(item, '<div class="author-container">', '</div>')
        compilation += re.sub('<[^<]+?>', '', author.replace('\n', '').lstrip()) + '\n\n'

    if compilation != '':
        self.plot = compilation
        return

    self.plot = gutils.regextrim(self.page, 'itemprop="description"', '<')
    self.plot = gutils.after(self.plot, '>')

    paragraphs = self.plot_page.split('<p class="plotpar">')
    if len(paragraphs) < 2:
        # Alternative layout: summaries as alternating list items.
        paragraphs = re.split('<li class="(?:odd|even)">', self.plot_page)
    if len(paragraphs) > 1:
        self.plot = self.plot + '\n\n'
        for paragraph in paragraphs[1:]:
            if paragraph != '':
                self.plot = self.plot + gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n\n'
def get_searches(self):
    """Collect review ids and ``"<title>\\nAnno: <year>"`` labels from ``self.page``.

    The page is split on the ``<h3>`` that opens each hit, so
    ``self.number_results`` is the number of fragments after the first.
    """
    hits = self.page.split("<h3 style=\"margin:0px;\">")
    self.number_results = len(hits) - 1
    # hits[0] is the page header; with no results the loop does not run.
    for hit in hits[1:]:
        link = gutils.trim(hit, "<a", "</a>")
        year = gutils.trim(hit, "anno=", "\">")
        self.ids.append(gutils.trim(link, "recensione.asp?id=", "\""))
        name = gutils.convert_entities(gutils.strip_tags(gutils.trim(link, '" title="', '"')))
        self.titles.append(name + "\nAnno: " + year)
def get_o_title(self):
    """Set ``self.o_title`` from the ``after-title`` heading of ``self.page``.

    When the heading is missing or empty, the result of
    ``self.get_title(True)`` is stored instead.
    """
    heading = gutils.trim(self.page, '<h2 class="after-title">', '</h2>')
    self.o_title = gutils.strip_tags(heading).strip()
    if not self.o_title:
        self.o_title = self.get_title(True)
def get_cast(self):
    """Set ``self.cast`` to the Reparto entry of ``self.page``, one name per line."""
    entry = gutils.trim(self.page, '<dt>Reparto</dt>', '</dd>')
    # Names are links separated by commas; each separator becomes a line break.
    entry = re.sub('</a>,[ ]*', '\n', entry)
    names = gutils.strip_tags(entry).strip()
    names = re.sub('[ ]+', ' ', names)
    self.cast = re.sub('\n[ ]+', '\n', names)
def get_studio(self):
    """Set ``self.studio`` to the production entry of ``self.detail_page``.

    The short form ends at the first ``' '`` after the ``Produktion`` icon.
    If that is empty once tags are stripped, the raw text up to ``</TABLE>``
    is used instead.  The result is cut after its first ``>``, line breaks
    become ``", "`` and a leading or trailing ``", "`` is removed.
    """
    studio = gutils.strip_tags(gutils.trim(self.detail_page, 'alt="Produktion"', ' '))
    if studio == '':
        studio = gutils.trim(self.detail_page, 'alt="Produktion"', '</TABLE>')
    studio = gutils.after(studio, '>').replace('\n', ', ')
    self.studio = re.sub('((^, )|(, $))', '', studio)
def get_genre(self):
    """Set ``self.genre`` to the comma-separated genre list of ``self.page``.

    ``<br>`` and ``/`` both act as separators.  The last character of the
    cleaned text is dropped.
    """
    raw = gutils.trim(self.page, "Genre(s):", "</table>")
    raw = gutils.strip_tags(raw.replace("<br>", ", "))
    cleaned = gutils.clean(raw.replace("/", ", "))
    self.genre = cleaned[:-1]
def get_studio(self):
    """Set ``self.studio`` to the production entry of ``self.detail_page``.

    The short form (up to the first ``" "`` after the ``Produktion`` icon) is
    tried first.  When it is empty after tag stripping, the raw text up to
    ``</TABLE>`` takes its place.  The value is then cut after its first
    ``>``, newlines turn into ``", "`` and a dangling separator at either
    end is removed.
    """
    marker = 'alt="Produktion"'
    value = gutils.strip_tags(gutils.trim(self.detail_page, marker, " "))
    if not value:
        value = gutils.trim(self.detail_page, marker, "</TABLE>")
    value = gutils.after(value, ">")
    value = value.replace("\n", ", ")
    self.studio = re.sub("((^, )|(, $))", "", value)
def get_plot(self):
    """Set ``self.plot`` to the tag-stripped plot cell of ``self.page``.

    The characters ``\\x93`` and ``\\x84`` are replaced by a plain double quote.
    """
    cell = gutils.trim(self.page, '<td width="100%" valign="top" class="fontsmall3">', '</td>')
    text = gutils.strip_tags(cell)
    for odd_quote in (u'\x93', u'\x84'):
        text = text.replace(odd_quote, '"')
    self.plot = text
def get_cast(self):
    """Set ``self.cast`` to the cast list of ``self.page``, one actor per line.

    Every ``filmActor`` div starts a new line and colons become the
    translated " as " separator.
    """
    # Re-open the tag that the start marker cut in half so it gets stripped too.
    raw = "<%s" % gutils.trim(self.page, '/ob.gif"', "<br/>\n\t")
    raw = raw.replace("\n", '').replace("\t", '')
    raw = raw.replace('<div class="filmActor">', "\n")
    raw = raw.replace(":", _(" as "))
    self.cast = gutils.strip_tags(raw)
def get_cast(self):
    """Set ``self.cast`` from the OBSADA section of ``self.cast_page``.

    After tag stripping, ``'\\n\\n '`` is taken as the gap between an actor and
    the role and replaced by the translated " as " separator.  A separator
    left dangling at the end of a line is removed again.
    """
    section = gutils.trim(self.cast_page, '<h2>OBSADA:</h2>', '<div class="b')
    section = gutils.after(section, '<div class="clr"></div>')
    text = gutils.strip_tags(section.replace('\t', ''))
    text = text.replace('\n\n ', _(' as '))
    self.cast = text.replace("%s\n" % _(' as '), "\n")
def get_plot(self):
    """Set ``self.plot`` to the plot cell of ``self.page`` as plain text.

    The cell is decoded from latin-1.  ``<br>`` and ``<p>`` become spaces and
    apostrophes become underscores before the remaining tags are stripped.
    """
    cell = gutils.trim(self.page, "<td valign=\"top\" align=\"left\">", "</td>")
    text = cell.decode('latin-1').strip()
    for old, new in (("<br>", " "), ("<p>", " "), ("'", "_")):
        text = text.replace(old, new)
    self.plot = gutils.strip_tags(text).strip()
def get_classification(self):
    """Set ``self.classification`` to the certification list of ``self.page``.

    The ``" | "`` separators become ``", "`` and the result is encoded with
    ``self.encode``.
    """
    block = gutils.trim(
        self.page,
        u'<h5>Certificação:</h5><div class="info-content">',
        u'</div>')
    listing = gutils.strip_tags(block).replace(" | ", ", ")
    self.classification = listing.encode(self.encode)
def get_searches(self):
    """Collect ids and titles of the search results found in ``self.page``.

    The page is split on '<li>'. For every chunk one entry is appended to
    ``self.ids`` (text between '/title/tt' and '/?fr=') and one to
    ``self.titles`` (text between ';fm=1">' and '</li>', entities converted
    and tags stripped). Nothing is appended when the page starts with
    '<li>' or is empty, i.e. when the first chunk is an empty string.
    """
    elements = self.page.split('<li>')
    # "!=" replaces the obsolescent "<>" spelling (removed in Python 3).
    if elements[0] != '':
        # NOTE(review): the loop also processes elements[0], the text in
        # front of the first '<li>'; kept as-is - confirm this is intended.
        for element in elements:
            self.ids.append(gutils.trim(element, '/title/tt', '/?fr='))
            self.titles.append(gutils.strip_tags(gutils.convert_entities(
                gutils.trim(element, ';fm=1">', '</li>'))))
def get_searches(self):
    """Collect ids and titles of the search results found in ``self.page``.

    The page is split on the opening of each result link
    ('<h4><a href="/film/fichefilm_gen_cfilm='). For every chunk after the
    first, the text in front of the first '.' is appended to ``self.ids``
    and the link text (between '>' and '</a>', entities converted and tags
    stripped) to ``self.titles``. Nothing is appended when the first chunk
    is an empty string.
    """
    elements = self.page.split('<h4><a href="/film/fichefilm_gen_cfilm=')
    # "!=" replaces the obsolescent "<>" spelling (removed in Python 3).
    if elements[0] != '':
        # Skip the chunk in front of the first result link.
        for element in elements[1:]:
            self.ids.append(gutils.before(element, '.'))
            self.titles.append(gutils.strip_tags(gutils.convert_entities(
                gutils.trim(element, '>', '</a>'))))
def get_cast(self):
    """Set ``self.cast`` from the 'Actores:' section of ``self.page``.

    ``<br>`` separators become line breaks; the remaining markup is removed
    and surrounding whitespace is stripped.
    """
    self.cast = ''
    actors = gutils.trim(
        self.page, "<font class = 'titulo3'>Actores:</font><br>", '<br><br>')
    actors = actors.replace('<br>', "\n")
    self.cast = gutils.strip_tags(actors).strip()
def get_rating(self):
    """Set ``self.rating`` from the 'IMDb RATING' section of ``self.page``.

    The tag-stripped text up to the next '</span>' is converted to a float
    and rounded to zero decimals. If the text is not numeric the rating
    becomes 0; if the extracted text is falsy it is left untouched.
    """
    self.rating = gutils.strip_tags(
        gutils.trim(self.page, 'IMDb RATING', '</span>'))
    if self.rating:
        try:
            self.rating = round(float(self.rating), 0)
        except (TypeError, ValueError):
            # float() rejects non-numeric input with these two errors; the
            # old "except Exception, e" form was Python-2-only syntax and
            # caught far more than the conversion can raise.
            self.rating = 0
def get_plot(self):
    """Set ``self.plot`` from the left-aligned, top-aligned table cell.

    The cell is decoded as latin-1 and stripped, ``<br>``/``<p>`` are
    replaced by blanks and apostrophes by underscores; finally tags are
    stripped and the text is trimmed again.
    """
    cell = gutils.trim(self.page, '<td valign="top" align="left">', '</td>')
    plot = (cell.decode('latin-1').strip()
            .replace("<br>", " ")
            .replace("<p>", " ")
            .replace("'", "_"))
    self.plot = gutils.strip_tags(plot).strip()
def get_cast(self):
    """Set ``self.cast`` from the '>Actors:' section of ``self.page``.

    Both ``<br>`` spellings become line breaks, a few filler characters are
    removed and the remaining markup is stripped.
    """
    # NOTE(review): as written, the ' ' entry removes every blank and the
    # two bullet entries are identical; these literals may originally have
    # been HTML entities (e.g. a non-breaking space) - confirm.
    substitutions = (
        ('<br>', '\n'),
        ('<br />', '\n'),
        (' ', ''),
        ('•', ''),
        ('•', ''),
    )
    cast = gutils.trim(self.page, '>Actors:', '</td><td')
    for token, replacement in substitutions:
        cast = cast.replace(token, replacement)
    self.cast = gutils.strip_tags(cast)
def get_cast(self):
    """Collect the actors from ``self.page_cast``, one per line.

    The 'Elenco / Cast' table is split on '<tr>'; for every row after the
    first chunk, ' ... ' is replaced by the translated " as ", tags are
    stripped and the row is terminated with a newline.
    """
    self.cast = ""
    table = gutils.trim(self.page_cast, '<b>Elenco / Cast</b>', '</table>')
    rows = table.split('<tr>')[1:]
    self.cast = "".join(
        gutils.strip_tags(row.replace(' ... ', _(' as '))) + '\n'
        for row in rows)
def get_cast(self):
    """Build ``self.cast`` from the 'name' divs of ``self.page_cast``.

    Entries are read in page order until one contains a Director, Producer
    or Writer heading. Each entry contributes a "name as role" line, or
    just "name" when no role text is present.
    """
    crew_headings = (
        '<h3>Director</h3>',
        '<h3>Producer</h3>',
        '<h3>Writer</h3>',
    )
    self.cast = ''
    chunks = re.split('(?:[<]div[ \t]+class="name"[>])', self.page_cast)
    # chunks[0] is the text in front of the first name div.
    for entry in chunks[1:]:
        if any(heading in entry for heading in crew_headings):
            break
        name = gutils.strip_tags(gutils.before(entry, '</div>')).strip()
        role = gutils.strip_tags(
            gutils.trim(entry, '<div class="role">', '</div>')).strip()
        line = (name + _(' as ') + role) if role else name
        self.cast = self.cast + line + '\n'
def get_runtime(self):
    """Set ``self.runtime`` to the film length in minutes, or '' on failure.

    The tag-stripped text between 'Length' and '<br' is expected to look
    like "1 hrs. 59 mins."; the part in front of 'hrs' is read as hours and
    the part between '.' and 'mins' as minutes.
    """
    self.runtime = ''
    tmp = gutils.strip_tags(gutils.trim(self.page, 'Length', '<br'))
    try:
        hours = int(gutils.before(tmp, 'hrs'))
        minutes = int(gutils.trim(tmp, '.', 'mins'))
        self.runtime = hours * 60 + minutes
    except Exception:
        # Best effort: any parsing problem leaves the runtime empty. Unlike
        # the former bare "except:", this no longer swallows
        # KeyboardInterrupt / SystemExit.
        self.runtime = ''
def get_cast(self):
    """Build ``self.cast`` from the 'Darsteller' table of ``self.cast_page``.

    Whitespace fillers are removed, each 'class="Daten">' cell opener starts
    a new line, tags are stripped, '... ' is replaced by the translated
    " as " and the result is passed through ``gutils.clean``.
    """
    self.cast = ''
    cast = gutils.trim(self.cast_page, 'Darsteller</i>', '</table>')
    # NOTE(review): the third alternative is a single blank as seen in this
    # file; it may stand for a non-breaking space / &nbsp; - confirm.
    cast = re.sub('(\n|\t| )', '', cast)
    # Tabs are already gone after the substitution above; call kept as-is.
    cast = cast.replace('\t', '')
    cast = cast.replace('class="Daten">', '>\n')
    cast = gutils.strip_tags(cast).strip()
    cast = cast.replace('... ', _(' as '))
    self.cast = gutils.clean(cast)
def get_runtime(self):
    """Set ``self.runtime`` from the 'Runtime' section of ``self.page``.

    The tag-stripped text in front of 'min<' is stored first. When it
    contains an "h " separator (e.g. "1h 59"), it is converted to a number
    of minutes; if either part is not an integer the text is kept as it is.
    """
    self.runtime = gutils.strip_tags(
        gutils.regextrim(self.page, 'Runtime<[^>]+>', 'min<'))
    parts = self.runtime.split('h ')
    if len(parts) > 1:
        try:
            self.runtime = int(parts[0]) * 60 + int(parts[1])
        except ValueError:
            # Non-numeric parts: deliberately keep the extracted text.
            # (Replaces a bare "except:" whose body was a no-op "None".)
            pass
def get_studio(self):
    """Fill ``self.studio`` from the 'alt="Produktion"' part of the detail page.

    The section is cut at the next '<img', or at '</TABLE>' when that first
    attempt yields an empty string. Everything up to the first '>' is
    dropped, '<tr>' becomes ", ", tags are stripped and a leading or
    trailing ", " is removed.
    """
    studio = gutils.trim(self.detail_page, 'alt="Produktion"', '<img')
    if studio == '':
        studio = gutils.trim(self.detail_page, 'alt="Produktion"', '</TABLE>')
    studio = gutils.after(studio, '>').replace('<tr>', ', ')
    self.studio = re.sub('((^, )|(, $))', '', gutils.strip_tags(studio))
def get_searches(self):
    """Find the id and the film title of every search result in the page.

    ``self.page`` is split on the movie link pattern, whose capture group
    yields the numeric id; the text following each id provides the title
    (between '>' and '</'). Results lacking an id or a title are skipped.
    """
    pieces = re.split('index[.]php[?]op=Movie&id=([0-9]+)" ', self.page)
    # After the leading text, pieces alternate: id, following text, id, ...
    for movie_id, tail in zip(pieces[1::2], pieces[2::2]):
        title = gutils.clean(gutils.trim(tail, '>', '</'))
        if not (movie_id and title):
            continue
        self.ids.append(movie_id)
        self.titles.append(gutils.strip_tags(gutils.convert_entities(title)))