def get_plot(self):
    """Assemble ``self.plot`` from the localized pages, then the IMDb pages.

    Reads ``self.page``, ``self.plot_page``, ``self.imdb_page`` and
    ``self.imdb_plot_page``. The ``name="description"`` meta content plus all
    ``class="plotSummary"`` chunks are taken from the localized pages first
    and, if that leaves the plot empty, from the IMDb pages. Entries found in
    the ``plot-summaries-content`` list of ``self.plot_page`` replace whatever
    was collected before.
    """
    description = gutils.trim(self.page, 'name="description"', '/>')
    self.plot = gutils.before(gutils.after(description, 'content="'), '"')
    local_chunks = self.plot_page.split('class="plotSummary"')
    if len(local_chunks) > 1:
        self.plot += '\n\n'
        # chunk 0 is the text in front of the first summary marker
        for chunk in local_chunks[1:]:
            if chunk != '':
                summary = gutils.before(gutils.after(chunk, '>'), '</a>')
                self.plot += gutils.strip_tags(summary) + '\n\n'

    if self.plot == '':
        # nothing in german found, try original
        description = gutils.trim(self.imdb_page, 'name="description"', '/>')
        self.plot = gutils.before(gutils.after(description, 'content="'), '"')
        imdb_chunks = self.imdb_plot_page.split('class="plotSummary"')
        if len(imdb_chunks) > 1:
            self.plot += '\n\n'
            for chunk in imdb_chunks[1:]:
                if chunk != '':
                    summary = gutils.before(gutils.after(chunk, '>'), '</a>')
                    self.plot += gutils.strip_tags(summary) + '\n\n'

    # Summary list entries: each contributes its <p> text and its author line.
    summary_list = gutils.trim(self.plot_page, 'id="plot-summaries-content">', '</ul>')
    compiled = ''
    for item in summary_list.split('<li'):
        if item == '':
            continue
        compiled += gutils.trim(item, '<p>', '</p>') + '\n'
        author = gutils.trim(item, '<div class="author-container">', '</div>')
        compiled += re.sub('<[^<]+?>', '', author.replace('\n', '').lstrip()) + '\n\n'
    if compiled != '':
        self.plot = compiled
def get_o_title(self):
    """Set ``self.o_title`` from ``self.tmp_page``.

    The parenthesised text after the ``standardsmall`` span is used when it
    is non-empty; otherwise the link text of the ``headline2`` anchor is
    taken, where ``self.url_type == 'V'`` selects the ``/videofilm`` anchor
    and anything else the ``/kinofilm`` anchor.
    """
    self.o_title = gutils.trim(self.tmp_page, 'span class="standardsmall">(', ')<')
    if self.o_title != '':
        return
    if self.url_type == 'V':
        anchor_pattern = 'headline2"[^>]*>[ \t\r\n]*<a href="/videofilm'
    else:
        anchor_pattern = 'headline2"[^>]*>[ \t\r\n]*<a href="/kinofilm'
    headline = self.regextrim(self.tmp_page, anchor_pattern, '</a>')
    self.o_title = gutils.after(headline, '>')
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from the search response.

    ``self.page`` is interpreted three ways: ``None`` means the request
    landed directly on a movie page (``self.url`` / ``self.title`` are used),
    ``False`` means nothing was found, and anything else is treated as a
    result list that is split on ``'<li '``.

    The original guarded the loop with ``elements != ''``; comparing a list
    with a string is always true, so the guard and its unreachable ``else``
    branch were removed without changing what the method does.
    """
    if self.page is None:
        # movie page
        self.ids.append(self.url)
        self.titles.append(gutils.convert_entities(self.title))
        return
    if self.page is False:
        # no movie found
        self.number_results = 0
        return

    # multiple matches
    elements = self.page.split('<li ')
    # NOTE(review): this stores the last raw chunk rather than a count; kept
    # unchanged because callers are not visible here - confirm intent.
    self.number_results = elements[-1]
    for element in elements:
        if element == '':
            continue
        element = gutils.after(element, 'href="')
        self.ids.append('http://' + plugin_url_other + gutils.before(element, '"'))
        element_title = gutils.trim(element, '">', '</a>')
        element_title = element_title.replace('\t', '')
        element = gutils.after(element, 'class=searchResultDetails')
        element_year = gutils.trim(element, '>', '|')
        element_year = element_year.replace(" ", '')
        element_year = gutils.strip_tags(element_year)
        element_country = ''
        pos_country = element.find('countryIds')
        if pos_country != -1:
            element_country = gutils.trim(element[pos_country:], '">', '</a>')
        # Build the display label: "title (year) - country"
        label = element_title.strip()
        if element_year:
            label += ' (' + element_year.strip() + ')'
        if element_country:
            label += ' - ' + element_country.strip()
        label = gutils.convert_entities(label)
        label = gutils.strip_tags(label)
        self.titles.append(label)
def get_o_title(self):
    """Set ``self.o_title`` from ``self.tmp_page``.

    Uses the parenthesised text following the ``standardsmall`` span and its
    ``<br />``; when that is empty, falls back to the link text of the
    ``headline2`` anchor (``/videofilm`` for ``self.url_type == "V"``,
    ``/kinofilm`` otherwise).
    """
    self.o_title = gutils.trim(self.tmp_page, 'span class="standardsmall"><br />(', ')<')
    if self.o_title != "":
        return
    if self.url_type == "V":
        anchor = '"headline2"><a href="/videofilm'
    else:
        anchor = '"headline2"><a href="/kinofilm'
    self.o_title = gutils.after(gutils.trim(self.tmp_page, anchor, "</a>"), ">")
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from ``self.page``.

    A falsy page is ignored. A page shorter than 20 characters is treated as
    an immediate redirect: the page itself is stored as the single id and
    ``self.url`` as its title. Otherwise the page is split on ``'</a></b>'``
    and each chunk with a film id contributes one id/title pair; the title is
    completed with the text at the start of the following chunk.
    """
    if not self.page:
        return
    if len(self.page) < 20:
        # immediate redirection to movie page
        self.number_results = 1
        self.ids.append(self.page)
        self.titles.append(self.url)
        return

    # multiple matches
    chunks = self.page.split('</a></b>')
    if chunks[0] == '':
        return
    # pair every chunk with its successor; the last chunk is only a successor
    for chunk, following in zip(chunks, chunks[1:]):
        film_id = gutils.trim(chunk, '<b><a href="/es/film', '.html')
        if not film_id:
            continue
        self.ids.append(film_id)
        link = gutils.clean(gutils.after(chunk, '<b><a href="/es/film')).replace("\n", "")
        link_text = gutils.strip_tags(gutils.convert_entities(gutils.after(link, '>')))
        self.titles.append(link_text + ' ' + gutils.before(following, '<').strip())
def get_director(self):
    """Set ``self.director`` from the credits page.

    If ``self.url`` does not already carry ``typ=credits``, the URL is
    rebuilt from ``self.url_to_use`` and ``self.movie_id`` and the page is
    reloaded via ``self.open_page``. The name is then taken from the link
    following the "Regie" marker in ``self.page``.
    """
    current_type = gutils.trim(self.url, "typ=", "&")
    if current_type != "credits":
        self.url = self.url_to_use + "typ=credits&nr=" + str(self.movie_id)
        self.open_page(self.parent_window)
    fragment = gutils.trim(self.page, "Regie", "</a>")
    fragment = gutils.after(fragment, "mitwirk.php4")
    self.director = gutils.after(fragment, ">")
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from the search response.

    ``self.page`` being ``None`` means a movie page was hit directly,
    ``False`` means no result; otherwise the page is split on ``'<li '`` and,
    provided the first chunk is non-empty, every chunk yields one id and one
    "title (year) - country" label.
    """
    if self.page is None:
        # movie page
        self.ids.append(self.url)
        self.titles.append(gutils.convert_entities(self.title))
        return
    if self.page is False:
        # no movie found
        self.number_results = 0
        return

    # multiple matches
    chunks = self.page.split('<li ')
    # NOTE(review): stores the last raw chunk, not a count - confirm intent.
    self.number_results = chunks[-1]
    if chunks[0] == '':
        self.number_results = 0
        return
    for chunk in chunks:
        chunk = gutils.after(chunk, '<a href="')
        self.ids.append('http://' + plugin_server + gutils.before(chunk, '"'))
        title = gutils.trim(chunk, 'class="searchResultTitle"', '</a>')
        title = gutils.after(title, '">').replace("\t", '')
        details = gutils.after(chunk, 'class="searchResultDetails"')
        year = gutils.strip_tags(gutils.trim(details, '>', '|').replace(" ", ''))
        country = gutils.trim(details, '">', '</a>')
        label = title.strip()
        if year != '':
            label = label + ' (' + year.strip() + ')'
        if country != '':
            label = label + ' - ' + country.strip()
        label = gutils.strip_tags(gutils.convert_entities(label))
        self.titles.append(label)
def get_cast(self):
    """Build ``self.cast`` as "actor as character" lines.

    The section of ``self.page_cast`` between "Acteurs" and ``</table>`` is
    split on ``'<td '``. After the leading chunk the cells are consumed in
    groups of three, of which the second is read as the character and the
    third as the actor.

    The loop bound is ``len(parts) - 2`` so that ``parts[index + 2]`` always
    exists; the previous bound of ``len(parts) - 1`` raised ``IndexError``
    when the table ended with an incomplete group of cells.
    """
    self.cast = ""
    casts = gutils.trim(self.page_cast, "Acteurs", "</table>")
    parts = casts.split("<td ")
    lines = []
    for index in range(1, len(parts) - 2, 3):
        character = gutils.after(parts[index + 1], ">")
        actor = gutils.after(parts[index + 2], ">")
        lines.append(gutils.clean(actor) + _(" as ") + gutils.clean(character) + "\n")
    self.cast = "".join(lines)
def get_cast(self):
    """Build ``self.cast`` as "actor as character" lines.

    The section of ``self.page_cast`` between 'Acteurs' and ``</table>`` is
    split on ``'<td '``; after the leading chunk the cells are read in pairs,
    first the character and then the actor.
    """
    self.cast = ""
    table = gutils.trim(self.page_cast, 'Acteurs', '</table>')
    cells = table.split('<td ')
    position = 1
    # stop one short of the end so cells[position + 1] always exists
    while position < len(cells) - 1:
        character = gutils.after(cells[position], '>')
        actor = gutils.after(cells[position + 1], '>')
        self.cast += gutils.clean(actor) + _(' as ') + gutils.clean(character) + '\n'
        position += 2
def director(self):
    """Extract the director from ``self.page`` into ``self.director``.

    The fragment after the director label is unwrapped either from between
    ``-->`` and ``<!--`` (when a ``-->`` is present) or from a ``<B>`` tag.

    NOTE(review): assigning ``self.director`` replaces this method on the
    instance after the first call - confirm that is intended.
    """
    fragment = gutils.trim(self.page, "<BR>Re¿yseria: ", "<BR>")
    if fragment.find("-->") != -1:
        fragment = gutils.before(gutils.after(fragment, "-->"), "<!--")
    else:
        fragment = gutils.before(gutils.after(fragment, "<B>"), "</B>")
    self.director = fragment
def get_director(self):
    """Extract the director from ``self.page`` into ``self.director``.

    The fragment following the director label is unwrapped from between
    ``-->`` and ``<!--`` if it contains ``-->``, and from a ``<B>`` element
    otherwise.
    """
    fragment = gutils.trim(self.page, '<BR>Re\xbfyseria: ', '<BR>')
    has_comment_wrapper = fragment.find('-->') != -1
    if has_comment_wrapper:
        opening, closing = '-->', "<!--"
    else:
        opening, closing = '<B>', '</B>'
    self.director = gutils.before(gutils.after(fragment, opening), closing)
def get_plot(self):
    """Set ``self.plot`` from the "GÉNERO Y CRÍTICA" row of ``self.page``.

    The section is looked up with a unicode marker first and with the plain
    literal if that finds nothing. The text after 'SINOPSIS:' is kept and the
    "(FILMAFFINITY)" credit is removed, with or without a leading space.
    """
    section = gutils.trim(self.page, u'<b>GÉNERO Y CRÍTICA</b>', '</tr>')
    if section == '':
        section = gutils.trim(self.page, '<b>GÉNERO Y CRÍTICA</b>', '</tr>')
    section = gutils.after(section, '<td valign="top">')
    section = gutils.after(section, 'SINOPSIS:')
    for credit in (' (FILMAFFINITY)', '(FILMAFFINITY)'):
        section = section.replace(credit, '')
    self.plot = section
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from ``self.page``.

    The page is split on ``class="dvdtitle">``; for each chunk after the
    first, the link fragment up to ``<br/>`` provides the id (from the
    ``movie/.../main`` path) and the title (link text).
    """
    chunks = self.page.split('class="dvdtitle">')
    # the first chunk precedes the first title marker and carries no result
    for chunk in chunks[1:]:
        link = gutils.trim(chunk, '<a href="', '<br/>')
        if link == '':
            continue
        path = gutils.trim(link, 'movie/', '/main')
        self.ids.append(gutils.after(path, '/'))
        self.titles.append(gutils.after(link, '">').replace('</a></span>', ''))
def get_o_title(self):
    """Set ``self.o_title`` from the first result block of ``self.page``.

    The text in front of "(Originaltitel)" is cleaned, reduced to the part
    after its last comma (when one exists past position 0) and normalized
    with ``string.capwords``. If nothing remains, the link text of the block
    is used instead.
    """
    block = gutils.regextrim(self.page, 'class="text_ergebniss_faz_3"',
                             '[ \t]+[(]Originaltitel[)]')
    title = gutils.clean(gutils.after(block, '</a>'))
    last_comma = title.rfind(',')
    if last_comma > 0:
        title = title[last_comma + 1:]
    self.o_title = string.capwords(title)
    if self.o_title == '':
        link = gutils.trim(self.page, 'class="text_ergebniss_faz_3"', '</a>')
        self.o_title = gutils.after(link, '>')
def get_title(self):
    """Set ``self.title`` to the link text of the ``headline2`` anchor.

    ``self.url_type == "V"`` selects the ``/videofilm`` anchor, any other
    value the ``/kinofilm`` anchor in ``self.tmp_page``.
    """
    anchor_pattern = (
        'headline2"[^>]*>[ \t\r\n]*<a href="/videofilm'
        if self.url_type == "V"
        else 'headline2"[^>]*>[ \t\r\n]*<a href="/kinofilm'
    )
    headline = self.regextrim(self.tmp_page, anchor_pattern, "</a>")
    self.title = gutils.after(headline, ">")
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from ``self.page``.

    The page is split on ``"<!--"``; every non-empty chunk after the first
    yields the ``filmid=`` value as id and a title made of the link text and
    two ``<td>`` cell values joined with " - ".
    """
    for chunk in self.page.split("<!--")[1:]:
        if chunk == '':
            continue
        self.ids.append(gutils.trim(chunk, "filmid=", ">"))
        link_text = gutils.trim(gutils.after(chunk, "filmid="), ">", "<")
        cell_after_link = gutils.trim(gutils.after(chunk, "</a>"), "<td>", "</td>")
        later_cell = gutils.trim(
            gutils.after(gutils.after(chunk, "<td>"), "<td>"), "<td>", "</td>")
        self.titles.append(link_text + " - " + cell_after_link + " - " + later_cell)
def get_plot(self):
    """Set ``self.plot`` from the "o-filmie" section of ``self.page``.

    When the teaser contains a continuation link (looked up after
    ``"\\t..."``), that URL is opened with ``self.open_page`` and the plot is
    re-read from the ``filmContent`` block of the fetched page.
    """
    teaser = gutils.trim(self.page, '<h2 id="o-filmie-header" class="replace">', '</div>')
    teaser = gutils.after(teaser, '<p>')
    more_link = gutils.trim(teaser, "\t...", "</a>")
    more_url = gutils.trim(more_link, 'href="', '"')
    self.plot = gutils.strip_tags(teaser)
    if more_url == '':
        return
    full_page = self.open_page(url=more_url)
    content = gutils.trim(full_page, '<div class="filmContent">', '</ul>')
    self.plot = gutils.after(content, 'zgłoś poprawkę')
def get_o_title(self):
    """Set ``self.o_title`` from ``self.tmp_page``.

    First choice is the parenthesised text after the ``standardsmall`` span.
    If that is empty, the link text of the ``headline2`` anchor is used; the
    anchor path is ``/videofilm`` when ``self.url_type`` is "V" and
    ``/kinofilm`` otherwise.
    """
    self.o_title = gutils.trim(self.tmp_page, 'span class="standardsmall">(', ")<")
    if self.o_title == "":
        anchor_patterns = {
            True: 'headline2"[^>]*>[ \t\r\n]*<a href="/videofilm',
            False: 'headline2"[^>]*>[ \t\r\n]*<a href="/kinofilm',
        }
        pattern = anchor_patterns[self.url_type == "V"]
        self.o_title = gutils.after(
            self.regextrim(self.tmp_page, pattern, "</a>"), ">")
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from ``self.page``.

    The page is split at every ``filmanzeige.php?filmid=`` link. In each
    non-empty chunk the id is the text in front of the first ``>`` or ``"``;
    the title joins the link text and two ``<td>`` values with ' - '.
    """
    chunks = re.split('(?:href=["]*filmanzeige[.]php[?]filmid=)', self.page)
    for chunk in chunks[1:]:
        if chunk == '':
            continue
        id_terminator = re.search('([>]|["])', chunk)
        if not id_terminator:
            continue
        # end() points one past the terminator character, hence the -1
        self.ids.append(chunk[:id_terminator.end() - 1])
        link_text = gutils.trim(chunk, '>', '<')
        cell_after_link = gutils.trim(gutils.after(chunk, '</a>'), '<td>', '</td>')
        second_cell = gutils.trim(gutils.after(chunk, '<td>'), '<td>', '</td>')
        self.titles.append(link_text + ' - ' + cell_after_link + ' - ' + second_cell)
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from the rows of ``self.page``.

    The page is split on ``'<tr'``. If the first chunk is empty the result
    count is set to 0 and nothing is collected; otherwise each chunk yields
    the ``IDfilm=`` value as id and a "title (year)" label.
    """
    rows = self.page.split('<tr')
    # NOTE(review): stores the last raw chunk, not a count - confirm intent.
    self.number_results = rows[-1]
    if rows[0] == '':
        self.number_results = 0
        return
    for row in rows:
        self.ids.append(gutils.trim(row, 'IDfilm=', '"'))
        link = gutils.trim(row, 'IDfilm=', '</a>')
        title = gutils.convert_entities(gutils.strip_tags(gutils.after(link, '>')))
        year = gutils.after(gutils.trim(row, '<td valign="bottom"', '</td>'), '>')
        self.titles.append(title + ' (' + year + ')')
def get_cameraman(self):
    """Set ``self.cameraman`` to the comma-separated names under "Kamera".

    The "Kamera" section of the ``person-collection`` block in ``self.page``
    is split at each ``href="``; every link text is stripped of ``<small>``
    elements, remaining tags and newlines, and whitespace runs are collapsed.

    The names are joined with ', ' between them. The previous implementation
    appended the delimiter *after* each name (starting with an empty one),
    which produced "AB, C, " instead of "A, B, C".
    """
    self.cameraman = ''
    people = gutils.trim(self.page, 'id="person-collection"', '</section>')
    section = gutils.regextrim(people, 'Kamera[^<]*[<][/]h3[>]', '<h3')
    names = []
    # the first split result is the text in front of the first link
    for chunk in re.split('href="', section)[1:]:
        name = gutils.before(gutils.after(gutils.after(chunk, '"'), '>'), '<')
        name = re.sub('<small[^>]*>[^<]*</small>', '', name)
        name = gutils.strip_tags(name)
        name = name.replace('\n', '')
        name = re.sub('[ \t]+', ' ', name)
        names.append(name)
    self.cameraman = ', '.join(names)
def get_plot(self):
    """Set ``self.plot`` from ``self.tmp_page``.

    A ``line-height`` span is preferred. Without one, the "Kurzinfo" section
    is used (two alternative end markers), everything up to the last
    ``</A>`` is dropped and the text after the next ``</table>`` is kept.
    """
    # little steps to perfect plot (I hope ... it's a terrible structured content ... )
    self.plot = gutils.trim(self.tmp_page, '<span style="line-height:', '</spa')
    if self.plot != '':
        # drop the remainder of the opening span tag
        self.plot = gutils.after(self.plot, '>')
        return

    text = gutils.trim(self.tmp_page, "Kurzinfo", "</td></tr><tr><td></td>")
    if text == '':
        text = gutils.trim(self.tmp_page, "Kurzinfo", '<script ')
    text = gutils.after(text, '>')
    # skip past every remaining link in front of the plot text
    while len(text) and text.find('</A>') > -1:
        text = gutils.after(text, '</A>')
    self.plot = gutils.after(gutils.after(text, '</table>'), '>')
def get_plot(self):
    """Set ``self.plot`` (and ``self.tmp_page``) from the film page.

    If ``self.url`` is not already a ``typ=film`` URL it is rebuilt and the
    page reloaded through ``self.open_page``. The plot is read after the
    poster image (video or cinema variant, chosen by ``self.url_type``), with
    a left-aligned table cell as fallback when that is empty.
    """
    if gutils.trim(self.url, "typ=", "&") != "film":
        self.url = self.url_to_use + "typ=film&nr=" + str(self.movie_id)
        self.open_page(self.parent_window)
    self.tmp_page = gutils.trim(
        self.page, "<!-- PRINT-CONTENT-START-->", "<!-- PRINT-CONTENT-ENDE-->")

    if self.url_type == "V":
        image_marker = 'IMG SRC="/pix/MBBILDER/VIDEO'
    else:
        image_marker = 'IMG SRC="/pix/MBBILDER/KINOPLAK'
    after_image = gutils.after(self.tmp_page, image_marker)
    self.plot = gutils.trim(after_image, "</TABLE>", "</TD>")

    if self.plot == "":
        self.plot = gutils.trim(
            self.tmp_page, 'BORDER="0" align="left" ><TR><TD>', "</TD>")
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from ``self.page``.

    The page is split at each ``filmsuche.cfm?wert=`` link. Provided the
    first chunk is non-empty, every later non-empty chunk yields the id (text
    before ``&``) and a "title (original, country, year)" label.
    """
    chunks = self.page.split("\n <a href=\"filmsuche.cfm?wert=")
    if chunks[0] == '':
        return
    # the first chunk is the page text in front of the first hit
    for chunk in chunks[1:]:
        if chunk == '':
            continue
        self.ids.append(gutils.before(chunk, "&"))
        title = gutils.trim(chunk, ">", "</a>")
        original = string.capwords(gutils.trim(chunk, "\n ", "(Orginaltitel)"))
        country = gutils.after(
            gutils.trim(chunk, "sucheNach=produktionsland", "</a>"), ">")
        year = gutils.after(
            gutils.trim(chunk, "sucheNach=produktionsjahr", "</a>"), ">")
        self.titles.append(gutils.strip_tags(
            title + " (" + original + ", " + country + ", " + year + ")"))
def get_o_title(self):
    """Set ``self.o_title`` from the first result block of ``self.page``.

    Takes the cleaned text preceding "(Originaltitel)", keeps only what
    follows its last comma (if that comma is not the first character) and
    applies ``string.capwords``. An empty result falls back to the block's
    link text.
    """
    marker = 'class="text_ergebniss_faz_3"'
    before_label = gutils.regextrim(self.page, marker, '[ \t]+[(]Originaltitel[)]')
    self.o_title = gutils.clean(gutils.after(before_label, '</a>'))
    comma_at = self.o_title.rfind(',')
    if comma_at > 0:
        self.o_title = self.o_title[comma_at + 1:]
    self.o_title = string.capwords(self.o_title)
    if self.o_title != '':
        return
    self.o_title = gutils.after(gutils.trim(self.page, marker, '</a>'), '>')
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from ``self.page``.

    Every chunk following a ``text_ergebniss_faz_3`` div marker yields the
    ``filmlexikon/?wert=`` value as id and a cleaned
    "title (original, country, year)" label.
    """
    chunks = self.page.split('<div class="text_ergebniss_faz_3"')
    # chunk 0 is the page text in front of the first result
    for chunk in chunks[1:]:
        self.ids.append(gutils.trim(chunk, 'filmlexikon/?wert=', '&'))
        title = gutils.trim(chunk, '>', '</a>')
        original = string.capwords(gutils.trim(chunk, '</a>', '(Originaltitel)'))
        country = gutils.after(
            gutils.trim(chunk, 'sucheNach=produktionsland', '</a>'), '>')
        year = gutils.after(
            gutils.trim(chunk, 'sucheNach=produktionsjahr', '</a>'), '>')
        label = title + ' (' + original + ', ' + country + ', ' + year + ')'
        self.titles.append(gutils.clean(label).strip())
def get_plot(self):
    """Set ``self.plot`` (and ``self.tmp_page``) from the film page.

    Reloads the page as ``typ=film`` when ``self.url`` points elsewhere, then
    reads the plot following the poster image; ``self.url_type == "V"``
    selects the video image marker. An empty result falls back to the
    left-aligned table cell.
    """
    page_type = gutils.trim(self.url, "typ=", "&")
    if page_type != "film":
        self.url = self.url_to_use + "typ=film&nr=" + str(self.movie_id)
        self.open_page(self.parent_window)
    self.tmp_page = gutils.trim(
        self.page, "<!-- PRINT-CONTENT-START-->", "<!-- PRINT-CONTENT-ENDE-->")

    image_markers = {
        True: "IMG SRC=\"/pix/MBBILDER/VIDEO",
        False: "IMG SRC=\"/pix/MBBILDER/KINOPLAK",
    }
    tail = gutils.after(self.tmp_page, image_markers[self.url_type == "V"])
    self.plot = gutils.trim(tail, "</TABLE>", "</TD>")
    if self.plot != '':
        return
    self.plot = gutils.trim(
        self.tmp_page, "BORDER=\"0\" align=\"left\" ><TR><TD>", "</TD>")
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from ``self.page``.

    A page shorter than 20 characters is treated as an immediate redirect
    (the page is the id, ``self.url`` the title). Otherwise the page is split
    on ``'</a></b>'`` and, if the first chunk is non-empty, every chunk but
    the last yields a film id and its link text.
    """
    if len(self.page) < 20:
        # immediate redirection to movie page
        self.number_results = 1
        self.ids.append(self.page)
        self.titles.append(self.url)
        return

    # multiple matches
    chunks = self.page.split('</a></b>')
    if chunks[0] == '':
        return
    for chunk in chunks[:-1]:
        self.ids.append(gutils.trim(chunk, '<b><a href="/es/film', '.html'))
        link = gutils.after(chunk, '<b><a href="/es/film')
        link_text = gutils.convert_entities(gutils.after(link, '>'))
        self.titles.append(gutils.strip_tags(link_text))
def get_studio(self):
    """Set ``self.studio`` from the description block or a "Verleih: " label.

    Tried in order: the description block of ``self.page``, a plain
    'Verleih: ' marker in ``self.page``, and the description block of
    ``self.videopage`` (only if that page is set).
    """
    def studio_from_description(html):
        # Returns None when the description or its "Regie:" paragraph is missing.
        description = gutils.trim(html, '<div class="description">', '</div>')
        if not description:
            return None
        paragraph = gutils.trim(description, 'Regie:', '</p>')
        if not paragraph:
            return None
        return gutils.after(paragraph, '<br/>').replace('Verleih: ', '')

    self.studio = ''
    found = studio_from_description(self.page)
    if found is not None:
        self.studio = found
    if not self.studio:
        self.studio = gutils.trim(self.page, 'Verleih: ', '<')
    if not self.studio and self.videopage:
        found = studio_from_description(self.videopage)
        if found is not None:
            self.studio = found
def get_plot(self):
    """Set ``self.plot`` (and ``self.tmp_page``) from the "Kurzinfo" section.

    Reloads the page as ``typ=film`` when ``self.url`` points elsewhere. The
    section text is then advanced past a fixed series of navigation link
    markers, cut before "FOTOSHOW</SPAN>" and taken after ``</TABLE>``.
    """
    if gutils.trim(self.url, "typ=", "&") != "film":
        self.url = self.url_to_use + "typ=film&nr=" + str(self.movie_id)
        self.open_page(self.parent_window)
    self.tmp_page = gutils.trim(
        self.page, "<!-- PRINT-CONTENT-START-->", "<!-- PRINT-CONTENT-ENDE-->")

    # little steps to perfect plot (I hope ... it's a terrible structured content ... )
    text = gutils.trim(self.tmp_page, "Kurzinfo", "</TD></TR><tr><td></td></tr><TR>")
    if text == "":
        text = gutils.trim(self.tmp_page, "Kurzinfo", '<script language="JavaScript">')
    link_markers = ("Fotoshow</A>", "Filmpreise</A>", "Games zum Film</A>", " Crew</A>")
    for marker in link_markers:
        text = gutils.after(text, marker)
    text = gutils.before(text, "FOTOSHOW</SPAN>")
    self.plot = gutils.after(text, "</TABLE>")
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from ``self.page``.

    The page is split on ``class="text_ergebniss_titel"`` and the chunks are
    consumed two at a time: the first of each pair supplies the id, the
    second the "title (original, country, year)" label. A trailing unpaired
    chunk is ignored.
    """
    chunks = self.page.split('class="text_ergebniss_titel"')
    # even-indexed chunks carry ids, odd-indexed chunks carry the text
    for id_part, text_part in zip(chunks[0::2], chunks[1::2]):
        self.ids.append(gutils.trim(id_part, 'filmsuche.cfm?wert=', '&'))
        title = gutils.trim(text_part, '>', '</a>')
        original = string.capwords(gutils.trim(text_part, '</a>', '(Originaltitel)'))
        country = gutils.after(
            gutils.trim(text_part, 'sucheNach=produktionsland', '</a>'), '>')
        year = gutils.after(
            gutils.trim(text_part, 'sucheNach=produktionsjahr', '</a>'), '>')
        self.titles.append(gutils.strip_tags(
            title + ' (' + original + ', ' + country + ', ' + year + ')'))
def search(self, parent_window):
    """Run the search and return the concatenated result sections.

    Returns ``None`` when ``self.open_search(parent_window)`` fails. Otherwise
    the first result section is taken from ``self.page``, and for every
    ``cp=`` page number other than 1 found in the ``trefferliste`` span,
    ``self.url`` is pointed at that page, the search is re-run and its result
    section appended. The combined text is stored in ``self.page`` and
    returned.

    Only conversion failures of the page number are swallowed now; the
    previous bare ``except:`` also caught ``KeyboardInterrupt`` and
    ``SystemExit``.
    """
    if not self.open_search(parent_window):
        return None
    # used for looking for subpages
    pager = gutils.trim(self.page, '<span class="trefferliste">', '</span>')
    elements = pager.split('cp=')
    # first results
    tmp_page = gutils.after(
        gutils.trim(self.page, 'Alle Treffer aus der Kategorie',
                    '<span class="trefferliste">'),
        "Titel:")
    # look for subpages
    for element in elements:
        element = gutils.before(element, '"')
        try:
            page_number = int(element)
        except (TypeError, ValueError):
            # not a page link; treat it like the already-loaded first page
            page_number = 1
        if page_number != 1:
            self.url = ('http://www.zweitausendeins.de/filmlexikon/?sucheNach=Titel&cp='
                        + str(page_number) + "&wert=")
            if self.open_search(parent_window):
                tmp_page2 = gutils.trim(self.page, 'Alle Treffer aus der Kategorie',
                                        '<span class="trefferliste">')
                tmp_page = tmp_page + tmp_page2
    self.page = tmp_page
    return self.page
def get_plot(self):
    """Set ``self.plot`` from the content block of the page(s) and tidy it.

    Sources tried in order: the ``yui-content`` block of ``self.page``, the
    'Filmhandlung & Hintergrund' section of ``self.page``, and the
    ``yui-content`` block of ``self.videopage`` (if set). A non-empty result
    has inline scripts and gallery navigation links removed, is cleaned with
    ``gutils.clean`` and loses photo-credit lines, blank/``//`` lines and
    repeated newlines.
    """
    self.plot = gutils.trim(self.page, '<div class="yui-content">', '<div class="footer">')
    if not self.plot:
        # kino page
        section = gutils.trim(self.page, 'Filmhandlung & Hintergrund', '</div>')
        self.plot = gutils.after(section, '</h2>')
    if not self.plot and self.videopage:
        self.plot = gutils.trim(
            self.videopage, '<div class="yui-content">', '<div class="footer">')
    if not self.plot:
        return

    # video page
    text = re.sub('<script type="text/javascript">[^<]+</script>', '', self.plot)
    replacements = (
        ('>Großansicht</a>', '>'),
        ('>Schließen</a>', '>'),
        ('>zurück </a>', '>'),
        ('>1</a>', '>'),
        ('> weiter</a>', '>'),
        ('</h4>', '\n'),
    )
    for needle, substitute in replacements:
        text = text.replace(needle, substitute)
    text = gutils.clean(text)
    # photo credit lines, then whitespace-only or "//" lines, then newline runs
    text = re.compile(r'^[^(]+[(]Foto[:][^)]+[)][ ]*$', re.MULTILINE).sub('', text)
    text = re.compile(r"(^\s+$|^\s*//\s*$)", re.MULTILINE).sub('', text)
    text = re.compile("^[\n]+$", re.MULTILINE).sub("\n", text)
    self.plot = text
def get_plot(self):
    """Set ``self.plot`` from the localized pages, then the IMDb pages.

    The "Trama" block of ``self.page`` (shortened by ``__before_more``) is
    the base; a ``swiki.2.1`` block of ``self.plot_page`` replaces it, and
    every ``plotpar`` paragraph of ``self.plot_page`` is appended. If the
    plot is still empty the same is done with the "Plot" block of
    ``self.imdb_page`` and the paragraphs of ``self.imdb_plot_page``.
    """
    # NOTE(review): the "Resumen" extraction below is overwritten by the
    # "Trama" lookup right after it; kept as-is pending confirmation that it
    # can be dropped or should act as a fallback.
    self.plot = gutils.trim(self.page, '<b class="ch">Resumen', '<a href="/rg/title-tease/plot')
    self.plot = gutils.after(self.plot, ':</b> ')
    self.plot = gutils.trim(self.page, '<h5>Trama:</h5>', '</div>')
    self.plot = self.__before_more(self.plot)

    wiki_plot = gutils.trim(self.plot_page, '<div id="swiki.2.1">', '</div>')
    if wiki_plot:
        self.plot = wiki_plot
    paragraphs = self.plot_page.split('<p class="plotpar">')
    if len(paragraphs) > 1:
        self.plot += '\n\n'
        # paragraphs[0] is the text in front of the first plot paragraph
        for paragraph in paragraphs[1:]:
            if paragraph != '':
                self.plot += gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n'

    if self.plot:
        return
    # nothing in spanish found, try original
    self.plot = gutils.regextrim(self.imdb_page, '<h5>Plot:</h5>', '(</div>|<a href.*)')
    self.plot = self.__before_more(self.plot)
    paragraphs = self.imdb_plot_page.split('<p class="plotpar">')
    if len(paragraphs) > 1:
        self.plot += '\n\n'
        for paragraph in paragraphs[1:]:
            if paragraph != '':
                self.plot += gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n\n'
def get_runtime(self):
    """Set ``self.runtime`` from the "Durée : " span of ``self.page``.

    A value containing 'H' (not at position 0) is converted to minutes as
    hours * 60 + the number after the 'H' and stored as a string; any other
    non-empty value is cut before ' mn'.
    """
    self.runtime = gutils.clean(gutils.trim(self.page, u'Durée : ', '</span>'))
    if not self.runtime:
        return
    if self.runtime.find('H') > 0:
        hours = int(gutils.before(self.runtime, 'H'))
        minutes = int(gutils.after(self.runtime, 'H'))
        self.runtime = str(hours * 60 + minutes)
    else:
        self.runtime = gutils.before(self.runtime, ' mn')
def search(self, parent_window):
    """Run the search and reduce ``self.page`` to the result section.

    Returns ``None`` if ``self.open_search(parent_window)`` fails; otherwise
    stores and returns the part of the page between the
    'Wynik wyszukiwania' heading and the closing table, after the heading's
    trailing markup.
    """
    if not self.open_search(parent_window):
        return None
    results = gutils.trim(self.page, '>Wynik wyszukiwania<', '<TABLE border=0 cellpadding=0')
    self.page = gutils.after(results, '</SPAN></DIV><BR>')
    return self.page
def get_cast(self):
    """Set ``self.cast`` from the "OBSADA" section of ``self.cast_page``.

    Tabs and tags are removed; the blank-line separator between two names is
    replaced with the translated " as " joiner, and a joiner directly
    followed by a newline is removed again.
    """
    section = gutils.trim(self.cast_page, '<h2>OBSADA:</h2>', '<div class="b')
    section = gutils.after(section, '<div class="clr"></div>')
    section = gutils.strip_tags(section.replace('\t', ''))
    section = section.replace('\n\n ', _(' as '))
    # a joiner with nothing after it on the line is dropped
    self.cast = section.replace("%s\n" % _(' as '), "\n")
def get_genre(self):
    """Set ``self.genre`` from the "GÉNERO" table row of ``self.page``.

    ' | ' separators become '. ', the text is cleaned, and whitespace runs
    after a period are collapsed to a single space.
    """
    self.genre = ''
    cell = gutils.after(gutils.trim(self.page, '<th>GÉNERO</th>', '</tr>'), '<td>')
    if cell:
        self.genre = gutils.clean(cell.replace(' | ', '. '))
    self.genre = re.sub('[.][ \t]+', '. ', self.genre)
def get_site(self):
    """Set ``self.site`` to the link found between the site and IMDb icons.

    The fragment of ``self.page`` between the "siteoficial" image and the
    "imdb" image is reduced to the text after ``<A HREF='`` with all single
    quotes removed.
    NOTE(review): the previous docstring called this the film's IMDb details
    page - confirm which link this fragment actually holds.
    """
    start_marker = "/imagens/bf_siteoficial.gif' WIDTH=89 HEIGHT=18 BORDER=0 ALT=''>"
    end_marker = "' TARGET=_blank><IMG SRC='/imagens/bf_imdb.gif'"
    fragment = gutils.trim(self.page, start_marker, end_marker)
    fragment = gutils.after(fragment, "<A HREF='")
    self.site = fragment.replace("'", "")
def get_director(self):
    """Set ``self.director`` from the "reżyseria" row of ``self.creditspage``.

    ``<br />`` separators become ', ', the text is cleaned, and a single
    trailing comma is removed.
    """
    names = gutils.trim(self.creditspage, u'reżyseria: <', '</tr>')
    names = gutils.after(names, '>')
    names = gutils.clean(names.replace('<br />', ', '))
    self.director = names[:-1] if names.endswith(',') else names
def get_plot(self):
    """Set ``self.plot`` from the plot summaries, else from the description.

    Every entry of the ``plot-summaries-content`` list in ``self.plot_page``
    (except the "no summaries yet" placeholder) contributes its paragraph and
    author line. When nothing is collected, the ``itemprop="description"``
    text of ``self.page`` is used and the ``plotpar`` paragraphs (or odd/even
    list items) of ``self.plot_page`` are appended.
    """
    placeholder = "It looks like we don't have any Plot Summaries for this title yet."
    summary_list = gutils.trim(self.plot_page, 'id="plot-summaries-content">', '</ul>')
    compiled = ''
    for item in summary_list.split('<li'):
        if item == '' or placeholder in item:
            continue
        compiled += gutils.trim(item, '<p>', '</p>') + '\n'
        author = gutils.trim(item, '<div class="author-container">', '</div>')
        compiled += re.sub('<[^<]+?>', '', author.replace('\n', '').lstrip()) + '\n\n'
    if compiled != '':
        self.plot = compiled
        return

    self.plot = gutils.regextrim(self.page, 'itemprop="description"', '<')
    self.plot = gutils.after(self.plot, '>')
    paragraphs = self.plot_page.split('<p class="plotpar">')
    if len(paragraphs) < 2:
        paragraphs = re.split('<li class="(?:odd|even)">', self.plot_page)
    if len(paragraphs) > 1:
        self.plot += '\n\n'
        # the first chunk precedes the first paragraph marker
        for paragraph in paragraphs[1:]:
            if paragraph != '':
                self.plot += gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n\n'
def get_year(self):
    """Set ``self.year`` from the ``/year/`` link or, failing that, the title.

    The fallback looks for four digits directly followed by ``)`` inside the
    ``<title>`` element of ``self.page``.

    Only the captured digits (``group(1)``) are stored. The previous code
    used ``group(0)``, which is the whole match and therefore included the
    closing parenthesis (e.g. "1999)").
    """
    self.year = gutils.trim(self.page, '<a href="/year/', '</a>')
    self.year = gutils.after(self.year, '>')
    if not self.year:
        page_title = gutils.trim(self.page, '<title>', '</title>')
        match = re.search('([0-9]{4})[)]', page_title)
        if match:
            self.year = match.group(1)
def get_o_title(self):
    """Set ``self.o_title`` from the "Originaltitel" label of ``self.page``.

    The labelled text is cleaned and passed through ``string.capwords``; if
    that yields nothing, the text of the ``film-titel`` heading is used.
    """
    labelled = gutils.regextrim(self.page, '<b>Originaltitel:', '(</p>|<b>)')
    self.o_title = string.capwords(gutils.clean(labelled))
    if self.o_title:
        return
    heading = gutils.trim(self.page, "class='film-titel'", '</h1>')
    self.o_title = gutils.after(heading, '>')
def get_screenplay(self):
    """Set ``self.screenplay`` from the "scenariusz" row of ``self.creditspage``.

    ``<br />`` separators become ', ', the text is cleaned, and one trailing
    comma is stripped.
    """
    row = gutils.trim(self.creditspage, u'scenariusz: <', '</tr>')
    writers = gutils.after(row, '>').replace('<br />', ', ')
    writers = gutils.clean(writers)
    if writers.endswith(','):
        writers = writers[:-1]
    self.screenplay = writers
def get_cameraman(self):
    """Set ``self.cameraman`` from the "zdjęcia" row of ``self.creditspage``.

    The row ends at the next ``</tr>`` or ``<tr>``. ``<br />`` separators
    become ', ', the text is cleaned, and one trailing comma is stripped.
    """
    row = gutils.regextrim(self.creditspage, u'zdjęcia: <', '(</tr>|<tr>)')
    people = gutils.clean(gutils.after(row, '>').replace('<br />', ', '))
    self.cameraman = people[:-1] if people.endswith(',') else people
def get_rating(self):
    """Set ``self.rating`` from the ``movie-rat-avg`` element of ``self.page``.

    A non-empty value has its decimal comma turned into a period, is parsed
    as float, rounded with ``round`` and stored as a string. An empty value
    is left untouched.
    """
    self.rating = gutils.after(
        gutils.trim(self.page, 'id="movie-rat-avg"', '</div>'), '>')
    if not self.rating:
        return
    numeric_text = gutils.clean(self.rating.replace(',', '.'))
    self.rating = str(round(float(numeric_text)))
def initialize(self):
    """Parse the page's JSON-LD block into ``self.jsondata``.

    The first ``application/ld+json`` script found after ``<body`` in
    ``self.page`` is decoded with ``json.loads``. Any failure while locating
    or decoding it results in an empty dict, so later lookups can rely on
    ``self.jsondata`` being a mapping.

    ``except Exception`` replaces the former bare ``except:`` so that
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed, while
    the best-effort behaviour for parsing problems stays the same.
    """
    try:
        body = gutils.after(self.page, '<body')
        payload = gutils.trim(body, '<script type="application/ld+json">', '</script>')
        self.jsondata = json.loads(payload)
    except Exception:
        self.jsondata = {}
def get_o_title(self):
    """Set ``self.o_title`` from the "TÍTULO ORIGINAL" row of ``self.page``.

    Runs of spaces are collapsed and the "(Serie de TV)", "(TV)" and
    "(TV Series)" markers are removed.
    """
    title = gutils.trim(self.page, u'<th>TÍTULO ORIGINAL</th>', '</strong></td>')
    title = gutils.after(title, '<strong>')
    title = re.sub('[ ]+', ' ', title)
    self.o_title = re.sub(
        '([(]Serie de TV[)]|[(]TV[)]|[(]TV Series[)])', '', title)
def get_runtime(self):
    """Set ``self.runtime`` from the "Czas trwania" item of ``self.page``.

    A value containing '?' is treated as unknown and stored as ''. Otherwise
    the text after '×' and before ' min' is kept.
    """
    self.runtime = gutils.trim(
        self.page, '<div class="sitem">Czas trwania: <b>\n\t\t', '\n</b>')
    if '?' in self.runtime:
        self.runtime = ''
        return
    self.runtime = gutils.after(self.runtime, '×')
    self.runtime = gutils.before(self.runtime, ' min')
def get_plot(self):
    """Set ``self.plot`` from the ``movie-plot-synopsis`` section.

    Embedded ``<script ...>...</script>`` elements are cut out of the text
    and all newlines are removed.

    Each cut now ends after the closing ``</script>`` tag. The earlier slice
    stopped at the start of that tag, leaving a stray ``</script>`` in the
    plot for every removed script.
    """
    section = gutils.trim(self.page, 'class="movie-plot-synopsis"', '</section>')
    self.plot = gutils.after(section, '>')
    closing_tag = '</script>'
    start = self.plot.find('<script')
    end = self.plot.find(closing_tag, start)
    # an unterminated script (end == -1) is left in place, as before
    while start > -1 and end > -1:
        self.plot = self.plot[:start] + self.plot[end + len(closing_tag):]
        start = self.plot.find('<script')
        end = self.plot.find(closing_tag, start)
    self.plot = self.plot.replace('\n', '')
def get_studio(self):
    """Set ``self.studio`` from the "Produktion" entry of ``self.detail_page``.

    The entry ends at the next ``<img`` or, if that finds nothing, at
    ``</TABLE>``. ``<tr>`` separators become ', ', tags are stripped and a
    leading or trailing ', ' is removed.
    """
    entry = gutils.trim(self.detail_page, 'alt="Produktion"', '<img')
    if entry == '':
        entry = gutils.trim(self.detail_page, 'alt="Produktion"', '</TABLE>')
    entry = gutils.after(entry, '>')
    entry = gutils.strip_tags(entry.replace('<tr>', ', '))
    self.studio = re.sub('((^, )|(, $))', '', entry)
def get_director(self):
    """Set ``self.director`` to the comma-separated "Directed by" names.

    The section of ``self.cast_page`` between '>Directed by' and
    ``</table>`` is split at each ``href="`` and the link text of every
    chunk after the first is collected.

    Names are joined with ', ' between them. Previously the delimiter was
    appended after each name (the first time as an empty string), producing
    "AB, C, " rather than "A, B, C".
    """
    section = gutils.trim(self.cast_page, '>Directed by', '</table>')
    names = [
        gutils.before(gutils.after(chunk, '>'), '<')
        for chunk in re.split('href="', section)[1:]
    ]
    self.director = ', '.join(names)
def get_rating(self):
    """Set ``self.rating`` to the rounded "Note" value of ``self.page``.

    A missing value is treated as "0". The text is parsed as float and
    rounded to zero decimals; if it cannot be parsed, the rating is 0.

    The Python-2-only ``except Exception, e`` clause (with an unused ``e``)
    is replaced by a catch of the conversion errors ``float`` can raise, a
    form that is valid on Python 2.6+ and Python 3.
    """
    raw = gutils.after(gutils.trim(self.page, ">Note: <", "</span>"), '>')
    if raw == '':
        raw = "0"
    try:
        self.rating = round(float(raw), 0)
    except (TypeError, ValueError):
        self.rating = 0
def get_o_title(self):
    """Set ``self.o_title`` and rewrite ``self.url`` to the public host.

    ``plugin_server`` in ``self.url`` is replaced with ``plugin_url``. The
    title comes from the ``origTitle`` heading; if that is empty the
    ``<title>`` element is used, cut before its first '(' and then before
    its first '/'.
    """
    self.url = self.url.replace(plugin_server, plugin_url)
    heading = gutils.trim(self.page, '<h2 class=origTitle>', '</h2>')
    self.o_title = gutils.after(heading, '</span>')
    if self.o_title != '':
        return
    self.o_title = gutils.trim(self.page, '<title>', '</title>')
    for separator in ('(', '/'):
        if self.o_title.find(separator) > -1:
            self.o_title = gutils.before(self.o_title, separator)
def get_cast(self):
    """Build ``self.cast`` from the ``itemprop="actor"`` entries of the page.

    Each entry yields one line: "actor as role" when a "Rôle" value is
    present, otherwise just the actor.
    """
    self.cast = ''
    entries = self.page.split('itemprop="actor"')
    lines = []
    # entries[0] is the page text in front of the first actor
    for entry in entries[1:]:
        actor = gutils.trim(entry, '>', '</span>')
        role = gutils.after(gutils.trim(entry, u'Rôle', '</span>'), ': ')
        if role:
            lines.append(actor + _(' as ') + role + '\n')
        else:
            lines.append(actor + '\n')
    self.cast = self.cast + ''.join(lines)
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from the product list.

    Every chunk after a ``class="product-name"`` marker yields its first
    ``href`` as id and its link text as title, with a trailing
    "Zweitausendeins Edition..." suffix removed.
    """
    chunks = self.page.split('class="product-name"')
    for chunk in chunks[1:]:
        self.ids.append(gutils.trim(chunk, 'href="', '"'))
        link_text = gutils.trim(gutils.after(chunk, '>'), '>', '<')
        self.titles.append(re.sub('Zweitausendeins Edition.*', '', link_text))
def get_rating(self):
    """Set ``self.rating`` from the "Cota" entry of ``self.page``.

    A non-empty value has its decimal comma replaced by a period, is
    stripped, parsed as float and stored as a string; an empty value stores
    the empty string.
    """
    raw = gutils.trim(self.page, "<br><b>Cota", " (")
    raw = gutils.after(raw, "</b>")
    if raw == "":
        self.rating = ""
        return
    self.rating = str(float(raw.replace(',', '.').strip()))