Ejemplo n.º 1
0
 def get_plot(self):
     """Fill ``self.plot`` from the description meta tag and the plot summaries.

     The localized pages (``self.page`` / ``self.plot_page``) are used first.
     When they leave the plot empty, the same extraction is repeated on
     ``self.imdb_page`` / ``self.imdb_plot_page``.  Finally, if the
     ``plot-summaries-content`` list of ``self.plot_page`` produces any text,
     that compilation replaces everything collected before.
     """
     meta = gutils.trim(self.page, 'name="description"', '/>')
     self.plot = gutils.before(gutils.after(meta, 'content="'), '"')
     localized = self.plot_page.split('class="plotSummary"')
     if len(localized) > 1:
         self.plot += '\n\n'
         # localized[0] is the markup in front of the first summary block
         for chunk in localized[1:]:
             if chunk != '':
                 summary = gutils.before(gutils.after(chunk, '>'), '</a>')
                 self.plot += gutils.strip_tags(summary) + '\n\n'
     if self.plot == '':
         # nothing found on the German page, try the original one
         meta = gutils.trim(self.imdb_page, 'name="description"', '/>')
         self.plot = gutils.before(gutils.after(meta, 'content="'), '"')
         originals = self.imdb_plot_page.split('class="plotSummary"')
         if len(originals) > 1:
             self.plot += '\n\n'
             for chunk in originals[1:]:
                 if chunk != '':
                     summary = gutils.before(gutils.after(chunk, '>'), '</a>')
                     self.plot += gutils.strip_tags(summary) + '\n\n'
     summary_list = gutils.trim(self.plot_page, 'id="plot-summaries-content">', '</ul>')
     compilation = ''
     for item in summary_list.split('<li'):
         if item != '':
             compilation += gutils.trim(item, '<p>', '</p>') + '\n'
             author = gutils.trim(item, '<div class="author-container">', '</div>')
             compilation += re.sub('<[^<]+?>', '', author.replace('\n', '').lstrip()) + '\n\n'
     if compilation != '':
         self.plot = compilation
	def get_o_title(self):
		"""Set ``self.o_title`` from ``self.tmp_page``.

		The bracketed ``standardsmall`` span is preferred; when it yields
		nothing the text of the ``headline2`` link is used (video or cinema
		link depending on ``self.url_type``).
		"""
		self.o_title = gutils.trim(self.tmp_page, 'span class="standardsmall">(', ')<')
		if self.o_title != '':
			return
		if self.url_type == 'V':
			pattern = 'headline2"[^>]*>[ \t\r\n]*<a href="/videofilm'
		else:
			pattern = 'headline2"[^>]*>[ \t\r\n]*<a href="/kinofilm'
		headline = self.regextrim(self.tmp_page, pattern, '</a>')
		self.o_title = gutils.after(headline, '>')
 def get_searches(self):
     """Collect ids and display titles of the search results.

     ``self.page`` is ``None`` when the search landed directly on a movie
     page, ``False`` when nothing was found, and otherwise the result listing,
     which is cut at every ``'<li '`` and parsed entry by entry.  Each title
     is built as ``title (year) - country`` with the optional parts left out
     when they are empty.
     """
     if self.page is None:  # movie page
         self.ids.append(self.url)
         self.titles.append(gutils.convert_entities(self.title))
         return
     if self.page is False:  # no movie found
         self.number_results = 0
         return
     # multiple matches
     entries = self.page.split('<li ')
     # NOTE(review): this stores the last raw chunk rather than a count; kept
     # unchanged because the consumer of number_results is not visible here.
     self.number_results = entries[-1]
     # split() always returns a list, so it can never equal '' - there is no
     # separate "empty result" case to handle at this point.
     for entry in entries:
         if entry == '':
             continue
         link = gutils.after(entry, 'href="')
         self.ids.append('http://' + plugin_url_other + gutils.before(link, '"'))
         title = gutils.trim(link, '">', '</a>').replace('\t', '')
         details = gutils.after(link, 'class=searchResultDetails')
         year = gutils.trim(details, '>', '|').replace(" ", '')
         year = gutils.strip_tags(year)
         country = ''
         country_pos = details.find('countryIds')
         if country_pos != -1:
             country = gutils.trim(details[country_pos:], '">', '</a>')
         label = title.strip()
         if year:
             label += ' (' + year.strip() + ')'
         if country:
             label += ' - ' + country.strip()
         label = gutils.convert_entities(label)
         self.titles.append(gutils.strip_tags(label))
	def get_o_title(self):
		"""Set ``self.o_title`` from ``self.tmp_page``.

		Uses the bracketed text after the ``standardsmall`` span; if that is
		empty, falls back to the text of the ``headline2`` link (video or
		cinema link depending on ``self.url_type``).
		"""
		title = gutils.trim(self.tmp_page, 'span class="standardsmall"><br />(', ')<')
		if title == '':
			if self.url_type == 'V':
				link_start = '"headline2"><a href="/videofilm'
			else:
				link_start = '"headline2"><a href="/kinofilm'
			title = gutils.after(gutils.trim(self.tmp_page, link_start, '</a>'), '>')
		self.o_title = title
    def get_searches(self):
        """Collect film ids and titles from the search result page.

        A page shorter than 20 characters is treated as a direct hit: the
        page content itself is stored as the id and ``self.url`` as the title.
        Otherwise the page is cut at ``'</a></b>'``; every chunk containing a
        film link contributes its id, and its title is completed with the text
        that opens the following chunk.
        """
        if not self.page:
            return
        if len(self.page) < 20:  # immediate redirection to the movie page
            self.number_results = 1
            self.ids.append(self.page)
            self.titles.append(self.url)
            return
        # multiple matches
        chunks = self.page.split('</a></b>')
        if chunks[0] == '':
            return
        link_start = '<b><a href="/es/film'
        # pair every chunk with its successor; the last chunk has none
        for chunk, following in zip(chunks, chunks[1:]):
            film_id = gutils.trim(chunk, link_start, '.html')
            if not film_id:
                continue
            self.ids.append(film_id)
            anchor = gutils.clean(gutils.after(chunk, link_start)).replace("\n", "")
            name = gutils.strip_tags(gutils.convert_entities(gutils.after(anchor, '>')))
            suffix = gutils.before(following, '<').strip()
            self.titles.append(name + ' ' + suffix)
	def get_director(self):
		"""Set ``self.director`` from the credits page.

		When ``self.url`` is not already the ``typ=credits`` page, the URL is
		switched to it and the page is reloaded through ``self.open_page``.
		"""
		on_credits_page = gutils.trim(self.url, "typ=", "&") == "credits"
		if not on_credits_page:
			self.url = self.url_to_use + "typ=credits&nr=" + str(self.movie_id)
			self.open_page(self.parent_window)
		credit = gutils.trim(self.page, "Regie", "</a>")
		credit = gutils.after(credit, "mitwirk.php4")
		self.director = gutils.after(credit, ">")
 def get_searches(self):
     """Collect ids and display titles of the search results.

     ``self.page`` is ``None`` for a direct movie page, ``False`` when nothing
     was found, and otherwise the result listing, which is cut at every
     ``'<li '``.  Titles are built as ``title (year) - country``.
     """
     if self.page is None:  # movie page
         self.ids.append(self.url)
         self.titles.append(gutils.convert_entities(self.title))
         return
     if self.page is False:  # no movie found
         self.number_results = 0
         return
     # multiple matches
     chunks = self.page.split('<li ')
     if chunks[0] == '':
         self.number_results = 0
         return
     # NOTE(review): stores the last raw chunk, not a count - confirm intent
     self.number_results = chunks[-1]
     for chunk in chunks:
         link = gutils.after(chunk, '<a href="')
         self.ids.append('http://' + plugin_server + gutils.before(link, '"'))
         title = gutils.trim(link, 'class="searchResultTitle"', '</a>')
         title = gutils.after(title, '">').replace("\t", '')
         details = gutils.after(link, 'class="searchResultDetails"')
         year = gutils.trim(details, '>', '|').replace(" ", '')
         year = gutils.strip_tags(year)
         country = gutils.trim(details, '">', '</a>')
         label = title.strip()
         if year != '':
             label = label + ' (' + year.strip() + ')'
         if country != '':
             label = label + ' - ' + country.strip()
         label = gutils.convert_entities(label)
         self.titles.append(gutils.strip_tags(label))
 def get_cast(self):
     """Build ``self.cast`` as one ``actor as character`` line per cast row.

     The "Acteurs" table of ``self.page_cast`` is cut at every ``'<td '``.
     After the leading chunk the cells are read in groups of three, where the
     second cell of a group gives the character and the third the actor.
     """
     self.cast = ""
     casts = gutils.trim(self.page_cast, "Acteurs", "</table>")
     parts = casts.split("<td ")
     lines = []
     # Stop at len(parts) - 2 so that parts[index + 2] always exists; an
     # incomplete trailing group is skipped instead of raising IndexError.
     for index in range(1, len(parts) - 2, 3):
         character = gutils.after(parts[index + 1], ">")
         actor = gutils.after(parts[index + 2], ">")
         lines.append(gutils.clean(actor) + _(" as ") + gutils.clean(character) + "\n")
     self.cast = "".join(lines)
 def get_cast(self):
     """Build ``self.cast`` as one ``actor as character`` line per cast row.

     The "Acteurs" table of ``self.page_cast`` is cut at every ``'<td '``;
     after the leading chunk the cells are read in pairs (character first,
     actor second).
     """
     self.cast = ""
     table = gutils.trim(self.page_cast, 'Acteurs', '</table>')
     cells = table.split('<td ')
     rows = []
     for pos in range(1, len(cells) - 1, 2):
         role = gutils.after(cells[pos], '>')
         performer = gutils.after(cells[pos + 1], '>')
         rows.append(gutils.clean(performer) + _(' as ') + gutils.clean(role) + '\n')
     self.cast = self.cast + ''.join(rows)
	def director(self):
		"""Extract the director from ``self.page`` into ``self.director``.

		The name is taken either from between ``-->`` and ``<!--`` or, when no
		comment markers are present, from between ``<B>`` and ``</B>``.
		"""
		# NOTE(review): the assignment below stores an attribute under the
		# same name as this method - confirm that this is intended.
		credit = gutils.trim(self.page, "<BR>Re¿yseria:&nbsp;&nbsp;", "<BR>")
		if credit.find("-->") != -1:
			credit = gutils.before(gutils.after(credit, "-->"), "<!--")
		else:
			credit = gutils.before(gutils.after(credit, "<B>"), "</B>")
		self.director = credit
 def get_director(self):
     """Extract the director from ``self.page`` into ``self.director``.

     The name is taken either from between ``-->`` and ``<!--`` or, when no
     comment markers are present, from between ``<B>`` and ``</B>``.
     """
     credit = gutils.trim(self.page, '<BR>Re\xbfyseria:&nbsp;&nbsp;', '<BR>')
     if credit.find('-->') != -1:
         opening, closing = '-->', "<!--"
     else:
         opening, closing = '<B>', '</B>'
     self.director = gutils.before(gutils.after(credit, opening), closing)
 def get_plot(self):
     """Extract the synopsis from ``self.page`` into ``self.plot``.

     The section heading is looked up first with literal accented characters
     and then with HTML entities; the "(FILMAFFINITY)" credit is removed from
     the text.
     """
     section = gutils.trim(self.page, u'<b>GÉNERO Y CRÍTICA</b>', '</tr>')
     if section == '':
         section = gutils.trim(self.page, '<b>G&Eacute;NERO Y CR&Iacute;TICA</b>', '</tr>')
     section = gutils.after(section, '<td valign="top">')
     section = gutils.after(section, 'SINOPSIS:')
     # strip the variant with a leading blank first so no stray space remains
     for credit in (' (FILMAFFINITY)', '(FILMAFFINITY)'):
         section = section.replace(credit, '')
     self.plot = section
	def get_searches(self):
		"""Collect ids and titles from every ``dvdtitle`` entry of ``self.page``."""
		chunks = self.page.split('class="dvdtitle">')
		# chunks[0] is the markup in front of the first entry
		for chunk in chunks[1:]:
			link = gutils.trim(chunk, '<a href="', '<br/>')
			if link == '':
				continue
			movie_path = gutils.trim(link, 'movie/', '/main')
			self.ids.append(gutils.after(movie_path, '/'))
			self.titles.append(gutils.after(link, '">').replace('</a></span>', ''))
 def get_o_title(self):
     """Set ``self.o_title`` from the ``text_ergebniss_faz_3`` block.

     The text in front of "(Originaltitel)" is cleaned, reduced to the part
     after its last comma (if any) and capitalised word by word.  When that
     leaves nothing, the text of the block's first link is used instead.
     """
     raw = gutils.regextrim(self.page, 'class="text_ergebniss_faz_3"', '[ \t]+[(]Originaltitel[)]')
     title = gutils.clean(gutils.after(raw, '</a>'))
     comma = title.rfind(',')
     if comma > 0:
         title = title[comma + 1:]
     title = string.capwords(title)
     if title == '':
         title = gutils.after(gutils.trim(self.page, 'class="text_ergebniss_faz_3"', '</a>'), '>')
     self.o_title = title
 def get_title(self):
     """Set ``self.title`` to the text of the ``headline2`` link.

     ``self.url_type == "V"`` selects the video link, anything else the
     cinema link.
     """
     if self.url_type == "V":
         pattern = 'headline2"[^>]*>[ \t\r\n]*<a href="/videofilm'
     else:
         pattern = 'headline2"[^>]*>[ \t\r\n]*<a href="/kinofilm'
     headline = self.regextrim(self.tmp_page, pattern, "</a>")
     self.title = gutils.after(headline, ">")
	def get_searches(self):
		"""Collect film ids and ``title - cell - cell`` labels from ``self.page``.

		The page is cut at every ``<!--``; the chunk in front of the first
		marker and empty chunks are ignored.
		"""
		for chunk in self.page.split("<!--")[1:]:
			if chunk == '':
				continue
			self.ids.append(gutils.trim(chunk, "filmid=", ">"))
			name = gutils.trim(gutils.after(chunk, "filmid="), ">", "<")
			first_cell = gutils.trim(gutils.after(chunk, "</a>"), "<td>", "</td>")
			later_cell = gutils.trim(gutils.after(gutils.after(chunk, "<td>"), "<td>"), "<td>", "</td>")
			self.titles.append(name + " - " + first_cell + " - " + later_cell)
 def get_plot(self):
     """Set ``self.plot`` from the "o filmie" section of ``self.page``.

     When the short text carries a link after ``"\\t..."``, that page is
     fetched with ``self.open_page`` and its ``filmContent`` block is used
     instead of the short text.
     """
     teaser = gutils.trim(self.page, '<h2 id="o-filmie-header" class="replace">', '</div>')
     teaser = gutils.after(teaser, '<p>')
     more_link = gutils.trim(gutils.trim(teaser, "\t...", "</a>"), 'href="', '"')
     self.plot = gutils.strip_tags(teaser)
     if more_link == '':
         return
     full_page = self.open_page(url=more_link)
     content = gutils.trim(full_page, '<div class="filmContent">', '</ul>')
     self.plot = gutils.after(content, 'zgłoś poprawkę')
 def get_o_title(self):
     """Set ``self.o_title`` from ``self.tmp_page``.

     The bracketed ``standardsmall`` span is preferred; if it is empty the
     text of the ``headline2`` link (video or cinema, depending on
     ``self.url_type``) is used.
     """
     title = gutils.trim(self.tmp_page, 'span class="standardsmall">(', ")<")
     if title == "":
         pattern = (
             'headline2"[^>]*>[ \t\r\n]*<a href="/videofilm'
             if self.url_type == "V"
             else 'headline2"[^>]*>[ \t\r\n]*<a href="/kinofilm'
         )
         title = gutils.after(self.regextrim(self.tmp_page, pattern, "</a>"), ">")
     self.o_title = title
	def get_searches(self):
		"""Collect film ids and ``title - cell - cell`` labels from ``self.page``.

		The page is cut at every ``filmanzeige.php?filmid=`` link; the id is
		the text up to the first ``>`` or ``"`` of each chunk.
		"""
		chunks = re.split('(?:href=["]*filmanzeige[.]php[?]filmid=)', self.page)
		# chunks[0] is the markup in front of the first link
		for chunk in chunks[1:]:
			if chunk == '':
				continue
			id_end = re.search('([>]|["])', chunk)
			if not id_end:
				continue
			self.ids.append(chunk[:id_end.end() - 1])
			name = gutils.trim(chunk, '>', '<')
			first_cell = gutils.trim(gutils.after(chunk, '</a>'), '<td>', '</td>')
			second_cell = gutils.trim(gutils.after(chunk, '<td>'), '<td>', '</td>')
			self.titles.append(name + ' - ' + first_cell + ' - ' + second_cell)
    def get_searches(self):
        """Collect film ids and ``title (year)`` labels from the result rows.

        ``self.page`` is cut at every ``'<tr'``.  If the first chunk is empty
        nothing is collected and ``self.number_results`` is set to 0.
        """
        rows = self.page.split('<tr')
        if rows[0] == '':
            self.number_results = 0
            return
        # NOTE(review): stores the last raw chunk, not a count - confirm intent
        self.number_results = rows[-1]
        for row in rows:
            self.ids.append(gutils.trim(row, 'IDfilm=', '"'))
            link_text = gutils.after(gutils.trim(row, 'IDfilm=', '</a>'), '>')
            name = gutils.convert_entities(gutils.strip_tags(link_text))
            released = gutils.after(gutils.trim(row, '<td valign="bottom"', '</td>'), '>')
            self.titles.append(name + ' (' + released + ')')
Ejemplo n.º 21
0
 def get_cameraman(self):
     """Set ``self.cameraman`` to a comma-separated list of the "Kamera" names.

     The "Kamera" group inside the ``person-collection`` section of
     ``self.page`` is cut at every ``href="``; the link text of each chunk is
     stripped of markup, newlines and repeated blanks.
     """
     self.cameraman = ''
     section = gutils.trim(self.page, 'id="person-collection"', '</section>')
     group = gutils.regextrim(section, 'Kamera[^<]*[<][/]h3[>]', '<h3')
     names = []
     # the chunk in front of the first link carries no name
     for chunk in re.split('href="', group)[1:]:
         name = gutils.before(gutils.after(gutils.after(chunk, '"'), '>'), '<')
         name = re.sub('<small[^>]*>[^<]*</small>', '', name)
         name = gutils.strip_tags(name)
         name = name.replace('\n', '')
         name = re.sub('[ \t]+', ' ', name)
         names.append(name)
     # join() puts the separator between the names only, never after one
     self.cameraman = ', '.join(names)
	def get_plot(self):
		"""Set ``self.plot`` from ``self.tmp_page``.

		The ``line-height`` span is preferred.  Otherwise the "Kurzinfo"
		section is used, cut repeatedly after each ``</A>`` and finally after
		``</table>``.
		"""
		# small steps towards a clean plot - the page content is badly structured
		plot = gutils.trim(self.tmp_page, '<span style="line-height:', '</spa')
		if plot != '':
			self.plot = gutils.after(plot, '>')
			return
		plot = gutils.trim(self.tmp_page, "Kurzinfo", "</td></tr><tr><td></td>")
		if plot == '':
			plot = gutils.trim(self.tmp_page, "Kurzinfo", '<script ')
			plot = gutils.after(plot, '>')
		# keep cutting for as long as a '</A>' is still present
		while len(plot) and plot.find('</A>') > -1:
			plot = gutils.after(plot, '</A>')
		self.plot = gutils.after(gutils.after(plot, '</table>'), '>')
 def get_plot(self):
     """Set ``self.plot`` (and ``self.tmp_page``) from the film page.

     When ``self.url`` is not already the ``typ=film`` page, the URL is
     switched to it and the page is reloaded through ``self.open_page``.
     The plot is the cell following the poster image; for non-video entries
     an alternative table cell is used when that cell is empty.
     """
     if gutils.trim(self.url, "typ=", "&") != "film":
         self.url = self.url_to_use + "typ=film&nr=" + str(self.movie_id)
         self.open_page(self.parent_window)
     self.tmp_page = gutils.trim(self.page, "<!-- PRINT-CONTENT-START-->", "<!-- PRINT-CONTENT-ENDE-->")
     is_video = self.url_type == "V"
     if is_video:
         image_marker = 'IMG SRC="/pix/MBBILDER/VIDEO'
     else:
         image_marker = 'IMG SRC="/pix/MBBILDER/KINOPLAK'
     plot = gutils.trim(gutils.after(self.tmp_page, image_marker), "</TABLE>", "</TD>")
     if not is_video and plot == "":
         plot = gutils.trim(self.tmp_page, 'BORDER="0" align="left" ><TR><TD>', "</TD>")
     self.plot = plot
	def get_searches(self):
		"""Collect film ids and ``title (original, country, year)`` labels.

		``self.page`` is cut at every ``filmsuche.cfm?wert=`` link; nothing is
		collected when the text in front of the first link is empty.
		"""
		chunks = self.page.split("\n	<a href=\"filmsuche.cfm?wert=")
		if chunks[0] == '':
			return
		# chunks[0] is the markup in front of the first link
		for chunk in chunks[1:]:
			if chunk == '':
				continue
			self.ids.append(gutils.before(chunk, "&"))
			name = gutils.trim(chunk, ">", "</a>")
			original = string.capwords(gutils.trim(chunk, "\n			", "(Orginaltitel)"))
			country = gutils.after(gutils.trim(chunk, "sucheNach=produktionsland", "</a>"), ">")
			year = gutils.after(gutils.trim(chunk, "sucheNach=produktionsjahr", "</a>"), ">")
			label = name + " (" + original + ", " + country + ", " + year + ")"
			self.titles.append(gutils.strip_tags(label))
Ejemplo n.º 25
0
 def get_o_title(self):
     """Set ``self.o_title`` from the ``text_ergebniss_faz_3`` block.

     The text in front of "(Originaltitel)" is cleaned, cut down to what
     follows its last comma (if there is one) and capitalised word by word;
     an empty result falls back to the text of the block's first link.
     """
     block_marker = 'class="text_ergebniss_faz_3"'
     found = gutils.regextrim(self.page, block_marker, '[ \t]+[(]Originaltitel[)]')
     self.o_title = gutils.clean(gutils.after(found, '</a>'))
     last_comma = self.o_title.rfind(',')
     if last_comma > 0:
         self.o_title = self.o_title[last_comma + 1:]
     self.o_title = string.capwords(self.o_title)
     if self.o_title != '':
         return
     first_link = gutils.trim(self.page, block_marker, '</a>')
     self.o_title = gutils.after(first_link, '>')
 def get_searches(self):
     """Collect film ids and ``title (original, country, year)`` labels.

     Every ``text_ergebniss_faz_3`` block of ``self.page`` is one result; the
     markup in front of the first block is ignored.
     """
     blocks = self.page.split('<div class="text_ergebniss_faz_3"')
     for block in blocks[1:]:
         self.ids.append(gutils.trim(block, 'filmlexikon/?wert=', '&'))
         name = gutils.trim(block, '>', '</a>')
         original = string.capwords(gutils.trim(block, '</a>', '(Originaltitel)'))
         country = gutils.after(gutils.trim(block, 'sucheNach=produktionsland', '</a>'), '>')
         year = gutils.after(gutils.trim(block, 'sucheNach=produktionsjahr', '</a>'), '>')
         label = name + ' (' + original + ', ' + country + ', ' + year + ')'
         self.titles.append(gutils.clean(label).strip())
	def get_plot(self):
		"""Set ``self.plot`` (and ``self.tmp_page``) from the film page.

		When ``self.url`` is not already the ``typ=film`` page, the URL is
		switched to it and the page is reloaded through ``self.open_page``.
		The plot is the cell following the poster image; for non-video entries
		an alternative table cell is used when that cell is empty.
		"""
		if gutils.trim(self.url, "typ=", "&") != "film":
			self.url = self.url_to_use + "typ=film&nr=" + str(self.movie_id)
			self.open_page(self.parent_window)
		self.tmp_page = gutils.trim(self.page, "<!-- PRINT-CONTENT-START-->", "<!-- PRINT-CONTENT-ENDE-->")
		if self.url_type == "V":
			after_image = gutils.after(self.tmp_page, 'IMG SRC="/pix/MBBILDER/VIDEO')
			self.plot = gutils.trim(after_image, "</TABLE>", "</TD>")
			return
		after_image = gutils.after(self.tmp_page, 'IMG SRC="/pix/MBBILDER/KINOPLAK')
		self.plot = gutils.trim(after_image, "</TABLE>", "</TD>")
		if self.plot == '':
			self.plot = gutils.trim(self.tmp_page, 'BORDER="0" align="left" ><TR><TD>', "</TD>")
    def get_searches(self):
        """Collect film ids and titles from the search result page.

        A page shorter than 20 characters is treated as a direct hit: the
        page content itself is stored as the id and ``self.url`` as the title.
        Otherwise the page is cut at ``'</a></b>'`` and every chunk except the
        last contributes one id/title pair.
        """
        if len(self.page) < 20:    # immediate redirection to the movie page
            self.number_results = 1
            self.ids.append(self.page)
            self.titles.append(self.url)
            return
        # multiple matches
        chunks = self.page.split('</a></b>')
        if chunks[0] == '':
            return
        link_start = '<b><a href="/es/film'
        for chunk in chunks[:-1]:
            self.ids.append(gutils.trim(chunk, link_start, '.html'))
            anchor = gutils.after(chunk, link_start)
            name = gutils.convert_entities(gutils.after(anchor, '>'))
            self.titles.append(gutils.strip_tags(name))
Ejemplo n.º 29
0
 def get_studio(self):
     """Set ``self.studio`` from the "Verleih" information.

     Sources are tried in order: the description block of ``self.page``, a
     plain ``Verleih: `` text in ``self.page``, and the description block of
     ``self.videopage`` (when a video page is available).
     """
     def from_description(page):
         # Returns None when the description block or its "Regie:" paragraph
         # is missing, so the caller can leave the current value untouched.
         description = gutils.trim(page, '<div class="description">', '</div>')
         if not description:
             return None
         paragraph = gutils.trim(description, 'Regie:', '</p>')
         if not paragraph:
             return None
         return gutils.after(paragraph, '<br/>').replace('Verleih: ', '')

     self.studio = ''
     found = from_description(self.page)
     if found is not None:
         self.studio = found
     if not self.studio:
         self.studio = gutils.trim(self.page, 'Verleih: ', '<')
     if not self.studio and self.videopage:
         found = from_description(self.videopage)
         if found is not None:
             self.studio = found
 def get_plot(self):
     """Set ``self.plot`` (and ``self.tmp_page``) from the "Kurzinfo" section.

     When ``self.url`` is not already the ``typ=film`` page, the URL is
     switched to it and the page is reloaded through ``self.open_page``.
     The section is then cut after a series of navigation links, before the
     photo show caption and after the closing table tag.
     """
     if gutils.trim(self.url, "typ=", "&") != "film":
         self.url = self.url_to_use + "typ=film&nr=" + str(self.movie_id)
         self.open_page(self.parent_window)
     self.tmp_page = gutils.trim(self.page, "<!-- PRINT-CONTENT-START-->", "<!-- PRINT-CONTENT-ENDE-->")
     # small steps towards a clean plot - the page content is badly structured
     plot = gutils.trim(self.tmp_page, "Kurzinfo", "</TD></TR><tr><td></td></tr><TR>")
     if plot == "":
         plot = gutils.trim(self.tmp_page, "Kurzinfo", '<script language="JavaScript">')
     for link_end in ("Fotoshow</A>", "Filmpreise</A>", "Games zum Film</A>", " Crew</A>"):
         plot = gutils.after(plot, link_end)
     plot = gutils.before(plot, "FOTOSHOW</SPAN>")
     self.plot = gutils.after(plot, "</TABLE>")
 def get_searches(self):
     """Collect film ids and ``title (original, country, year)`` labels.

     ``self.page`` is cut at every ``text_ergebniss_titel`` marker and the
     chunks are consumed in pairs: the first chunk of a pair supplies the id,
     the second the descriptive text.
     """
     chunks = self.page.split('class="text_ergebniss_titel"')
     for pos in range(0, len(chunks) - 1, 2):
         id_chunk = chunks[pos]
         text_chunk = chunks[pos + 1]
         self.ids.append(gutils.trim(id_chunk, 'filmsuche.cfm?wert=', '&'))
         name = gutils.trim(text_chunk, '>', '</a>')
         original = string.capwords(gutils.trim(text_chunk, '</a>', '(Originaltitel)'))
         country = gutils.after(gutils.trim(text_chunk, 'sucheNach=produktionsland', '</a>'), '>')
         year = gutils.after(gutils.trim(text_chunk, 'sucheNach=produktionsjahr', '</a>'), '>')
         label = name + ' (' + original + ', ' + country + ', ' + year + ')'
         self.titles.append(gutils.strip_tags(label))
Ejemplo n.º 32
0
    def search(self, parent_window):
        """Run the search and return the concatenated result listings.

        After the first result page is loaded, the pager (``trefferliste``
        span) is scanned for ``cp=<n>`` page numbers.  Every page other than
        page 1 is fetched through ``self.open_search`` and its listing is
        appended to the first one.

        Returns ``None`` when the initial search cannot be opened, otherwise
        the combined text, which is also stored in ``self.page``.
        """
        if not self.open_search(parent_window):
            return None
        # used for looking for subpages
        pager = gutils.trim(self.page, '<span class="trefferliste">',
                            '</span>')
        page_refs = pager.split('cp=')
        # first results
        results = gutils.after(
            gutils.trim(self.page, 'Alle Treffer aus der Kategorie',
                        '<span class="trefferliste">'), "Titel:")
        # look for subpages
        for page_ref in page_refs:
            page_ref = gutils.before(page_ref, '"')
            try:
                page_number = int(page_ref)
            except (TypeError, ValueError):
                # Not a page number (e.g. the text in front of the first
                # "cp="): treat it as page 1, which is already loaded.
                page_number = 1
            if page_number != 1:
                self.url = 'http://www.zweitausendeins.de/filmlexikon/?sucheNach=Titel&cp=' + str(
                    page_number) + "&wert="
                if self.open_search(parent_window):
                    results = results + gutils.trim(
                        self.page, 'Alle Treffer aus der Kategorie',
                        '<span class="trefferliste">')
        self.page = results

        return self.page
Ejemplo n.º 33
0
 def get_plot(self):
     """Set ``self.plot`` from the content area of the page and tidy it up.

     Sources are tried in order: the ``yui-content`` block of ``self.page``,
     the "Filmhandlung & Hintergrund" section of ``self.page`` and the
     ``yui-content`` block of ``self.videopage``.  A non-empty result is
     stripped of inline scripts, gallery navigation links, photo credit lines
     and blank lines.
     """
     plot = gutils.trim(self.page, '<div class="yui-content">',
                        '<div class="footer">')
     if not plot:
         # kino page
         section = gutils.trim(self.page, 'Filmhandlung & Hintergrund', '</div>')
         plot = gutils.after(section, '</h2>')
     if not plot and self.videopage:
         plot = gutils.trim(self.videopage, '<div class="yui-content">',
                            '<div class="footer">')
     if plot:
         # video page
         plot = re.sub('<script type="text/javascript">[^<]+</script>', '', plot)
         replacements = (
             ('>Großansicht</a>', '>'),
             ('>Schließen</a>', '>'),
             ('>zur&uuml;ck </a>', '>'),
             ('>1</a>', '>'),
             ('> weiter</a>', '>'),
             ('</h4>', '\n'),
         )
         for old, new in replacements:
             plot = plot.replace(old, new)
         plot = gutils.clean(plot)
         # photo credit lines, whitespace-only / "//" lines, runs of newlines
         photo_credit = re.compile(r'^[^(]+[(]Foto[:][^)]+[)][ ]*$', re.MULTILINE)
         plot = photo_credit.sub('', plot)
         blank_or_slashes = re.compile(r"(^\s+$|^\s*//\s*$)", re.MULTILINE)
         plot = blank_or_slashes.sub('', plot)
         newline_runs = re.compile("^[\n]+$", re.MULTILINE)
         plot = newline_runs.sub("\n", plot)
     self.plot = plot
Ejemplo n.º 34
0
    def get_plot(self):
        """Set ``self.plot`` from the localized pages, else from the original.

        The "Trama" block of ``self.page`` is the starting point; a
        ``swiki.2.1`` block in ``self.plot_page`` replaces it and every
        ``plotpar`` paragraph is appended.  When the result is empty the same
        is attempted with ``self.imdb_page`` / ``self.imdb_plot_page``.
        """
        # NOTE(review): the value produced by the next two statements is
        # overwritten before it is read - confirm whether a fallback was meant.
        self.plot = gutils.trim(self.page, '<b class="ch">Resumen', '<a href="/rg/title-tease/plot')
        self.plot = gutils.after(self.plot, ':</b> ')

        self.plot = gutils.trim(self.page, '<h5>Trama:</h5>', '</div>')
        self.plot = self.__before_more(self.plot)
        wiki_text = gutils.trim(self.plot_page, '<div id="swiki.2.1">', '</div>')
        if wiki_text:
            self.plot = wiki_text
        paragraphs = self.plot_page.split('<p class="plotpar">')
        if len(paragraphs) > 1:
            self.plot += '\n\n'
            # paragraphs[0] is the markup in front of the first paragraph
            for paragraph in paragraphs[1:]:
                if paragraph != '':
                    self.plot += gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n'
        if self.plot:
            return
        # nothing in spanish found, try original
        self.plot = gutils.regextrim(self.imdb_page, '<h5>Plot:</h5>', '(</div>|<a href.*)')
        self.plot = self.__before_more(self.plot)
        paragraphs = self.imdb_plot_page.split('<p class="plotpar">')
        if len(paragraphs) > 1:
            self.plot += '\n\n'
            for paragraph in paragraphs[1:]:
                if paragraph != '':
                    self.plot += gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n\n'
Ejemplo n.º 35
0
 def get_runtime(self):
     """Set ``self.runtime`` from the duration text of ``self.page``.

     A value containing ``H`` is converted to minutes (hours * 60 + rest) and
     stored as a string; otherwise the text in front of ``' mn'`` is kept.
     """
     duration = gutils.clean(gutils.trim(self.page, u'Durée : ', '</span>'))
     self.runtime = duration
     if not duration:
         return
     if duration.find('H') > 0:
         hours = int(gutils.before(duration, 'H'))
         minutes = int(gutils.after(duration, 'H'))
         self.runtime = str(hours * 60 + minutes)
     else:
         self.runtime = gutils.before(duration, ' mn')
Ejemplo n.º 36
0
 def search(self, parent_window):
     """Run the search and reduce ``self.page`` to the result listing.

     Returns ``None`` when ``self.open_search`` fails, otherwise the trimmed
     page, which is also stored back into ``self.page``.
     """
     if self.open_search(parent_window):
         listing = gutils.trim(self.page, '>Wynik wyszukiwania<',
                               '<TABLE border=0 cellpadding=0')
         self.page = gutils.after(listing, '</SPAN></DIV><BR>')
         return self.page
     return None
Ejemplo n.º 37
0
	def get_cast(self):
		"""Set ``self.cast`` from the "OBSADA" section of ``self.cast_page``.

		After tabs and tags are removed, the whitespace run separating two
		fields is replaced by the translated " as " text; an " as " left
		directly in front of a newline is dropped again.
		"""
		as_text = _(' as ')
		cast = gutils.trim(self.cast_page, '<h2>OBSADA:</h2>', '<div class="b')
		cast = gutils.after(cast, '<div class="clr"></div>')
		cast = gutils.strip_tags(cast.replace('\t', ''))
		cast = cast.replace('\n\n                ', as_text)
		self.cast = cast.replace("%s\n" % as_text, "\n")
 def get_genre(self):
     """Set ``self.genre`` from the genre row of ``self.page``.

     The ``' | '`` separators are turned into ``'. '`` and any whitespace run
     after a full stop is collapsed to a single blank.
     """
     self.genre = ''
     row = gutils.trim(self.page, '<th>G&Eacute;NERO</th>', '</tr>')
     cell = gutils.after(row, '<td>')
     if not cell:
         return
     genres = gutils.clean(cell.replace(' | ', '. '))
     self.genre = re.sub('[.][ \t]+', '. ', genres)
Ejemplo n.º 39
0
 def get_site(self):
     """Find the film's imdb details page.

     Takes the markup between the official-site button image and the end of
     the link that wraps the imdb button image, keeps what follows
     ``<A HREF='`` and removes any remaining single quotes.
     """
     between_buttons = gutils.trim(
         self.page,
         "/imagens/bf_siteoficial.gif' WIDTH=89 HEIGHT=18 BORDER=0 ALT=''>",
         "' TARGET=_blank><IMG SRC='/imagens/bf_imdb.gif'")
     link = gutils.after(between_buttons, "<A HREF='")
     self.site = link.replace("'", "")
Ejemplo n.º 40
0
 def get_director(self):
     """Set ``self.director`` from the director row of ``self.creditspage``.

     Line breaks between names become ``', '`` and a trailing comma left
     after cleaning is removed.
     """
     row = gutils.trim(self.creditspage, u'reżyseria: <', '</tr>')
     names = gutils.after(row, '>').replace('<br />', ', ')
     names = gutils.clean(names)
     if names.endswith(','):
         names = names[:-1]
     self.director = names
	def get_site(self):
		"""Find the film's imdb details page.

		Takes the markup between the official-site button image and the end
		of the link that wraps the imdb button image, keeps what follows
		``<A HREF='`` and removes any remaining single quotes.
		"""
		start_marker = "/imagens/bf_siteoficial.gif' WIDTH=89 HEIGHT=18 BORDER=0 ALT=''>"
		end_marker = "' TARGET=_blank><IMG SRC='/imagens/bf_imdb.gif'"
		fragment = gutils.trim(self.page, start_marker, end_marker)
		fragment = gutils.after(fragment, "<A HREF='")
		self.site = fragment.replace("'", "")
Ejemplo n.º 42
0
 def get_plot(self):
     """Set ``self.plot`` from the plot summaries, else from the description.

     Every entry of the ``plot-summaries-content`` list in ``self.plot_page``
     contributes its paragraph and its author line, unless it is the "no
     summaries yet" placeholder.  If that produces nothing, the
     ``itemprop="description"`` text of ``self.page`` is used and any
     ``plotpar`` paragraphs (or odd/even list items) are appended.
     """
     placeholder = "It looks like we don't have any Plot Summaries for this title yet."
     summary_list = gutils.trim(self.plot_page, 'id="plot-summaries-content">', '</ul>')
     compilation = ''
     for item in summary_list.split('<li'):
         if item == '' or placeholder in item:
             continue
         compilation += gutils.trim(item, '<p>', '</p>') + '\n'
         author = gutils.trim(item, '<div class="author-container">', '</div>')
         compilation += re.sub('<[^<]+?>', '', author.replace('\n', '').lstrip()) + '\n\n'
     if compilation != '':
         self.plot = compilation
         return
     description = gutils.regextrim(self.page, 'itemprop="description"', '<')
     self.plot = gutils.after(description, '>')
     paragraphs = self.plot_page.split('<p class="plotpar">')
     if len(paragraphs) < 2:
         paragraphs = re.split('<li class="(?:odd|even)">', self.plot_page)
     if len(paragraphs) > 1:
         self.plot += '\n\n'
         # paragraphs[0] is the markup in front of the first paragraph
         for paragraph in paragraphs[1:]:
             if paragraph != '':
                 self.plot += gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n\n'
Ejemplo n.º 43
0
 def get_year(self):
     """Set ``self.year`` from the year link, else from the page title.

     The fallback looks for four digits directly followed by ``)`` inside the
     ``<title>`` element and stores the digits only.
     """
     year_link = gutils.trim(self.page, '<a href="/year/', '</a>')
     self.year = gutils.after(year_link, '>')
     if not self.year:
         page_title = gutils.trim(self.page, '<title>', '</title>')
         match = re.search('([0-9]{4})[)]', page_title)
         if match:
             # group(1) is just the digits; group(0) would also contain the
             # closing parenthesis that anchors the match.
             self.year = match.group(1)
Ejemplo n.º 44
0
 def get_o_title(self):
     """Set ``self.o_title`` from the "Originaltitel" entry of ``self.page``.

     The cleaned entry is capitalised word by word; when it is empty the
     text of the ``film-titel`` heading is used instead.
     """
     entry = gutils.regextrim(self.page, '<b>Originaltitel:', '(</p>|<b>)')
     title = string.capwords(gutils.clean(entry))
     if not title:
         heading = gutils.trim(self.page, "class='film-titel'", '</h1>')
         title = gutils.after(heading, '>')
     self.o_title = title
Ejemplo n.º 45
0
 def get_screenplay(self):
     """Set ``self.screenplay`` from the screenplay row of ``self.creditspage``.

     Line breaks between names become ``', '`` and a trailing comma left
     after cleaning is removed.
     """
     row = gutils.trim(self.creditspage, u'scenariusz: <', '</tr>')
     names = gutils.after(row, '>').replace('<br />', ', ')
     names = gutils.clean(names)
     if names.endswith(','):
         names = names[:-1]
     self.screenplay = names
Ejemplo n.º 46
0
 def get_cameraman(self):
     """Set ``self.cameraman`` from the photography row of ``self.creditspage``.

     Line breaks between names become ``', '`` and a trailing comma left
     after cleaning is removed.
     """
     row = gutils.regextrim(self.creditspage, u'zdjęcia: <', '(</tr>|<tr>)')
     names = gutils.after(row, '>').replace('<br />', ', ')
     names = gutils.clean(names)
     if names.endswith(','):
         names = names[:-1]
     self.cameraman = names
Ejemplo n.º 47
0
 def get_rating(self):
     """Set ``self.rating`` from the ``movie-rat-avg`` element of ``self.page``.

     A non-empty value has its decimal comma replaced by a point, is rounded
     and stored as a string.
     """
     average = gutils.trim(self.page, 'id="movie-rat-avg"', '</div>')
     self.rating = gutils.after(average, '>')
     if not self.rating:
         return
     number = float(gutils.clean(self.rating.replace(',', '.')))
     self.rating = str(round(number))
Ejemplo n.º 48
0
 def initialize(self):
     """Parse the page's ``application/ld+json`` script into ``self.jsondata``.

     Only the part of ``self.page`` after ``<body`` is searched.  If the
     script is missing or cannot be parsed, ``self.jsondata`` becomes an empty
     dict.
     """
     try:
         body = gutils.after(self.page, '<body')
         payload = gutils.trim(body, '<script type="application/ld+json">',
                               '</script>')
         self.jsondata = json.loads(payload)
     except Exception:
         # Best effort: any parsing problem means "no structured data".
         # Exception (rather than a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         self.jsondata = {}
 def get_o_title(self):
     """Set ``self.o_title`` from the original-title row of ``self.page``.

     Runs of blanks are collapsed and the TV / TV series markers are removed.
     """
     row = gutils.trim(self.page, u'<th>T&Iacute;TULO ORIGINAL</th>',
                       '</strong></td>')
     title = gutils.after(row, '<strong>')
     title = re.sub('[ ]+', ' ', title)
     self.o_title = re.sub(
         '([(]Serie de TV[)]|[(]TV[)]|[(]TV Series[)])', '', title)
Ejemplo n.º 50
0
 def get_runtime(self):
     """Set ``self.runtime`` from the "Czas trwania" entry of ``self.page``.

     A value containing ``?`` is stored as an empty string; otherwise the
     text between the multiplication sign and ``' min'`` is kept.
     """
     duration = gutils.trim(
         self.page, "<div class=\"sitem\">Czas trwania: <b>\n\t\t",
         '\n</b>')
     if duration.find('?') != -1:
         self.runtime = ''
         return
     duration = gutils.after(duration, '×')
     self.runtime = gutils.before(duration, ' min')
Ejemplo n.º 51
0
 def get_plot(self):
     """Set ``self.plot`` from the ``movie-plot-synopsis`` section.

     Embedded ``<script ...>...</script>`` blocks are removed together with
     their closing tag, and all newlines are dropped.
     """
     section = gutils.trim(self.page, 'class="movie-plot-synopsis"', '</section>')
     plot = gutils.after(section, '>')
     closing = '</script>'
     start = plot.find('<script')
     while start > -1:
         end = plot.find(closing, start)
         if end == -1:
             # unterminated script block: leave the remainder untouched
             break
         # skip past the closing tag so that no dangling '</script>' remains
         plot = plot[:start] + plot[end + len(closing):]
         start = plot.find('<script')
     self.plot = plot.replace('\n', '')
Ejemplo n.º 52
0
 def get_studio(self):
     """Set ``self.studio`` from the "Produktion" entry of ``self.detail_page``.

     The entry ends at the next image or, failing that, at the end of the
     table.  Row starts become ``', '`` separators, tags are stripped and a
     leading or trailing separator is removed.
     """
     entry = gutils.trim(self.detail_page, 'alt="Produktion"', '<img')
     if entry == '':
         entry = gutils.trim(self.detail_page, 'alt="Produktion"', '</TABLE>')
     entry = gutils.after(entry, '>').replace('<tr>', ', ')
     entry = gutils.strip_tags(entry)
     self.studio = re.sub('((^, )|(, $))', '', entry)
Ejemplo n.º 53
0
 def get_director(self):
     """Set ``self.director`` to a comma-separated list of director names.

     The "Directed by" table of ``self.cast_page`` is cut at every
     ``href="``; the link text of each chunk is one name.
     """
     section = gutils.trim(self.cast_page, '>Directed by', '</table>')
     # the chunk in front of the first link carries no name
     links = re.split('href="', section)[1:]
     names = [gutils.before(gutils.after(link, '>'), '<') for link in links]
     # join() puts the separator between the names only, never after one
     self.director = ', '.join(names)
Ejemplo n.º 54
0
 def get_rating(self):
     """Set ``self.rating`` to the rounded "Note" value of ``self.page``.

     An empty value is treated as "0"; a value that cannot be converted to a
     number results in 0.
     """
     note = gutils.trim(self.page, ">Note: <", "</span>")
     self.rating = gutils.after(note, '>')
     if self.rating == '':
         self.rating = "0"
     if not self.rating:
         return
     try:
         self.rating = round(float(self.rating), 0)
     except Exception:
         self.rating = 0
Ejemplo n.º 55
0
 def get_o_title(self):
     """Set ``self.o_title`` and rewrite ``self.url`` to the plugin URL.

     The ``origTitle`` heading is preferred; when it is empty the page title
     is used, cut in front of the first ``(`` and then the first ``/``.
     """
     self.url = self.url.replace(plugin_server, plugin_url)
     heading = gutils.trim(self.page, '<h2 class=origTitle>', '</h2>')
     title = gutils.after(heading, '</span>')
     if title == '':
         title = gutils.trim(self.page, '<title>', '</title>')
         for separator in ('(', '/'):
             if title.find(separator) > -1:
                 title = gutils.before(title, separator)
     self.o_title = title
Ejemplo n.º 56
0
 def get_cast(self):
     """Build ``self.cast`` with one line per ``itemprop="actor"`` entry.

     A line reads ``actor as role`` when a role is present and just the
     actor's name otherwise.
     """
     self.cast = ''
     entries = self.page.split('itemprop="actor"')
     # entries[0] is the markup in front of the first actor
     for entry in entries[1:]:
         name = gutils.trim(entry, '>', '</span>')
         part = gutils.after(gutils.trim(entry, u'Rôle', '</span>'), ': ')
         line = name + _(' as ') + part if part else name
         self.cast = self.cast + line + '\n'
Ejemplo n.º 57
0
 def get_searches(self):
     """Collect link targets and titles of every ``product-name`` entry.

     The markup in front of the first entry is ignored; an
     "Zweitausendeins Edition..." suffix is removed from the titles.
     """
     entries = self.page.split('class="product-name"')
     for entry in entries[1:]:
         self.ids.append(gutils.trim(entry, 'href="', '"'))
         link_text = gutils.trim(gutils.after(entry, '>'), '>', '<')
         self.titles.append(re.sub('Zweitausendeins Edition.*', '', link_text))
Ejemplo n.º 58
0
 def get_rating(self):
     """Find the film's rating. From 0 to 10.

     The "Cota" value of ``self.page`` has its decimal comma replaced by a
     point and is stored as the string form of a float; ``self.rating`` is an
     empty string when no value is found.
     """
     score = gutils.after(gutils.trim(self.page, "<br><b>Cota", " ("), "</b>")
     if score == "":
         self.rating = ""
         return
     score = score.replace(',', '.')
     self.rating = str(float(score.strip()))