def get_searches(self):
        """Fill ``self.ids`` and ``self.titles`` from the search result page.

        ``self.page`` is split on cinema links (``href="/kinofilm/``) and on
        video links (``href="http://www.video.de/videofilm/``).  Cinema hits
        get ids prefixed with ``K_`` and titles prefixed with ``Kino: ``;
        video hits get ``V_`` / ``Video: `` plus a medium label derived from
        the id.
        """
        # the chunk before the first link marker is not a result, so skip it
        for chunk in re.split('href="/kinofilm/', self.page)[1:]:
            name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
            details = gutils.clean(gutils.trim(chunk, '<p>', "<br />"))
            # with empty details ' ()' collapses to a single blank
            title = name + string.replace(' (' + details + ')', '()', '')
            if title == ' ':
                continue
            self.ids.append("K_" + re.sub('[?].*', '', gutils.before(chunk, '"')))
            self.titles.append('Kino: ' + title)

        # checked in this order; a later match overrides an earlier one
        media_labels = (
            ('blu-ray-disc-kauf', ' (Bluray-Kauf)'),
            ('blu-ray-disc-leih', ' (Bluray-Verleih)'),
            ('dvd-leih', ' (DVD-Verleih)'),
            ('dvd-kauf', ' (DVD-Kauf)'),
        )
        for chunk in re.split('href="http://www.video.de/videofilm/', self.page)[1:]:
            name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
            # details are only looked up in the part before the closing </li>
            details = gutils.clean(
                gutils.trim(gutils.before(chunk, '</li>'), '<p>', "<br />"))
            title = name + string.replace(' (' + details + ')', '()', '')
            if title == ' ':
                continue
            video_id = re.sub('[?].*', '', gutils.before(chunk, '"'))
            self.ids.append("V_" + video_id)
            medium = ''
            for marker, label in media_labels:
                if marker in video_id:
                    medium = label
            self.titles.append('Video: ' + title + medium)
    def get_searches(self):
        """Fill ``self.ids`` and ``self.titles`` from the search result page.

        ``self.page`` is split on cinema links (``href="/kinofilm/``) and on
        video links (``href="http://www.video.de/videofilm/``).  Cinema hits
        get ids prefixed with ``K_`` and titles prefixed with ``Kino: ``;
        video hits get ``V_`` / ``Video: `` plus a medium label derived from
        the id.
        """
        # checked in this order; a later match overrides an earlier one
        media_labels = (
            ('blu-ray-disc-kauf', ' (Bluray-Kauf)'),
            ('blu-ray-disc-leih', ' (Bluray-Verleih)'),
            ('dvd-leih', ' (DVD-Verleih)'),
            ('dvd-kauf', ' (DVD-Kauf)'),
        )

        # the chunk before the first link marker is not a result, so skip it
        for chunk in re.split('href="/kinofilm/', self.page)[1:]:
            name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
            details = gutils.clean(gutils.trim(chunk, '<p>', "<br />"))
            # with empty details ' ()' collapses to a single blank
            title = name + string.replace(' (' + details + ')', '()', '')
            if title == ' ':
                continue
            self.ids.append("K_" + re.sub('[?].*', '', gutils.before(chunk, '"')))
            self.titles.append('Kino: ' + title)

        for chunk in re.split('href="http://www.video.de/videofilm/', self.page)[1:]:
            name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
            details = gutils.clean(gutils.trim(chunk, '<p>', "<br />"))
            title = name + string.replace(' (' + details + ')', '()', '')
            if title == ' ':
                continue
            video_id = re.sub('[?].*', '', gutils.before(chunk, '"'))
            self.ids.append("V_" + video_id)
            medium = ''
            for marker, label in media_labels:
                if marker in video_id:
                    medium = label
            self.titles.append('Video: ' + title + medium)
 def get_cast(self):
     """Build ``self.cast`` as one "<actor> as <character>" line per pair.

     The section of ``self.page_cast`` between 'Acteurs' and '</table>' is
     split on '<td '; the cells after the first chunk are read two at a
     time, character first and actor second.
     """
     self.cast = ""
     table = gutils.trim(self.page_cast, 'Acteurs', '</table>')
     cells = string.split(table, '<td ')
     lines = []
     for pos in range(1, len(cells) - 1, 2):
         role = gutils.after(cells[pos], '>')
         performer = gutils.after(cells[pos + 1], '>')
         lines.append(gutils.clean(performer) + _(' as ') + gutils.clean(role) + '\n')
     self.cast = self.cast + ''.join(lines)
 def get_cast(self):
     """Build ``self.cast`` as one "<actor> as <character>" line per row.

     The section of ``self.page_cast`` between "Acteurs" and "</table>" is
     split on "<td "; the cells after the first chunk are read in groups of
     three, of which the second is used as character and the third as actor.
     A trailing group with fewer than three cells is ignored.
     """
     self.cast = ""
     table = gutils.trim(self.page_cast, "Acteurs", "</table>")
     cells = string.split(table, "<td ")
     lines = []
     # stop two cells early: each step reads cells[pos + 1] and cells[pos + 2],
     # so the bound must keep pos + 2 inside the list
     for pos in range(1, len(cells) - 2, 3):
         role = gutils.after(cells[pos + 1], ">")
         performer = gutils.after(cells[pos + 2], ">")
         lines.append(gutils.clean(performer) + _(" as ") + gutils.clean(role) + "\n")
     self.cast = "".join(lines)
Exemple #5
0
    def parse_movie(self):
        """Fetch and normalise every field named in ``self.fields_to_fetch``.

        For each requested field the matching ``get_<field>`` method is
        called and the result is post-processed:

        * ``year``, ``runtime``, ``rating`` go through ``gutils.digits_only``
        * ``cast``, ``plot``, ``notes`` are cleaned and, unless already
          ``unicode``, decoded with ``self.encode``
        * ``image`` triggers ``get_image`` followed by ``fetch_picture``
        * any other field is handled like the text fields, but read and
          written through item access (``self[name]``)

        Finally a leading "The " in ``o_title`` / ``title`` is moved to the
        end (", The").  Ordinary exceptions are logged and not re-raised;
        ``self.progress.hide()`` always runs.
        """
        try:
            pending = list(self.fields_to_fetch)  # work on a copy

            self.initialize()

            # (field, extra positional arguments for gutils.digits_only)
            for name, extra in (('year', (2100,)), ('runtime', ()), ('rating', (10,))):
                if name in pending:
                    getattr(self, 'get_%s' % name)()
                    setattr(self, name, gutils.digits_only(getattr(self, name), *extra))
                    pending.remove(name)

            for name in ('cast', 'plot', 'notes'):
                if name in pending:
                    getattr(self, 'get_%s' % name)()
                    setattr(self, name, gutils.clean(getattr(self, name)))
                    if not isinstance(getattr(self, name), unicode):
                        setattr(self, name, gutils.gdecode(getattr(self, name), self.encode))
                    pending.remove(name)

            if 'image' in pending:
                self.get_image()
                self.fetch_picture()
                pending.remove('image')

            # everything that is left has no special treatment
            for name in pending:
                getattr(self, "get_%s" % name)()
                self[name] = gutils.clean(self[name])
                if not isinstance(self[name], unicode):
                    self[name] = gutils.gdecode(self[name], self.encode)

            # "The Foo" -> "Foo, The"
            for name in ('o_title', 'title'):
                if name in self.fields_to_fetch:
                    value = getattr(self, name)
                    if value is not None and value[:4] == u'The ':
                        setattr(self, name, value[4:] + u', The')
        except Exception:
            # Exception instead of a bare except: SystemExit and
            # KeyboardInterrupt must not be swallowed here
            log.exception('')
        finally:
            # close the progress dialog which was opened in get_movie
            self.progress.hide()
    def parse_movie(self):
        """Fetch and normalise every field named in ``self.fields_to_fetch``.

        For each requested field the matching ``get_<field>`` method is
        called and the result is post-processed:

        * ``year``, ``runtime``, ``rating`` go through ``gutils.digits_only``
        * ``cast``, ``plot``, ``notes`` are cleaned and, unless already
          ``unicode``, decoded with ``self.encode``
        * ``image`` triggers ``get_image`` followed by ``fetch_picture``
        * any other field is handled like the text fields, but read and
          written through item access (``self[name]``)

        Finally a leading "The " in ``o_title`` / ``title`` is moved to the
        end (", The").  Ordinary exceptions are logged and not re-raised;
        ``self.progress.hide()`` always runs.
        """
        try:
            pending = list(self.fields_to_fetch)  # work on a copy

            self.initialize()

            # (field, extra positional arguments for gutils.digits_only)
            for name, extra in (('year', (2100,)), ('runtime', ()), ('rating', (10,))):
                if name in pending:
                    getattr(self, 'get_%s' % name)()
                    setattr(self, name, gutils.digits_only(getattr(self, name), *extra))
                    pending.remove(name)

            for name in ('cast', 'plot', 'notes'):
                if name in pending:
                    getattr(self, 'get_%s' % name)()
                    setattr(self, name, gutils.clean(getattr(self, name)))
                    if not isinstance(getattr(self, name), unicode):
                        setattr(self, name, gutils.gdecode(getattr(self, name), self.encode))
                    pending.remove(name)

            if 'image' in pending:
                self.get_image()
                self.fetch_picture()
                pending.remove('image')

            # everything that is left has no special treatment
            for name in pending:
                getattr(self, "get_%s" % name)()
                self[name] = gutils.clean(self[name])
                if not isinstance(self[name], unicode):
                    self[name] = gutils.gdecode(self[name], self.encode)

            # "The Foo" -> "Foo, The"
            for name in ('o_title', 'title'):
                if name in self.fields_to_fetch:
                    value = getattr(self, name)
                    if value is not None and value[:4] == u'The ':
                        setattr(self, name, value[4:] + u', The')
        except Exception:
            # Exception instead of a bare except: SystemExit and
            # KeyboardInterrupt must not be swallowed here
            log.exception('')
        finally:
            # close the progress dialog which was opened in get_movie
            self.progress.hide()
    def parse_movie(self):
        """Fetch and normalise every field named in ``self.fields_to_fetch``.

        Numeric fields (``year``, ``runtime``, ``rating``) are passed through
        ``gutils.digits_only``; ``cast``, ``plot`` and ``notes`` are cleaned
        and decoded with ``self.encode`` unless already ``unicode``;
        ``image`` calls ``get_image`` and ``fetch_picture``; all remaining
        fields are cleaned/decoded through item access.  A leading "The " in
        ``o_title`` / ``title`` is moved to the end.  Exceptions propagate,
        but ``self.progress.hide()`` always runs.
        """
        try:
            pending = list(self.fields_to_fetch)  # work on a copy

            self.initialize()

            # (field, extra positional arguments for gutils.digits_only)
            for name, extra in (("year", (2100,)), ("runtime", ()), ("rating", (10,))):
                if name not in pending:
                    continue
                getattr(self, "get_%s" % name)()
                setattr(self, name, gutils.digits_only(getattr(self, name), *extra))
                pending.remove(name)

            for name in ("cast", "plot", "notes"):
                if name not in pending:
                    continue
                getattr(self, "get_%s" % name)()
                setattr(self, name, gutils.clean(getattr(self, name)))
                if not isinstance(getattr(self, name), unicode):
                    setattr(self, name, gutils.gdecode(getattr(self, name), self.encode))
                pending.remove(name)

            if "image" in pending:
                self.get_image()
                self.fetch_picture()
                pending.remove("image")

            # everything that is left has no special treatment
            for name in pending:
                getattr(self, "get_%s" % name)()
                self[name] = gutils.clean(self[name])
                if not isinstance(self[name], unicode):
                    self[name] = gutils.gdecode(self[name], self.encode)

            # "The Foo" -> "Foo, The"
            for name in ("o_title", "title"):
                if name in self.fields_to_fetch:
                    value = getattr(self, name)
                    if value is not None and value[:4] == u"The ":
                        setattr(self, name, value[4:] + u", The")
        finally:
            # close the progress dialog which was opened in get_movie
            self.progress.hide()
 def get_notes(self):
     """Build ``self.notes`` from the 'Critica' and 'Note' parts of ``self.page``.

     Each part that is non-empty after cleaning is added under its own
     heading; the 'Note' text is additionally passed through
     ``self.capwords``.
     """
     self.notes = ''
     sections = []

     review_raw = gutils.regextrim(self.page, 'Critica</font>', "(</td>|\n|Note<)")
     review = gutils.clean(string.replace(review_raw, '<br>', '\n'))
     if review:
         sections.append('Critica:\n\n' + review + '\n\n')

     remark_raw = gutils.regextrim(self.page, 'Note</font>', "(</td>|\n|Critica<)")
     remark = gutils.clean(string.replace(remark_raw, '<br>', '--BR--'))
     if remark:
         # line breaks travel through capwords as the placeholder --BR--;
         # its lower-case form is turned back into newlines afterwards
         remark = self.capwords(remark)
         sections.append('Note:\n\n' + string.replace(remark, '--br--', '\n'))

     self.notes = self.notes + ''.join(sections)
 def get_searches(self):
     """Append one id and one "title (year)" entry per result row.

     ``self.page`` is split on '<tr>'; the chunk before the first '<tr>' is
     skipped.  The id is the part of the title link after its last '/'.
     """
     rows = re.split('<tr>', self.page)
     for row in rows[1:]:
         title_cell = gutils.trim(row, '<td class="title">', '</td>')
         caption = gutils.clean(title_cell)
         link = gutils.trim(title_cell, 'href="', '"')
         # rfind gives -1 without a slash, which keeps the whole link
         movie_id = link[string.rfind(link, '/') + 1:]
         year_cell = gutils.trim(row, '<td class="year">', '</td>')
         self.ids.append(movie_id)
         self.titles.append(caption + ' (' + gutils.clean(year_cell) + ')')
Exemple #10
0
 def get_cast(self):
     """Build ``self.cast`` with one line per actor found in the cast table.

     The part of ``self.page`` between '>Cast<' and '</TABLE>' is split on
     people links.  A line reads "<actor> as <role>" when a role is found
     and just "<actor>" otherwise; pieces without an actor name are skipped.
     """
     self.cast = ''
     table = gutils.trim(self.page, '>Cast<', '</TABLE>')
     for piece in re.split('(href|HREF)="/db/people', table):
         name = gutils.clean(gutils.trim(piece, '>', '<'))
         if not name:
             continue
         part = gutils.clean(gutils.trim(piece, '>...', '</TR>'))
         if part:
             entry = name + _(' as ') + part + '\n'
         else:
             entry = name + '\n'
         self.cast = self.cast + entry
Exemple #11
0
 def get_cast(self):
     """Build ``self.cast`` with one line per actor found in the cast table.

     The part of ``self.page`` between '>Cast<' and '</TABLE>' is split on
     people links.  A line reads "<actor> as <role>" when a role is found
     and just "<actor>" otherwise; pieces without an actor name are skipped.
     """
     self.cast = ''
     table = gutils.trim(self.page, '>Cast<', '</TABLE>')
     for piece in re.split('(href|HREF)="/db/people', table):
         name = gutils.clean(gutils.trim(piece, '>', '<'))
         if not name:
             continue
         part = gutils.clean(gutils.trim(piece, '>...', '</TR>'))
         if part:
             entry = name + _(' as ') + part + '\n'
         else:
             entry = name + '\n'
         self.cast = self.cast + entry
	def get_notes(self):
		"""Build ``self.notes`` from the 'Type' and 'Episodes' fields of ``self.page``.

		A line is added for each field whose cleaned value is not the
		empty string.
		"""
		self.notes = ''
		# (start marker in the page, line template)
		wanted = (
			('"field">Type', "Type: %s\n"),
			('"field">Episodes', "Episodes: %s\n"),
		)
		for marker, template in wanted:
			value = gutils.clean(gutils.trim(self.page, marker, '</td>'))
			if value != '':
				self.notes += template % value
 def get_searches(self):
     """Append an id and a title for every film link found in ``self.page``.

     The page is split on the film link prefix.  Nothing is collected when
     the text before the first link is empty.  The id is the part of the
     chunk before its first '.'; the title gets " (year)" appended when a
     year is found.  Chunks without a title are skipped.
     """
     chunks = self.page.split('<a href=\'/film/fichefilm_gen_cfilm=')
     if chunks[0] == '':
         return
     for chunk in chunks[1:]:
         title = gutils.clean(gutils.convert_entities(gutils.trim(chunk, '>', '</a>')))
         year = gutils.clean(gutils.trim(chunk, '<span class="fs11">', '<br'))
         if not title:
             continue
         self.ids.append(gutils.before(chunk, '.'))
         if year:
             self.titles.append(title + ' (' + year + ')')
         else:
             self.titles.append(title)
    def parse_movie(self):
        """Fetch and normalise every field named in ``self.fields_to_fetch``.

        Numeric fields (``year``, ``runtime``, ``rating``) are passed through
        ``gutils.digits_only``; ``cast``, ``plot`` and ``notes`` are cleaned
        and decoded with ``self.encode``; ``image`` calls ``get_image`` and
        ``fetch_picture``; all remaining fields are cleaned/decoded through
        item access.  A leading "The " in ``o_title`` / ``title`` is moved to
        the end.
        """
        from copy import deepcopy

        pending = deepcopy(self.fields_to_fetch)  # work on a copy

        self.initialize()

        # (field, extra positional arguments for gutils.digits_only)
        for name, extra in (("year", (2100,)), ("runtime", ()), ("rating", (10,))):
            if name not in pending:
                continue
            getattr(self, "get_%s" % name)()
            setattr(self, name, gutils.digits_only(getattr(self, name), *extra))
            pending.remove(name)

        for name in ("cast", "plot", "notes"):
            if name not in pending:
                continue
            getattr(self, "get_%s" % name)()
            setattr(self, name, gutils.clean(getattr(self, name)))
            setattr(self, name, gutils.gdecode(getattr(self, name), self.encode))
            pending.remove(name)

        if "image" in pending:
            self.get_image()
            self.fetch_picture()
            pending.remove("image")

        # everything that is left has no special treatment
        for name in pending:
            getattr(self, "get_%s" % name)()
            self[name] = gutils.clean(self[name])
            self[name] = gutils.gdecode(self[name], self.encode)

        # "The Foo" -> "Foo, The"
        for name in ("o_title", "title"):
            if name in self.fields_to_fetch:
                value = getattr(self, name)
                if value is not None and value[:4] == "The ":
                    setattr(self, name, value[4:] + ", The")
 def get_cast(self):
     """Build ``self.cast`` with one line per actor link in ``self.page_cast``.

     The cast section is split on person links.  For each link the role is
     looked up after 'Rôle :' in the same chunk and, failing that, in a
     '<td>' cell of the preceding chunk.  A line reads "<actor> as <role>"
     or just "<actor>" when no role is found.
     """
     self.cast = ""
     section = gutils.trim(self.page_cast, 'Acteurs, rôles, personnages', '<h2>')
     chunks = string.split(section, 'href="/personne/fichepersonne_gen_cpersonne=')
     # walk over each chunk together with the one right before it
     for previous, current in zip(chunks, chunks[1:]):
         role = gutils.clean(gutils.trim(current, 'Rôle :', '<'))
         if not role:
             role = gutils.clean(gutils.trim(previous, '<td>', '</td>'))
         name = gutils.clean(gutils.trim(current, '>', '<'))
         if not name:
             continue
         if role:
             self.cast = self.cast + name + _(' as ') + role + '\n'
         else:
             self.cast = self.cast + name + '\n'
	def parse_movie(self):
		"""Fetch and normalise every field named in ``self.fields_to_fetch``.

		Numeric fields (``year``, ``runtime``, ``rating``) are passed through
		``gutils.digits_only``; ``cast``, ``plot`` and ``notes`` are cleaned
		and decoded with ``self.encode``; ``image`` calls ``get_image`` and
		``fetch_picture``; all remaining fields are cleaned/decoded through
		item access.  A leading 'The ' in ``o_title`` / ``title`` is moved to
		the end.
		"""
		from copy import deepcopy
		pending = deepcopy(self.fields_to_fetch)  # work on a copy

		self.initialize()

		# (field, extra positional arguments for gutils.digits_only)
		for name, extra in (('year', (2100,)), ('runtime', ()), ('rating', (10,))):
			if name not in pending:
				continue
			getattr(self, 'get_%s' % name)()
			setattr(self, name, gutils.digits_only(getattr(self, name), *extra))
			pending.remove(name)

		for name in ('cast', 'plot', 'notes'):
			if name not in pending:
				continue
			getattr(self, 'get_%s' % name)()
			setattr(self, name, gutils.clean(getattr(self, name)))
			setattr(self, name, gutils.gdecode(getattr(self, name), self.encode))
			pending.remove(name)

		if 'image' in pending:
			self.get_image()
			self.fetch_picture()
			pending.remove('image')

		# everything that is left has no special treatment
		for name in pending:
			getattr(self, "get_%s" % name)()
			self[name] = gutils.clean(self[name])
			self[name] = gutils.gdecode(self[name], self.encode)

		# 'The Foo' -> 'Foo, The'
		for name in ('o_title', 'title'):
			if name in self.fields_to_fetch:
				value = getattr(self, name)
				if value is not None and value[:4] == 'The ':
					setattr(self, name, value[4:] + ', The')
    def search(self, parent_window):
        """Fetch film and video search results and return them concatenated.

        The first film result page is loaded with the URL already set on the
        instance.  Further film pages, the first video page and further video
        pages are then requested and appended.  At most five pages are
        fetched per category.  The combined text is stored in ``self.page``
        and returned.
        """
        base_url = "http://www.kino.de/search.php?mode=megaSearch&searchCategory="
        max_pages = 5  # per category, counting the first page

        def count_pages(page):
            # the text between ">von" and "</a>" is read as the page count;
            # anything that is not a number means a single page
            label = gutils.clean(gutils.trim(page, ">von", "</a>"))
            try:
                return int(label)
            except (TypeError, ValueError):
                return 1

        def append_following_pages(category, total, collected):
            # fetch pages 2..min(total, max_pages) and append them
            current = 1
            while total > current and current < max_pages:
                current += 1
                self.url = base_url + category + "&page=" + str(current) + "&inputSearch="
                self.open_search(parent_window)
                collected = collected + self.page
            return collected

        self.open_search(parent_window)
        first_film_page = self.page
        combined = append_following_pages(
            "film", count_pages(first_film_page), first_film_page)

        # Look for DVD and VHS
        self.url = base_url + "video&inputSearch="
        self.open_search(parent_window)
        first_video_page = self.page
        combined = combined + first_video_page
        combined = append_following_pages(
            "video", count_pages(first_video_page), combined)

        self.page = combined
        return self.page
	def get_rating(self):
		"""Set ``self.rating`` from the user rating found in ``self.page``.

		The text between '<h5>Nutzer-Bewertung:</h5>' and '/10' is cleaned
		and stored as the string form of a float.  If it cannot be converted
		the rating becomes ''; if nothing is found the value returned by
		``gutils.trim`` is kept as is.
		"""
		rating = gutils.trim(self.page, '<h5>Nutzer-Bewertung:</h5>', '/10')
		if rating:
			try:
				rating = str(float(gutils.clean(rating)))
			except Exception:
				# Exception rather than a bare except, so that SystemExit
				# and KeyboardInterrupt are not swallowed
				rating = ''
		self.rating = rating
 def get_cast(self):
     """Set ``self.cast`` from the '(Darsteller)' section of ``self.page``.

     ' als ' is replaced by the translated ' as ', tabs and line breaks
     are removed, and every comma becomes a line break.
     """
     raw = gutils.regextrim(self.page, '[(]Darsteller[)]',
                            '(</td>|<br><span[^>]+>)')
     text = gutils.clean(raw).replace(' als ', _(' as '))
     text = re.sub('( \t|\t|\r|\n)', '', text)
     self.cast = text.replace(',', '\n')
 def get_notes(self):
     """Build ``self.notes`` from language, sound, colour and tagline data.

     Language, colour and sound are read from ``self.page``; taglines are
     read from ``self.tagl_page``.  Only non-empty values produce a line.
     """
     def detail(label_pattern):
         # value up to '</div>', tags stripped, whitespace collapsed
         value = gutils.regextrim(self.page, label_pattern, '</div>')
         value = gutils.strip_tags(value)
         value = re.sub('[\n]+', '', value)
         value = re.sub('[ ]+', ' ', value)
         return value.strip()

     self.notes = ''
     language = detail('Language:<[^>]+>')
     color = detail('Color:<[^>]+>')
     sound = detail('Sound Mix:<[^>]+>')

     tagline_block = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
     tagline = ''
     # the chunk before the first matching <div> holds no tagline
     for fragment in re.split('<div[^>]+class="soda[^>]*>', tagline_block)[1:]:
         text = gutils.clean(gutils.before(fragment, '</div>'))
         if text:
             tagline = tagline + text + '\n'

     if language:
         self.notes = "%s: %s\n" %(_('Language'), language)
     if sound:
         self.notes += "%s: %s\n" %(gutils.strip_tags(_('<b>Audio</b>')), sound)
     if color:
         self.notes += "%s: %s\n" %(_('Color'), color)
     if tagline:
         self.notes += "%s: %s\n" %('Tagline', tagline)
Exemple #21
0
 def get_notes(self):
     """Build ``self.notes`` from language, sound, colour and tagline data.

     Language, colour and sound are read from ``self.page``; taglines are
     read from ``self.tagl_page``.  Only non-empty values produce a line.
     """
     def detail(label_pattern):
         # value up to '</div>', tags stripped, whitespace collapsed
         value = gutils.regextrim(self.page, label_pattern, '</div>')
         value = gutils.strip_tags(value)
         value = re.sub('[\n]+', '', value)
         value = re.sub('[ ]+', ' ', value)
         return value.strip()

     self.notes = ''
     language = detail('Language:<[^>]+>')
     color = detail('Color:<[^>]+>')
     sound = detail('Sound Mix:<[^>]+>')

     tagline_block = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
     tagline = ''
     # the chunk before the first matching <div> holds no tagline
     for fragment in re.split('<div[^>]+class="soda[^>]*>', tagline_block)[1:]:
         text = gutils.clean(gutils.before(fragment, '</div>'))
         if text:
             tagline = tagline + text + '\n'

     if language:
         self.notes = "%s: %s\n" % (_('Language'), language)
     if sound:
         audio_label = gutils.strip_tags(_('<b>Audio</b>'))
         self.notes += "%s: %s\n" % (audio_label, sound)
     if color:
         self.notes += "%s: %s\n" % (_('Color'), color)
     if tagline:
         self.notes += "%s: %s\n" % ('Tagline', tagline)
 def get_director(self):
     """Set ``self.director`` from the "reżyseria" row of ``self.creditspage``.

     '<br />' separators become ", " and a trailing comma left after
     cleaning is removed.
     """
     row = gutils.trim(self.creditspage, u"reżyseria: <", "</tr>")
     names = gutils.after(row, ">").replace("<br />", ", ")
     names = gutils.clean(names)
     if names.endswith(","):
         names = names[:-1]
     self.director = names
 def get_screenplay(self):
     """Set ``self.screenplay`` from the "scenariusz" row of ``self.creditspage``.

     '<br />' separators become ", " and a trailing comma left after
     cleaning is removed.
     """
     row = gutils.trim(self.creditspage, u"scenariusz: <", "</tr>")
     names = gutils.after(row, ">").replace("<br />", ", ")
     names = gutils.clean(names)
     if names.endswith(","):
         names = names[:-1]
     self.screenplay = names
Exemple #24
0
 def get_plot(self):
     """Set ``self.plot`` from the first source that yields any text.

     Sources are tried in this order: the 'yui-content' block of
     ``self.page``, the 'Filmhandlung & Hintergrund' section of
     ``self.page``, and the 'yui-content' block of ``self.videopage``.
     Text that was found is stripped of scripts, navigation link labels,
     photo credit lines and blank lines.
     """
     plot = gutils.trim(self.page, '<div class="yui-content">',
                        '<div class="footer">')
     if not plot:
         # kino page
         section = gutils.trim(self.page, 'Filmhandlung & Hintergrund', '</div>')
         plot = gutils.after(section, '</h2>')
     if not plot and self.videopage:
         plot = gutils.trim(self.videopage,
                            '<div class="yui-content">',
                            '<div class="footer">')
     if plot:
         # video page
         plot = re.sub('<script type="text/javascript">[^<]+</script>',
                       '', plot)
         # link labels and headings, applied in this order
         substitutions = (
             ('>Großansicht</a>', '>'),
             ('>Schließen</a>', '>'),
             ('>zur&uuml;ck </a>', '>'),
             ('>1</a>', '>'),
             ('> weiter</a>', '>'),
             ('</h4>', '\n'),
         )
         for old, new in substitutions:
             plot = string.replace(plot, old, new)
         plot = gutils.clean(plot)
         # line-wise clean-up: photo credits, blank or '//' lines, then
         # lines that consist of line breaks only
         line_rules = (
             (r'^[^(]+[(]Foto[:][^)]+[)][ ]*$', ''),
             (r"(^\s+$|^\s*//\s*$)", ''),
             ("^[\n]+$", "\n"),
         )
         for pattern, replacement in line_rules:
             plot = re.compile(pattern, re.MULTILINE).sub(replacement, plot)
     self.plot = plot
    def get_searches(self):
        """Fill ``self.ids`` and ``self.titles`` from the search result.

        Nothing happens for an empty ``self.page``.  A page shorter than 20
        characters is treated as a direct hit: the page text itself becomes
        the only id, ``self.url`` the only title and ``self.number_results``
        is set to 1.  Otherwise the page is split on '</a></b>' and every
        chunk containing a film link contributes an id and a title; the
        title ends with the text that follows the link in the next chunk.
        """
        if not self.page:
            return
        if len(self.page) < 20:  # immediate redirection to the movie page
            self.number_results = 1
            self.ids.append(self.page)
            self.titles.append(self.url)
            return

        # multiple matches
        chunks = self.page.split('</a></b>')
        if chunks[0] == '':
            return
        # walk over each chunk together with its successor
        for chunk, following in zip(chunks, chunks[1:]):
            film_id = gutils.trim(chunk, '<b><a href="/es/film', '.html')
            if not film_id:
                continue
            self.ids.append(film_id)
            anchor = gutils.clean(
                gutils.after(chunk, '<b><a href="/es/film')).replace("\n", "")
            caption = gutils.strip_tags(
                gutils.convert_entities(gutils.after(anchor, '>')))
            suffix = gutils.before(following, '<').strip()
            self.titles.append(caption + ' ' + suffix)
Exemple #26
0
 def get_notes(self):
     """Build ``self.notes`` from language, sound, colour and release date.

     All values are read from ``self.page``; only non-empty values
     produce a line.
     """
     def squeeze(text):
         # drop line breaks, then collapse runs of spaces
         return re.sub('[ ]+', ' ', re.sub('[\n]+', '', text))

     def section(heading):
         raw = gutils.trim(self.page, heading, '</div>')
         return squeeze(gutils.strip_tags(raw)).rstrip()

     self.notes = ''
     language = section('<h5>Lingua:</h5>')
     color = section('<h5>Colore:</h5>')
     sound = section('<h5>Sonoro:</h5>')
     release = gutils.trim(self.page, '<h5>Data di uscita:</h5>',
                           '<a class="tn15more inline"')
     release = gutils.clean(squeeze(release))

     if language:
         self.notes = "%s: %s\n" % (_('Language'), language)
     if sound:
         audio_label = gutils.strip_tags(_('<b>Audio</b>'))
         self.notes += "%s: %s\n" % (audio_label, sound)
     if color:
         self.notes += "%s: %s\n" % (_('Color'), color)
     if len(release) > 0:
         self.notes += "%s: %s\n" % (_('Data di uscita'), release)
 def get_searches(self):
     """Append an id and a title for every DVD / Blu-ray link in ``self.page``.

     The split pattern captures the link prefix, so the resulting list
     alternates between the captured prefix and the text following it.
     Ids are prefixed with 'blu-ray/' or 'dvd/'; titles consist of the link
     text followed by "(<medium> - <details>)".
     """
     pieces = re.split('&nbsp;<a title="[^"]+" href="(/datenbank/medien/dvd/|/datenbank/medien/blu-ray/)', self.page)
     # pieces[0] is the text before the first hit; after it come
     # (prefix, rest) pairs
     for prefix, rest in zip(pieces[1::2], pieces[2::2]):
         if prefix == '/datenbank/medien/blu-ray/':
             medium, id_prefix = 'Blu-Ray', 'blu-ray/'
         else:
             medium, id_prefix = 'DVD', 'dvd/'
         self.ids.append(id_prefix + gutils.before(rest, '"'))

         details = gutils.regextrim(rest, '<div [^>]*>', '</div>')
         details = string.replace(details, '<br>', ' - ')
         details = string.replace(details, '&nbsp;', '')
         details = re.sub('[ \t\n]+', ' ', details)
         self.titles.append(
             gutils.trim(rest, '>', '</a>') +
             gutils.clean('(' + medium + ' - ' + details + ')'))
Exemple #28
0
 def get_genre(self):
     """Set ``self.genre`` from the "Genre(s):" section of ``self.page``.

     '<br>' and '/' separators both become ", ".
     """
     section = gutils.trim(self.page, "Genre(s):", "</table>")
     listing = gutils.strip_tags(string.replace(section, "<br>", ", "))
     listing = gutils.clean(string.replace(listing, "/", ", "))
     # the final character is always dropped
     # NOTE(review): presumably a trailing separator - confirm
     self.genre = listing[:-1]
 def get_genre(self):
     """Set ``self.genre`` from the 'GÉNERO' table row of ``self.page``.

     ' | ' separators become '. ' and whitespace after a dot is reduced to
     a single blank.  Without a matching cell the genre stays ''.
     """
     self.genre = ''
     row = gutils.trim(self.page, '<th>G&Eacute;NERO</th>', '</tr>')
     cell = gutils.after(row, '<td>')
     if not cell:
         return
     genres = gutils.clean(string.replace(cell, ' | ', '. '))
     self.genre = re.sub('[.][ \t]+', '. ', genres)
 def get_searches(self):
     """Append an id and a title for every DVD / Blu-ray link in ``self.page``.

     The split pattern captures the link prefix, so the resulting list
     alternates between the captured prefix and the text following it.
     Ids are prefixed with 'blu-ray/' or 'dvd/'; titles consist of the link
     text followed by "(<medium> - <details>)".
     """
     pieces = re.split('&nbsp;<a title="[^"]+" href="(/datenbank/medien/dvd/|/datenbank/medien/blu-ray/)', self.page)
     # pieces[0] is the text before the first hit; after it come
     # (prefix, rest) pairs
     for prefix, rest in zip(pieces[1::2], pieces[2::2]):
         if prefix == '/datenbank/medien/blu-ray/':
             medium, id_prefix = 'Blu-Ray', 'blu-ray/'
         else:
             medium, id_prefix = 'DVD', 'dvd/'
         self.ids.append(id_prefix + gutils.before(rest, '"'))

         details = gutils.regextrim(rest, '<div [^>]*>', '</div>')
         details = string.replace(details, '<br>', ' - ')
         details = string.replace(details, '&nbsp;', '')
         details = re.sub('[ \t\n]+', ' ', details)
         self.titles.append(
             gutils.trim(rest, '>', '</a>') +
             gutils.clean('(' + medium + ' - ' + details + ')'))
 def get_cameraman(self):
     """Set ``self.cameraman`` from the "zdjęcia" row of ``self.creditspage``.

     '<br />' separators become ", " and a trailing comma left after
     cleaning is removed.
     """
     row = gutils.regextrim(self.creditspage, u"zdjęcia: <", "(</tr>|<tr>)")
     names = gutils.after(row, ">").replace("<br />", ", ")
     names = gutils.clean(names)
     if names.endswith(","):
         names = names[:-1]
     self.cameraman = names
 def get_notes(self):
     """Build self.notes from the language, sound, colour and release-date
     sections of self.page.

     Each non-empty value adds one "<label>: <value>" line, in the order
     language, audio, colour, release date.
     """
     def squeeze(text):
         # Drop newlines, then collapse runs of spaces to a single space.
         return re.sub('[ ]+', ' ', re.sub('[\n]+', '', text))

     def section(header):
         # Tag-stripped, squeezed, right-stripped text for one <h5> header.
         raw = gutils.trim(self.page, header, '</div>')
         return squeeze(gutils.strip_tags(raw)).rstrip()

     self.notes = ''
     language = section('<h5>Lingua:</h5>')
     color = section('<h5>Colore:</h5>')
     sound = section('<h5>Sonoro:</h5>')
     # The date uses a different end marker and gutils.clean instead of
     # strip_tags/rstrip.
     date = gutils.clean(squeeze(
         gutils.trim(self.page, '<h5>Data di uscita:</h5>', '<a class="tn15more inline"')))

     if language:
         self.notes += "%s: %s\n" % (_('Language'), language)
     if sound:
         self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
     if color:
         self.notes += "%s: %s\n" % (_('Color'), color)
     if date:
         self.notes += "%s: %s\n" % (_('Data di uscita'), date)
 def get_rating(self):
     """Set self.rating to the page's rating rounded to a whole number.

     The raw text comes from the '<span class="rating' ... '</a>'
     fragment of self.page (everything after its first '>'), passed
     through gutils.clean.  A non-empty value is converted with
     str(round(float(...))); if it cannot be converted, self.rating
     becomes ''.
     """
     fragment = gutils.trim(self.page, '<span class="rating', '</a>')
     self.rating = gutils.clean(gutils.after(fragment, '>'))
     if self.rating:
         try:
             self.rating = str(round(float(self.rating)))
         except (TypeError, ValueError, OverflowError):
             # Only conversion failures mean "no rating"; a bare except
             # would also swallow KeyboardInterrupt and SystemExit.
             self.rating = ''
 def get_genre(self):
     """Fill self.genre from the G&Eacute;NERO table row of self.page.

     self.genre is '' when gutils.after yields nothing for the '<td>'
     marker.  Otherwise ' | ' separators become '. ', the text goes
     through gutils.clean, and any run of spaces/tabs following a dot
     is collapsed to a single space.
     """
     self.genre = ''
     row = gutils.trim(self.page, '<th>G&Eacute;NERO</th>', '</tr>')
     cell = gutils.after(row, '<td>')
     if not cell:
         return
     text = gutils.clean(cell.replace(' | ', '. '))
     self.genre = re.sub('[.][ \t]+', '. ', text)
 def get_rating(self):
     """Set self.rating from the "Nutzer-Bewertung" entry of self.page.

     The fragment gutils.trim returns for the markers
     "<b>Nutzer-Bewertung:</b>" and "/10" is cleaned and converted with
     str(float(...)).  If the fragment is empty it is left as returned;
     if it cannot be converted, self.rating becomes "".
     """
     self.rating = gutils.trim(self.page, "<b>Nutzer-Bewertung:</b>", "/10")
     if self.rating:
         try:
             self.rating = str(float(gutils.clean(self.rating)))
         except (TypeError, ValueError):
             # Only conversion failures mean "no rating"; a bare except
             # would also swallow KeyboardInterrupt and SystemExit.
             self.rating = ""
 def get_notes(self):
     """Build self.notes from language, sound, colour and tagline data.

     Language, colour and sound come from self.page; taglines come from
     self.tagl_page.  Each non-empty value adds one "<label>: <value>"
     line, in the order language, audio, colour, tagline.
     """
     def section(label_pattern):
         # Tag-stripped text of one section with newlines removed, runs
         # of spaces collapsed and surrounding whitespace stripped.
         text = gutils.strip_tags(
             gutils.regextrim(self.page, label_pattern, "</div>"))
         text = re.sub("[\n]+", "", text)
         return re.sub("[ ]+", " ", text).strip()

     self.notes = ""
     language = section("Language:<[^>]+>")
     color = section("Color:<[^>]+>")
     sound = section("Sound Mix:<[^>]+>")

     # Every chunk after the first split point is one tagline candidate;
     # empty ones are skipped and each kept one ends with a newline.
     tagline_block = gutils.regextrim(self.tagl_page, ">Taglines", ">See also")
     chunks = re.split('<div[^>]+class="soda[^>]*>', tagline_block)
     entries = [gutils.clean(gutils.before(chunk, "</div>"))
                for chunk in chunks[1:]]
     tagline = "".join([entry + "\n" for entry in entries if entry])

     if language:
         self.notes += "%s: %s\n" % (_("Language"), language)
     if sound:
         self.notes += "%s: %s\n" % (gutils.strip_tags(_("<b>Audio</b>")), sound)
     if color:
         self.notes += "%s: %s\n" % (_("Color"), color)
     if tagline:
         self.notes += "%s: %s\n" % ("Tagline", tagline)
 def get_cast(self):
     """Fill self.cast with one cast entry per line.

     The "(Darsteller)" section of self.page is cleaned, ' als ' is
     replaced by the translated ' as ', tabs / CR / LF (and a space
     directly before a tab) are removed, ', ' becomes a newline and any
     remaining commas are dropped.
     """
     cast = gutils.clean(
         gutils.regextrim(self.page, '[(]Darsteller[)]', '(<[pP]>|<br><span[^>]+>)'))
     cast = cast.replace(' als ', _(' as '))
     cast = re.sub('( \t|\t|\r|\n)', '', cast)
     self.cast = cast.replace(', ', '\n').replace(',', '')
 def get_genre(self):
     """Fill self.genre from the "Genre(s):" section of self.page.

     The fragment gutils.trim returns for the markers "Genre(s):" and
     "</table>" has every "<br>" and every "/" replaced by ", ", is run
     through gutils.strip_tags and gutils.clean, and then loses its
     last character.
     """
     raw = gutils.trim(self.page, "Genre(s):", "</table>")
     without_tags = gutils.strip_tags(raw.replace("<br>", ", "))
     cleaned = gutils.clean(without_tags.replace("/", ", "))
     # The final character is dropped unconditionally (presumably a
     # trailing separator -- TODO confirm against a real page).
     self.genre = cleaned[:-1]
 def get_screenplay(self):
     """Fill self.screenplay from the crew list of self.page_cast.

     The crew section is split on '<dt>'.  For every piece containing
     'Screenwriter' at an index greater than 0, the text before its
     first '</a>' is cleaned and stored; when several pieces qualify,
     the last one wins.  self.screenplay is '' if none qualifies.
     """
     self.screenplay = ''
     crew = gutils.trim(self.page_cast, '<h2>crew</h2>', '</dl>')
     for entry in crew.split('<dt>'):
         # NOTE: "> 0" means a match at the very start does not count.
         if entry.find('Screenwriter') > 0:
             self.screenplay = gutils.clean(gutils.before(entry, '</a>'))
 def get_cameraman(self):
     """Fill self.cameraman from the crew list of self.page_cast.

     The crew section is split on '<dt>'.  For every piece containing
     'Cinematographer' at an index greater than 0, the text before its
     first '</a>' is cleaned and stored; when several pieces qualify,
     the last one wins.  self.cameraman is '' if none qualifies.
     """
     self.cameraman = ''
     crew = gutils.trim(self.page_cast, '<h2>crew</h2>', '</dl>')
     for entry in crew.split('<dt>'):
         # NOTE: "> 0" means a match at the very start does not count.
         if entry.find('Cinematographer') > 0:
             self.cameraman = gutils.clean(gutils.before(entry, '</a>'))
Exemple #41
0
 def capwords(self, name):
     """Return name with capitalized words if its cleaned text is all upper case.

     The upper-case test is made on gutils.clean(name); the conversion
     itself (string.capwords) is applied to the unmodified name.  Any
     other name is returned unchanged.
     """
     # Original author's note: does not work with accented letters
     # => discarded in titles.
     cleaned = gutils.clean(name)
     if cleaned != cleaned.upper():
         return name
     return string.capwords(name)
 def get_director(self):
     """Fill self.director from the director row of self.creditspage.

     '<br />' separators are turned into ', ', the text is passed
     through gutils.clean, and a single trailing comma is removed.
     """
     row = gutils.trim(self.creditspage, u'reżyseria: <', '</tr>')
     names = gutils.clean(gutils.after(row, '>').replace('<br />', ', '))
     if names.endswith(','):
         names = names[:-1]
     self.director = names
Exemple #43
0
 def get_runtime(self):
     """Fill self.runtime from the duration entry of self.page.

     When the cleaned value has an 'H' at an index greater than 0, the
     parts before and after the 'H' are read as integers (hours and
     minutes) and stored as total minutes in string form.  Otherwise
     the part before ' mn' is stored.  An empty value is left as is.
     """
     self.runtime = gutils.clean(gutils.trim(self.page, u'Durée : ', '</span>'))
     if not self.runtime:
         return
     if self.runtime.find('H') > 0:
         hours = int(gutils.before(self.runtime, 'H'))
         minutes = int(gutils.after(self.runtime, 'H'))
         self.runtime = str(hours * 60 + minutes)
     else:
         self.runtime = gutils.before(self.runtime, ' mn')
Exemple #44
0
 def get_director(self):
     """Fill self.director from the "Regie" / "Regisseur:" block of self.page.

     The "Regisseur:" block is consulted only when the "Regie" block
     yields ''.  The result is passed through self.__before_more,
     '<br/>' becomes ', ', the text is cleaned and one comma at the
     very end is removed.
     """
     director = gutils.trim(self.page, '<h5>Regie</h5>', '<br/>')
     if director == '':
         director = gutils.trim(self.page, '<h5>Regisseur:</h5>', '</div>')
     director = self.__before_more(director)
     director = gutils.clean(director.replace('<br/>', ', '))
     self.director = re.sub(',$', '', director)
 def get_rating(self):
     """Set self.rating from the 'movie-rat-avg' element of self.page.

     The text after the first '>' of that element is taken; a decimal
     comma is turned into a dot, the text is cleaned, and the value is
     rounded and stored as a string.  An empty fragment is left as
     returned.  If the text cannot be converted, self.rating becomes ''
     instead of raising, matching the fallback the other rating
     parsers use.
     """
     fragment = gutils.trim(self.page, 'id="movie-rat-avg"', '</div>')
     self.rating = gutils.after(fragment, '>')
     if self.rating:
         number_text = gutils.clean(self.rating.replace(',', '.'))
         try:
             self.rating = str(round(float(number_text)))
         except (TypeError, ValueError, OverflowError):
             # Non-numeric placeholder text must not abort the whole parse.
             self.rating = ''
 def get_cameraman(self):
     """Fill self.cameraman from the cinematography row of self.creditspage.

     '<br />' separators are turned into ', ', the text is passed
     through gutils.clean, and a single trailing comma is removed.
     """
     section = gutils.regextrim(self.creditspage, u'zdjęcia: <', '(</tr>|<tr>)')
     names = gutils.clean(gutils.after(section, '>').replace('<br />', ', '))
     self.cameraman = names[:-1] if names.endswith(',') else names
Exemple #47
0
 def get_cast(self):
     """Fill self.cast with one entry per line from the "Elenco" cell.

     Applied in order: '<br>' becomes a newline, ', ' and tabs are
     removed, a space directly after a newline is dropped; the text is
     then cleaned and every newline run (with surrounding spaces/tabs)
     is collapsed to a single newline.
     """
     cast = gutils.trim(self.page, u'<b>Elenco:</b>', u'</td>')
     substitutions = (
         (u'<br>', u'\n'),
         (u', ', u''),
         (u'\t', u''),
         (u'\n ', u'\n'),
     )
     for old, new in substitutions:
         cast = cast.replace(old, new)
     cast = gutils.clean(cast)
     self.cast = re.sub('[ \t]*[\n]+[ \t]*', '\n', cast)
Exemple #48
0
 def get_rating(self):
     """Find the film's rating. From 0 to 10.

     The cleaned text between 'IMDB: ' and '</span>' in self.page is
     converted to a float and rounded to zero decimals.  If the text
     cannot be converted, self.rating is set to 0.
     """
     self.rating = gutils.clean(gutils.trim(self.page, u'IMDB: ', u'</span>'))
     try:
         self.rating = round(float(self.rating), 0)
     except (TypeError, ValueError):
         # Missing or non-numeric rating text.  Only conversion errors
         # are handled here; unrelated failures are not hidden.
         self.rating = 0
Exemple #49
0
 def get_cameraman(self):
     """Fill self.cameraman from the "Kamera" table of self.cast_page.

     The '(Kamera)' and '(nicht im Abspann)' annotations are removed,
     each '</a>' becomes ', ', the text is cleaned, a trailing comma
     (with any spaces/tabs after it) is removed and runs of spaces are
     collapsed to one.
     """
     text = gutils.trim(self.cast_page, '>Kamera</a>', '</table>')
     substitutions = (
         ('(Kamera)', ''),
         ('(nicht im Abspann)', ''),
         ('</a>', ', '),
     )
     for old, new in substitutions:
         text = text.replace(old, new)
     text = gutils.clean(text)
     text = re.sub(',[ \t]*$', '', text)
     self.cameraman = re.sub('[ ]+', ' ', text)
Exemple #50
0
 def get_o_title(self):
     """Fill self.o_title from the "Originaltitel" entry of self.page.

     The cleaned entry is run through string.capwords.  If the result
     is empty, the text after the first '>' of the 'film-titel' heading
     is used instead, without cleaning or capitalisation.
     """
     original = gutils.regextrim(self.page, '<b>Originaltitel:', '(</p>|<b>)')
     self.o_title = string.capwords(gutils.clean(original))
     if self.o_title:
         return
     heading = gutils.trim(self.page, 'class=\'film-titel\'', '</h1>')
     self.o_title = gutils.after(heading, '>')
 def get_screenplay(self):
     """Fill self.screenplay from the screenplay row of self.creditspage.

     '<br />' separators are turned into ', ', the text is passed
     through gutils.clean, and a single trailing comma is removed.
     """
     row = gutils.trim(self.creditspage, u'scenariusz: <', '</tr>')
     names = gutils.clean(gutils.after(row, '>').replace('<br />', ', '))
     if names.endswith(','):
         names = names[:-1]
     self.screenplay = names
 def get_notes(self):
     """Build self.notes from the technical-detail paragraphs of self.page.

     For each (marker, heading) pair below, the cleaned text between the
     marker and the following "</p>" is looked up; every non-empty value
     is appended as the heading, the value and a blank line.
     """
     sections = (
         ("<strong>Sprachen:</strong>", "Sprachen:\n"),
         ("<strong>Untertitel:</strong>", "Untertitel:\n"),
         ("<strong>Tonformat:</strong>", "Tonformat:\n"),
         ("<strong>Bildformat:</strong>", "Bildformat:\n"),
         # This marker has no colon inside the <strong> tag.
         ("<strong>EAN</strong>", "EAN:\n"),
     )
     self.notes = ""
     for marker, heading in sections:
         value = gutils.clean(gutils.trim(self.page, marker, "</p>"))
         if value != "":
             self.notes = self.notes + heading + value + "\n\n"
 def get_cameraman(self):
     """Fill self.cameraman from the "Fotografia" rows of self.page.

     Each '<tr>' becomes ', ', the text is cleaned, ' ,' is tightened to
     ',', runs of spaces are collapsed to one and a trailing comma (with
     any spaces after it) is removed.
     """
     rows = gutils.trim(self.page, 'Fotografia</font></td></tr><tr>', '<td colspan="2"')
     names = gutils.clean(rows.replace('<tr>', ', '))
     # Tidy the separators left over from the table layout.
     names = names.replace(' ,', ',')
     names = re.sub('[ ]+', ' ', names)
     self.cameraman = re.sub('[,][ ]*$', '', names)
 def get_screenplay(self):
     """Fill self.screenplay from the "Sceneggiatura" rows of self.page.

     Each '<tr>' becomes ', ', the text is cleaned, ' ,' is tightened to
     ',', runs of spaces are collapsed to one and a trailing comma (with
     any spaces after it) is removed.
     """
     rows = gutils.trim(self.page, 'Sceneggiatura</font></td></tr><tr>', '<td colspan="2"')
     names = gutils.clean(rows.replace('<tr>', ', '))
     # Tidy the separators left over from the table layout.
     names = names.replace(' ,', ',')
     names = re.sub('[ ]+', ' ', names)
     self.screenplay = re.sub('[,][ ]*$', '', names)
    def get_searches(self):
        """Collect search hits from self.page into self.ids and self.titles.

        Each regex match yields three captured groups: the digits that
        follow ``/film/film.asp?fi=``, the text inside the
        ``searchTitle textB`` span, and the text between a later ``"> ``
        and ``</span>``.  self.number_results is set to the number of
        matches.
        """
        elements = re.findall(
            """/film/film.asp\?fi=(\d+)"[^>]*>.*?searchTitle\s*textB">(.*?)</span>.*?"> (.*?)</span>""",
            self.page)
        self.number_results = len(elements)

        for element in elements:
            # Group 1 is stored as the id.
            self.ids.append(element[0])
            # Title = cleaned group 2, a space, then group 3 unmodified.
            self.titles.append(gutils.clean(element[1]) + ' ' + element[2])